/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "c-common.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"

static int x86_builtin_vectorization_cost (bool);
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes,
   so COSTS_N_BYTES (2) for an add evaluates to 4, the same value that
   COSTS_N_INSNS (1) gives an add in the speed-oriented tables below.  */
#define COSTS_N_BYTES(N) ((N) * 2)

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),		/* cost of an add instruction */
  COSTS_N_BYTES (3),		/* cost of a lea instruction */
  COSTS_N_BYTES (2),		/* variable shift costs */
  COSTS_N_BYTES (3),		/* constant shift costs */
  {COSTS_N_BYTES (3),		/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),		/* HI */
   COSTS_N_BYTES (3),		/* SI */
   COSTS_N_BYTES (3),		/* DI */
   COSTS_N_BYTES (5)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),		/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),		/* HI */
   COSTS_N_BYTES (3),		/* SI */
   COSTS_N_BYTES (3),		/* DI */
   COSTS_N_BYTES (5)},		/* other */
  COSTS_N_BYTES (3),		/* cost of movsx */
  COSTS_N_BYTES (3),		/* cost of movzx */
  0,				/* "large" insn */
  2,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {2, 2, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).
*/ 104 {2, 2, 2}, /* cost of storing integer registers */ 105 2, /* cost of reg,reg fld/fst */ 106 {2, 2, 2}, /* cost of loading fp registers 107 in SFmode, DFmode and XFmode */ 108 {2, 2, 2}, /* cost of storing fp registers 109 in SFmode, DFmode and XFmode */ 110 3, /* cost of moving MMX register */ 111 {3, 3}, /* cost of loading MMX registers 112 in SImode and DImode */ 113 {3, 3}, /* cost of storing MMX registers 114 in SImode and DImode */ 115 3, /* cost of moving SSE register */ 116 {3, 3, 3}, /* cost of loading SSE registers 117 in SImode, DImode and TImode */ 118 {3, 3, 3}, /* cost of storing SSE registers 119 in SImode, DImode and TImode */ 120 3, /* MMX or SSE register to integer */ 121 0, /* size of l1 cache */ 122 0, /* size of l2 cache */ 123 0, /* size of prefetch block */ 124 0, /* number of parallel prefetches */ 125 2, /* Branch cost */ 126 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */ 127 COSTS_N_BYTES (2), /* cost of FMUL instruction. */ 128 COSTS_N_BYTES (2), /* cost of FDIV instruction. */ 129 COSTS_N_BYTES (2), /* cost of FABS instruction. */ 130 COSTS_N_BYTES (2), /* cost of FCHS instruction. */ 131 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */ 132 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}, 133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}, 134 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}, 135 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}, 136 1, /* scalar_stmt_cost. */ 137 1, /* scalar load_cost. */ 138 1, /* scalar_store_cost. */ 139 1, /* vec_stmt_cost. */ 140 1, /* vec_to_scalar_cost. */ 141 1, /* scalar_to_vec_cost. */ 142 1, /* vec_align_load_cost. */ 143 1, /* vec_unalign_load_cost. */ 144 1, /* vec_store_cost. */ 145 1, /* cond_taken_branch_cost. */ 146 1, /* cond_not_taken_branch_cost. */ 147 }; 148 149 /* Processor costs (relative to an add) */ 150 static const 151 struct processor_costs i386_cost = { /* 386 specific costs */ 152 COSTS_N_INSNS (1), /* cost of an add instruction */ 153 COSTS_N_INSNS (1), /* cost of a lea instruction */ 154 COSTS_N_INSNS (3), /* variable shift costs */ 155 COSTS_N_INSNS (2), /* constant shift costs */ 156 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */ 157 COSTS_N_INSNS (6), /* HI */ 158 COSTS_N_INSNS (6), /* SI */ 159 COSTS_N_INSNS (6), /* DI */ 160 COSTS_N_INSNS (6)}, /* other */ 161 COSTS_N_INSNS (1), /* cost of multiply per each bit set */ 162 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */ 163 COSTS_N_INSNS (23), /* HI */ 164 COSTS_N_INSNS (23), /* SI */ 165 COSTS_N_INSNS (23), /* DI */ 166 COSTS_N_INSNS (23)}, /* other */ 167 COSTS_N_INSNS (3), /* cost of movsx */ 168 COSTS_N_INSNS (2), /* cost of movzx */ 169 15, /* "large" insn */ 170 3, /* MOVE_RATIO */ 171 4, /* cost for loading QImode using movzbl */ 172 {2, 4, 2}, /* cost of loading integer registers 173 in QImode, HImode and SImode. 174 Relative to reg-reg move (2). 
*/ 175 {2, 4, 2}, /* cost of storing integer registers */ 176 2, /* cost of reg,reg fld/fst */ 177 {8, 8, 8}, /* cost of loading fp registers 178 in SFmode, DFmode and XFmode */ 179 {8, 8, 8}, /* cost of storing fp registers 180 in SFmode, DFmode and XFmode */ 181 2, /* cost of moving MMX register */ 182 {4, 8}, /* cost of loading MMX registers 183 in SImode and DImode */ 184 {4, 8}, /* cost of storing MMX registers 185 in SImode and DImode */ 186 2, /* cost of moving SSE register */ 187 {4, 8, 16}, /* cost of loading SSE registers 188 in SImode, DImode and TImode */ 189 {4, 8, 16}, /* cost of storing SSE registers 190 in SImode, DImode and TImode */ 191 3, /* MMX or SSE register to integer */ 192 0, /* size of l1 cache */ 193 0, /* size of l2 cache */ 194 0, /* size of prefetch block */ 195 0, /* number of parallel prefetches */ 196 1, /* Branch cost */ 197 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */ 198 COSTS_N_INSNS (27), /* cost of FMUL instruction. */ 199 COSTS_N_INSNS (88), /* cost of FDIV instruction. */ 200 COSTS_N_INSNS (22), /* cost of FABS instruction. */ 201 COSTS_N_INSNS (24), /* cost of FCHS instruction. */ 202 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */ 203 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}, 204 DUMMY_STRINGOP_ALGS}, 205 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}, 206 DUMMY_STRINGOP_ALGS}, 207 1, /* scalar_stmt_cost. */ 208 1, /* scalar load_cost. */ 209 1, /* scalar_store_cost. */ 210 1, /* vec_stmt_cost. */ 211 1, /* vec_to_scalar_cost. */ 212 1, /* scalar_to_vec_cost. */ 213 1, /* vec_align_load_cost. */ 214 2, /* vec_unalign_load_cost. */ 215 1, /* vec_store_cost. */ 216 3, /* cond_taken_branch_cost. */ 217 1, /* cond_not_taken_branch_cost. */ 218 }; 219 220 static const 221 struct processor_costs i486_cost = { /* 486 specific costs */ 222 COSTS_N_INSNS (1), /* cost of an add instruction */ 223 COSTS_N_INSNS (1), /* cost of a lea instruction */ 224 COSTS_N_INSNS (3), /* variable shift costs */ 225 COSTS_N_INSNS (2), /* constant shift costs */ 226 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */ 227 COSTS_N_INSNS (12), /* HI */ 228 COSTS_N_INSNS (12), /* SI */ 229 COSTS_N_INSNS (12), /* DI */ 230 COSTS_N_INSNS (12)}, /* other */ 231 1, /* cost of multiply per each bit set */ 232 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */ 233 COSTS_N_INSNS (40), /* HI */ 234 COSTS_N_INSNS (40), /* SI */ 235 COSTS_N_INSNS (40), /* DI */ 236 COSTS_N_INSNS (40)}, /* other */ 237 COSTS_N_INSNS (3), /* cost of movsx */ 238 COSTS_N_INSNS (2), /* cost of movzx */ 239 15, /* "large" insn */ 240 3, /* MOVE_RATIO */ 241 4, /* cost for loading QImode using movzbl */ 242 {2, 4, 2}, /* cost of loading integer registers 243 in QImode, HImode and SImode. 244 Relative to reg-reg move (2). */ 245 {2, 4, 2}, /* cost of storing integer registers */ 246 2, /* cost of reg,reg fld/fst */ 247 {8, 8, 8}, /* cost of loading fp registers 248 in SFmode, DFmode and XFmode */ 249 {8, 8, 8}, /* cost of storing fp registers 250 in SFmode, DFmode and XFmode */ 251 2, /* cost of moving MMX register */ 252 {4, 8}, /* cost of loading MMX registers 253 in SImode and DImode */ 254 {4, 8}, /* cost of storing MMX registers 255 in SImode and DImode */ 256 2, /* cost of moving SSE register */ 257 {4, 8, 16}, /* cost of loading SSE registers 258 in SImode, DImode and TImode */ 259 {4, 8, 16}, /* cost of storing SSE registers 260 in SImode, DImode and TImode */ 261 3, /* MMX or SSE register to integer */ 262 4, /* size of l1 cache. 
486 has 8kB cache 263 shared for code and data, so 4kB is 264 not really precise. */ 265 4, /* size of l2 cache */ 266 0, /* size of prefetch block */ 267 0, /* number of parallel prefetches */ 268 1, /* Branch cost */ 269 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ 270 COSTS_N_INSNS (16), /* cost of FMUL instruction. */ 271 COSTS_N_INSNS (73), /* cost of FDIV instruction. */ 272 COSTS_N_INSNS (3), /* cost of FABS instruction. */ 273 COSTS_N_INSNS (3), /* cost of FCHS instruction. */ 274 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */ 275 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}}, 276 DUMMY_STRINGOP_ALGS}, 277 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}}, 278 DUMMY_STRINGOP_ALGS}, 279 1, /* scalar_stmt_cost. */ 280 1, /* scalar load_cost. */ 281 1, /* scalar_store_cost. */ 282 1, /* vec_stmt_cost. */ 283 1, /* vec_to_scalar_cost. */ 284 1, /* scalar_to_vec_cost. */ 285 1, /* vec_align_load_cost. */ 286 2, /* vec_unalign_load_cost. */ 287 1, /* vec_store_cost. */ 288 3, /* cond_taken_branch_cost. */ 289 1, /* cond_not_taken_branch_cost. */ 290 }; 291 292 static const 293 struct processor_costs pentium_cost = { 294 COSTS_N_INSNS (1), /* cost of an add instruction */ 295 COSTS_N_INSNS (1), /* cost of a lea instruction */ 296 COSTS_N_INSNS (4), /* variable shift costs */ 297 COSTS_N_INSNS (1), /* constant shift costs */ 298 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */ 299 COSTS_N_INSNS (11), /* HI */ 300 COSTS_N_INSNS (11), /* SI */ 301 COSTS_N_INSNS (11), /* DI */ 302 COSTS_N_INSNS (11)}, /* other */ 303 0, /* cost of multiply per each bit set */ 304 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */ 305 COSTS_N_INSNS (25), /* HI */ 306 COSTS_N_INSNS (25), /* SI */ 307 COSTS_N_INSNS (25), /* DI */ 308 COSTS_N_INSNS (25)}, /* other */ 309 COSTS_N_INSNS (3), /* cost of movsx */ 310 COSTS_N_INSNS (2), /* cost of movzx */ 311 8, /* "large" insn */ 312 6, /* MOVE_RATIO */ 313 6, /* cost for loading QImode using movzbl */ 314 {2, 4, 2}, /* cost of loading integer registers 315 in QImode, HImode and SImode. 316 Relative to reg-reg move (2). */ 317 {2, 4, 2}, /* cost of storing integer registers */ 318 2, /* cost of reg,reg fld/fst */ 319 {2, 2, 6}, /* cost of loading fp registers 320 in SFmode, DFmode and XFmode */ 321 {4, 4, 6}, /* cost of storing fp registers 322 in SFmode, DFmode and XFmode */ 323 8, /* cost of moving MMX register */ 324 {8, 8}, /* cost of loading MMX registers 325 in SImode and DImode */ 326 {8, 8}, /* cost of storing MMX registers 327 in SImode and DImode */ 328 2, /* cost of moving SSE register */ 329 {4, 8, 16}, /* cost of loading SSE registers 330 in SImode, DImode and TImode */ 331 {4, 8, 16}, /* cost of storing SSE registers 332 in SImode, DImode and TImode */ 333 3, /* MMX or SSE register to integer */ 334 8, /* size of l1 cache. */ 335 8, /* size of l2 cache */ 336 0, /* size of prefetch block */ 337 0, /* number of parallel prefetches */ 338 2, /* Branch cost */ 339 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ 340 COSTS_N_INSNS (3), /* cost of FMUL instruction. */ 341 COSTS_N_INSNS (39), /* cost of FDIV instruction. */ 342 COSTS_N_INSNS (1), /* cost of FABS instruction. */ 343 COSTS_N_INSNS (1), /* cost of FCHS instruction. */ 344 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */ 345 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}, 346 DUMMY_STRINGOP_ALGS}, 347 {{libcall, {{-1, rep_prefix_4_byte}}}, 348 DUMMY_STRINGOP_ALGS}, 349 1, /* scalar_stmt_cost. */ 350 1, /* scalar load_cost. 
 */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (4),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (4),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (4)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),		/* HI */
   COSTS_N_INSNS (17),		/* SI */
   COSTS_N_INSNS (17),		/* DI */
   COSTS_N_INSNS (17)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  6,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  256,				/* size of l2 cache */
  32,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),		/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a higher startup cost in the CPU,
     but after about 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.
*/ 437 }; 438 439 static const 440 struct processor_costs geode_cost = { 441 COSTS_N_INSNS (1), /* cost of an add instruction */ 442 COSTS_N_INSNS (1), /* cost of a lea instruction */ 443 COSTS_N_INSNS (2), /* variable shift costs */ 444 COSTS_N_INSNS (1), /* constant shift costs */ 445 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 446 COSTS_N_INSNS (4), /* HI */ 447 COSTS_N_INSNS (7), /* SI */ 448 COSTS_N_INSNS (7), /* DI */ 449 COSTS_N_INSNS (7)}, /* other */ 450 0, /* cost of multiply per each bit set */ 451 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */ 452 COSTS_N_INSNS (23), /* HI */ 453 COSTS_N_INSNS (39), /* SI */ 454 COSTS_N_INSNS (39), /* DI */ 455 COSTS_N_INSNS (39)}, /* other */ 456 COSTS_N_INSNS (1), /* cost of movsx */ 457 COSTS_N_INSNS (1), /* cost of movzx */ 458 8, /* "large" insn */ 459 4, /* MOVE_RATIO */ 460 1, /* cost for loading QImode using movzbl */ 461 {1, 1, 1}, /* cost of loading integer registers 462 in QImode, HImode and SImode. 463 Relative to reg-reg move (2). */ 464 {1, 1, 1}, /* cost of storing integer registers */ 465 1, /* cost of reg,reg fld/fst */ 466 {1, 1, 1}, /* cost of loading fp registers 467 in SFmode, DFmode and XFmode */ 468 {4, 6, 6}, /* cost of storing fp registers 469 in SFmode, DFmode and XFmode */ 470 471 1, /* cost of moving MMX register */ 472 {1, 1}, /* cost of loading MMX registers 473 in SImode and DImode */ 474 {1, 1}, /* cost of storing MMX registers 475 in SImode and DImode */ 476 1, /* cost of moving SSE register */ 477 {1, 1, 1}, /* cost of loading SSE registers 478 in SImode, DImode and TImode */ 479 {1, 1, 1}, /* cost of storing SSE registers 480 in SImode, DImode and TImode */ 481 1, /* MMX or SSE register to integer */ 482 64, /* size of l1 cache. */ 483 128, /* size of l2 cache. */ 484 32, /* size of prefetch block */ 485 1, /* number of parallel prefetches */ 486 1, /* Branch cost */ 487 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ 488 COSTS_N_INSNS (11), /* cost of FMUL instruction. */ 489 COSTS_N_INSNS (47), /* cost of FDIV instruction. */ 490 COSTS_N_INSNS (1), /* cost of FABS instruction. */ 491 COSTS_N_INSNS (1), /* cost of FCHS instruction. */ 492 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */ 493 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}, 494 DUMMY_STRINGOP_ALGS}, 495 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}, 496 DUMMY_STRINGOP_ALGS}, 497 1, /* scalar_stmt_cost. */ 498 1, /* scalar load_cost. */ 499 1, /* scalar_store_cost. */ 500 1, /* vec_stmt_cost. */ 501 1, /* vec_to_scalar_cost. */ 502 1, /* scalar_to_vec_cost. */ 503 1, /* vec_align_load_cost. */ 504 2, /* vec_unalign_load_cost. */ 505 1, /* vec_store_cost. */ 506 3, /* cond_taken_branch_cost. */ 507 1, /* cond_not_taken_branch_cost. 
*/ 508 }; 509 510 static const 511 struct processor_costs k6_cost = { 512 COSTS_N_INSNS (1), /* cost of an add instruction */ 513 COSTS_N_INSNS (2), /* cost of a lea instruction */ 514 COSTS_N_INSNS (1), /* variable shift costs */ 515 COSTS_N_INSNS (1), /* constant shift costs */ 516 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 517 COSTS_N_INSNS (3), /* HI */ 518 COSTS_N_INSNS (3), /* SI */ 519 COSTS_N_INSNS (3), /* DI */ 520 COSTS_N_INSNS (3)}, /* other */ 521 0, /* cost of multiply per each bit set */ 522 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 523 COSTS_N_INSNS (18), /* HI */ 524 COSTS_N_INSNS (18), /* SI */ 525 COSTS_N_INSNS (18), /* DI */ 526 COSTS_N_INSNS (18)}, /* other */ 527 COSTS_N_INSNS (2), /* cost of movsx */ 528 COSTS_N_INSNS (2), /* cost of movzx */ 529 8, /* "large" insn */ 530 4, /* MOVE_RATIO */ 531 3, /* cost for loading QImode using movzbl */ 532 {4, 5, 4}, /* cost of loading integer registers 533 in QImode, HImode and SImode. 534 Relative to reg-reg move (2). */ 535 {2, 3, 2}, /* cost of storing integer registers */ 536 4, /* cost of reg,reg fld/fst */ 537 {6, 6, 6}, /* cost of loading fp registers 538 in SFmode, DFmode and XFmode */ 539 {4, 4, 4}, /* cost of storing fp registers 540 in SFmode, DFmode and XFmode */ 541 2, /* cost of moving MMX register */ 542 {2, 2}, /* cost of loading MMX registers 543 in SImode and DImode */ 544 {2, 2}, /* cost of storing MMX registers 545 in SImode and DImode */ 546 2, /* cost of moving SSE register */ 547 {2, 2, 8}, /* cost of loading SSE registers 548 in SImode, DImode and TImode */ 549 {2, 2, 8}, /* cost of storing SSE registers 550 in SImode, DImode and TImode */ 551 6, /* MMX or SSE register to integer */ 552 32, /* size of l1 cache. */ 553 32, /* size of l2 cache. Some models 554 have integrated l2 cache, but 555 optimizing for k6 is not important 556 enough to worry about that. */ 557 32, /* size of prefetch block */ 558 1, /* number of parallel prefetches */ 559 1, /* Branch cost */ 560 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */ 561 COSTS_N_INSNS (2), /* cost of FMUL instruction. */ 562 COSTS_N_INSNS (56), /* cost of FDIV instruction. */ 563 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 564 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 565 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */ 566 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}, 567 DUMMY_STRINGOP_ALGS}, 568 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}, 569 DUMMY_STRINGOP_ALGS}, 570 1, /* scalar_stmt_cost. */ 571 1, /* scalar load_cost. */ 572 1, /* scalar_store_cost. */ 573 1, /* vec_stmt_cost. */ 574 1, /* vec_to_scalar_cost. */ 575 1, /* scalar_to_vec_cost. */ 576 1, /* vec_align_load_cost. */ 577 2, /* vec_unalign_load_cost. */ 578 1, /* vec_store_cost. */ 579 3, /* cond_taken_branch_cost. */ 580 1, /* cond_not_taken_branch_cost. 
*/ 581 }; 582 583 static const 584 struct processor_costs athlon_cost = { 585 COSTS_N_INSNS (1), /* cost of an add instruction */ 586 COSTS_N_INSNS (2), /* cost of a lea instruction */ 587 COSTS_N_INSNS (1), /* variable shift costs */ 588 COSTS_N_INSNS (1), /* constant shift costs */ 589 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */ 590 COSTS_N_INSNS (5), /* HI */ 591 COSTS_N_INSNS (5), /* SI */ 592 COSTS_N_INSNS (5), /* DI */ 593 COSTS_N_INSNS (5)}, /* other */ 594 0, /* cost of multiply per each bit set */ 595 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 596 COSTS_N_INSNS (26), /* HI */ 597 COSTS_N_INSNS (42), /* SI */ 598 COSTS_N_INSNS (74), /* DI */ 599 COSTS_N_INSNS (74)}, /* other */ 600 COSTS_N_INSNS (1), /* cost of movsx */ 601 COSTS_N_INSNS (1), /* cost of movzx */ 602 8, /* "large" insn */ 603 9, /* MOVE_RATIO */ 604 4, /* cost for loading QImode using movzbl */ 605 {3, 4, 3}, /* cost of loading integer registers 606 in QImode, HImode and SImode. 607 Relative to reg-reg move (2). */ 608 {3, 4, 3}, /* cost of storing integer registers */ 609 4, /* cost of reg,reg fld/fst */ 610 {4, 4, 12}, /* cost of loading fp registers 611 in SFmode, DFmode and XFmode */ 612 {6, 6, 8}, /* cost of storing fp registers 613 in SFmode, DFmode and XFmode */ 614 2, /* cost of moving MMX register */ 615 {4, 4}, /* cost of loading MMX registers 616 in SImode and DImode */ 617 {4, 4}, /* cost of storing MMX registers 618 in SImode and DImode */ 619 2, /* cost of moving SSE register */ 620 {4, 4, 6}, /* cost of loading SSE registers 621 in SImode, DImode and TImode */ 622 {4, 4, 5}, /* cost of storing SSE registers 623 in SImode, DImode and TImode */ 624 5, /* MMX or SSE register to integer */ 625 64, /* size of l1 cache. */ 626 256, /* size of l2 cache. */ 627 64, /* size of prefetch block */ 628 6, /* number of parallel prefetches */ 629 5, /* Branch cost */ 630 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ 631 COSTS_N_INSNS (4), /* cost of FMUL instruction. */ 632 COSTS_N_INSNS (24), /* cost of FDIV instruction. */ 633 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 634 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 635 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ 636 /* For some reason, Athlon deals better with REP prefix (relative to loops) 637 compared to K8. Alignment becomes important after 8 bytes for memcpy and 638 128 bytes for memset. */ 639 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}}, 640 DUMMY_STRINGOP_ALGS}, 641 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}}, 642 DUMMY_STRINGOP_ALGS}, 643 1, /* scalar_stmt_cost. */ 644 1, /* scalar load_cost. */ 645 1, /* scalar_store_cost. */ 646 1, /* vec_stmt_cost. */ 647 1, /* vec_to_scalar_cost. */ 648 1, /* scalar_to_vec_cost. */ 649 1, /* vec_align_load_cost. */ 650 2, /* vec_unalign_load_cost. */ 651 1, /* vec_store_cost. */ 652 3, /* cond_taken_branch_cost. */ 653 1, /* cond_not_taken_branch_cost. 
 */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (5)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 3, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea to leave the number of prefetches completely unlimited, as
     their execution also takes some time).  */
  100,				/* number of parallel prefetches */
  3,				/* Branch cost */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,				/* scalar_stmt_cost.  */
  2,				/* scalar load_cost.  */
  2,				/* scalar_store_cost.  */
  5,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  2,				/* vec_align_load_cost.  */
  3,				/* vec_unalign_load_cost.  */
  3,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  2,				/* cond_not_taken_branch_cost.  */
};
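/* A note on reading the stringop descriptors above (and the corresponding
   entries in the other cost tables); this is a summary of the convention,
   assuming the stringop_algs layout declared in i386.h.  The first aggregate
   describes memcpy and the second memset; within each, element 0 is used for
   32-bit code and element 1 for 64-bit code (DUMMY_STRINGOP_ALGS fills the
   64-bit slot for 32-bit-only CPUs).  The leading algorithm applies when the
   block size is not known at compile time; each following {max, alg} pair
   selects ALG for known sizes up to MAX bytes, with -1 meaning no upper
   bound.  So for K8, a 32-bit memcpy of a known size of at most 6 bytes uses
   an inline loop, up to 14 bytes an unrolled loop, and anything larger
   rep movsl.  */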
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (5)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/* HI */
   COSTS_N_INSNS (51),		/* SI */
   COSTS_N_INSNS (83),		/* DI */
   COSTS_N_INSNS (83)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  9,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
				/* On K8:
				    MOVD reg64, xmmreg	Double	FSTORE 4
				    MOVD reg32, xmmreg	Double	FSTORE 4
				   On AMDFAM10:
				    MOVD reg64, xmmreg	Double	FADD 3
							       1/1  1/1
				    MOVD reg32, xmmreg	Double	FADD 3
							       1/1  1/1 */
  64,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea to leave the number of prefetches completely unlimited, as
     their execution also takes some time).  */
  100,				/* number of parallel prefetches */
  2,				/* Branch cost */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */

  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,				/* scalar_stmt_cost.  */
  2,				/* scalar load_cost.  */
  2,				/* scalar_store_cost.  */
  6,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  2,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.
*/ 817 2, /* vec_store_cost. */ 818 2, /* cond_taken_branch_cost. */ 819 1, /* cond_not_taken_branch_cost. */ 820 }; 821 822 static const 823 struct processor_costs pentium4_cost = { 824 COSTS_N_INSNS (1), /* cost of an add instruction */ 825 COSTS_N_INSNS (3), /* cost of a lea instruction */ 826 COSTS_N_INSNS (4), /* variable shift costs */ 827 COSTS_N_INSNS (4), /* constant shift costs */ 828 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */ 829 COSTS_N_INSNS (15), /* HI */ 830 COSTS_N_INSNS (15), /* SI */ 831 COSTS_N_INSNS (15), /* DI */ 832 COSTS_N_INSNS (15)}, /* other */ 833 0, /* cost of multiply per each bit set */ 834 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */ 835 COSTS_N_INSNS (56), /* HI */ 836 COSTS_N_INSNS (56), /* SI */ 837 COSTS_N_INSNS (56), /* DI */ 838 COSTS_N_INSNS (56)}, /* other */ 839 COSTS_N_INSNS (1), /* cost of movsx */ 840 COSTS_N_INSNS (1), /* cost of movzx */ 841 16, /* "large" insn */ 842 6, /* MOVE_RATIO */ 843 2, /* cost for loading QImode using movzbl */ 844 {4, 5, 4}, /* cost of loading integer registers 845 in QImode, HImode and SImode. 846 Relative to reg-reg move (2). */ 847 {2, 3, 2}, /* cost of storing integer registers */ 848 2, /* cost of reg,reg fld/fst */ 849 {2, 2, 6}, /* cost of loading fp registers 850 in SFmode, DFmode and XFmode */ 851 {4, 4, 6}, /* cost of storing fp registers 852 in SFmode, DFmode and XFmode */ 853 2, /* cost of moving MMX register */ 854 {2, 2}, /* cost of loading MMX registers 855 in SImode and DImode */ 856 {2, 2}, /* cost of storing MMX registers 857 in SImode and DImode */ 858 12, /* cost of moving SSE register */ 859 {12, 12, 12}, /* cost of loading SSE registers 860 in SImode, DImode and TImode */ 861 {2, 2, 8}, /* cost of storing SSE registers 862 in SImode, DImode and TImode */ 863 10, /* MMX or SSE register to integer */ 864 8, /* size of l1 cache. */ 865 256, /* size of l2 cache. */ 866 64, /* size of prefetch block */ 867 6, /* number of parallel prefetches */ 868 2, /* Branch cost */ 869 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */ 870 COSTS_N_INSNS (7), /* cost of FMUL instruction. */ 871 COSTS_N_INSNS (43), /* cost of FDIV instruction. */ 872 COSTS_N_INSNS (2), /* cost of FABS instruction. */ 873 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ 874 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */ 875 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}}, 876 DUMMY_STRINGOP_ALGS}, 877 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte}, 878 {-1, libcall}}}, 879 DUMMY_STRINGOP_ALGS}, 880 1, /* scalar_stmt_cost. */ 881 1, /* scalar load_cost. */ 882 1, /* scalar_store_cost. */ 883 1, /* vec_stmt_cost. */ 884 1, /* vec_to_scalar_cost. */ 885 1, /* scalar_to_vec_cost. */ 886 1, /* vec_align_load_cost. */ 887 2, /* vec_unalign_load_cost. */ 888 1, /* vec_store_cost. */ 889 3, /* cond_taken_branch_cost. */ 890 1, /* cond_not_taken_branch_cost. 
*/ 891 }; 892 893 static const 894 struct processor_costs nocona_cost = { 895 COSTS_N_INSNS (1), /* cost of an add instruction */ 896 COSTS_N_INSNS (1), /* cost of a lea instruction */ 897 COSTS_N_INSNS (1), /* variable shift costs */ 898 COSTS_N_INSNS (1), /* constant shift costs */ 899 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */ 900 COSTS_N_INSNS (10), /* HI */ 901 COSTS_N_INSNS (10), /* SI */ 902 COSTS_N_INSNS (10), /* DI */ 903 COSTS_N_INSNS (10)}, /* other */ 904 0, /* cost of multiply per each bit set */ 905 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */ 906 COSTS_N_INSNS (66), /* HI */ 907 COSTS_N_INSNS (66), /* SI */ 908 COSTS_N_INSNS (66), /* DI */ 909 COSTS_N_INSNS (66)}, /* other */ 910 COSTS_N_INSNS (1), /* cost of movsx */ 911 COSTS_N_INSNS (1), /* cost of movzx */ 912 16, /* "large" insn */ 913 17, /* MOVE_RATIO */ 914 4, /* cost for loading QImode using movzbl */ 915 {4, 4, 4}, /* cost of loading integer registers 916 in QImode, HImode and SImode. 917 Relative to reg-reg move (2). */ 918 {4, 4, 4}, /* cost of storing integer registers */ 919 3, /* cost of reg,reg fld/fst */ 920 {12, 12, 12}, /* cost of loading fp registers 921 in SFmode, DFmode and XFmode */ 922 {4, 4, 4}, /* cost of storing fp registers 923 in SFmode, DFmode and XFmode */ 924 6, /* cost of moving MMX register */ 925 {12, 12}, /* cost of loading MMX registers 926 in SImode and DImode */ 927 {12, 12}, /* cost of storing MMX registers 928 in SImode and DImode */ 929 6, /* cost of moving SSE register */ 930 {12, 12, 12}, /* cost of loading SSE registers 931 in SImode, DImode and TImode */ 932 {12, 12, 12}, /* cost of storing SSE registers 933 in SImode, DImode and TImode */ 934 8, /* MMX or SSE register to integer */ 935 8, /* size of l1 cache. */ 936 1024, /* size of l2 cache. */ 937 128, /* size of prefetch block */ 938 8, /* number of parallel prefetches */ 939 1, /* Branch cost */ 940 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */ 941 COSTS_N_INSNS (8), /* cost of FMUL instruction. */ 942 COSTS_N_INSNS (40), /* cost of FDIV instruction. */ 943 COSTS_N_INSNS (3), /* cost of FABS instruction. */ 944 COSTS_N_INSNS (3), /* cost of FCHS instruction. */ 945 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */ 946 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}}, 947 {libcall, {{32, loop}, {20000, rep_prefix_8_byte}, 948 {100000, unrolled_loop}, {-1, libcall}}}}, 949 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte}, 950 {-1, libcall}}}, 951 {libcall, {{24, loop}, {64, unrolled_loop}, 952 {8192, rep_prefix_8_byte}, {-1, libcall}}}}, 953 1, /* scalar_stmt_cost. */ 954 1, /* scalar load_cost. */ 955 1, /* scalar_store_cost. */ 956 1, /* vec_stmt_cost. */ 957 1, /* vec_to_scalar_cost. */ 958 1, /* scalar_to_vec_cost. */ 959 1, /* vec_align_load_cost. */ 960 2, /* vec_unalign_load_cost. */ 961 1, /* vec_store_cost. */ 962 3, /* cond_taken_branch_cost. */ 963 1, /* cond_not_taken_branch_cost. 
 */
};

static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (3),		/* DI */
   COSTS_N_INSNS (3)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),		/* HI */
   COSTS_N_INSNS (22),		/* SI */
   COSTS_N_INSNS (22),		/* DI */
   COSTS_N_INSNS (22)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  16,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {6, 6, 6},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {6, 6, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {6, 6},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {6, 6, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 4},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  2,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  2048,				/* size of l2 cache.  */
  128,				/* size of prefetch block */
  8,				/* number of parallel prefetches */
  3,				/* Branch cost */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),		/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  /* On all chips taken into consideration, lea takes 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (2)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  17,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  3,				/* Branch cost */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
   and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/* HI */
   COSTS_N_INSNS (3),		/* SI */
   COSTS_N_INSNS (4),		/* DI */
   COSTS_N_INSNS (2)},		/* other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/* HI */
   COSTS_N_INSNS (42),		/* SI */
   COSTS_N_INSNS (74),		/* DI */
   COSTS_N_INSNS (74)},		/* other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  17,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  256,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  3,				/* Branch cost */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;
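/* Cost queries in this file go through the pointer above, e.g. ix86_cost->add
   or ix86_cost->mult_init[MODE_INDEX (mode)] (field names as declared for
   struct processor_costs in i386.h).  The &pentium_cost initializer is only a
   placeholder: the option-override code later in this file is expected to
   repoint ix86_cost at the table matching the selected -mtune processor, or
   at ix86_size_cost when optimizing for size.  */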
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be a common subset of the supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)

/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling it for Generic64 seems like a good code-size
     tradeoff.  We can't enable it for 32-bit generic because it does not
     work well with PPro-based chips.  */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */
  m_486 | m_PENT,

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation results, but after P4 was made no performance benefit
     was observed with branch hints.  They also increase code size.
     As a result, icc never generates branch hints.  */
  0,

  /* X86_TUNE_DOUBLE_WITH_ADD */
  ~m_386,

  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on the Generic32 compilation setting as well.  However,
     in the current implementation partial register stalls are not eliminated
     very well: they can be introduced via subregs synthesized by combine and
     can happen in caller/callee saving sequences.  Because this option pays
     back little on PPro-based chips and conflicts with the partial-register
     dependencies used by Athlon/P4-based chips, it is better to leave it off
     for generic32 for now.  */
  m_PPRO,

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */
  m_K6,

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
  m_PENT4,

  /* X86_TUNE_SPLIT_LONG_MOVES */
  m_PPRO,

  /* X86_TUNE_READ_MODIFY_WRITE */
  ~m_PENT,

  /* X86_TUNE_READ_MODIFY */
  ~(m_PENT | m_PPRO),

  /* X86_TUNE_PROMOTE_QIMODE */
  m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
  | m_GENERIC /* | m_PENT4 ? */,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_PENT | m_486 | m_386),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_PENT4 | m_NOCONA,

  /* X86_TUNE_QIMODE_MATH */
  ~0,

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL, this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls were more effective.  */
  ~m_PPRO,

  /* X86_TUNE_PROMOTE_QI_REGS */
  0,

  /* X86_TUNE_PROMOTE_HI_REGS */
  m_PPRO,

  /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
  m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ADD_ESP_8 */
  m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_4 */
  m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_8 */
  m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies */
  ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
    | m_GENERIC | m_GEODE),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here between PPro/Pentium4-based chips that treat 128-bit
     SSE registers as single units and K8-based chips that split SSE
     registers into two 64-bit halves.  This knob promotes all store
     destinations to be 128-bit to allow register renaming on 128-bit SSE
     units, but usually results in one extra microop on 64-bit SSE units.
     Experimental results show that disabling this option on P4 brings over
     a 20% SPECfp regression, while enabling it on K8 brings roughly a 2.4%
     regression that can be partly masked by careful scheduling of moves.  */
  m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,

  /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
  m_AMDFAM10,

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just the lower part of scalar values in the proper format,
     leaving the upper part undefined.  */
  m_ATHLON_K8,

  /* X86_TUNE_SSE_TYPELESS_STORES */
  m_AMD_MULTIPLE,

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_PENT4 | m_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */
  ~m_486,

  /* X86_TUNE_USE_FFREEP */
  m_AMD_MULTIPLE,

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_AMD_MULTIPLE | m_GENERIC),

  /* X86_TUNE_INTER_UNIT_CONVERSIONS */
  ~(m_AMDFAM10),

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window.  */
  m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SCHEDULE */
  m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_INCDEC */
  ~(m_PENT4 | m_NOCONA | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SHORTEN_X87_SSE */
  ~m_K8,

  /* X86_TUNE_AVOID_VECTOR_DECODE */
  m_K8 | m_GENERIC64,

  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
     HImode and SImode multiply, but 386 and 486 do HImode multiply
     faster.  */
  ~(m_386 | m_486),

  /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory is
     vector path on AMD machines.  */
  m_K8 | m_GENERIC64 | m_AMDFAM10,

  /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant is vector path on AMD
     machines.  */
  m_K8 | m_GENERIC64 | m_AMDFAM10,

  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than a MOV.  */
  m_PENT,

  /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
     but one byte longer.  */
  m_PENT,

  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well.  */
  m_K6,

  /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
     from FP to FP.  */
  m_AMDFAM10 | m_GENERIC,

  /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
     from integer to FP.  */
  m_AMDFAM10,

  /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
     with a subsequent conditional jump instruction into a single
     compare-and-branch uop.  */
  m_CORE2,
};
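/* The per-feature masks above are folded into ix86_tune_features once the
   active -mtune target is known.  A minimal sketch of that step, assuming
   the option-override code elsewhere in this file computes the tuning bit
   as (1 << ix86_tune):

     unsigned int ix86_tune_mask = 1 << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
	 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

   Individual entries are then read back through the TARGET_* convenience
   macros in i386.h.  */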
*/ 1465 ~(m_386 | m_486), 1466 1467 /* X86_ARCH_XADD: Exchange and add was added for 80486. */ 1468 ~m_386, 1469 1470 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */ 1471 ~m_386, 1472 }; 1473 1474 static const unsigned int x86_accumulate_outgoing_args 1475 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC; 1476 1477 static const unsigned int x86_arch_always_fancy_math_387 1478 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4 1479 | m_NOCONA | m_CORE2 | m_GENERIC; 1480 1481 static enum stringop_alg stringop_alg = no_stringop; 1482 1483 /* In case the average insn count for single function invocation is 1484 lower than this constant, emit fast (but longer) prologue and 1485 epilogue code. */ 1486 #define FAST_PROLOGUE_INSN_COUNT 20 1487 1488 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */ 1489 static const char *const qi_reg_name[] = QI_REGISTER_NAMES; 1490 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; 1491 static const char *const hi_reg_name[] = HI_REGISTER_NAMES; 1492 1493 /* Array of the smallest class containing reg number REGNO, indexed by 1494 REGNO. Used by REGNO_REG_CLASS in i386.h. */ 1495 1496 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] = 1497 { 1498 /* ax, dx, cx, bx */ 1499 AREG, DREG, CREG, BREG, 1500 /* si, di, bp, sp */ 1501 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS, 1502 /* FP registers */ 1503 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS, 1504 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, 1505 /* arg pointer */ 1506 NON_Q_REGS, 1507 /* flags, fpsr, fpcr, frame */ 1508 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS, 1509 /* SSE registers */ 1510 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 1511 SSE_REGS, SSE_REGS, 1512 /* MMX registers */ 1513 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, 1514 MMX_REGS, MMX_REGS, 1515 /* REX registers */ 1516 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 1517 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 1518 /* SSE REX registers */ 1519 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 1520 SSE_REGS, SSE_REGS, 1521 }; 1522 1523 /* The "default" register map used in 32bit mode. */ 1524 1525 int const dbx_register_map[FIRST_PSEUDO_REGISTER] = 1526 { 1527 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */ 1528 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */ 1529 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */ 1530 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */ 1531 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */ 1532 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ 1533 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ 1534 }; 1535 1536 /* The "default" register map used in 64bit mode. */ 1537 1538 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] = 1539 { 1540 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */ 1541 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */ 1542 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */ 1543 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */ 1544 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */ 1545 8,9,10,11,12,13,14,15, /* extended integer registers */ 1546 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */ 1547 }; 1548 1549 /* Define the register numbers to be used in Dwarf debugging information. 
1550 The SVR4 reference port C compiler uses the following register numbers 1551 in its Dwarf output code: 1552 0 for %eax (gcc regno = 0) 1553 1 for %ecx (gcc regno = 2) 1554 2 for %edx (gcc regno = 1) 1555 3 for %ebx (gcc regno = 3) 1556 4 for %esp (gcc regno = 7) 1557 5 for %ebp (gcc regno = 6) 1558 6 for %esi (gcc regno = 4) 1559 7 for %edi (gcc regno = 5) 1560 The following three DWARF register numbers are never generated by 1561 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4 1562 believes these numbers have these meanings. 1563 8 for %eip (no gcc equivalent) 1564 9 for %eflags (gcc regno = 17) 1565 10 for %trapno (no gcc equivalent) 1566 It is not at all clear how we should number the FP stack registers 1567 for the x86 architecture. If the version of SDB on x86/svr4 were 1568 a bit less brain dead with respect to floating-point then we would 1569 have a precedent to follow with respect to DWARF register numbers 1570 for x86 FP registers, but the SDB on x86/svr4 is so completely 1571 broken with respect to FP registers that it is hardly worth thinking 1572 of it as something to strive for compatibility with. 1573 The version of x86/svr4 SDB I have at the moment does (partially) 1574 seem to believe that DWARF register number 11 is associated with 1575 the x86 register %st(0), but that's about all. Higher DWARF 1576 register numbers don't seem to be associated with anything in 1577 particular, and even for DWARF regno 11, SDB only seems to under- 1578 stand that it should say that a variable lives in %st(0) (when 1579 asked via an `=' command) if we said it was in DWARF regno 11, 1580 but SDB still prints garbage when asked for the value of the 1581 variable in question (via a `/' command). 1582 (Also note that the labels SDB prints for various FP stack regs 1583 when doing an `x' command are all wrong.) 1584 Note that these problems generally don't affect the native SVR4 1585 C compiler because it doesn't allow the use of -O with -g and 1586 because when it is *not* optimizing, it allocates a memory 1587 location for each floating-point variable, and the memory 1588 location is what gets described in the DWARF AT_location 1589 attribute for the variable in question. 1590 Regardless of the severe mental illness of the x86/svr4 SDB, we 1591 do something sensible here and we use the following DWARF 1592 register numbers. Note that these are all stack-top-relative 1593 numbers. 1594 11 for %st(0) (gcc regno = 8) 1595 12 for %st(1) (gcc regno = 9) 1596 13 for %st(2) (gcc regno = 10) 1597 14 for %st(3) (gcc regno = 11) 1598 15 for %st(4) (gcc regno = 12) 1599 16 for %st(5) (gcc regno = 13) 1600 17 for %st(6) (gcc regno = 14) 1601 18 for %st(7) (gcc regno = 15) 1602 */ 1603 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = 1604 { 1605 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */ 1606 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */ 1607 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */ 1608 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */ 1609 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */ 1610 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ 1611 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ 1612 }; 1613 1614 /* Test and compare insns in i386.md store the information needed to 1615 generate branch and scc insns here. */ 1616 1617 rtx ix86_compare_op0 = NULL_RTX; 1618 rtx ix86_compare_op1 = NULL_RTX; 1619 rtx ix86_compare_emitted = NULL_RTX; 1620 1621 /* Define parameter passing and return registers. 
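As an informal reminder (the arrays below are what is actually authoritative): under the SysV x86-64 ABI the first six integer arguments are passed in %rdi, %rsi, %rdx, %rcx, %r8 and %r9, while the Microsoft x64 ABI passes the first four in %rcx, %rdx, %r8 and %r9; integer return values come back primarily in %rax, with %rdx as the second return register.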
*/ 1622 1623 static int const x86_64_int_parameter_registers[6] = 1624 { 1625 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG 1626 }; 1627 1628 static int const x86_64_ms_abi_int_parameter_registers[4] = 1629 { 1630 CX_REG, DX_REG, R8_REG, R9_REG 1631 }; 1632 1633 static int const x86_64_int_return_registers[4] = 1634 { 1635 AX_REG, DX_REG, DI_REG, SI_REG 1636 }; 1637 1638 /* Define the structure for the machine field in struct function. */ 1639 1640 struct stack_local_entry GTY(()) 1641 { 1642 unsigned short mode; 1643 unsigned short n; 1644 rtx rtl; 1645 struct stack_local_entry *next; 1646 }; 1647 1648 /* Structure describing stack frame layout. 1649 Stack grows downward: 1650 1651 [arguments] 1652 <- ARG_POINTER 1653 saved pc 1654 1655 saved frame pointer if frame_pointer_needed 1656 <- HARD_FRAME_POINTER 1657 [-msave-args] 1658 1659 [padding0] 1660 1661 [saved regs] 1662 1663 [padding05] 1664 1665 [saved SSE regs] 1666 1667 [padding1] \ 1668 ) 1669 [va_arg registers] ( 1670 > to_allocate <- FRAME_POINTER 1671 [frame] ( 1672 ) 1673 [padding2] / 1674 */ 1675 struct ix86_frame 1676 { 1677 int nmsave_args; 1678 int padding0; 1679 int nsseregs; 1680 int padding05; 1681 int nregs; 1682 int padding1; 1683 int va_arg_size; 1684 HOST_WIDE_INT frame; 1685 int padding2; 1686 int outgoing_arguments_size; 1687 int red_zone_size; 1688 1689 HOST_WIDE_INT to_allocate; 1690 /* The offsets relative to ARG_POINTER. */ 1691 HOST_WIDE_INT frame_pointer_offset; 1692 HOST_WIDE_INT hard_frame_pointer_offset; 1693 HOST_WIDE_INT stack_pointer_offset; 1694 1695 /* When save_regs_using_mov is set, emit prologue using 1696 move instead of push instructions. */ 1697 bool save_regs_using_mov; 1698 }; 1699 1700 /* Code model option. */ 1701 enum cmodel ix86_cmodel; 1702 /* Asm dialect. */ 1703 enum asm_dialect ix86_asm_dialect = ASM_ATT; 1704 /* TLS dialects. */ 1705 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU; 1706 1707 /* Which unit we are generating floating point math for. */ 1708 enum fpmath_unit ix86_fpmath; 1709 1710 /* Which cpu are we scheduling for. */ 1711 enum attr_cpu ix86_schedule; 1712 1713 /* Which cpu are we optimizing for. */ 1714 enum processor_type ix86_tune; 1715 1716 /* Which instruction set architecture to use. */ 1717 enum processor_type ix86_arch; 1718 1719 /* true if sse prefetch instruction is not NOOP. */ 1720 int x86_prefetch_sse; 1721 1722 /* ix86_regparm_string as a number */ 1723 static int ix86_regparm; 1724 1725 /* -mstackrealign option */ 1726 extern int ix86_force_align_arg_pointer; 1727 static const char ix86_force_align_arg_pointer_string[] 1728 = "force_align_arg_pointer"; 1729 1730 static rtx (*ix86_gen_leave) (void); 1731 static rtx (*ix86_gen_pop1) (rtx); 1732 static rtx (*ix86_gen_add3) (rtx, rtx, rtx); 1733 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx); 1734 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx); 1735 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx); 1736 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx); 1737 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx); 1738 1739 /* Preferred alignment for stack boundary in bits. */ 1740 unsigned int ix86_preferred_stack_boundary; 1741 1742 /* Alignment for incoming stack boundary in bits specified at 1743 command line. */ 1744 static unsigned int ix86_user_incoming_stack_boundary; 1745 1746 /* Default alignment for incoming stack boundary in bits. */ 1747 static unsigned int ix86_default_incoming_stack_boundary; 1748 1749 /* Alignment for incoming stack boundary in bits. 
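Like the preferred boundary above, this value is kept in bits, so the common 16-byte ABI alignment appears as 128; a -mincoming-stack-boundary=N request is converted later in override_options as (1 << N) * BITS_PER_UNIT, so for example N=4 also yields 128.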
*/ 1750 unsigned int ix86_incoming_stack_boundary; 1751 1752 /* Values 1-5: see jump.c */ 1753 int ix86_branch_cost; 1754 1755 /* Calling abi specific va_list type nodes. */ 1756 static GTY(()) tree sysv_va_list_type_node; 1757 static GTY(()) tree ms_va_list_type_node; 1758 1759 /* Variables which are this size or smaller are put in the data/bss 1760 or ldata/lbss sections. */ 1761 1762 int ix86_section_threshold = 65536; 1763 1764 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */ 1765 char internal_label_prefix[16]; 1766 int internal_label_prefix_len; 1767 1768 /* Fence to use after loop using movnt. */ 1769 tree x86_mfence; 1770 1771 static int ix86_nsaved_args (void); 1772 1773 /* Register class used for passing given 64bit part of the argument. 1774 These represent classes as documented by the PS ABI, with the exception 1775 of SSESF, SSEDF classes, that are basically SSE class, just gcc will 1776 use SF or DFmode move instead of DImode to avoid reformatting penalties. 1777 1778 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves 1779 whenever possible (upper half does contain padding). */ 1780 enum x86_64_reg_class 1781 { 1782 X86_64_NO_CLASS, 1783 X86_64_INTEGER_CLASS, 1784 X86_64_INTEGERSI_CLASS, 1785 X86_64_SSE_CLASS, 1786 X86_64_SSESF_CLASS, 1787 X86_64_SSEDF_CLASS, 1788 X86_64_SSEUP_CLASS, 1789 X86_64_X87_CLASS, 1790 X86_64_X87UP_CLASS, 1791 X86_64_COMPLEX_X87_CLASS, 1792 X86_64_MEMORY_CLASS 1793 }; 1794 1795 #define MAX_CLASSES 4 1796 1797 /* Table of constants used by fldpi, fldln2, etc.... */ 1798 static REAL_VALUE_TYPE ext_80387_constants_table [5]; 1799 static bool ext_80387_constants_init = 0; 1800 1801 1802 static struct machine_function * ix86_init_machine_status (void); 1803 static rtx ix86_function_value (const_tree, const_tree, bool); 1804 static int ix86_function_regparm (const_tree, const_tree); 1805 static void ix86_compute_frame_layout (struct ix86_frame *); 1806 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode, 1807 rtx, rtx, int); 1808 static void ix86_add_new_builtins (int); 1809 1810 enum ix86_function_specific_strings 1811 { 1812 IX86_FUNCTION_SPECIFIC_ARCH, 1813 IX86_FUNCTION_SPECIFIC_TUNE, 1814 IX86_FUNCTION_SPECIFIC_FPMATH, 1815 IX86_FUNCTION_SPECIFIC_MAX 1816 }; 1817 1818 static char *ix86_target_string (int, int, const char *, const char *, 1819 const char *, bool); 1820 static void ix86_debug_options (void) ATTRIBUTE_UNUSED; 1821 static void ix86_function_specific_save (struct cl_target_option *); 1822 static void ix86_function_specific_restore (struct cl_target_option *); 1823 static void ix86_function_specific_print (FILE *, int, 1824 struct cl_target_option *); 1825 static bool ix86_valid_target_attribute_p (tree, tree, tree, int); 1826 static bool ix86_valid_target_attribute_inner_p (tree, char *[]); 1827 static bool ix86_can_inline_p (tree, tree); 1828 static void ix86_set_current_function (tree); 1829 1830 static void pro_epilogue_adjust_stack (rtx, rtx, rtx, int); 1831 1832 1833 /* The svr4 ABI for the i386 says that records and unions are returned 1834 in memory. */ 1835 #ifndef DEFAULT_PCC_STRUCT_RETURN 1836 #define DEFAULT_PCC_STRUCT_RETURN 1 1837 #endif 1838 1839 /* Whether -mtune= or -march= were specified */ 1840 static int ix86_tune_defaulted; 1841 static int ix86_arch_specified; 1842 1843 /* Bit flags that specify the ISA we are compiling for. 
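Bits here can come from the target defaults, from explicit -m options handled in ix86_handle_option, or from -march processing in override_options. As an informal example, with -march=core2 -mno-ssse3 the SSSE3 bit is recorded in ix86_isa_flags_explicit, so the PTA_SSSE3 implication of the -march choice is not turned back on later.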
*/ 1844 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT; 1845 1846 /* A mask of ix86_isa_flags that includes bit X if X 1847 was set or cleared on the command line. */ 1848 static int ix86_isa_flags_explicit; 1849 1850 /* Define a set of ISAs which are available when a given ISA is 1851 enabled. MMX and SSE ISAs are handled separately. */ 1852 1853 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX 1854 #define OPTION_MASK_ISA_3DNOW_SET \ 1855 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET) 1856 1857 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE 1858 #define OPTION_MASK_ISA_SSE2_SET \ 1859 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET) 1860 #define OPTION_MASK_ISA_SSE3_SET \ 1861 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET) 1862 #define OPTION_MASK_ISA_SSSE3_SET \ 1863 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET) 1864 #define OPTION_MASK_ISA_SSE4_1_SET \ 1865 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET) 1866 #define OPTION_MASK_ISA_SSE4_2_SET \ 1867 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET) 1868 #define OPTION_MASK_ISA_AVX_SET \ 1869 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET) 1870 #define OPTION_MASK_ISA_FMA_SET \ 1871 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET) 1872 1873 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same 1874 as -msse4.2. */ 1875 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET 1876 1877 #define OPTION_MASK_ISA_SSE4A_SET \ 1878 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET) 1879 #define OPTION_MASK_ISA_SSE5_SET \ 1880 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET) 1881 1882 /* AES and PCLMUL need SSE2 because they use xmm registers */ 1883 #define OPTION_MASK_ISA_AES_SET \ 1884 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET) 1885 #define OPTION_MASK_ISA_PCLMUL_SET \ 1886 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET) 1887 1888 #define OPTION_MASK_ISA_ABM_SET \ 1889 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT) 1890 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT 1891 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16 1892 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF 1893 1894 /* Define a set of ISAs which aren't available when a given ISA is 1895 disabled. MMX and SSE ISAs are handled separately. */ 1896 1897 #define OPTION_MASK_ISA_MMX_UNSET \ 1898 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET) 1899 #define OPTION_MASK_ISA_3DNOW_UNSET \ 1900 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET) 1901 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A 1902 1903 #define OPTION_MASK_ISA_SSE_UNSET \ 1904 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET) 1905 #define OPTION_MASK_ISA_SSE2_UNSET \ 1906 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET) 1907 #define OPTION_MASK_ISA_SSE3_UNSET \ 1908 (OPTION_MASK_ISA_SSE3 \ 1909 | OPTION_MASK_ISA_SSSE3_UNSET \ 1910 | OPTION_MASK_ISA_SSE4A_UNSET ) 1911 #define OPTION_MASK_ISA_SSSE3_UNSET \ 1912 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET) 1913 #define OPTION_MASK_ISA_SSE4_1_UNSET \ 1914 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET) 1915 #define OPTION_MASK_ISA_SSE4_2_UNSET \ 1916 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET ) 1917 #define OPTION_MASK_ISA_AVX_UNSET \ 1918 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET) 1919 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA 1920 1921 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same 1922 as -mno-sse4.1. 
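As an illustration of how these chains compose: OPTION_MASK_ISA_SSE4_1_SET expands to the SSE4.1 bit plus the SSSE3, SSE3, SSE2 and SSE bits, so -msse4.1 turns on all of its prerequisites, while OPTION_MASK_ISA_SSE2_UNSET clears SSE2 together with everything that depends on it (SSE3, SSSE3, SSE4.1, SSE4.2, SSE4A, SSE5, AVX and FMA).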
*/ 1923 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET 1924 1925 #define OPTION_MASK_ISA_SSE4A_UNSET \ 1926 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET) 1927 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5 1928 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES 1929 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL 1930 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM 1931 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT 1932 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16 1933 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF 1934 1935 /* Vectorization library interface and handlers. */ 1936 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL; 1937 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree); 1938 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree); 1939 1940 /* Processor target table, indexed by processor number */ 1941 struct ptt 1942 { 1943 const struct processor_costs *cost; /* Processor costs */ 1944 const int align_loop; /* Default alignments. */ 1945 const int align_loop_max_skip; 1946 const int align_jump; 1947 const int align_jump_max_skip; 1948 const int align_func; 1949 }; 1950 1951 static const struct ptt processor_target_table[PROCESSOR_max] = 1952 { 1953 {&i386_cost, 4, 3, 4, 3, 4}, 1954 {&i486_cost, 16, 15, 16, 15, 16}, 1955 {&pentium_cost, 16, 7, 16, 7, 16}, 1956 {&pentiumpro_cost, 16, 15, 16, 10, 16}, 1957 {&geode_cost, 0, 0, 0, 0, 0}, 1958 {&k6_cost, 32, 7, 32, 7, 32}, 1959 {&athlon_cost, 16, 7, 16, 7, 16}, 1960 {&pentium4_cost, 0, 0, 0, 0, 0}, 1961 {&k8_cost, 16, 7, 16, 7, 16}, 1962 {&nocona_cost, 0, 0, 0, 0, 0}, 1963 {&core2_cost, 16, 10, 16, 10, 16}, 1964 {&generic32_cost, 16, 7, 16, 7, 16}, 1965 {&generic64_cost, 16, 10, 16, 10, 16}, 1966 {&amdfam10_cost, 32, 24, 32, 7, 32} 1967 }; 1968 1969 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = 1970 { 1971 "generic", 1972 "i386", 1973 "i486", 1974 "pentium", 1975 "pentium-mmx", 1976 "pentiumpro", 1977 "pentium2", 1978 "pentium3", 1979 "pentium4", 1980 "pentium-m", 1981 "prescott", 1982 "nocona", 1983 "core2", 1984 "geode", 1985 "k6", 1986 "k6-2", 1987 "k6-3", 1988 "athlon", 1989 "athlon-4", 1990 "k8", 1991 "amdfam10" 1992 }; 1993 1994 /* Implement TARGET_HANDLE_OPTION. 
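Each ISA case below follows the same pattern: for -mfoo it ORs OPTION_MASK_ISA_FOO_SET into ix86_isa_flags, for -mno-foo it ANDs out OPTION_MASK_ISA_FOO_UNSET, and in both directions it records the mask in ix86_isa_flags_explicit so that the defaulting logic in override_options can tell an explicit user choice from an implied one.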
*/ 1995 1996 static bool 1997 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) 1998 { 1999 switch (code) 2000 { 2001 case OPT_mmmx: 2002 if (value) 2003 { 2004 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET; 2005 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET; 2006 } 2007 else 2008 { 2009 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET; 2010 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET; 2011 } 2012 return true; 2013 2014 case OPT_m3dnow: 2015 if (value) 2016 { 2017 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET; 2018 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET; 2019 } 2020 else 2021 { 2022 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET; 2023 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET; 2024 } 2025 return true; 2026 2027 case OPT_m3dnowa: 2028 return false; 2029 2030 case OPT_msse: 2031 if (value) 2032 { 2033 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET; 2034 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET; 2035 } 2036 else 2037 { 2038 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET; 2039 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET; 2040 } 2041 return true; 2042 2043 case OPT_msse2: 2044 if (value) 2045 { 2046 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET; 2047 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET; 2048 } 2049 else 2050 { 2051 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET; 2052 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET; 2053 } 2054 return true; 2055 2056 case OPT_msse3: 2057 if (value) 2058 { 2059 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET; 2060 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET; 2061 } 2062 else 2063 { 2064 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET; 2065 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET; 2066 } 2067 return true; 2068 2069 case OPT_mssse3: 2070 if (value) 2071 { 2072 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET; 2073 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET; 2074 } 2075 else 2076 { 2077 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET; 2078 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET; 2079 } 2080 return true; 2081 2082 case OPT_msse4_1: 2083 if (value) 2084 { 2085 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET; 2086 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET; 2087 } 2088 else 2089 { 2090 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET; 2091 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET; 2092 } 2093 return true; 2094 2095 case OPT_msse4_2: 2096 if (value) 2097 { 2098 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET; 2099 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET; 2100 } 2101 else 2102 { 2103 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET; 2104 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET; 2105 } 2106 return true; 2107 2108 case OPT_mavx: 2109 if (value) 2110 { 2111 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET; 2112 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET; 2113 } 2114 else 2115 { 2116 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET; 2117 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET; 2118 } 2119 return true; 2120 2121 case OPT_mfma: 2122 if (value) 2123 { 2124 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET; 2125 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET; 2126 } 2127 else 2128 { 2129 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET; 2130 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET; 2131 } 2132 return true; 2133 2134 case OPT_msse4: 2135 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET; 2136 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET; 2137 return true; 2138 2139 case OPT_mno_sse4: 2140 
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET; 2141 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET; 2142 return true; 2143 2144 case OPT_msse4a: 2145 if (value) 2146 { 2147 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET; 2148 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET; 2149 } 2150 else 2151 { 2152 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET; 2153 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET; 2154 } 2155 return true; 2156 2157 case OPT_msse5: 2158 if (value) 2159 { 2160 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET; 2161 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET; 2162 } 2163 else 2164 { 2165 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET; 2166 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET; 2167 } 2168 return true; 2169 2170 case OPT_mabm: 2171 if (value) 2172 { 2173 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET; 2174 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET; 2175 } 2176 else 2177 { 2178 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET; 2179 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET; 2180 } 2181 return true; 2182 2183 case OPT_mpopcnt: 2184 if (value) 2185 { 2186 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET; 2187 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET; 2188 } 2189 else 2190 { 2191 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET; 2192 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET; 2193 } 2194 return true; 2195 2196 case OPT_msahf: 2197 if (value) 2198 { 2199 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET; 2200 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET; 2201 } 2202 else 2203 { 2204 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET; 2205 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET; 2206 } 2207 return true; 2208 2209 case OPT_mcx16: 2210 if (value) 2211 { 2212 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET; 2213 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET; 2214 } 2215 else 2216 { 2217 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET; 2218 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET; 2219 } 2220 return true; 2221 2222 case OPT_maes: 2223 if (value) 2224 { 2225 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET; 2226 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET; 2227 } 2228 else 2229 { 2230 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET; 2231 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET; 2232 } 2233 return true; 2234 2235 case OPT_mpclmul: 2236 if (value) 2237 { 2238 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET; 2239 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET; 2240 } 2241 else 2242 { 2243 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET; 2244 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET; 2245 } 2246 return true; 2247 2248 default: 2249 return true; 2250 } 2251 } 2252 2253 /* Return a string the documents the current -m options. The caller is 2254 responsible for freeing the string. */ 2255 2256 static char * 2257 ix86_target_string (int isa, int flags, const char *arch, const char *tune, 2258 const char *fpmath, bool add_nl_p) 2259 { 2260 struct ix86_target_opts 2261 { 2262 const char *option; /* option string */ 2263 int mask; /* isa mask options */ 2264 }; 2265 2266 /* This table is ordered so that options like -msse5 or -msse4.2 that imply 2267 preceding options while match those first. 
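For example (informally), a target with SSE4.2 enabled is printed as "-msse4.2 -msse4.1 -mssse3 -msse3 -msse2 -msse" since every implied mask bit is still set and is matched in this order, and any leftover ISA bits not covered by the table end up in the "(other isa: 0x...)" entry.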
*/ 2268 static struct ix86_target_opts isa_opts[] = 2269 { 2270 { "-m64", OPTION_MASK_ISA_64BIT }, 2271 { "-msse5", OPTION_MASK_ISA_SSE5 }, 2272 { "-msse4a", OPTION_MASK_ISA_SSE4A }, 2273 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 }, 2274 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 }, 2275 { "-mssse3", OPTION_MASK_ISA_SSSE3 }, 2276 { "-msse3", OPTION_MASK_ISA_SSE3 }, 2277 { "-msse2", OPTION_MASK_ISA_SSE2 }, 2278 { "-msse", OPTION_MASK_ISA_SSE }, 2279 { "-m3dnow", OPTION_MASK_ISA_3DNOW }, 2280 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A }, 2281 { "-mmmx", OPTION_MASK_ISA_MMX }, 2282 { "-mabm", OPTION_MASK_ISA_ABM }, 2283 { "-mpopcnt", OPTION_MASK_ISA_POPCNT }, 2284 { "-maes", OPTION_MASK_ISA_AES }, 2285 { "-mpclmul", OPTION_MASK_ISA_PCLMUL }, 2286 }; 2287 2288 /* Flag options. */ 2289 static struct ix86_target_opts flag_opts[] = 2290 { 2291 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE }, 2292 { "-m80387", MASK_80387 }, 2293 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS }, 2294 { "-malign-double", MASK_ALIGN_DOUBLE }, 2295 { "-mcld", MASK_CLD }, 2296 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS }, 2297 { "-mieee-fp", MASK_IEEE_FP }, 2298 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS }, 2299 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY }, 2300 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT }, 2301 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS }, 2302 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 }, 2303 { "-mno-fused-madd", MASK_NO_FUSED_MADD }, 2304 { "-mno-push-args", MASK_NO_PUSH_ARGS }, 2305 { "-mno-red-zone", MASK_NO_RED_ZONE }, 2306 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER }, 2307 { "-mrecip", MASK_RECIP }, 2308 { "-mrtd", MASK_RTD }, 2309 { "-msseregparm", MASK_SSEREGPARM }, 2310 { "-mstack-arg-probe", MASK_STACK_PROBE }, 2311 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS }, 2312 }; 2313 2314 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2]; 2315 2316 char isa_other[40]; 2317 char target_other[40]; 2318 unsigned num = 0; 2319 unsigned i, j; 2320 char *ret; 2321 char *ptr; 2322 size_t len; 2323 size_t line_len; 2324 size_t sep_len; 2325 2326 memset (opts, '\0', sizeof (opts)); 2327 2328 /* Add -march= option. */ 2329 if (arch) 2330 { 2331 opts[num][0] = "-march="; 2332 opts[num++][1] = arch; 2333 } 2334 2335 /* Add -mtune= option. */ 2336 if (tune) 2337 { 2338 opts[num][0] = "-mtune="; 2339 opts[num++][1] = tune; 2340 } 2341 2342 /* Pick out the ISA options. */ 2343 for (i = 0; i < ARRAY_SIZE (isa_opts); i++) 2344 { 2345 if ((isa & isa_opts[i].mask) != 0) 2346 { 2347 opts[num++][0] = isa_opts[i].option; 2348 isa &= ~ isa_opts[i].mask; 2349 } 2350 } 2351 2352 if (isa && add_nl_p) 2353 { 2354 opts[num++][0] = isa_other; 2355 sprintf (isa_other, "(other isa: 0x%x)", isa); 2356 } 2357 2358 /* Add flag options. */ 2359 for (i = 0; i < ARRAY_SIZE (flag_opts); i++) 2360 { 2361 if ((flags & flag_opts[i].mask) != 0) 2362 { 2363 opts[num++][0] = flag_opts[i].option; 2364 flags &= ~ flag_opts[i].mask; 2365 } 2366 } 2367 2368 if (flags && add_nl_p) 2369 { 2370 opts[num++][0] = target_other; 2371 sprintf (target_other, "(other flags: 0x%x)", flags); 2372 } 2373 2374 /* Add -mfpmath= option. */ 2375 if (fpmath) 2376 { 2377 opts[num][0] = "-mfpmath="; 2378 opts[num++][1] = fpmath; 2379 } 2380 2381 /* Any options? */ 2382 if (num == 0) 2383 return NULL; 2384 2385 gcc_assert (num < ARRAY_SIZE (opts)); 2386 2387 /* Size the string. */ 2388 len = 0; 2389 sep_len = (add_nl_p) ?
3 : 1; 2390 for (i = 0; i < num; i++) 2391 { 2392 len += sep_len; 2393 for (j = 0; j < 2; j++) 2394 if (opts[i][j]) 2395 len += strlen (opts[i][j]); 2396 } 2397 2398 /* Build the string. */ 2399 ret = ptr = (char *) xmalloc (len); 2400 line_len = 0; 2401 2402 for (i = 0; i < num; i++) 2403 { 2404 size_t len2[2]; 2405 2406 for (j = 0; j < 2; j++) 2407 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0; 2408 2409 if (i != 0) 2410 { 2411 *ptr++ = ' '; 2412 line_len++; 2413 2414 if (add_nl_p && line_len + len2[0] + len2[1] > 70) 2415 { 2416 *ptr++ = '\\'; 2417 *ptr++ = '\n'; 2418 line_len = 0; 2419 } 2420 } 2421 2422 for (j = 0; j < 2; j++) 2423 if (opts[i][j]) 2424 { 2425 memcpy (ptr, opts[i][j], len2[j]); 2426 ptr += len2[j]; 2427 line_len += len2[j]; 2428 } 2429 } 2430 2431 *ptr = '\0'; 2432 gcc_assert (ret + len >= ptr); 2433 2434 return ret; 2435 } 2436 2437 /* Function that is callable from the debugger to print the current 2438 options. */ 2439 void 2440 ix86_debug_options (void) 2441 { 2442 char *opts = ix86_target_string (ix86_isa_flags, target_flags, 2443 ix86_arch_string, ix86_tune_string, 2444 ix86_fpmath_string, true); 2445 2446 if (opts) 2447 { 2448 fprintf (stderr, "%s\n\n", opts); 2449 free (opts); 2450 } 2451 else 2452 fprintf (stderr, "<no options>\n\n"); 2453 2454 return; 2455 } 2456 2457 /* Sometimes certain combinations of command options do not make 2458 sense on a particular target machine. You can define a macro 2459 `OVERRIDE_OPTIONS' to take account of this. This macro, if 2460 defined, is executed once just after all the command options have 2461 been parsed. 2462 2463 Don't use this macro to turn on various extra optimizations for 2464 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ 2465 2466 void 2467 override_options (bool main_args_p) 2468 { 2469 int i; 2470 unsigned int ix86_arch_mask, ix86_tune_mask; 2471 const char *prefix; 2472 const char *suffix; 2473 const char *sw; 2474 2475 /* Comes from final.c -- no real reason to change it. */ 2476 #define MAX_CODE_ALIGN 16 2477 2478 enum pta_flags 2479 { 2480 PTA_SSE = 1 << 0, 2481 PTA_SSE2 = 1 << 1, 2482 PTA_SSE3 = 1 << 2, 2483 PTA_MMX = 1 << 3, 2484 PTA_PREFETCH_SSE = 1 << 4, 2485 PTA_3DNOW = 1 << 5, 2486 PTA_3DNOW_A = 1 << 6, 2487 PTA_64BIT = 1 << 7, 2488 PTA_SSSE3 = 1 << 8, 2489 PTA_CX16 = 1 << 9, 2490 PTA_POPCNT = 1 << 10, 2491 PTA_ABM = 1 << 11, 2492 PTA_SSE4A = 1 << 12, 2493 PTA_NO_SAHF = 1 << 13, 2494 PTA_SSE4_1 = 1 << 14, 2495 PTA_SSE4_2 = 1 << 15, 2496 PTA_SSE5 = 1 << 16, 2497 PTA_AES = 1 << 17, 2498 PTA_PCLMUL = 1 << 18, 2499 PTA_AVX = 1 << 19, 2500 PTA_FMA = 1 << 20 2501 }; 2502 2503 static struct pta 2504 { 2505 const char *const name; /* processor name or nickname. 
*/ 2506 const enum processor_type processor; 2507 const enum attr_cpu schedule; 2508 const unsigned /*enum pta_flags*/ flags; 2509 } 2510 const processor_alias_table[] = 2511 { 2512 {"i386", PROCESSOR_I386, CPU_NONE, 0}, 2513 {"i486", PROCESSOR_I486, CPU_NONE, 0}, 2514 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0}, 2515 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0}, 2516 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX}, 2517 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX}, 2518 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW}, 2519 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW}, 2520 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE}, 2521 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0}, 2522 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0}, 2523 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX}, 2524 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 2525 PTA_MMX | PTA_SSE}, 2526 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 2527 PTA_MMX | PTA_SSE}, 2528 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 2529 PTA_MMX | PTA_SSE | PTA_SSE2}, 2530 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE, 2531 PTA_MMX |PTA_SSE | PTA_SSE2}, 2532 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE, 2533 PTA_MMX | PTA_SSE | PTA_SSE2}, 2534 {"prescott", PROCESSOR_NOCONA, CPU_NONE, 2535 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3}, 2536 {"nocona", PROCESSOR_NOCONA, CPU_NONE, 2537 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 2538 | PTA_CX16 | PTA_NO_SAHF}, 2539 {"core2", PROCESSOR_CORE2, CPU_CORE2, 2540 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 2541 | PTA_SSSE3 | PTA_CX16}, 2542 {"geode", PROCESSOR_GEODE, CPU_GEODE, 2543 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE}, 2544 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX}, 2545 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW}, 2546 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW}, 2547 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON, 2548 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE}, 2549 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON, 2550 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE}, 2551 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON, 2552 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE}, 2553 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON, 2554 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE}, 2555 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON, 2556 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE}, 2557 {"x86-64", PROCESSOR_K8, CPU_K8, 2558 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF}, 2559 {"k8", PROCESSOR_K8, CPU_K8, 2560 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE 2561 | PTA_SSE2 | PTA_NO_SAHF}, 2562 {"k8-sse3", PROCESSOR_K8, CPU_K8, 2563 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE 2564 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF}, 2565 {"opteron", PROCESSOR_K8, CPU_K8, 2566 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE 2567 | PTA_SSE2 | PTA_NO_SAHF}, 2568 {"opteron-sse3", PROCESSOR_K8, CPU_K8, 2569 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE 2570 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF}, 2571 {"athlon64", PROCESSOR_K8, CPU_K8, 2572 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE 2573 | PTA_SSE2 | PTA_NO_SAHF}, 2574 {"athlon64-sse3", PROCESSOR_K8, CPU_K8, 2575 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE 2576 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF}, 2577 {"athlon-fx", PROCESSOR_K8, CPU_K8, 2578 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE 2579 | PTA_SSE2 | PTA_NO_SAHF}, 2580 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10, 2581 
PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE 2582 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM}, 2583 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10, 2584 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE 2585 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM}, 2586 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO, 2587 0 /* flags are only used for -march switch. */ }, 2588 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64, 2589 PTA_64BIT /* flags are only used for -march switch. */ }, 2590 }; 2591 2592 int const pta_size = ARRAY_SIZE (processor_alias_table); 2593 2594 /* Set up prefix/suffix so the error messages refer to either the command 2595 line argument, or the attribute(target). */ 2596 if (main_args_p) 2597 { 2598 prefix = "-m"; 2599 suffix = ""; 2600 sw = "switch"; 2601 } 2602 else 2603 { 2604 prefix = "option(\""; 2605 suffix = "\")"; 2606 sw = "attribute"; 2607 } 2608 2609 #ifdef SUBTARGET_OVERRIDE_OPTIONS 2610 SUBTARGET_OVERRIDE_OPTIONS; 2611 #endif 2612 2613 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS 2614 SUBSUBTARGET_OVERRIDE_OPTIONS; 2615 #endif 2616 2617 /* -fPIC is the default for x86_64. */ 2618 if (TARGET_MACHO && TARGET_64BIT) 2619 flag_pic = 2; 2620 2621 /* Set the default values for switches whose default depends on TARGET_64BIT 2622 in case they weren't overwritten by command line options. */ 2623 if (TARGET_64BIT) 2624 { 2625 /* Mach-O doesn't support omitting the frame pointer for now. */ 2626 if (flag_omit_frame_pointer == 2) 2627 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1); 2628 if (flag_asynchronous_unwind_tables == 2) 2629 flag_asynchronous_unwind_tables = 1; 2630 if (flag_pcc_struct_return == 2) 2631 flag_pcc_struct_return = 0; 2632 } 2633 else 2634 { 2635 if (flag_omit_frame_pointer == 2) 2636 flag_omit_frame_pointer = 0; 2637 if (flag_asynchronous_unwind_tables == 2) 2638 flag_asynchronous_unwind_tables = 0; 2639 if (flag_pcc_struct_return == 2) 2640 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; 2641 } 2642 2643 /* Need to check -mtune=generic first. */ 2644 if (ix86_tune_string) 2645 { 2646 if (!strcmp (ix86_tune_string, "generic") 2647 || !strcmp (ix86_tune_string, "i686") 2648 /* As special support for cross compilers we read -mtune=native 2649 as -mtune=generic. With native compilers we won't see the 2650 -mtune=native, as it was changed by the driver. */ 2651 || !strcmp (ix86_tune_string, "native")) 2652 { 2653 if (TARGET_64BIT) 2654 ix86_tune_string = "generic64"; 2655 else 2656 ix86_tune_string = "generic32"; 2657 } 2658 /* If this call is for setting the option attribute, allow the 2659 generic32/generic64 that was previously set. */ 2660 else if (!main_args_p 2661 && (!strcmp (ix86_tune_string, "generic32") 2662 || !strcmp (ix86_tune_string, "generic64"))) 2663 ; 2664 else if (!strncmp (ix86_tune_string, "generic", 7)) 2665 error ("bad value (%s) for %stune=%s %s", 2666 ix86_tune_string, prefix, suffix, sw); 2667 } 2668 else 2669 { 2670 if (ix86_arch_string) 2671 ix86_tune_string = ix86_arch_string; 2672 if (!ix86_tune_string) 2673 { 2674 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT]; 2675 ix86_tune_defaulted = 1; 2676 } 2677 2678 /* ix86_tune_string is set to ix86_arch_string or defaulted. We 2679 need to use a sensible tune option. 
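For instance, plain -march=i686 with no -mtune leaves ix86_tune_string set to "i686", which the check below rewrites to generic32 (or generic64 in 64-bit mode) rather than scheduling specifically for PentiumPro.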
*/ 2680 if (!strcmp (ix86_tune_string, "generic") 2681 || !strcmp (ix86_tune_string, "x86-64") 2682 || !strcmp (ix86_tune_string, "i686")) 2683 { 2684 if (TARGET_64BIT) 2685 ix86_tune_string = "generic64"; 2686 else 2687 ix86_tune_string = "generic32"; 2688 } 2689 } 2690 if (ix86_stringop_string) 2691 { 2692 if (!strcmp (ix86_stringop_string, "rep_byte")) 2693 stringop_alg = rep_prefix_1_byte; 2694 else if (!strcmp (ix86_stringop_string, "libcall")) 2695 stringop_alg = libcall; 2696 else if (!strcmp (ix86_stringop_string, "rep_4byte")) 2697 stringop_alg = rep_prefix_4_byte; 2698 else if (!strcmp (ix86_stringop_string, "rep_8byte") 2699 && TARGET_64BIT) 2700 /* rep; movq isn't available in 32-bit code. */ 2701 stringop_alg = rep_prefix_8_byte; 2702 else if (!strcmp (ix86_stringop_string, "byte_loop")) 2703 stringop_alg = loop_1_byte; 2704 else if (!strcmp (ix86_stringop_string, "loop")) 2705 stringop_alg = loop; 2706 else if (!strcmp (ix86_stringop_string, "unrolled_loop")) 2707 stringop_alg = unrolled_loop; 2708 else 2709 error ("bad value (%s) for %sstringop-strategy=%s %s", 2710 ix86_stringop_string, prefix, suffix, sw); 2711 } 2712 if (!strcmp (ix86_tune_string, "x86-64")) 2713 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use " 2714 "%stune=k8%s or %stune=generic%s instead as appropriate.", 2715 prefix, suffix, prefix, suffix, prefix, suffix); 2716 2717 if (!ix86_arch_string) 2718 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386"; 2719 else 2720 ix86_arch_specified = 1; 2721 2722 if (!strcmp (ix86_arch_string, "generic")) 2723 error ("generic CPU can be used only for %stune=%s %s", 2724 prefix, suffix, sw); 2725 if (!strncmp (ix86_arch_string, "generic", 7)) 2726 error ("bad value (%s) for %sarch=%s %s", 2727 ix86_arch_string, prefix, suffix, sw); 2728 2729 if (ix86_cmodel_string != 0) 2730 { 2731 if (!strcmp (ix86_cmodel_string, "small")) 2732 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 2733 else if (!strcmp (ix86_cmodel_string, "medium")) 2734 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM; 2735 else if (!strcmp (ix86_cmodel_string, "large")) 2736 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE; 2737 else if (flag_pic) 2738 error ("code model %s does not support PIC mode", ix86_cmodel_string); 2739 else if (!strcmp (ix86_cmodel_string, "32")) 2740 ix86_cmodel = CM_32; 2741 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic) 2742 ix86_cmodel = CM_KERNEL; 2743 else 2744 error ("bad value (%s) for %scmodel=%s %s", 2745 ix86_cmodel_string, prefix, suffix, sw); 2746 } 2747 else 2748 { 2749 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the 2750 use of rip-relative addressing. This eliminates fixups that 2751 would otherwise be needed if this object is to be placed in a 2752 DLL, and is essentially just as efficient as direct addressing. */ 2753 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI) 2754 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1; 2755 else if (TARGET_64BIT) 2756 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 2757 else 2758 ix86_cmodel = CM_32; 2759 } 2760 if (ix86_asm_string != 0) 2761 { 2762 if (! TARGET_MACHO 2763 && !strcmp (ix86_asm_string, "intel")) 2764 ix86_asm_dialect = ASM_INTEL; 2765 else if (!strcmp (ix86_asm_string, "att")) 2766 ix86_asm_dialect = ASM_ATT; 2767 else 2768 error ("bad value (%s) for %sasm=%s %s", 2769 ix86_asm_string, prefix, suffix, sw); 2770 } 2771 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32)) 2772 error ("code model %qs not supported in the %s bit mode", 2773 ix86_cmodel_string, TARGET_64BIT ? 
"64" : "32"); 2774 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0)) 2775 sorry ("%i-bit mode not compiled in", 2776 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32); 2777 2778 for (i = 0; i < pta_size; i++) 2779 if (! strcmp (ix86_arch_string, processor_alias_table[i].name)) 2780 { 2781 ix86_schedule = processor_alias_table[i].schedule; 2782 ix86_arch = processor_alias_table[i].processor; 2783 /* Default cpu tuning to the architecture. */ 2784 ix86_tune = ix86_arch; 2785 2786 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) 2787 error ("CPU you selected does not support x86-64 " 2788 "instruction set"); 2789 2790 if (processor_alias_table[i].flags & PTA_MMX 2791 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX)) 2792 ix86_isa_flags |= OPTION_MASK_ISA_MMX; 2793 if (processor_alias_table[i].flags & PTA_3DNOW 2794 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW)) 2795 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW; 2796 if (processor_alias_table[i].flags & PTA_3DNOW_A 2797 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A)) 2798 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A; 2799 if (processor_alias_table[i].flags & PTA_SSE 2800 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE)) 2801 ix86_isa_flags |= OPTION_MASK_ISA_SSE; 2802 if (processor_alias_table[i].flags & PTA_SSE2 2803 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2)) 2804 ix86_isa_flags |= OPTION_MASK_ISA_SSE2; 2805 if (processor_alias_table[i].flags & PTA_SSE3 2806 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3)) 2807 ix86_isa_flags |= OPTION_MASK_ISA_SSE3; 2808 if (processor_alias_table[i].flags & PTA_SSSE3 2809 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3)) 2810 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3; 2811 if (processor_alias_table[i].flags & PTA_SSE4_1 2812 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1)) 2813 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1; 2814 if (processor_alias_table[i].flags & PTA_SSE4_2 2815 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2)) 2816 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2; 2817 if (processor_alias_table[i].flags & PTA_AVX 2818 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX)) 2819 ix86_isa_flags |= OPTION_MASK_ISA_AVX; 2820 if (processor_alias_table[i].flags & PTA_FMA 2821 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA)) 2822 ix86_isa_flags |= OPTION_MASK_ISA_FMA; 2823 if (processor_alias_table[i].flags & PTA_SSE4A 2824 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A)) 2825 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A; 2826 if (processor_alias_table[i].flags & PTA_SSE5 2827 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5)) 2828 ix86_isa_flags |= OPTION_MASK_ISA_SSE5; 2829 if (processor_alias_table[i].flags & PTA_ABM 2830 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM)) 2831 ix86_isa_flags |= OPTION_MASK_ISA_ABM; 2832 if (processor_alias_table[i].flags & PTA_CX16 2833 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16)) 2834 ix86_isa_flags |= OPTION_MASK_ISA_CX16; 2835 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM) 2836 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT)) 2837 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT; 2838 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)) 2839 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF)) 2840 ix86_isa_flags |= OPTION_MASK_ISA_SAHF; 2841 if (processor_alias_table[i].flags & PTA_AES 2842 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES)) 2843 ix86_isa_flags |= OPTION_MASK_ISA_AES; 2844 if (processor_alias_table[i].flags & 
PTA_PCLMUL 2845 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL)) 2846 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL; 2847 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)) 2848 x86_prefetch_sse = true; 2849 2850 break; 2851 } 2852 2853 if (i == pta_size) 2854 error ("bad value (%s) for %sarch=%s %s", 2855 ix86_arch_string, prefix, suffix, sw); 2856 2857 ix86_arch_mask = 1u << ix86_arch; 2858 for (i = 0; i < X86_ARCH_LAST; ++i) 2859 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask); 2860 2861 for (i = 0; i < pta_size; i++) 2862 if (! strcmp (ix86_tune_string, processor_alias_table[i].name)) 2863 { 2864 ix86_schedule = processor_alias_table[i].schedule; 2865 ix86_tune = processor_alias_table[i].processor; 2866 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) 2867 { 2868 if (ix86_tune_defaulted) 2869 { 2870 ix86_tune_string = "x86-64"; 2871 for (i = 0; i < pta_size; i++) 2872 if (! strcmp (ix86_tune_string, 2873 processor_alias_table[i].name)) 2874 break; 2875 ix86_schedule = processor_alias_table[i].schedule; 2876 ix86_tune = processor_alias_table[i].processor; 2877 } 2878 else 2879 error ("CPU you selected does not support x86-64 " 2880 "instruction set"); 2881 } 2882 2883 /* Intel CPUs have always interpreted SSE prefetch instructions as 2884 NOPs; so, we can enable SSE prefetch instructions even when 2885 -mtune (rather than -march) points us to a processor that has them. 2886 However, the VIA C3 gives a SIGILL, so we only do that for i686 and 2887 higher processors. */ 2888 if (TARGET_CMOVE 2889 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))) 2890 x86_prefetch_sse = true; 2891 break; 2892 } 2893 if (i == pta_size) 2894 error ("bad value (%s) for %stune=%s %s", 2895 ix86_tune_string, prefix, suffix, sw); 2896 2897 ix86_tune_mask = 1u << ix86_tune; 2898 for (i = 0; i < X86_TUNE_LAST; ++i) 2899 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask); 2900 2901 if (optimize_size) 2902 ix86_cost = &ix86_size_cost; 2903 else 2904 ix86_cost = processor_target_table[ix86_tune].cost; 2905 2906 /* Arrange to set up i386_stack_locals for all functions. */ 2907 init_machine_status = ix86_init_machine_status; 2908 2909 /* Validate -mregparm= value. */ 2910 if (ix86_regparm_string) 2911 { 2912 if (TARGET_64BIT) 2913 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix); 2914 i = atoi (ix86_regparm_string); 2915 if (i < 0 || i > REGPARM_MAX) 2916 error ("%sregparm=%d%s is not between 0 and %d", 2917 prefix, i, suffix, REGPARM_MAX); 2918 else 2919 ix86_regparm = i; 2920 } 2921 if (TARGET_64BIT) 2922 ix86_regparm = REGPARM_MAX; 2923 2924 /* If the user has provided any of the -malign-* options, 2925 warn and use that value only if -falign-* is not set. 2926 Remove this code in GCC 3.2 or later. 
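Note that the -malign-* values are power-of-two exponents, so e.g. -malign-loops=4 behaves like -falign-loops=16 (the code below stores 1 << i); an explicit -falign-* setting always takes precedence.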
*/ 2927 if (ix86_align_loops_string) 2928 { 2929 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s", 2930 prefix, suffix, suffix); 2931 if (align_loops == 0) 2932 { 2933 i = atoi (ix86_align_loops_string); 2934 if (i < 0 || i > MAX_CODE_ALIGN) 2935 error ("%salign-loops=%d%s is not between 0 and %d", 2936 prefix, i, suffix, MAX_CODE_ALIGN); 2937 else 2938 align_loops = 1 << i; 2939 } 2940 } 2941 2942 if (ix86_align_jumps_string) 2943 { 2944 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s", 2945 prefix, suffix, suffix); 2946 if (align_jumps == 0) 2947 { 2948 i = atoi (ix86_align_jumps_string); 2949 if (i < 0 || i > MAX_CODE_ALIGN) 2950 error ("%salign-jumps=%d%s is not between 0 and %d", 2951 prefix, i, suffix, MAX_CODE_ALIGN); 2952 else 2953 align_jumps = 1 << i; 2954 } 2955 } 2956 2957 if (ix86_align_funcs_string) 2958 { 2959 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s", 2960 prefix, suffix, suffix); 2961 if (align_functions == 0) 2962 { 2963 i = atoi (ix86_align_funcs_string); 2964 if (i < 0 || i > MAX_CODE_ALIGN) 2965 error ("%salign-functions=%d%s is not between 0 and %d", 2966 prefix, i, suffix, MAX_CODE_ALIGN); 2967 else 2968 align_functions = 1 << i; 2969 } 2970 } 2971 2972 /* Default align_* from the processor table. */ 2973 if (align_loops == 0) 2974 { 2975 align_loops = processor_target_table[ix86_tune].align_loop; 2976 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip; 2977 } 2978 if (align_jumps == 0) 2979 { 2980 align_jumps = processor_target_table[ix86_tune].align_jump; 2981 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip; 2982 } 2983 if (align_functions == 0) 2984 { 2985 align_functions = processor_target_table[ix86_tune].align_func; 2986 } 2987 2988 /* Validate -mbranch-cost= value, or provide default. */ 2989 ix86_branch_cost = ix86_cost->branch_cost; 2990 if (ix86_branch_cost_string) 2991 { 2992 i = atoi (ix86_branch_cost_string); 2993 if (i < 0 || i > 5) 2994 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix); 2995 else 2996 ix86_branch_cost = i; 2997 } 2998 if (ix86_section_threshold_string) 2999 { 3000 i = atoi (ix86_section_threshold_string); 3001 if (i < 0) 3002 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix); 3003 else 3004 ix86_section_threshold = i; 3005 } 3006 3007 if (ix86_tls_dialect_string) 3008 { 3009 if (strcmp (ix86_tls_dialect_string, "gnu") == 0) 3010 ix86_tls_dialect = TLS_DIALECT_GNU; 3011 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0) 3012 ix86_tls_dialect = TLS_DIALECT_GNU2; 3013 else if (strcmp (ix86_tls_dialect_string, "sun") == 0) 3014 ix86_tls_dialect = TLS_DIALECT_SUN; 3015 else 3016 error ("bad value (%s) for %stls-dialect=%s %s", 3017 ix86_tls_dialect_string, prefix, suffix, sw); 3018 } 3019 3020 if (ix87_precision_string) 3021 { 3022 i = atoi (ix87_precision_string); 3023 if (i != 32 && i != 64 && i != 80) 3024 error ("pc%d is not a valid precision setting (32, 64 or 80)", i); 3025 } 3026 3027 if (TARGET_64BIT) 3028 { 3029 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit; 3030 3031 /* Enable the SSE and MMX builtins by default. Still allow the user to 3032 explicitly disable any of these. In particular, disabling SSE and 3033 MMX for kernel code is extremely useful.
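(For example, a kernel built with -m64 -mno-sse -mno-mmx keeps those ISA bits off here, because the -mno- options were recorded in ix86_isa_flags_explicit and the defaulting below masks with ~ix86_isa_flags_explicit.)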
*/ 3034 if (!ix86_arch_specified) 3035 ix86_isa_flags 3036 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX 3037 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit); 3038 3039 if (TARGET_RTD) 3040 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix); 3041 } 3042 else 3043 { 3044 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit; 3045 3046 if (!ix86_arch_specified) 3047 ix86_isa_flags 3048 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit; 3049 3050 /* The i386 ABI does not specify a red zone. It still makes sense to use 3051 one when the programmer takes care to keep the stack from being destroyed. */ 3052 if (!(target_flags_explicit & MASK_NO_RED_ZONE)) 3053 target_flags |= MASK_NO_RED_ZONE; 3054 } 3055 3056 /* Keep nonleaf frame pointers. */ 3057 if (flag_omit_frame_pointer) 3058 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER; 3059 else if (TARGET_OMIT_LEAF_FRAME_POINTER) 3060 flag_omit_frame_pointer = 1; 3061 3062 /* If we're doing fast math, we don't care about comparison order 3063 wrt NaNs. This lets us use a shorter comparison sequence. */ 3064 if (flag_finite_math_only) 3065 target_flags &= ~MASK_IEEE_FP; 3066 3067 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387, 3068 since the insns won't need emulation. */ 3069 if (x86_arch_always_fancy_math_387 & ix86_arch_mask) 3070 target_flags &= ~MASK_NO_FANCY_MATH_387; 3071 3072 /* Likewise, if the target doesn't have a 387, or we've specified 3073 software floating point, don't use 387 inline intrinsics. */ 3074 if (!TARGET_80387) 3075 target_flags |= MASK_NO_FANCY_MATH_387; 3076 3077 /* Turn on MMX builtins for -msse. */ 3078 if (TARGET_SSE) 3079 { 3080 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit; 3081 x86_prefetch_sse = true; 3082 } 3083 3084 /* Turn on the popcnt instruction for -msse4.2 or -mabm. */ 3085 if (TARGET_SSE4_2 || TARGET_ABM) 3086 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit; 3087 3088 if (!TARGET_64BIT && TARGET_SAVE_ARGS) 3089 error ("-msave-args makes no sense in 32-bit mode"); 3090 3091 /* Validate -mpreferred-stack-boundary= value or default it to 3092 PREFERRED_STACK_BOUNDARY_DEFAULT. */ 3093 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT; 3094 if (ix86_preferred_stack_boundary_string) 3095 { 3096 i = atoi (ix86_preferred_stack_boundary_string); 3097 if (i < (TARGET_64BIT ? 4 : 2) || i > 12) 3098 error ("%spreferred-stack-boundary=%d%s is not between %d and 12", 3099 prefix, i, suffix, TARGET_64BIT ? 4 : 2); 3100 else 3101 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT; 3102 } 3103 3104 /* Set the default value for -mstackrealign. */ 3105 if (ix86_force_align_arg_pointer == -1) 3106 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT; 3107 3108 /* Validate -mincoming-stack-boundary= value or default it to 3109 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */ 3110 if (ix86_force_align_arg_pointer) 3111 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY; 3112 else 3113 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY; 3114 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary; 3115 if (ix86_incoming_stack_boundary_string) 3116 { 3117 i = atoi (ix86_incoming_stack_boundary_string); 3118 if (i < (TARGET_64BIT ? 4 : 2) || i > 12) 3119 error ("-mincoming-stack-boundary=%d is not between %d and 12", 3120 i, TARGET_64BIT ?
4 : 2); 3121 else 3122 { 3123 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT; 3124 ix86_incoming_stack_boundary 3125 = ix86_user_incoming_stack_boundary; 3126 } 3127 } 3128 3129 /* Accept -msseregparm only if at least SSE support is enabled. */ 3130 if (TARGET_SSEREGPARM 3131 && ! TARGET_SSE) 3132 error ("%ssseregparm%s used without SSE enabled", prefix, suffix); 3133 3134 ix86_fpmath = TARGET_FPMATH_DEFAULT; 3135 if (ix86_fpmath_string != 0) 3136 { 3137 if (! strcmp (ix86_fpmath_string, "387")) 3138 ix86_fpmath = FPMATH_387; 3139 else if (! strcmp (ix86_fpmath_string, "sse")) 3140 { 3141 if (!TARGET_SSE) 3142 { 3143 warning (0, "SSE instruction set disabled, using 387 arithmetics"); 3144 ix86_fpmath = FPMATH_387; 3145 } 3146 else 3147 ix86_fpmath = FPMATH_SSE; 3148 } 3149 else if (! strcmp (ix86_fpmath_string, "387,sse") 3150 || ! strcmp (ix86_fpmath_string, "387+sse") 3151 || ! strcmp (ix86_fpmath_string, "sse,387") 3152 || ! strcmp (ix86_fpmath_string, "sse+387") 3153 || ! strcmp (ix86_fpmath_string, "both")) 3154 { 3155 if (!TARGET_SSE) 3156 { 3157 warning (0, "SSE instruction set disabled, using 387 arithmetics"); 3158 ix86_fpmath = FPMATH_387; 3159 } 3160 else if (!TARGET_80387) 3161 { 3162 warning (0, "387 instruction set disabled, using SSE arithmetics"); 3163 ix86_fpmath = FPMATH_SSE; 3164 } 3165 else 3166 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387); 3167 } 3168 else 3169 error ("bad value (%s) for %sfpmath=%s %s", 3170 ix86_fpmath_string, prefix, suffix, sw); 3171 } 3172 3173 /* If the i387 is disabled, then do not return values in it. */ 3174 if (!TARGET_80387) 3175 target_flags &= ~MASK_FLOAT_RETURNS; 3176 3177 /* Use external vectorized library in vectorizing intrinsics. */ 3178 if (ix86_veclibabi_string) 3179 { 3180 if (strcmp (ix86_veclibabi_string, "svml") == 0) 3181 ix86_veclib_handler = ix86_veclibabi_svml; 3182 else if (strcmp (ix86_veclibabi_string, "acml") == 0) 3183 ix86_veclib_handler = ix86_veclibabi_acml; 3184 else 3185 error ("unknown vectorization library ABI type (%s) for " 3186 "%sveclibabi=%s %s", ix86_veclibabi_string, 3187 prefix, suffix, sw); 3188 } 3189 3190 if ((x86_accumulate_outgoing_args & ix86_tune_mask) 3191 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) 3192 && !optimize_size) 3193 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 3194 3195 /* ??? Unwind info is not correct around the CFG unless either a frame 3196 pointer is present or M_A_O_A is set. Fixing this requires rewriting 3197 unwind info generation to be aware of the CFG and propagating states 3198 around edges. */ 3199 if ((flag_unwind_tables || flag_asynchronous_unwind_tables 3200 || flag_exceptions || flag_non_call_exceptions) 3201 && flag_omit_frame_pointer 3202 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) 3203 { 3204 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) 3205 warning (0, "unwind tables currently require either a frame pointer " 3206 "or %saccumulate-outgoing-args%s for correctness", 3207 prefix, suffix); 3208 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 3209 } 3210 3211 /* If stack probes are required, the space used for large function 3212 arguments on the stack must also be probed, so enable 3213 -maccumulate-outgoing-args so this happens in the prologue. 
*/ 3214 if (TARGET_STACK_PROBE 3215 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) 3216 { 3217 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) 3218 warning (0, "stack probing requires %saccumulate-outgoing-args%s " 3219 "for correctness", prefix, suffix); 3220 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 3221 } 3222 3223 /* For sane SSE instruction set generation we need fcomi instruction. 3224 It is safe to enable all CMOVE instructions. */ 3225 if (TARGET_SSE) 3226 TARGET_CMOVE = 1; 3227 3228 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ 3229 { 3230 char *p; 3231 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); 3232 p = strchr (internal_label_prefix, 'X'); 3233 internal_label_prefix_len = p - internal_label_prefix; 3234 *p = '\0'; 3235 } 3236 3237 /* When scheduling description is not available, disable scheduler pass 3238 so it won't slow down the compilation and make x87 code slower. */ 3239 if (!TARGET_SCHEDULE) 3240 flag_schedule_insns_after_reload = flag_schedule_insns = 0; 3241 3242 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES)) 3243 set_param_value ("simultaneous-prefetches", 3244 ix86_cost->simultaneous_prefetches); 3245 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE)) 3246 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block); 3247 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE)) 3248 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size); 3249 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE)) 3250 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size); 3251 3252 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0) 3253 can be optimized to ap = __builtin_next_arg (0). */ 3254 if (!TARGET_64BIT) 3255 targetm.expand_builtin_va_start = NULL; 3256 3257 if (TARGET_64BIT) 3258 { 3259 ix86_gen_leave = gen_leave_rex64; 3260 ix86_gen_pop1 = gen_popdi1; 3261 ix86_gen_add3 = gen_adddi3; 3262 ix86_gen_sub3 = gen_subdi3; 3263 ix86_gen_sub3_carry = gen_subdi3_carry_rex64; 3264 ix86_gen_one_cmpl2 = gen_one_cmpldi2; 3265 ix86_gen_monitor = gen_sse3_monitor64; 3266 ix86_gen_andsp = gen_anddi3; 3267 } 3268 else 3269 { 3270 ix86_gen_leave = gen_leave; 3271 ix86_gen_pop1 = gen_popsi1; 3272 ix86_gen_add3 = gen_addsi3; 3273 ix86_gen_sub3 = gen_subsi3; 3274 ix86_gen_sub3_carry = gen_subsi3_carry; 3275 ix86_gen_one_cmpl2 = gen_one_cmplsi2; 3276 ix86_gen_monitor = gen_sse3_monitor; 3277 ix86_gen_andsp = gen_andsi3; 3278 } 3279 3280 #ifdef USE_IX86_CLD 3281 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */ 3282 if (!TARGET_64BIT) 3283 target_flags |= MASK_CLD & ~target_flags_explicit; 3284 #endif 3285 3286 /* Save the initial options in case the user does function specific options */ 3287 if (main_args_p) 3288 target_option_default_node = target_option_current_node 3289 = build_target_option_node (); 3290 } 3291 3292 /* Update register usage after having seen the compiler flags. */ 3293 3294 void 3295 ix86_conditional_register_usage (void) 3296 { 3297 int i; 3298 unsigned int j; 3299 3300 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 3301 { 3302 if (fixed_regs[i] > 1) 3303 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2)); 3304 if (call_used_regs[i] > 1) 3305 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2)); 3306 } 3307 3308 /* The PIC register, if it exists, is fixed. */ 3309 j = PIC_OFFSET_TABLE_REGNUM; 3310 if (j != INVALID_REGNUM) 3311 fixed_regs[j] = call_used_regs[j] = 1; 3312 3313 /* The MS_ABI changes the set of call-used registers. 
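In particular, the Microsoft x64 calling convention treats %rsi, %rdi and %xmm6-%xmm15 as callee-saved, so they are removed from the call-used set below.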
*/ 3314 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI) 3315 { 3316 call_used_regs[SI_REG] = 0; 3317 call_used_regs[DI_REG] = 0; 3318 call_used_regs[XMM6_REG] = 0; 3319 call_used_regs[XMM7_REG] = 0; 3320 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) 3321 call_used_regs[i] = 0; 3322 } 3323 3324 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the 3325 other call-clobbered regs for 64-bit. */ 3326 if (TARGET_64BIT) 3327 { 3328 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]); 3329 3330 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 3331 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i) 3332 && call_used_regs[i]) 3333 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i); 3334 } 3335 3336 /* If MMX is disabled, squash the registers. */ 3337 if (! TARGET_MMX) 3338 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 3339 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i)) 3340 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; 3341 3342 /* If SSE is disabled, squash the registers. */ 3343 if (! TARGET_SSE) 3344 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 3345 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i)) 3346 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; 3347 3348 /* If the FPU is disabled, squash the registers. */ 3349 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387)) 3350 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 3351 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i)) 3352 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; 3353 3354 /* If 32-bit, squash the 64-bit registers. */ 3355 if (! TARGET_64BIT) 3356 { 3357 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++) 3358 reg_names[i] = ""; 3359 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) 3360 reg_names[i] = ""; 3361 } 3362 } 3363 3364 3365 /* Save the current options */ 3366 3367 static void 3368 ix86_function_specific_save (struct cl_target_option *ptr) 3369 { 3370 gcc_assert (IN_RANGE (ix86_arch, 0, 255)); 3371 gcc_assert (IN_RANGE (ix86_schedule, 0, 255)); 3372 gcc_assert (IN_RANGE (ix86_tune, 0, 255)); 3373 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255)); 3374 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255)); 3375 3376 ptr->arch = ix86_arch; 3377 ptr->schedule = ix86_schedule; 3378 ptr->tune = ix86_tune; 3379 ptr->fpmath = ix86_fpmath; 3380 ptr->branch_cost = ix86_branch_cost; 3381 ptr->tune_defaulted = ix86_tune_defaulted; 3382 ptr->arch_specified = ix86_arch_specified; 3383 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit; 3384 ptr->target_flags_explicit = target_flags_explicit; 3385 } 3386 3387 /* Restore the current options */ 3388 3389 static void 3390 ix86_function_specific_restore (struct cl_target_option *ptr) 3391 { 3392 enum processor_type old_tune = ix86_tune; 3393 enum processor_type old_arch = ix86_arch; 3394 unsigned int ix86_arch_mask, ix86_tune_mask; 3395 int i; 3396 3397 ix86_arch = ptr->arch; 3398 ix86_schedule = ptr->schedule; 3399 ix86_tune = ptr->tune; 3400 ix86_fpmath = ptr->fpmath; 3401 ix86_branch_cost = ptr->branch_cost; 3402 ix86_tune_defaulted = ptr->tune_defaulted; 3403 ix86_arch_specified = ptr->arch_specified; 3404 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit; 3405 target_flags_explicit = ptr->target_flags_explicit; 3406 3407 /* Recreate the arch feature tests if the arch changed */ 3408 if (old_arch != ix86_arch) 3409 { 3410 ix86_arch_mask = 1u << ix86_arch; 3411 for (i = 0; i < X86_ARCH_LAST; ++i) 3412 ix86_arch_features[i] 3413 = !!(initial_ix86_arch_features[i] 
& ix86_arch_mask); 3414 } 3415 3416 /* Recreate the tune optimization tests */ 3417 if (old_tune != ix86_tune) 3418 { 3419 ix86_tune_mask = 1u << ix86_tune; 3420 for (i = 0; i < X86_TUNE_LAST; ++i) 3421 ix86_tune_features[i] 3422 = !!(initial_ix86_tune_features[i] & ix86_tune_mask); 3423 } 3424 } 3425 3426 /* Print the current options */ 3427 3428 static void 3429 ix86_function_specific_print (FILE *file, int indent, 3430 struct cl_target_option *ptr) 3431 { 3432 char *target_string 3433 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags, 3434 NULL, NULL, NULL, false); 3435 3436 fprintf (file, "%*sarch = %d (%s)\n", 3437 indent, "", 3438 ptr->arch, 3439 ((ptr->arch < TARGET_CPU_DEFAULT_max) 3440 ? cpu_names[ptr->arch] 3441 : "<unknown>")); 3442 3443 fprintf (file, "%*stune = %d (%s)\n", 3444 indent, "", 3445 ptr->tune, 3446 ((ptr->tune < TARGET_CPU_DEFAULT_max) 3447 ? cpu_names[ptr->tune] 3448 : "<unknown>")); 3449 3450 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath, 3451 (ptr->fpmath & FPMATH_387) ? ", 387" : "", 3452 (ptr->fpmath & FPMATH_SSE) ? ", sse" : ""); 3453 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost); 3454 3455 if (target_string) 3456 { 3457 fprintf (file, "%*s%s\n", indent, "", target_string); 3458 free (target_string); 3459 } 3460 } 3461 3462 3463 /* Inner function to process the attribute((target(...))), take an argument and 3464 set the current options from the argument. If we have a list, recursively go 3465 over the list. */ 3466 3467 static bool 3468 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[]) 3469 { 3470 char *next_optstr; 3471 bool ret = true; 3472 3473 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 } 3474 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 } 3475 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M } 3476 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M } 3477 3478 enum ix86_opt_type 3479 { 3480 ix86_opt_unknown, 3481 ix86_opt_yes, 3482 ix86_opt_no, 3483 ix86_opt_str, 3484 ix86_opt_isa 3485 }; 3486 3487 static const struct 3488 { 3489 const char *string; 3490 size_t len; 3491 enum ix86_opt_type type; 3492 int opt; 3493 int mask; 3494 } attrs[] = { 3495 /* isa options */ 3496 IX86_ATTR_ISA ("3dnow", OPT_m3dnow), 3497 IX86_ATTR_ISA ("abm", OPT_mabm), 3498 IX86_ATTR_ISA ("aes", OPT_maes), 3499 IX86_ATTR_ISA ("avx", OPT_mavx), 3500 IX86_ATTR_ISA ("mmx", OPT_mmmx), 3501 IX86_ATTR_ISA ("pclmul", OPT_mpclmul), 3502 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt), 3503 IX86_ATTR_ISA ("sse", OPT_msse), 3504 IX86_ATTR_ISA ("sse2", OPT_msse2), 3505 IX86_ATTR_ISA ("sse3", OPT_msse3), 3506 IX86_ATTR_ISA ("sse4", OPT_msse4), 3507 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1), 3508 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2), 3509 IX86_ATTR_ISA ("sse4a", OPT_msse4a), 3510 IX86_ATTR_ISA ("sse5", OPT_msse5), 3511 IX86_ATTR_ISA ("ssse3", OPT_mssse3), 3512 3513 /* string options */ 3514 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH), 3515 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH), 3516 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE), 3517 3518 /* flag options */ 3519 IX86_ATTR_YES ("cld", 3520 OPT_mcld, 3521 MASK_CLD), 3522 3523 IX86_ATTR_NO ("fancy-math-387", 3524 OPT_mfancy_math_387, 3525 MASK_NO_FANCY_MATH_387), 3526 3527 IX86_ATTR_NO ("fused-madd", 3528 OPT_mfused_madd, 3529 MASK_NO_FUSED_MADD), 3530 3531 IX86_ATTR_YES ("ieee-fp", 3532 OPT_mieee_fp, 3533 MASK_IEEE_FP), 3534 3535 IX86_ATTR_YES ("inline-all-stringops", 3536 
OPT_minline_all_stringops, 3537 MASK_INLINE_ALL_STRINGOPS), 3538 3539 IX86_ATTR_YES ("inline-stringops-dynamically", 3540 OPT_minline_stringops_dynamically, 3541 MASK_INLINE_STRINGOPS_DYNAMICALLY), 3542 3543 IX86_ATTR_NO ("align-stringops", 3544 OPT_mno_align_stringops, 3545 MASK_NO_ALIGN_STRINGOPS), 3546 3547 IX86_ATTR_YES ("recip", 3548 OPT_mrecip, 3549 MASK_RECIP), 3550 3551 }; 3552 3553 /* If this is a list, recurse to get the options. */ 3554 if (TREE_CODE (args) == TREE_LIST) 3555 { 3556 bool ret = true; 3557 3558 for (; args; args = TREE_CHAIN (args)) 3559 if (TREE_VALUE (args) 3560 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings)) 3561 ret = false; 3562 3563 return ret; 3564 } 3565 3566 else if (TREE_CODE (args) != STRING_CST) 3567 gcc_unreachable (); 3568 3569 /* Handle multiple arguments separated by commas. */ 3570 next_optstr = ASTRDUP (TREE_STRING_POINTER (args)); 3571 3572 while (next_optstr && *next_optstr != '\0') 3573 { 3574 char *p = next_optstr; 3575 char *orig_p = p; 3576 char *comma = strchr (next_optstr, ','); 3577 const char *opt_string; 3578 size_t len, opt_len; 3579 int opt; 3580 bool opt_set_p; 3581 char ch; 3582 unsigned i; 3583 enum ix86_opt_type type = ix86_opt_unknown; 3584 int mask = 0; 3585 3586 if (comma) 3587 { 3588 *comma = '\0'; 3589 len = comma - next_optstr; 3590 next_optstr = comma + 1; 3591 } 3592 else 3593 { 3594 len = strlen (p); 3595 next_optstr = NULL; 3596 } 3597 3598 /* Recognize no-xxx. */ 3599 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-') 3600 { 3601 opt_set_p = false; 3602 p += 3; 3603 len -= 3; 3604 } 3605 else 3606 opt_set_p = true; 3607 3608 /* Find the option. */ 3609 ch = *p; 3610 opt = N_OPTS; 3611 for (i = 0; i < ARRAY_SIZE (attrs); i++) 3612 { 3613 type = attrs[i].type; 3614 opt_len = attrs[i].len; 3615 if (ch == attrs[i].string[0] 3616 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len) 3617 && memcmp (p, attrs[i].string, opt_len) == 0) 3618 { 3619 opt = attrs[i].opt; 3620 mask = attrs[i].mask; 3621 opt_string = attrs[i].string; 3622 break; 3623 } 3624 } 3625 3626 /* Process the option. */ 3627 if (opt == N_OPTS) 3628 { 3629 error ("attribute(target(\"%s\")) is unknown", orig_p); 3630 ret = false; 3631 } 3632 3633 else if (type == ix86_opt_isa) 3634 ix86_handle_option (opt, p, opt_set_p); 3635 3636 else if (type == ix86_opt_yes || type == ix86_opt_no) 3637 { 3638 if (type == ix86_opt_no) 3639 opt_set_p = !opt_set_p; 3640 3641 if (opt_set_p) 3642 target_flags |= mask; 3643 else 3644 target_flags &= ~mask; 3645 } 3646 3647 else if (type == ix86_opt_str) 3648 { 3649 if (p_strings[opt]) 3650 { 3651 error ("option(\"%s\") was already specified", opt_string); 3652 ret = false; 3653 } 3654 else 3655 p_strings[opt] = xstrdup (p + opt_len); 3656 } 3657 3658 else 3659 gcc_unreachable (); 3660 } 3661 3662 return ret; 3663 } 3664 3665 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */ 3666 3667 tree 3668 ix86_valid_target_attribute_tree (tree args) 3669 { 3670 const char *orig_arch_string = ix86_arch_string; 3671 const char *orig_tune_string = ix86_tune_string; 3672 const char *orig_fpmath_string = ix86_fpmath_string; 3673 int orig_tune_defaulted = ix86_tune_defaulted; 3674 int orig_arch_specified = ix86_arch_specified; 3675 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL }; 3676 tree t = NULL_TREE; 3677 int i; 3678 struct cl_target_option *def 3679 = TREE_TARGET_OPTION (target_option_default_node); 3680 3681 /* Process each of the options on the chain. 
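For example, a hypothetical declaration such as

     int foo (int) __attribute__ ((target ("sse4.2,arch=core2")));

reaches this point with ARGS carrying the string "sse4.2,arch=core2", which the helper below splits at the commas into one ISA option and one string option.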
*/ 3682 if (! ix86_valid_target_attribute_inner_p (args, option_strings)) 3683 return NULL_TREE; 3684 3685 /* If the changed options are different from the default, rerun override_options, 3686 and then save the options away. The string options are attribute options, 3687 and will be undone when we copy the save structure. */ 3688 if (ix86_isa_flags != def->ix86_isa_flags 3689 || target_flags != def->target_flags 3690 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH] 3691 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE] 3692 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH]) 3693 { 3694 /* If we are using the default tune= or arch=, undo the string assigned, 3695 and use the default. */ 3696 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]) 3697 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH]; 3698 else if (!orig_arch_specified) 3699 ix86_arch_string = NULL; 3700 3701 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]) 3702 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE]; 3703 else if (orig_tune_defaulted) 3704 ix86_tune_string = NULL; 3705 3706 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */ 3707 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH]) 3708 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH]; 3709 else if (!TARGET_64BIT && TARGET_SSE) 3710 ix86_fpmath_string = "sse,387"; 3711 3712 /* Do any overrides, such as arch=xxx, or tune=xxx support. */ 3713 override_options (false); 3714 3715 /* Add any builtin functions with the new isa if any. */ 3716 ix86_add_new_builtins (ix86_isa_flags); 3717 3718 /* Save the current options unless we are validating options for 3719 #pragma. */ 3720 t = build_target_option_node (); 3721 3722 ix86_arch_string = orig_arch_string; 3723 ix86_tune_string = orig_tune_string; 3724 ix86_fpmath_string = orig_fpmath_string; 3725 3726 /* Free up memory allocated to hold the strings. */ 3727 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++) 3728 if (option_strings[i]) 3729 free (option_strings[i]); 3730 } 3731 3732 return t; 3733 } 3734 3735 /* Hook to validate attribute((target("string"))). */ 3736 3737 static bool 3738 ix86_valid_target_attribute_p (tree fndecl, 3739 tree ARG_UNUSED (name), 3740 tree args, 3741 int ARG_UNUSED (flags)) 3742 { 3743 struct cl_target_option cur_target; 3744 bool ret = true; 3745 tree old_optimize = build_optimization_node (); 3746 tree new_target, new_optimize; 3747 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); 3748 3749 /* If the function changed the optimization levels as well as setting target 3750 options, start with the optimizations specified. */ 3751 if (func_optimize && func_optimize != old_optimize) 3752 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize)); 3753 3754 /* The target attributes may also change some optimization flags, so update 3755 the optimization options if necessary.
*/ 3756 cl_target_option_save (&cur_target); 3757 new_target = ix86_valid_target_attribute_tree (args); 3758 new_optimize = build_optimization_node (); 3759 3760 if (!new_target) 3761 ret = false; 3762 3763 else if (fndecl) 3764 { 3765 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target; 3766 3767 if (old_optimize != new_optimize) 3768 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize; 3769 } 3770 3771 cl_target_option_restore (&cur_target); 3772 3773 if (old_optimize != new_optimize) 3774 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize)); 3775 3776 return ret; 3777 } 3778 3779 3780 /* Hook to determine if one function can safely inline another. */ 3781 3782 static bool 3783 ix86_can_inline_p (tree caller, tree callee) 3784 { 3785 bool ret = false; 3786 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); 3787 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); 3788 3789 /* If callee has no option attributes, then it is ok to inline. */ 3790 if (!callee_tree) 3791 ret = true; 3792 3793 /* If caller has no option attributes, but callee does then it is not ok to 3794 inline. */ 3795 else if (!caller_tree) 3796 ret = false; 3797 3798 else 3799 { 3800 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); 3801 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); 3802 3803 /* Callee's isa options should a subset of the caller's, i.e. a SSE5 function 3804 can inline a SSE2 function but a SSE2 function can't inline a SSE5 3805 function. */ 3806 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags) 3807 != callee_opts->ix86_isa_flags) 3808 ret = false; 3809 3810 /* See if we have the same non-isa options. */ 3811 else if (caller_opts->target_flags != callee_opts->target_flags) 3812 ret = false; 3813 3814 /* See if arch, tune, etc. are the same. */ 3815 else if (caller_opts->arch != callee_opts->arch) 3816 ret = false; 3817 3818 else if (caller_opts->tune != callee_opts->tune) 3819 ret = false; 3820 3821 else if (caller_opts->fpmath != callee_opts->fpmath) 3822 ret = false; 3823 3824 else if (caller_opts->branch_cost != callee_opts->branch_cost) 3825 ret = false; 3826 3827 else 3828 ret = true; 3829 } 3830 3831 return ret; 3832 } 3833 3834 3835 /* Remember the last target of ix86_set_current_function. */ 3836 static GTY(()) tree ix86_previous_fndecl; 3837 3838 /* Establish appropriate back-end context for processing the function 3839 FNDECL. The argument might be NULL to indicate processing at top 3840 level, outside of any function scope. */ 3841 static void 3842 ix86_set_current_function (tree fndecl) 3843 { 3844 /* Only change the context if the function changes. This hook is called 3845 several times in the course of compiling a function, and we don't want to 3846 slow things down too much or call target_reinit when it isn't safe. */ 3847 if (fndecl && fndecl != ix86_previous_fndecl) 3848 { 3849 tree old_tree = (ix86_previous_fndecl 3850 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl) 3851 : NULL_TREE); 3852 3853 tree new_tree = (fndecl 3854 ? 
DECL_FUNCTION_SPECIFIC_TARGET (fndecl) 3855 : NULL_TREE); 3856 3857 ix86_previous_fndecl = fndecl; 3858 if (old_tree == new_tree) 3859 ; 3860 3861 else if (new_tree) 3862 { 3863 cl_target_option_restore (TREE_TARGET_OPTION (new_tree)); 3864 target_reinit (); 3865 } 3866 3867 else if (old_tree) 3868 { 3869 struct cl_target_option *def 3870 = TREE_TARGET_OPTION (target_option_current_node); 3871 3872 cl_target_option_restore (def); 3873 target_reinit (); 3874 } 3875 } 3876 } 3877 3878 3879 /* Return true if this goes in large data/bss. */ 3880 3881 static bool 3882 ix86_in_large_data_p (tree exp) 3883 { 3884 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC) 3885 return false; 3886 3887 /* Functions are never large data. */ 3888 if (TREE_CODE (exp) == FUNCTION_DECL) 3889 return false; 3890 3891 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) 3892 { 3893 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp)); 3894 if (strcmp (section, ".ldata") == 0 3895 || strcmp (section, ".lbss") == 0) 3896 return true; 3897 return false; 3898 } 3899 else 3900 { 3901 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); 3902 3903 /* If this is an incomplete type with size 0, then we can't put it 3904 in data because it might be too big when completed. */ 3905 if (!size || size > ix86_section_threshold) 3906 return true; 3907 } 3908 3909 return false; 3910 } 3911 3912 /* Switch to the appropriate section for output of DECL. 3913 DECL is either a `VAR_DECL' node or a constant of some sort. 3914 RELOC indicates whether forming the initial value of DECL requires 3915 link-time relocations. */ 3916 3917 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT) 3918 ATTRIBUTE_UNUSED; 3919 3920 static section * 3921 x86_64_elf_select_section (tree decl, int reloc, 3922 unsigned HOST_WIDE_INT align) 3923 { 3924 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) 3925 && ix86_in_large_data_p (decl)) 3926 { 3927 const char *sname = NULL; 3928 unsigned int flags = SECTION_WRITE; 3929 switch (categorize_decl_for_section (decl, reloc)) 3930 { 3931 case SECCAT_DATA: 3932 sname = ".ldata"; 3933 break; 3934 case SECCAT_DATA_REL: 3935 sname = ".ldata.rel"; 3936 break; 3937 case SECCAT_DATA_REL_LOCAL: 3938 sname = ".ldata.rel.local"; 3939 break; 3940 case SECCAT_DATA_REL_RO: 3941 sname = ".ldata.rel.ro"; 3942 break; 3943 case SECCAT_DATA_REL_RO_LOCAL: 3944 sname = ".ldata.rel.ro.local"; 3945 break; 3946 case SECCAT_BSS: 3947 sname = ".lbss"; 3948 flags |= SECTION_BSS; 3949 break; 3950 case SECCAT_RODATA: 3951 case SECCAT_RODATA_MERGE_STR: 3952 case SECCAT_RODATA_MERGE_STR_INIT: 3953 case SECCAT_RODATA_MERGE_CONST: 3954 sname = ".lrodata"; 3955 flags = 0; 3956 break; 3957 case SECCAT_SRODATA: 3958 case SECCAT_SDATA: 3959 case SECCAT_SBSS: 3960 gcc_unreachable (); 3961 case SECCAT_TEXT: 3962 case SECCAT_TDATA: 3963 case SECCAT_TBSS: 3964 /* We don't split these for medium model. Place them into 3965 default sections and hope for best. */ 3966 break; 3967 case SECCAT_EMUTLS_VAR: 3968 case SECCAT_EMUTLS_TMPL: 3969 gcc_unreachable (); 3970 } 3971 if (sname) 3972 { 3973 /* We might get called with string constants, but get_named_section 3974 doesn't like them as they are not DECLs. Also, we need to set 3975 flags in that case. 
*/ 3976 if (!DECL_P (decl)) 3977 return get_section (sname, flags, NULL); 3978 return get_named_section (decl, sname, reloc); 3979 } 3980 } 3981 return default_elf_select_section (decl, reloc, align); 3982 } 3983 3984 /* Build up a unique section name, expressed as a 3985 STRING_CST node, and assign it to DECL_SECTION_NAME (decl). 3986 RELOC indicates whether the initial value of EXP requires 3987 link-time relocations. */ 3988 3989 static void ATTRIBUTE_UNUSED 3990 x86_64_elf_unique_section (tree decl, int reloc) 3991 { 3992 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) 3993 && ix86_in_large_data_p (decl)) 3994 { 3995 const char *prefix = NULL; 3996 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */ 3997 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP; 3998 3999 switch (categorize_decl_for_section (decl, reloc)) 4000 { 4001 case SECCAT_DATA: 4002 case SECCAT_DATA_REL: 4003 case SECCAT_DATA_REL_LOCAL: 4004 case SECCAT_DATA_REL_RO: 4005 case SECCAT_DATA_REL_RO_LOCAL: 4006 prefix = one_only ? ".ld" : ".ldata"; 4007 break; 4008 case SECCAT_BSS: 4009 prefix = one_only ? ".lb" : ".lbss"; 4010 break; 4011 case SECCAT_RODATA: 4012 case SECCAT_RODATA_MERGE_STR: 4013 case SECCAT_RODATA_MERGE_STR_INIT: 4014 case SECCAT_RODATA_MERGE_CONST: 4015 prefix = one_only ? ".lr" : ".lrodata"; 4016 break; 4017 case SECCAT_SRODATA: 4018 case SECCAT_SDATA: 4019 case SECCAT_SBSS: 4020 gcc_unreachable (); 4021 case SECCAT_TEXT: 4022 case SECCAT_TDATA: 4023 case SECCAT_TBSS: 4024 /* We don't split these for medium model. Place them into 4025 default sections and hope for best. */ 4026 break; 4027 case SECCAT_EMUTLS_VAR: 4028 prefix = targetm.emutls.var_section; 4029 break; 4030 case SECCAT_EMUTLS_TMPL: 4031 prefix = targetm.emutls.tmpl_section; 4032 break; 4033 } 4034 if (prefix) 4035 { 4036 const char *name, *linkonce; 4037 char *string; 4038 4039 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); 4040 name = targetm.strip_name_encoding (name); 4041 4042 /* If we're using one_only, then there needs to be a .gnu.linkonce 4043 prefix to the section name. */ 4044 linkonce = one_only ? ".gnu.linkonce" : ""; 4045 4046 string = ACONCAT ((linkonce, prefix, ".", name, NULL)); 4047 4048 DECL_SECTION_NAME (decl) = build_string (strlen (string), string); 4049 return; 4050 } 4051 } 4052 default_unique_section (decl, reloc); 4053 } 4054 4055 #ifdef COMMON_ASM_OP 4056 /* This says how to output assembler code to declare an 4057 uninitialized external linkage data object. 4058 4059 For medium model x86-64 we need to use .largecomm opcode for 4060 large objects. */ 4061 void 4062 x86_elf_aligned_common (FILE *file, 4063 const char *name, unsigned HOST_WIDE_INT size, 4064 int align) 4065 { 4066 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) 4067 && size > (unsigned int)ix86_section_threshold) 4068 fprintf (file, ".largecomm\t"); 4069 else 4070 fprintf (file, "%s", COMMON_ASM_OP); 4071 assemble_name (file, name); 4072 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", 4073 size, align / BITS_PER_UNIT); 4074 } 4075 #endif 4076 4077 /* Utility function for targets to use in implementing 4078 ASM_OUTPUT_ALIGNED_BSS. 
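For the medium code models, objects larger than ix86_section_threshold are placed in the .lbss section; everything else goes into the regular .bss section.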
*/ 4079 4080 void 4081 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED, 4082 const char *name, unsigned HOST_WIDE_INT size, 4083 int align) 4084 { 4085 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) 4086 && size > (unsigned int)ix86_section_threshold) 4087 switch_to_section (get_named_section (decl, ".lbss", 0)); 4088 else 4089 switch_to_section (bss_section); 4090 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT)); 4091 #ifdef ASM_DECLARE_OBJECT_NAME 4092 last_assemble_variable_decl = decl; 4093 ASM_DECLARE_OBJECT_NAME (file, name, decl); 4094 #else 4095 /* The standard thing is just to output a label for the object. */ 4096 ASM_OUTPUT_LABEL (file, name); 4097 #endif /* ASM_DECLARE_OBJECT_NAME */ 4098 ASM_OUTPUT_SKIP (file, size ? size : 1); 4099 } 4100 4101 void 4102 optimization_options (int level, int size ATTRIBUTE_UNUSED) 4103 { 4104 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to 4105 make the problem with not enough registers even worse. */ 4106 #ifdef INSN_SCHEDULING 4107 if (level > 1) 4108 flag_schedule_insns = 0; 4109 #endif 4110 4111 if (TARGET_MACHO) 4112 /* The Darwin libraries never set errno, so we might as well 4113 avoid calling them when that's the only reason we would. */ 4114 flag_errno_math = 0; 4115 4116 /* The default values of these switches depend on TARGET_64BIT, 4117 which is not known at this moment. Mark these values with 2 and 4118 let the user override them. In case there is no command line option 4119 specifying them, we will set the defaults in override_options. */ 4120 if (optimize >= 1) 4121 flag_omit_frame_pointer = 2; 4122 flag_pcc_struct_return = 2; 4123 flag_asynchronous_unwind_tables = 2; 4124 flag_vect_cost_model = 1; 4125 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS 4126 SUBTARGET_OPTIMIZATION_OPTIONS; 4127 #endif 4128 } 4129 4130 /* Decide whether we can make a sibling call to a function. DECL is the 4131 declaration of the function being targeted by the call and EXP is the 4132 CALL_EXPR representing the call. */ 4133 4134 static bool 4135 ix86_function_ok_for_sibcall (tree decl, tree exp) 4136 { 4137 tree type, decl_or_type; 4138 rtx a, b; 4139 4140 /* If we are generating position-independent code, we cannot sibcall 4141 optimize any indirect call, or a direct call to a global function, 4142 as the PLT requires %ebx be live. */ 4143 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl))) 4144 return false; 4145 4146 /* If we need to align the outgoing stack, then sibcalling would 4147 unalign the stack, which may break the called function. */ 4148 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY) 4149 return false; 4150 4151 if (decl) 4152 { 4153 decl_or_type = decl; 4154 type = TREE_TYPE (decl); 4155 } 4156 else 4157 { 4158 /* We're looking at the CALL_EXPR; we need the type of the function. */ 4159 type = CALL_EXPR_FN (exp); /* pointer expression */ 4160 type = TREE_TYPE (type); /* pointer type */ 4161 type = TREE_TYPE (type); /* function type */ 4162 decl_or_type = type; 4163 } 4164 4165 /* Check that the return value locations are the same. For example, 4166 if we are returning floats on the 80387 register stack, we cannot 4167 make a sibcall from a function that doesn't return a float to a 4168 function that does or, conversely, from a function that does return 4169 a float to a function that doesn't; the necessary stack adjustment 4170 would not be executed. This is also the place we notice 4171 differences in the return value ABI.
Note that it is ok for one 4172 of the functions to have void return type as long as the return 4173 value of the other is passed in a register. */ 4174 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false); 4175 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), 4176 cfun->decl, false); 4177 if (STACK_REG_P (a) || STACK_REG_P (b)) 4178 { 4179 if (!rtx_equal_p (a, b)) 4180 return false; 4181 } 4182 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) 4183 ; 4184 else if (!rtx_equal_p (a, b)) 4185 return false; 4186 4187 if (TARGET_64BIT) 4188 { 4189 /* The SYSV ABI has more call-clobbered registers; 4190 disallow sibcalls from MS to SYSV. */ 4191 if (cfun->machine->call_abi == MS_ABI 4192 && ix86_function_type_abi (type) == SYSV_ABI) 4193 return false; 4194 } 4195 else 4196 { 4197 /* If this call is indirect, we'll need to be able to use a 4198 call-clobbered register for the address of the target function. 4199 Make sure that all such registers are not used for passing 4200 parameters. Note that DLLIMPORT functions are indirect. */ 4201 if (!decl 4202 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))) 4203 { 4204 if (ix86_function_regparm (type, NULL) >= 3) 4205 { 4206 /* ??? Need to count the actual number of registers to be used, 4207 not the possible number of registers. Fix later. */ 4208 return false; 4209 } 4210 } 4211 } 4212 4213 /* Otherwise okay. That also includes certain types of indirect calls. */ 4214 return true; 4215 } 4216 4217 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm" 4218 calling convention attributes; 4219 arguments as in struct attribute_spec.handler. */ 4220 4221 static tree 4222 ix86_handle_cconv_attribute (tree *node, tree name, 4223 tree args, 4224 int flags ATTRIBUTE_UNUSED, 4225 bool *no_add_attrs) 4226 { 4227 if (TREE_CODE (*node) != FUNCTION_TYPE 4228 && TREE_CODE (*node) != METHOD_TYPE 4229 && TREE_CODE (*node) != FIELD_DECL 4230 && TREE_CODE (*node) != TYPE_DECL) 4231 { 4232 warning (OPT_Wattributes, "%qs attribute only applies to functions", 4233 IDENTIFIER_POINTER (name)); 4234 *no_add_attrs = true; 4235 return NULL_TREE; 4236 } 4237 4238 /* Can combine regparm with all attributes but fastcall. */ 4239 if (is_attribute_p ("regparm", name)) 4240 { 4241 tree cst; 4242 4243 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) 4244 { 4245 error ("fastcall and regparm attributes are not compatible"); 4246 } 4247 4248 cst = TREE_VALUE (args); 4249 if (TREE_CODE (cst) != INTEGER_CST) 4250 { 4251 warning (OPT_Wattributes, 4252 "%qs attribute requires an integer constant argument", 4253 IDENTIFIER_POINTER (name)); 4254 *no_add_attrs = true; 4255 } 4256 else if (compare_tree_int (cst, REGPARM_MAX) > 0) 4257 { 4258 warning (OPT_Wattributes, "argument to %qs attribute larger than %d", 4259 IDENTIFIER_POINTER (name), REGPARM_MAX); 4260 *no_add_attrs = true; 4261 } 4262 4263 return NULL_TREE; 4264 } 4265 4266 if (TARGET_64BIT) 4267 { 4268 /* Do not warn when emulating the MS ABI. */ 4269 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI) 4270 warning (OPT_Wattributes, "%qs attribute ignored", 4271 IDENTIFIER_POINTER (name)); 4272 *no_add_attrs = true; 4273 return NULL_TREE; 4274 } 4275 4276 /* Can combine fastcall with stdcall (redundant) and sseregparm. 
*/ 4277 if (is_attribute_p ("fastcall", name)) 4278 { 4279 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) 4280 { 4281 error ("fastcall and cdecl attributes are not compatible"); 4282 } 4283 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) 4284 { 4285 error ("fastcall and stdcall attributes are not compatible"); 4286 } 4287 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node))) 4288 { 4289 error ("fastcall and regparm attributes are not compatible"); 4290 } 4291 } 4292 4293 /* Can combine stdcall with fastcall (redundant), regparm and 4294 sseregparm. */ 4295 else if (is_attribute_p ("stdcall", name)) 4296 { 4297 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) 4298 { 4299 error ("stdcall and cdecl attributes are not compatible"); 4300 } 4301 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) 4302 { 4303 error ("stdcall and fastcall attributes are not compatible"); 4304 } 4305 } 4306 4307 /* Can combine cdecl with regparm and sseregparm. */ 4308 else if (is_attribute_p ("cdecl", name)) 4309 { 4310 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) 4311 { 4312 error ("stdcall and cdecl attributes are not compatible"); 4313 } 4314 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) 4315 { 4316 error ("fastcall and cdecl attributes are not compatible"); 4317 } 4318 } 4319 4320 /* Can combine sseregparm with all attributes. */ 4321 4322 return NULL_TREE; 4323 } 4324 4325 /* Return 0 if the attributes for two types are incompatible, 1 if they 4326 are compatible, and 2 if they are nearly compatible (which causes a 4327 warning to be generated). */ 4328 4329 static int 4330 ix86_comp_type_attributes (const_tree type1, const_tree type2) 4331 { 4332 /* Check for mismatch of non-default calling convention. */ 4333 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall"; 4334 4335 if (TREE_CODE (type1) != FUNCTION_TYPE 4336 && TREE_CODE (type1) != METHOD_TYPE) 4337 return 1; 4338 4339 /* Check for mismatched fastcall/regparm types. */ 4340 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1)) 4341 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2))) 4342 || (ix86_function_regparm (type1, NULL) 4343 != ix86_function_regparm (type2, NULL))) 4344 return 0; 4345 4346 /* Check for mismatched sseregparm types. */ 4347 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1)) 4348 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2))) 4349 return 0; 4350 4351 /* Check for mismatched return types (cdecl vs stdcall). */ 4352 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1)) 4353 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2))) 4354 return 0; 4355 4356 return 1; 4357 } 4358 4359 /* Return the regparm value for a function with the indicated TYPE and DECL. 4360 DECL may be NULL when calling function indirectly 4361 or considering a libcall. */ 4362 4363 static int 4364 ix86_function_regparm (const_tree type, const_tree decl) 4365 { 4366 tree attr; 4367 int regparm; 4368 4369 static bool error_issued; 4370 4371 if (TARGET_64BIT) 4372 return (ix86_function_type_abi (type) == SYSV_ABI 4373 ? X86_64_REGPARM_MAX : X64_REGPARM_MAX); 4374 4375 regparm = ix86_regparm; 4376 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type)); 4377 if (attr) 4378 { 4379 regparm 4380 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); 4381 4382 if (decl && TREE_CODE (decl) == FUNCTION_DECL) 4383 { 4384 /* We can't use regparm(3) for nested functions because 4385 these pass static chain pointer in %ecx register. 
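%ecx is the third regparm register, so with regparm(3) the static chain would be clobbered by the third argument.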
*/ 4386 if (!error_issued && regparm == 3 4387 && decl_function_context (decl) 4388 && !DECL_NO_STATIC_CHAIN (decl)) 4389 { 4390 error ("nested functions are limited to 2 register parameters"); 4391 error_issued = true; 4392 return 0; 4393 } 4394 } 4395 4396 return regparm; 4397 } 4398 4399 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))) 4400 return 2; 4401 4402 /* Use register calling convention for local functions when possible. */ 4403 if (decl 4404 && TREE_CODE (decl) == FUNCTION_DECL 4405 && optimize 4406 && !profile_flag) 4407 { 4408 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */ 4409 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl)); 4410 if (i && i->local) 4411 { 4412 int local_regparm, globals = 0, regno; 4413 struct function *f; 4414 4415 /* Make sure no regparm register is taken by a 4416 fixed register variable. */ 4417 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++) 4418 if (fixed_regs[local_regparm]) 4419 break; 4420 4421 /* We can't use regparm(3) for nested functions as these use the 4422 static chain pointer in the third argument. */ 4423 if (local_regparm == 3 4424 && decl_function_context (decl) 4425 && !DECL_NO_STATIC_CHAIN (decl)) 4426 local_regparm = 2; 4427 4428 /* If the function realigns its stack pointer, the prologue will 4429 clobber %ecx. If we've already generated code for the callee, 4430 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to 4431 scanning the attributes for the self-realigning property. */ 4432 f = DECL_STRUCT_FUNCTION (decl); 4433 /* Since the current internal arg pointer won't conflict with 4434 parameter passing regs, there is no need to change stack 4435 realignment or adjust the regparm number. 4436 4437 Each fixed register usage increases register pressure, 4438 so fewer registers should be used for argument passing. 4439 This behavior can be overridden by an explicit 4440 regparm value. */ 4441 for (regno = 0; regno <= DI_REG; regno++) 4442 if (fixed_regs[regno]) 4443 globals++; 4444 4445 local_regparm 4446 = globals < local_regparm ? local_regparm - globals : 0; 4447 4448 if (local_regparm > regparm) 4449 regparm = local_regparm; 4450 } 4451 } 4452 4453 return regparm; 4454 } 4455 4456 /* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) and 4457 DFmode (2) arguments in SSE registers for a function with the 4458 indicated TYPE and DECL. DECL may be NULL when calling a function 4459 indirectly or considering a libcall. Otherwise return 0. */ 4460 4461 static int 4462 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn) 4463 { 4464 gcc_assert (!TARGET_64BIT); 4465 4466 /* Use SSE registers to pass SFmode and DFmode arguments if requested 4467 by the sseregparm attribute. */ 4468 if (TARGET_SSEREGPARM 4469 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type)))) 4470 { 4471 if (!TARGET_SSE) 4472 { 4473 if (warn) 4474 { 4475 if (decl) 4476 error ("Calling %qD with attribute sseregparm without " 4477 "SSE/SSE2 enabled", decl); 4478 else 4479 error ("Calling %qT with attribute sseregparm without " 4480 "SSE/SSE2 enabled", type); 4481 } 4482 return 0; 4483 } 4484 4485 return 2; 4486 } 4487 4488 /* For local functions, pass up to SSE_REGPARM_MAX SFmode 4489 (and DFmode for SSE2) arguments in SSE registers. */ 4490 if (decl && TARGET_SSE_MATH && optimize && !profile_flag) 4491 { 4492 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.
*/ 4493 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl)); 4494 if (i && i->local) 4495 return TARGET_SSE2 ? 2 : 1; 4496 } 4497 4498 return 0; 4499 } 4500 4501 /* Return true if EAX is live at the start of the function. Used by 4502 ix86_expand_prologue to determine if we need special help before 4503 calling allocate_stack_worker. */ 4504 4505 static bool 4506 ix86_eax_live_at_start_p (void) 4507 { 4508 /* Cheat. Don't bother working forward from ix86_function_regparm 4509 to the function type to whether an actual argument is located in 4510 eax. Instead just look at cfg info, which is still close enough 4511 to correct at this point. This gives false positives for broken 4512 functions that might use uninitialized data that happens to be 4513 allocated in eax, but who cares? */ 4514 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0); 4515 } 4516 4517 /* Value is the number of bytes of arguments automatically 4518 popped when returning from a subroutine call. 4519 FUNDECL is the declaration node of the function (as a tree), 4520 FUNTYPE is the data type of the function (as a tree), 4521 or for a library call it is an identifier node for the subroutine name. 4522 SIZE is the number of bytes of arguments passed on the stack. 4523 4524 On the 80386, the RTD insn may be used to pop them if the number 4525 of args is fixed, but if the number is variable then the caller 4526 must pop them all. RTD can't be used for library calls now 4527 because the library is compiled with the Unix compiler. 4528 Use of RTD is a selectable option, since it is incompatible with 4529 standard Unix calling sequences. If the option is not selected, 4530 the caller must always pop the args. 4531 4532 The attribute stdcall is equivalent to RTD on a per module basis. */ 4533 4534 int 4535 ix86_return_pops_args (tree fundecl, tree funtype, int size) 4536 { 4537 int rtd; 4538 4539 /* None of the 64-bit ABIs pop arguments. */ 4540 if (TARGET_64BIT) 4541 return 0; 4542 4543 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE); 4544 4545 /* Cdecl functions override -mrtd, and never pop the stack. */ 4546 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) 4547 { 4548 /* Stdcall and fastcall functions will pop the stack if not 4549 variable args. */ 4550 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)) 4551 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))) 4552 rtd = 1; 4553 4554 if (rtd && ! stdarg_p (funtype)) 4555 return size; 4556 } 4557 4558 /* Lose any fake structure return argument if it is passed on the stack. */ 4559 if (aggregate_value_p (TREE_TYPE (funtype), fundecl) 4560 && !KEEP_AGGREGATE_RETURN_POINTER) 4561 { 4562 int nregs = ix86_function_regparm (funtype, fundecl); 4563 if (nregs == 0) 4564 return GET_MODE_SIZE (Pmode); 4565 } 4566 4567 return 0; 4568 } 4569 4570 /* Argument support functions. */ 4571 4572 /* Return true when register may be used to pass function parameters. 
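For 32-bit code these are the regparm registers %eax, %edx and %ecx, plus the MMX and SSE argument registers when those ISAs are enabled; for 64-bit code they are %rdi, %rsi, %rdx, %rcx, %r8 and %r9 under the SysV ABI, or %rcx, %rdx, %r8 and %r9 under the MS ABI, plus the SSE argument registers and %rax, which the SysV ABI uses as a hidden argument to varargs functions.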
*/ 4573 bool 4574 ix86_function_arg_regno_p (int regno) 4575 { 4576 int i; 4577 const int *parm_regs; 4578 4579 if (!TARGET_64BIT) 4580 { 4581 if (TARGET_MACHO) 4582 return (regno < REGPARM_MAX 4583 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno])); 4584 else 4585 return (regno < REGPARM_MAX 4586 || (TARGET_MMX && MMX_REGNO_P (regno) 4587 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX)) 4588 || (TARGET_SSE && SSE_REGNO_P (regno) 4589 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))); 4590 } 4591 4592 if (TARGET_MACHO) 4593 { 4594 if (SSE_REGNO_P (regno) && TARGET_SSE) 4595 return true; 4596 } 4597 else 4598 { 4599 if (TARGET_SSE && SSE_REGNO_P (regno) 4600 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)) 4601 return true; 4602 } 4603 4604 /* TODO: The function should depend on current function ABI but 4605 builtins.c would need updating then. Therefore we use the 4606 default ABI. */ 4607 4608 /* RAX is used as hidden argument to va_arg functions. */ 4609 if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG) 4610 return true; 4611 4612 if (DEFAULT_ABI == MS_ABI) 4613 parm_regs = x86_64_ms_abi_int_parameter_registers; 4614 else 4615 parm_regs = x86_64_int_parameter_registers; 4616 for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX 4617 : X86_64_REGPARM_MAX); i++) 4618 if (regno == parm_regs[i]) 4619 return true; 4620 return false; 4621 } 4622 4623 /* Return if we do not know how to pass TYPE solely in registers. */ 4624 4625 static bool 4626 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type) 4627 { 4628 if (must_pass_in_stack_var_size_or_pad (mode, type)) 4629 return true; 4630 4631 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out! 4632 The layout_type routine is crafty and tries to trick us into passing 4633 currently unsupported vector types on the stack by using TImode. */ 4634 return (!TARGET_64BIT && mode == TImode 4635 && type && TREE_CODE (type) != VECTOR_TYPE); 4636 } 4637 4638 /* It returns the size, in bytes, of the area reserved for arguments passed 4639 in registers for the function represented by fndecl dependent to the used 4640 abi format. */ 4641 int 4642 ix86_reg_parm_stack_space (const_tree fndecl) 4643 { 4644 int call_abi = SYSV_ABI; 4645 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL) 4646 call_abi = ix86_function_abi (fndecl); 4647 else 4648 call_abi = ix86_function_type_abi (fndecl); 4649 if (call_abi == MS_ABI) 4650 return 32; 4651 return 0; 4652 } 4653 4654 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the 4655 call abi used. */ 4656 int 4657 ix86_function_type_abi (const_tree fntype) 4658 { 4659 if (TARGET_64BIT && fntype != NULL) 4660 { 4661 int abi; 4662 if (DEFAULT_ABI == SYSV_ABI) 4663 abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI; 4664 else 4665 abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI; 4666 4667 return abi; 4668 } 4669 return DEFAULT_ABI; 4670 } 4671 4672 int 4673 ix86_function_abi (const_tree fndecl) 4674 { 4675 if (! fndecl) 4676 return DEFAULT_ABI; 4677 return ix86_function_type_abi (TREE_TYPE (fndecl)); 4678 } 4679 4680 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the 4681 call abi used. */ 4682 int 4683 ix86_cfun_abi (void) 4684 { 4685 if (! cfun || ! TARGET_64BIT) 4686 return DEFAULT_ABI; 4687 return cfun->machine->call_abi; 4688 } 4689 4690 /* regclass.c */ 4691 extern void init_regs (void); 4692 4693 /* Implementation of call abi switching target hook. 
Specific to FNDECL 4694 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE 4695 for more details. */ 4696 void 4697 ix86_call_abi_override (const_tree fndecl) 4698 { 4699 if (fndecl == NULL_TREE) 4700 cfun->machine->call_abi = DEFAULT_ABI; 4701 else 4702 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl)); 4703 } 4704 4705 /* MS and SYSV ABI have different set of call used registers. Avoid expensive 4706 re-initialization of init_regs each time we switch function context since 4707 this is needed only during RTL expansion. */ 4708 static void 4709 ix86_maybe_switch_abi (void) 4710 { 4711 if (TARGET_64BIT && 4712 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI)) 4713 reinit_regs (); 4714 } 4715 4716 /* Initialize a variable CUM of type CUMULATIVE_ARGS 4717 for a call to a function whose data type is FNTYPE. 4718 For a library call, FNTYPE is 0. */ 4719 4720 void 4721 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ 4722 tree fntype, /* tree ptr for function decl */ 4723 rtx libname, /* SYMBOL_REF of library name or 0 */ 4724 tree fndecl) 4725 { 4726 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL; 4727 memset (cum, 0, sizeof (*cum)); 4728 4729 if (fndecl) 4730 cum->call_abi = ix86_function_abi (fndecl); 4731 else 4732 cum->call_abi = ix86_function_type_abi (fntype); 4733 /* Set up the number of registers to use for passing arguments. */ 4734 4735 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS) 4736 sorry ("ms_abi attribute requires -maccumulate-outgoing-args " 4737 "or subtarget optimization implying it"); 4738 cum->nregs = ix86_regparm; 4739 if (TARGET_64BIT) 4740 { 4741 if (cum->call_abi != DEFAULT_ABI) 4742 cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX 4743 : X64_REGPARM_MAX; 4744 } 4745 if (TARGET_SSE) 4746 { 4747 cum->sse_nregs = SSE_REGPARM_MAX; 4748 if (TARGET_64BIT) 4749 { 4750 if (cum->call_abi != DEFAULT_ABI) 4751 cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX 4752 : X64_SSE_REGPARM_MAX; 4753 } 4754 } 4755 if (TARGET_MMX) 4756 cum->mmx_nregs = MMX_REGPARM_MAX; 4757 cum->warn_avx = true; 4758 cum->warn_sse = true; 4759 cum->warn_mmx = true; 4760 4761 /* Because type might mismatch in between caller and callee, we need to 4762 use actual type of function for local calls. 4763 FIXME: cgraph_analyze can be told to actually record if function uses 4764 va_start so for local functions maybe_vaarg can be made aggressive 4765 helping K&R code. 4766 FIXME: once typesytem is fixed, we won't need this code anymore. */ 4767 if (i && i->local) 4768 fntype = TREE_TYPE (fndecl); 4769 cum->maybe_vaarg = (fntype 4770 ? (!prototype_p (fntype) || stdarg_p (fntype)) 4771 : !libname); 4772 4773 if (!TARGET_64BIT) 4774 { 4775 /* If there are variable arguments, then we won't pass anything 4776 in registers in 32-bit mode. */ 4777 if (stdarg_p (fntype)) 4778 { 4779 cum->nregs = 0; 4780 cum->sse_nregs = 0; 4781 cum->mmx_nregs = 0; 4782 cum->warn_avx = 0; 4783 cum->warn_sse = 0; 4784 cum->warn_mmx = 0; 4785 return; 4786 } 4787 4788 /* Use ecx and edx registers if function has fastcall attribute, 4789 else look for regparm information. */ 4790 if (fntype) 4791 { 4792 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype))) 4793 { 4794 cum->nregs = 2; 4795 cum->fastcall = 1; 4796 } 4797 else 4798 cum->nregs = ix86_function_regparm (fntype, fndecl); 4799 } 4800 4801 /* Set up the number of SSE registers used for passing SFmode 4802 and DFmode arguments. 
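The value is 0 when such scalars stay on the 387 stack, 1 when only SFmode values go in SSE registers, and 2 when DFmode values do as well.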
Warn for mismatching ABI. */ 4803 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true); 4804 } 4805 } 4806 4807 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE. 4808 But in the case of vector types, it is some vector mode. 4809 4810 When we have only some of our vector isa extensions enabled, then there 4811 are some modes for which vector_mode_supported_p is false. For these 4812 modes, the generic vector support in gcc will choose some non-vector mode 4813 in order to implement the type. By computing the natural mode, we'll 4814 select the proper ABI location for the operand and not depend on whatever 4815 the middle-end decides to do with these vector types. 4816 4817 The midde-end can't deal with the vector types > 16 bytes. In this 4818 case, we return the original mode and warn ABI change if CUM isn't 4819 NULL. */ 4820 4821 static enum machine_mode 4822 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum) 4823 { 4824 enum machine_mode mode = TYPE_MODE (type); 4825 4826 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode)) 4827 { 4828 HOST_WIDE_INT size = int_size_in_bytes (type); 4829 if ((size == 8 || size == 16 || size == 32) 4830 /* ??? Generic code allows us to create width 1 vectors. Ignore. */ 4831 && TYPE_VECTOR_SUBPARTS (type) > 1) 4832 { 4833 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type)); 4834 4835 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) 4836 mode = MIN_MODE_VECTOR_FLOAT; 4837 else 4838 mode = MIN_MODE_VECTOR_INT; 4839 4840 /* Get the mode which has this inner mode and number of units. */ 4841 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) 4842 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type) 4843 && GET_MODE_INNER (mode) == innermode) 4844 { 4845 if (size == 32 && !TARGET_AVX) 4846 { 4847 static bool warnedavx; 4848 4849 if (cum 4850 && !warnedavx 4851 && cum->warn_avx) 4852 { 4853 warnedavx = true; 4854 warning (0, "AVX vector argument without AVX " 4855 "enabled changes the ABI"); 4856 } 4857 return TYPE_MODE (type); 4858 } 4859 else 4860 return mode; 4861 } 4862 4863 gcc_unreachable (); 4864 } 4865 } 4866 4867 return mode; 4868 } 4869 4870 /* We want to pass a value in REGNO whose "natural" mode is MODE. However, 4871 this may not agree with the mode that the type system has chosen for the 4872 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can 4873 go ahead and use it. Otherwise we have to build a PARALLEL instead. */ 4874 4875 static rtx 4876 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode, 4877 unsigned int regno) 4878 { 4879 rtx tmp; 4880 4881 if (orig_mode != BLKmode) 4882 tmp = gen_rtx_REG (orig_mode, regno); 4883 else 4884 { 4885 tmp = gen_rtx_REG (mode, regno); 4886 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx); 4887 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp)); 4888 } 4889 4890 return tmp; 4891 } 4892 4893 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal 4894 of this code is to classify each 8bytes of incoming argument by the register 4895 class and assign registers accordingly. */ 4896 4897 /* Return the union class of CLASS1 and CLASS2. 4898 See the x86-64 PS ABI for details. */ 4899 4900 static enum x86_64_reg_class 4901 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) 4902 { 4903 /* Rule #1: If both classes are equal, this is the resulting class. 
*/ 4904 if (class1 == class2) 4905 return class1; 4906 4907 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is 4908 the other class. */ 4909 if (class1 == X86_64_NO_CLASS) 4910 return class2; 4911 if (class2 == X86_64_NO_CLASS) 4912 return class1; 4913 4914 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ 4915 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) 4916 return X86_64_MEMORY_CLASS; 4917 4918 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ 4919 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) 4920 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) 4921 return X86_64_INTEGERSI_CLASS; 4922 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS 4923 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) 4924 return X86_64_INTEGER_CLASS; 4925 4926 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, 4927 MEMORY is used. */ 4928 if (class1 == X86_64_X87_CLASS 4929 || class1 == X86_64_X87UP_CLASS 4930 || class1 == X86_64_COMPLEX_X87_CLASS 4931 || class2 == X86_64_X87_CLASS 4932 || class2 == X86_64_X87UP_CLASS 4933 || class2 == X86_64_COMPLEX_X87_CLASS) 4934 return X86_64_MEMORY_CLASS; 4935 4936 /* Rule #6: Otherwise class SSE is used. */ 4937 return X86_64_SSE_CLASS; 4938 } 4939 4940 /* Classify the argument of type TYPE and mode MODE. 4941 CLASSES will be filled by the register class used to pass each word 4942 of the operand. The number of words is returned. In case the parameter 4943 should be passed in memory, 0 is returned. As a special case for zero 4944 sized containers, classes[0] will be NO_CLASS and 1 is returned. 4945 4946 BIT_OFFSET is used internally for handling records and specifies offset 4947 of the offset in bits modulo 256 to avoid overflow cases. 4948 4949 See the x86-64 PS ABI for details. 4950 */ 4951 4952 static int 4953 classify_argument (enum machine_mode mode, const_tree type, 4954 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) 4955 { 4956 HOST_WIDE_INT bytes = 4957 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 4958 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 4959 4960 /* Variable sized entities are always passed/returned in memory. */ 4961 if (bytes < 0) 4962 return 0; 4963 4964 if (mode != VOIDmode 4965 && targetm.calls.must_pass_in_stack (mode, type)) 4966 return 0; 4967 4968 if (type && AGGREGATE_TYPE_P (type)) 4969 { 4970 int i; 4971 tree field; 4972 enum x86_64_reg_class subclasses[MAX_CLASSES]; 4973 4974 /* On x86-64 we pass structures larger than 32 bytes on the stack. */ 4975 if (bytes > 32) 4976 return 0; 4977 4978 for (i = 0; i < words; i++) 4979 classes[i] = X86_64_NO_CLASS; 4980 4981 /* Zero sized arrays or structures are NO_CLASS. We return 0 to 4982 signalize memory class, so handle it as special case. */ 4983 if (!words) 4984 { 4985 classes[0] = X86_64_NO_CLASS; 4986 return 1; 4987 } 4988 4989 /* Classify each field of record and merge classes. */ 4990 switch (TREE_CODE (type)) 4991 { 4992 case RECORD_TYPE: 4993 /* And now merge the fields of structure. */ 4994 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 4995 { 4996 if (TREE_CODE (field) == FIELD_DECL) 4997 { 4998 int num; 4999 5000 if (TREE_TYPE (field) == error_mark_node) 5001 continue; 5002 5003 /* Bitfields are always classified as integer. 
Handle them 5004 early, since later code would consider them to be 5005 misaligned integers. */ 5006 if (DECL_BIT_FIELD (field)) 5007 { 5008 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; 5009 i < ((int_bit_position (field) + (bit_offset % 64)) 5010 + tree_low_cst (DECL_SIZE (field), 0) 5011 + 63) / 8 / 8; i++) 5012 classes[i] = 5013 merge_classes (X86_64_INTEGER_CLASS, 5014 classes[i]); 5015 } 5016 else 5017 { 5018 type = TREE_TYPE (field); 5019 5020 /* Flexible array member is ignored. */ 5021 if (TYPE_MODE (type) == BLKmode 5022 && TREE_CODE (type) == ARRAY_TYPE 5023 && TYPE_SIZE (type) == NULL_TREE 5024 && TYPE_DOMAIN (type) != NULL_TREE 5025 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type)) 5026 == NULL_TREE)) 5027 { 5028 static bool warned; 5029 5030 if (!warned && warn_psabi) 5031 { 5032 warned = true; 5033 inform (input_location, 5034 "The ABI of passing struct with" 5035 " a flexible array member has" 5036 " changed in GCC 4.4"); 5037 } 5038 continue; 5039 } 5040 num = classify_argument (TYPE_MODE (type), type, 5041 subclasses, 5042 (int_bit_position (field) 5043 + bit_offset) % 256); 5044 if (!num) 5045 return 0; 5046 for (i = 0; i < num; i++) 5047 { 5048 int pos = 5049 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; 5050 classes[i + pos] = 5051 merge_classes (subclasses[i], classes[i + pos]); 5052 } 5053 } 5054 } 5055 } 5056 break; 5057 5058 case ARRAY_TYPE: 5059 /* Arrays are handled as small records. */ 5060 { 5061 int num; 5062 num = classify_argument (TYPE_MODE (TREE_TYPE (type)), 5063 TREE_TYPE (type), subclasses, bit_offset); 5064 if (!num) 5065 return 0; 5066 5067 /* The partial classes are now full classes. */ 5068 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) 5069 subclasses[0] = X86_64_SSE_CLASS; 5070 if (subclasses[0] == X86_64_INTEGERSI_CLASS 5071 && !((bit_offset % 64) == 0 && bytes == 4)) 5072 subclasses[0] = X86_64_INTEGER_CLASS; 5073 5074 for (i = 0; i < words; i++) 5075 classes[i] = subclasses[i % num]; 5076 5077 break; 5078 } 5079 case UNION_TYPE: 5080 case QUAL_UNION_TYPE: 5081 /* Unions are similar to RECORD_TYPE but offset is always 0. 5082 */ 5083 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 5084 { 5085 if (TREE_CODE (field) == FIELD_DECL) 5086 { 5087 int num; 5088 5089 if (TREE_TYPE (field) == error_mark_node) 5090 continue; 5091 5092 num = classify_argument (TYPE_MODE (TREE_TYPE (field)), 5093 TREE_TYPE (field), subclasses, 5094 bit_offset); 5095 if (!num) 5096 return 0; 5097 for (i = 0; i < num; i++) 5098 classes[i] = merge_classes (subclasses[i], classes[i]); 5099 } 5100 } 5101 break; 5102 5103 default: 5104 gcc_unreachable (); 5105 } 5106 5107 if (words > 2) 5108 { 5109 /* When size > 16 bytes, if the first one isn't 5110 X86_64_SSE_CLASS or any other ones aren't 5111 X86_64_SSEUP_CLASS, everything should be passed in 5112 memory. */ 5113 if (classes[0] != X86_64_SSE_CLASS) 5114 return 0; 5115 5116 for (i = 1; i < words; i++) 5117 if (classes[i] != X86_64_SSEUP_CLASS) 5118 return 0; 5119 } 5120 5121 /* Final merger cleanup. */ 5122 for (i = 0; i < words; i++) 5123 { 5124 /* If one class is MEMORY, everything should be passed in 5125 memory. */ 5126 if (classes[i] == X86_64_MEMORY_CLASS) 5127 return 0; 5128 5129 /* The X86_64_SSEUP_CLASS should be always preceded by 5130 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */ 5131 if (classes[i] == X86_64_SSEUP_CLASS 5132 && classes[i - 1] != X86_64_SSE_CLASS 5133 && classes[i - 1] != X86_64_SSEUP_CLASS) 5134 { 5135 /* The first one should never be X86_64_SSEUP_CLASS. 
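A stray SSEUP can be left behind a non-SSE eightbyte when union members of different classes are merged (e.g. a union holding both an __m128 and an integer member); such an eightbyte is simply demoted to SSE here, while the assert below checks the invariant that SSEUP never appears in the first eightbyte.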
*/ 5136 gcc_assert (i != 0); 5137 classes[i] = X86_64_SSE_CLASS; 5138 } 5139 5140 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, 5141 everything should be passed in memory. */ 5142 if (classes[i] == X86_64_X87UP_CLASS 5143 && (classes[i - 1] != X86_64_X87_CLASS)) 5144 { 5145 static bool warned; 5146 5147 /* The first one should never be X86_64_X87UP_CLASS. */ 5148 gcc_assert (i != 0); 5149 if (!warned && warn_psabi) 5150 { 5151 warned = true; 5152 inform (input_location, 5153 "The ABI of passing union with long double" 5154 " has changed in GCC 4.4"); 5155 } 5156 return 0; 5157 } 5158 } 5159 return words; 5160 } 5161 5162 /* Compute alignment needed. We align all types to natural boundaries with 5163 exception of XFmode that is aligned to 64bits. */ 5164 if (mode != VOIDmode && mode != BLKmode) 5165 { 5166 int mode_alignment = GET_MODE_BITSIZE (mode); 5167 5168 if (mode == XFmode) 5169 mode_alignment = 128; 5170 else if (mode == XCmode) 5171 mode_alignment = 256; 5172 if (COMPLEX_MODE_P (mode)) 5173 mode_alignment /= 2; 5174 /* Misaligned fields are always returned in memory. */ 5175 if (bit_offset % mode_alignment) 5176 return 0; 5177 } 5178 5179 /* for V1xx modes, just use the base mode */ 5180 if (VECTOR_MODE_P (mode) && mode != V1DImode 5181 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes) 5182 mode = GET_MODE_INNER (mode); 5183 5184 /* Classification of atomic types. */ 5185 switch (mode) 5186 { 5187 case SDmode: 5188 case DDmode: 5189 classes[0] = X86_64_SSE_CLASS; 5190 return 1; 5191 case TDmode: 5192 classes[0] = X86_64_SSE_CLASS; 5193 classes[1] = X86_64_SSEUP_CLASS; 5194 return 2; 5195 case DImode: 5196 case SImode: 5197 case HImode: 5198 case QImode: 5199 case CSImode: 5200 case CHImode: 5201 case CQImode: 5202 { 5203 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode); 5204 5205 if (size <= 32) 5206 { 5207 classes[0] = X86_64_INTEGERSI_CLASS; 5208 return 1; 5209 } 5210 else if (size <= 64) 5211 { 5212 classes[0] = X86_64_INTEGER_CLASS; 5213 return 1; 5214 } 5215 else if (size <= 64+32) 5216 { 5217 classes[0] = X86_64_INTEGER_CLASS; 5218 classes[1] = X86_64_INTEGERSI_CLASS; 5219 return 2; 5220 } 5221 else if (size <= 64+64) 5222 { 5223 classes[0] = classes[1] = X86_64_INTEGER_CLASS; 5224 return 2; 5225 } 5226 else 5227 gcc_unreachable (); 5228 } 5229 case CDImode: 5230 case TImode: 5231 classes[0] = classes[1] = X86_64_INTEGER_CLASS; 5232 return 2; 5233 case COImode: 5234 case OImode: 5235 /* OImode shouldn't be used directly. 
*/ 5236 gcc_unreachable (); 5237 case CTImode: 5238 return 0; 5239 case SFmode: 5240 if (!(bit_offset % 64)) 5241 classes[0] = X86_64_SSESF_CLASS; 5242 else 5243 classes[0] = X86_64_SSE_CLASS; 5244 return 1; 5245 case DFmode: 5246 classes[0] = X86_64_SSEDF_CLASS; 5247 return 1; 5248 case XFmode: 5249 classes[0] = X86_64_X87_CLASS; 5250 classes[1] = X86_64_X87UP_CLASS; 5251 return 2; 5252 case TFmode: 5253 classes[0] = X86_64_SSE_CLASS; 5254 classes[1] = X86_64_SSEUP_CLASS; 5255 return 2; 5256 case SCmode: 5257 classes[0] = X86_64_SSE_CLASS; 5258 if (!(bit_offset % 64)) 5259 return 1; 5260 else 5261 { 5262 static bool warned; 5263 5264 if (!warned && warn_psabi) 5265 { 5266 warned = true; 5267 inform (input_location, 5268 "The ABI of passing structure with complex float" 5269 " member has changed in GCC 4.4"); 5270 } 5271 classes[1] = X86_64_SSESF_CLASS; 5272 return 2; 5273 } 5274 case DCmode: 5275 classes[0] = X86_64_SSEDF_CLASS; 5276 classes[1] = X86_64_SSEDF_CLASS; 5277 return 2; 5278 case XCmode: 5279 classes[0] = X86_64_COMPLEX_X87_CLASS; 5280 return 1; 5281 case TCmode: 5282 /* This modes is larger than 16 bytes. */ 5283 return 0; 5284 case V8SFmode: 5285 case V8SImode: 5286 case V32QImode: 5287 case V16HImode: 5288 case V4DFmode: 5289 case V4DImode: 5290 classes[0] = X86_64_SSE_CLASS; 5291 classes[1] = X86_64_SSEUP_CLASS; 5292 classes[2] = X86_64_SSEUP_CLASS; 5293 classes[3] = X86_64_SSEUP_CLASS; 5294 return 4; 5295 case V4SFmode: 5296 case V4SImode: 5297 case V16QImode: 5298 case V8HImode: 5299 case V2DFmode: 5300 case V2DImode: 5301 classes[0] = X86_64_SSE_CLASS; 5302 classes[1] = X86_64_SSEUP_CLASS; 5303 return 2; 5304 case V1DImode: 5305 case V2SFmode: 5306 case V2SImode: 5307 case V4HImode: 5308 case V8QImode: 5309 classes[0] = X86_64_SSE_CLASS; 5310 return 1; 5311 case BLKmode: 5312 case VOIDmode: 5313 return 0; 5314 default: 5315 gcc_assert (VECTOR_MODE_P (mode)); 5316 5317 if (bytes > 16) 5318 return 0; 5319 5320 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT); 5321 5322 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) 5323 classes[0] = X86_64_INTEGERSI_CLASS; 5324 else 5325 classes[0] = X86_64_INTEGER_CLASS; 5326 classes[1] = X86_64_INTEGER_CLASS; 5327 return 1 + (bytes > 8); 5328 } 5329 } 5330 5331 /* Examine the argument and return set number of register required in each 5332 class. Return 0 iff parameter should be passed in memory. */ 5333 static int 5334 examine_argument (enum machine_mode mode, const_tree type, int in_return, 5335 int *int_nregs, int *sse_nregs) 5336 { 5337 enum x86_64_reg_class regclass[MAX_CLASSES]; 5338 int n = classify_argument (mode, type, regclass, 0); 5339 5340 *int_nregs = 0; 5341 *sse_nregs = 0; 5342 if (!n) 5343 return 0; 5344 for (n--; n >= 0; n--) 5345 switch (regclass[n]) 5346 { 5347 case X86_64_INTEGER_CLASS: 5348 case X86_64_INTEGERSI_CLASS: 5349 (*int_nregs)++; 5350 break; 5351 case X86_64_SSE_CLASS: 5352 case X86_64_SSESF_CLASS: 5353 case X86_64_SSEDF_CLASS: 5354 (*sse_nregs)++; 5355 break; 5356 case X86_64_NO_CLASS: 5357 case X86_64_SSEUP_CLASS: 5358 break; 5359 case X86_64_X87_CLASS: 5360 case X86_64_X87UP_CLASS: 5361 if (!in_return) 5362 return 0; 5363 break; 5364 case X86_64_COMPLEX_X87_CLASS: 5365 return in_return ? 2 : 0; 5366 case X86_64_MEMORY_CLASS: 5367 gcc_unreachable (); 5368 } 5369 return 1; 5370 } 5371 5372 /* Construct container for the argument used by GCC interface. See 5373 FUNCTION_ARG for the detailed description. 
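As a rough illustration only (assuming a struct { long l; double d; } argument and the first free registers), the container built below would look like
   (parallel [(expr_list (reg:DI di) (const_int 0))
              (expr_list (reg:DF xmm0) (const_int 8))])
i.e. one EXPR_LIST per eightbyte giving the register that carries it and the byte offset it covers.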
*/ 5374 5375 static rtx 5376 construct_container (enum machine_mode mode, enum machine_mode orig_mode, 5377 const_tree type, int in_return, int nintregs, int nsseregs, 5378 const int *intreg, int sse_regno) 5379 { 5380 /* The following variables hold the static issued_error state. */ 5381 static bool issued_sse_arg_error; 5382 static bool issued_sse_ret_error; 5383 static bool issued_x87_ret_error; 5384 5385 enum machine_mode tmpmode; 5386 int bytes = 5387 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 5388 enum x86_64_reg_class regclass[MAX_CLASSES]; 5389 int n; 5390 int i; 5391 int nexps = 0; 5392 int needed_sseregs, needed_intregs; 5393 rtx exp[MAX_CLASSES]; 5394 rtx ret; 5395 5396 n = classify_argument (mode, type, regclass, 0); 5397 if (!n) 5398 return NULL; 5399 if (!examine_argument (mode, type, in_return, &needed_intregs, 5400 &needed_sseregs)) 5401 return NULL; 5402 if (needed_intregs > nintregs || needed_sseregs > nsseregs) 5403 return NULL; 5404 5405 /* We allowed the user to turn off SSE for kernel mode. Don't crash if 5406 some less clueful developer tries to use floating-point anyway. */ 5407 if (needed_sseregs && !TARGET_SSE) 5408 { 5409 if (in_return) 5410 { 5411 if (!issued_sse_ret_error) 5412 { 5413 error ("SSE register return with SSE disabled"); 5414 issued_sse_ret_error = true; 5415 } 5416 } 5417 else if (!issued_sse_arg_error) 5418 { 5419 error ("SSE register argument with SSE disabled"); 5420 issued_sse_arg_error = true; 5421 } 5422 return NULL; 5423 } 5424 5425 /* Likewise, error if the ABI requires us to return values in the 5426 x87 registers and the user specified -mno-80387. */ 5427 if (!TARGET_80387 && in_return) 5428 for (i = 0; i < n; i++) 5429 if (regclass[i] == X86_64_X87_CLASS 5430 || regclass[i] == X86_64_X87UP_CLASS 5431 || regclass[i] == X86_64_COMPLEX_X87_CLASS) 5432 { 5433 if (!issued_x87_ret_error) 5434 { 5435 error ("x87 register return with x87 disabled"); 5436 issued_x87_ret_error = true; 5437 } 5438 return NULL; 5439 } 5440 5441 /* First construct simple cases. Avoid SCmode, since we want to use 5442 single register to pass this type. */ 5443 if (n == 1 && mode != SCmode) 5444 switch (regclass[0]) 5445 { 5446 case X86_64_INTEGER_CLASS: 5447 case X86_64_INTEGERSI_CLASS: 5448 return gen_rtx_REG (mode, intreg[0]); 5449 case X86_64_SSE_CLASS: 5450 case X86_64_SSESF_CLASS: 5451 case X86_64_SSEDF_CLASS: 5452 if (mode != BLKmode) 5453 return gen_reg_or_parallel (mode, orig_mode, 5454 SSE_REGNO (sse_regno)); 5455 break; 5456 case X86_64_X87_CLASS: 5457 case X86_64_COMPLEX_X87_CLASS: 5458 return gen_rtx_REG (mode, FIRST_STACK_REG); 5459 case X86_64_NO_CLASS: 5460 /* Zero sized array, struct or class. 
*/ 5461 return NULL; 5462 default: 5463 gcc_unreachable (); 5464 } 5465 if (n == 2 && regclass[0] == X86_64_SSE_CLASS 5466 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode) 5467 return gen_rtx_REG (mode, SSE_REGNO (sse_regno)); 5468 if (n == 4 5469 && regclass[0] == X86_64_SSE_CLASS 5470 && regclass[1] == X86_64_SSEUP_CLASS 5471 && regclass[2] == X86_64_SSEUP_CLASS 5472 && regclass[3] == X86_64_SSEUP_CLASS 5473 && mode != BLKmode) 5474 return gen_rtx_REG (mode, SSE_REGNO (sse_regno)); 5475 5476 if (n == 2 5477 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS) 5478 return gen_rtx_REG (XFmode, FIRST_STACK_REG); 5479 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS 5480 && regclass[1] == X86_64_INTEGER_CLASS 5481 && (mode == CDImode || mode == TImode || mode == TFmode) 5482 && intreg[0] + 1 == intreg[1]) 5483 return gen_rtx_REG (mode, intreg[0]); 5484 5485 /* Otherwise figure out the entries of the PARALLEL. */ 5486 for (i = 0; i < n; i++) 5487 { 5488 int pos; 5489 5490 switch (regclass[i]) 5491 { 5492 case X86_64_NO_CLASS: 5493 break; 5494 case X86_64_INTEGER_CLASS: 5495 case X86_64_INTEGERSI_CLASS: 5496 /* Merge TImodes on aligned occasions here too. */ 5497 if (i * 8 + 8 > bytes) 5498 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0); 5499 else if (regclass[i] == X86_64_INTEGERSI_CLASS) 5500 tmpmode = SImode; 5501 else 5502 tmpmode = DImode; 5503 /* We've requested 24 bytes we don't have mode for. Use DImode. */ 5504 if (tmpmode == BLKmode) 5505 tmpmode = DImode; 5506 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, 5507 gen_rtx_REG (tmpmode, *intreg), 5508 GEN_INT (i*8)); 5509 intreg++; 5510 break; 5511 case X86_64_SSESF_CLASS: 5512 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, 5513 gen_rtx_REG (SFmode, 5514 SSE_REGNO (sse_regno)), 5515 GEN_INT (i*8)); 5516 sse_regno++; 5517 break; 5518 case X86_64_SSEDF_CLASS: 5519 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, 5520 gen_rtx_REG (DFmode, 5521 SSE_REGNO (sse_regno)), 5522 GEN_INT (i*8)); 5523 sse_regno++; 5524 break; 5525 case X86_64_SSE_CLASS: 5526 pos = i; 5527 switch (n) 5528 { 5529 case 1: 5530 tmpmode = DImode; 5531 break; 5532 case 2: 5533 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS) 5534 { 5535 tmpmode = TImode; 5536 i++; 5537 } 5538 else 5539 tmpmode = DImode; 5540 break; 5541 case 4: 5542 gcc_assert (i == 0 5543 && regclass[1] == X86_64_SSEUP_CLASS 5544 && regclass[2] == X86_64_SSEUP_CLASS 5545 && regclass[3] == X86_64_SSEUP_CLASS); 5546 tmpmode = OImode; 5547 i += 3; 5548 break; 5549 default: 5550 gcc_unreachable (); 5551 } 5552 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, 5553 gen_rtx_REG (tmpmode, 5554 SSE_REGNO (sse_regno)), 5555 GEN_INT (pos*8)); 5556 sse_regno++; 5557 break; 5558 default: 5559 gcc_unreachable (); 5560 } 5561 } 5562 5563 /* Empty aligned struct, union or class. */ 5564 if (nexps == 0) 5565 return NULL; 5566 5567 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps)); 5568 for (i = 0; i < nexps; i++) 5569 XVECEXP (ret, 0, i) = exp [i]; 5570 return ret; 5571 } 5572 5573 /* Update the data in CUM to advance over an argument of mode MODE 5574 and data type TYPE. (TYPE is null for libcalls where that information 5575 may not be available.) 
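In the 32-bit variant below, integer arguments consume general registers a word at a time, while vector arguments consume a single SSE or MMX register; the 64-bit variant asks examine_argument how many integer and SSE registers the value needs and falls back to the stack when either set would be exhausted.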
*/ 5576 5577 static void 5578 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, 5579 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words) 5580 { 5581 switch (mode) 5582 { 5583 default: 5584 break; 5585 5586 case BLKmode: 5587 if (bytes < 0) 5588 break; 5589 /* FALLTHRU */ 5590 5591 case DImode: 5592 case SImode: 5593 case HImode: 5594 case QImode: 5595 cum->words += words; 5596 cum->nregs -= words; 5597 cum->regno += words; 5598 5599 if (cum->nregs <= 0) 5600 { 5601 cum->nregs = 0; 5602 cum->regno = 0; 5603 } 5604 break; 5605 5606 case OImode: 5607 /* OImode shouldn't be used directly. */ 5608 gcc_unreachable (); 5609 5610 case DFmode: 5611 if (cum->float_in_sse < 2) 5612 break; 5613 case SFmode: 5614 if (cum->float_in_sse < 1) 5615 break; 5616 /* FALLTHRU */ 5617 5618 case V8SFmode: 5619 case V8SImode: 5620 case V32QImode: 5621 case V16HImode: 5622 case V4DFmode: 5623 case V4DImode: 5624 case TImode: 5625 case V16QImode: 5626 case V8HImode: 5627 case V4SImode: 5628 case V2DImode: 5629 case V4SFmode: 5630 case V2DFmode: 5631 if (!type || !AGGREGATE_TYPE_P (type)) 5632 { 5633 cum->sse_words += words; 5634 cum->sse_nregs -= 1; 5635 cum->sse_regno += 1; 5636 if (cum->sse_nregs <= 0) 5637 { 5638 cum->sse_nregs = 0; 5639 cum->sse_regno = 0; 5640 } 5641 } 5642 break; 5643 5644 case V8QImode: 5645 case V4HImode: 5646 case V2SImode: 5647 case V2SFmode: 5648 case V1DImode: 5649 if (!type || !AGGREGATE_TYPE_P (type)) 5650 { 5651 cum->mmx_words += words; 5652 cum->mmx_nregs -= 1; 5653 cum->mmx_regno += 1; 5654 if (cum->mmx_nregs <= 0) 5655 { 5656 cum->mmx_nregs = 0; 5657 cum->mmx_regno = 0; 5658 } 5659 } 5660 break; 5661 } 5662 } 5663 5664 static void 5665 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode, 5666 tree type, HOST_WIDE_INT words, int named) 5667 { 5668 int int_nregs, sse_nregs; 5669 5670 /* Unnamed 256bit vector mode parameters are passed on stack. */ 5671 if (!named && VALID_AVX256_REG_MODE (mode)) 5672 return; 5673 5674 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)) 5675 cum->words += words; 5676 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) 5677 { 5678 cum->nregs -= int_nregs; 5679 cum->sse_nregs -= sse_nregs; 5680 cum->regno += int_nregs; 5681 cum->sse_regno += sse_nregs; 5682 } 5683 else 5684 cum->words += words; 5685 } 5686 5687 static void 5688 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes, 5689 HOST_WIDE_INT words) 5690 { 5691 /* Otherwise, this should be passed indirect. */ 5692 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8); 5693 5694 cum->words += words; 5695 if (cum->nregs > 0) 5696 { 5697 cum->nregs -= 1; 5698 cum->regno += 1; 5699 } 5700 } 5701 5702 void 5703 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, 5704 tree type, int named) 5705 { 5706 HOST_WIDE_INT bytes, words; 5707 5708 if (mode == BLKmode) 5709 bytes = int_size_in_bytes (type); 5710 else 5711 bytes = GET_MODE_SIZE (mode); 5712 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 5713 5714 if (type) 5715 mode = type_natural_mode (type, NULL); 5716 5717 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI) 5718 function_arg_advance_ms_64 (cum, bytes, words); 5719 else if (TARGET_64BIT) 5720 function_arg_advance_64 (cum, mode, type, words, named); 5721 else 5722 function_arg_advance_32 (cum, mode, type, bytes, words); 5723 } 5724 5725 /* Define where to put the arguments to a function. 
5726 Value is zero to push the argument on the stack, 5727 or a hard register in which to store the argument. 5728 5729 MODE is the argument's machine mode. 5730 TYPE is the data type of the argument (as a tree). 5731 This is null for libcalls where that information may 5732 not be available. 5733 CUM is a variable of type CUMULATIVE_ARGS which gives info about 5734 the preceding args and about the function being called. 5735 NAMED is nonzero if this argument is a named parameter 5736 (otherwise it is an extra parameter matching an ellipsis). */ 5737 5738 static rtx 5739 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, 5740 enum machine_mode orig_mode, tree type, 5741 HOST_WIDE_INT bytes, HOST_WIDE_INT words) 5742 { 5743 static bool warnedsse, warnedmmx; 5744 5745 /* Avoid the AL settings for the Unix64 ABI. */ 5746 if (mode == VOIDmode) 5747 return constm1_rtx; 5748 5749 switch (mode) 5750 { 5751 default: 5752 break; 5753 5754 case BLKmode: 5755 if (bytes < 0) 5756 break; 5757 /* FALLTHRU */ 5758 case DImode: 5759 case SImode: 5760 case HImode: 5761 case QImode: 5762 if (words <= cum->nregs) 5763 { 5764 int regno = cum->regno; 5765 5766 /* Fastcall allocates the first two DWORD (SImode) or 5767 smaller arguments to ECX and EDX if it isn't an 5768 aggregate type . */ 5769 if (cum->fastcall) 5770 { 5771 if (mode == BLKmode 5772 || mode == DImode 5773 || (type && AGGREGATE_TYPE_P (type))) 5774 break; 5775 5776 /* ECX not EAX is the first allocated register. */ 5777 if (regno == AX_REG) 5778 regno = CX_REG; 5779 } 5780 return gen_rtx_REG (mode, regno); 5781 } 5782 break; 5783 5784 case DFmode: 5785 if (cum->float_in_sse < 2) 5786 break; 5787 case SFmode: 5788 if (cum->float_in_sse < 1) 5789 break; 5790 /* FALLTHRU */ 5791 case TImode: 5792 /* In 32bit, we pass TImode in xmm registers. */ 5793 case V16QImode: 5794 case V8HImode: 5795 case V4SImode: 5796 case V2DImode: 5797 case V4SFmode: 5798 case V2DFmode: 5799 if (!type || !AGGREGATE_TYPE_P (type)) 5800 { 5801 if (!TARGET_SSE && !warnedsse && cum->warn_sse) 5802 { 5803 warnedsse = true; 5804 warning (0, "SSE vector argument without SSE enabled " 5805 "changes the ABI"); 5806 } 5807 if (cum->sse_nregs) 5808 return gen_reg_or_parallel (mode, orig_mode, 5809 cum->sse_regno + FIRST_SSE_REG); 5810 } 5811 break; 5812 5813 case OImode: 5814 /* OImode shouldn't be used directly. */ 5815 gcc_unreachable (); 5816 5817 case V8SFmode: 5818 case V8SImode: 5819 case V32QImode: 5820 case V16HImode: 5821 case V4DFmode: 5822 case V4DImode: 5823 if (!type || !AGGREGATE_TYPE_P (type)) 5824 { 5825 if (cum->sse_nregs) 5826 return gen_reg_or_parallel (mode, orig_mode, 5827 cum->sse_regno + FIRST_SSE_REG); 5828 } 5829 break; 5830 5831 case V8QImode: 5832 case V4HImode: 5833 case V2SImode: 5834 case V2SFmode: 5835 case V1DImode: 5836 if (!type || !AGGREGATE_TYPE_P (type)) 5837 { 5838 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx) 5839 { 5840 warnedmmx = true; 5841 warning (0, "MMX vector argument without MMX enabled " 5842 "changes the ABI"); 5843 } 5844 if (cum->mmx_nregs) 5845 return gen_reg_or_parallel (mode, orig_mode, 5846 cum->mmx_regno + FIRST_MMX_REG); 5847 } 5848 break; 5849 } 5850 5851 return NULL_RTX; 5852 } 5853 5854 static rtx 5855 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode, 5856 enum machine_mode orig_mode, tree type, int named) 5857 { 5858 /* Handle a hidden AL argument containing number of registers 5859 for varargs x86-64 functions. */ 5860 if (mode == VOIDmode) 5861 return GEN_INT (cum->maybe_vaarg 5862 ? 
(cum->sse_nregs < 0 5863 ? (cum->call_abi == DEFAULT_ABI 5864 ? SSE_REGPARM_MAX 5865 : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX 5866 : X64_SSE_REGPARM_MAX)) 5867 : cum->sse_regno) 5868 : -1); 5869 5870 switch (mode) 5871 { 5872 default: 5873 break; 5874 5875 case V8SFmode: 5876 case V8SImode: 5877 case V32QImode: 5878 case V16HImode: 5879 case V4DFmode: 5880 case V4DImode: 5881 /* Unnamed 256bit vector mode parameters are passed on stack. */ 5882 if (!named) 5883 return NULL; 5884 break; 5885 } 5886 5887 return construct_container (mode, orig_mode, type, 0, cum->nregs, 5888 cum->sse_nregs, 5889 &x86_64_int_parameter_registers [cum->regno], 5890 cum->sse_regno); 5891 } 5892 5893 static rtx 5894 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode, 5895 enum machine_mode orig_mode, int named, 5896 HOST_WIDE_INT bytes) 5897 { 5898 unsigned int regno; 5899 5900 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call. 5901 We use value of -2 to specify that current function call is MSABI. */ 5902 if (mode == VOIDmode) 5903 return GEN_INT (-2); 5904 5905 /* If we've run out of registers, it goes on the stack. */ 5906 if (cum->nregs == 0) 5907 return NULL_RTX; 5908 5909 regno = x86_64_ms_abi_int_parameter_registers[cum->regno]; 5910 5911 /* Only floating point modes are passed in anything but integer regs. */ 5912 if (TARGET_SSE && (mode == SFmode || mode == DFmode)) 5913 { 5914 if (named) 5915 regno = cum->regno + FIRST_SSE_REG; 5916 else 5917 { 5918 rtx t1, t2; 5919 5920 /* Unnamed floating parameters are passed in both the 5921 SSE and integer registers. */ 5922 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG); 5923 t2 = gen_rtx_REG (mode, regno); 5924 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx); 5925 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx); 5926 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2)); 5927 } 5928 } 5929 /* Handle aggregated types passed in register. */ 5930 if (orig_mode == BLKmode) 5931 { 5932 if (bytes > 0 && bytes <= 8) 5933 mode = (bytes > 4 ? DImode : SImode); 5934 if (mode == BLKmode) 5935 mode = DImode; 5936 } 5937 5938 return gen_reg_or_parallel (mode, orig_mode, regno); 5939 } 5940 5941 rtx 5942 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode, 5943 tree type, int named) 5944 { 5945 enum machine_mode mode = omode; 5946 HOST_WIDE_INT bytes, words; 5947 5948 if (mode == BLKmode) 5949 bytes = int_size_in_bytes (type); 5950 else 5951 bytes = GET_MODE_SIZE (mode); 5952 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 5953 5954 /* To simplify the code below, represent vector types with a vector mode 5955 even if MMX/SSE are not active. */ 5956 if (type && TREE_CODE (type) == VECTOR_TYPE) 5957 mode = type_natural_mode (type, cum); 5958 5959 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI) 5960 return function_arg_ms_64 (cum, mode, omode, named, bytes); 5961 else if (TARGET_64BIT) 5962 return function_arg_64 (cum, mode, omode, type, named); 5963 else 5964 return function_arg_32 (cum, mode, omode, type, bytes, words); 5965 } 5966 5967 /* A C expression that indicates when an argument must be passed by 5968 reference. If nonzero for an argument, a copy of that argument is 5969 made in memory and a pointer to the argument is passed instead of 5970 the argument itself. The pointer is passed in whatever way is 5971 appropriate for passing a pointer to that type. 
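For the Windows 64-bit ABI, handled first below, this means arrays and any aggregate whose size is not 1, 2, 4 or 8 bytes (__m128 included) are replaced by a pointer; for the SYSV 64-bit ABI only variable sized types are passed this way.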
*/ 5972 5973 static bool 5974 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED, 5975 enum machine_mode mode ATTRIBUTE_UNUSED, 5976 const_tree type, bool named ATTRIBUTE_UNUSED) 5977 { 5978 /* See Windows x64 Software Convention. */ 5979 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI) 5980 { 5981 int msize = (int) GET_MODE_SIZE (mode); 5982 if (type) 5983 { 5984 /* Arrays are passed by reference. */ 5985 if (TREE_CODE (type) == ARRAY_TYPE) 5986 return true; 5987 5988 if (AGGREGATE_TYPE_P (type)) 5989 { 5990 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits 5991 are passed by reference. */ 5992 msize = int_size_in_bytes (type); 5993 } 5994 } 5995 5996 /* __m128 is passed by reference. */ 5997 switch (msize) { 5998 case 1: case 2: case 4: case 8: 5999 break; 6000 default: 6001 return true; 6002 } 6003 } 6004 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1) 6005 return 1; 6006 6007 return 0; 6008 } 6009 6010 /* Return true when TYPE should be 128bit aligned for 32bit argument passing 6011 ABI. */ 6012 static bool 6013 contains_aligned_value_p (tree type) 6014 { 6015 enum machine_mode mode = TYPE_MODE (type); 6016 if (((TARGET_SSE && SSE_REG_MODE_P (mode)) 6017 || mode == TDmode 6018 || mode == TFmode 6019 || mode == TCmode) 6020 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128)) 6021 return true; 6022 if (TYPE_ALIGN (type) < 128) 6023 return false; 6024 6025 if (AGGREGATE_TYPE_P (type)) 6026 { 6027 /* Walk the aggregates recursively. */ 6028 switch (TREE_CODE (type)) 6029 { 6030 case RECORD_TYPE: 6031 case UNION_TYPE: 6032 case QUAL_UNION_TYPE: 6033 { 6034 tree field; 6035 6036 /* Walk all the structure fields. */ 6037 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 6038 { 6039 if (TREE_CODE (field) == FIELD_DECL 6040 && contains_aligned_value_p (TREE_TYPE (field))) 6041 return true; 6042 } 6043 break; 6044 } 6045 6046 case ARRAY_TYPE: 6047 /* Just for use if some languages passes arrays by value. */ 6048 if (contains_aligned_value_p (TREE_TYPE (type))) 6049 return true; 6050 break; 6051 6052 default: 6053 gcc_unreachable (); 6054 } 6055 } 6056 return false; 6057 } 6058 6059 /* Gives the alignment boundary, in bits, of an argument with the 6060 specified mode and type. */ 6061 6062 int 6063 ix86_function_arg_boundary (enum machine_mode mode, tree type) 6064 { 6065 int align; 6066 if (type) 6067 { 6068 /* Since canonical type is used for call, we convert it to 6069 canonical type if needed. */ 6070 if (!TYPE_STRUCTURAL_EQUALITY_P (type)) 6071 type = TYPE_CANONICAL (type); 6072 align = TYPE_ALIGN (type); 6073 } 6074 else 6075 align = GET_MODE_ALIGNMENT (mode); 6076 if (align < PARM_BOUNDARY) 6077 align = PARM_BOUNDARY; 6078 /* In 32bit, only _Decimal128 and __float128 are aligned to their 6079 natural boundaries. */ 6080 if (!TARGET_64BIT && mode != TDmode && mode != TFmode) 6081 { 6082 /* i386 ABI defines all arguments to be 4 byte aligned. We have to 6083 make an exception for SSE modes since these require 128bit 6084 alignment. 6085 6086 The handling here differs from field_alignment. ICC aligns MMX 6087 arguments to 4 byte boundaries, while structure fields are aligned 6088 to 8 byte boundaries. 
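So, for instance, a double argument is only guaranteed 4 byte alignment on ia32 even though its natural alignment is 8, while __m128 (with SSE enabled) and _Decimal128 arguments keep their 16 byte boundary.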
*/ 6089 if (!type) 6090 { 6091 if (!(TARGET_SSE && SSE_REG_MODE_P (mode))) 6092 align = PARM_BOUNDARY; 6093 } 6094 else 6095 { 6096 if (!contains_aligned_value_p (type)) 6097 align = PARM_BOUNDARY; 6098 } 6099 } 6100 if (align > BIGGEST_ALIGNMENT) 6101 align = BIGGEST_ALIGNMENT; 6102 return align; 6103 } 6104 6105 /* Return true if N is a possible register number of function value. */ 6106 6107 bool 6108 ix86_function_value_regno_p (int regno) 6109 { 6110 switch (regno) 6111 { 6112 case 0: 6113 return true; 6114 6115 case FIRST_FLOAT_REG: 6116 /* TODO: The function should depend on current function ABI but 6117 builtins.c would need updating then. Therefore we use the 6118 default ABI. */ 6119 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI) 6120 return false; 6121 return TARGET_FLOAT_RETURNS_IN_80387; 6122 6123 case FIRST_SSE_REG: 6124 return TARGET_SSE; 6125 6126 case FIRST_MMX_REG: 6127 if (TARGET_MACHO || TARGET_64BIT) 6128 return false; 6129 return TARGET_MMX; 6130 } 6131 6132 return false; 6133 } 6134 6135 /* Define how to find the value returned by a function. 6136 VALTYPE is the data type of the value (as a tree). 6137 If the precise function being called is known, FUNC is its FUNCTION_DECL; 6138 otherwise, FUNC is 0. */ 6139 6140 static rtx 6141 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode, 6142 const_tree fntype, const_tree fn) 6143 { 6144 unsigned int regno; 6145 6146 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where 6147 we normally prevent this case when mmx is not available. However 6148 some ABIs may require the result to be returned like DImode. */ 6149 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8) 6150 regno = TARGET_MMX ? FIRST_MMX_REG : 0; 6151 6152 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where 6153 we prevent this case when sse is not available. However some ABIs 6154 may require the result to be returned like integer TImode. */ 6155 else if (mode == TImode 6156 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) 6157 regno = TARGET_SSE ? FIRST_SSE_REG : 0; 6158 6159 /* 32-byte vector modes in %ymm0. */ 6160 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32) 6161 regno = TARGET_AVX ? FIRST_SSE_REG : 0; 6162 6163 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */ 6164 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387) 6165 regno = FIRST_FLOAT_REG; 6166 else 6167 /* Most things go in %eax. */ 6168 regno = AX_REG; 6169 6170 /* Override FP return register with %xmm0 for local functions when 6171 SSE math is enabled or for functions with sseregparm attribute. */ 6172 if ((fn || fntype) && (mode == SFmode || mode == DFmode)) 6173 { 6174 int sse_level = ix86_function_sseregparm (fntype, fn, false); 6175 if ((sse_level >= 1 && mode == SFmode) 6176 || (sse_level == 2 && mode == DFmode)) 6177 regno = FIRST_SSE_REG; 6178 } 6179 6180 /* OImode shouldn't be used directly. */ 6181 gcc_assert (mode != OImode); 6182 6183 return gen_rtx_REG (orig_mode, regno); 6184 } 6185 6186 static rtx 6187 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode, 6188 const_tree valtype) 6189 { 6190 rtx ret; 6191 6192 /* Handle libcalls, which don't provide a type node. 
*/ 6193 if (valtype == NULL) 6194 { 6195 switch (mode) 6196 { 6197 case SFmode: 6198 case SCmode: 6199 case DFmode: 6200 case DCmode: 6201 case TFmode: 6202 case SDmode: 6203 case DDmode: 6204 case TDmode: 6205 return gen_rtx_REG (mode, FIRST_SSE_REG); 6206 case XFmode: 6207 case XCmode: 6208 return gen_rtx_REG (mode, FIRST_FLOAT_REG); 6209 case TCmode: 6210 return NULL; 6211 default: 6212 return gen_rtx_REG (mode, AX_REG); 6213 } 6214 } 6215 6216 ret = construct_container (mode, orig_mode, valtype, 1, 6217 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX, 6218 x86_64_int_return_registers, 0); 6219 6220 /* For zero sized structures, construct_container returns NULL, but we 6221 need to keep rest of compiler happy by returning meaningful value. */ 6222 if (!ret) 6223 ret = gen_rtx_REG (orig_mode, AX_REG); 6224 6225 return ret; 6226 } 6227 6228 static rtx 6229 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode) 6230 { 6231 unsigned int regno = AX_REG; 6232 6233 if (TARGET_SSE) 6234 { 6235 switch (GET_MODE_SIZE (mode)) 6236 { 6237 case 16: 6238 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) 6239 && !COMPLEX_MODE_P (mode)) 6240 regno = FIRST_SSE_REG; 6241 break; 6242 case 8: 6243 case 4: 6244 if (mode == SFmode || mode == DFmode) 6245 regno = FIRST_SSE_REG; 6246 break; 6247 default: 6248 break; 6249 } 6250 } 6251 return gen_rtx_REG (orig_mode, regno); 6252 } 6253 6254 static rtx 6255 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl, 6256 enum machine_mode orig_mode, enum machine_mode mode) 6257 { 6258 const_tree fn, fntype; 6259 6260 fn = NULL_TREE; 6261 if (fntype_or_decl && DECL_P (fntype_or_decl)) 6262 fn = fntype_or_decl; 6263 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl; 6264 6265 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI) 6266 return function_value_ms_64 (orig_mode, mode); 6267 else if (TARGET_64BIT) 6268 return function_value_64 (orig_mode, mode, valtype); 6269 else 6270 return function_value_32 (orig_mode, mode, fntype, fn); 6271 } 6272 6273 static rtx 6274 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, 6275 bool outgoing ATTRIBUTE_UNUSED) 6276 { 6277 enum machine_mode mode, orig_mode; 6278 6279 orig_mode = TYPE_MODE (valtype); 6280 mode = type_natural_mode (valtype, NULL); 6281 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode); 6282 } 6283 6284 rtx 6285 ix86_libcall_value (enum machine_mode mode) 6286 { 6287 return ix86_function_value_1 (NULL, NULL, mode, mode); 6288 } 6289 6290 /* Return true iff type is returned in memory. */ 6291 6292 static int ATTRIBUTE_UNUSED 6293 return_in_memory_32 (const_tree type, enum machine_mode mode) 6294 { 6295 HOST_WIDE_INT size; 6296 6297 if (mode == BLKmode) 6298 return 1; 6299 6300 size = int_size_in_bytes (type); 6301 6302 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8) 6303 return 0; 6304 6305 if (VECTOR_MODE_P (mode) || mode == TImode) 6306 { 6307 /* User-created vectors small enough to fit in EAX. */ 6308 if (size < 8) 6309 return 0; 6310 6311 /* MMX/3dNow values are returned in MM0, 6312 except when it doesn't exits. */ 6313 if (size == 8) 6314 return (TARGET_MMX ? 0 : 1); 6315 6316 /* SSE values are returned in XMM0, except when it doesn't exist. */ 6317 if (size == 16) 6318 return (TARGET_SSE ? 0 : 1); 6319 6320 /* AVX values are returned in YMM0, except when it doesn't exist. */ 6321 if (size == 32) 6322 return TARGET_AVX ? 
0 : 1; 6323 } 6324 6325 if (mode == XFmode) 6326 return 0; 6327 6328 if (size > 12) 6329 return 1; 6330 6331 /* OImode shouldn't be used directly. */ 6332 gcc_assert (mode != OImode); 6333 6334 return 0; 6335 } 6336 6337 static int ATTRIBUTE_UNUSED 6338 return_in_memory_64 (const_tree type, enum machine_mode mode) 6339 { 6340 int needed_intregs, needed_sseregs; 6341 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs); 6342 } 6343 6344 static int ATTRIBUTE_UNUSED 6345 return_in_memory_ms_64 (const_tree type, enum machine_mode mode) 6346 { 6347 HOST_WIDE_INT size = int_size_in_bytes (type); 6348 6349 /* __m128 is returned in xmm0. */ 6350 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) 6351 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16)) 6352 return 0; 6353 6354 /* Otherwise, the size must be exactly in [1248]. */ 6355 return (size != 1 && size != 2 && size != 4 && size != 8); 6356 } 6357 6358 static bool 6359 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) 6360 { 6361 #ifdef SUBTARGET_RETURN_IN_MEMORY 6362 return SUBTARGET_RETURN_IN_MEMORY (type, fntype); 6363 #else 6364 const enum machine_mode mode = type_natural_mode (type, NULL); 6365 6366 if (TARGET_64BIT) 6367 { 6368 if (ix86_function_type_abi (fntype) == MS_ABI) 6369 return return_in_memory_ms_64 (type, mode); 6370 else 6371 return return_in_memory_64 (type, mode); 6372 } 6373 else 6374 return return_in_memory_32 (type, mode); 6375 #endif 6376 } 6377 6378 /* Return false iff TYPE is returned in memory. This version is used 6379 on Solaris 10. It is similar to the generic ix86_return_in_memory, 6380 but differs notably in that when MMX is available, 8-byte vectors 6381 are returned in memory, rather than in MMX registers. */ 6382 6383 bool 6384 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) 6385 { 6386 int size; 6387 enum machine_mode mode = type_natural_mode (type, NULL); 6388 6389 if (TARGET_64BIT) 6390 return return_in_memory_64 (type, mode); 6391 6392 if (mode == BLKmode) 6393 return 1; 6394 6395 size = int_size_in_bytes (type); 6396 6397 if (VECTOR_MODE_P (mode)) 6398 { 6399 /* Return in memory only if MMX registers *are* available. This 6400 seems backwards, but it is consistent with the existing 6401 Solaris x86 ABI. */ 6402 if (size == 8) 6403 return TARGET_MMX; 6404 if (size == 16) 6405 return !TARGET_SSE; 6406 } 6407 else if (mode == TImode) 6408 return !TARGET_SSE; 6409 else if (mode == XFmode) 6410 return 0; 6411 6412 return size > 12; 6413 } 6414 6415 /* When returning SSE vector types, we have a choice of either 6416 (1) being abi incompatible with a -march switch, or 6417 (2) generating an error. 6418 Given no good solution, I think the safest thing is one warning. 6419 The user won't be able to use -Werror, but.... 6420 6421 Choose the STRUCT_VALUE_RTX hook because that's (at present) only 6422 called in response to actually generating a caller or callee that 6423 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called 6424 via aggregate_value_p for general type probing from tree-ssa. */ 6425 6426 static rtx 6427 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED) 6428 { 6429 static bool warnedsse, warnedmmx; 6430 6431 if (!TARGET_64BIT && type) 6432 { 6433 /* Look at the return type of the function, not the function type. 
*/ 6434 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type)); 6435 6436 if (!TARGET_SSE && !warnedsse) 6437 { 6438 if (mode == TImode 6439 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) 6440 { 6441 warnedsse = true; 6442 warning (0, "SSE vector return without SSE enabled " 6443 "changes the ABI"); 6444 } 6445 } 6446 6447 if (!TARGET_MMX && !warnedmmx) 6448 { 6449 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8) 6450 { 6451 warnedmmx = true; 6452 warning (0, "MMX vector return without MMX enabled " 6453 "changes the ABI"); 6454 } 6455 } 6456 } 6457 6458 return NULL; 6459 } 6460 6461 6462 /* Create the va_list data type. */ 6463 6464 /* Returns the calling convention specific va_list date type. 6465 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */ 6466 6467 static tree 6468 ix86_build_builtin_va_list_abi (enum calling_abi abi) 6469 { 6470 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl; 6471 6472 /* For i386 we use plain pointer to argument area. */ 6473 if (!TARGET_64BIT || abi == MS_ABI) 6474 return build_pointer_type (char_type_node); 6475 6476 record = (*lang_hooks.types.make_type) (RECORD_TYPE); 6477 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record); 6478 6479 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"), 6480 unsigned_type_node); 6481 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"), 6482 unsigned_type_node); 6483 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"), 6484 ptr_type_node); 6485 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"), 6486 ptr_type_node); 6487 6488 va_list_gpr_counter_field = f_gpr; 6489 va_list_fpr_counter_field = f_fpr; 6490 6491 DECL_FIELD_CONTEXT (f_gpr) = record; 6492 DECL_FIELD_CONTEXT (f_fpr) = record; 6493 DECL_FIELD_CONTEXT (f_ovf) = record; 6494 DECL_FIELD_CONTEXT (f_sav) = record; 6495 6496 TREE_CHAIN (record) = type_decl; 6497 TYPE_NAME (record) = type_decl; 6498 TYPE_FIELDS (record) = f_gpr; 6499 TREE_CHAIN (f_gpr) = f_fpr; 6500 TREE_CHAIN (f_fpr) = f_ovf; 6501 TREE_CHAIN (f_ovf) = f_sav; 6502 6503 layout_type (record); 6504 6505 /* The correct type is an array type of one element. */ 6506 return build_array_type (record, build_index_type (size_zero_node)); 6507 } 6508 6509 /* Setup the builtin va_list data type and for 64-bit the additional 6510 calling convention specific va_list data types. */ 6511 6512 static tree 6513 ix86_build_builtin_va_list (void) 6514 { 6515 tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI); 6516 6517 /* Initialize abi specific va_list builtin types. */ 6518 if (TARGET_64BIT) 6519 { 6520 tree t; 6521 if (DEFAULT_ABI == MS_ABI) 6522 { 6523 t = ix86_build_builtin_va_list_abi (SYSV_ABI); 6524 if (TREE_CODE (t) != RECORD_TYPE) 6525 t = build_variant_type_copy (t); 6526 sysv_va_list_type_node = t; 6527 } 6528 else 6529 { 6530 t = ret; 6531 if (TREE_CODE (t) != RECORD_TYPE) 6532 t = build_variant_type_copy (t); 6533 sysv_va_list_type_node = t; 6534 } 6535 if (DEFAULT_ABI != MS_ABI) 6536 { 6537 t = ix86_build_builtin_va_list_abi (MS_ABI); 6538 if (TREE_CODE (t) != RECORD_TYPE) 6539 t = build_variant_type_copy (t); 6540 ms_va_list_type_node = t; 6541 } 6542 else 6543 { 6544 t = ret; 6545 if (TREE_CODE (t) != RECORD_TYPE) 6546 t = build_variant_type_copy (t); 6547 ms_va_list_type_node = t; 6548 } 6549 } 6550 6551 return ret; 6552 } 6553 6554 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. 
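The save area laid out below consists of the integer registers first (X86_64_REGPARM_MAX words) followed by the SSE registers (X86_64_SSE_REGPARM_MAX slots of 16 bytes each); va_arg later addresses it through the gp_offset and fp_offset fields of the va_list.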
*/ 6555 6556 static void 6557 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) 6558 { 6559 rtx save_area, mem; 6560 rtx label; 6561 rtx label_ref; 6562 rtx tmp_reg; 6563 rtx nsse_reg; 6564 alias_set_type set; 6565 int i; 6566 int regparm = ix86_regparm; 6567 6568 if (cum->call_abi != DEFAULT_ABI) 6569 regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX; 6570 6571 /* GPR size of varargs save area. */ 6572 if (cfun->va_list_gpr_size) 6573 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD; 6574 else 6575 ix86_varargs_gpr_size = 0; 6576 6577 /* FPR size of varargs save area. We don't need it if we don't pass 6578 anything in SSE registers. */ 6579 if (cum->sse_nregs && cfun->va_list_fpr_size) 6580 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16; 6581 else 6582 ix86_varargs_fpr_size = 0; 6583 6584 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size) 6585 return; 6586 6587 save_area = frame_pointer_rtx; 6588 set = get_varargs_alias_set (); 6589 6590 for (i = cum->regno; 6591 i < regparm 6592 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD; 6593 i++) 6594 { 6595 mem = gen_rtx_MEM (Pmode, 6596 plus_constant (save_area, i * UNITS_PER_WORD)); 6597 MEM_NOTRAP_P (mem) = 1; 6598 set_mem_alias_set (mem, set); 6599 emit_move_insn (mem, gen_rtx_REG (Pmode, 6600 x86_64_int_parameter_registers[i])); 6601 } 6602 6603 if (ix86_varargs_fpr_size) 6604 { 6605 /* Stack must be aligned to 16byte for FP register save area. */ 6606 if (crtl->stack_alignment_needed < 128) 6607 crtl->stack_alignment_needed = 128; 6608 6609 /* Now emit code to save SSE registers. The AX parameter contains number 6610 of SSE parameter registers used to call this function. We use 6611 sse_prologue_save insn template that produces computed jump across 6612 SSE saves. We need some preparation work to get this working. */ 6613 6614 label = gen_label_rtx (); 6615 label_ref = gen_rtx_LABEL_REF (Pmode, label); 6616 6617 /* Compute address to jump to : 6618 label - eax*4 + nnamed_sse_arguments*4 Or 6619 label - eax*5 + nnamed_sse_arguments*5 for AVX. */ 6620 tmp_reg = gen_reg_rtx (Pmode); 6621 nsse_reg = gen_reg_rtx (Pmode); 6622 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG))); 6623 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, 6624 gen_rtx_MULT (Pmode, nsse_reg, 6625 GEN_INT (4)))); 6626 6627 /* vmovaps is one byte longer than movaps. */ 6628 if (TARGET_AVX) 6629 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, 6630 gen_rtx_PLUS (Pmode, tmp_reg, 6631 nsse_reg))); 6632 6633 if (cum->sse_regno) 6634 emit_move_insn 6635 (nsse_reg, 6636 gen_rtx_CONST (DImode, 6637 gen_rtx_PLUS (DImode, 6638 label_ref, 6639 GEN_INT (cum->sse_regno 6640 * (TARGET_AVX ? 5 : 4))))); 6641 else 6642 emit_move_insn (nsse_reg, label_ref); 6643 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg)); 6644 6645 /* Compute address of memory block we save into. We always use pointer 6646 pointing 127 bytes after first byte to store - this is needed to keep 6647 instruction size limited by 4 bytes (5 bytes for AVX) with one 6648 byte displacement. */ 6649 tmp_reg = gen_reg_rtx (Pmode); 6650 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, 6651 plus_constant (save_area, 6652 ix86_varargs_gpr_size + 127))); 6653 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127)); 6654 MEM_NOTRAP_P (mem) = 1; 6655 set_mem_alias_set (mem, set); 6656 set_mem_align (mem, BITS_PER_WORD); 6657 6658 /* And finally do the dirty job! 
*/ 6659 emit_insn (gen_sse_prologue_save (mem, nsse_reg, 6660 GEN_INT (cum->sse_regno), label)); 6661 } 6662 } 6663 6664 static void 6665 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum) 6666 { 6667 alias_set_type set = get_varargs_alias_set (); 6668 int i; 6669 6670 for (i = cum->regno; i < X64_REGPARM_MAX; i++) 6671 { 6672 rtx reg, mem; 6673 6674 mem = gen_rtx_MEM (Pmode, 6675 plus_constant (virtual_incoming_args_rtx, 6676 i * UNITS_PER_WORD)); 6677 MEM_NOTRAP_P (mem) = 1; 6678 set_mem_alias_set (mem, set); 6679 6680 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]); 6681 emit_move_insn (mem, reg); 6682 } 6683 } 6684 6685 static void 6686 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode, 6687 tree type, int *pretend_size ATTRIBUTE_UNUSED, 6688 int no_rtl) 6689 { 6690 CUMULATIVE_ARGS next_cum; 6691 tree fntype; 6692 6693 /* This argument doesn't appear to be used anymore. Which is good, 6694 because the old code here didn't suppress rtl generation. */ 6695 gcc_assert (!no_rtl); 6696 6697 if (!TARGET_64BIT) 6698 return; 6699 6700 fntype = TREE_TYPE (current_function_decl); 6701 6702 /* For varargs, we do not want to skip the dummy va_dcl argument. 6703 For stdargs, we do want to skip the last named argument. */ 6704 next_cum = *cum; 6705 if (stdarg_p (fntype)) 6706 function_arg_advance (&next_cum, mode, type, 1); 6707 6708 if (cum->call_abi == MS_ABI) 6709 setup_incoming_varargs_ms_64 (&next_cum); 6710 else 6711 setup_incoming_varargs_64 (&next_cum); 6712 } 6713 6714 /* Checks if TYPE is of kind va_list char *. */ 6715 6716 static bool 6717 is_va_list_char_pointer (tree type) 6718 { 6719 tree canonic; 6720 6721 /* For 32-bit it is always true. */ 6722 if (!TARGET_64BIT) 6723 return true; 6724 canonic = ix86_canonical_va_list_type (type); 6725 return (canonic == ms_va_list_type_node 6726 || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node)); 6727 } 6728 6729 /* Implement va_start. */ 6730 6731 static void 6732 ix86_va_start (tree valist, rtx nextarg) 6733 { 6734 HOST_WIDE_INT words, n_gpr, n_fpr; 6735 tree f_gpr, f_fpr, f_ovf, f_sav; 6736 tree gpr, fpr, ovf, sav, t; 6737 tree type; 6738 6739 /* Only 64bit target needs something special. */ 6740 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist))) 6741 { 6742 std_expand_builtin_va_start (valist, nextarg); 6743 return; 6744 } 6745 6746 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node)); 6747 f_fpr = TREE_CHAIN (f_gpr); 6748 f_ovf = TREE_CHAIN (f_fpr); 6749 f_sav = TREE_CHAIN (f_ovf); 6750 6751 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist); 6752 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE); 6753 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE); 6754 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE); 6755 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE); 6756 6757 /* Count number of gp and fp argument registers used. 
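These feed the SYSV va_list fields set just below: gp_offset becomes n_gpr * 8 and fp_offset becomes X86_64_REGPARM_MAX * 8 + n_fpr * 16, i.e. byte offsets into the register save area created by the prologue.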
*/ 6758 words = crtl->args.info.words; 6759 n_gpr = crtl->args.info.regno; 6760 n_fpr = crtl->args.info.sse_regno; 6761 6762 if (cfun->va_list_gpr_size) 6763 { 6764 type = TREE_TYPE (gpr); 6765 t = build2 (MODIFY_EXPR, type, 6766 gpr, build_int_cst (type, n_gpr * 8)); 6767 TREE_SIDE_EFFECTS (t) = 1; 6768 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 6769 } 6770 6771 if (TARGET_SSE && cfun->va_list_fpr_size) 6772 { 6773 type = TREE_TYPE (fpr); 6774 t = build2 (MODIFY_EXPR, type, fpr, 6775 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX)); 6776 TREE_SIDE_EFFECTS (t) = 1; 6777 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 6778 } 6779 6780 /* Find the overflow area. */ 6781 type = TREE_TYPE (ovf); 6782 t = make_tree (type, crtl->args.internal_arg_pointer); 6783 if (words != 0) 6784 t = build2 (POINTER_PLUS_EXPR, type, t, 6785 size_int (words * UNITS_PER_WORD)); 6786 t = build2 (MODIFY_EXPR, type, ovf, t); 6787 TREE_SIDE_EFFECTS (t) = 1; 6788 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 6789 6790 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size) 6791 { 6792 /* Find the register save area. 6793 Prologue of the function save it right above stack frame. */ 6794 type = TREE_TYPE (sav); 6795 t = make_tree (type, frame_pointer_rtx); 6796 if (!ix86_varargs_gpr_size) 6797 t = build2 (POINTER_PLUS_EXPR, type, t, 6798 size_int (-8 * X86_64_REGPARM_MAX)); 6799 t = build2 (MODIFY_EXPR, type, sav, t); 6800 TREE_SIDE_EFFECTS (t) = 1; 6801 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 6802 } 6803 } 6804 6805 /* Implement va_arg. */ 6806 6807 static tree 6808 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, 6809 gimple_seq *post_p) 6810 { 6811 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 }; 6812 tree f_gpr, f_fpr, f_ovf, f_sav; 6813 tree gpr, fpr, ovf, sav, t; 6814 int size, rsize; 6815 tree lab_false, lab_over = NULL_TREE; 6816 tree addr, t2; 6817 rtx container; 6818 int indirect_p = 0; 6819 tree ptrtype; 6820 enum machine_mode nat_mode; 6821 int arg_boundary; 6822 6823 /* Only 64bit target needs something special. */ 6824 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist))) 6825 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); 6826 6827 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node)); 6828 f_fpr = TREE_CHAIN (f_gpr); 6829 f_ovf = TREE_CHAIN (f_fpr); 6830 f_sav = TREE_CHAIN (f_ovf); 6831 6832 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), 6833 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE); 6834 valist = build_va_arg_indirect_ref (valist); 6835 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE); 6836 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE); 6837 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE); 6838 6839 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false); 6840 if (indirect_p) 6841 type = build_pointer_type (type); 6842 size = int_size_in_bytes (type); 6843 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 6844 6845 nat_mode = type_natural_mode (type, NULL); 6846 switch (nat_mode) 6847 { 6848 case V8SFmode: 6849 case V8SImode: 6850 case V32QImode: 6851 case V16HImode: 6852 case V4DFmode: 6853 case V4DImode: 6854 /* Unnamed 256bit vector mode parameters are passed on stack. 
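Since every argument fetched through va_arg is unnamed, under the SYSV ABI no register container is built for these modes and the value is read from the overflow area instead.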
*/ 6855 if (ix86_cfun_abi () == SYSV_ABI) 6856 { 6857 container = NULL; 6858 break; 6859 } 6860 6861 default: 6862 container = construct_container (nat_mode, TYPE_MODE (type), 6863 type, 0, X86_64_REGPARM_MAX, 6864 X86_64_SSE_REGPARM_MAX, intreg, 6865 0); 6866 break; 6867 } 6868 6869 /* Pull the value out of the saved registers. */ 6870 6871 addr = create_tmp_var (ptr_type_node, "addr"); 6872 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set (); 6873 6874 if (container) 6875 { 6876 int needed_intregs, needed_sseregs; 6877 bool need_temp; 6878 tree int_addr, sse_addr; 6879 6880 lab_false = create_artificial_label (); 6881 lab_over = create_artificial_label (); 6882 6883 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs); 6884 6885 need_temp = (!REG_P (container) 6886 && ((needed_intregs && TYPE_ALIGN (type) > 64) 6887 || TYPE_ALIGN (type) > 128)); 6888 6889 /* In case we are passing structure, verify that it is consecutive block 6890 on the register save area. If not we need to do moves. */ 6891 if (!need_temp && !REG_P (container)) 6892 { 6893 /* Verify that all registers are strictly consecutive */ 6894 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0)))) 6895 { 6896 int i; 6897 6898 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) 6899 { 6900 rtx slot = XVECEXP (container, 0, i); 6901 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i 6902 || INTVAL (XEXP (slot, 1)) != i * 16) 6903 need_temp = 1; 6904 } 6905 } 6906 else 6907 { 6908 int i; 6909 6910 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) 6911 { 6912 rtx slot = XVECEXP (container, 0, i); 6913 if (REGNO (XEXP (slot, 0)) != (unsigned int) i 6914 || INTVAL (XEXP (slot, 1)) != i * 8) 6915 need_temp = 1; 6916 } 6917 } 6918 } 6919 if (!need_temp) 6920 { 6921 int_addr = addr; 6922 sse_addr = addr; 6923 } 6924 else 6925 { 6926 int_addr = create_tmp_var (ptr_type_node, "int_addr"); 6927 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set (); 6928 sse_addr = create_tmp_var (ptr_type_node, "sse_addr"); 6929 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set (); 6930 } 6931 6932 /* First ensure that we fit completely in registers. */ 6933 if (needed_intregs) 6934 { 6935 t = build_int_cst (TREE_TYPE (gpr), 6936 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8); 6937 t = build2 (GE_EXPR, boolean_type_node, gpr, t); 6938 t2 = build1 (GOTO_EXPR, void_type_node, lab_false); 6939 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); 6940 gimplify_and_add (t, pre_p); 6941 } 6942 if (needed_sseregs) 6943 { 6944 t = build_int_cst (TREE_TYPE (fpr), 6945 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16 6946 + X86_64_REGPARM_MAX * 8); 6947 t = build2 (GE_EXPR, boolean_type_node, fpr, t); 6948 t2 = build1 (GOTO_EXPR, void_type_node, lab_false); 6949 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); 6950 gimplify_and_add (t, pre_p); 6951 } 6952 6953 /* Compute index to start of area used for integer regs. 
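gp_offset and fp_offset already count bytes from the start of the save area, so the addresses computed here are simply sav + gpr and sav + fpr.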
*/ 6954 if (needed_intregs) 6955 { 6956 /* int_addr = gpr + sav; */ 6957 t = fold_convert (sizetype, gpr); 6958 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t); 6959 gimplify_assign (int_addr, t, pre_p); 6960 } 6961 if (needed_sseregs) 6962 { 6963 /* sse_addr = fpr + sav; */ 6964 t = fold_convert (sizetype, fpr); 6965 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t); 6966 gimplify_assign (sse_addr, t, pre_p); 6967 } 6968 if (need_temp) 6969 { 6970 int i; 6971 tree temp = create_tmp_var (type, "va_arg_tmp"); 6972 6973 /* addr = &temp; */ 6974 t = build1 (ADDR_EXPR, build_pointer_type (type), temp); 6975 gimplify_assign (addr, t, pre_p); 6976 6977 for (i = 0; i < XVECLEN (container, 0); i++) 6978 { 6979 rtx slot = XVECEXP (container, 0, i); 6980 rtx reg = XEXP (slot, 0); 6981 enum machine_mode mode = GET_MODE (reg); 6982 tree piece_type = lang_hooks.types.type_for_mode (mode, 1); 6983 tree addr_type = build_pointer_type (piece_type); 6984 tree daddr_type = build_pointer_type_for_mode (piece_type, 6985 ptr_mode, true); 6986 tree src_addr, src; 6987 int src_offset; 6988 tree dest_addr, dest; 6989 6990 if (SSE_REGNO_P (REGNO (reg))) 6991 { 6992 src_addr = sse_addr; 6993 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16; 6994 } 6995 else 6996 { 6997 src_addr = int_addr; 6998 src_offset = REGNO (reg) * 8; 6999 } 7000 src_addr = fold_convert (addr_type, src_addr); 7001 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr, 7002 size_int (src_offset)); 7003 src = build_va_arg_indirect_ref (src_addr); 7004 7005 dest_addr = fold_convert (daddr_type, addr); 7006 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr, 7007 size_int (INTVAL (XEXP (slot, 1)))); 7008 dest = build_va_arg_indirect_ref (dest_addr); 7009 7010 gimplify_assign (dest, src, pre_p); 7011 } 7012 } 7013 7014 if (needed_intregs) 7015 { 7016 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr, 7017 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8)); 7018 gimplify_assign (gpr, t, pre_p); 7019 } 7020 7021 if (needed_sseregs) 7022 { 7023 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr, 7024 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16)); 7025 gimplify_assign (fpr, t, pre_p); 7026 } 7027 7028 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over)); 7029 7030 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false)); 7031 } 7032 7033 /* ... otherwise out of the overflow area. */ 7034 7035 /* When we align parameter on stack for caller, if the parameter 7036 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be 7037 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee 7038 here with caller. */ 7039 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type); 7040 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT) 7041 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT; 7042 7043 /* Care for on-stack alignment if needed. 
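For boundaries above 64 bits the overflow pointer is rounded up first, roughly addr = (ovf + align - 1) & -align, before the argument is read from the overflow area.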
*/ 7044 if (arg_boundary <= 64 7045 || integer_zerop (TYPE_SIZE (type))) 7046 t = ovf; 7047 else 7048 { 7049 HOST_WIDE_INT align = arg_boundary / 8; 7050 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf, 7051 size_int (align - 1)); 7052 t = fold_convert (sizetype, t); 7053 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, 7054 size_int (-align)); 7055 t = fold_convert (TREE_TYPE (ovf), t); 7056 } 7057 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); 7058 gimplify_assign (addr, t, pre_p); 7059 7060 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t, 7061 size_int (rsize * UNITS_PER_WORD)); 7062 gimplify_assign (unshare_expr (ovf), t, pre_p); 7063 7064 if (container) 7065 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over)); 7066 7067 ptrtype = build_pointer_type (type); 7068 addr = fold_convert (ptrtype, addr); 7069 7070 if (indirect_p) 7071 addr = build_va_arg_indirect_ref (addr); 7072 return build_va_arg_indirect_ref (addr); 7073 } 7074 7075 /* Return nonzero if OPNUM's MEM should be matched 7076 in movabs* patterns. */ 7077 7078 int 7079 ix86_check_movabs (rtx insn, int opnum) 7080 { 7081 rtx set, mem; 7082 7083 set = PATTERN (insn); 7084 if (GET_CODE (set) == PARALLEL) 7085 set = XVECEXP (set, 0, 0); 7086 gcc_assert (GET_CODE (set) == SET); 7087 mem = XEXP (set, opnum); 7088 while (GET_CODE (mem) == SUBREG) 7089 mem = SUBREG_REG (mem); 7090 gcc_assert (MEM_P (mem)); 7091 return (volatile_ok || !MEM_VOLATILE_P (mem)); 7092 } 7093 7094 /* Initialize the table of extra 80387 mathematical constants. */ 7095 7096 static void 7097 init_ext_80387_constants (void) 7098 { 7099 static const char * cst[5] = 7100 { 7101 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */ 7102 "0.6931471805599453094286904741849753009", /* 1: fldln2 */ 7103 "1.4426950408889634073876517827983434472", /* 2: fldl2e */ 7104 "3.3219280948873623478083405569094566090", /* 3: fldl2t */ 7105 "3.1415926535897932385128089594061862044", /* 4: fldpi */ 7106 }; 7107 int i; 7108 7109 for (i = 0; i < 5; i++) 7110 { 7111 real_from_string (&ext_80387_constants_table[i], cst[i]); 7112 /* Ensure each constant is rounded to XFmode precision. */ 7113 real_convert (&ext_80387_constants_table[i], 7114 XFmode, &ext_80387_constants_table[i]); 7115 } 7116 7117 ext_80387_constants_init = 1; 7118 } 7119 7120 /* Return true if the constant is something that can be loaded with 7121 a special instruction. */ 7122 7123 int 7124 standard_80387_constant_p (rtx x) 7125 { 7126 enum machine_mode mode = GET_MODE (x); 7127 7128 REAL_VALUE_TYPE r; 7129 7130 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE))) 7131 return -1; 7132 7133 if (x == CONST0_RTX (mode)) 7134 return 1; 7135 if (x == CONST1_RTX (mode)) 7136 return 2; 7137 7138 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 7139 7140 /* For XFmode constants, try to find a special 80387 instruction when 7141 optimizing for size or on those CPUs that benefit from them. */ 7142 if (mode == XFmode 7143 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)) 7144 { 7145 int i; 7146 7147 if (! ext_80387_constants_init) 7148 init_ext_80387_constants (); 7149 7150 for (i = 0; i < 5; i++) 7151 if (real_identical (&r, &ext_80387_constants_table[i])) 7152 return i + 3; 7153 } 7154 7155 /* Load of the constant -0.0 or -1.0 will be split as 7156 fldz;fchs or fld1;fchs sequence. 
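The return values 8 and 9 below are mapped to the "#" template by standard_80387_constant_opcode, so such a move is not emitted directly but split into the two-instruction sequence instead.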
*/ 7157 if (real_isnegzero (&r)) 7158 return 8; 7159 if (real_identical (&r, &dconstm1)) 7160 return 9; 7161 7162 return 0; 7163 } 7164 7165 /* Return the opcode of the special instruction to be used to load 7166 the constant X. */ 7167 7168 const char * 7169 standard_80387_constant_opcode (rtx x) 7170 { 7171 switch (standard_80387_constant_p (x)) 7172 { 7173 case 1: 7174 return "fldz"; 7175 case 2: 7176 return "fld1"; 7177 case 3: 7178 return "fldlg2"; 7179 case 4: 7180 return "fldln2"; 7181 case 5: 7182 return "fldl2e"; 7183 case 6: 7184 return "fldl2t"; 7185 case 7: 7186 return "fldpi"; 7187 case 8: 7188 case 9: 7189 return "#"; 7190 default: 7191 gcc_unreachable (); 7192 } 7193 } 7194 7195 /* Return the CONST_DOUBLE representing the 80387 constant that is 7196 loaded by the specified special instruction. The argument IDX 7197 matches the return value from standard_80387_constant_p. */ 7198 7199 rtx 7200 standard_80387_constant_rtx (int idx) 7201 { 7202 int i; 7203 7204 if (! ext_80387_constants_init) 7205 init_ext_80387_constants (); 7206 7207 switch (idx) 7208 { 7209 case 3: 7210 case 4: 7211 case 5: 7212 case 6: 7213 case 7: 7214 i = idx - 3; 7215 break; 7216 7217 default: 7218 gcc_unreachable (); 7219 } 7220 7221 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i], 7222 XFmode); 7223 } 7224 7225 /* Return 1 if mode is a valid mode for sse. */ 7226 static int 7227 standard_sse_mode_p (enum machine_mode mode) 7228 { 7229 switch (mode) 7230 { 7231 case V16QImode: 7232 case V8HImode: 7233 case V4SImode: 7234 case V2DImode: 7235 case V4SFmode: 7236 case V2DFmode: 7237 return 1; 7238 7239 default: 7240 return 0; 7241 } 7242 } 7243 7244 /* Return 1 if X is all 0s. For all 1s, return 2 if X is in 128bit 7245 SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX 7246 modes and AVX is enabled. */ 7247 7248 int 7249 standard_sse_constant_p (rtx x) 7250 { 7251 enum machine_mode mode = GET_MODE (x); 7252 7253 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x))) 7254 return 1; 7255 if (vector_all_ones_operand (x, mode)) 7256 { 7257 if (standard_sse_mode_p (mode)) 7258 return TARGET_SSE2 ? 2 : -2; 7259 else if (VALID_AVX256_REG_MODE (mode)) 7260 return TARGET_AVX ? 3 : -3; 7261 } 7262 7263 return 0; 7264 } 7265 7266 /* Return the opcode of the special instruction to be used to load 7267 the constant X. */ 7268 7269 const char * 7270 standard_sse_constant_opcode (rtx insn, rtx x) 7271 { 7272 switch (standard_sse_constant_p (x)) 7273 { 7274 case 1: 7275 switch (get_attr_mode (insn)) 7276 { 7277 case MODE_V4SF: 7278 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0"; 7279 case MODE_V2DF: 7280 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0"; 7281 case MODE_TI: 7282 return TARGET_AVX ? 
"vpxor\t%0, %0, %0" : "pxor\t%0, %0"; 7283 case MODE_V8SF: 7284 return "vxorps\t%x0, %x0, %x0"; 7285 case MODE_V4DF: 7286 return "vxorpd\t%x0, %x0, %x0"; 7287 case MODE_OI: 7288 return "vpxor\t%x0, %x0, %x0"; 7289 default: 7290 gcc_unreachable (); 7291 } 7292 case 2: 7293 if (TARGET_AVX) 7294 switch (get_attr_mode (insn)) 7295 { 7296 case MODE_V4SF: 7297 case MODE_V2DF: 7298 case MODE_TI: 7299 return "vpcmpeqd\t%0, %0, %0"; 7300 break; 7301 default: 7302 gcc_unreachable (); 7303 } 7304 else 7305 return "pcmpeqd\t%0, %0"; 7306 } 7307 gcc_unreachable (); 7308 } 7309 7310 /* Returns 1 if OP contains a symbol reference */ 7311 7312 int 7313 symbolic_reference_mentioned_p (rtx op) 7314 { 7315 const char *fmt; 7316 int i; 7317 7318 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) 7319 return 1; 7320 7321 fmt = GET_RTX_FORMAT (GET_CODE (op)); 7322 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) 7323 { 7324 if (fmt[i] == 'E') 7325 { 7326 int j; 7327 7328 for (j = XVECLEN (op, i) - 1; j >= 0; j--) 7329 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) 7330 return 1; 7331 } 7332 7333 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) 7334 return 1; 7335 } 7336 7337 return 0; 7338 } 7339 7340 /* Return 1 if it is appropriate to emit `ret' instructions in the 7341 body of a function. Do this only if the epilogue is simple, needing a 7342 couple of insns. Prior to reloading, we can't tell how many registers 7343 must be saved, so return 0 then. Return 0 if there is no frame 7344 marker to de-allocate. */ 7345 7346 int 7347 ix86_can_use_return_insn_p (void) 7348 { 7349 struct ix86_frame frame; 7350 7351 if (! reload_completed || frame_pointer_needed) 7352 return 0; 7353 7354 /* Don't allow more than 32 pop, since that's all we can do 7355 with one instruction. */ 7356 if (crtl->args.pops_args 7357 && crtl->args.size >= 32768) 7358 return 0; 7359 7360 ix86_compute_frame_layout (&frame); 7361 return frame.to_allocate == 0 && frame.padding05 == 0 && 7362 frame.nmsave_args == 0 && (frame.nregs + frame.nsseregs) == 0; 7363 } 7364 7365 /* Value should be nonzero if functions must have frame pointers. 7366 Zero means the frame pointer need not be set up (and parms may 7367 be accessed via the stack pointer) in functions that seem suitable. */ 7368 7369 int 7370 ix86_frame_pointer_required (void) 7371 { 7372 /* If we accessed previous frames, then the generated code expects 7373 to be able to access the saved ebp value in our frame. */ 7374 if (cfun->machine->accesses_prev_frame) 7375 return 1; 7376 7377 /* Several x86 os'es need a frame pointer for other reasons, 7378 usually pertaining to setjmp. */ 7379 if (SUBTARGET_FRAME_POINTER_REQUIRED) 7380 return 1; 7381 7382 if (TARGET_SAVE_ARGS) 7383 return 1; 7384 7385 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off 7386 the frame pointer by default. Turn it back on now if we've not 7387 got a leaf function. */ 7388 if (TARGET_OMIT_LEAF_FRAME_POINTER 7389 && (!current_function_is_leaf 7390 || ix86_current_function_calls_tls_descriptor)) 7391 return 1; 7392 7393 if (crtl->profile) 7394 return 1; 7395 7396 return 0; 7397 } 7398 7399 /* Record that the current function accesses previous call frames. 
*/ 7400 7401 void 7402 ix86_setup_frame_addresses (void) 7403 { 7404 cfun->machine->accesses_prev_frame = 1; 7405 } 7406 7407 #ifndef USE_HIDDEN_LINKONCE 7408 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO 7409 # define USE_HIDDEN_LINKONCE 1 7410 # else 7411 # define USE_HIDDEN_LINKONCE 0 7412 # endif 7413 #endif 7414 7415 static int pic_labels_used; 7416 7417 /* Fills in the label name that should be used for a pc thunk for 7418 the given register. */ 7419 7420 static void 7421 get_pc_thunk_name (char name[32], unsigned int regno) 7422 { 7423 gcc_assert (!TARGET_64BIT); 7424 7425 if (USE_HIDDEN_LINKONCE) 7426 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]); 7427 else 7428 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno); 7429 } 7430 7431 7432 /* This function generates code for -fpic that loads %ebx with 7433 the return address of the caller and then returns. */ 7434 7435 void 7436 ix86_file_end (void) 7437 { 7438 rtx xops[2]; 7439 int regno; 7440 7441 for (regno = 0; regno < 8; ++regno) 7442 { 7443 char name[32]; 7444 7445 if (! ((pic_labels_used >> regno) & 1)) 7446 continue; 7447 7448 get_pc_thunk_name (name, regno); 7449 7450 #if TARGET_MACHO 7451 if (TARGET_MACHO) 7452 { 7453 switch_to_section (darwin_sections[text_coal_section]); 7454 fputs ("\t.weak_definition\t", asm_out_file); 7455 assemble_name (asm_out_file, name); 7456 fputs ("\n\t.private_extern\t", asm_out_file); 7457 assemble_name (asm_out_file, name); 7458 fputs ("\n", asm_out_file); 7459 ASM_OUTPUT_LABEL (asm_out_file, name); 7460 } 7461 else 7462 #endif 7463 if (USE_HIDDEN_LINKONCE) 7464 { 7465 tree decl; 7466 7467 decl = build_decl (FUNCTION_DECL, get_identifier (name), 7468 error_mark_node); 7469 TREE_PUBLIC (decl) = 1; 7470 TREE_STATIC (decl) = 1; 7471 DECL_ONE_ONLY (decl) = 1; 7472 7473 (*targetm.asm_out.unique_section) (decl, 0); 7474 switch_to_section (get_named_section (decl, NULL, 0)); 7475 7476 (*targetm.asm_out.globalize_label) (asm_out_file, name); 7477 fputs ("\t.hidden\t", asm_out_file); 7478 assemble_name (asm_out_file, name); 7479 fputc ('\n', asm_out_file); 7480 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); 7481 } 7482 else 7483 { 7484 switch_to_section (text_section); 7485 ASM_OUTPUT_LABEL (asm_out_file, name); 7486 } 7487 7488 xops[0] = gen_rtx_REG (Pmode, regno); 7489 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx); 7490 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops); 7491 output_asm_insn ("ret", xops); 7492 } 7493 7494 if (NEED_INDICATE_EXEC_STACK) 7495 file_end_indicate_exec_stack (); 7496 } 7497 7498 /* Emit code for the SET_GOT patterns. */ 7499 7500 const char * 7501 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED) 7502 { 7503 rtx xops[3]; 7504 7505 xops[0] = dest; 7506 7507 if (TARGET_VXWORKS_RTP && flag_pic) 7508 { 7509 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */ 7510 xops[2] = gen_rtx_MEM (Pmode, 7511 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE)); 7512 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops); 7513 7514 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register. 7515 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as 7516 an unadorned address. */ 7517 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX); 7518 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL; 7519 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops); 7520 return ""; 7521 } 7522 7523 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); 7524 7525 if (! 
TARGET_DEEP_BRANCH_PREDICTION || !flag_pic) 7526 { 7527 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ()); 7528 7529 if (!flag_pic) 7530 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops); 7531 else 7532 output_asm_insn ("call\t%a2", xops); 7533 7534 #if TARGET_MACHO 7535 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This 7536 is what will be referenced by the Mach-O PIC subsystem. */ 7537 if (!label) 7538 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME); 7539 #endif 7540 7541 (*targetm.asm_out.internal_label) (asm_out_file, "L", 7542 CODE_LABEL_NUMBER (XEXP (xops[2], 0))); 7543 7544 if (flag_pic) 7545 output_asm_insn ("pop%z0\t%0", xops); 7546 } 7547 else 7548 { 7549 char name[32]; 7550 get_pc_thunk_name (name, REGNO (dest)); 7551 pic_labels_used |= 1 << REGNO (dest); 7552 7553 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); 7554 xops[2] = gen_rtx_MEM (QImode, xops[2]); 7555 output_asm_insn ("call\t%X2", xops); 7556 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This 7557 is what will be referenced by the Mach-O PIC subsystem. */ 7558 #if TARGET_MACHO 7559 if (!label) 7560 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME); 7561 else 7562 targetm.asm_out.internal_label (asm_out_file, "L", 7563 CODE_LABEL_NUMBER (label)); 7564 #endif 7565 } 7566 7567 if (TARGET_MACHO) 7568 return ""; 7569 7570 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION) 7571 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops); 7572 else 7573 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops); 7574 7575 return ""; 7576 } 7577 7578 /* Generate an "push" pattern for input ARG. */ 7579 7580 static rtx 7581 gen_push (rtx arg) 7582 { 7583 return gen_rtx_SET (VOIDmode, 7584 gen_rtx_MEM (Pmode, 7585 gen_rtx_PRE_DEC (Pmode, 7586 stack_pointer_rtx)), 7587 arg); 7588 } 7589 7590 /* Return >= 0 if there is an unused call-clobbered register available 7591 for the entire function. */ 7592 7593 static unsigned int 7594 ix86_select_alt_pic_regnum (void) 7595 { 7596 if (current_function_is_leaf && !crtl->profile 7597 && !ix86_current_function_calls_tls_descriptor) 7598 { 7599 int i, drap; 7600 /* Can't use the same register for both PIC and DRAP. */ 7601 if (crtl->drap_reg) 7602 drap = REGNO (crtl->drap_reg); 7603 else 7604 drap = -1; 7605 for (i = 2; i >= 0; --i) 7606 if (i != drap && !df_regs_ever_live_p (i)) 7607 return i; 7608 } 7609 7610 return INVALID_REGNUM; 7611 } 7612 7613 /* Return 1 if we need to save REGNO. */ 7614 static int 7615 ix86_save_reg (unsigned int regno, int maybe_eh_return) 7616 { 7617 if (pic_offset_table_rtx 7618 && regno == REAL_PIC_OFFSET_TABLE_REGNUM 7619 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM) 7620 || crtl->profile 7621 || crtl->calls_eh_return 7622 || crtl->uses_const_pool)) 7623 { 7624 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM) 7625 return 0; 7626 return 1; 7627 } 7628 7629 if (crtl->calls_eh_return && maybe_eh_return) 7630 { 7631 unsigned i; 7632 for (i = 0; ; i++) 7633 { 7634 unsigned test = EH_RETURN_DATA_REGNO (i); 7635 if (test == INVALID_REGNUM) 7636 break; 7637 if (test == regno) 7638 return 1; 7639 } 7640 } 7641 7642 if (crtl->drap_reg 7643 && regno == REGNO (crtl->drap_reg)) 7644 return 1; 7645 7646 return (df_regs_ever_live_p (regno) 7647 && !call_used_regs[regno] 7648 && !fixed_regs[regno] 7649 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)); 7650 } 7651 7652 /* Return number of saved general prupose registers. 
*/ 7653 7654 static int 7655 ix86_nsaved_regs (void) 7656 { 7657 int nregs = 0; 7658 int regno; 7659 7660 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 7661 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) 7662 nregs ++; 7663 return nregs; 7664 } 7665 7666 /* Return number of saved SSE registers. */ 7667 7668 static int 7669 ix86_nsaved_sseregs (void) 7670 { 7671 int nregs = 0; 7672 int regno; 7673 7674 if (ix86_cfun_abi () != MS_ABI) 7675 return 0; 7676 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 7677 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) 7678 nregs ++; 7679 return nregs; 7680 } 7681 7682 /* Given FROM and TO register numbers, say whether this elimination is 7683 allowed. If stack alignment is needed, we can only replace argument 7684 pointer with hard frame pointer, or replace frame pointer with stack 7685 pointer. Otherwise, frame pointer elimination is automatically 7686 handled and all other eliminations are valid. */ 7687 7688 int 7689 ix86_can_eliminate (int from, int to) 7690 { 7691 if (stack_realign_fp) 7692 return ((from == ARG_POINTER_REGNUM 7693 && to == HARD_FRAME_POINTER_REGNUM) 7694 || (from == FRAME_POINTER_REGNUM 7695 && to == STACK_POINTER_REGNUM)); 7696 else 7697 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1; 7698 } 7699 7700 /* Return the offset between two registers, one to be eliminated, and the other 7701 its replacement, at the start of a routine. */ 7702 7703 HOST_WIDE_INT 7704 ix86_initial_elimination_offset (int from, int to) 7705 { 7706 struct ix86_frame frame; 7707 ix86_compute_frame_layout (&frame); 7708 7709 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 7710 return frame.hard_frame_pointer_offset; 7711 else if (from == FRAME_POINTER_REGNUM 7712 && to == HARD_FRAME_POINTER_REGNUM) 7713 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; 7714 else 7715 { 7716 gcc_assert (to == STACK_POINTER_REGNUM); 7717 7718 if (from == ARG_POINTER_REGNUM) 7719 return frame.stack_pointer_offset; 7720 7721 gcc_assert (from == FRAME_POINTER_REGNUM); 7722 return frame.stack_pointer_offset - frame.frame_pointer_offset; 7723 } 7724 } 7725 7726 /* In a dynamically-aligned function, we can't know the offset from 7727 stack pointer to frame pointer, so we must ensure that setjmp 7728 eliminates fp against the hard fp (%ebp) rather than trying to 7729 index from %esp up to the top of the frame across a gap that is 7730 of unknown (at compile-time) size. */ 7731 static rtx 7732 ix86_builtin_setjmp_frame_value (void) 7733 { 7734 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx; 7735 } 7736 7737 /* Fill in the structure ix86_frame describing the frame of the function currently being compiled. */ 7738 7739 static void 7740 ix86_compute_frame_layout (struct ix86_frame *frame) 7741 { 7742 HOST_WIDE_INT total_size; 7743 unsigned int stack_alignment_needed; 7744 HOST_WIDE_INT offset; 7745 unsigned int preferred_alignment; 7746 HOST_WIDE_INT size = get_frame_size (); 7747 7748 frame->nregs = ix86_nsaved_regs (); 7749 frame->nsseregs = ix86_nsaved_sseregs (); 7750 frame->nmsave_args = ix86_nsaved_args (); 7751 total_size = size; 7752 7753 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT; 7754 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT; 7755 7756 /* The MS ABI seems to require stack alignment to always be 16 except in function 7757 prologues. 
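Note that crtl->preferred_stack_boundary and crtl->stack_alignment_needed are measured in bits, which is why they are set to 128 below while the local byte-sized copies are set to 16.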
*/ 7758 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16) 7759 { 7760 preferred_alignment = 16; 7761 stack_alignment_needed = 16; 7762 crtl->preferred_stack_boundary = 128; 7763 crtl->stack_alignment_needed = 128; 7764 } 7765 7766 gcc_assert (!size || stack_alignment_needed); 7767 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); 7768 gcc_assert (preferred_alignment <= stack_alignment_needed); 7769 7770 /* During reload iteration the amount of registers saved can change. 7771 Recompute the value as needed. Do not recompute when amount of registers 7772 didn't change as reload does multiple calls to the function and does not 7773 expect the decision to change within single iteration. */ 7774 if (!optimize_function_for_size_p (cfun) 7775 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs) 7776 { 7777 int count = frame->nregs; 7778 7779 cfun->machine->use_fast_prologue_epilogue_nregs = count; 7780 /* The fast prologue uses move instead of push to save registers. This 7781 is significantly longer, but also executes faster as modern hardware 7782 can execute the moves in parallel, but can't do that for push/pop. 7783 7784 Be careful about choosing what prologue to emit: When function takes 7785 many instructions to execute we may use slow version as well as in 7786 case function is known to be outside hot spot (this is known with 7787 feedback only). Weight the size of function by number of registers 7788 to save as it is cheap to use one or two push instructions but very 7789 slow to use many of them. */ 7790 if (count) 7791 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT; 7792 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL 7793 || (flag_branch_probabilities 7794 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT)) 7795 cfun->machine->use_fast_prologue_epilogue = false; 7796 else 7797 cfun->machine->use_fast_prologue_epilogue 7798 = !expensive_function_p (count); 7799 } 7800 if (TARGET_PROLOGUE_USING_MOVE 7801 && cfun->machine->use_fast_prologue_epilogue) 7802 frame->save_regs_using_mov = true; 7803 else 7804 frame->save_regs_using_mov = false; 7805 7806 if (TARGET_SAVE_ARGS) 7807 { 7808 cfun->machine->use_fast_prologue_epilogue = true; 7809 frame->save_regs_using_mov = true; 7810 } 7811 7812 /* Skip return address and saved base pointer. */ 7813 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD; 7814 7815 frame->hard_frame_pointer_offset = offset; 7816 7817 /* Set offset to aligned because the realigned frame starts from 7818 here. */ 7819 if (stack_realign_fp) 7820 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed; 7821 7822 /* Argument save area */ 7823 if (TARGET_SAVE_ARGS) 7824 { 7825 offset += frame->nmsave_args * UNITS_PER_WORD; 7826 frame->padding0 = (frame->nmsave_args % 2) * UNITS_PER_WORD; 7827 offset += frame->padding0; 7828 } 7829 else 7830 frame->padding0 = 0; 7831 7832 /* Register save area */ 7833 offset += frame->nregs * UNITS_PER_WORD; 7834 7835 /* Align SSE reg save area. */ 7836 if (frame->nsseregs) 7837 frame->padding05 = ((offset + 16 - 1) & -16) - offset; 7838 else 7839 frame->padding05 = 0; 7840 7841 /* SSE register save area. */ 7842 offset += frame->padding05 + frame->nsseregs * 16; 7843 7844 /* Va-arg area */ 7845 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; 7846 offset += frame->va_arg_size; 7847 7848 /* Align start of frame for local function. 
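As a hypothetical example: a 64-bit MS-ABI function with a frame pointer, no varargs and no -msave-args that saves three general registers and two SSE registers with stack_alignment_needed == 16 reaches this point with offset = 16 (return address plus saved frame pointer) + 24 (GPR save area) + 8 (padding05 up to a 16-byte boundary) + 32 (SSE save area) = 80, so padding1 computed below is 0.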
*/ 7849 frame->padding1 = ((offset + stack_alignment_needed - 1) 7850 & -stack_alignment_needed) - offset; 7851 7852 offset += frame->padding1; 7853 7854 /* Frame pointer points here. */ 7855 frame->frame_pointer_offset = offset; 7856 7857 offset += size; 7858 7859 /* Add outgoing arguments area. Can be skipped if we eliminated 7860 all the function calls as dead code. 7861 Skipping is however impossible when function calls alloca. Alloca 7862 expander assumes that last crtl->outgoing_args_size 7863 of stack frame are unused. */ 7864 if (ACCUMULATE_OUTGOING_ARGS 7865 && (!current_function_is_leaf || cfun->calls_alloca 7866 || ix86_current_function_calls_tls_descriptor)) 7867 { 7868 offset += crtl->outgoing_args_size; 7869 frame->outgoing_arguments_size = crtl->outgoing_args_size; 7870 } 7871 else 7872 frame->outgoing_arguments_size = 0; 7873 7874 /* Align stack boundary. Only needed if we're calling another function 7875 or using alloca. */ 7876 if (!current_function_is_leaf || cfun->calls_alloca 7877 || ix86_current_function_calls_tls_descriptor) 7878 frame->padding2 = ((offset + preferred_alignment - 1) 7879 & -preferred_alignment) - offset; 7880 else 7881 frame->padding2 = 0; 7882 7883 offset += frame->padding2; 7884 7885 /* We've reached end of stack frame. */ 7886 frame->stack_pointer_offset = offset; 7887 7888 /* Size prologue needs to allocate. */ 7889 frame->to_allocate = 7890 (size + frame->padding1 + frame->padding2 7891 + frame->outgoing_arguments_size + frame->va_arg_size); 7892 7893 if (!TARGET_SAVE_ARGS 7894 && ((!frame->to_allocate && frame->nregs <= 1) 7895 || (TARGET_64BIT 7896 && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))) 7897 frame->save_regs_using_mov = false; 7898 7899 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE 7900 && current_function_sp_is_unchanging 7901 && current_function_is_leaf 7902 && !ix86_current_function_calls_tls_descriptor) 7903 { 7904 frame->red_zone_size = frame->to_allocate; 7905 if (frame->save_regs_using_mov) 7906 { 7907 frame->red_zone_size 7908 += (frame->nregs + frame->nmsave_args) * UNITS_PER_WORD; 7909 frame->red_zone_size += frame->padding0; 7910 } 7911 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) 7912 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; 7913 } 7914 else 7915 frame->red_zone_size = 0; 7916 frame->to_allocate -= frame->red_zone_size; 7917 frame->stack_pointer_offset -= frame->red_zone_size; 7918 #if 0 7919 fprintf (stderr, "\n"); 7920 fprintf (stderr, "size: %ld\n", (long)size); 7921 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs); 7922 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs); 7923 fprintf (stderr, "nmsave_args: %ld\n", (long)frame->nmsave_args); 7924 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0); 7925 fprintf (stderr, "padding05: %ld\n", (long)frame->padding0); 7926 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed); 7927 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1); 7928 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size); 7929 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2); 7930 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate); 7931 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size); 7932 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset); 7933 fprintf (stderr, "hard_frame_pointer_offset: %ld\n", 7934 (long)frame->hard_frame_pointer_offset); 7935 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset); 7936 fprintf 
(stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf); 7937 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca); 7938 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor); 7939 #endif 7940 } 7941 7942 7943 /* Emit code to save registers in the prologue. */ 7944 7945 static void 7946 ix86_emit_save_regs (void) 7947 { 7948 unsigned int regno; 7949 rtx insn; 7950 7951 if (TARGET_SAVE_ARGS) 7952 { 7953 int i; 7954 int nsaved = ix86_nsaved_args (); 7955 int start = cfun->returns_struct; 7956 for (i = start; i < start + nsaved; i++) 7957 { 7958 regno = x86_64_int_parameter_registers[i]; 7959 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno))); 7960 RTX_FRAME_RELATED_P (insn) = 1; 7961 } 7962 if (nsaved % 2 != 0) 7963 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 7964 GEN_INT (-UNITS_PER_WORD), -1); 7965 } 7966 7967 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; ) 7968 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) 7969 { 7970 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno))); 7971 RTX_FRAME_RELATED_P (insn) = 1; 7972 } 7973 } 7974 7975 /* Emit code to save registers using MOV insns. First register 7976 is restored from POINTER + OFFSET. */ 7977 static void 7978 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset) 7979 { 7980 unsigned int regno; 7981 rtx insn; 7982 7983 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 7984 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) 7985 { 7986 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer), 7987 Pmode, offset), 7988 gen_rtx_REG (Pmode, regno)); 7989 RTX_FRAME_RELATED_P (insn) = 1; 7990 offset += UNITS_PER_WORD; 7991 } 7992 7993 if (TARGET_SAVE_ARGS) 7994 { 7995 int i; 7996 int nsaved = ix86_nsaved_args (); 7997 int start = cfun->returns_struct; 7998 if (nsaved % 2 != 0) 7999 offset += UNITS_PER_WORD; 8000 for (i = start + nsaved - 1; i >= start; i--) 8001 { 8002 regno = x86_64_int_parameter_registers[i]; 8003 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer), 8004 Pmode, offset), 8005 gen_rtx_REG (Pmode, regno)); 8006 RTX_FRAME_RELATED_P (insn) = 1; 8007 offset += UNITS_PER_WORD; 8008 } 8009 } 8010 } 8011 8012 /* Emit code to save registers using MOV insns. First register 8013 is restored from POINTER + OFFSET. */ 8014 static void 8015 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset) 8016 { 8017 unsigned int regno; 8018 rtx insn; 8019 rtx mem; 8020 8021 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 8022 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) 8023 { 8024 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset); 8025 set_mem_align (mem, 128); 8026 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno)); 8027 RTX_FRAME_RELATED_P (insn) = 1; 8028 offset += 16; 8029 } 8030 } 8031 8032 /* Expand prologue or epilogue stack adjustment. 8033 The pattern exist to put a dependency on all ebp-based memory accesses. 8034 STYLE should be negative if instructions should be marked as frame related, 8035 zero if %r11 register is live and cannot be freely used and positive 8036 otherwise. */ 8037 8038 static void 8039 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style) 8040 { 8041 rtx insn; 8042 8043 if (! 
TARGET_64BIT) 8044 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset)); 8045 else if (x86_64_immediate_operand (offset, DImode)) 8046 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset)); 8047 else 8048 { 8049 rtx r11; 8050 /* r11 is used by indirect sibcall return as well, set before the 8051 epilogue and used after the epilogue. ATM indirect sibcall 8052 shouldn't be used together with huge frame sizes in one 8053 function because of the frame_size check in sibcall.c. */ 8054 gcc_assert (style); 8055 r11 = gen_rtx_REG (DImode, R11_REG); 8056 insn = emit_insn (gen_rtx_SET (DImode, r11, offset)); 8057 if (style < 0) 8058 RTX_FRAME_RELATED_P (insn) = 1; 8059 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11, 8060 offset)); 8061 } 8062 if (style < 0) 8063 RTX_FRAME_RELATED_P (insn) = 1; 8064 } 8065 8066 /* Find an available register to be used as dynamic realign argument 8067 pointer regsiter. Such a register will be written in prologue and 8068 used in begin of body, so it must not be 8069 1. parameter passing register. 8070 2. GOT pointer. 8071 We reuse static-chain register if it is available. Otherwise, we 8072 use DI for i386 and R13 for x86-64. We chose R13 since it has 8073 shorter encoding. 8074 8075 Return: the regno of chosen register. */ 8076 8077 static unsigned int 8078 find_drap_reg (void) 8079 { 8080 tree decl = cfun->decl; 8081 8082 if (TARGET_64BIT) 8083 { 8084 /* Use R13 for nested function or function need static chain. 8085 Since function with tail call may use any caller-saved 8086 registers in epilogue, DRAP must not use caller-saved 8087 register in such case. */ 8088 if ((decl_function_context (decl) 8089 && !DECL_NO_STATIC_CHAIN (decl)) 8090 || crtl->tail_call_emit) 8091 return R13_REG; 8092 8093 return R10_REG; 8094 } 8095 else 8096 { 8097 /* Use DI for nested function or function need static chain. 8098 Since function with tail call may use any caller-saved 8099 registers in epilogue, DRAP must not use caller-saved 8100 register in such case. */ 8101 if ((decl_function_context (decl) 8102 && !DECL_NO_STATIC_CHAIN (decl)) 8103 || crtl->tail_call_emit) 8104 return DI_REG; 8105 8106 /* Reuse static chain register if it isn't used for parameter 8107 passing. */ 8108 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2 8109 && !lookup_attribute ("fastcall", 8110 TYPE_ATTRIBUTES (TREE_TYPE (decl)))) 8111 return CX_REG; 8112 else 8113 return DI_REG; 8114 } 8115 } 8116 8117 /* Update incoming stack boundary and estimated stack alignment. */ 8118 8119 static void 8120 ix86_update_stack_boundary (void) 8121 { 8122 /* Prefer the one specified at command line. */ 8123 ix86_incoming_stack_boundary 8124 = (ix86_user_incoming_stack_boundary 8125 ? ix86_user_incoming_stack_boundary 8126 : ix86_default_incoming_stack_boundary); 8127 8128 /* Incoming stack alignment can be changed on individual functions 8129 via force_align_arg_pointer attribute. We use the smallest 8130 incoming stack boundary. */ 8131 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY 8132 && lookup_attribute (ix86_force_align_arg_pointer_string, 8133 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) 8134 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY; 8135 8136 /* The incoming stack frame has to be aligned at least at 8137 parm_stack_boundary. */ 8138 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary) 8139 ix86_incoming_stack_boundary = crtl->parm_stack_boundary; 8140 8141 /* Stack at entrance of main is aligned by runtime. 
We use the 8142 smallest incoming stack boundary. */ 8143 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY 8144 && DECL_NAME (current_function_decl) 8145 && MAIN_NAME_P (DECL_NAME (current_function_decl)) 8146 && DECL_FILE_SCOPE_P (current_function_decl)) 8147 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY; 8148 8149 /* x86_64 vararg needs 16byte stack alignment for register save 8150 area. */ 8151 if (TARGET_64BIT 8152 && cfun->stdarg 8153 && crtl->stack_alignment_estimated < 128) 8154 crtl->stack_alignment_estimated = 128; 8155 } 8156 8157 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is 8158 needed or an rtx for DRAP otherwise. */ 8159 8160 static rtx 8161 ix86_get_drap_rtx (void) 8162 { 8163 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS) 8164 crtl->need_drap = true; 8165 8166 if (stack_realign_drap) 8167 { 8168 /* Assign DRAP to vDRAP and returns vDRAP */ 8169 unsigned int regno = find_drap_reg (); 8170 rtx drap_vreg; 8171 rtx arg_ptr; 8172 rtx seq, insn; 8173 8174 arg_ptr = gen_rtx_REG (Pmode, regno); 8175 crtl->drap_reg = arg_ptr; 8176 8177 start_sequence (); 8178 drap_vreg = copy_to_reg (arg_ptr); 8179 seq = get_insns (); 8180 end_sequence (); 8181 8182 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ())); 8183 RTX_FRAME_RELATED_P (insn) = 1; 8184 return drap_vreg; 8185 } 8186 else 8187 return NULL; 8188 } 8189 8190 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */ 8191 8192 static rtx 8193 ix86_internal_arg_pointer (void) 8194 { 8195 return virtual_incoming_args_rtx; 8196 } 8197 8198 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook. 8199 This is called from dwarf2out.c to emit call frame instructions 8200 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */ 8201 static void 8202 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index) 8203 { 8204 rtx unspec = SET_SRC (pattern); 8205 gcc_assert (GET_CODE (unspec) == UNSPEC); 8206 8207 switch (index) 8208 { 8209 case UNSPEC_REG_SAVE: 8210 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0), 8211 SET_DEST (pattern)); 8212 break; 8213 case UNSPEC_DEF_CFA: 8214 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)), 8215 INTVAL (XVECEXP (unspec, 0, 0))); 8216 break; 8217 default: 8218 gcc_unreachable (); 8219 } 8220 } 8221 8222 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue 8223 to be generated in correct form. */ 8224 static void 8225 ix86_finalize_stack_realign_flags (void) 8226 { 8227 /* Check if stack realign is really needed after reload, and 8228 stores result in cfun */ 8229 unsigned int incoming_stack_boundary 8230 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary 8231 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary); 8232 unsigned int stack_realign = (incoming_stack_boundary 8233 < (current_function_is_leaf 8234 ? crtl->max_used_stack_slot_alignment 8235 : crtl->stack_alignment_needed)); 8236 8237 if (crtl->stack_realign_finalized) 8238 { 8239 /* After stack_realign_needed is finalized, we can't no longer 8240 change it. */ 8241 gcc_assert (crtl->stack_realign_needed == stack_realign); 8242 } 8243 else 8244 { 8245 crtl->stack_realign_needed = stack_realign; 8246 crtl->stack_realign_finalized = true; 8247 } 8248 } 8249 8250 /* Expand the prologue into a bunch of separate insns. 
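Roughly in order, the code below emits: DRAP setup and stack realignment when the incoming stack is under-aligned, the push/move pair establishing the frame pointer, frame-pointer-relative realignment (stack_realign_fp), the stack allocation itself (a plain adjustment, or a probing call through %eax when stack probing is enabled and the allocation may reach CHECK_STACK_LIMIT), register saves via push or mov, PIC register setup, and finally the memory blockage and cld insns when they are needed.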
*/ 8251 8252 void 8253 ix86_expand_prologue (void) 8254 { 8255 rtx insn; 8256 bool pic_reg_used; 8257 struct ix86_frame frame; 8258 HOST_WIDE_INT allocate; 8259 8260 ix86_finalize_stack_realign_flags (); 8261 8262 /* DRAP should not coexist with stack_realign_fp */ 8263 gcc_assert (!(crtl->drap_reg && stack_realign_fp)); 8264 8265 ix86_compute_frame_layout (&frame); 8266 8267 /* Emit prologue code to adjust stack alignment and setup DRAP, in case 8268 of DRAP is needed and stack realignment is really needed after reload */ 8269 if (crtl->drap_reg && crtl->stack_realign_needed) 8270 { 8271 rtx x, y; 8272 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; 8273 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)] 8274 ? 0 : UNITS_PER_WORD); 8275 8276 gcc_assert (stack_realign_drap); 8277 8278 /* Grab the argument pointer. */ 8279 x = plus_constant (stack_pointer_rtx, 8280 (UNITS_PER_WORD + param_ptr_offset)); 8281 y = crtl->drap_reg; 8282 8283 /* Only need to push parameter pointer reg if it is caller 8284 saved reg */ 8285 if (!call_used_regs[REGNO (crtl->drap_reg)]) 8286 { 8287 /* Push arg pointer reg */ 8288 insn = emit_insn (gen_push (y)); 8289 RTX_FRAME_RELATED_P (insn) = 1; 8290 } 8291 8292 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x)); 8293 RTX_FRAME_RELATED_P (insn) = 1; 8294 8295 /* Align the stack. */ 8296 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx, 8297 stack_pointer_rtx, 8298 GEN_INT (-align_bytes))); 8299 RTX_FRAME_RELATED_P (insn) = 1; 8300 8301 /* Replicate the return address on the stack so that return 8302 address can be reached via (argp - 1) slot. This is needed 8303 to implement macro RETURN_ADDR_RTX and intrinsic function 8304 expand_builtin_return_addr etc. */ 8305 x = crtl->drap_reg; 8306 x = gen_frame_mem (Pmode, 8307 plus_constant (x, -UNITS_PER_WORD)); 8308 insn = emit_insn (gen_push (x)); 8309 RTX_FRAME_RELATED_P (insn) = 1; 8310 } 8311 8312 /* Note: AT&T enter does NOT have reversed args. Enter is probably 8313 slower on all targets. Also sdb doesn't like it. */ 8314 8315 if (frame_pointer_needed) 8316 { 8317 insn = emit_insn (gen_push (hard_frame_pointer_rtx)); 8318 RTX_FRAME_RELATED_P (insn) = 1; 8319 8320 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); 8321 RTX_FRAME_RELATED_P (insn) = 1; 8322 } 8323 8324 if (stack_realign_fp) 8325 { 8326 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; 8327 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT); 8328 8329 /* Align the stack. */ 8330 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx, 8331 stack_pointer_rtx, 8332 GEN_INT (-align_bytes))); 8333 RTX_FRAME_RELATED_P (insn) = 1; 8334 } 8335 8336 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding05; 8337 8338 if (!frame.save_regs_using_mov) 8339 ix86_emit_save_regs (); 8340 else 8341 allocate += (frame.nregs + frame.nmsave_args) * UNITS_PER_WORD 8342 + frame.padding0; 8343 8344 /* When using red zone we may start register saving before allocating 8345 the stack frame saving one cycle of the prologue. However I will 8346 avoid doing this if I am going to have to probe the stack since 8347 at least on x86_64 the stack probe can turn into a call that clobbers 8348 a red zone location */ 8349 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov 8350 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)) 8351 ix86_emit_save_regs_using_mov ((frame_pointer_needed 8352 && !crtl->stack_realign_needed) 8353 ? 
hard_frame_pointer_rtx 8354 : stack_pointer_rtx, 8355 -(frame.nregs + frame.nmsave_args) 8356 * UNITS_PER_WORD - frame.padding0); 8357 8358 if (allocate == 0) 8359 ; 8360 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT) 8361 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 8362 GEN_INT (-allocate), -1); 8363 else 8364 { 8365 rtx eax = gen_rtx_REG (Pmode, AX_REG); 8366 bool eax_live; 8367 rtx t; 8368 8369 if (cfun->machine->call_abi == MS_ABI) 8370 eax_live = false; 8371 else 8372 eax_live = ix86_eax_live_at_start_p (); 8373 8374 if (eax_live) 8375 { 8376 emit_insn (gen_push (eax)); 8377 allocate -= UNITS_PER_WORD; 8378 } 8379 8380 emit_move_insn (eax, GEN_INT (allocate)); 8381 8382 if (TARGET_64BIT) 8383 insn = gen_allocate_stack_worker_64 (eax, eax); 8384 else 8385 insn = gen_allocate_stack_worker_32 (eax, eax); 8386 insn = emit_insn (insn); 8387 RTX_FRAME_RELATED_P (insn) = 1; 8388 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate)); 8389 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t); 8390 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, 8391 t, REG_NOTES (insn)); 8392 8393 if (eax_live) 8394 { 8395 if (frame_pointer_needed) 8396 t = plus_constant (hard_frame_pointer_rtx, 8397 allocate 8398 - frame.to_allocate 8399 - frame.nregs * UNITS_PER_WORD); 8400 else 8401 t = plus_constant (stack_pointer_rtx, allocate); 8402 emit_move_insn (eax, gen_rtx_MEM (Pmode, t)); 8403 } 8404 } 8405 8406 if (frame.save_regs_using_mov 8407 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE 8408 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))) 8409 { 8410 if (!TARGET_SAVE_ARGS && 8411 (!frame_pointer_needed 8412 || !(frame.to_allocate + frame.padding05) 8413 || crtl->stack_realign_needed)) 8414 ix86_emit_save_regs_using_mov (stack_pointer_rtx, 8415 frame.to_allocate 8416 + frame.nsseregs * 16 + frame.padding05); 8417 else 8418 /* XXX: Does this need help for SSE? */ 8419 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx, 8420 -(frame.nregs + frame.nmsave_args) 8421 * UNITS_PER_WORD - frame.padding0); 8422 } 8423 /* XXX: Does these need help for save-args? 
*/ 8424 if (!frame_pointer_needed 8425 || !(frame.to_allocate + frame.padding0) 8426 || crtl->stack_realign_needed) 8427 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx, 8428 frame.to_allocate); 8429 else 8430 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx, 8431 - frame.nregs * UNITS_PER_WORD 8432 - frame.nsseregs * 16 8433 - frame.padding05); 8434 8435 pic_reg_used = false; 8436 if (pic_offset_table_rtx 8437 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM) 8438 || crtl->profile)) 8439 { 8440 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum (); 8441 8442 if (alt_pic_reg_used != INVALID_REGNUM) 8443 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used); 8444 8445 pic_reg_used = true; 8446 } 8447 8448 if (pic_reg_used) 8449 { 8450 if (TARGET_64BIT) 8451 { 8452 if (ix86_cmodel == CM_LARGE_PIC) 8453 { 8454 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG); 8455 rtx label = gen_label_rtx (); 8456 emit_label (label); 8457 LABEL_PRESERVE_P (label) = 1; 8458 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg)); 8459 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label)); 8460 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label)); 8461 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx, 8462 pic_offset_table_rtx, tmp_reg)); 8463 } 8464 else 8465 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx)); 8466 } 8467 else 8468 insn = emit_insn (gen_set_got (pic_offset_table_rtx)); 8469 } 8470 8471 /* In the pic_reg_used case, make sure that the got load isn't deleted 8472 when mcount needs it. Blockage to avoid call movement across mcount 8473 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END 8474 note. */ 8475 if (crtl->profile && pic_reg_used) 8476 emit_insn (gen_prologue_use (pic_offset_table_rtx)); 8477 8478 if (crtl->drap_reg && !crtl->stack_realign_needed) 8479 { 8480 /* vDRAP is setup but after reload it turns out stack realign 8481 isn't necessary, here we will emit prologue to setup DRAP 8482 without stack realign adjustment */ 8483 int drap_bp_offset = UNITS_PER_WORD * 2; 8484 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset); 8485 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x)); 8486 } 8487 8488 /* Prevent instructions from being scheduled into register save push 8489 sequence when access to the redzone area is done through frame pointer. 8490 The offset betweeh the frame pointer and the stack pointer is calculated 8491 relative to the value of the stack pointer at the end of the function 8492 prologue, and moving instructions that access redzone area via frame 8493 pointer inside push sequence violates this assumption. */ 8494 if (frame_pointer_needed && frame.red_zone_size) 8495 emit_insn (gen_memory_blockage ()); 8496 8497 /* Emit cld instruction if stringops are used in the function. */ 8498 if (TARGET_CLD && ix86_current_function_needs_cld) 8499 emit_insn (gen_cld ()); 8500 } 8501 8502 /* Emit code to restore saved registers using MOV insns. First register 8503 is restored from POINTER + OFFSET. */ 8504 static void 8505 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset, 8506 int maybe_eh_return) 8507 { 8508 int regno; 8509 rtx base_address = gen_rtx_MEM (Pmode, pointer); 8510 8511 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 8512 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return)) 8513 { 8514 /* Ensure that adjust_address won't be forced to produce pointer 8515 out of range allowed by x86-64 instruction set. 
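x86-64 addressing modes carry only a sign-extended 32-bit displacement, so when the offset does not fit in SImode it is first materialized in %r11 and the restores are then done relative to that register with a zero offset.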
*/ 8516 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode)) 8517 { 8518 rtx r11; 8519 8520 r11 = gen_rtx_REG (DImode, R11_REG); 8521 emit_move_insn (r11, GEN_INT (offset)); 8522 emit_insn (gen_adddi3 (r11, r11, pointer)); 8523 base_address = gen_rtx_MEM (Pmode, r11); 8524 offset = 0; 8525 } 8526 emit_move_insn (gen_rtx_REG (Pmode, regno), 8527 adjust_address (base_address, Pmode, offset)); 8528 offset += UNITS_PER_WORD; 8529 } 8530 } 8531 8532 /* Emit code to restore saved registers using MOV insns. First register 8533 is restored from POINTER + OFFSET. */ 8534 static void 8535 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset, 8536 int maybe_eh_return) 8537 { 8538 int regno; 8539 rtx base_address = gen_rtx_MEM (TImode, pointer); 8540 rtx mem; 8541 8542 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 8543 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return)) 8544 { 8545 /* Ensure that adjust_address won't be forced to produce pointer 8546 out of range allowed by x86-64 instruction set. */ 8547 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode)) 8548 { 8549 rtx r11; 8550 8551 r11 = gen_rtx_REG (DImode, R11_REG); 8552 emit_move_insn (r11, GEN_INT (offset)); 8553 emit_insn (gen_adddi3 (r11, r11, pointer)); 8554 base_address = gen_rtx_MEM (TImode, r11); 8555 offset = 0; 8556 } 8557 mem = adjust_address (base_address, TImode, offset); 8558 set_mem_align (mem, 128); 8559 emit_move_insn (gen_rtx_REG (TImode, regno), mem); 8560 offset += 16; 8561 } 8562 } 8563 8564 /* Restore function stack, frame, and registers. */ 8565 8566 void 8567 ix86_expand_epilogue (int style) 8568 { 8569 int regno; 8570 int sp_valid; 8571 struct ix86_frame frame; 8572 HOST_WIDE_INT offset; 8573 8574 ix86_finalize_stack_realign_flags (); 8575 8576 /* When stack is realigned, SP must be valid. */ 8577 sp_valid = (!frame_pointer_needed 8578 || current_function_sp_is_unchanging 8579 || stack_realign_fp); 8580 8581 ix86_compute_frame_layout (&frame); 8582 8583 /* See the comment about red zone and frame 8584 pointer usage in ix86_expand_prologue. */ 8585 if (frame_pointer_needed && frame.red_zone_size) 8586 emit_insn (gen_memory_blockage ()); 8587 8588 /* Calculate start of saved registers relative to ebp. Special care 8589 must be taken for the normal return case of a function using 8590 eh_return: the eax and edx registers are marked as saved, but not 8591 restored along this path. */ 8592 offset = frame.nregs + frame.nmsave_args; 8593 if (crtl->calls_eh_return && style != 2) 8594 offset -= 2; 8595 offset *= -UNITS_PER_WORD; 8596 offset -= frame.nsseregs * 16 + frame.padding05 + frame.padding0; 8597 8598 /* If we're only restoring one register and sp is not valid then 8599 using a move instruction to restore the register since it's 8600 less work than reloading sp and popping the register. 8601 8602 The default code result in stack adjustment using add/lea instruction, 8603 while this code results in LEAVE instruction (or discrete equivalent), 8604 so it is profitable in some other cases as well. Especially when there 8605 are no registers to restore. We also use this code when TARGET_USE_LEAVE 8606 and there is exactly one register to pop. This heuristic may need some 8607 tuning in future. 
*/ 8608 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1) 8609 || (TARGET_EPILOGUE_USING_MOVE 8610 && cfun->machine->use_fast_prologue_epilogue 8611 && ((frame.nregs + frame.nsseregs) > 1 8612 || (frame.to_allocate + frame.padding0) != 0)) 8613 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs) 8614 && (frame.to_allocate + frame.padding0) != 0) 8615 || (frame_pointer_needed && TARGET_USE_LEAVE 8616 && cfun->machine->use_fast_prologue_epilogue 8617 && (frame.nregs + frame.nsseregs) == 1) 8618 || crtl->calls_eh_return) 8619 { 8620 /* Restore registers. We can use ebp or esp to address the memory 8621 locations. If both are available, default to ebp, since offsets 8622 are known to be small. Only exception is esp pointing directly 8623 to the end of block of saved registers, where we may simplify 8624 addressing mode. 8625 8626 If we are realigning stack with bp and sp, regs restore can't 8627 be addressed by bp. sp must be used instead. */ 8628 8629 if (!frame_pointer_needed 8630 || (sp_valid && !(frame.to_allocate + frame.padding0)) 8631 || stack_realign_fp) 8632 { 8633 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx, 8634 frame.to_allocate, style == 2); 8635 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, 8636 frame.to_allocate 8637 + frame.nsseregs * 16 8638 + frame.padding05, style == 2); 8639 } 8640 else 8641 { 8642 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx, 8643 offset, style == 2); 8644 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, 8645 offset 8646 + frame.nsseregs * 16 8647 + frame.padding05, style == 2); 8648 } 8649 8650 /* eh_return epilogues need %ecx added to the stack pointer. */ 8651 if (style == 2) 8652 { 8653 rtx tmp, sa = EH_RETURN_STACKADJ_RTX; 8654 8655 /* Stack align doesn't work with eh_return. */ 8656 gcc_assert (!crtl->stack_realign_needed); 8657 8658 if (frame_pointer_needed) 8659 { 8660 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa); 8661 tmp = plus_constant (tmp, UNITS_PER_WORD); 8662 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp)); 8663 8664 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx); 8665 emit_move_insn (hard_frame_pointer_rtx, tmp); 8666 8667 pro_epilogue_adjust_stack (stack_pointer_rtx, sa, 8668 const0_rtx, style); 8669 } 8670 else 8671 { 8672 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); 8673 tmp = plus_constant (tmp, (frame.to_allocate 8674 + (frame.nregs + frame.nmsave_args) 8675 * UNITS_PER_WORD 8676 + frame.nsseregs * 16 8677 + frame.padding05 + frame.padding0)); 8678 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp)); 8679 } 8680 } 8681 else if (!frame_pointer_needed) 8682 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 8683 GEN_INT (frame.to_allocate 8684 + (frame.nregs + frame.nmsave_args) 8685 * UNITS_PER_WORD 8686 + frame.nsseregs * 16 8687 + frame.padding05 + frame.padding0), 8688 style); 8689 /* If not an i386, mov & pop is faster than "leave". */ 8690 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun) 8691 || !cfun->machine->use_fast_prologue_epilogue) 8692 emit_insn ((*ix86_gen_leave) ()); 8693 else 8694 { 8695 pro_epilogue_adjust_stack (stack_pointer_rtx, 8696 hard_frame_pointer_rtx, 8697 const0_rtx, style); 8698 8699 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx)); 8700 } 8701 } 8702 else 8703 { 8704 /* First step is to deallocate the stack frame so that we can 8705 pop the registers. 
8706 8707 If we realign stack with frame pointer, then stack pointer 8708 won't be able to recover via lea $offset(%bp), %sp, because 8709 there is a padding area between bp and sp for realign. 8710 "add $to_allocate, %sp" must be used instead. */ 8711 if (!sp_valid) 8712 { 8713 gcc_assert (frame_pointer_needed); 8714 gcc_assert (!stack_realign_fp); 8715 pro_epilogue_adjust_stack (stack_pointer_rtx, 8716 hard_frame_pointer_rtx, 8717 GEN_INT (offset), style); 8718 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx, 8719 0, style == 2); 8720 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 8721 GEN_INT (frame.nsseregs * 16 + 8722 frame.padding0), style); 8723 } 8724 else if (frame.to_allocate || frame.padding0 || frame.nsseregs) 8725 { 8726 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx, 8727 frame.to_allocate, 8728 style == 2); 8729 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 8730 GEN_INT (frame.to_allocate 8731 + frame.nsseregs * 16 8732 + frame.padding05), style); 8733 } 8734 8735 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 8736 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false)) 8737 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno))); 8738 8739 /* XXX: Needs adjustment for SSE regs? */ 8740 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 8741 GEN_INT (frame.nmsave_args * UNITS_PER_WORD 8742 + frame.padding0), style); 8743 if (frame_pointer_needed) 8744 { 8745 /* Leave results in shorter dependency chains on CPUs that are 8746 able to grok it fast. */ 8747 if (TARGET_USE_LEAVE) 8748 emit_insn ((*ix86_gen_leave) ()); 8749 else 8750 { 8751 /* For stack realigned really happens, recover stack 8752 pointer to hard frame pointer is a must, if not using 8753 leave. */ 8754 if (stack_realign_fp) 8755 pro_epilogue_adjust_stack (stack_pointer_rtx, 8756 hard_frame_pointer_rtx, 8757 const0_rtx, style); 8758 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx)); 8759 } 8760 } 8761 } 8762 8763 if (crtl->drap_reg && crtl->stack_realign_needed) 8764 { 8765 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)] 8766 ? 0 : UNITS_PER_WORD); 8767 gcc_assert (stack_realign_drap); 8768 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx, 8769 crtl->drap_reg, 8770 GEN_INT (-(UNITS_PER_WORD 8771 + param_ptr_offset)))); 8772 if (!call_used_regs[REGNO (crtl->drap_reg)]) 8773 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg)); 8774 8775 } 8776 8777 /* Sibcall epilogues don't want a return instruction. */ 8778 if (style == 0) 8779 return; 8780 8781 if (crtl->args.pops_args && crtl->args.size) 8782 { 8783 rtx popc = GEN_INT (crtl->args.pops_args); 8784 8785 /* i386 can only pop 64K bytes. If asked to pop more, pop 8786 return address, do explicit add, and jump indirectly to the 8787 caller. */ 8788 8789 if (crtl->args.pops_args >= 65536) 8790 { 8791 rtx ecx = gen_rtx_REG (SImode, CX_REG); 8792 8793 /* There is no "pascal" calling convention in any 64bit ABI. */ 8794 gcc_assert (!TARGET_64BIT); 8795 8796 emit_insn (gen_popsi1 (ecx)); 8797 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc)); 8798 emit_jump_insn (gen_return_indirect_internal (ecx)); 8799 } 8800 else 8801 emit_jump_insn (gen_return_pop_internal (popc)); 8802 } 8803 else 8804 emit_jump_insn (gen_return_internal ()); 8805 } 8806 8807 /* Reset from the function's potential modifications. 
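In particular, undo any SET_REGNO applied to pic_offset_table_rtx by the prologue when an alternate PIC register was chosen, so that the next function starts out with the real PIC register again.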
*/ 8808 8809 static void 8810 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, 8811 HOST_WIDE_INT size ATTRIBUTE_UNUSED) 8812 { 8813 if (pic_offset_table_rtx) 8814 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM); 8815 #if TARGET_MACHO 8816 /* Mach-O doesn't support labels at the end of objects, so if 8817 it looks like we might want one, insert a NOP. */ 8818 { 8819 rtx insn = get_last_insn (); 8820 while (insn 8821 && NOTE_P (insn) 8822 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL) 8823 insn = PREV_INSN (insn); 8824 if (insn 8825 && (LABEL_P (insn) 8826 || (NOTE_P (insn) 8827 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))) 8828 fputs ("\tnop\n", file); 8829 } 8830 #endif 8831 8832 } 8833 8834 /* Extract the parts of an RTL expression that is a valid memory address 8835 for an instruction. Return 0 if the structure of the address is 8836 grossly off. Return -1 if the address contains ASHIFT, so it is not 8837 strictly valid, but still used for computing length of lea instruction. */ 8838 8839 int 8840 ix86_decompose_address (rtx addr, struct ix86_address *out) 8841 { 8842 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX; 8843 rtx base_reg, index_reg; 8844 HOST_WIDE_INT scale = 1; 8845 rtx scale_rtx = NULL_RTX; 8846 int retval = 1; 8847 enum ix86_address_seg seg = SEG_DEFAULT; 8848 8849 if (REG_P (addr) || GET_CODE (addr) == SUBREG) 8850 base = addr; 8851 else if (GET_CODE (addr) == PLUS) 8852 { 8853 rtx addends[4], op; 8854 int n = 0, i; 8855 8856 op = addr; 8857 do 8858 { 8859 if (n >= 4) 8860 return 0; 8861 addends[n++] = XEXP (op, 1); 8862 op = XEXP (op, 0); 8863 } 8864 while (GET_CODE (op) == PLUS); 8865 if (n >= 4) 8866 return 0; 8867 addends[n] = op; 8868 8869 for (i = n; i >= 0; --i) 8870 { 8871 op = addends[i]; 8872 switch (GET_CODE (op)) 8873 { 8874 case MULT: 8875 if (index) 8876 return 0; 8877 index = XEXP (op, 0); 8878 scale_rtx = XEXP (op, 1); 8879 break; 8880 8881 case UNSPEC: 8882 if (XINT (op, 1) == UNSPEC_TP 8883 && TARGET_TLS_DIRECT_SEG_REFS 8884 && seg == SEG_DEFAULT) 8885 seg = TARGET_64BIT ? SEG_FS : SEG_GS; 8886 else 8887 return 0; 8888 break; 8889 8890 case REG: 8891 case SUBREG: 8892 if (!base) 8893 base = op; 8894 else if (!index) 8895 index = op; 8896 else 8897 return 0; 8898 break; 8899 8900 case CONST: 8901 case CONST_INT: 8902 case SYMBOL_REF: 8903 case LABEL_REF: 8904 if (disp) 8905 return 0; 8906 disp = op; 8907 break; 8908 8909 default: 8910 return 0; 8911 } 8912 } 8913 } 8914 else if (GET_CODE (addr) == MULT) 8915 { 8916 index = XEXP (addr, 0); /* index*scale */ 8917 scale_rtx = XEXP (addr, 1); 8918 } 8919 else if (GET_CODE (addr) == ASHIFT) 8920 { 8921 rtx tmp; 8922 8923 /* We're called for lea too, which implements ashift on occasion. */ 8924 index = XEXP (addr, 0); 8925 tmp = XEXP (addr, 1); 8926 if (!CONST_INT_P (tmp)) 8927 return 0; 8928 scale = INTVAL (tmp); 8929 if ((unsigned HOST_WIDE_INT) scale > 3) 8930 return 0; 8931 scale = 1 << scale; 8932 retval = -1; 8933 } 8934 else 8935 disp = addr; /* displacement */ 8936 8937 /* Extract the integral value of scale. */ 8938 if (scale_rtx) 8939 { 8940 if (!CONST_INT_P (scale_rtx)) 8941 return 0; 8942 scale = INTVAL (scale_rtx); 8943 } 8944 8945 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base; 8946 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index; 8947 8948 /* Allow arg pointer and stack pointer as index if there is not scaling. 
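(The stack pointer can never be encoded in the SIB index field, so when it, or one of the pointers that may eliminate to it, lands in the index slot of an unscaled address, it is simply swapped with the base below.)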
*/ 8949 if (base_reg && index_reg && scale == 1 8950 && (index_reg == arg_pointer_rtx 8951 || index_reg == frame_pointer_rtx 8952 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM))) 8953 { 8954 rtx tmp; 8955 tmp = base, base = index, index = tmp; 8956 tmp = base_reg, base_reg = index_reg, index_reg = tmp; 8957 } 8958 8959 /* Special case: %ebp cannot be encoded as a base without a displacement. */ 8960 if ((base_reg == hard_frame_pointer_rtx 8961 || base_reg == frame_pointer_rtx 8962 || base_reg == arg_pointer_rtx) && !disp) 8963 disp = const0_rtx; 8964 8965 /* Special case: on K6, [%esi] forces the instruction to be vector decoded. 8966 Avoid this by transforming to [%esi+0]. 8967 Reload calls address legitimization without cfun defined, so we need 8968 to test cfun for being non-NULL. */ 8969 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun) 8970 && base_reg && !index_reg && !disp 8971 && REG_P (base_reg) 8972 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG) 8973 disp = const0_rtx; 8974 8975 /* Special case: encode reg+reg instead of reg*2. */ 8976 if (!base && index && scale && scale == 2) 8977 base = index, base_reg = index_reg, scale = 1; 8978 8979 /* Special case: scaling cannot be encoded without base or displacement. */ 8980 if (!base && !disp && index && scale != 1) 8981 disp = const0_rtx; 8982 8983 out->base = base; 8984 out->index = index; 8985 out->disp = disp; 8986 out->scale = scale; 8987 out->seg = seg; 8988 8989 return retval; 8990 } 8991 8992 /* Return the cost of the memory address x. 8993 For i386, it is better to use a complex address than to let gcc copy 8994 the address into a reg and make a new pseudo. But not if the address 8995 requires two regs - that would mean more pseudos with longer 8996 lifetimes. */ 8997 static int 8998 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED) 8999 { 9000 struct ix86_address parts; 9001 int cost = 1; 9002 int ok = ix86_decompose_address (x, &parts); 9003 9004 gcc_assert (ok); 9005 9006 if (parts.base && GET_CODE (parts.base) == SUBREG) 9007 parts.base = SUBREG_REG (parts.base); 9008 if (parts.index && GET_CODE (parts.index) == SUBREG) 9009 parts.index = SUBREG_REG (parts.index); 9010 9011 /* Attempt to minimize the number of registers in the address. */ 9012 if ((parts.base 9013 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)) 9014 || (parts.index 9015 && (!REG_P (parts.index) 9016 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER))) 9017 cost++; 9018 9019 if (parts.base 9020 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER) 9021 && parts.index 9022 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER) 9023 && parts.base != parts.index) 9024 cost++; 9025 9026 /* The AMD K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b, 9027 since its predecode logic can't determine the length of such instructions 9028 and decoding degenerates to the vector decoder. Increase the cost of such 9029 addresses here. The penalty is at least 2 cycles. It may be worthwhile 9030 to split such addresses or even to refuse them entirely. 9031 9032 The following addressing modes are affected: 9033 [base+scale*index] 9034 [scale*index+disp] 9035 [base+index] 9036 9037 The first and last cases may be avoidable by explicitly coding the zero into 9038 the memory address, but I don't have an AMD K6 machine handy to check this 9039 theory.
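/* Editorial sketch, not part of GCC: the "encode reg+reg instead of reg*2" canonicalization performed above, restated on the toy_address struct from the earlier sketch.  With no base register the SIB encoding forces a 32-bit displacement, so (%eax,%eax,1) is shorter than 0(,%eax,2).  The helper name is invented.  */
static void
toy_prefer_reg_plus_reg (struct toy_address *a)
{
  if (!a->base && a->index && a->scale == 2)
    {
      a->base = a->index;   /* reg*2 becomes reg+reg */
      a->scale = 1;
    }
}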
*/ 9040 9041 if (TARGET_K6 9042 && ((!parts.disp && parts.base && parts.index && parts.scale != 1) 9043 || (parts.disp && !parts.base && parts.index && parts.scale != 1) 9044 || (!parts.disp && parts.base && parts.index && parts.scale == 1))) 9045 cost += 10; 9046 9047 return cost; 9048 } 9049 9050 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as 9051 this is used for to form addresses to local data when -fPIC is in 9052 use. */ 9053 9054 static bool 9055 darwin_local_data_pic (rtx disp) 9056 { 9057 return (GET_CODE (disp) == UNSPEC 9058 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET); 9059 } 9060 9061 /* Determine if a given RTX is a valid constant. We already know this 9062 satisfies CONSTANT_P. */ 9063 9064 bool 9065 legitimate_constant_p (rtx x) 9066 { 9067 switch (GET_CODE (x)) 9068 { 9069 case CONST: 9070 x = XEXP (x, 0); 9071 9072 if (GET_CODE (x) == PLUS) 9073 { 9074 if (!CONST_INT_P (XEXP (x, 1))) 9075 return false; 9076 x = XEXP (x, 0); 9077 } 9078 9079 if (TARGET_MACHO && darwin_local_data_pic (x)) 9080 return true; 9081 9082 /* Only some unspecs are valid as "constants". */ 9083 if (GET_CODE (x) == UNSPEC) 9084 switch (XINT (x, 1)) 9085 { 9086 case UNSPEC_GOT: 9087 case UNSPEC_GOTOFF: 9088 case UNSPEC_PLTOFF: 9089 return TARGET_64BIT; 9090 case UNSPEC_TPOFF: 9091 case UNSPEC_NTPOFF: 9092 x = XVECEXP (x, 0, 0); 9093 return (GET_CODE (x) == SYMBOL_REF 9094 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); 9095 case UNSPEC_DTPOFF: 9096 x = XVECEXP (x, 0, 0); 9097 return (GET_CODE (x) == SYMBOL_REF 9098 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC); 9099 default: 9100 return false; 9101 } 9102 9103 /* We must have drilled down to a symbol. */ 9104 if (GET_CODE (x) == LABEL_REF) 9105 return true; 9106 if (GET_CODE (x) != SYMBOL_REF) 9107 return false; 9108 /* FALLTHRU */ 9109 9110 case SYMBOL_REF: 9111 /* TLS symbols are never valid. */ 9112 if (SYMBOL_REF_TLS_MODEL (x)) 9113 return false; 9114 9115 /* DLLIMPORT symbols are never valid. */ 9116 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES 9117 && SYMBOL_REF_DLLIMPORT_P (x)) 9118 return false; 9119 break; 9120 9121 case CONST_DOUBLE: 9122 if (GET_MODE (x) == TImode 9123 && x != CONST0_RTX (TImode) 9124 && !TARGET_64BIT) 9125 return false; 9126 break; 9127 9128 case CONST_VECTOR: 9129 if (x == CONST0_RTX (GET_MODE (x))) 9130 return true; 9131 return false; 9132 9133 default: 9134 break; 9135 } 9136 9137 /* Otherwise we handle everything else in the move patterns. */ 9138 return true; 9139 } 9140 9141 /* Determine if it's legal to put X into the constant pool. This 9142 is not possible for the address of thread-local symbols, which 9143 is checked above. */ 9144 9145 static bool 9146 ix86_cannot_force_const_mem (rtx x) 9147 { 9148 /* We can always put integral constants and vectors in memory. */ 9149 switch (GET_CODE (x)) 9150 { 9151 case CONST_INT: 9152 case CONST_DOUBLE: 9153 case CONST_VECTOR: 9154 return false; 9155 9156 default: 9157 break; 9158 } 9159 return !legitimate_constant_p (x); 9160 } 9161 9162 /* Determine if a given RTX is a valid constant address. */ 9163 9164 bool 9165 constant_address_p (rtx x) 9166 { 9167 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1); 9168 } 9169 9170 /* Return number of arguments to be saved on the stack with 9171 -msave-args. 
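/* Editorial example, not part of GCC: why legitimate_constant_p and ix86_cannot_force_const_mem above reject TLS symbols.  The address of a __thread variable depends on which thread evaluates it, so it needs a TLS access sequence at run time and cannot be treated as an ordinary constant the way a plain symbol reference can.  Variable names are invented.  */
__thread int tls_counter;
int plain_counter;

int *
address_of_tls (void)        /* requires a TLS access sequence */
{
  return &tls_counter;
}

int *
address_of_plain (void)      /* a plain symbol reference suffices */
{
  return &plain_counter;
}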
*/ 9172 9173 static int 9174 ix86_nsaved_args (void) 9175 { 9176 if (TARGET_SAVE_ARGS) 9177 return crtl->args.info.regno - cfun->returns_struct; 9178 else 9179 return 0; 9180 } 9181 9182 /* Nonzero if the constant value X is a legitimate general operand 9183 when generating PIC code. It is given that flag_pic is on and 9184 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */ 9185 bool 9186 legitimate_pic_operand_p (rtx x) 9187 { 9188 rtx inner; 9189 9190 switch (GET_CODE (x)) 9191 { 9192 case CONST: 9193 inner = XEXP (x, 0); 9194 if (GET_CODE (inner) == PLUS 9195 && CONST_INT_P (XEXP (inner, 1))) 9196 inner = XEXP (inner, 0); 9197 9198 /* Only some unspecs are valid as "constants". */ 9199 if (GET_CODE (inner) == UNSPEC) 9200 switch (XINT (inner, 1)) 9201 { 9202 case UNSPEC_GOT: 9203 case UNSPEC_GOTOFF: 9204 case UNSPEC_PLTOFF: 9205 return TARGET_64BIT; 9206 case UNSPEC_TPOFF: 9207 x = XVECEXP (inner, 0, 0); 9208 return (GET_CODE (x) == SYMBOL_REF 9209 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); 9210 case UNSPEC_MACHOPIC_OFFSET: 9211 return legitimate_pic_address_disp_p (x); 9212 default: 9213 return false; 9214 } 9215 /* FALLTHRU */ 9216 9217 case SYMBOL_REF: 9218 case LABEL_REF: 9219 return legitimate_pic_address_disp_p (x); 9220 9221 default: 9222 return true; 9223 } 9224 } 9225 9226 /* Determine if a given CONST RTX is a valid memory displacement 9227 in PIC mode. */ 9228 9229 int 9230 legitimate_pic_address_disp_p (rtx disp) 9231 { 9232 bool saw_plus; 9233 9234 /* In 64bit mode we can allow direct addresses of symbols and labels 9235 when they are not dynamic symbols. */ 9236 if (TARGET_64BIT) 9237 { 9238 rtx op0 = disp, op1; 9239 9240 switch (GET_CODE (disp)) 9241 { 9242 case LABEL_REF: 9243 return true; 9244 9245 case CONST: 9246 if (GET_CODE (XEXP (disp, 0)) != PLUS) 9247 break; 9248 op0 = XEXP (XEXP (disp, 0), 0); 9249 op1 = XEXP (XEXP (disp, 0), 1); 9250 if (!CONST_INT_P (op1) 9251 || INTVAL (op1) >= 16*1024*1024 9252 || INTVAL (op1) < -16*1024*1024) 9253 break; 9254 if (GET_CODE (op0) == LABEL_REF) 9255 return true; 9256 if (GET_CODE (op0) != SYMBOL_REF) 9257 break; 9258 /* FALLTHRU */ 9259 9260 case SYMBOL_REF: 9261 /* TLS references should always be enclosed in UNSPEC. */ 9262 if (SYMBOL_REF_TLS_MODEL (op0)) 9263 return false; 9264 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0) 9265 && ix86_cmodel != CM_LARGE_PIC) 9266 return true; 9267 break; 9268 9269 default: 9270 break; 9271 } 9272 } 9273 if (GET_CODE (disp) != CONST) 9274 return 0; 9275 disp = XEXP (disp, 0); 9276 9277 if (TARGET_64BIT) 9278 { 9279 /* We are unsafe to allow PLUS expressions. This limit allowed distance 9280 of GOT tables. We should not need these anyway. 
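/* Editorial sketch, not part of GCC: the symbol+offset window enforced above for 64-bit PIC displacements.  Offsets are limited to +/-16MB so that, with objects assumed to sit well inside the small-model 2GB range, adding the offset cannot push the reference out of reach.  The helper name is invented and the rationale is inferred from the surrounding comments.  */
#include <stdbool.h>

static bool
toy_symbol_offset_ok (long offset)
{
  return offset >= -16L * 1024 * 1024 && offset < 16L * 1024 * 1024;
}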
*/ 9281 if (GET_CODE (disp) != UNSPEC 9282 || (XINT (disp, 1) != UNSPEC_GOTPCREL 9283 && XINT (disp, 1) != UNSPEC_GOTOFF 9284 && XINT (disp, 1) != UNSPEC_PLTOFF)) 9285 return 0; 9286 9287 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF 9288 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF) 9289 return 0; 9290 return 1; 9291 } 9292 9293 saw_plus = false; 9294 if (GET_CODE (disp) == PLUS) 9295 { 9296 if (!CONST_INT_P (XEXP (disp, 1))) 9297 return 0; 9298 disp = XEXP (disp, 0); 9299 saw_plus = true; 9300 } 9301 9302 if (TARGET_MACHO && darwin_local_data_pic (disp)) 9303 return 1; 9304 9305 if (GET_CODE (disp) != UNSPEC) 9306 return 0; 9307 9308 switch (XINT (disp, 1)) 9309 { 9310 case UNSPEC_GOT: 9311 if (saw_plus) 9312 return false; 9313 /* We need to check for both symbols and labels because VxWorks loads 9314 text labels with @GOT rather than @GOTOFF. See gotoff_operand for 9315 details. */ 9316 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF 9317 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF); 9318 case UNSPEC_GOTOFF: 9319 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used. 9320 While ABI specify also 32bit relocation but we don't produce it in 9321 small PIC model at all. */ 9322 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF 9323 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF) 9324 && !TARGET_64BIT) 9325 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode); 9326 return false; 9327 case UNSPEC_GOTTPOFF: 9328 case UNSPEC_GOTNTPOFF: 9329 case UNSPEC_INDNTPOFF: 9330 if (saw_plus) 9331 return false; 9332 disp = XVECEXP (disp, 0, 0); 9333 return (GET_CODE (disp) == SYMBOL_REF 9334 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC); 9335 case UNSPEC_NTPOFF: 9336 disp = XVECEXP (disp, 0, 0); 9337 return (GET_CODE (disp) == SYMBOL_REF 9338 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC); 9339 case UNSPEC_DTPOFF: 9340 disp = XVECEXP (disp, 0, 0); 9341 return (GET_CODE (disp) == SYMBOL_REF 9342 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC); 9343 } 9344 9345 return 0; 9346 } 9347 9348 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid 9349 memory address for an instruction. The MODE argument is the machine mode 9350 for the MEM expression that wants to use this address. 9351 9352 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should 9353 convert common non-canonical forms to canonical form so that they will 9354 be recognized. */ 9355 9356 int 9357 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, 9358 rtx addr, int strict) 9359 { 9360 struct ix86_address parts; 9361 rtx base, index, disp; 9362 HOST_WIDE_INT scale; 9363 const char *reason = NULL; 9364 rtx reason_rtx = NULL_RTX; 9365 9366 if (ix86_decompose_address (addr, &parts) <= 0) 9367 { 9368 reason = "decomposition failed"; 9369 goto report_error; 9370 } 9371 9372 base = parts.base; 9373 index = parts.index; 9374 disp = parts.disp; 9375 scale = parts.scale; 9376 9377 /* Validate base register. 9378 9379 Don't allow SUBREG's that span more than a word here. It can lead to spill 9380 failures when the base is one word out of a two word structure, which is 9381 represented internally as a DImode int. 
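/* Editorial example, not part of GCC: the two 32-bit PIC access forms that the @GOTOFF/@GOT checks above distinguish.  With -fPIC, a locally bound object is typically addressed as sym@GOTOFF(%ebx) relative to the PIC register, while a preemptible one is reached by first loading its address from the GOT (sym@GOT(%ebx)); on x86-64 the latter typically becomes a sym@GOTPCREL(%rip) load instead.  Variable names are invented and the exact sequences depend on target and options.  */
static int local_obj;        /* local binding: eligible for @GOTOFF        */
extern int global_obj;       /* preemptible: reached through a GOT slot    */

int read_local (void)  { return local_obj; }
int read_global (void) { return global_obj; }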
*/ 9382 9383 if (base) 9384 { 9385 rtx reg; 9386 reason_rtx = base; 9387 9388 if (REG_P (base)) 9389 reg = base; 9390 else if (GET_CODE (base) == SUBREG 9391 && REG_P (SUBREG_REG (base)) 9392 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base))) 9393 <= UNITS_PER_WORD) 9394 reg = SUBREG_REG (base); 9395 else 9396 { 9397 reason = "base is not a register"; 9398 goto report_error; 9399 } 9400 9401 if (GET_MODE (base) != Pmode) 9402 { 9403 reason = "base is not in Pmode"; 9404 goto report_error; 9405 } 9406 9407 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg)) 9408 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg))) 9409 { 9410 reason = "base is not valid"; 9411 goto report_error; 9412 } 9413 } 9414 9415 /* Validate index register. 9416 9417 Don't allow SUBREG's that span more than a word here -- same as above. */ 9418 9419 if (index) 9420 { 9421 rtx reg; 9422 reason_rtx = index; 9423 9424 if (REG_P (index)) 9425 reg = index; 9426 else if (GET_CODE (index) == SUBREG 9427 && REG_P (SUBREG_REG (index)) 9428 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index))) 9429 <= UNITS_PER_WORD) 9430 reg = SUBREG_REG (index); 9431 else 9432 { 9433 reason = "index is not a register"; 9434 goto report_error; 9435 } 9436 9437 if (GET_MODE (index) != Pmode) 9438 { 9439 reason = "index is not in Pmode"; 9440 goto report_error; 9441 } 9442 9443 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg)) 9444 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg))) 9445 { 9446 reason = "index is not valid"; 9447 goto report_error; 9448 } 9449 } 9450 9451 /* Validate scale factor. */ 9452 if (scale != 1) 9453 { 9454 reason_rtx = GEN_INT (scale); 9455 if (!index) 9456 { 9457 reason = "scale without index"; 9458 goto report_error; 9459 } 9460 9461 if (scale != 2 && scale != 4 && scale != 8) 9462 { 9463 reason = "scale is not a valid multiplier"; 9464 goto report_error; 9465 } 9466 } 9467 9468 /* Validate displacement. */ 9469 if (disp) 9470 { 9471 reason_rtx = disp; 9472 9473 if (GET_CODE (disp) == CONST 9474 && GET_CODE (XEXP (disp, 0)) == UNSPEC 9475 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET) 9476 switch (XINT (XEXP (disp, 0), 1)) 9477 { 9478 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when 9479 used. While ABI specify also 32bit relocations, we don't produce 9480 them at all and use IP relative instead. */ 9481 case UNSPEC_GOT: 9482 case UNSPEC_GOTOFF: 9483 gcc_assert (flag_pic); 9484 if (!TARGET_64BIT) 9485 goto is_legitimate_pic; 9486 reason = "64bit address unspec"; 9487 goto report_error; 9488 9489 case UNSPEC_GOTPCREL: 9490 gcc_assert (flag_pic); 9491 goto is_legitimate_pic; 9492 9493 case UNSPEC_GOTTPOFF: 9494 case UNSPEC_GOTNTPOFF: 9495 case UNSPEC_INDNTPOFF: 9496 case UNSPEC_NTPOFF: 9497 case UNSPEC_DTPOFF: 9498 break; 9499 9500 default: 9501 reason = "invalid address unspec"; 9502 goto report_error; 9503 } 9504 9505 else if (SYMBOLIC_CONST (disp) 9506 && (flag_pic 9507 || (TARGET_MACHO 9508 #if TARGET_MACHO 9509 && MACHOPIC_INDIRECT 9510 && !machopic_operand_p (disp) 9511 #endif 9512 ))) 9513 { 9514 9515 is_legitimate_pic: 9516 if (TARGET_64BIT && (index || base)) 9517 { 9518 /* foo@dtpoff(%rX) is ok. 
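/* Editorial sketch, not part of GCC: the structural scale checks that legitimate_address_p applies above, restated on the toy_address struct from the earlier sketch.  */
#include <stdbool.h>

static bool
toy_scale_ok (const struct toy_address *a)
{
  if (a->scale != 1 && !a->index)
    return false;     /* a scale factor is meaningless without an index      */
  if (a->scale != 1 && a->scale != 2 && a->scale != 4 && a->scale != 8)
    return false;     /* only 1, 2, 4 and 8 are encodable in the SIB byte    */
  return true;
}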
*/ 9519 if (GET_CODE (disp) != CONST 9520 || GET_CODE (XEXP (disp, 0)) != PLUS 9521 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC 9522 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1)) 9523 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF 9524 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF)) 9525 { 9526 reason = "non-constant pic memory reference"; 9527 goto report_error; 9528 } 9529 } 9530 else if (! legitimate_pic_address_disp_p (disp)) 9531 { 9532 reason = "displacement is an invalid pic construct"; 9533 goto report_error; 9534 } 9535 9536 /* This code used to verify that a symbolic pic displacement 9537 includes the pic_offset_table_rtx register. 9538 9539 While this is good idea, unfortunately these constructs may 9540 be created by "adds using lea" optimization for incorrect 9541 code like: 9542 9543 int a; 9544 int foo(int i) 9545 { 9546 return *(&a+i); 9547 } 9548 9549 This code is nonsensical, but results in addressing 9550 GOT table with pic_offset_table_rtx base. We can't 9551 just refuse it easily, since it gets matched by 9552 "addsi3" pattern, that later gets split to lea in the 9553 case output register differs from input. While this 9554 can be handled by separate addsi pattern for this case 9555 that never results in lea, this seems to be easier and 9556 correct fix for crash to disable this test. */ 9557 } 9558 else if (GET_CODE (disp) != LABEL_REF 9559 && !CONST_INT_P (disp) 9560 && (GET_CODE (disp) != CONST 9561 || !legitimate_constant_p (disp)) 9562 && (GET_CODE (disp) != SYMBOL_REF 9563 || !legitimate_constant_p (disp))) 9564 { 9565 reason = "displacement is not constant"; 9566 goto report_error; 9567 } 9568 else if (TARGET_64BIT 9569 && !x86_64_immediate_operand (disp, VOIDmode)) 9570 { 9571 reason = "displacement is out of range"; 9572 goto report_error; 9573 } 9574 } 9575 9576 /* Everything looks valid. */ 9577 return TRUE; 9578 9579 report_error: 9580 return FALSE; 9581 } 9582 9583 /* Return a unique alias set for the GOT. */ 9584 9585 static alias_set_type 9586 ix86_GOT_alias_set (void) 9587 { 9588 static alias_set_type set = -1; 9589 if (set == -1) 9590 set = new_alias_set (); 9591 return set; 9592 } 9593 9594 /* Return a legitimate reference for ORIG (an address) using the 9595 register REG. If REG is 0, a new pseudo is generated. 9596 9597 There are two types of references that must be handled: 9598 9599 1. Global data references must load the address from the GOT, via 9600 the PIC reg. An insn is emitted to do this load, and the reg is 9601 returned. 9602 9603 2. Static data references, constant pool addresses, and code labels 9604 compute the address as an offset from the GOT, whose base is in 9605 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to 9606 differentiate them from global data objects. The returned 9607 address is the PIC reg + an unspec constant. 9608 9609 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC 9610 reg also appears in the address. */ 9611 9612 static rtx 9613 legitimize_pic_address (rtx orig, rtx reg) 9614 { 9615 rtx addr = orig; 9616 rtx new_rtx = orig; 9617 rtx base; 9618 9619 #if TARGET_MACHO 9620 if (TARGET_MACHO && !TARGET_64BIT) 9621 { 9622 if (reg == 0) 9623 reg = gen_reg_rtx (Pmode); 9624 /* Use the generic Mach-O PIC machinery. 
*/ 9625 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg); 9626 } 9627 #endif 9628 9629 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr)) 9630 new_rtx = addr; 9631 else if (TARGET_64BIT 9632 && ix86_cmodel != CM_SMALL_PIC 9633 && gotoff_operand (addr, Pmode)) 9634 { 9635 rtx tmpreg; 9636 /* This symbol may be referenced via a displacement from the PIC 9637 base address (@GOTOFF). */ 9638 9639 if (reload_in_progress) 9640 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); 9641 if (GET_CODE (addr) == CONST) 9642 addr = XEXP (addr, 0); 9643 if (GET_CODE (addr) == PLUS) 9644 { 9645 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), 9646 UNSPEC_GOTOFF); 9647 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1)); 9648 } 9649 else 9650 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); 9651 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 9652 if (!reg) 9653 tmpreg = gen_reg_rtx (Pmode); 9654 else 9655 tmpreg = reg; 9656 emit_move_insn (tmpreg, new_rtx); 9657 9658 if (reg != 0) 9659 { 9660 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx, 9661 tmpreg, 1, OPTAB_DIRECT); 9662 new_rtx = reg; 9663 } 9664 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg); 9665 } 9666 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode)) 9667 { 9668 /* This symbol may be referenced via a displacement from the PIC 9669 base address (@GOTOFF). */ 9670 9671 if (reload_in_progress) 9672 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); 9673 if (GET_CODE (addr) == CONST) 9674 addr = XEXP (addr, 0); 9675 if (GET_CODE (addr) == PLUS) 9676 { 9677 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), 9678 UNSPEC_GOTOFF); 9679 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1)); 9680 } 9681 else 9682 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); 9683 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 9684 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); 9685 9686 if (reg != 0) 9687 { 9688 emit_move_insn (reg, new_rtx); 9689 new_rtx = reg; 9690 } 9691 } 9692 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0) 9693 /* We can't use @GOTOFF for text labels on VxWorks; 9694 see gotoff_operand. */ 9695 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF)) 9696 { 9697 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES) 9698 { 9699 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr)) 9700 return legitimize_dllimport_symbol (addr, true); 9701 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS 9702 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF 9703 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0))) 9704 { 9705 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true); 9706 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1)); 9707 } 9708 } 9709 9710 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC) 9711 { 9712 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL); 9713 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 9714 new_rtx = gen_const_mem (Pmode, new_rtx); 9715 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ()); 9716 9717 if (reg == 0) 9718 reg = gen_reg_rtx (Pmode); 9719 /* Use directly gen_movsi, otherwise the address is loaded 9720 into register for CSE. We don't want to CSE this addresses, 9721 instead we CSE addresses from the GOT table, so skip this. 
*/ 9722 emit_insn (gen_movsi (reg, new_rtx)); 9723 new_rtx = reg; 9724 } 9725 else 9726 { 9727 /* This symbol must be referenced via a load from the 9728 Global Offset Table (@GOT). */ 9729 9730 if (reload_in_progress) 9731 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); 9732 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); 9733 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 9734 if (TARGET_64BIT) 9735 new_rtx = force_reg (Pmode, new_rtx); 9736 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); 9737 new_rtx = gen_const_mem (Pmode, new_rtx); 9738 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ()); 9739 9740 if (reg == 0) 9741 reg = gen_reg_rtx (Pmode); 9742 emit_move_insn (reg, new_rtx); 9743 new_rtx = reg; 9744 } 9745 } 9746 else 9747 { 9748 if (CONST_INT_P (addr) 9749 && !x86_64_immediate_operand (addr, VOIDmode)) 9750 { 9751 if (reg) 9752 { 9753 emit_move_insn (reg, addr); 9754 new_rtx = reg; 9755 } 9756 else 9757 new_rtx = force_reg (Pmode, addr); 9758 } 9759 else if (GET_CODE (addr) == CONST) 9760 { 9761 addr = XEXP (addr, 0); 9762 9763 /* We must match stuff we generate before. Assume the only 9764 unspecs that can get here are ours. Not that we could do 9765 anything with them anyway.... */ 9766 if (GET_CODE (addr) == UNSPEC 9767 || (GET_CODE (addr) == PLUS 9768 && GET_CODE (XEXP (addr, 0)) == UNSPEC)) 9769 return orig; 9770 gcc_assert (GET_CODE (addr) == PLUS); 9771 } 9772 if (GET_CODE (addr) == PLUS) 9773 { 9774 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1); 9775 9776 /* Check first to see if this is a constant offset from a @GOTOFF 9777 symbol reference. */ 9778 if (gotoff_operand (op0, Pmode) 9779 && CONST_INT_P (op1)) 9780 { 9781 if (!TARGET_64BIT) 9782 { 9783 if (reload_in_progress) 9784 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); 9785 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 9786 UNSPEC_GOTOFF); 9787 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1); 9788 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 9789 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); 9790 9791 if (reg != 0) 9792 { 9793 emit_move_insn (reg, new_rtx); 9794 new_rtx = reg; 9795 } 9796 } 9797 else 9798 { 9799 if (INTVAL (op1) < -16*1024*1024 9800 || INTVAL (op1) >= 16*1024*1024) 9801 { 9802 if (!x86_64_immediate_operand (op1, Pmode)) 9803 op1 = force_reg (Pmode, op1); 9804 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1); 9805 } 9806 } 9807 } 9808 else 9809 { 9810 base = legitimize_pic_address (XEXP (addr, 0), reg); 9811 new_rtx = legitimize_pic_address (XEXP (addr, 1), 9812 base == reg ? NULL_RTX : reg); 9813 9814 if (CONST_INT_P (new_rtx)) 9815 new_rtx = plus_constant (base, INTVAL (new_rtx)); 9816 else 9817 { 9818 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1))) 9819 { 9820 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0)); 9821 new_rtx = XEXP (new_rtx, 1); 9822 } 9823 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx); 9824 } 9825 } 9826 } 9827 } 9828 return new_rtx; 9829 } 9830 9831 /* Load the thread pointer. If TO_REG is true, force it into a register. */ 9832 9833 static rtx 9834 get_thread_pointer (int to_reg) 9835 { 9836 rtx tp, reg, insn; 9837 9838 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP); 9839 if (!to_reg) 9840 return tp; 9841 9842 reg = gen_reg_rtx (Pmode); 9843 insn = gen_rtx_SET (VOIDmode, reg, tp); 9844 insn = emit_insn (insn); 9845 9846 return reg; 9847 } 9848 9849 /* A subroutine of legitimize_address and ix86_expand_move. 
FOR_MOV is 9850 false if we expect this to be used for a memory address and true if 9851 we expect to load the address into a register. */ 9852 9853 static rtx 9854 legitimize_tls_address (rtx x, enum tls_model model, int for_mov) 9855 { 9856 rtx dest, base, off, pic, tp; 9857 int type; 9858 9859 switch (model) 9860 { 9861 case TLS_MODEL_GLOBAL_DYNAMIC: 9862 dest = gen_reg_rtx (Pmode); 9863 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0; 9864 9865 if (TARGET_64BIT && ! TARGET_GNU2_TLS) 9866 { 9867 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns; 9868 9869 start_sequence (); 9870 emit_call_insn (gen_tls_global_dynamic_64 (rax, x)); 9871 insns = get_insns (); 9872 end_sequence (); 9873 9874 RTL_CONST_CALL_P (insns) = 1; 9875 emit_libcall_block (insns, dest, rax, x); 9876 } 9877 else if (TARGET_64BIT && TARGET_GNU2_TLS) 9878 emit_insn (gen_tls_global_dynamic_64 (dest, x)); 9879 else 9880 emit_insn (gen_tls_global_dynamic_32 (dest, x)); 9881 9882 if (TARGET_GNU2_TLS) 9883 { 9884 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest)); 9885 9886 set_unique_reg_note (get_last_insn (), REG_EQUIV, x); 9887 } 9888 break; 9889 9890 case TLS_MODEL_LOCAL_DYNAMIC: 9891 base = gen_reg_rtx (Pmode); 9892 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0; 9893 9894 if (TARGET_64BIT && ! TARGET_GNU2_TLS) 9895 { 9896 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note; 9897 9898 start_sequence (); 9899 emit_call_insn (gen_tls_local_dynamic_base_64 (rax)); 9900 insns = get_insns (); 9901 end_sequence (); 9902 9903 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL); 9904 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note); 9905 RTL_CONST_CALL_P (insns) = 1; 9906 emit_libcall_block (insns, base, rax, note); 9907 } 9908 else if (TARGET_64BIT && TARGET_GNU2_TLS) 9909 emit_insn (gen_tls_local_dynamic_base_64 (base)); 9910 else 9911 emit_insn (gen_tls_local_dynamic_base_32 (base)); 9912 9913 if (TARGET_GNU2_TLS) 9914 { 9915 rtx x = ix86_tls_module_base (); 9916 9917 set_unique_reg_note (get_last_insn (), REG_EQUIV, 9918 gen_rtx_MINUS (Pmode, x, tp)); 9919 } 9920 9921 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF); 9922 off = gen_rtx_CONST (Pmode, off); 9923 9924 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off)); 9925 9926 if (TARGET_GNU2_TLS) 9927 { 9928 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp)); 9929 9930 set_unique_reg_note (get_last_insn (), REG_EQUIV, x); 9931 } 9932 9933 break; 9934 9935 case TLS_MODEL_INITIAL_EXEC: 9936 if (TARGET_64BIT) 9937 { 9938 pic = NULL; 9939 type = UNSPEC_GOTNTPOFF; 9940 } 9941 else if (flag_pic) 9942 { 9943 if (reload_in_progress) 9944 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); 9945 pic = pic_offset_table_rtx; 9946 type = TARGET_ANY_GNU_TLS ? 
UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF; 9947 } 9948 else if (!TARGET_ANY_GNU_TLS) 9949 { 9950 pic = gen_reg_rtx (Pmode); 9951 emit_insn (gen_set_got (pic)); 9952 type = UNSPEC_GOTTPOFF; 9953 } 9954 else 9955 { 9956 pic = NULL; 9957 type = UNSPEC_INDNTPOFF; 9958 } 9959 9960 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type); 9961 off = gen_rtx_CONST (Pmode, off); 9962 if (pic) 9963 off = gen_rtx_PLUS (Pmode, pic, off); 9964 off = gen_const_mem (Pmode, off); 9965 set_mem_alias_set (off, ix86_GOT_alias_set ()); 9966 9967 if (TARGET_64BIT || TARGET_ANY_GNU_TLS) 9968 { 9969 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS); 9970 off = force_reg (Pmode, off); 9971 return gen_rtx_PLUS (Pmode, base, off); 9972 } 9973 else 9974 { 9975 base = get_thread_pointer (true); 9976 dest = gen_reg_rtx (Pmode); 9977 emit_insn (gen_subsi3 (dest, base, off)); 9978 } 9979 break; 9980 9981 case TLS_MODEL_LOCAL_EXEC: 9982 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), 9983 (TARGET_64BIT || TARGET_ANY_GNU_TLS) 9984 ? UNSPEC_NTPOFF : UNSPEC_TPOFF); 9985 off = gen_rtx_CONST (Pmode, off); 9986 9987 if (TARGET_64BIT || TARGET_ANY_GNU_TLS) 9988 { 9989 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS); 9990 return gen_rtx_PLUS (Pmode, base, off); 9991 } 9992 else 9993 { 9994 base = get_thread_pointer (true); 9995 dest = gen_reg_rtx (Pmode); 9996 emit_insn (gen_subsi3 (dest, base, off)); 9997 } 9998 break; 9999 10000 default: 10001 gcc_unreachable (); 10002 } 10003 10004 return dest; 10005 } 10006 10007 /* Create or return the unique __imp_DECL dllimport symbol corresponding 10008 to symbol DECL. */ 10009 10010 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map))) 10011 htab_t dllimport_map; 10012 10013 static tree 10014 get_dllimport_decl (tree decl) 10015 { 10016 struct tree_map *h, in; 10017 void **loc; 10018 const char *name; 10019 const char *prefix; 10020 size_t namelen, prefixlen; 10021 char *imp_name; 10022 tree to; 10023 rtx rtl; 10024 10025 if (!dllimport_map) 10026 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0); 10027 10028 in.hash = htab_hash_pointer (decl); 10029 in.base.from = decl; 10030 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT); 10031 h = (struct tree_map *) *loc; 10032 if (h) 10033 return h->to; 10034 10035 *loc = h = GGC_NEW (struct tree_map); 10036 h->hash = in.hash; 10037 h->base.from = decl; 10038 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node); 10039 DECL_ARTIFICIAL (to) = 1; 10040 DECL_IGNORED_P (to) = 1; 10041 DECL_EXTERNAL (to) = 1; 10042 TREE_READONLY (to) = 1; 10043 10044 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); 10045 name = targetm.strip_name_encoding (name); 10046 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0 10047 ? "*__imp_" : "*__imp__"; 10048 namelen = strlen (name); 10049 prefixlen = strlen (prefix); 10050 imp_name = (char *) alloca (namelen + prefixlen + 1); 10051 memcpy (imp_name, prefix, prefixlen); 10052 memcpy (imp_name + prefixlen, name, namelen + 1); 10053 10054 name = ggc_alloc_string (imp_name, namelen + prefixlen); 10055 rtl = gen_rtx_SYMBOL_REF (Pmode, name); 10056 SET_SYMBOL_REF_DECL (rtl, to); 10057 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL; 10058 10059 rtl = gen_const_mem (Pmode, rtl); 10060 set_mem_alias_set (rtl, ix86_GOT_alias_set ()); 10061 10062 SET_DECL_RTL (to, rtl); 10063 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name)); 10064 10065 return to; 10066 } 10067 10068 /* Expand SYMBOL into its corresponding dllimport symbol. 
WANT_REG is 10069 true if we require the result be a register. */ 10070 10071 static rtx 10072 legitimize_dllimport_symbol (rtx symbol, bool want_reg) 10073 { 10074 tree imp_decl; 10075 rtx x; 10076 10077 gcc_assert (SYMBOL_REF_DECL (symbol)); 10078 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol)); 10079 10080 x = DECL_RTL (imp_decl); 10081 if (want_reg) 10082 x = force_reg (Pmode, x); 10083 return x; 10084 } 10085 10086 /* Try machine-dependent ways of modifying an illegitimate address 10087 to be legitimate. If we find one, return the new, valid address. 10088 This macro is used in only one place: `memory_address' in explow.c. 10089 10090 OLDX is the address as it was before break_out_memory_refs was called. 10091 In some cases it is useful to look at this to decide what needs to be done. 10092 10093 MODE and WIN are passed so that this macro can use 10094 GO_IF_LEGITIMATE_ADDRESS. 10095 10096 It is always safe for this macro to do nothing. It exists to recognize 10097 opportunities to optimize the output. 10098 10099 For the 80386, we handle X+REG by loading X into a register R and 10100 using R+REG. R will go in a general reg and indexing will be used. 10101 However, if REG is a broken-out memory address or multiplication, 10102 nothing needs to be done because REG can certainly go in a general reg. 10103 10104 When -fpic is used, special handling is needed for symbolic references. 10105 See comments by legitimize_pic_address in i386.c for details. */ 10106 10107 rtx 10108 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode) 10109 { 10110 int changed = 0; 10111 unsigned log; 10112 10113 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0; 10114 if (log) 10115 return legitimize_tls_address (x, (enum tls_model) log, false); 10116 if (GET_CODE (x) == CONST 10117 && GET_CODE (XEXP (x, 0)) == PLUS 10118 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF 10119 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)))) 10120 { 10121 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), 10122 (enum tls_model) log, false); 10123 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1)); 10124 } 10125 10126 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES) 10127 { 10128 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x)) 10129 return legitimize_dllimport_symbol (x, true); 10130 if (GET_CODE (x) == CONST 10131 && GET_CODE (XEXP (x, 0)) == PLUS 10132 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF 10133 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0))) 10134 { 10135 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true); 10136 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1)); 10137 } 10138 } 10139 10140 if (flag_pic && SYMBOLIC_CONST (x)) 10141 return legitimize_pic_address (x, 0); 10142 10143 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */ 10144 if (GET_CODE (x) == ASHIFT 10145 && CONST_INT_P (XEXP (x, 1)) 10146 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4) 10147 { 10148 changed = 1; 10149 log = INTVAL (XEXP (x, 1)); 10150 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)), 10151 GEN_INT (1 << log)); 10152 } 10153 10154 if (GET_CODE (x) == PLUS) 10155 { 10156 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. 
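/* Editorial sketch, not part of GCC: the shift-to-multiply canonicalization named above and performed here and below.  A left shift by 0..3 is the same as a multiply by 1, 2, 4 or 8, which is exactly what the SIB scale field can express; larger shift counts are left alone.  The helper name is invented.  */
static long
toy_scale_from_shift (unsigned int shift_count)
{
  return shift_count < 4 ? 1L << shift_count : 0;   /* 0 means "not a scale" */
}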
*/ 10157 10158 if (GET_CODE (XEXP (x, 0)) == ASHIFT 10159 && CONST_INT_P (XEXP (XEXP (x, 0), 1)) 10160 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4) 10161 { 10162 changed = 1; 10163 log = INTVAL (XEXP (XEXP (x, 0), 1)); 10164 XEXP (x, 0) = gen_rtx_MULT (Pmode, 10165 force_reg (Pmode, XEXP (XEXP (x, 0), 0)), 10166 GEN_INT (1 << log)); 10167 } 10168 10169 if (GET_CODE (XEXP (x, 1)) == ASHIFT 10170 && CONST_INT_P (XEXP (XEXP (x, 1), 1)) 10171 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4) 10172 { 10173 changed = 1; 10174 log = INTVAL (XEXP (XEXP (x, 1), 1)); 10175 XEXP (x, 1) = gen_rtx_MULT (Pmode, 10176 force_reg (Pmode, XEXP (XEXP (x, 1), 0)), 10177 GEN_INT (1 << log)); 10178 } 10179 10180 /* Put multiply first if it isn't already. */ 10181 if (GET_CODE (XEXP (x, 1)) == MULT) 10182 { 10183 rtx tmp = XEXP (x, 0); 10184 XEXP (x, 0) = XEXP (x, 1); 10185 XEXP (x, 1) = tmp; 10186 changed = 1; 10187 } 10188 10189 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const))) 10190 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be 10191 created by virtual register instantiation, register elimination, and 10192 similar optimizations. */ 10193 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS) 10194 { 10195 changed = 1; 10196 x = gen_rtx_PLUS (Pmode, 10197 gen_rtx_PLUS (Pmode, XEXP (x, 0), 10198 XEXP (XEXP (x, 1), 0)), 10199 XEXP (XEXP (x, 1), 1)); 10200 } 10201 10202 /* Canonicalize 10203 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const) 10204 into (plus (plus (mult (reg) (const)) (reg)) (const)). */ 10205 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS 10206 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT 10207 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS 10208 && CONSTANT_P (XEXP (x, 1))) 10209 { 10210 rtx constant; 10211 rtx other = NULL_RTX; 10212 10213 if (CONST_INT_P (XEXP (x, 1))) 10214 { 10215 constant = XEXP (x, 1); 10216 other = XEXP (XEXP (XEXP (x, 0), 1), 1); 10217 } 10218 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1))) 10219 { 10220 constant = XEXP (XEXP (XEXP (x, 0), 1), 1); 10221 other = XEXP (x, 1); 10222 } 10223 else 10224 constant = 0; 10225 10226 if (constant) 10227 { 10228 changed = 1; 10229 x = gen_rtx_PLUS (Pmode, 10230 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0), 10231 XEXP (XEXP (XEXP (x, 0), 1), 0)), 10232 plus_constant (other, INTVAL (constant))); 10233 } 10234 } 10235 10236 if (changed && legitimate_address_p (mode, x, FALSE)) 10237 return x; 10238 10239 if (GET_CODE (XEXP (x, 0)) == MULT) 10240 { 10241 changed = 1; 10242 XEXP (x, 0) = force_operand (XEXP (x, 0), 0); 10243 } 10244 10245 if (GET_CODE (XEXP (x, 1)) == MULT) 10246 { 10247 changed = 1; 10248 XEXP (x, 1) = force_operand (XEXP (x, 1), 0); 10249 } 10250 10251 if (changed 10252 && REG_P (XEXP (x, 1)) 10253 && REG_P (XEXP (x, 0))) 10254 return x; 10255 10256 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1))) 10257 { 10258 changed = 1; 10259 x = legitimize_pic_address (x, 0); 10260 } 10261 10262 if (changed && legitimate_address_p (mode, x, FALSE)) 10263 return x; 10264 10265 if (REG_P (XEXP (x, 0))) 10266 { 10267 rtx temp = gen_reg_rtx (Pmode); 10268 rtx val = force_operand (XEXP (x, 1), temp); 10269 if (val != temp) 10270 emit_move_insn (temp, val); 10271 10272 XEXP (x, 1) = temp; 10273 return x; 10274 } 10275 10276 else if (REG_P (XEXP (x, 1))) 10277 { 10278 rtx temp = gen_reg_rtx (Pmode); 10279 rtx val = force_operand (XEXP (x, 0), temp); 10280 if (val != temp) 10281 emit_move_insn (temp, val); 10282 10283 XEXP (x, 
0) = temp; 10284 return x; 10285 } 10286 } 10287 10288 return x; 10289 } 10290 10291 /* Print an integer constant expression in assembler syntax. Addition 10292 and subtraction are the only arithmetic that may appear in these 10293 expressions. FILE is the stdio stream to write to, X is the rtx, and 10294 CODE is the operand print code from the output string. */ 10295 10296 static void 10297 output_pic_addr_const (FILE *file, rtx x, int code) 10298 { 10299 char buf[256]; 10300 10301 switch (GET_CODE (x)) 10302 { 10303 case PC: 10304 gcc_assert (flag_pic); 10305 putc ('.', file); 10306 break; 10307 10308 case SYMBOL_REF: 10309 if (! TARGET_MACHO || TARGET_64BIT) 10310 output_addr_const (file, x); 10311 else 10312 { 10313 const char *name = XSTR (x, 0); 10314 10315 /* Mark the decl as referenced so that cgraph will 10316 output the function. */ 10317 if (SYMBOL_REF_DECL (x)) 10318 mark_decl_referenced (SYMBOL_REF_DECL (x)); 10319 10320 #if TARGET_MACHO 10321 if (MACHOPIC_INDIRECT 10322 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION) 10323 name = machopic_indirection_name (x, /*stub_p=*/true); 10324 #endif 10325 assemble_name (file, name); 10326 } 10327 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI) 10328 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x)) 10329 fputs ("@PLT", file); 10330 break; 10331 10332 case LABEL_REF: 10333 x = XEXP (x, 0); 10334 /* FALLTHRU */ 10335 case CODE_LABEL: 10336 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); 10337 assemble_name (asm_out_file, buf); 10338 break; 10339 10340 case CONST_INT: 10341 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); 10342 break; 10343 10344 case CONST: 10345 /* This used to output parentheses around the expression, 10346 but that does not work on the 386 (either ATT or BSD assembler). */ 10347 output_pic_addr_const (file, XEXP (x, 0), code); 10348 break; 10349 10350 case CONST_DOUBLE: 10351 if (GET_MODE (x) == VOIDmode) 10352 { 10353 /* We can use %d if the number is <32 bits and positive. */ 10354 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0) 10355 fprintf (file, "0x%lx%08lx", 10356 (unsigned long) CONST_DOUBLE_HIGH (x), 10357 (unsigned long) CONST_DOUBLE_LOW (x)); 10358 else 10359 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x)); 10360 } 10361 else 10362 /* We can't handle floating point constants; 10363 PRINT_OPERAND must handle them. */ 10364 output_operand_lossage ("floating constant misused"); 10365 break; 10366 10367 case PLUS: 10368 /* Some assemblers need integer constants to appear first. */ 10369 if (CONST_INT_P (XEXP (x, 0))) 10370 { 10371 output_pic_addr_const (file, XEXP (x, 0), code); 10372 putc ('+', file); 10373 output_pic_addr_const (file, XEXP (x, 1), code); 10374 } 10375 else 10376 { 10377 gcc_assert (CONST_INT_P (XEXP (x, 1))); 10378 output_pic_addr_const (file, XEXP (x, 1), code); 10379 putc ('+', file); 10380 output_pic_addr_const (file, XEXP (x, 0), code); 10381 } 10382 break; 10383 10384 case MINUS: 10385 if (!TARGET_MACHO) 10386 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file); 10387 output_pic_addr_const (file, XEXP (x, 0), code); 10388 putc ('-', file); 10389 output_pic_addr_const (file, XEXP (x, 1), code); 10390 if (!TARGET_MACHO) 10391 putc (ASSEMBLER_DIALECT == ASM_INTEL ? 
')' : ']', file); 10392 break; 10393 10394 case UNSPEC: 10395 gcc_assert (XVECLEN (x, 0) == 1); 10396 output_pic_addr_const (file, XVECEXP (x, 0, 0), code); 10397 switch (XINT (x, 1)) 10398 { 10399 case UNSPEC_GOT: 10400 fputs ("@GOT", file); 10401 break; 10402 case UNSPEC_GOTOFF: 10403 fputs ("@GOTOFF", file); 10404 break; 10405 case UNSPEC_PLTOFF: 10406 fputs ("@PLTOFF", file); 10407 break; 10408 case UNSPEC_GOTPCREL: 10409 fputs (ASSEMBLER_DIALECT == ASM_ATT ? 10410 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file); 10411 break; 10412 case UNSPEC_GOTTPOFF: 10413 /* FIXME: This might be @TPOFF in Sun ld too. */ 10414 fputs ("@GOTTPOFF", file); 10415 break; 10416 case UNSPEC_TPOFF: 10417 fputs ("@TPOFF", file); 10418 break; 10419 case UNSPEC_NTPOFF: 10420 if (TARGET_64BIT) 10421 fputs ("@TPOFF", file); 10422 else 10423 fputs ("@NTPOFF", file); 10424 break; 10425 case UNSPEC_DTPOFF: 10426 fputs ("@DTPOFF", file); 10427 break; 10428 case UNSPEC_GOTNTPOFF: 10429 if (TARGET_64BIT) 10430 fputs (ASSEMBLER_DIALECT == ASM_ATT ? 10431 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file); 10432 else 10433 fputs ("@GOTNTPOFF", file); 10434 break; 10435 case UNSPEC_INDNTPOFF: 10436 fputs ("@INDNTPOFF", file); 10437 break; 10438 #if TARGET_MACHO 10439 case UNSPEC_MACHOPIC_OFFSET: 10440 putc ('-', file); 10441 machopic_output_function_base_name (file); 10442 break; 10443 #endif 10444 default: 10445 output_operand_lossage ("invalid UNSPEC as operand"); 10446 break; 10447 } 10448 break; 10449 10450 default: 10451 output_operand_lossage ("invalid expression as operand"); 10452 } 10453 } 10454 10455 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. 10456 We need to emit DTP-relative relocations. */ 10457 10458 static void ATTRIBUTE_UNUSED 10459 i386_output_dwarf_dtprel (FILE *file, int size, rtx x) 10460 { 10461 fputs (ASM_LONG, file); 10462 output_addr_const (file, x); 10463 fputs ("@DTPOFF", file); 10464 switch (size) 10465 { 10466 case 4: 10467 break; 10468 case 8: 10469 fputs (", 0", file); 10470 break; 10471 default: 10472 gcc_unreachable (); 10473 } 10474 } 10475 10476 /* Return true if X is a representation of the PIC register. This copes 10477 with calls from ix86_find_base_term, where the register might have 10478 been replaced by a cselib value. */ 10479 10480 static bool 10481 ix86_pic_register_p (rtx x) 10482 { 10483 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x)) 10484 return (pic_offset_table_rtx 10485 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx)); 10486 else 10487 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM; 10488 } 10489 10490 /* In the name of slightly smaller debug output, and to cater to 10491 general assembler lossage, recognize PIC+GOTOFF and turn it back 10492 into a direct symbol reference. 10493 10494 On Darwin, this is necessary to avoid a crash, because Darwin 10495 has a different PIC label for each routine but the DWARF debugging 10496 information is not associated with any particular routine, so it's 10497 necessary to remove references to the PIC label from RTL stored by 10498 the DWARF output code. */ 10499 10500 static rtx 10501 ix86_delegitimize_address (rtx orig_x) 10502 { 10503 rtx x = orig_x; 10504 /* reg_addend is NULL or a multiple of some register. */ 10505 rtx reg_addend = NULL_RTX; 10506 /* const_addend is NULL or a const_int. */ 10507 rtx const_addend = NULL_RTX; 10508 /* This is the result, or NULL. 
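/* Editorial sketch, not part of GCC: the directive shapes emitted by i386_output_dwarf_dtprel above, assuming ASM_LONG expands to a ".long" directive and writing "foo" for an arbitrary TLS symbol; for an 8-byte value the high half is a literal zero.  The helper name is invented.  */
#include <stdio.h>

static void
toy_dwarf_dtprel (int size, const char *sym)
{
  if (size == 4)
    printf ("\t.long\t%s@DTPOFF\n", sym);
  else if (size == 8)
    printf ("\t.long\t%s@DTPOFF, 0\n", sym);
}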
*/ 10509 rtx result = NULL_RTX; 10510 10511 if (MEM_P (x)) 10512 x = XEXP (x, 0); 10513 10514 if (TARGET_64BIT) 10515 { 10516 if (GET_CODE (x) != CONST 10517 || GET_CODE (XEXP (x, 0)) != UNSPEC 10518 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL 10519 || !MEM_P (orig_x)) 10520 return orig_x; 10521 return XVECEXP (XEXP (x, 0), 0, 0); 10522 } 10523 10524 if (GET_CODE (x) != PLUS 10525 || GET_CODE (XEXP (x, 1)) != CONST) 10526 return orig_x; 10527 10528 if (ix86_pic_register_p (XEXP (x, 0))) 10529 /* %ebx + GOT/GOTOFF */ 10530 ; 10531 else if (GET_CODE (XEXP (x, 0)) == PLUS) 10532 { 10533 /* %ebx + %reg * scale + GOT/GOTOFF */ 10534 reg_addend = XEXP (x, 0); 10535 if (ix86_pic_register_p (XEXP (reg_addend, 0))) 10536 reg_addend = XEXP (reg_addend, 1); 10537 else if (ix86_pic_register_p (XEXP (reg_addend, 1))) 10538 reg_addend = XEXP (reg_addend, 0); 10539 else 10540 return orig_x; 10541 if (!REG_P (reg_addend) 10542 && GET_CODE (reg_addend) != MULT 10543 && GET_CODE (reg_addend) != ASHIFT) 10544 return orig_x; 10545 } 10546 else 10547 return orig_x; 10548 10549 x = XEXP (XEXP (x, 1), 0); 10550 if (GET_CODE (x) == PLUS 10551 && CONST_INT_P (XEXP (x, 1))) 10552 { 10553 const_addend = XEXP (x, 1); 10554 x = XEXP (x, 0); 10555 } 10556 10557 if (GET_CODE (x) == UNSPEC 10558 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x)) 10559 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x)))) 10560 result = XVECEXP (x, 0, 0); 10561 10562 if (TARGET_MACHO && darwin_local_data_pic (x) 10563 && !MEM_P (orig_x)) 10564 result = XVECEXP (x, 0, 0); 10565 10566 if (! result) 10567 return orig_x; 10568 10569 if (const_addend) 10570 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend)); 10571 if (reg_addend) 10572 result = gen_rtx_PLUS (Pmode, reg_addend, result); 10573 return result; 10574 } 10575 10576 /* If X is a machine specific address (i.e. a symbol or label being 10577 referenced as a displacement from the GOT implemented using an 10578 UNSPEC), then return the base term. Otherwise return X. 
*/ 10579 10580 rtx 10581 ix86_find_base_term (rtx x) 10582 { 10583 rtx term; 10584 10585 if (TARGET_64BIT) 10586 { 10587 if (GET_CODE (x) != CONST) 10588 return x; 10589 term = XEXP (x, 0); 10590 if (GET_CODE (term) == PLUS 10591 && (CONST_INT_P (XEXP (term, 1)) 10592 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE)) 10593 term = XEXP (term, 0); 10594 if (GET_CODE (term) != UNSPEC 10595 || XINT (term, 1) != UNSPEC_GOTPCREL) 10596 return x; 10597 10598 return XVECEXP (term, 0, 0); 10599 } 10600 10601 return ix86_delegitimize_address (x); 10602 } 10603 10604 static void 10605 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse, 10606 int fp, FILE *file) 10607 { 10608 const char *suffix; 10609 10610 if (mode == CCFPmode || mode == CCFPUmode) 10611 { 10612 enum rtx_code second_code, bypass_code; 10613 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 10614 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN); 10615 code = ix86_fp_compare_code_to_integer (code); 10616 mode = CCmode; 10617 } 10618 if (reverse) 10619 code = reverse_condition (code); 10620 10621 switch (code) 10622 { 10623 case EQ: 10624 switch (mode) 10625 { 10626 case CCAmode: 10627 suffix = "a"; 10628 break; 10629 10630 case CCCmode: 10631 suffix = "c"; 10632 break; 10633 10634 case CCOmode: 10635 suffix = "o"; 10636 break; 10637 10638 case CCSmode: 10639 suffix = "s"; 10640 break; 10641 10642 default: 10643 suffix = "e"; 10644 } 10645 break; 10646 case NE: 10647 switch (mode) 10648 { 10649 case CCAmode: 10650 suffix = "na"; 10651 break; 10652 10653 case CCCmode: 10654 suffix = "nc"; 10655 break; 10656 10657 case CCOmode: 10658 suffix = "no"; 10659 break; 10660 10661 case CCSmode: 10662 suffix = "ns"; 10663 break; 10664 10665 default: 10666 suffix = "ne"; 10667 } 10668 break; 10669 case GT: 10670 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode); 10671 suffix = "g"; 10672 break; 10673 case GTU: 10674 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers. 10675 Those same assemblers have the same but opposite lossage on cmov. */ 10676 if (mode == CCmode) 10677 suffix = fp ? "nbe" : "a"; 10678 else if (mode == CCCmode) 10679 suffix = "b"; 10680 else 10681 gcc_unreachable (); 10682 break; 10683 case LT: 10684 switch (mode) 10685 { 10686 case CCNOmode: 10687 case CCGOCmode: 10688 suffix = "s"; 10689 break; 10690 10691 case CCmode: 10692 case CCGCmode: 10693 suffix = "l"; 10694 break; 10695 10696 default: 10697 gcc_unreachable (); 10698 } 10699 break; 10700 case LTU: 10701 gcc_assert (mode == CCmode || mode == CCCmode); 10702 suffix = "b"; 10703 break; 10704 case GE: 10705 switch (mode) 10706 { 10707 case CCNOmode: 10708 case CCGOCmode: 10709 suffix = "ns"; 10710 break; 10711 10712 case CCmode: 10713 case CCGCmode: 10714 suffix = "ge"; 10715 break; 10716 10717 default: 10718 gcc_unreachable (); 10719 } 10720 break; 10721 case GEU: 10722 /* ??? As above. */ 10723 gcc_assert (mode == CCmode || mode == CCCmode); 10724 suffix = fp ? "nb" : "ae"; 10725 break; 10726 case LE: 10727 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode); 10728 suffix = "le"; 10729 break; 10730 case LEU: 10731 /* ??? As above. */ 10732 if (mode == CCmode) 10733 suffix = "be"; 10734 else if (mode == CCCmode) 10735 suffix = fp ? "nb" : "ae"; 10736 else 10737 gcc_unreachable (); 10738 break; 10739 case UNORDERED: 10740 suffix = fp ? "u" : "p"; 10741 break; 10742 case ORDERED: 10743 suffix = fp ? 
"nu" : "np"; 10744 break; 10745 default: 10746 gcc_unreachable (); 10747 } 10748 fputs (suffix, file); 10749 } 10750 10751 /* Print the name of register X to FILE based on its machine mode and number. 10752 If CODE is 'w', pretend the mode is HImode. 10753 If CODE is 'b', pretend the mode is QImode. 10754 If CODE is 'k', pretend the mode is SImode. 10755 If CODE is 'q', pretend the mode is DImode. 10756 If CODE is 'x', pretend the mode is V4SFmode. 10757 If CODE is 't', pretend the mode is V8SFmode. 10758 If CODE is 'h', pretend the reg is the 'high' byte register. 10759 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. 10760 If CODE is 'd', duplicate the operand for AVX instruction. 10761 */ 10762 10763 void 10764 print_reg (rtx x, int code, FILE *file) 10765 { 10766 const char *reg; 10767 bool duplicated = code == 'd' && TARGET_AVX; 10768 10769 gcc_assert (x == pc_rtx 10770 || (REGNO (x) != ARG_POINTER_REGNUM 10771 && REGNO (x) != FRAME_POINTER_REGNUM 10772 && REGNO (x) != FLAGS_REG 10773 && REGNO (x) != FPSR_REG 10774 && REGNO (x) != FPCR_REG)); 10775 10776 if (ASSEMBLER_DIALECT == ASM_ATT) 10777 putc ('%', file); 10778 10779 if (x == pc_rtx) 10780 { 10781 gcc_assert (TARGET_64BIT); 10782 fputs ("rip", file); 10783 return; 10784 } 10785 10786 if (code == 'w' || MMX_REG_P (x)) 10787 code = 2; 10788 else if (code == 'b') 10789 code = 1; 10790 else if (code == 'k') 10791 code = 4; 10792 else if (code == 'q') 10793 code = 8; 10794 else if (code == 'y') 10795 code = 3; 10796 else if (code == 'h') 10797 code = 0; 10798 else if (code == 'x') 10799 code = 16; 10800 else if (code == 't') 10801 code = 32; 10802 else 10803 code = GET_MODE_SIZE (GET_MODE (x)); 10804 10805 /* Irritatingly, AMD extended registers use different naming convention 10806 from the normal registers. */ 10807 if (REX_INT_REG_P (x)) 10808 { 10809 gcc_assert (TARGET_64BIT); 10810 switch (code) 10811 { 10812 case 0: 10813 error ("extended registers have no high halves"); 10814 break; 10815 case 1: 10816 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8); 10817 break; 10818 case 2: 10819 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8); 10820 break; 10821 case 4: 10822 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8); 10823 break; 10824 case 8: 10825 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8); 10826 break; 10827 default: 10828 error ("unsupported operand size for extended register"); 10829 break; 10830 } 10831 return; 10832 } 10833 10834 reg = NULL; 10835 switch (code) 10836 { 10837 case 3: 10838 if (STACK_TOP_P (x)) 10839 { 10840 reg = "st(0)"; 10841 break; 10842 } 10843 /* FALLTHRU */ 10844 case 8: 10845 case 4: 10846 case 12: 10847 if (! ANY_FP_REG_P (x)) 10848 putc (code == 8 && TARGET_64BIT ? 
'r' : 'e', file); 10849 /* FALLTHRU */ 10850 case 16: 10851 case 2: 10852 normal: 10853 reg = hi_reg_name[REGNO (x)]; 10854 break; 10855 case 1: 10856 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name)) 10857 goto normal; 10858 reg = qi_reg_name[REGNO (x)]; 10859 break; 10860 case 0: 10861 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name)) 10862 goto normal; 10863 reg = qi_high_reg_name[REGNO (x)]; 10864 break; 10865 case 32: 10866 if (SSE_REG_P (x)) 10867 { 10868 gcc_assert (!duplicated); 10869 putc ('y', file); 10870 fputs (hi_reg_name[REGNO (x)] + 1, file); 10871 return; 10872 } 10873 break; 10874 default: 10875 gcc_unreachable (); 10876 } 10877 10878 fputs (reg, file); 10879 if (duplicated) 10880 { 10881 if (ASSEMBLER_DIALECT == ASM_ATT) 10882 fprintf (file, ", %%%s", reg); 10883 else 10884 fprintf (file, ", %s", reg); 10885 } 10886 } 10887 10888 /* Locate some local-dynamic symbol still in use by this function 10889 so that we can print its name in some tls_local_dynamic_base 10890 pattern. */ 10891 10892 static int 10893 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED) 10894 { 10895 rtx x = *px; 10896 10897 if (GET_CODE (x) == SYMBOL_REF 10898 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC) 10899 { 10900 cfun->machine->some_ld_name = XSTR (x, 0); 10901 return 1; 10902 } 10903 10904 return 0; 10905 } 10906 10907 static const char * 10908 get_some_local_dynamic_name (void) 10909 { 10910 rtx insn; 10911 10912 if (cfun->machine->some_ld_name) 10913 return cfun->machine->some_ld_name; 10914 10915 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) 10916 if (INSN_P (insn) 10917 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) 10918 return cfun->machine->some_ld_name; 10919 10920 gcc_unreachable (); 10921 } 10922 10923 /* Meaning of CODE: 10924 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. 10925 C -- print opcode suffix for set/cmov insn. 10926 c -- like C, but print reversed condition 10927 F,f -- likewise, but for floating-point. 10928 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", 10929 otherwise nothing 10930 R -- print the prefix for register names. 10931 z -- print the opcode suffix for the size of the current operand. 10932 * -- print a star (in certain assembler syntax) 10933 A -- print an absolute memory reference. 10934 w -- print the operand as if it's a "word" (HImode) even if it isn't. 10935 s -- print a shift double count, followed by the assemblers argument 10936 delimiter. 10937 b -- print the QImode name of the register for the indicated operand. 10938 %b0 would print %al if operands[0] is reg 0. 10939 w -- likewise, print the HImode name of the register. 10940 k -- likewise, print the SImode name of the register. 10941 q -- likewise, print the DImode name of the register. 10942 x -- likewise, print the V4SFmode name of the register. 10943 t -- likewise, print the V8SFmode name of the register. 10944 h -- print the QImode name for a "high" register, either ah, bh, ch or dh. 10945 y -- print "st(0)" instead of "st" as a register. 10946 d -- print duplicated register operand for AVX instruction. 10947 D -- print condition for SSE cmp instruction. 10948 P -- if PIC, print an @PLT suffix. 10949 X -- don't print any sort of PIC '@' suffix for a symbol. 10950 & -- print some in-use local-dynamic symbol name. 10951 H -- print a memory address offset by 8; used for sse high-parts 10952 Y -- print condition for SSE5 com* instruction. 
10953 + -- print a branch hint as 'cs' or 'ds' prefix 10954 ; -- print a semicolon (after prefixes due to bug in older gas). 10955 */ 10956 10957 void 10958 print_operand (FILE *file, rtx x, int code) 10959 { 10960 if (code) 10961 { 10962 switch (code) 10963 { 10964 case '*': 10965 if (ASSEMBLER_DIALECT == ASM_ATT) 10966 putc ('*', file); 10967 return; 10968 10969 case '&': 10970 assemble_name (file, get_some_local_dynamic_name ()); 10971 return; 10972 10973 case 'A': 10974 switch (ASSEMBLER_DIALECT) 10975 { 10976 case ASM_ATT: 10977 putc ('*', file); 10978 break; 10979 10980 case ASM_INTEL: 10981 /* Intel syntax. For absolute addresses, registers should not 10982 be surrounded by braces. */ 10983 if (!REG_P (x)) 10984 { 10985 putc ('[', file); 10986 PRINT_OPERAND (file, x, 0); 10987 putc (']', file); 10988 return; 10989 } 10990 break; 10991 10992 default: 10993 gcc_unreachable (); 10994 } 10995 10996 PRINT_OPERAND (file, x, 0); 10997 return; 10998 10999 11000 case 'L': 11001 if (ASSEMBLER_DIALECT == ASM_ATT) 11002 putc ('l', file); 11003 return; 11004 11005 case 'W': 11006 if (ASSEMBLER_DIALECT == ASM_ATT) 11007 putc ('w', file); 11008 return; 11009 11010 case 'B': 11011 if (ASSEMBLER_DIALECT == ASM_ATT) 11012 putc ('b', file); 11013 return; 11014 11015 case 'Q': 11016 if (ASSEMBLER_DIALECT == ASM_ATT) 11017 putc ('l', file); 11018 return; 11019 11020 case 'S': 11021 if (ASSEMBLER_DIALECT == ASM_ATT) 11022 putc ('s', file); 11023 return; 11024 11025 case 'T': 11026 if (ASSEMBLER_DIALECT == ASM_ATT) 11027 putc ('t', file); 11028 return; 11029 11030 case 'z': 11031 /* 387 opcodes don't get size suffixes if the operands are 11032 registers. */ 11033 if (STACK_REG_P (x)) 11034 return; 11035 11036 /* Likewise if using Intel opcodes. */ 11037 if (ASSEMBLER_DIALECT == ASM_INTEL) 11038 return; 11039 11040 /* This is the size of op from size of operand. */ 11041 switch (GET_MODE_SIZE (GET_MODE (x))) 11042 { 11043 case 1: 11044 putc ('b', file); 11045 return; 11046 11047 case 2: 11048 if (MEM_P (x)) 11049 { 11050 #ifdef HAVE_GAS_FILDS_FISTS 11051 putc ('s', file); 11052 #endif 11053 return; 11054 } 11055 else 11056 putc ('w', file); 11057 return; 11058 11059 case 4: 11060 if (GET_MODE (x) == SFmode) 11061 { 11062 putc ('s', file); 11063 return; 11064 } 11065 else 11066 putc ('l', file); 11067 return; 11068 11069 case 12: 11070 case 16: 11071 putc ('t', file); 11072 return; 11073 11074 case 8: 11075 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) 11076 { 11077 if (MEM_P (x)) 11078 { 11079 #ifdef GAS_MNEMONICS 11080 putc ('q', file); 11081 #else 11082 putc ('l', file); 11083 putc ('l', file); 11084 #endif 11085 } 11086 else 11087 putc ('q', file); 11088 } 11089 else 11090 putc ('l', file); 11091 return; 11092 11093 default: 11094 gcc_unreachable (); 11095 } 11096 11097 case 'd': 11098 case 'b': 11099 case 'w': 11100 case 'k': 11101 case 'q': 11102 case 'h': 11103 case 't': 11104 case 'y': 11105 case 'x': 11106 case 'X': 11107 case 'P': 11108 break; 11109 11110 case 's': 11111 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT) 11112 { 11113 PRINT_OPERAND (file, x, 0); 11114 fputs (", ", file); 11115 } 11116 return; 11117 11118 case 'D': 11119 /* Little bit of braindamage here. The SSE compare instructions 11120 does use completely different names for the comparisons that the 11121 fp conditional moves. 
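     For example, LT prints as "lt" and UNORDERED as "unord" in both
     branches below, so a hypothetical insn template along the lines of
     "cmp%D1ps" would come out as "cmpltps" or "cmpunordps" for those
     codes.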
*/ 11122 if (TARGET_AVX) 11123 { 11124 switch (GET_CODE (x)) 11125 { 11126 case EQ: 11127 fputs ("eq", file); 11128 break; 11129 case UNEQ: 11130 fputs ("eq_us", file); 11131 break; 11132 case LT: 11133 fputs ("lt", file); 11134 break; 11135 case UNLT: 11136 fputs ("nge", file); 11137 break; 11138 case LE: 11139 fputs ("le", file); 11140 break; 11141 case UNLE: 11142 fputs ("ngt", file); 11143 break; 11144 case UNORDERED: 11145 fputs ("unord", file); 11146 break; 11147 case NE: 11148 fputs ("neq", file); 11149 break; 11150 case LTGT: 11151 fputs ("neq_oq", file); 11152 break; 11153 case GE: 11154 fputs ("ge", file); 11155 break; 11156 case UNGE: 11157 fputs ("nlt", file); 11158 break; 11159 case GT: 11160 fputs ("gt", file); 11161 break; 11162 case UNGT: 11163 fputs ("nle", file); 11164 break; 11165 case ORDERED: 11166 fputs ("ord", file); 11167 break; 11168 default: 11169 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'"); 11170 return; 11171 } 11172 } 11173 else 11174 { 11175 switch (GET_CODE (x)) 11176 { 11177 case EQ: 11178 case UNEQ: 11179 fputs ("eq", file); 11180 break; 11181 case LT: 11182 case UNLT: 11183 fputs ("lt", file); 11184 break; 11185 case LE: 11186 case UNLE: 11187 fputs ("le", file); 11188 break; 11189 case UNORDERED: 11190 fputs ("unord", file); 11191 break; 11192 case NE: 11193 case LTGT: 11194 fputs ("neq", file); 11195 break; 11196 case UNGE: 11197 case GE: 11198 fputs ("nlt", file); 11199 break; 11200 case UNGT: 11201 case GT: 11202 fputs ("nle", file); 11203 break; 11204 case ORDERED: 11205 fputs ("ord", file); 11206 break; 11207 default: 11208 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'"); 11209 return; 11210 } 11211 } 11212 return; 11213 case 'O': 11214 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 11215 if (ASSEMBLER_DIALECT == ASM_ATT) 11216 { 11217 switch (GET_MODE (x)) 11218 { 11219 case HImode: putc ('w', file); break; 11220 case SImode: 11221 case SFmode: putc ('l', file); break; 11222 case DImode: 11223 case DFmode: putc ('q', file); break; 11224 default: gcc_unreachable (); 11225 } 11226 putc ('.', file); 11227 } 11228 #endif 11229 return; 11230 case 'C': 11231 if (!COMPARISON_P (x)) 11232 { 11233 output_operand_lossage ("operand is neither a constant nor a " 11234 "condition code, invalid operand code " 11235 "'C'"); 11236 return; 11237 } 11238 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file); 11239 return; 11240 case 'F': 11241 if (!COMPARISON_P (x)) 11242 { 11243 output_operand_lossage ("operand is neither a constant nor a " 11244 "condition code, invalid operand code " 11245 "'F'"); 11246 return; 11247 } 11248 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 11249 if (ASSEMBLER_DIALECT == ASM_ATT) 11250 putc ('.', file); 11251 #endif 11252 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file); 11253 return; 11254 11255 /* Like above, but reverse condition */ 11256 case 'c': 11257 /* Check to see if argument to %c is really a constant 11258 and not a condition code which needs to be reversed. 
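	 For example, with an EQ operand %C prints the "e" suffix while
	 %c prints the reversed "ne", so the same comparison rtx can
	 drive either a sete or a setne in one pattern.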
*/ 11259 if (!COMPARISON_P (x)) 11260 { 11261 output_operand_lossage ("operand is neither a constant nor a " 11262 "condition code, invalid operand " 11263 "code 'c'"); 11264 return; 11265 } 11266 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file); 11267 return; 11268 case 'f': 11269 if (!COMPARISON_P (x)) 11270 { 11271 output_operand_lossage ("operand is neither a constant nor a " 11272 "condition code, invalid operand " 11273 "code 'f'"); 11274 return; 11275 } 11276 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 11277 if (ASSEMBLER_DIALECT == ASM_ATT) 11278 putc ('.', file); 11279 #endif 11280 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file); 11281 return; 11282 11283 case 'H': 11284 /* It doesn't actually matter what mode we use here, as we're 11285 only going to use this for printing. */ 11286 x = adjust_address_nv (x, DImode, 8); 11287 break; 11288 11289 case '+': 11290 { 11291 rtx x; 11292 11293 if (!optimize 11294 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS) 11295 return; 11296 11297 x = find_reg_note (current_output_insn, REG_BR_PROB, 0); 11298 if (x) 11299 { 11300 int pred_val = INTVAL (XEXP (x, 0)); 11301 11302 if (pred_val < REG_BR_PROB_BASE * 45 / 100 11303 || pred_val > REG_BR_PROB_BASE * 55 / 100) 11304 { 11305 int taken = pred_val > REG_BR_PROB_BASE / 2; 11306 int cputaken = final_forward_branch_p (current_output_insn) == 0; 11307 11308 /* Emit hints only in the case default branch prediction 11309 heuristics would fail. */ 11310 if (taken != cputaken) 11311 { 11312 /* We use 3e (DS) prefix for taken branches and 11313 2e (CS) prefix for not taken branches. */ 11314 if (taken) 11315 fputs ("ds ; ", file); 11316 else 11317 fputs ("cs ; ", file); 11318 } 11319 } 11320 } 11321 return; 11322 } 11323 11324 case 'Y': 11325 switch (GET_CODE (x)) 11326 { 11327 case NE: 11328 fputs ("neq", file); 11329 break; 11330 case EQ: 11331 fputs ("eq", file); 11332 break; 11333 case GE: 11334 case GEU: 11335 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file); 11336 break; 11337 case GT: 11338 case GTU: 11339 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file); 11340 break; 11341 case LE: 11342 case LEU: 11343 fputs ("le", file); 11344 break; 11345 case LT: 11346 case LTU: 11347 fputs ("lt", file); 11348 break; 11349 case UNORDERED: 11350 fputs ("unord", file); 11351 break; 11352 case ORDERED: 11353 fputs ("ord", file); 11354 break; 11355 case UNEQ: 11356 fputs ("ueq", file); 11357 break; 11358 case UNGE: 11359 fputs ("nlt", file); 11360 break; 11361 case UNGT: 11362 fputs ("nle", file); 11363 break; 11364 case UNLE: 11365 fputs ("ule", file); 11366 break; 11367 case UNLT: 11368 fputs ("ult", file); 11369 break; 11370 case LTGT: 11371 fputs ("une", file); 11372 break; 11373 default: 11374 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'"); 11375 return; 11376 } 11377 return; 11378 11379 case ';': 11380 #if TARGET_MACHO 11381 fputs (" ; ", file); 11382 #else 11383 fputc (' ', file); 11384 #endif 11385 return; 11386 11387 default: 11388 output_operand_lossage ("invalid operand code '%c'", code); 11389 } 11390 } 11391 11392 if (REG_P (x)) 11393 print_reg (x, code, file); 11394 11395 else if (MEM_P (x)) 11396 { 11397 /* No `byte ptr' prefix for call instructions or BLKmode operands. 
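	 For other memory operands in Intel syntax an explicit size
	 keyword is emitted below, e.g. a 4-byte integer load is printed
	 with a "DWORD PTR" prefix before the address.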
*/ 11398 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P' 11399 && GET_MODE (x) != BLKmode) 11400 { 11401 const char * size; 11402 switch (GET_MODE_SIZE (GET_MODE (x))) 11403 { 11404 case 1: size = "BYTE"; break; 11405 case 2: size = "WORD"; break; 11406 case 4: size = "DWORD"; break; 11407 case 8: size = "QWORD"; break; 11408 case 12: size = "TBYTE"; break; 11409 case 16: 11410 if (GET_MODE (x) == XFmode) 11411 size = "TBYTE"; 11412 else 11413 size = "XMMWORD"; 11414 break; 11415 case 32: size = "YMMWORD"; break; 11416 default: 11417 gcc_unreachable (); 11418 } 11419 11420 /* Check for explicit size override (codes 'b', 'w' and 'k') */ 11421 if (code == 'b') 11422 size = "BYTE"; 11423 else if (code == 'w') 11424 size = "WORD"; 11425 else if (code == 'k') 11426 size = "DWORD"; 11427 11428 fputs (size, file); 11429 fputs (" PTR ", file); 11430 } 11431 11432 x = XEXP (x, 0); 11433 /* Avoid (%rip) for call operands. */ 11434 if (CONSTANT_ADDRESS_P (x) && code == 'P' 11435 && !CONST_INT_P (x)) 11436 output_addr_const (file, x); 11437 else if (this_is_asm_operands && ! address_operand (x, VOIDmode)) 11438 output_operand_lossage ("invalid constraints for operand"); 11439 else 11440 output_address (x); 11441 } 11442 11443 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode) 11444 { 11445 REAL_VALUE_TYPE r; 11446 long l; 11447 11448 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 11449 REAL_VALUE_TO_TARGET_SINGLE (r, l); 11450 11451 if (ASSEMBLER_DIALECT == ASM_ATT) 11452 putc ('$', file); 11453 fprintf (file, "0x%08lx", (long unsigned int) l); 11454 } 11455 11456 /* These float cases don't actually occur as immediate operands. */ 11457 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode) 11458 { 11459 char dstr[30]; 11460 11461 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); 11462 fprintf (file, "%s", dstr); 11463 } 11464 11465 else if (GET_CODE (x) == CONST_DOUBLE 11466 && GET_MODE (x) == XFmode) 11467 { 11468 char dstr[30]; 11469 11470 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); 11471 fprintf (file, "%s", dstr); 11472 } 11473 11474 else 11475 { 11476 /* We have patterns that allow zero sets of memory, for instance. 11477 In 64-bit mode, we should probably support all 8-byte vectors, 11478 since we can in fact encode that into an immediate. */ 11479 if (GET_CODE (x) == CONST_VECTOR) 11480 { 11481 gcc_assert (x == CONST0_RTX (GET_MODE (x))); 11482 x = const0_rtx; 11483 } 11484 11485 if (code != 'P') 11486 { 11487 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE) 11488 { 11489 if (ASSEMBLER_DIALECT == ASM_ATT) 11490 putc ('$', file); 11491 } 11492 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF 11493 || GET_CODE (x) == LABEL_REF) 11494 { 11495 if (ASSEMBLER_DIALECT == ASM_ATT) 11496 putc ('$', file); 11497 else 11498 fputs ("OFFSET FLAT:", file); 11499 } 11500 } 11501 if (CONST_INT_P (x)) 11502 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); 11503 else if (flag_pic) 11504 output_pic_addr_const (file, x, code); 11505 else 11506 output_addr_const (file, x); 11507 } 11508 } 11509 11510 /* Print a memory operand whose address is ADDR. 
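   For example, a base+index*scale+displacement address built from %ebx,
   %esi, scale 4 and offset 8 is printed as "8(%ebx,%esi,4)" in AT&T
   syntax and as "[ebx+8+esi*4]" in Intel syntax.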
*/ 11511 11512 void 11513 print_operand_address (FILE *file, rtx addr) 11514 { 11515 struct ix86_address parts; 11516 rtx base, index, disp; 11517 int scale; 11518 int ok = ix86_decompose_address (addr, &parts); 11519 11520 gcc_assert (ok); 11521 11522 base = parts.base; 11523 index = parts.index; 11524 disp = parts.disp; 11525 scale = parts.scale; 11526 11527 switch (parts.seg) 11528 { 11529 case SEG_DEFAULT: 11530 break; 11531 case SEG_FS: 11532 case SEG_GS: 11533 if (ASSEMBLER_DIALECT == ASM_ATT) 11534 putc ('%', file); 11535 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file); 11536 break; 11537 default: 11538 gcc_unreachable (); 11539 } 11540 11541 /* Use one byte shorter RIP relative addressing for 64bit mode. */ 11542 if (TARGET_64BIT && !base && !index) 11543 { 11544 rtx symbol = disp; 11545 11546 if (GET_CODE (disp) == CONST 11547 && GET_CODE (XEXP (disp, 0)) == PLUS 11548 && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) 11549 symbol = XEXP (XEXP (disp, 0), 0); 11550 11551 if (GET_CODE (symbol) == LABEL_REF 11552 || (GET_CODE (symbol) == SYMBOL_REF 11553 && SYMBOL_REF_TLS_MODEL (symbol) == 0)) 11554 base = pc_rtx; 11555 } 11556 if (!base && !index) 11557 { 11558 /* Displacement only requires special attention. */ 11559 11560 if (CONST_INT_P (disp)) 11561 { 11562 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT) 11563 fputs ("ds:", file); 11564 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp)); 11565 } 11566 else if (flag_pic) 11567 output_pic_addr_const (file, disp, 0); 11568 else 11569 output_addr_const (file, disp); 11570 } 11571 else 11572 { 11573 if (ASSEMBLER_DIALECT == ASM_ATT) 11574 { 11575 if (disp) 11576 { 11577 if (flag_pic) 11578 output_pic_addr_const (file, disp, 0); 11579 else if (GET_CODE (disp) == LABEL_REF) 11580 output_asm_label (disp); 11581 else 11582 output_addr_const (file, disp); 11583 } 11584 11585 putc ('(', file); 11586 if (base) 11587 print_reg (base, 0, file); 11588 if (index) 11589 { 11590 putc (',', file); 11591 print_reg (index, 0, file); 11592 if (scale != 1) 11593 fprintf (file, ",%d", scale); 11594 } 11595 putc (')', file); 11596 } 11597 else 11598 { 11599 rtx offset = NULL_RTX; 11600 11601 if (disp) 11602 { 11603 /* Pull out the offset of a symbol; print any symbol itself. 
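	   For example, the Intel-syntax address for symbol+8 with base
	   %ebx comes out as "symbol[ebx+8]", with the constant folded
	   into the bracketed part rather than printed after the symbol.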
*/ 11604 if (GET_CODE (disp) == CONST 11605 && GET_CODE (XEXP (disp, 0)) == PLUS 11606 && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) 11607 { 11608 offset = XEXP (XEXP (disp, 0), 1); 11609 disp = gen_rtx_CONST (VOIDmode, 11610 XEXP (XEXP (disp, 0), 0)); 11611 } 11612 11613 if (flag_pic) 11614 output_pic_addr_const (file, disp, 0); 11615 else if (GET_CODE (disp) == LABEL_REF) 11616 output_asm_label (disp); 11617 else if (CONST_INT_P (disp)) 11618 offset = disp; 11619 else 11620 output_addr_const (file, disp); 11621 } 11622 11623 putc ('[', file); 11624 if (base) 11625 { 11626 print_reg (base, 0, file); 11627 if (offset) 11628 { 11629 if (INTVAL (offset) >= 0) 11630 putc ('+', file); 11631 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); 11632 } 11633 } 11634 else if (offset) 11635 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); 11636 else 11637 putc ('0', file); 11638 11639 if (index) 11640 { 11641 putc ('+', file); 11642 print_reg (index, 0, file); 11643 if (scale != 1) 11644 fprintf (file, "*%d", scale); 11645 } 11646 putc (']', file); 11647 } 11648 } 11649 } 11650 11651 bool 11652 output_addr_const_extra (FILE *file, rtx x) 11653 { 11654 rtx op; 11655 11656 if (GET_CODE (x) != UNSPEC) 11657 return false; 11658 11659 op = XVECEXP (x, 0, 0); 11660 switch (XINT (x, 1)) 11661 { 11662 case UNSPEC_GOTTPOFF: 11663 output_addr_const (file, op); 11664 /* FIXME: This might be @TPOFF in Sun ld. */ 11665 fputs ("@GOTTPOFF", file); 11666 break; 11667 case UNSPEC_TPOFF: 11668 output_addr_const (file, op); 11669 fputs ("@TPOFF", file); 11670 break; 11671 case UNSPEC_NTPOFF: 11672 output_addr_const (file, op); 11673 if (TARGET_64BIT) 11674 fputs ("@TPOFF", file); 11675 else 11676 fputs ("@NTPOFF", file); 11677 break; 11678 case UNSPEC_DTPOFF: 11679 output_addr_const (file, op); 11680 fputs ("@DTPOFF", file); 11681 break; 11682 case UNSPEC_GOTNTPOFF: 11683 output_addr_const (file, op); 11684 if (TARGET_64BIT) 11685 fputs (ASSEMBLER_DIALECT == ASM_ATT ? 11686 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file); 11687 else 11688 fputs ("@GOTNTPOFF", file); 11689 break; 11690 case UNSPEC_INDNTPOFF: 11691 output_addr_const (file, op); 11692 fputs ("@INDNTPOFF", file); 11693 break; 11694 #if TARGET_MACHO 11695 case UNSPEC_MACHOPIC_OFFSET: 11696 output_addr_const (file, op); 11697 putc ('-', file); 11698 machopic_output_function_base_name (file); 11699 break; 11700 #endif 11701 11702 default: 11703 return false; 11704 } 11705 11706 return true; 11707 } 11708 11709 /* Split one or more DImode RTL references into pairs of SImode 11710 references. The RTL can be REG, offsettable MEM, integer constant, or 11711 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to 11712 split and "num" is its length. lo_half and hi_half are output arrays 11713 that parallel "operands". */ 11714 11715 void 11716 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) 11717 { 11718 while (num--) 11719 { 11720 rtx op = operands[num]; 11721 11722 /* simplify_subreg refuse to split volatile memory addresses, 11723 but we still have to handle it. */ 11724 if (MEM_P (op)) 11725 { 11726 lo_half[num] = adjust_address (op, SImode, 0); 11727 hi_half[num] = adjust_address (op, SImode, 4); 11728 } 11729 else 11730 { 11731 lo_half[num] = simplify_gen_subreg (SImode, op, 11732 GET_MODE (op) == VOIDmode 11733 ? DImode : GET_MODE (op), 0); 11734 hi_half[num] = simplify_gen_subreg (SImode, op, 11735 GET_MODE (op) == VOIDmode 11736 ? 
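/* Illustrative: splitting the DImode constant 0x100000002 this way
   yields lo_half = (const_int 2) and hi_half = (const_int 1), since the
   low SImode word lives at offset 0 on this little-endian target.  */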
DImode : GET_MODE (op), 4); 11737 } 11738 } 11739 } 11740 /* Split one or more TImode RTL references into pairs of DImode 11741 references. The RTL can be REG, offsettable MEM, integer constant, or 11742 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to 11743 split and "num" is its length. lo_half and hi_half are output arrays 11744 that parallel "operands". */ 11745 11746 void 11747 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) 11748 { 11749 while (num--) 11750 { 11751 rtx op = operands[num]; 11752 11753 /* simplify_subreg refuse to split volatile memory addresses, but we 11754 still have to handle it. */ 11755 if (MEM_P (op)) 11756 { 11757 lo_half[num] = adjust_address (op, DImode, 0); 11758 hi_half[num] = adjust_address (op, DImode, 8); 11759 } 11760 else 11761 { 11762 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0); 11763 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8); 11764 } 11765 } 11766 } 11767 11768 /* Output code to perform a 387 binary operation in INSN, one of PLUS, 11769 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] 11770 is the expression of the binary operation. The output may either be 11771 emitted here, or returned to the caller, like all output_* functions. 11772 11773 There is no guarantee that the operands are the same mode, as they 11774 might be within FLOAT or FLOAT_EXTEND expressions. */ 11775 11776 #ifndef SYSV386_COMPAT 11777 /* Set to 1 for compatibility with brain-damaged assemblers. No-one 11778 wants to fix the assemblers because that causes incompatibility 11779 with gcc. No-one wants to fix gcc because that causes 11780 incompatibility with assemblers... You can use the option of 11781 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */ 11782 #define SYSV386_COMPAT 1 11783 #endif 11784 11785 const char * 11786 output_387_binary_op (rtx insn, rtx *operands) 11787 { 11788 static char buf[40]; 11789 const char *p; 11790 const char *ssep; 11791 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]); 11792 11793 #ifdef ENABLE_CHECKING 11794 /* Even if we do not want to check the inputs, this documents input 11795 constraints. Which helps in understanding the following code. 
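     Concretely: for the x87 case operands[0] must be a stack register
     matching one of the two sources, the other source must be a stack
     register or memory, and at least one source must be %st(0);
     anything that does not fit this shape is expected to be an SSE
     operation.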
*/ 11796 if (STACK_REG_P (operands[0]) 11797 && ((REG_P (operands[1]) 11798 && REGNO (operands[0]) == REGNO (operands[1]) 11799 && (STACK_REG_P (operands[2]) || MEM_P (operands[2]))) 11800 || (REG_P (operands[2]) 11801 && REGNO (operands[0]) == REGNO (operands[2]) 11802 && (STACK_REG_P (operands[1]) || MEM_P (operands[1])))) 11803 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2]))) 11804 ; /* ok */ 11805 else 11806 gcc_assert (is_sse); 11807 #endif 11808 11809 switch (GET_CODE (operands[3])) 11810 { 11811 case PLUS: 11812 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 11813 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 11814 p = "fiadd"; 11815 else 11816 p = "fadd"; 11817 ssep = "vadd"; 11818 break; 11819 11820 case MINUS: 11821 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 11822 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 11823 p = "fisub"; 11824 else 11825 p = "fsub"; 11826 ssep = "vsub"; 11827 break; 11828 11829 case MULT: 11830 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 11831 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 11832 p = "fimul"; 11833 else 11834 p = "fmul"; 11835 ssep = "vmul"; 11836 break; 11837 11838 case DIV: 11839 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 11840 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 11841 p = "fidiv"; 11842 else 11843 p = "fdiv"; 11844 ssep = "vdiv"; 11845 break; 11846 11847 default: 11848 gcc_unreachable (); 11849 } 11850 11851 if (is_sse) 11852 { 11853 if (TARGET_AVX) 11854 { 11855 strcpy (buf, ssep); 11856 if (GET_MODE (operands[0]) == SFmode) 11857 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}"); 11858 else 11859 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}"); 11860 } 11861 else 11862 { 11863 strcpy (buf, ssep + 1); 11864 if (GET_MODE (operands[0]) == SFmode) 11865 strcat (buf, "ss\t{%2, %0|%0, %2}"); 11866 else 11867 strcat (buf, "sd\t{%2, %0|%0, %2}"); 11868 } 11869 return buf; 11870 } 11871 strcpy (buf, p); 11872 11873 switch (GET_CODE (operands[3])) 11874 { 11875 case MULT: 11876 case PLUS: 11877 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2])) 11878 { 11879 rtx temp = operands[2]; 11880 operands[2] = operands[1]; 11881 operands[1] = temp; 11882 } 11883 11884 /* know operands[0] == operands[1]. */ 11885 11886 if (MEM_P (operands[2])) 11887 { 11888 p = "%z2\t%2"; 11889 break; 11890 } 11891 11892 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) 11893 { 11894 if (STACK_TOP_P (operands[0])) 11895 /* How is it that we are storing to a dead operand[2]? 11896 Well, presumably operands[1] is dead too. We can't 11897 store the result to st(0) as st(0) gets popped on this 11898 instruction. Instead store to operands[2] (which I 11899 think has to be st(1)). st(1) will be popped later. 11900 gcc <= 2.8.1 didn't have this check and generated 11901 assembly code that the Unixware assembler rejected. 
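	       With PLUS, for example, the AT&T form chosen here is
	       roughly "faddp %st, %st(1)", popping %st(0) after the
	       addition.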
*/ 11902 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ 11903 else 11904 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ 11905 break; 11906 } 11907 11908 if (STACK_TOP_P (operands[0])) 11909 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ 11910 else 11911 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ 11912 break; 11913 11914 case MINUS: 11915 case DIV: 11916 if (MEM_P (operands[1])) 11917 { 11918 p = "r%z1\t%1"; 11919 break; 11920 } 11921 11922 if (MEM_P (operands[2])) 11923 { 11924 p = "%z2\t%2"; 11925 break; 11926 } 11927 11928 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) 11929 { 11930 #if SYSV386_COMPAT 11931 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T 11932 derived assemblers, confusingly reverse the direction of 11933 the operation for fsub{r} and fdiv{r} when the 11934 destination register is not st(0). The Intel assembler 11935 doesn't have this brain damage. Read !SYSV386_COMPAT to 11936 figure out what the hardware really does. */ 11937 if (STACK_TOP_P (operands[0])) 11938 p = "{p\t%0, %2|rp\t%2, %0}"; 11939 else 11940 p = "{rp\t%2, %0|p\t%0, %2}"; 11941 #else 11942 if (STACK_TOP_P (operands[0])) 11943 /* As above for fmul/fadd, we can't store to st(0). */ 11944 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ 11945 else 11946 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ 11947 #endif 11948 break; 11949 } 11950 11951 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) 11952 { 11953 #if SYSV386_COMPAT 11954 if (STACK_TOP_P (operands[0])) 11955 p = "{rp\t%0, %1|p\t%1, %0}"; 11956 else 11957 p = "{p\t%1, %0|rp\t%0, %1}"; 11958 #else 11959 if (STACK_TOP_P (operands[0])) 11960 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */ 11961 else 11962 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */ 11963 #endif 11964 break; 11965 } 11966 11967 if (STACK_TOP_P (operands[0])) 11968 { 11969 if (STACK_TOP_P (operands[1])) 11970 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ 11971 else 11972 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */ 11973 break; 11974 } 11975 else if (STACK_TOP_P (operands[1])) 11976 { 11977 #if SYSV386_COMPAT 11978 p = "{\t%1, %0|r\t%0, %1}"; 11979 #else 11980 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */ 11981 #endif 11982 } 11983 else 11984 { 11985 #if SYSV386_COMPAT 11986 p = "{r\t%2, %0|\t%0, %2}"; 11987 #else 11988 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ 11989 #endif 11990 } 11991 break; 11992 11993 default: 11994 gcc_unreachable (); 11995 } 11996 11997 strcat (buf, p); 11998 return buf; 11999 } 12000 12001 /* Return needed mode for entity in optimize_mode_switching pass. */ 12002 12003 int 12004 ix86_mode_needed (int entity, rtx insn) 12005 { 12006 enum attr_i387_cw mode; 12007 12008 /* The mode UNINITIALIZED is used to store control word after a 12009 function call or ASM pattern. The mode ANY specify that function 12010 has no requirements on the control word and make no changes in the 12011 bits we are interested in. 
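   For example, an insn whose i387_cw attribute is I387_CW_TRUNC makes
   the I387_TRUNC entity require the truncating control word, while
   insns with no special requirement map to I387_CW_ANY.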
*/ 12012 12013 if (CALL_P (insn) 12014 || (NONJUMP_INSN_P (insn) 12015 && (asm_noperands (PATTERN (insn)) >= 0 12016 || GET_CODE (PATTERN (insn)) == ASM_INPUT))) 12017 return I387_CW_UNINITIALIZED; 12018 12019 if (recog_memoized (insn) < 0) 12020 return I387_CW_ANY; 12021 12022 mode = get_attr_i387_cw (insn); 12023 12024 switch (entity) 12025 { 12026 case I387_TRUNC: 12027 if (mode == I387_CW_TRUNC) 12028 return mode; 12029 break; 12030 12031 case I387_FLOOR: 12032 if (mode == I387_CW_FLOOR) 12033 return mode; 12034 break; 12035 12036 case I387_CEIL: 12037 if (mode == I387_CW_CEIL) 12038 return mode; 12039 break; 12040 12041 case I387_MASK_PM: 12042 if (mode == I387_CW_MASK_PM) 12043 return mode; 12044 break; 12045 12046 default: 12047 gcc_unreachable (); 12048 } 12049 12050 return I387_CW_ANY; 12051 } 12052 12053 /* Output code to initialize control word copies used by trunc?f?i and 12054 rounding patterns. CURRENT_MODE is set to current control word, 12055 while NEW_MODE is set to new control word. */ 12056 12057 void 12058 emit_i387_cw_initialization (int mode) 12059 { 12060 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED); 12061 rtx new_mode; 12062 12063 enum ix86_stack_slot slot; 12064 12065 rtx reg = gen_reg_rtx (HImode); 12066 12067 emit_insn (gen_x86_fnstcw_1 (stored_mode)); 12068 emit_move_insn (reg, copy_rtx (stored_mode)); 12069 12070 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL 12071 || optimize_function_for_size_p (cfun)) 12072 { 12073 switch (mode) 12074 { 12075 case I387_CW_TRUNC: 12076 /* round toward zero (truncate) */ 12077 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00))); 12078 slot = SLOT_CW_TRUNC; 12079 break; 12080 12081 case I387_CW_FLOOR: 12082 /* round down toward -oo */ 12083 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); 12084 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400))); 12085 slot = SLOT_CW_FLOOR; 12086 break; 12087 12088 case I387_CW_CEIL: 12089 /* round up toward +oo */ 12090 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); 12091 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800))); 12092 slot = SLOT_CW_CEIL; 12093 break; 12094 12095 case I387_CW_MASK_PM: 12096 /* mask precision exception for nearbyint() */ 12097 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020))); 12098 slot = SLOT_CW_MASK_PM; 12099 break; 12100 12101 default: 12102 gcc_unreachable (); 12103 } 12104 } 12105 else 12106 { 12107 switch (mode) 12108 { 12109 case I387_CW_TRUNC: 12110 /* round toward zero (truncate) */ 12111 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc))); 12112 slot = SLOT_CW_TRUNC; 12113 break; 12114 12115 case I387_CW_FLOOR: 12116 /* round down toward -oo */ 12117 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4))); 12118 slot = SLOT_CW_FLOOR; 12119 break; 12120 12121 case I387_CW_CEIL: 12122 /* round up toward +oo */ 12123 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8))); 12124 slot = SLOT_CW_CEIL; 12125 break; 12126 12127 case I387_CW_MASK_PM: 12128 /* mask precision exception for nearbyint() */ 12129 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020))); 12130 slot = SLOT_CW_MASK_PM; 12131 break; 12132 12133 default: 12134 gcc_unreachable (); 12135 } 12136 } 12137 12138 gcc_assert (slot < MAX_386_STACK_LOCALS); 12139 12140 new_mode = assign_386_stack_local (HImode, slot); 12141 emit_move_insn (new_mode, reg); 12142 } 12143 12144 /* Output code for INSN to convert a float to a signed int. OPERANDS 12145 are the insn operands. The output may be [HSD]Imode and the input 12146 operand may be [SDX]Fmode. 
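   When a rounding-mode switch is needed and fisttp is not available,
   the emitted sequence is roughly "fldcw %3" (the control word set up
   by emit_i387_cw_initialization), then "fistp" or "fist" with the
   proper size suffix, then "fldcw %2" to restore the original control
   word.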
*/ 12147 12148 const char * 12149 output_fix_trunc (rtx insn, rtx *operands, int fisttp) 12150 { 12151 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; 12152 int dimode_p = GET_MODE (operands[0]) == DImode; 12153 int round_mode = get_attr_i387_cw (insn); 12154 12155 /* Jump through a hoop or two for DImode, since the hardware has no 12156 non-popping instruction. We used to do this a different way, but 12157 that was somewhat fragile and broke with post-reload splitters. */ 12158 if ((dimode_p || fisttp) && !stack_top_dies) 12159 output_asm_insn ("fld\t%y1", operands); 12160 12161 gcc_assert (STACK_TOP_P (operands[1])); 12162 gcc_assert (MEM_P (operands[0])); 12163 gcc_assert (GET_MODE (operands[1]) != TFmode); 12164 12165 if (fisttp) 12166 output_asm_insn ("fisttp%z0\t%0", operands); 12167 else 12168 { 12169 if (round_mode != I387_CW_ANY) 12170 output_asm_insn ("fldcw\t%3", operands); 12171 if (stack_top_dies || dimode_p) 12172 output_asm_insn ("fistp%z0\t%0", operands); 12173 else 12174 output_asm_insn ("fist%z0\t%0", operands); 12175 if (round_mode != I387_CW_ANY) 12176 output_asm_insn ("fldcw\t%2", operands); 12177 } 12178 12179 return ""; 12180 } 12181 12182 /* Output code for x87 ffreep insn. The OPNO argument, which may only 12183 have the values zero or one, indicates the ffreep insn's operand 12184 from the OPERANDS array. */ 12185 12186 static const char * 12187 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno) 12188 { 12189 if (TARGET_USE_FFREEP) 12190 #ifdef HAVE_AS_IX86_FFREEP 12191 return opno ? "ffreep\t%y1" : "ffreep\t%y0"; 12192 #else 12193 { 12194 static char retval[32]; 12195 int regno = REGNO (operands[opno]); 12196 12197 gcc_assert (FP_REGNO_P (regno)); 12198 12199 regno -= FIRST_STACK_REG; 12200 12201 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno); 12202 return retval; 12203 } 12204 #endif 12205 12206 return opno ? "fstp\t%y1" : "fstp\t%y0"; 12207 } 12208 12209 12210 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi 12211 should be used. UNORDERED_P is true when fucom should be used. */ 12212 12213 const char * 12214 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p) 12215 { 12216 int stack_top_dies; 12217 rtx cmp_op0, cmp_op1; 12218 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]); 12219 12220 if (eflags_p) 12221 { 12222 cmp_op0 = operands[0]; 12223 cmp_op1 = operands[1]; 12224 } 12225 else 12226 { 12227 cmp_op0 = operands[1]; 12228 cmp_op1 = operands[2]; 12229 } 12230 12231 if (is_sse) 12232 { 12233 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}"; 12234 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}"; 12235 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}"; 12236 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}"; 12237 12238 if (GET_MODE (operands[0]) == SFmode) 12239 if (unordered_p) 12240 return &ucomiss[TARGET_AVX ? 0 : 1]; 12241 else 12242 return &comiss[TARGET_AVX ? 0 : 1]; 12243 else 12244 if (unordered_p) 12245 return &ucomisd[TARGET_AVX ? 0 : 1]; 12246 else 12247 return &comisd[TARGET_AVX ? 
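/* Illustrative: indexing past the leading 'v' when AVX is disabled
   means an SFmode ordered compare comes out as "comiss" (or "vcomiss"
   with -mavx) and the unordered variant as "ucomiss"/"vucomiss"; the
   DFmode forms use the "sd" suffix instead.  */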
0 : 1]; 12248 } 12249 12250 gcc_assert (STACK_TOP_P (cmp_op0)); 12251 12252 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; 12253 12254 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1))) 12255 { 12256 if (stack_top_dies) 12257 { 12258 output_asm_insn ("ftst\n\tfnstsw\t%0", operands); 12259 return output_387_ffreep (operands, 1); 12260 } 12261 else 12262 return "ftst\n\tfnstsw\t%0"; 12263 } 12264 12265 if (STACK_REG_P (cmp_op1) 12266 && stack_top_dies 12267 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1)) 12268 && REGNO (cmp_op1) != FIRST_STACK_REG) 12269 { 12270 /* If both the top of the 387 stack dies, and the other operand 12271 is also a stack register that dies, then this must be a 12272 `fcompp' float compare */ 12273 12274 if (eflags_p) 12275 { 12276 /* There is no double popping fcomi variant. Fortunately, 12277 eflags is immune from the fstp's cc clobbering. */ 12278 if (unordered_p) 12279 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands); 12280 else 12281 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands); 12282 return output_387_ffreep (operands, 0); 12283 } 12284 else 12285 { 12286 if (unordered_p) 12287 return "fucompp\n\tfnstsw\t%0"; 12288 else 12289 return "fcompp\n\tfnstsw\t%0"; 12290 } 12291 } 12292 else 12293 { 12294 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */ 12295 12296 static const char * const alt[16] = 12297 { 12298 "fcom%z2\t%y2\n\tfnstsw\t%0", 12299 "fcomp%z2\t%y2\n\tfnstsw\t%0", 12300 "fucom%z2\t%y2\n\tfnstsw\t%0", 12301 "fucomp%z2\t%y2\n\tfnstsw\t%0", 12302 12303 "ficom%z2\t%y2\n\tfnstsw\t%0", 12304 "ficomp%z2\t%y2\n\tfnstsw\t%0", 12305 NULL, 12306 NULL, 12307 12308 "fcomi\t{%y1, %0|%0, %y1}", 12309 "fcomip\t{%y1, %0|%0, %y1}", 12310 "fucomi\t{%y1, %0|%0, %y1}", 12311 "fucomip\t{%y1, %0|%0, %y1}", 12312 12313 NULL, 12314 NULL, 12315 NULL, 12316 NULL 12317 }; 12318 12319 int mask; 12320 const char *ret; 12321 12322 mask = eflags_p << 3; 12323 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2; 12324 mask |= unordered_p << 1; 12325 mask |= stack_top_dies; 12326 12327 gcc_assert (mask < 16); 12328 ret = alt[mask]; 12329 gcc_assert (ret); 12330 12331 return ret; 12332 } 12333 } 12334 12335 void 12336 ix86_output_addr_vec_elt (FILE *file, int value) 12337 { 12338 const char *directive = ASM_LONG; 12339 12340 #ifdef ASM_QUAD 12341 if (TARGET_64BIT) 12342 directive = ASM_QUAD; 12343 #else 12344 gcc_assert (!TARGET_64BIT); 12345 #endif 12346 12347 fprintf (file, "%s%s%d\n", directive, LPREFIX, value); 12348 } 12349 12350 void 12351 ix86_output_addr_diff_elt (FILE *file, int value, int rel) 12352 { 12353 const char *directive = ASM_LONG; 12354 12355 #ifdef ASM_QUAD 12356 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode) 12357 directive = ASM_QUAD; 12358 #else 12359 gcc_assert (!TARGET_64BIT); 12360 #endif 12361 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. 
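   So on 64-bit or VxWorks RTP targets a plain label difference is
   emitted instead; otherwise an @GOTOFF reference is used when the
   assembler accepts one in data, with a GOT-symbol-relative expression
   as the final fallback.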
*/ 12362 if (TARGET_64BIT || TARGET_VXWORKS_RTP) 12363 fprintf (file, "%s%s%d-%s%d\n", 12364 directive, LPREFIX, value, LPREFIX, rel); 12365 else if (HAVE_AS_GOTOFF_IN_DATA) 12366 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value); 12367 #if TARGET_MACHO 12368 else if (TARGET_MACHO) 12369 { 12370 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value); 12371 machopic_output_function_base_name (file); 12372 fprintf(file, "\n"); 12373 } 12374 #endif 12375 else 12376 asm_fprintf (file, "%s%U%s+[.-%s%d]\n", 12377 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value); 12378 } 12379 12380 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate 12381 for the target. */ 12382 12383 void 12384 ix86_expand_clear (rtx dest) 12385 { 12386 rtx tmp; 12387 12388 /* We play register width games, which are only valid after reload. */ 12389 gcc_assert (reload_completed); 12390 12391 /* Avoid HImode and its attendant prefix byte. */ 12392 if (GET_MODE_SIZE (GET_MODE (dest)) < 4) 12393 dest = gen_rtx_REG (SImode, REGNO (dest)); 12394 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx); 12395 12396 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */ 12397 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())) 12398 { 12399 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); 12400 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob)); 12401 } 12402 12403 emit_insn (tmp); 12404 } 12405 12406 /* X is an unchanging MEM. If it is a constant pool reference, return 12407 the constant pool rtx, else NULL. */ 12408 12409 rtx 12410 maybe_get_pool_constant (rtx x) 12411 { 12412 x = ix86_delegitimize_address (XEXP (x, 0)); 12413 12414 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) 12415 return get_pool_constant (x); 12416 12417 return NULL_RTX; 12418 } 12419 12420 void 12421 ix86_expand_move (enum machine_mode mode, rtx operands[]) 12422 { 12423 rtx op0, op1; 12424 enum tls_model model; 12425 12426 op0 = operands[0]; 12427 op1 = operands[1]; 12428 12429 if (GET_CODE (op1) == SYMBOL_REF) 12430 { 12431 model = SYMBOL_REF_TLS_MODEL (op1); 12432 if (model) 12433 { 12434 op1 = legitimize_tls_address (op1, model, true); 12435 op1 = force_operand (op1, op0); 12436 if (op1 == op0) 12437 return; 12438 } 12439 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES 12440 && SYMBOL_REF_DLLIMPORT_P (op1)) 12441 op1 = legitimize_dllimport_symbol (op1, false); 12442 } 12443 else if (GET_CODE (op1) == CONST 12444 && GET_CODE (XEXP (op1, 0)) == PLUS 12445 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF) 12446 { 12447 rtx addend = XEXP (XEXP (op1, 0), 1); 12448 rtx symbol = XEXP (XEXP (op1, 0), 0); 12449 rtx tmp = NULL; 12450 12451 model = SYMBOL_REF_TLS_MODEL (symbol); 12452 if (model) 12453 tmp = legitimize_tls_address (symbol, model, true); 12454 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES 12455 && SYMBOL_REF_DLLIMPORT_P (symbol)) 12456 tmp = legitimize_dllimport_symbol (symbol, true); 12457 12458 if (tmp) 12459 { 12460 tmp = force_operand (tmp, NULL); 12461 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend, 12462 op0, 1, OPTAB_DIRECT); 12463 if (tmp == op0) 12464 return; 12465 } 12466 } 12467 12468 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode)) 12469 { 12470 if (TARGET_MACHO && !TARGET_64BIT) 12471 { 12472 #if TARGET_MACHO 12473 if (MACHOPIC_PURE) 12474 { 12475 rtx temp = ((reload_in_progress 12476 || ((op0 && REG_P (op0)) 12477 && mode == Pmode)) 12478 ? 
op0 : gen_reg_rtx (Pmode)); 12479 op1 = machopic_indirect_data_reference (op1, temp); 12480 op1 = machopic_legitimize_pic_address (op1, mode, 12481 temp == op1 ? 0 : temp); 12482 } 12483 else if (MACHOPIC_INDIRECT) 12484 op1 = machopic_indirect_data_reference (op1, 0); 12485 if (op0 == op1) 12486 return; 12487 #endif 12488 } 12489 else 12490 { 12491 if (MEM_P (op0)) 12492 op1 = force_reg (Pmode, op1); 12493 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode)) 12494 { 12495 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX; 12496 op1 = legitimize_pic_address (op1, reg); 12497 if (op0 == op1) 12498 return; 12499 } 12500 } 12501 } 12502 else 12503 { 12504 if (MEM_P (op0) 12505 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode) 12506 || !push_operand (op0, mode)) 12507 && MEM_P (op1)) 12508 op1 = force_reg (mode, op1); 12509 12510 if (push_operand (op0, mode) 12511 && ! general_no_elim_operand (op1, mode)) 12512 op1 = copy_to_mode_reg (mode, op1); 12513 12514 /* Force large constants in 64bit compilation into register 12515 to get them CSEed. */ 12516 if (can_create_pseudo_p () 12517 && (mode == DImode) && TARGET_64BIT 12518 && immediate_operand (op1, mode) 12519 && !x86_64_zext_immediate_operand (op1, VOIDmode) 12520 && !register_operand (op0, mode) 12521 && optimize) 12522 op1 = copy_to_mode_reg (mode, op1); 12523 12524 if (can_create_pseudo_p () 12525 && FLOAT_MODE_P (mode) 12526 && GET_CODE (op1) == CONST_DOUBLE) 12527 { 12528 /* If we are loading a floating point constant to a register, 12529 force the value to memory now, since we'll get better code 12530 out the back end. */ 12531 12532 op1 = validize_mem (force_const_mem (mode, op1)); 12533 if (!register_operand (op0, mode)) 12534 { 12535 rtx temp = gen_reg_rtx (mode); 12536 emit_insn (gen_rtx_SET (VOIDmode, temp, op1)); 12537 emit_move_insn (op0, temp); 12538 return; 12539 } 12540 } 12541 } 12542 12543 emit_insn (gen_rtx_SET (VOIDmode, op0, op1)); 12544 } 12545 12546 void 12547 ix86_expand_vector_move (enum machine_mode mode, rtx operands[]) 12548 { 12549 rtx op0 = operands[0], op1 = operands[1]; 12550 unsigned int align = GET_MODE_ALIGNMENT (mode); 12551 12552 /* Force constants other than zero into memory. We do not know how 12553 the instructions used to build constants modify the upper 64 bits 12554 of the register, once we have that information we may be able 12555 to handle some of them more efficiently. */ 12556 if (can_create_pseudo_p () 12557 && register_operand (op0, mode) 12558 && (CONSTANT_P (op1) 12559 || (GET_CODE (op1) == SUBREG 12560 && CONSTANT_P (SUBREG_REG (op1)))) 12561 && standard_sse_constant_p (op1) <= 0) 12562 op1 = validize_mem (force_const_mem (mode, op1)); 12563 12564 /* We need to check memory alignment for SSE mode since attribute 12565 can make operands unaligned. */ 12566 if (can_create_pseudo_p () 12567 && SSE_REG_MODE_P (mode) 12568 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align)) 12569 || (MEM_P (op1) && (MEM_ALIGN (op1) < align)))) 12570 { 12571 rtx tmp[2]; 12572 12573 /* ix86_expand_vector_move_misalign() does not like constants ... */ 12574 if (CONSTANT_P (op1) 12575 || (GET_CODE (op1) == SUBREG 12576 && CONSTANT_P (SUBREG_REG (op1)))) 12577 op1 = validize_mem (force_const_mem (mode, op1)); 12578 12579 /* ... nor both arguments in memory. 
*/ 12580 if (!register_operand (op0, mode) 12581 && !register_operand (op1, mode)) 12582 op1 = force_reg (mode, op1); 12583 12584 tmp[0] = op0; tmp[1] = op1; 12585 ix86_expand_vector_move_misalign (mode, tmp); 12586 return; 12587 } 12588 12589 /* Make operand1 a register if it isn't already. */ 12590 if (can_create_pseudo_p () 12591 && !register_operand (op0, mode) 12592 && !register_operand (op1, mode)) 12593 { 12594 emit_move_insn (op0, force_reg (GET_MODE (op0), op1)); 12595 return; 12596 } 12597 12598 emit_insn (gen_rtx_SET (VOIDmode, op0, op1)); 12599 } 12600 12601 /* Implement the movmisalign patterns for SSE. Non-SSE modes go 12602 straight to ix86_expand_vector_move. */ 12603 /* Code generation for scalar reg-reg moves of single and double precision data: 12604 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true) 12605 movaps reg, reg 12606 else 12607 movss reg, reg 12608 if (x86_sse_partial_reg_dependency == true) 12609 movapd reg, reg 12610 else 12611 movsd reg, reg 12612 12613 Code generation for scalar loads of double precision data: 12614 if (x86_sse_split_regs == true) 12615 movlpd mem, reg (gas syntax) 12616 else 12617 movsd mem, reg 12618 12619 Code generation for unaligned packed loads of single precision data 12620 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency): 12621 if (x86_sse_unaligned_move_optimal) 12622 movups mem, reg 12623 12624 if (x86_sse_partial_reg_dependency == true) 12625 { 12626 xorps reg, reg 12627 movlps mem, reg 12628 movhps mem+8, reg 12629 } 12630 else 12631 { 12632 movlps mem, reg 12633 movhps mem+8, reg 12634 } 12635 12636 Code generation for unaligned packed loads of double precision data 12637 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs): 12638 if (x86_sse_unaligned_move_optimal) 12639 movupd mem, reg 12640 12641 if (x86_sse_split_regs == true) 12642 { 12643 movlpd mem, reg 12644 movhpd mem+8, reg 12645 } 12646 else 12647 { 12648 movsd mem, reg 12649 movhpd mem+8, reg 12650 } 12651 */ 12652 12653 void 12654 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) 12655 { 12656 rtx op0, op1, m; 12657 12658 op0 = operands[0]; 12659 op1 = operands[1]; 12660 12661 if (TARGET_AVX) 12662 { 12663 switch (GET_MODE_CLASS (mode)) 12664 { 12665 case MODE_VECTOR_INT: 12666 case MODE_INT: 12667 switch (GET_MODE_SIZE (mode)) 12668 { 12669 case 16: 12670 op0 = gen_lowpart (V16QImode, op0); 12671 op1 = gen_lowpart (V16QImode, op1); 12672 emit_insn (gen_avx_movdqu (op0, op1)); 12673 break; 12674 case 32: 12675 op0 = gen_lowpart (V32QImode, op0); 12676 op1 = gen_lowpart (V32QImode, op1); 12677 emit_insn (gen_avx_movdqu256 (op0, op1)); 12678 break; 12679 default: 12680 gcc_unreachable (); 12681 } 12682 break; 12683 case MODE_VECTOR_FLOAT: 12684 op0 = gen_lowpart (mode, op0); 12685 op1 = gen_lowpart (mode, op1); 12686 12687 switch (mode) 12688 { 12689 case V4SFmode: 12690 emit_insn (gen_avx_movups (op0, op1)); 12691 break; 12692 case V8SFmode: 12693 emit_insn (gen_avx_movups256 (op0, op1)); 12694 break; 12695 case V2DFmode: 12696 emit_insn (gen_avx_movupd (op0, op1)); 12697 break; 12698 case V4DFmode: 12699 emit_insn (gen_avx_movupd256 (op0, op1)); 12700 break; 12701 default: 12702 gcc_unreachable (); 12703 } 12704 break; 12705 12706 default: 12707 gcc_unreachable (); 12708 } 12709 12710 return; 12711 } 12712 12713 if (MEM_P (op1)) 12714 { 12715 /* If we're optimizing for size, movups is the smallest. 
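	 For example, an unaligned V4SF load then becomes a single
	 "movups" instead of the multi-instruction xorps/movlps/movhps
	 sequence described above.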
*/ 12716 if (optimize_insn_for_size_p ()) 12717 { 12718 op0 = gen_lowpart (V4SFmode, op0); 12719 op1 = gen_lowpart (V4SFmode, op1); 12720 emit_insn (gen_sse_movups (op0, op1)); 12721 return; 12722 } 12723 12724 /* ??? If we have typed data, then it would appear that using 12725 movdqu is the only way to get unaligned data loaded with 12726 integer type. */ 12727 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) 12728 { 12729 op0 = gen_lowpart (V16QImode, op0); 12730 op1 = gen_lowpart (V16QImode, op1); 12731 emit_insn (gen_sse2_movdqu (op0, op1)); 12732 return; 12733 } 12734 12735 if (TARGET_SSE2 && mode == V2DFmode) 12736 { 12737 rtx zero; 12738 12739 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL) 12740 { 12741 op0 = gen_lowpart (V2DFmode, op0); 12742 op1 = gen_lowpart (V2DFmode, op1); 12743 emit_insn (gen_sse2_movupd (op0, op1)); 12744 return; 12745 } 12746 12747 /* When SSE registers are split into halves, we can avoid 12748 writing to the top half twice. */ 12749 if (TARGET_SSE_SPLIT_REGS) 12750 { 12751 emit_clobber (op0); 12752 zero = op0; 12753 } 12754 else 12755 { 12756 /* ??? Not sure about the best option for the Intel chips. 12757 The following would seem to satisfy; the register is 12758 entirely cleared, breaking the dependency chain. We 12759 then store to the upper half, with a dependency depth 12760 of one. A rumor has it that Intel recommends two movsd 12761 followed by an unpacklpd, but this is unconfirmed. And 12762 given that the dependency depth of the unpacklpd would 12763 still be one, I'm not sure why this would be better. */ 12764 zero = CONST0_RTX (V2DFmode); 12765 } 12766 12767 m = adjust_address (op1, DFmode, 0); 12768 emit_insn (gen_sse2_loadlpd (op0, zero, m)); 12769 m = adjust_address (op1, DFmode, 8); 12770 emit_insn (gen_sse2_loadhpd (op0, op0, m)); 12771 } 12772 else 12773 { 12774 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL) 12775 { 12776 op0 = gen_lowpart (V4SFmode, op0); 12777 op1 = gen_lowpart (V4SFmode, op1); 12778 emit_insn (gen_sse_movups (op0, op1)); 12779 return; 12780 } 12781 12782 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY) 12783 emit_move_insn (op0, CONST0_RTX (mode)); 12784 else 12785 emit_clobber (op0); 12786 12787 if (mode != V4SFmode) 12788 op0 = gen_lowpart (V4SFmode, op0); 12789 m = adjust_address (op1, V2SFmode, 0); 12790 emit_insn (gen_sse_loadlps (op0, op0, m)); 12791 m = adjust_address (op1, V2SFmode, 8); 12792 emit_insn (gen_sse_loadhps (op0, op0, m)); 12793 } 12794 } 12795 else if (MEM_P (op0)) 12796 { 12797 /* If we're optimizing for size, movups is the smallest. */ 12798 if (optimize_insn_for_size_p ()) 12799 { 12800 op0 = gen_lowpart (V4SFmode, op0); 12801 op1 = gen_lowpart (V4SFmode, op1); 12802 emit_insn (gen_sse_movups (op0, op1)); 12803 return; 12804 } 12805 12806 /* ??? Similar to above, only less clear because of quote 12807 typeless stores unquote. 
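	 In other words, when the target prefers "typeless" vector
	 stores, integer-vector stores fall through to the
	 movlps/movhps path below rather than using movdqu.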
*/ 12808 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES 12809 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) 12810 { 12811 op0 = gen_lowpart (V16QImode, op0); 12812 op1 = gen_lowpart (V16QImode, op1); 12813 emit_insn (gen_sse2_movdqu (op0, op1)); 12814 return; 12815 } 12816 12817 if (TARGET_SSE2 && mode == V2DFmode) 12818 { 12819 m = adjust_address (op0, DFmode, 0); 12820 emit_insn (gen_sse2_storelpd (m, op1)); 12821 m = adjust_address (op0, DFmode, 8); 12822 emit_insn (gen_sse2_storehpd (m, op1)); 12823 } 12824 else 12825 { 12826 if (mode != V4SFmode) 12827 op1 = gen_lowpart (V4SFmode, op1); 12828 m = adjust_address (op0, V2SFmode, 0); 12829 emit_insn (gen_sse_storelps (m, op1)); 12830 m = adjust_address (op0, V2SFmode, 8); 12831 emit_insn (gen_sse_storehps (m, op1)); 12832 } 12833 } 12834 else 12835 gcc_unreachable (); 12836 } 12837 12838 /* Expand a push in MODE. This is some mode for which we do not support 12839 proper push instructions, at least from the registers that we expect 12840 the value to live in. */ 12841 12842 void 12843 ix86_expand_push (enum machine_mode mode, rtx x) 12844 { 12845 rtx tmp; 12846 12847 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx, 12848 GEN_INT (-GET_MODE_SIZE (mode)), 12849 stack_pointer_rtx, 1, OPTAB_DIRECT); 12850 if (tmp != stack_pointer_rtx) 12851 emit_move_insn (stack_pointer_rtx, tmp); 12852 12853 tmp = gen_rtx_MEM (mode, stack_pointer_rtx); 12854 12855 /* When we push an operand onto stack, it has to be aligned at least 12856 at the function argument boundary. However since we don't have 12857 the argument type, we can't determine the actual argument 12858 boundary. */ 12859 emit_move_insn (tmp, x); 12860 } 12861 12862 /* Helper function of ix86_fixup_binary_operands to canonicalize 12863 operand order. Returns true if the operands should be swapped. */ 12864 12865 static bool 12866 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode, 12867 rtx operands[]) 12868 { 12869 rtx dst = operands[0]; 12870 rtx src1 = operands[1]; 12871 rtx src2 = operands[2]; 12872 12873 /* If the operation is not commutative, we can't do anything. */ 12874 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH) 12875 return false; 12876 12877 /* Highest priority is that src1 should match dst. */ 12878 if (rtx_equal_p (dst, src1)) 12879 return false; 12880 if (rtx_equal_p (dst, src2)) 12881 return true; 12882 12883 /* Next highest priority is that immediate constants come second. */ 12884 if (immediate_operand (src2, mode)) 12885 return false; 12886 if (immediate_operand (src1, mode)) 12887 return true; 12888 12889 /* Lowest priority is that memory references should come second. */ 12890 if (MEM_P (src2)) 12891 return false; 12892 if (MEM_P (src1)) 12893 return true; 12894 12895 return false; 12896 } 12897 12898 12899 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the 12900 destination to use for the operation. If different from the true 12901 destination in operands[0], a copy operation will be required. */ 12902 12903 rtx 12904 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode, 12905 rtx operands[]) 12906 { 12907 rtx dst = operands[0]; 12908 rtx src1 = operands[1]; 12909 rtx src2 = operands[2]; 12910 12911 /* Canonicalize operand order. */ 12912 if (ix86_swap_binary_operands_p (code, mode, operands)) 12913 { 12914 rtx temp; 12915 12916 /* It is invalid to swap operands of different modes. 
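     As an example of the canonicalization above: for a commutative
     operation where src1 is a memory reference and dst equals src2,
     the two sources are swapped so that the destination matches the
     first source and the memory reference ends up second.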
*/ 12917 gcc_assert (GET_MODE (src1) == GET_MODE (src2)); 12918 12919 temp = src1; 12920 src1 = src2; 12921 src2 = temp; 12922 } 12923 12924 /* Both source operands cannot be in memory. */ 12925 if (MEM_P (src1) && MEM_P (src2)) 12926 { 12927 /* Optimization: Only read from memory once. */ 12928 if (rtx_equal_p (src1, src2)) 12929 { 12930 src2 = force_reg (mode, src2); 12931 src1 = src2; 12932 } 12933 else 12934 src2 = force_reg (mode, src2); 12935 } 12936 12937 /* If the destination is memory, and we do not have matching source 12938 operands, do things in registers. */ 12939 if (MEM_P (dst) && !rtx_equal_p (dst, src1)) 12940 dst = gen_reg_rtx (mode); 12941 12942 /* Source 1 cannot be a constant. */ 12943 if (CONSTANT_P (src1)) 12944 src1 = force_reg (mode, src1); 12945 12946 /* Source 1 cannot be a non-matching memory. */ 12947 if (MEM_P (src1) && !rtx_equal_p (dst, src1)) 12948 src1 = force_reg (mode, src1); 12949 12950 operands[1] = src1; 12951 operands[2] = src2; 12952 return dst; 12953 } 12954 12955 /* Similarly, but assume that the destination has already been 12956 set up properly. */ 12957 12958 void 12959 ix86_fixup_binary_operands_no_copy (enum rtx_code code, 12960 enum machine_mode mode, rtx operands[]) 12961 { 12962 rtx dst = ix86_fixup_binary_operands (code, mode, operands); 12963 gcc_assert (dst == operands[0]); 12964 } 12965 12966 /* Attempt to expand a binary operator. Make the expansion closer to the 12967 actual machine, then just general_operand, which will allow 3 separate 12968 memory references (one output, two input) in a single insn. */ 12969 12970 void 12971 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode, 12972 rtx operands[]) 12973 { 12974 rtx src1, src2, dst, op, clob; 12975 12976 dst = ix86_fixup_binary_operands (code, mode, operands); 12977 src1 = operands[1]; 12978 src2 = operands[2]; 12979 12980 /* Emit the instruction. */ 12981 12982 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2)); 12983 if (reload_in_progress) 12984 { 12985 /* Reload doesn't know about the flags register, and doesn't know that 12986 it doesn't want to clobber it. We can only do this with PLUS. */ 12987 gcc_assert (code == PLUS); 12988 emit_insn (op); 12989 } 12990 else 12991 { 12992 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); 12993 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); 12994 } 12995 12996 /* Fix up the destination if needed. */ 12997 if (dst != operands[0]) 12998 emit_move_insn (operands[0], dst); 12999 } 13000 13001 /* Return TRUE or FALSE depending on whether the binary operator meets the 13002 appropriate constraints. */ 13003 13004 int 13005 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode, 13006 rtx operands[3]) 13007 { 13008 rtx dst = operands[0]; 13009 rtx src1 = operands[1]; 13010 rtx src2 = operands[2]; 13011 13012 /* Both source operands cannot be in memory. */ 13013 if (MEM_P (src1) && MEM_P (src2)) 13014 return 0; 13015 13016 /* Canonicalize operand order for commutative operators. */ 13017 if (ix86_swap_binary_operands_p (code, mode, operands)) 13018 { 13019 rtx temp = src1; 13020 src1 = src2; 13021 src2 = temp; 13022 } 13023 13024 /* If the destination is memory, we must have a matching source operand. */ 13025 if (MEM_P (dst) && !rtx_equal_p (dst, src1)) 13026 return 0; 13027 13028 /* Source 1 cannot be a constant. */ 13029 if (CONSTANT_P (src1)) 13030 return 0; 13031 13032 /* Source 1 cannot be a non-matching memory. 
*/ 13033 if (MEM_P (src1) && !rtx_equal_p (dst, src1)) 13034 return 0; 13035 13036 return 1; 13037 } 13038 13039 /* Attempt to expand a unary operator. Make the expansion closer to the 13040 actual machine, then just general_operand, which will allow 2 separate 13041 memory references (one output, one input) in a single insn. */ 13042 13043 void 13044 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode, 13045 rtx operands[]) 13046 { 13047 int matching_memory; 13048 rtx src, dst, op, clob; 13049 13050 dst = operands[0]; 13051 src = operands[1]; 13052 13053 /* If the destination is memory, and we do not have matching source 13054 operands, do things in registers. */ 13055 matching_memory = 0; 13056 if (MEM_P (dst)) 13057 { 13058 if (rtx_equal_p (dst, src)) 13059 matching_memory = 1; 13060 else 13061 dst = gen_reg_rtx (mode); 13062 } 13063 13064 /* When source operand is memory, destination must match. */ 13065 if (MEM_P (src) && !matching_memory) 13066 src = force_reg (mode, src); 13067 13068 /* Emit the instruction. */ 13069 13070 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src)); 13071 if (reload_in_progress || code == NOT) 13072 { 13073 /* Reload doesn't know about the flags register, and doesn't know that 13074 it doesn't want to clobber it. */ 13075 gcc_assert (code == NOT); 13076 emit_insn (op); 13077 } 13078 else 13079 { 13080 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); 13081 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); 13082 } 13083 13084 /* Fix up the destination if needed. */ 13085 if (dst != operands[0]) 13086 emit_move_insn (operands[0], dst); 13087 } 13088 13089 /* Return TRUE or FALSE depending on whether the unary operator meets the 13090 appropriate constraints. */ 13091 13092 int 13093 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED, 13094 enum machine_mode mode ATTRIBUTE_UNUSED, 13095 rtx operands[2] ATTRIBUTE_UNUSED) 13096 { 13097 /* If one of operands is memory, source and destination must match. */ 13098 if ((MEM_P (operands[0]) 13099 || MEM_P (operands[1])) 13100 && ! rtx_equal_p (operands[0], operands[1])) 13101 return FALSE; 13102 return TRUE; 13103 } 13104 13105 /* Post-reload splitter for converting an SF or DFmode value in an 13106 SSE register into an unsigned SImode. */ 13107 13108 void 13109 ix86_split_convert_uns_si_sse (rtx operands[]) 13110 { 13111 enum machine_mode vecmode; 13112 rtx value, large, zero_or_two31, input, two31, x; 13113 13114 large = operands[1]; 13115 zero_or_two31 = operands[2]; 13116 input = operands[3]; 13117 two31 = operands[4]; 13118 vecmode = GET_MODE (large); 13119 value = gen_rtx_REG (vecmode, REGNO (operands[0])); 13120 13121 /* Load up the value into the low element. We must ensure that the other 13122 elements are valid floats -- zero is the easiest such value. */ 13123 if (MEM_P (input)) 13124 { 13125 if (vecmode == V4SFmode) 13126 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input)); 13127 else 13128 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input)); 13129 } 13130 else 13131 { 13132 input = gen_rtx_REG (vecmode, REGNO (input)); 13133 emit_move_insn (value, CONST0_RTX (vecmode)); 13134 if (vecmode == V4SFmode) 13135 emit_insn (gen_sse_movss (value, value, input)); 13136 else 13137 emit_insn (gen_sse2_movsd (value, value, input)); 13138 } 13139 13140 emit_move_insn (large, two31); 13141 emit_move_insn (zero_or_two31, MEM_P (two31) ? 
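/* Sketch of the idea: lanes whose value is at least 2**31 first have
   2**31 subtracted so the signed cvttps2dq/cvttpd2dq conversion below
   stays in range, and the same mask, shifted into the sign-bit
   position, is xor'ed back into the integer result afterwards.  */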
large : two31); 13142 13143 x = gen_rtx_fmt_ee (LE, vecmode, large, value); 13144 emit_insn (gen_rtx_SET (VOIDmode, large, x)); 13145 13146 x = gen_rtx_AND (vecmode, zero_or_two31, large); 13147 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x)); 13148 13149 x = gen_rtx_MINUS (vecmode, value, zero_or_two31); 13150 emit_insn (gen_rtx_SET (VOIDmode, value, x)); 13151 13152 large = gen_rtx_REG (V4SImode, REGNO (large)); 13153 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31))); 13154 13155 x = gen_rtx_REG (V4SImode, REGNO (value)); 13156 if (vecmode == V4SFmode) 13157 emit_insn (gen_sse2_cvttps2dq (x, value)); 13158 else 13159 emit_insn (gen_sse2_cvttpd2dq (x, value)); 13160 value = x; 13161 13162 emit_insn (gen_xorv4si3 (value, value, large)); 13163 } 13164 13165 /* Convert an unsigned DImode value into a DFmode, using only SSE. 13166 Expects the 64-bit DImode to be supplied in a pair of integral 13167 registers. Requires SSE2; will use SSE3 if available. For x86_32, 13168 -mfpmath=sse, !optimize_size only. */ 13169 13170 void 13171 ix86_expand_convert_uns_didf_sse (rtx target, rtx input) 13172 { 13173 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt; 13174 rtx int_xmm, fp_xmm; 13175 rtx biases, exponents; 13176 rtx x; 13177 13178 int_xmm = gen_reg_rtx (V4SImode); 13179 if (TARGET_INTER_UNIT_MOVES) 13180 emit_insn (gen_movdi_to_sse (int_xmm, input)); 13181 else if (TARGET_SSE_SPLIT_REGS) 13182 { 13183 emit_clobber (int_xmm); 13184 emit_move_insn (gen_lowpart (DImode, int_xmm), input); 13185 } 13186 else 13187 { 13188 x = gen_reg_rtx (V2DImode); 13189 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0); 13190 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x)); 13191 } 13192 13193 x = gen_rtx_CONST_VECTOR (V4SImode, 13194 gen_rtvec (4, GEN_INT (0x43300000UL), 13195 GEN_INT (0x45300000UL), 13196 const0_rtx, const0_rtx)); 13197 exponents = validize_mem (force_const_mem (V4SImode, x)); 13198 13199 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */ 13200 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents)); 13201 13202 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm) 13203 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)). 13204 Similarly (0x45300000UL ## fp_value_hi_xmm) yields 13205 (0x1.0p84 + double(fp_value_hi_xmm)). 13206 Note these exponents differ by 32. */ 13207 13208 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm)); 13209 13210 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values 13211 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */ 13212 real_ldexp (&bias_lo_rvt, &dconst1, 52); 13213 real_ldexp (&bias_hi_rvt, &dconst1, 84); 13214 biases = const_double_from_real_value (bias_lo_rvt, DFmode); 13215 x = const_double_from_real_value (bias_hi_rvt, DFmode); 13216 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x)); 13217 biases = validize_mem (force_const_mem (V2DFmode, biases)); 13218 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases)); 13219 13220 /* Add the upper and lower DFmode values together. */ 13221 if (TARGET_SSE3) 13222 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm)); 13223 else 13224 { 13225 x = copy_to_mode_reg (V2DFmode, fp_xmm); 13226 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm)); 13227 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x)); 13228 } 13229 13230 ix86_expand_vector_extract (false, target, fp_xmm, 0); 13231 } 13232 13233 /* Not used, but eases macroization of patterns. 
*/ 13234 void 13235 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED, 13236 rtx input ATTRIBUTE_UNUSED) 13237 { 13238 gcc_unreachable (); 13239 } 13240 13241 /* Convert an unsigned SImode value into a DFmode. Only currently used 13242 for SSE, but applicable anywhere. */ 13243 13244 void 13245 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input) 13246 { 13247 REAL_VALUE_TYPE TWO31r; 13248 rtx x, fp; 13249 13250 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1), 13251 NULL, 1, OPTAB_DIRECT); 13252 13253 fp = gen_reg_rtx (DFmode); 13254 emit_insn (gen_floatsidf2 (fp, x)); 13255 13256 real_ldexp (&TWO31r, &dconst1, 31); 13257 x = const_double_from_real_value (TWO31r, DFmode); 13258 13259 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT); 13260 if (x != target) 13261 emit_move_insn (target, x); 13262 } 13263 13264 /* Convert a signed DImode value into a DFmode. Only used for SSE in 13265 32-bit mode; otherwise we have a direct convert instruction. */ 13266 13267 void 13268 ix86_expand_convert_sign_didf_sse (rtx target, rtx input) 13269 { 13270 REAL_VALUE_TYPE TWO32r; 13271 rtx fp_lo, fp_hi, x; 13272 13273 fp_lo = gen_reg_rtx (DFmode); 13274 fp_hi = gen_reg_rtx (DFmode); 13275 13276 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input))); 13277 13278 real_ldexp (&TWO32r, &dconst1, 32); 13279 x = const_double_from_real_value (TWO32r, DFmode); 13280 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT); 13281 13282 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input)); 13283 13284 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target, 13285 0, OPTAB_DIRECT); 13286 if (x != target) 13287 emit_move_insn (target, x); 13288 } 13289 13290 /* Convert an unsigned SImode value into a SFmode, using only SSE. 13291 For x86_32, -mfpmath=sse, !optimize_size only. */ 13292 void 13293 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input) 13294 { 13295 REAL_VALUE_TYPE ONE16r; 13296 rtx fp_hi, fp_lo, int_hi, int_lo, x; 13297 13298 real_ldexp (&ONE16r, &dconst1, 16); 13299 x = const_double_from_real_value (ONE16r, SFmode); 13300 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff), 13301 NULL, 0, OPTAB_DIRECT); 13302 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16), 13303 NULL, 0, OPTAB_DIRECT); 13304 fp_hi = gen_reg_rtx (SFmode); 13305 fp_lo = gen_reg_rtx (SFmode); 13306 emit_insn (gen_floatsisf2 (fp_hi, int_hi)); 13307 emit_insn (gen_floatsisf2 (fp_lo, int_lo)); 13308 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi, 13309 0, OPTAB_DIRECT); 13310 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target, 13311 0, OPTAB_DIRECT); 13312 if (!rtx_equal_p (target, fp_hi)) 13313 emit_move_insn (target, fp_hi); 13314 } 13315 13316 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true, 13317 then replicate the value for all elements of the vector 13318 register. 
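   For instance, for SFmode this returns the V4SFmode constant
   { VALUE, VALUE, VALUE, VALUE } when VECT is true and
   { VALUE, 0.0, 0.0, 0.0 } when it is false; the integer SImode and
   DImode cases are only ever used with VECT set.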
*/ 13319 13320 rtx 13321 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value) 13322 { 13323 rtvec v; 13324 switch (mode) 13325 { 13326 case SImode: 13327 gcc_assert (vect); 13328 v = gen_rtvec (4, value, value, value, value); 13329 return gen_rtx_CONST_VECTOR (V4SImode, v); 13330 13331 case DImode: 13332 gcc_assert (vect); 13333 v = gen_rtvec (2, value, value); 13334 return gen_rtx_CONST_VECTOR (V2DImode, v); 13335 13336 case SFmode: 13337 if (vect) 13338 v = gen_rtvec (4, value, value, value, value); 13339 else 13340 v = gen_rtvec (4, value, CONST0_RTX (SFmode), 13341 CONST0_RTX (SFmode), CONST0_RTX (SFmode)); 13342 return gen_rtx_CONST_VECTOR (V4SFmode, v); 13343 13344 case DFmode: 13345 if (vect) 13346 v = gen_rtvec (2, value, value); 13347 else 13348 v = gen_rtvec (2, value, CONST0_RTX (DFmode)); 13349 return gen_rtx_CONST_VECTOR (V2DFmode, v); 13350 13351 default: 13352 gcc_unreachable (); 13353 } 13354 } 13355 13356 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders 13357 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE 13358 for an SSE register. If VECT is true, then replicate the mask for 13359 all elements of the vector register. If INVERT is true, then create 13360 a mask excluding the sign bit. */ 13361 13362 rtx 13363 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert) 13364 { 13365 enum machine_mode vec_mode, imode; 13366 HOST_WIDE_INT hi, lo; 13367 int shift = 63; 13368 rtx v; 13369 rtx mask; 13370 13371 /* Find the sign bit, sign extended to 2*HWI. */ 13372 switch (mode) 13373 { 13374 case SImode: 13375 case SFmode: 13376 imode = SImode; 13377 vec_mode = (mode == SImode) ? V4SImode : V4SFmode; 13378 lo = 0x80000000, hi = lo < 0; 13379 break; 13380 13381 case DImode: 13382 case DFmode: 13383 imode = DImode; 13384 vec_mode = (mode == DImode) ? V2DImode : V2DFmode; 13385 if (HOST_BITS_PER_WIDE_INT >= 64) 13386 lo = (HOST_WIDE_INT)1 << shift, hi = -1; 13387 else 13388 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT); 13389 break; 13390 13391 case TImode: 13392 case TFmode: 13393 vec_mode = VOIDmode; 13394 if (HOST_BITS_PER_WIDE_INT >= 64) 13395 { 13396 imode = TImode; 13397 lo = 0, hi = (HOST_WIDE_INT)1 << shift; 13398 } 13399 else 13400 { 13401 rtvec vec; 13402 13403 imode = DImode; 13404 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT); 13405 13406 if (invert) 13407 { 13408 lo = ~lo, hi = ~hi; 13409 v = constm1_rtx; 13410 } 13411 else 13412 v = const0_rtx; 13413 13414 mask = immed_double_const (lo, hi, imode); 13415 13416 vec = gen_rtvec (2, v, mask); 13417 v = gen_rtx_CONST_VECTOR (V2DImode, vec); 13418 v = copy_to_mode_reg (mode, gen_lowpart (mode, v)); 13419 13420 return v; 13421 } 13422 break; 13423 13424 default: 13425 gcc_unreachable (); 13426 } 13427 13428 if (invert) 13429 lo = ~lo, hi = ~hi; 13430 13431 /* Force this value into the low part of a fp vector constant. */ 13432 mask = immed_double_const (lo, hi, imode); 13433 mask = gen_lowpart (mode, mask); 13434 13435 if (vec_mode == VOIDmode) 13436 return force_reg (mode, mask); 13437 13438 v = ix86_build_const_vector (mode, vect, mask); 13439 return force_reg (vec_mode, v); 13440 } 13441 13442 /* Generate code for floating point ABS or NEG. 
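   With SSE this is done with a bitwise mask from
   ix86_build_signbit_mask: NEG becomes an XOR with a mask holding only
   the sign bit of each element (0x80000000 for SFmode,
   0x8000000000000000 for DFmode), and ABS becomes an AND with the
   complemented mask, which clears the sign bit.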
*/ 13443 13444 void 13445 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode, 13446 rtx operands[]) 13447 { 13448 rtx mask, set, use, clob, dst, src; 13449 bool use_sse = false; 13450 bool vector_mode = VECTOR_MODE_P (mode); 13451 enum machine_mode elt_mode = mode; 13452 13453 if (vector_mode) 13454 { 13455 elt_mode = GET_MODE_INNER (mode); 13456 use_sse = true; 13457 } 13458 else if (mode == TFmode) 13459 use_sse = true; 13460 else if (TARGET_SSE_MATH) 13461 use_sse = SSE_FLOAT_MODE_P (mode); 13462 13463 /* NEG and ABS performed with SSE use bitwise mask operations. 13464 Create the appropriate mask now. */ 13465 if (use_sse) 13466 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS); 13467 else 13468 mask = NULL_RTX; 13469 13470 dst = operands[0]; 13471 src = operands[1]; 13472 13473 if (vector_mode) 13474 { 13475 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask); 13476 set = gen_rtx_SET (VOIDmode, dst, set); 13477 emit_insn (set); 13478 } 13479 else 13480 { 13481 set = gen_rtx_fmt_e (code, mode, src); 13482 set = gen_rtx_SET (VOIDmode, dst, set); 13483 if (mask) 13484 { 13485 use = gen_rtx_USE (VOIDmode, mask); 13486 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); 13487 emit_insn (gen_rtx_PARALLEL (VOIDmode, 13488 gen_rtvec (3, set, use, clob))); 13489 } 13490 else 13491 emit_insn (set); 13492 } 13493 } 13494 13495 /* Expand a copysign operation. Special case operand 0 being a constant. */ 13496 13497 void 13498 ix86_expand_copysign (rtx operands[]) 13499 { 13500 enum machine_mode mode; 13501 rtx dest, op0, op1, mask, nmask; 13502 13503 dest = operands[0]; 13504 op0 = operands[1]; 13505 op1 = operands[2]; 13506 13507 mode = GET_MODE (dest); 13508 13509 if (GET_CODE (op0) == CONST_DOUBLE) 13510 { 13511 rtx (*copysign_insn)(rtx, rtx, rtx, rtx); 13512 13513 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0))) 13514 op0 = simplify_unary_operation (ABS, mode, op0, mode); 13515 13516 if (mode == SFmode || mode == DFmode) 13517 { 13518 enum machine_mode vmode; 13519 13520 vmode = mode == SFmode ? V4SFmode : V2DFmode; 13521 13522 if (op0 == CONST0_RTX (mode)) 13523 op0 = CONST0_RTX (vmode); 13524 else 13525 { 13526 rtvec v; 13527 13528 if (mode == SFmode) 13529 v = gen_rtvec (4, op0, CONST0_RTX (SFmode), 13530 CONST0_RTX (SFmode), CONST0_RTX (SFmode)); 13531 else 13532 v = gen_rtvec (2, op0, CONST0_RTX (DFmode)); 13533 13534 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v)); 13535 } 13536 } 13537 else if (op0 != CONST0_RTX (mode)) 13538 op0 = force_reg (mode, op0); 13539 13540 mask = ix86_build_signbit_mask (mode, 0, 0); 13541 13542 if (mode == SFmode) 13543 copysign_insn = gen_copysignsf3_const; 13544 else if (mode == DFmode) 13545 copysign_insn = gen_copysigndf3_const; 13546 else 13547 copysign_insn = gen_copysigntf3_const; 13548 13549 emit_insn (copysign_insn (dest, op0, op1, mask)); 13550 } 13551 else 13552 { 13553 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx); 13554 13555 nmask = ix86_build_signbit_mask (mode, 0, 1); 13556 mask = ix86_build_signbit_mask (mode, 0, 0); 13557 13558 if (mode == SFmode) 13559 copysign_insn = gen_copysignsf3_var; 13560 else if (mode == DFmode) 13561 copysign_insn = gen_copysigndf3_var; 13562 else 13563 copysign_insn = gen_copysigntf3_var; 13564 13565 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask)); 13566 } 13567 } 13568 13569 /* Deconstruct a copysign operation into bit masks. 
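   (The value is assembled as |op0| | (op1 & sign-bit mask), i.e. the
   magnitude is taken from the constant operand and the sign bit from
   op1.)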
Operand 0 is known to 13570 be a constant, and so has already been expanded into a vector constant. */ 13571 13572 void 13573 ix86_split_copysign_const (rtx operands[]) 13574 { 13575 enum machine_mode mode, vmode; 13576 rtx dest, op0, op1, mask, x; 13577 13578 dest = operands[0]; 13579 op0 = operands[1]; 13580 op1 = operands[2]; 13581 mask = operands[3]; 13582 13583 mode = GET_MODE (dest); 13584 vmode = GET_MODE (mask); 13585 13586 dest = simplify_gen_subreg (vmode, dest, mode, 0); 13587 x = gen_rtx_AND (vmode, dest, mask); 13588 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 13589 13590 if (op0 != CONST0_RTX (vmode)) 13591 { 13592 x = gen_rtx_IOR (vmode, dest, op0); 13593 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 13594 } 13595 } 13596 13597 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable, 13598 so we have to do two masks. */ 13599 13600 void 13601 ix86_split_copysign_var (rtx operands[]) 13602 { 13603 enum machine_mode mode, vmode; 13604 rtx dest, scratch, op0, op1, mask, nmask, x; 13605 13606 dest = operands[0]; 13607 scratch = operands[1]; 13608 op0 = operands[2]; 13609 op1 = operands[3]; 13610 nmask = operands[4]; 13611 mask = operands[5]; 13612 13613 mode = GET_MODE (dest); 13614 vmode = GET_MODE (mask); 13615 13616 if (rtx_equal_p (op0, op1)) 13617 { 13618 /* Shouldn't happen often (it's useless, obviously), but when it does 13619 we'd generate incorrect code if we continue below. */ 13620 emit_move_insn (dest, op0); 13621 return; 13622 } 13623 13624 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */ 13625 { 13626 gcc_assert (REGNO (op1) == REGNO (scratch)); 13627 13628 x = gen_rtx_AND (vmode, scratch, mask); 13629 emit_insn (gen_rtx_SET (VOIDmode, scratch, x)); 13630 13631 dest = mask; 13632 op0 = simplify_gen_subreg (vmode, op0, mode, 0); 13633 x = gen_rtx_NOT (vmode, dest); 13634 x = gen_rtx_AND (vmode, x, op0); 13635 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 13636 } 13637 else 13638 { 13639 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */ 13640 { 13641 x = gen_rtx_AND (vmode, scratch, mask); 13642 } 13643 else /* alternative 2,4 */ 13644 { 13645 gcc_assert (REGNO (mask) == REGNO (scratch)); 13646 op1 = simplify_gen_subreg (vmode, op1, mode, 0); 13647 x = gen_rtx_AND (vmode, scratch, op1); 13648 } 13649 emit_insn (gen_rtx_SET (VOIDmode, scratch, x)); 13650 13651 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */ 13652 { 13653 dest = simplify_gen_subreg (vmode, op0, mode, 0); 13654 x = gen_rtx_AND (vmode, dest, nmask); 13655 } 13656 else /* alternative 3,4 */ 13657 { 13658 gcc_assert (REGNO (nmask) == REGNO (dest)); 13659 dest = nmask; 13660 op0 = simplify_gen_subreg (vmode, op0, mode, 0); 13661 x = gen_rtx_AND (vmode, dest, op0); 13662 } 13663 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 13664 } 13665 13666 x = gen_rtx_IOR (vmode, dest, scratch); 13667 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 13668 } 13669 13670 /* Return TRUE or FALSE depending on whether the first SET in INSN 13671 has source and destination with matching CC modes, and that the 13672 CC mode is at least as constrained as REQ_MODE. 
*/ 13673 13674 int 13675 ix86_match_ccmode (rtx insn, enum machine_mode req_mode) 13676 { 13677 rtx set; 13678 enum machine_mode set_mode; 13679 13680 set = PATTERN (insn); 13681 if (GET_CODE (set) == PARALLEL) 13682 set = XVECEXP (set, 0, 0); 13683 gcc_assert (GET_CODE (set) == SET); 13684 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE); 13685 13686 set_mode = GET_MODE (SET_DEST (set)); 13687 switch (set_mode) 13688 { 13689 case CCNOmode: 13690 if (req_mode != CCNOmode 13691 && (req_mode != CCmode 13692 || XEXP (SET_SRC (set), 1) != const0_rtx)) 13693 return 0; 13694 break; 13695 case CCmode: 13696 if (req_mode == CCGCmode) 13697 return 0; 13698 /* FALLTHRU */ 13699 case CCGCmode: 13700 if (req_mode == CCGOCmode || req_mode == CCNOmode) 13701 return 0; 13702 /* FALLTHRU */ 13703 case CCGOCmode: 13704 if (req_mode == CCZmode) 13705 return 0; 13706 /* FALLTHRU */ 13707 case CCAmode: 13708 case CCCmode: 13709 case CCOmode: 13710 case CCSmode: 13711 case CCZmode: 13712 break; 13713 13714 default: 13715 gcc_unreachable (); 13716 } 13717 13718 return (GET_MODE (SET_SRC (set)) == set_mode); 13719 } 13720 13721 /* Generate insn patterns to do an integer compare of OPERANDS. */ 13722 13723 static rtx 13724 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1) 13725 { 13726 enum machine_mode cmpmode; 13727 rtx tmp, flags; 13728 13729 cmpmode = SELECT_CC_MODE (code, op0, op1); 13730 flags = gen_rtx_REG (cmpmode, FLAGS_REG); 13731 13732 /* This is very simple, but making the interface the same as in the 13733 FP case makes the rest of the code easier. */ 13734 tmp = gen_rtx_COMPARE (cmpmode, op0, op1); 13735 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp)); 13736 13737 /* Return the test that should be put into the flags user, i.e. 13738 the bcc, scc, or cmov instruction. */ 13739 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx); 13740 } 13741 13742 /* Figure out whether to use ordered or unordered fp comparisons. 13743 Return the appropriate mode to use. */ 13744 13745 enum machine_mode 13746 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED) 13747 { 13748 /* ??? In order to make all comparisons reversible, we do all comparisons 13749 non-trapping when compiling for IEEE. Once gcc is able to distinguish 13750 all forms trapping and nontrapping comparisons, we can make inequality 13751 comparisons trapping again, since it results in better code when using 13752 FCOM based compares. */ 13753 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode; 13754 } 13755 13756 enum machine_mode 13757 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1) 13758 { 13759 enum machine_mode mode = GET_MODE (op0); 13760 13761 if (SCALAR_FLOAT_MODE_P (mode)) 13762 { 13763 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode)); 13764 return ix86_fp_compare_mode (code); 13765 } 13766 13767 switch (code) 13768 { 13769 /* Only zero flag is needed. */ 13770 case EQ: /* ZF=0 */ 13771 case NE: /* ZF!=0 */ 13772 return CCZmode; 13773 /* Codes needing carry flag. */ 13774 case GEU: /* CF=0 */ 13775 case LTU: /* CF=1 */ 13776 /* Detect overflow checks. They need just the carry flag. */ 13777 if (GET_CODE (op0) == PLUS 13778 && rtx_equal_p (op1, XEXP (op0, 0))) 13779 return CCCmode; 13780 else 13781 return CCmode; 13782 case GTU: /* CF=0 & ZF=0 */ 13783 case LEU: /* CF=1 | ZF=1 */ 13784 /* Detect overflow checks. They need just the carry flag. 
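   For instance (gtu (minus a b) a) is true exactly when the
   subtraction wrapped, i.e. when b is unsigned-greater than a, so the
   carry flag alone decides the comparison and CCCmode is enough.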
*/ 13785 if (GET_CODE (op0) == MINUS 13786 && rtx_equal_p (op1, XEXP (op0, 0))) 13787 return CCCmode; 13788 else 13789 return CCmode; 13790 /* Codes possibly doable only with sign flag when 13791 comparing against zero. */ 13792 case GE: /* SF=OF or SF=0 */ 13793 case LT: /* SF<>OF or SF=1 */ 13794 if (op1 == const0_rtx) 13795 return CCGOCmode; 13796 else 13797 /* For other cases Carry flag is not required. */ 13798 return CCGCmode; 13799 /* Codes doable only with sign flag when comparing 13800 against zero, but we miss jump instruction for it 13801 so we need to use relational tests against overflow 13802 that thus needs to be zero. */ 13803 case GT: /* ZF=0 & SF=OF */ 13804 case LE: /* ZF=1 | SF<>OF */ 13805 if (op1 == const0_rtx) 13806 return CCNOmode; 13807 else 13808 return CCGCmode; 13809 /* strcmp pattern do (use flags) and combine may ask us for proper 13810 mode. */ 13811 case USE: 13812 return CCmode; 13813 default: 13814 gcc_unreachable (); 13815 } 13816 } 13817 13818 /* Return the fixed registers used for condition codes. */ 13819 13820 static bool 13821 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) 13822 { 13823 *p1 = FLAGS_REG; 13824 *p2 = FPSR_REG; 13825 return true; 13826 } 13827 13828 /* If two condition code modes are compatible, return a condition code 13829 mode which is compatible with both. Otherwise, return 13830 VOIDmode. */ 13831 13832 static enum machine_mode 13833 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2) 13834 { 13835 if (m1 == m2) 13836 return m1; 13837 13838 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC) 13839 return VOIDmode; 13840 13841 if ((m1 == CCGCmode && m2 == CCGOCmode) 13842 || (m1 == CCGOCmode && m2 == CCGCmode)) 13843 return CCGCmode; 13844 13845 switch (m1) 13846 { 13847 default: 13848 gcc_unreachable (); 13849 13850 case CCmode: 13851 case CCGCmode: 13852 case CCGOCmode: 13853 case CCNOmode: 13854 case CCAmode: 13855 case CCCmode: 13856 case CCOmode: 13857 case CCSmode: 13858 case CCZmode: 13859 switch (m2) 13860 { 13861 default: 13862 return VOIDmode; 13863 13864 case CCmode: 13865 case CCGCmode: 13866 case CCGOCmode: 13867 case CCNOmode: 13868 case CCAmode: 13869 case CCCmode: 13870 case CCOmode: 13871 case CCSmode: 13872 case CCZmode: 13873 return CCmode; 13874 } 13875 13876 case CCFPmode: 13877 case CCFPUmode: 13878 /* These are only compatible with themselves, which we already 13879 checked above. */ 13880 return VOIDmode; 13881 } 13882 } 13883 13884 /* Split comparison code CODE into comparisons we can do using branch 13885 instructions. BYPASS_CODE is comparison code for branch that will 13886 branch around FIRST_CODE and SECOND_CODE. If some of branches 13887 is not required, set value to UNKNOWN. 13888 We never require more than two branches. 
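   For example, with IEEE math LT is emitted as first_code = UNLT
   guarded by bypass_code = UNORDERED (branch around the UNLT test when
   either operand is a NaN), while NE becomes first_code = LTGT
   followed by second_code = UNORDERED, which also branches to the
   target for unordered operands.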
*/ 13889 13890 void 13891 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code, 13892 enum rtx_code *first_code, 13893 enum rtx_code *second_code) 13894 { 13895 *first_code = code; 13896 *bypass_code = UNKNOWN; 13897 *second_code = UNKNOWN; 13898 13899 /* The fcomi comparison sets flags as follows: 13900 13901 cmp ZF PF CF 13902 > 0 0 0 13903 < 0 0 1 13904 = 1 0 0 13905 un 1 1 1 */ 13906 13907 switch (code) 13908 { 13909 case GT: /* GTU - CF=0 & ZF=0 */ 13910 case GE: /* GEU - CF=0 */ 13911 case ORDERED: /* PF=0 */ 13912 case UNORDERED: /* PF=1 */ 13913 case UNEQ: /* EQ - ZF=1 */ 13914 case UNLT: /* LTU - CF=1 */ 13915 case UNLE: /* LEU - CF=1 | ZF=1 */ 13916 case LTGT: /* EQ - ZF=0 */ 13917 break; 13918 case LT: /* LTU - CF=1 - fails on unordered */ 13919 *first_code = UNLT; 13920 *bypass_code = UNORDERED; 13921 break; 13922 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */ 13923 *first_code = UNLE; 13924 *bypass_code = UNORDERED; 13925 break; 13926 case EQ: /* EQ - ZF=1 - fails on unordered */ 13927 *first_code = UNEQ; 13928 *bypass_code = UNORDERED; 13929 break; 13930 case NE: /* NE - ZF=0 - fails on unordered */ 13931 *first_code = LTGT; 13932 *second_code = UNORDERED; 13933 break; 13934 case UNGE: /* GEU - CF=0 - fails on unordered */ 13935 *first_code = GE; 13936 *second_code = UNORDERED; 13937 break; 13938 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */ 13939 *first_code = GT; 13940 *second_code = UNORDERED; 13941 break; 13942 default: 13943 gcc_unreachable (); 13944 } 13945 if (!TARGET_IEEE_FP) 13946 { 13947 *second_code = UNKNOWN; 13948 *bypass_code = UNKNOWN; 13949 } 13950 } 13951 13952 /* Return cost of comparison done fcom + arithmetics operations on AX. 13953 All following functions do use number of instructions as a cost metrics. 13954 In future this should be tweaked to compute bytes for optimize_size and 13955 take into account performance of various instructions on various CPUs. */ 13956 static int 13957 ix86_fp_comparison_arithmetics_cost (enum rtx_code code) 13958 { 13959 if (!TARGET_IEEE_FP) 13960 return 4; 13961 /* The cost of code output by ix86_expand_fp_compare. */ 13962 switch (code) 13963 { 13964 case UNLE: 13965 case UNLT: 13966 case LTGT: 13967 case GT: 13968 case GE: 13969 case UNORDERED: 13970 case ORDERED: 13971 case UNEQ: 13972 return 4; 13973 break; 13974 case LT: 13975 case NE: 13976 case EQ: 13977 case UNGE: 13978 return 5; 13979 break; 13980 case LE: 13981 case UNGT: 13982 return 6; 13983 break; 13984 default: 13985 gcc_unreachable (); 13986 } 13987 } 13988 13989 /* Return cost of comparison done using fcomi operation. 13990 See ix86_fp_comparison_arithmetics_cost for the metrics. */ 13991 static int 13992 ix86_fp_comparison_fcomi_cost (enum rtx_code code) 13993 { 13994 enum rtx_code bypass_code, first_code, second_code; 13995 /* Return arbitrarily high cost when instruction is not supported - this 13996 prevents gcc from using it. */ 13997 if (!TARGET_CMOVE) 13998 return 1024; 13999 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 14000 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2; 14001 } 14002 14003 /* Return cost of comparison done using sahf operation. 14004 See ix86_fp_comparison_arithmetics_cost for the metrics. */ 14005 static int 14006 ix86_fp_comparison_sahf_cost (enum rtx_code code) 14007 { 14008 enum rtx_code bypass_code, first_code, second_code; 14009 /* Return arbitrarily high cost when instruction is not preferred - this 14010 avoids gcc from using it. 
*/ 14011 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))) 14012 return 1024; 14013 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 14014 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3; 14015 } 14016 14017 /* Compute cost of the comparison done using any method. 14018 See ix86_fp_comparison_arithmetics_cost for the metrics. */ 14019 static int 14020 ix86_fp_comparison_cost (enum rtx_code code) 14021 { 14022 int fcomi_cost, sahf_cost, arithmetics_cost = 1024; 14023 int min; 14024 14025 fcomi_cost = ix86_fp_comparison_fcomi_cost (code); 14026 sahf_cost = ix86_fp_comparison_sahf_cost (code); 14027 14028 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code); 14029 if (min > sahf_cost) 14030 min = sahf_cost; 14031 if (min > fcomi_cost) 14032 min = fcomi_cost; 14033 return min; 14034 } 14035 14036 /* Return true if we should use an FCOMI instruction for this 14037 fp comparison. */ 14038 14039 int 14040 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED) 14041 { 14042 enum rtx_code swapped_code = swap_condition (code); 14043 14044 return ((ix86_fp_comparison_cost (code) 14045 == ix86_fp_comparison_fcomi_cost (code)) 14046 || (ix86_fp_comparison_cost (swapped_code) 14047 == ix86_fp_comparison_fcomi_cost (swapped_code))); 14048 } 14049 14050 /* Swap, force into registers, or otherwise massage the two operands 14051 to a fp comparison. The operands are updated in place; the new 14052 comparison code is returned. */ 14053 14054 static enum rtx_code 14055 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1) 14056 { 14057 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code); 14058 rtx op0 = *pop0, op1 = *pop1; 14059 enum machine_mode op_mode = GET_MODE (op0); 14060 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode); 14061 14062 /* All of the unordered compare instructions only work on registers. 14063 The same is true of the fcomi compare instructions. The XFmode 14064 compare instructions require registers except when comparing 14065 against zero or when converting operand 1 from fixed point to 14066 floating point. */ 14067 14068 if (!is_sse 14069 && (fpcmp_mode == CCFPUmode 14070 || (op_mode == XFmode 14071 && ! (standard_80387_constant_p (op0) == 1 14072 || standard_80387_constant_p (op1) == 1) 14073 && GET_CODE (op1) != FLOAT) 14074 || ix86_use_fcomi_compare (code))) 14075 { 14076 op0 = force_reg (op_mode, op0); 14077 op1 = force_reg (op_mode, op1); 14078 } 14079 else 14080 { 14081 /* %%% We only allow op1 in memory; op0 must be st(0). So swap 14082 things around if they appear profitable, otherwise force op0 14083 into a register. */ 14084 14085 if (standard_80387_constant_p (op0) == 0 14086 || (MEM_P (op0) 14087 && ! (standard_80387_constant_p (op1) == 0 14088 || MEM_P (op1)))) 14089 { 14090 rtx tmp; 14091 tmp = op0, op0 = op1, op1 = tmp; 14092 code = swap_condition (code); 14093 } 14094 14095 if (!REG_P (op0)) 14096 op0 = force_reg (op_mode, op0); 14097 14098 if (CONSTANT_P (op1)) 14099 { 14100 int tmp = standard_80387_constant_p (op1); 14101 if (tmp == 0) 14102 op1 = validize_mem (force_const_mem (op_mode, op1)); 14103 else if (tmp == 1) 14104 { 14105 if (TARGET_CMOVE) 14106 op1 = force_reg (op_mode, op1); 14107 } 14108 else 14109 op1 = force_reg (op_mode, op1); 14110 } 14111 } 14112 14113 /* Try to rearrange the comparison to make it cheaper. 
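   For instance, when neither fcomi nor sahf is usable, the arithmetic
   cost table above rates LT at 5 and its swapped condition GT at 4
   under TARGET_IEEE_FP, so "a < b" is rewritten here as "b > a".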
*/ 14114 if (ix86_fp_comparison_cost (code) 14115 > ix86_fp_comparison_cost (swap_condition (code)) 14116 && (REG_P (op1) || can_create_pseudo_p ())) 14117 { 14118 rtx tmp; 14119 tmp = op0, op0 = op1, op1 = tmp; 14120 code = swap_condition (code); 14121 if (!REG_P (op0)) 14122 op0 = force_reg (op_mode, op0); 14123 } 14124 14125 *pop0 = op0; 14126 *pop1 = op1; 14127 return code; 14128 } 14129 14130 /* Convert comparison codes we use to represent FP comparison to integer 14131 code that will result in proper branch. Return UNKNOWN if no such code 14132 is available. */ 14133 14134 enum rtx_code 14135 ix86_fp_compare_code_to_integer (enum rtx_code code) 14136 { 14137 switch (code) 14138 { 14139 case GT: 14140 return GTU; 14141 case GE: 14142 return GEU; 14143 case ORDERED: 14144 case UNORDERED: 14145 return code; 14146 break; 14147 case UNEQ: 14148 return EQ; 14149 break; 14150 case UNLT: 14151 return LTU; 14152 break; 14153 case UNLE: 14154 return LEU; 14155 break; 14156 case LTGT: 14157 return NE; 14158 break; 14159 default: 14160 return UNKNOWN; 14161 } 14162 } 14163 14164 /* Generate insn patterns to do a floating point compare of OPERANDS. */ 14165 14166 static rtx 14167 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch, 14168 rtx *second_test, rtx *bypass_test) 14169 { 14170 enum machine_mode fpcmp_mode, intcmp_mode; 14171 rtx tmp, tmp2; 14172 int cost = ix86_fp_comparison_cost (code); 14173 enum rtx_code bypass_code, first_code, second_code; 14174 14175 fpcmp_mode = ix86_fp_compare_mode (code); 14176 code = ix86_prepare_fp_compare_args (code, &op0, &op1); 14177 14178 if (second_test) 14179 *second_test = NULL_RTX; 14180 if (bypass_test) 14181 *bypass_test = NULL_RTX; 14182 14183 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 14184 14185 /* Do fcomi/sahf based test when profitable. */ 14186 if (ix86_fp_comparison_arithmetics_cost (code) > cost 14187 && (bypass_code == UNKNOWN || bypass_test) 14188 && (second_code == UNKNOWN || second_test)) 14189 { 14190 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); 14191 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), 14192 tmp); 14193 if (TARGET_CMOVE) 14194 emit_insn (tmp); 14195 else 14196 { 14197 gcc_assert (TARGET_SAHF); 14198 14199 if (!scratch) 14200 scratch = gen_reg_rtx (HImode); 14201 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch); 14202 14203 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2))); 14204 } 14205 14206 /* The FP codes work out to act like unsigned. */ 14207 intcmp_mode = fpcmp_mode; 14208 code = first_code; 14209 if (bypass_code != UNKNOWN) 14210 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode, 14211 gen_rtx_REG (intcmp_mode, FLAGS_REG), 14212 const0_rtx); 14213 if (second_code != UNKNOWN) 14214 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode, 14215 gen_rtx_REG (intcmp_mode, FLAGS_REG), 14216 const0_rtx); 14217 } 14218 else 14219 { 14220 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */ 14221 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); 14222 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); 14223 if (!scratch) 14224 scratch = gen_reg_rtx (HImode); 14225 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2)); 14226 14227 /* In the unordered case, we have to check C2 for NaN's, which 14228 doesn't happen to work out to anything nice combination-wise. 14229 So do some bit twiddling on the value we've got in AH to come 14230 up with an appropriate set of condition codes. 
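   After the fnstsw above, the high byte of the scratch register holds
   the x87 condition bits as C0 = 0x01, C2 = 0x04 and C3 = 0x40, the
   analogues of CF, PF and ZF in the fcomi table earlier.  The masks
   below combine these; e.g. for GT the emitted sequence is roughly

	testb	$0x45, %ah
	je	<target>

   which is taken only when C0 = C2 = C3 = 0, i.e. only in the ">" row
   of that table.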
*/ 14231 14232 intcmp_mode = CCNOmode; 14233 switch (code) 14234 { 14235 case GT: 14236 case UNGT: 14237 if (code == GT || !TARGET_IEEE_FP) 14238 { 14239 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); 14240 code = EQ; 14241 } 14242 else 14243 { 14244 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 14245 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); 14246 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44))); 14247 intcmp_mode = CCmode; 14248 code = GEU; 14249 } 14250 break; 14251 case LT: 14252 case UNLT: 14253 if (code == LT && TARGET_IEEE_FP) 14254 { 14255 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 14256 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01))); 14257 intcmp_mode = CCmode; 14258 code = EQ; 14259 } 14260 else 14261 { 14262 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01))); 14263 code = NE; 14264 } 14265 break; 14266 case GE: 14267 case UNGE: 14268 if (code == GE || !TARGET_IEEE_FP) 14269 { 14270 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05))); 14271 code = EQ; 14272 } 14273 else 14274 { 14275 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 14276 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, 14277 GEN_INT (0x01))); 14278 code = NE; 14279 } 14280 break; 14281 case LE: 14282 case UNLE: 14283 if (code == LE && TARGET_IEEE_FP) 14284 { 14285 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 14286 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); 14287 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); 14288 intcmp_mode = CCmode; 14289 code = LTU; 14290 } 14291 else 14292 { 14293 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); 14294 code = NE; 14295 } 14296 break; 14297 case EQ: 14298 case UNEQ: 14299 if (code == EQ && TARGET_IEEE_FP) 14300 { 14301 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 14302 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); 14303 intcmp_mode = CCmode; 14304 code = EQ; 14305 } 14306 else 14307 { 14308 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); 14309 code = NE; 14310 break; 14311 } 14312 break; 14313 case NE: 14314 case LTGT: 14315 if (code == NE && TARGET_IEEE_FP) 14316 { 14317 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 14318 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, 14319 GEN_INT (0x40))); 14320 code = NE; 14321 } 14322 else 14323 { 14324 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); 14325 code = EQ; 14326 } 14327 break; 14328 14329 case UNORDERED: 14330 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); 14331 code = NE; 14332 break; 14333 case ORDERED: 14334 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); 14335 code = EQ; 14336 break; 14337 14338 default: 14339 gcc_unreachable (); 14340 } 14341 } 14342 14343 /* Return the test that should be put into the flags user, i.e. 14344 the bcc, scc, or cmov instruction. 
*/ 14345 return gen_rtx_fmt_ee (code, VOIDmode, 14346 gen_rtx_REG (intcmp_mode, FLAGS_REG), 14347 const0_rtx); 14348 } 14349 14350 rtx 14351 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test) 14352 { 14353 rtx op0, op1, ret; 14354 op0 = ix86_compare_op0; 14355 op1 = ix86_compare_op1; 14356 14357 if (second_test) 14358 *second_test = NULL_RTX; 14359 if (bypass_test) 14360 *bypass_test = NULL_RTX; 14361 14362 if (ix86_compare_emitted) 14363 { 14364 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx); 14365 ix86_compare_emitted = NULL_RTX; 14366 } 14367 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0))) 14368 { 14369 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0))); 14370 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX, 14371 second_test, bypass_test); 14372 } 14373 else 14374 ret = ix86_expand_int_compare (code, op0, op1); 14375 14376 return ret; 14377 } 14378 14379 /* Return true if the CODE will result in nontrivial jump sequence. */ 14380 bool 14381 ix86_fp_jump_nontrivial_p (enum rtx_code code) 14382 { 14383 enum rtx_code bypass_code, first_code, second_code; 14384 if (!TARGET_CMOVE) 14385 return true; 14386 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 14387 return bypass_code != UNKNOWN || second_code != UNKNOWN; 14388 } 14389 14390 void 14391 ix86_expand_branch (enum rtx_code code, rtx label) 14392 { 14393 rtx tmp; 14394 14395 /* If we have emitted a compare insn, go straight to simple. 14396 ix86_expand_compare won't emit anything if ix86_compare_emitted 14397 is non NULL. */ 14398 if (ix86_compare_emitted) 14399 goto simple; 14400 14401 switch (GET_MODE (ix86_compare_op0)) 14402 { 14403 case QImode: 14404 case HImode: 14405 case SImode: 14406 simple: 14407 tmp = ix86_expand_compare (code, NULL, NULL); 14408 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 14409 gen_rtx_LABEL_REF (VOIDmode, label), 14410 pc_rtx); 14411 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); 14412 return; 14413 14414 case SFmode: 14415 case DFmode: 14416 case XFmode: 14417 { 14418 rtvec vec; 14419 int use_fcomi; 14420 enum rtx_code bypass_code, first_code, second_code; 14421 14422 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0, 14423 &ix86_compare_op1); 14424 14425 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 14426 14427 /* Check whether we will use the natural sequence with one jump. If 14428 so, we can expand jump early. Otherwise delay expansion by 14429 creating compound insn to not confuse optimizers. */ 14430 if (bypass_code == UNKNOWN && second_code == UNKNOWN) 14431 { 14432 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1, 14433 gen_rtx_LABEL_REF (VOIDmode, label), 14434 pc_rtx, NULL_RTX, NULL_RTX); 14435 } 14436 else 14437 { 14438 tmp = gen_rtx_fmt_ee (code, VOIDmode, 14439 ix86_compare_op0, ix86_compare_op1); 14440 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 14441 gen_rtx_LABEL_REF (VOIDmode, label), 14442 pc_rtx); 14443 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp); 14444 14445 use_fcomi = ix86_use_fcomi_compare (code); 14446 vec = rtvec_alloc (3 + !use_fcomi); 14447 RTVEC_ELT (vec, 0) = tmp; 14448 RTVEC_ELT (vec, 1) 14449 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG)); 14450 RTVEC_ELT (vec, 2) 14451 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG)); 14452 if (! 
use_fcomi) 14453 RTVEC_ELT (vec, 3) 14454 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode)); 14455 14456 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec)); 14457 } 14458 return; 14459 } 14460 14461 case DImode: 14462 if (TARGET_64BIT) 14463 goto simple; 14464 case TImode: 14465 /* Expand DImode branch into multiple compare+branch. */ 14466 { 14467 rtx lo[2], hi[2], label2; 14468 enum rtx_code code1, code2, code3; 14469 enum machine_mode submode; 14470 14471 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1)) 14472 { 14473 tmp = ix86_compare_op0; 14474 ix86_compare_op0 = ix86_compare_op1; 14475 ix86_compare_op1 = tmp; 14476 code = swap_condition (code); 14477 } 14478 if (GET_MODE (ix86_compare_op0) == DImode) 14479 { 14480 split_di (&ix86_compare_op0, 1, lo+0, hi+0); 14481 split_di (&ix86_compare_op1, 1, lo+1, hi+1); 14482 submode = SImode; 14483 } 14484 else 14485 { 14486 split_ti (&ix86_compare_op0, 1, lo+0, hi+0); 14487 split_ti (&ix86_compare_op1, 1, lo+1, hi+1); 14488 submode = DImode; 14489 } 14490 14491 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to 14492 avoid two branches. This costs one extra insn, so disable when 14493 optimizing for size. */ 14494 14495 if ((code == EQ || code == NE) 14496 && (!optimize_insn_for_size_p () 14497 || hi[1] == const0_rtx || lo[1] == const0_rtx)) 14498 { 14499 rtx xor0, xor1; 14500 14501 xor1 = hi[0]; 14502 if (hi[1] != const0_rtx) 14503 xor1 = expand_binop (submode, xor_optab, xor1, hi[1], 14504 NULL_RTX, 0, OPTAB_WIDEN); 14505 14506 xor0 = lo[0]; 14507 if (lo[1] != const0_rtx) 14508 xor0 = expand_binop (submode, xor_optab, xor0, lo[1], 14509 NULL_RTX, 0, OPTAB_WIDEN); 14510 14511 tmp = expand_binop (submode, ior_optab, xor1, xor0, 14512 NULL_RTX, 0, OPTAB_WIDEN); 14513 14514 ix86_compare_op0 = tmp; 14515 ix86_compare_op1 = const0_rtx; 14516 ix86_expand_branch (code, label); 14517 return; 14518 } 14519 14520 /* Otherwise, if we are doing less-than or greater-or-equal-than, 14521 op1 is a constant and the low word is zero, then we can just 14522 examine the high word. Similarly for low word -1 and 14523 less-or-equal-than or greater-than. */ 14524 14525 if (CONST_INT_P (hi[1])) 14526 switch (code) 14527 { 14528 case LT: case LTU: case GE: case GEU: 14529 if (lo[1] == const0_rtx) 14530 { 14531 ix86_compare_op0 = hi[0]; 14532 ix86_compare_op1 = hi[1]; 14533 ix86_expand_branch (code, label); 14534 return; 14535 } 14536 break; 14537 case LE: case LEU: case GT: case GTU: 14538 if (lo[1] == constm1_rtx) 14539 { 14540 ix86_compare_op0 = hi[0]; 14541 ix86_compare_op1 = hi[1]; 14542 ix86_expand_branch (code, label); 14543 return; 14544 } 14545 break; 14546 default: 14547 break; 14548 } 14549 14550 /* Otherwise, we need two or three jumps. 
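   For example a signed "a <= b" is split into code1 = LT, code2 = GT
   and code3 = LEU:

	if (hi(a) < hi(b)) goto true;
	if (hi(a) > hi(b)) goto false;
	if (lo(a) <=u lo(b)) goto true;
     false:

   For EQ and NE one of the first two branches is not needed (code1 or
   code2 stays UNKNOWN below), so only two jumps are emitted.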
*/ 14551 14552 label2 = gen_label_rtx (); 14553 14554 code1 = code; 14555 code2 = swap_condition (code); 14556 code3 = unsigned_condition (code); 14557 14558 switch (code) 14559 { 14560 case LT: case GT: case LTU: case GTU: 14561 break; 14562 14563 case LE: code1 = LT; code2 = GT; break; 14564 case GE: code1 = GT; code2 = LT; break; 14565 case LEU: code1 = LTU; code2 = GTU; break; 14566 case GEU: code1 = GTU; code2 = LTU; break; 14567 14568 case EQ: code1 = UNKNOWN; code2 = NE; break; 14569 case NE: code2 = UNKNOWN; break; 14570 14571 default: 14572 gcc_unreachable (); 14573 } 14574 14575 /* 14576 * a < b => 14577 * if (hi(a) < hi(b)) goto true; 14578 * if (hi(a) > hi(b)) goto false; 14579 * if (lo(a) < lo(b)) goto true; 14580 * false: 14581 */ 14582 14583 ix86_compare_op0 = hi[0]; 14584 ix86_compare_op1 = hi[1]; 14585 14586 if (code1 != UNKNOWN) 14587 ix86_expand_branch (code1, label); 14588 if (code2 != UNKNOWN) 14589 ix86_expand_branch (code2, label2); 14590 14591 ix86_compare_op0 = lo[0]; 14592 ix86_compare_op1 = lo[1]; 14593 ix86_expand_branch (code3, label); 14594 14595 if (code2 != UNKNOWN) 14596 emit_label (label2); 14597 return; 14598 } 14599 14600 default: 14601 gcc_unreachable (); 14602 } 14603 } 14604 14605 /* Split branch based on floating point condition. */ 14606 void 14607 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2, 14608 rtx target1, rtx target2, rtx tmp, rtx pushed) 14609 { 14610 rtx second, bypass; 14611 rtx label = NULL_RTX; 14612 rtx condition; 14613 int bypass_probability = -1, second_probability = -1, probability = -1; 14614 rtx i; 14615 14616 if (target2 != pc_rtx) 14617 { 14618 rtx tmp = target2; 14619 code = reverse_condition_maybe_unordered (code); 14620 target2 = target1; 14621 target1 = tmp; 14622 } 14623 14624 condition = ix86_expand_fp_compare (code, op1, op2, 14625 tmp, &second, &bypass); 14626 14627 /* Remove pushed operand from stack. */ 14628 if (pushed) 14629 ix86_free_from_memory (GET_MODE (pushed)); 14630 14631 if (split_branch_probability >= 0) 14632 { 14633 /* Distribute the probabilities across the jumps. 14634 Assume the BYPASS and SECOND to be always test 14635 for UNORDERED. */ 14636 probability = split_branch_probability; 14637 14638 /* Value of 1 is low enough to make no need for probability 14639 to be updated. Later we may run some experiments and see 14640 if unordered values are more frequent in practice. 
*/ 14641 if (bypass) 14642 bypass_probability = 1; 14643 if (second) 14644 second_probability = 1; 14645 } 14646 if (bypass != NULL_RTX) 14647 { 14648 label = gen_label_rtx (); 14649 i = emit_jump_insn (gen_rtx_SET 14650 (VOIDmode, pc_rtx, 14651 gen_rtx_IF_THEN_ELSE (VOIDmode, 14652 bypass, 14653 gen_rtx_LABEL_REF (VOIDmode, 14654 label), 14655 pc_rtx))); 14656 if (bypass_probability >= 0) 14657 REG_NOTES (i) 14658 = gen_rtx_EXPR_LIST (REG_BR_PROB, 14659 GEN_INT (bypass_probability), 14660 REG_NOTES (i)); 14661 } 14662 i = emit_jump_insn (gen_rtx_SET 14663 (VOIDmode, pc_rtx, 14664 gen_rtx_IF_THEN_ELSE (VOIDmode, 14665 condition, target1, target2))); 14666 if (probability >= 0) 14667 REG_NOTES (i) 14668 = gen_rtx_EXPR_LIST (REG_BR_PROB, 14669 GEN_INT (probability), 14670 REG_NOTES (i)); 14671 if (second != NULL_RTX) 14672 { 14673 i = emit_jump_insn (gen_rtx_SET 14674 (VOIDmode, pc_rtx, 14675 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, 14676 target2))); 14677 if (second_probability >= 0) 14678 REG_NOTES (i) 14679 = gen_rtx_EXPR_LIST (REG_BR_PROB, 14680 GEN_INT (second_probability), 14681 REG_NOTES (i)); 14682 } 14683 if (label != NULL_RTX) 14684 emit_label (label); 14685 } 14686 14687 int 14688 ix86_expand_setcc (enum rtx_code code, rtx dest) 14689 { 14690 rtx ret, tmp, tmpreg, equiv; 14691 rtx second_test, bypass_test; 14692 14693 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode)) 14694 return 0; /* FAIL */ 14695 14696 gcc_assert (GET_MODE (dest) == QImode); 14697 14698 ret = ix86_expand_compare (code, &second_test, &bypass_test); 14699 PUT_MODE (ret, QImode); 14700 14701 tmp = dest; 14702 tmpreg = dest; 14703 14704 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret)); 14705 if (bypass_test || second_test) 14706 { 14707 rtx test = second_test; 14708 int bypass = 0; 14709 rtx tmp2 = gen_reg_rtx (QImode); 14710 if (bypass_test) 14711 { 14712 gcc_assert (!second_test); 14713 test = bypass_test; 14714 bypass = 1; 14715 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test))); 14716 } 14717 PUT_MODE (test, QImode); 14718 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test)); 14719 14720 if (bypass) 14721 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2)); 14722 else 14723 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2)); 14724 } 14725 14726 /* Attach a REG_EQUAL note describing the comparison result. */ 14727 if (ix86_compare_op0 && ix86_compare_op1) 14728 { 14729 equiv = simplify_gen_relational (code, QImode, 14730 GET_MODE (ix86_compare_op0), 14731 ix86_compare_op0, ix86_compare_op1); 14732 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv); 14733 } 14734 14735 return 1; /* DONE */ 14736 } 14737 14738 /* Expand comparison setting or clearing carry flag. Return true when 14739 successful and set pop for the operation. */ 14740 static bool 14741 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) 14742 { 14743 enum machine_mode mode = 14744 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); 14745 14746 /* Do not handle DImode compares that go through special path. */ 14747 if (mode == (TARGET_64BIT ? TImode : DImode)) 14748 return false; 14749 14750 if (SCALAR_FLOAT_MODE_P (mode)) 14751 { 14752 rtx second_test = NULL, bypass_test = NULL; 14753 rtx compare_op, compare_seq; 14754 14755 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode)); 14756 14757 /* Shortcut: following common codes never translate 14758 into carry flag compares. 
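   (EQ, NE, UNEQ and LTGT are decided by ZF and ORDERED/UNORDERED by
   PF, as the fcomi table earlier shows, so none of them can be reduced
   to a bare carry-flag test.)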
*/ 14759 if (code == EQ || code == NE || code == UNEQ || code == LTGT 14760 || code == ORDERED || code == UNORDERED) 14761 return false; 14762 14763 /* These comparisons require zero flag; swap operands so they won't. */ 14764 if ((code == GT || code == UNLE || code == LE || code == UNGT) 14765 && !TARGET_IEEE_FP) 14766 { 14767 rtx tmp = op0; 14768 op0 = op1; 14769 op1 = tmp; 14770 code = swap_condition (code); 14771 } 14772 14773 /* Try to expand the comparison and verify that we end up with 14774 carry flag based comparison. This fails to be true only when 14775 we decide to expand comparison using arithmetic that is not 14776 too common scenario. */ 14777 start_sequence (); 14778 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX, 14779 &second_test, &bypass_test); 14780 compare_seq = get_insns (); 14781 end_sequence (); 14782 14783 if (second_test || bypass_test) 14784 return false; 14785 14786 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode 14787 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) 14788 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op)); 14789 else 14790 code = GET_CODE (compare_op); 14791 14792 if (code != LTU && code != GEU) 14793 return false; 14794 14795 emit_insn (compare_seq); 14796 *pop = compare_op; 14797 return true; 14798 } 14799 14800 if (!INTEGRAL_MODE_P (mode)) 14801 return false; 14802 14803 switch (code) 14804 { 14805 case LTU: 14806 case GEU: 14807 break; 14808 14809 /* Convert a==0 into (unsigned)a<1. */ 14810 case EQ: 14811 case NE: 14812 if (op1 != const0_rtx) 14813 return false; 14814 op1 = const1_rtx; 14815 code = (code == EQ ? LTU : GEU); 14816 break; 14817 14818 /* Convert a>b into b<a or a>=b-1. */ 14819 case GTU: 14820 case LEU: 14821 if (CONST_INT_P (op1)) 14822 { 14823 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0)); 14824 /* Bail out on overflow. We still can swap operands but that 14825 would force loading of the constant into register. */ 14826 if (op1 == const0_rtx 14827 || !x86_64_immediate_operand (op1, GET_MODE (op1))) 14828 return false; 14829 code = (code == GTU ? GEU : LTU); 14830 } 14831 else 14832 { 14833 rtx tmp = op1; 14834 op1 = op0; 14835 op0 = tmp; 14836 code = (code == GTU ? LTU : GEU); 14837 } 14838 break; 14839 14840 /* Convert a>=0 into (unsigned)a<0x80000000. */ 14841 case LT: 14842 case GE: 14843 if (mode == DImode || op1 != const0_rtx) 14844 return false; 14845 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); 14846 code = (code == LT ? GEU : LTU); 14847 break; 14848 case LE: 14849 case GT: 14850 if (mode == DImode || op1 != constm1_rtx) 14851 return false; 14852 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); 14853 code = (code == LE ? GEU : LTU); 14854 break; 14855 14856 default: 14857 return false; 14858 } 14859 /* Swapping operands may cause constant to appear as first operand. 
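   (As an illustration of the rewrites above: "x == 0" becomes the
   unsigned test "x <u 1", and a non-constant "a >u b" is swapped into
   "b <u a"; either way the resulting LTU/GEU condition is exactly the
   carry flag produced by the cmp.)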
*/ 14860 if (!nonimmediate_operand (op0, VOIDmode)) 14861 { 14862 if (!can_create_pseudo_p ()) 14863 return false; 14864 op0 = force_reg (mode, op0); 14865 } 14866 ix86_compare_op0 = op0; 14867 ix86_compare_op1 = op1; 14868 *pop = ix86_expand_compare (code, NULL, NULL); 14869 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU); 14870 return true; 14871 } 14872 14873 int 14874 ix86_expand_int_movcc (rtx operands[]) 14875 { 14876 enum rtx_code code = GET_CODE (operands[1]), compare_code; 14877 rtx compare_seq, compare_op; 14878 rtx second_test, bypass_test; 14879 enum machine_mode mode = GET_MODE (operands[0]); 14880 bool sign_bit_compare_p = false;; 14881 14882 start_sequence (); 14883 compare_op = ix86_expand_compare (code, &second_test, &bypass_test); 14884 compare_seq = get_insns (); 14885 end_sequence (); 14886 14887 compare_code = GET_CODE (compare_op); 14888 14889 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT)) 14890 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE))) 14891 sign_bit_compare_p = true; 14892 14893 /* Don't attempt mode expansion here -- if we had to expand 5 or 6 14894 HImode insns, we'd be swallowed in word prefix ops. */ 14895 14896 if ((mode != HImode || TARGET_FAST_PREFIX) 14897 && (mode != (TARGET_64BIT ? TImode : DImode)) 14898 && CONST_INT_P (operands[2]) 14899 && CONST_INT_P (operands[3])) 14900 { 14901 rtx out = operands[0]; 14902 HOST_WIDE_INT ct = INTVAL (operands[2]); 14903 HOST_WIDE_INT cf = INTVAL (operands[3]); 14904 HOST_WIDE_INT diff; 14905 14906 diff = ct - cf; 14907 /* Sign bit compares are better done using shifts than we do by using 14908 sbb. */ 14909 if (sign_bit_compare_p 14910 || ix86_expand_carry_flag_compare (code, ix86_compare_op0, 14911 ix86_compare_op1, &compare_op)) 14912 { 14913 /* Detect overlap between destination and compare sources. */ 14914 rtx tmp = out; 14915 14916 if (!sign_bit_compare_p) 14917 { 14918 bool fpcmp = false; 14919 14920 compare_code = GET_CODE (compare_op); 14921 14922 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode 14923 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) 14924 { 14925 fpcmp = true; 14926 compare_code = ix86_fp_compare_code_to_integer (compare_code); 14927 } 14928 14929 /* To simplify rest of code, restrict to the GEU case. */ 14930 if (compare_code == LTU) 14931 { 14932 HOST_WIDE_INT tmp = ct; 14933 ct = cf; 14934 cf = tmp; 14935 compare_code = reverse_condition (compare_code); 14936 code = reverse_condition (code); 14937 } 14938 else 14939 { 14940 if (fpcmp) 14941 PUT_CODE (compare_op, 14942 reverse_condition_maybe_unordered 14943 (GET_CODE (compare_op))); 14944 else 14945 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); 14946 } 14947 diff = ct - cf; 14948 14949 if (reg_overlap_mentioned_p (out, ix86_compare_op0) 14950 || reg_overlap_mentioned_p (out, ix86_compare_op1)) 14951 tmp = gen_reg_rtx (mode); 14952 14953 if (mode == DImode) 14954 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op)); 14955 else 14956 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op)); 14957 } 14958 else 14959 { 14960 if (code == GT || code == GE) 14961 code = reverse_condition (code); 14962 else 14963 { 14964 HOST_WIDE_INT tmp = ct; 14965 ct = cf; 14966 cf = tmp; 14967 diff = ct - cf; 14968 } 14969 tmp = emit_store_flag (tmp, code, ix86_compare_op0, 14970 ix86_compare_op1, VOIDmode, 0, -1); 14971 } 14972 14973 if (diff == 1) 14974 { 14975 /* 14976 * cmpl op0,op1 14977 * sbbl dest,dest 14978 * [addl dest, ct] 14979 * 14980 * Size 5 - 8. 
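*
* After the sbb, dest is -1 when the carry was set and 0 otherwise;
* since ct - cf == 1 in this branch, the optional add of ct then
* yields cf or ct respectively.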
14981 */ 14982 if (ct) 14983 tmp = expand_simple_binop (mode, PLUS, 14984 tmp, GEN_INT (ct), 14985 copy_rtx (tmp), 1, OPTAB_DIRECT); 14986 } 14987 else if (cf == -1) 14988 { 14989 /* 14990 * cmpl op0,op1 14991 * sbbl dest,dest 14992 * orl $ct, dest 14993 * 14994 * Size 8. 14995 */ 14996 tmp = expand_simple_binop (mode, IOR, 14997 tmp, GEN_INT (ct), 14998 copy_rtx (tmp), 1, OPTAB_DIRECT); 14999 } 15000 else if (diff == -1 && ct) 15001 { 15002 /* 15003 * cmpl op0,op1 15004 * sbbl dest,dest 15005 * notl dest 15006 * [addl dest, cf] 15007 * 15008 * Size 8 - 11. 15009 */ 15010 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); 15011 if (cf) 15012 tmp = expand_simple_binop (mode, PLUS, 15013 copy_rtx (tmp), GEN_INT (cf), 15014 copy_rtx (tmp), 1, OPTAB_DIRECT); 15015 } 15016 else 15017 { 15018 /* 15019 * cmpl op0,op1 15020 * sbbl dest,dest 15021 * [notl dest] 15022 * andl cf - ct, dest 15023 * [addl dest, ct] 15024 * 15025 * Size 8 - 11. 15026 */ 15027 15028 if (cf == 0) 15029 { 15030 cf = ct; 15031 ct = 0; 15032 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); 15033 } 15034 15035 tmp = expand_simple_binop (mode, AND, 15036 copy_rtx (tmp), 15037 gen_int_mode (cf - ct, mode), 15038 copy_rtx (tmp), 1, OPTAB_DIRECT); 15039 if (ct) 15040 tmp = expand_simple_binop (mode, PLUS, 15041 copy_rtx (tmp), GEN_INT (ct), 15042 copy_rtx (tmp), 1, OPTAB_DIRECT); 15043 } 15044 15045 if (!rtx_equal_p (tmp, out)) 15046 emit_move_insn (copy_rtx (out), copy_rtx (tmp)); 15047 15048 return 1; /* DONE */ 15049 } 15050 15051 if (diff < 0) 15052 { 15053 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0); 15054 15055 HOST_WIDE_INT tmp; 15056 tmp = ct, ct = cf, cf = tmp; 15057 diff = -diff; 15058 15059 if (SCALAR_FLOAT_MODE_P (cmp_mode)) 15060 { 15061 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode)); 15062 15063 /* We may be reversing unordered compare to normal compare, that 15064 is not valid in general (we may convert non-trapping condition 15065 to trapping one), however on i386 we currently emit all 15066 comparisons unordered. */ 15067 compare_code = reverse_condition_maybe_unordered (compare_code); 15068 code = reverse_condition_maybe_unordered (code); 15069 } 15070 else 15071 { 15072 compare_code = reverse_condition (compare_code); 15073 code = reverse_condition (code); 15074 } 15075 } 15076 15077 compare_code = UNKNOWN; 15078 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT 15079 && CONST_INT_P (ix86_compare_op1)) 15080 { 15081 if (ix86_compare_op1 == const0_rtx 15082 && (code == LT || code == GE)) 15083 compare_code = code; 15084 else if (ix86_compare_op1 == constm1_rtx) 15085 { 15086 if (code == LE) 15087 compare_code = LT; 15088 else if (code == GT) 15089 compare_code = GE; 15090 } 15091 } 15092 15093 /* Optimize dest = (op0 < 0) ? -1 : cf. */ 15094 if (compare_code != UNKNOWN 15095 && GET_MODE (ix86_compare_op0) == GET_MODE (out) 15096 && (cf == -1 || ct == -1)) 15097 { 15098 /* If lea code below could be used, only optimize 15099 if it results in a 2 insn sequence. */ 15100 15101 if (! 
(diff == 1 || diff == 2 || diff == 4 || diff == 8 15102 || diff == 3 || diff == 5 || diff == 9) 15103 || (compare_code == LT && ct == -1) 15104 || (compare_code == GE && cf == -1)) 15105 { 15106 /* 15107 * notl op1 (if necessary) 15108 * sarl $31, op1 15109 * orl cf, op1 15110 */ 15111 if (ct != -1) 15112 { 15113 cf = ct; 15114 ct = -1; 15115 code = reverse_condition (code); 15116 } 15117 15118 out = emit_store_flag (out, code, ix86_compare_op0, 15119 ix86_compare_op1, VOIDmode, 0, -1); 15120 15121 out = expand_simple_binop (mode, IOR, 15122 out, GEN_INT (cf), 15123 out, 1, OPTAB_DIRECT); 15124 if (out != operands[0]) 15125 emit_move_insn (operands[0], out); 15126 15127 return 1; /* DONE */ 15128 } 15129 } 15130 15131 15132 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8 15133 || diff == 3 || diff == 5 || diff == 9) 15134 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL) 15135 && (mode != DImode 15136 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode))) 15137 { 15138 /* 15139 * xorl dest,dest 15140 * cmpl op1,op2 15141 * setcc dest 15142 * lea cf(dest*(ct-cf)),dest 15143 * 15144 * Size 14. 15145 * 15146 * This also catches the degenerate setcc-only case. 15147 */ 15148 15149 rtx tmp; 15150 int nops; 15151 15152 out = emit_store_flag (out, code, ix86_compare_op0, 15153 ix86_compare_op1, VOIDmode, 0, 1); 15154 15155 nops = 0; 15156 /* On x86_64 the lea instruction operates on Pmode, so we need 15157 to get arithmetics done in proper mode to match. */ 15158 if (diff == 1) 15159 tmp = copy_rtx (out); 15160 else 15161 { 15162 rtx out1; 15163 out1 = copy_rtx (out); 15164 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1)); 15165 nops++; 15166 if (diff & 1) 15167 { 15168 tmp = gen_rtx_PLUS (mode, tmp, out1); 15169 nops++; 15170 } 15171 } 15172 if (cf != 0) 15173 { 15174 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf)); 15175 nops++; 15176 } 15177 if (!rtx_equal_p (tmp, out)) 15178 { 15179 if (nops == 1) 15180 out = force_operand (tmp, copy_rtx (out)); 15181 else 15182 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp))); 15183 } 15184 if (!rtx_equal_p (out, operands[0])) 15185 emit_move_insn (operands[0], copy_rtx (out)); 15186 15187 return 1; /* DONE */ 15188 } 15189 15190 /* 15191 * General case: Jumpful: 15192 * xorl dest,dest cmpl op1, op2 15193 * cmpl op1, op2 movl ct, dest 15194 * setcc dest jcc 1f 15195 * decl dest movl cf, dest 15196 * andl (cf-ct),dest 1: 15197 * addl ct,dest 15198 * 15199 * Size 20. Size 14. 15200 * 15201 * This is reasonably steep, but branch mispredict costs are 15202 * high on modern cpus, so consider failing only if optimizing 15203 * for space. 15204 */ 15205 15206 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) 15207 && BRANCH_COST (optimize_insn_for_speed_p (), 15208 false) >= 2) 15209 { 15210 if (cf == 0) 15211 { 15212 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0); 15213 15214 cf = ct; 15215 ct = 0; 15216 15217 if (SCALAR_FLOAT_MODE_P (cmp_mode)) 15218 { 15219 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode)); 15220 15221 /* We may be reversing unordered compare to normal compare, 15222 that is not valid in general (we may convert non-trapping 15223 condition to trapping one), however on i386 we currently 15224 emit all comparisons unordered. 
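For example, the reverse of UNGE is LT: the original unordered test never
raises an invalid-operand exception on a NaN, while the reversed ordered
test in principle could; since the back end currently emits all comparisons
as unordered (non-trapping) compares, the reversal is safe here.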
*/ 15225 code = reverse_condition_maybe_unordered (code); 15226 } 15227 else 15228 { 15229 code = reverse_condition (code); 15230 if (compare_code != UNKNOWN) 15231 compare_code = reverse_condition (compare_code); 15232 } 15233 } 15234 15235 if (compare_code != UNKNOWN) 15236 { 15237 /* notl op1 (if needed) 15238 sarl $31, op1 15239 andl (cf-ct), op1 15240 addl ct, op1 15241 15242 For x < 0 (resp. x <= -1) there will be no notl, 15243 so if possible swap the constants to get rid of the 15244 complement. 15245 True/false will be -1/0 while code below (store flag 15246 followed by decrement) is 0/-1, so the constants need 15247 to be exchanged once more. */ 15248 15249 if (compare_code == GE || !cf) 15250 { 15251 code = reverse_condition (code); 15252 compare_code = LT; 15253 } 15254 else 15255 { 15256 HOST_WIDE_INT tmp = cf; 15257 cf = ct; 15258 ct = tmp; 15259 } 15260 15261 out = emit_store_flag (out, code, ix86_compare_op0, 15262 ix86_compare_op1, VOIDmode, 0, -1); 15263 } 15264 else 15265 { 15266 out = emit_store_flag (out, code, ix86_compare_op0, 15267 ix86_compare_op1, VOIDmode, 0, 1); 15268 15269 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx, 15270 copy_rtx (out), 1, OPTAB_DIRECT); 15271 } 15272 15273 out = expand_simple_binop (mode, AND, copy_rtx (out), 15274 gen_int_mode (cf - ct, mode), 15275 copy_rtx (out), 1, OPTAB_DIRECT); 15276 if (ct) 15277 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct), 15278 copy_rtx (out), 1, OPTAB_DIRECT); 15279 if (!rtx_equal_p (out, operands[0])) 15280 emit_move_insn (operands[0], copy_rtx (out)); 15281 15282 return 1; /* DONE */ 15283 } 15284 } 15285 15286 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) 15287 { 15288 /* Try a few things more with specific constants and a variable. */ 15289 15290 optab op; 15291 rtx var, orig_out, out, tmp; 15292 15293 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2) 15294 return 0; /* FAIL */ 15295 15296 /* If one of the two operands is an interesting constant, load a 15297 constant with the above and mask it in with a logical operation. */ 15298 15299 if (CONST_INT_P (operands[2])) 15300 { 15301 var = operands[3]; 15302 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx) 15303 operands[3] = constm1_rtx, op = and_optab; 15304 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx) 15305 operands[3] = const0_rtx, op = ior_optab; 15306 else 15307 return 0; /* FAIL */ 15308 } 15309 else if (CONST_INT_P (operands[3])) 15310 { 15311 var = operands[2]; 15312 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx) 15313 operands[2] = constm1_rtx, op = and_optab; 15314 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx) 15315 operands[2] = const0_rtx, op = ior_optab; 15316 else 15317 return 0; /* FAIL */ 15318 } 15319 else 15320 return 0; /* FAIL */ 15321 15322 orig_out = operands[0]; 15323 tmp = gen_reg_rtx (mode); 15324 operands[0] = tmp; 15325 15326 /* Recurse to get the constant loaded. */ 15327 if (ix86_expand_int_movcc (operands) == 0) 15328 return 0; /* FAIL */ 15329 15330 /* Mask in the interesting variable. */ 15331 out = expand_binop (mode, op, var, tmp, orig_out, 0, 15332 OPTAB_WIDEN); 15333 if (!rtx_equal_p (out, orig_out)) 15334 emit_move_insn (copy_rtx (orig_out), copy_rtx (out)); 15335 15336 return 1; /* DONE */ 15337 } 15338 15339 /* 15340 * For comparison with above, 15341 * 15342 * movl cf,dest 15343 * movl ct,tmp 15344 * cmpl op1,op2 15345 * cmovcc tmp,dest 15346 * 15347 * Size 15. 
15348 */ 15349 15350 if (! nonimmediate_operand (operands[2], mode)) 15351 operands[2] = force_reg (mode, operands[2]); 15352 if (! nonimmediate_operand (operands[3], mode)) 15353 operands[3] = force_reg (mode, operands[3]); 15354 15355 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3])) 15356 { 15357 rtx tmp = gen_reg_rtx (mode); 15358 emit_move_insn (tmp, operands[3]); 15359 operands[3] = tmp; 15360 } 15361 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2])) 15362 { 15363 rtx tmp = gen_reg_rtx (mode); 15364 emit_move_insn (tmp, operands[2]); 15365 operands[2] = tmp; 15366 } 15367 15368 if (! register_operand (operands[2], VOIDmode) 15369 && (mode == QImode 15370 || ! register_operand (operands[3], VOIDmode))) 15371 operands[2] = force_reg (mode, operands[2]); 15372 15373 if (mode == QImode 15374 && ! register_operand (operands[3], VOIDmode)) 15375 operands[3] = force_reg (mode, operands[3]); 15376 15377 emit_insn (compare_seq); 15378 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 15379 gen_rtx_IF_THEN_ELSE (mode, 15380 compare_op, operands[2], 15381 operands[3]))); 15382 if (bypass_test) 15383 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), 15384 gen_rtx_IF_THEN_ELSE (mode, 15385 bypass_test, 15386 copy_rtx (operands[3]), 15387 copy_rtx (operands[0])))); 15388 if (second_test) 15389 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), 15390 gen_rtx_IF_THEN_ELSE (mode, 15391 second_test, 15392 copy_rtx (operands[2]), 15393 copy_rtx (operands[0])))); 15394 15395 return 1; /* DONE */ 15396 } 15397 15398 /* Swap, force into registers, or otherwise massage the two operands 15399 to an sse comparison with a mask result. Thus we differ a bit from 15400 ix86_prepare_fp_compare_args which expects to produce a flags result. 15401 15402 The DEST operand exists to help determine whether to commute commutative 15403 operators. The POP0/POP1 operands are updated in place. The new 15404 comparison code is returned, or UNKNOWN if not implementable. */ 15405 15406 static enum rtx_code 15407 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code, 15408 rtx *pop0, rtx *pop1) 15409 { 15410 rtx tmp; 15411 15412 switch (code) 15413 { 15414 case LTGT: 15415 case UNEQ: 15416 /* We have no LTGT as an operator. We could implement it with 15417 NE & ORDERED, but this requires an extra temporary. It's 15418 not clear that it's worth it. */ 15419 return UNKNOWN; 15420 15421 case LT: 15422 case LE: 15423 case UNGT: 15424 case UNGE: 15425 /* These are supported directly. */ 15426 break; 15427 15428 case EQ: 15429 case NE: 15430 case UNORDERED: 15431 case ORDERED: 15432 /* For commutative operators, try to canonicalize the destination 15433 operand to be first in the comparison - this helps reload to 15434 avoid extra moves. */ 15435 if (!dest || !rtx_equal_p (dest, *pop1)) 15436 break; 15437 /* FALLTHRU */ 15438 15439 case GE: 15440 case GT: 15441 case UNLE: 15442 case UNLT: 15443 /* These are not supported directly. Swap the comparison operands 15444 to transform into something that is supported. */ 15445 tmp = *pop0; 15446 *pop0 = *pop1; 15447 *pop1 = tmp; 15448 code = swap_condition (code); 15449 break; 15450 15451 default: 15452 gcc_unreachable (); 15453 } 15454 15455 return code; 15456 } 15457 15458 /* Detect conditional moves that exactly match min/max operational 15459 semantics. Note that this is IEEE safe, as long as we don't 15460 interchange the operands. 
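For example, dest = (a < b) ? a : b maps directly onto minss/minsd:
both that expression and the hardware min return the second operand
when the two are unordered, so preserving the operand order preserves
the NaN behaviour.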
15461 15462 Returns FALSE if this conditional move doesn't match a MIN/MAX, 15463 and TRUE if the operation is successful and instructions are emitted. */ 15464 15465 static bool 15466 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, 15467 rtx cmp_op1, rtx if_true, rtx if_false) 15468 { 15469 enum machine_mode mode; 15470 bool is_min; 15471 rtx tmp; 15472 15473 if (code == LT) 15474 ; 15475 else if (code == UNGE) 15476 { 15477 tmp = if_true; 15478 if_true = if_false; 15479 if_false = tmp; 15480 } 15481 else 15482 return false; 15483 15484 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false)) 15485 is_min = true; 15486 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false)) 15487 is_min = false; 15488 else 15489 return false; 15490 15491 mode = GET_MODE (dest); 15492 15493 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here, 15494 but MODE may be a vector mode and thus not appropriate. */ 15495 if (!flag_finite_math_only || !flag_unsafe_math_optimizations) 15496 { 15497 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX; 15498 rtvec v; 15499 15500 if_true = force_reg (mode, if_true); 15501 v = gen_rtvec (2, if_true, if_false); 15502 tmp = gen_rtx_UNSPEC (mode, v, u); 15503 } 15504 else 15505 { 15506 code = is_min ? SMIN : SMAX; 15507 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false); 15508 } 15509 15510 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp)); 15511 return true; 15512 } 15513 15514 /* Expand an sse vector comparison. Return the register with the result. */ 15515 15516 static rtx 15517 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, 15518 rtx op_true, rtx op_false) 15519 { 15520 enum machine_mode mode = GET_MODE (dest); 15521 rtx x; 15522 15523 cmp_op0 = force_reg (mode, cmp_op0); 15524 if (!nonimmediate_operand (cmp_op1, mode)) 15525 cmp_op1 = force_reg (mode, cmp_op1); 15526 15527 if (optimize 15528 || reg_overlap_mentioned_p (dest, op_true) 15529 || reg_overlap_mentioned_p (dest, op_false)) 15530 dest = gen_reg_rtx (mode); 15531 15532 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1); 15533 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 15534 15535 return dest; 15536 } 15537 15538 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical 15539 operations. This is used for both scalar and vector conditional moves. 
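In the general case this is the usual mask-and-merge idiom applied to
the all-ones/all-zeros mask produced by the SSE compare, roughly

  dest = (cmp & op_true) | (~cmp & op_false);

the special cases below drop one half when the corresponding arm is
zero, and SSE5 can emit a single pcmov instead.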
*/ 15540 15541 static void 15542 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) 15543 { 15544 enum machine_mode mode = GET_MODE (dest); 15545 rtx t2, t3, x; 15546 15547 if (op_false == CONST0_RTX (mode)) 15548 { 15549 op_true = force_reg (mode, op_true); 15550 x = gen_rtx_AND (mode, cmp, op_true); 15551 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 15552 } 15553 else if (op_true == CONST0_RTX (mode)) 15554 { 15555 op_false = force_reg (mode, op_false); 15556 x = gen_rtx_NOT (mode, cmp); 15557 x = gen_rtx_AND (mode, x, op_false); 15558 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 15559 } 15560 else if (TARGET_SSE5) 15561 { 15562 rtx pcmov = gen_rtx_SET (mode, dest, 15563 gen_rtx_IF_THEN_ELSE (mode, cmp, 15564 op_true, 15565 op_false)); 15566 emit_insn (pcmov); 15567 } 15568 else 15569 { 15570 op_true = force_reg (mode, op_true); 15571 op_false = force_reg (mode, op_false); 15572 15573 t2 = gen_reg_rtx (mode); 15574 if (optimize) 15575 t3 = gen_reg_rtx (mode); 15576 else 15577 t3 = dest; 15578 15579 x = gen_rtx_AND (mode, op_true, cmp); 15580 emit_insn (gen_rtx_SET (VOIDmode, t2, x)); 15581 15582 x = gen_rtx_NOT (mode, cmp); 15583 x = gen_rtx_AND (mode, x, op_false); 15584 emit_insn (gen_rtx_SET (VOIDmode, t3, x)); 15585 15586 x = gen_rtx_IOR (mode, t3, t2); 15587 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 15588 } 15589 } 15590 15591 /* Expand a floating-point conditional move. Return true if successful. */ 15592 15593 int 15594 ix86_expand_fp_movcc (rtx operands[]) 15595 { 15596 enum machine_mode mode = GET_MODE (operands[0]); 15597 enum rtx_code code = GET_CODE (operands[1]); 15598 rtx tmp, compare_op, second_test, bypass_test; 15599 15600 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) 15601 { 15602 enum machine_mode cmode; 15603 15604 /* Since we've no cmove for sse registers, don't force bad register 15605 allocation just to gain access to it. Deny movcc when the 15606 comparison mode doesn't match the move mode. */ 15607 cmode = GET_MODE (ix86_compare_op0); 15608 if (cmode == VOIDmode) 15609 cmode = GET_MODE (ix86_compare_op1); 15610 if (cmode != mode) 15611 return 0; 15612 15613 code = ix86_prepare_sse_fp_compare_args (operands[0], code, 15614 &ix86_compare_op0, 15615 &ix86_compare_op1); 15616 if (code == UNKNOWN) 15617 return 0; 15618 15619 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0, 15620 ix86_compare_op1, operands[2], 15621 operands[3])) 15622 return 1; 15623 15624 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0, 15625 ix86_compare_op1, operands[2], operands[3]); 15626 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]); 15627 return 1; 15628 } 15629 15630 /* The floating point conditional move instructions don't directly 15631 support conditions resulting from a signed integer comparison. */ 15632 15633 compare_op = ix86_expand_compare (code, &second_test, &bypass_test); 15634 15635 /* The floating point conditional move instructions don't directly 15636 support signed integer comparisons. 
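(fcmov only tests CF, ZF and PF -- fcmovb, fcmove, fcmovbe, fcmovu and
their negated forms -- so a signed condition is first reduced below to
a setcc result tested against zero with NE.)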
*/ 15637 15638 if (!fcmov_comparison_operator (compare_op, VOIDmode)) 15639 { 15640 gcc_assert (!second_test && !bypass_test); 15641 tmp = gen_reg_rtx (QImode); 15642 ix86_expand_setcc (code, tmp); 15643 code = NE; 15644 ix86_compare_op0 = tmp; 15645 ix86_compare_op1 = const0_rtx; 15646 compare_op = ix86_expand_compare (code, &second_test, &bypass_test); 15647 } 15648 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3])) 15649 { 15650 tmp = gen_reg_rtx (mode); 15651 emit_move_insn (tmp, operands[3]); 15652 operands[3] = tmp; 15653 } 15654 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2])) 15655 { 15656 tmp = gen_reg_rtx (mode); 15657 emit_move_insn (tmp, operands[2]); 15658 operands[2] = tmp; 15659 } 15660 15661 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 15662 gen_rtx_IF_THEN_ELSE (mode, compare_op, 15663 operands[2], operands[3]))); 15664 if (bypass_test) 15665 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 15666 gen_rtx_IF_THEN_ELSE (mode, bypass_test, 15667 operands[3], operands[0]))); 15668 if (second_test) 15669 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 15670 gen_rtx_IF_THEN_ELSE (mode, second_test, 15671 operands[2], operands[0]))); 15672 15673 return 1; 15674 } 15675 15676 /* Expand a floating-point vector conditional move; a vcond operation 15677 rather than a movcc operation. */ 15678 15679 bool 15680 ix86_expand_fp_vcond (rtx operands[]) 15681 { 15682 enum rtx_code code = GET_CODE (operands[3]); 15683 rtx cmp; 15684 15685 code = ix86_prepare_sse_fp_compare_args (operands[0], code, 15686 &operands[4], &operands[5]); 15687 if (code == UNKNOWN) 15688 return false; 15689 15690 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4], 15691 operands[5], operands[1], operands[2])) 15692 return true; 15693 15694 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5], 15695 operands[1], operands[2]); 15696 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); 15697 return true; 15698 } 15699 15700 /* Expand a signed/unsigned integral vector conditional move. */ 15701 15702 bool 15703 ix86_expand_int_vcond (rtx operands[]) 15704 { 15705 enum machine_mode mode = GET_MODE (operands[0]); 15706 enum rtx_code code = GET_CODE (operands[3]); 15707 bool negate = false; 15708 rtx x, cop0, cop1; 15709 15710 cop0 = operands[4]; 15711 cop1 = operands[5]; 15712 15713 /* SSE5 supports all of the comparisons on all vector int types. */ 15714 if (!TARGET_SSE5) 15715 { 15716 /* Canonicalize the comparison to EQ, GT, GTU. */ 15717 switch (code) 15718 { 15719 case EQ: 15720 case GT: 15721 case GTU: 15722 break; 15723 15724 case NE: 15725 case LE: 15726 case LEU: 15727 code = reverse_condition (code); 15728 negate = true; 15729 break; 15730 15731 case GE: 15732 case GEU: 15733 code = reverse_condition (code); 15734 negate = true; 15735 /* FALLTHRU */ 15736 15737 case LT: 15738 case LTU: 15739 code = swap_condition (code); 15740 x = cop0, cop0 = cop1, cop1 = x; 15741 break; 15742 15743 default: 15744 gcc_unreachable (); 15745 } 15746 15747 /* Only SSE4.1/SSE4.2 supports V2DImode. */ 15748 if (mode == V2DImode) 15749 { 15750 switch (code) 15751 { 15752 case EQ: 15753 /* SSE4.1 supports EQ. */ 15754 if (!TARGET_SSE4_1) 15755 return false; 15756 break; 15757 15758 case GT: 15759 case GTU: 15760 /* SSE4.2 supports GT/GTU. */ 15761 if (!TARGET_SSE4_2) 15762 return false; 15763 break; 15764 15765 default: 15766 gcc_unreachable (); 15767 } 15768 } 15769 15770 /* Unsigned parallel compare is not supported by the hardware. 
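(The pcmpgt instructions are signed-only.  One standard workaround,
used below for V4SImode and V2DImode, is to subtract the sign-bit mask
from both operands, which maps unsigned order onto signed order; e.g.
0 and 0xffffffff become INT_MIN and INT_MAX.)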
15771 Play some tricks to turn this into a signed comparison 15772 against 0. */ 15773 if (code == GTU) 15774 { 15775 cop0 = force_reg (mode, cop0); 15776 15777 switch (mode) 15778 { 15779 case V4SImode: 15780 case V2DImode: 15781 { 15782 rtx t1, t2, mask; 15783 rtx (*gen_sub3) (rtx, rtx, rtx); 15784 15785 /* Subtract (-(INT MAX) - 1) from both operands to make 15786 them signed. */ 15787 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode), 15788 true, false); 15789 gen_sub3 = (mode == V4SImode 15790 ? gen_subv4si3 : gen_subv2di3); 15791 t1 = gen_reg_rtx (mode); 15792 emit_insn (gen_sub3 (t1, cop0, mask)); 15793 15794 t2 = gen_reg_rtx (mode); 15795 emit_insn (gen_sub3 (t2, cop1, mask)); 15796 15797 cop0 = t1; 15798 cop1 = t2; 15799 code = GT; 15800 } 15801 break; 15802 15803 case V16QImode: 15804 case V8HImode: 15805 /* Perform a parallel unsigned saturating subtraction. */ 15806 x = gen_reg_rtx (mode); 15807 emit_insn (gen_rtx_SET (VOIDmode, x, 15808 gen_rtx_US_MINUS (mode, cop0, cop1))); 15809 15810 cop0 = x; 15811 cop1 = CONST0_RTX (mode); 15812 code = EQ; 15813 negate = !negate; 15814 break; 15815 15816 default: 15817 gcc_unreachable (); 15818 } 15819 } 15820 } 15821 15822 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1, 15823 operands[1+negate], operands[2-negate]); 15824 15825 ix86_expand_sse_movcc (operands[0], x, operands[1+negate], 15826 operands[2-negate]); 15827 return true; 15828 } 15829 15830 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is 15831 true if we should do zero extension, else sign extension. HIGH_P is 15832 true if we want the N/2 high elements, else the low elements. */ 15833 15834 void 15835 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p) 15836 { 15837 enum machine_mode imode = GET_MODE (operands[1]); 15838 rtx (*unpack)(rtx, rtx, rtx); 15839 rtx se, dest; 15840 15841 switch (imode) 15842 { 15843 case V16QImode: 15844 if (high_p) 15845 unpack = gen_vec_interleave_highv16qi; 15846 else 15847 unpack = gen_vec_interleave_lowv16qi; 15848 break; 15849 case V8HImode: 15850 if (high_p) 15851 unpack = gen_vec_interleave_highv8hi; 15852 else 15853 unpack = gen_vec_interleave_lowv8hi; 15854 break; 15855 case V4SImode: 15856 if (high_p) 15857 unpack = gen_vec_interleave_highv4si; 15858 else 15859 unpack = gen_vec_interleave_lowv4si; 15860 break; 15861 default: 15862 gcc_unreachable (); 15863 } 15864 15865 dest = gen_lowpart (imode, operands[0]); 15866 15867 if (unsigned_p) 15868 se = force_reg (imode, CONST0_RTX (imode)); 15869 else 15870 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode), 15871 operands[1], pc_rtx, pc_rtx); 15872 15873 emit_insn (unpack (dest, operands[1], se)); 15874 } 15875 15876 /* This function performs the same task as ix86_expand_sse_unpack, 15877 but with SSE4.1 instructions. 
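(pmovsx/pmovzx widen the low half of their source directly; there is
no high-half form, so for HIGH_P the input is first shifted right by
8 bytes below.)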
*/ 15878 15879 void 15880 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p) 15881 { 15882 enum machine_mode imode = GET_MODE (operands[1]); 15883 rtx (*unpack)(rtx, rtx); 15884 rtx src, dest; 15885 15886 switch (imode) 15887 { 15888 case V16QImode: 15889 if (unsigned_p) 15890 unpack = gen_sse4_1_zero_extendv8qiv8hi2; 15891 else 15892 unpack = gen_sse4_1_extendv8qiv8hi2; 15893 break; 15894 case V8HImode: 15895 if (unsigned_p) 15896 unpack = gen_sse4_1_zero_extendv4hiv4si2; 15897 else 15898 unpack = gen_sse4_1_extendv4hiv4si2; 15899 break; 15900 case V4SImode: 15901 if (unsigned_p) 15902 unpack = gen_sse4_1_zero_extendv2siv2di2; 15903 else 15904 unpack = gen_sse4_1_extendv2siv2di2; 15905 break; 15906 default: 15907 gcc_unreachable (); 15908 } 15909 15910 dest = operands[0]; 15911 if (high_p) 15912 { 15913 /* Shift higher 8 bytes to lower 8 bytes. */ 15914 src = gen_reg_rtx (imode); 15915 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src), 15916 gen_lowpart (TImode, operands[1]), 15917 GEN_INT (64))); 15918 } 15919 else 15920 src = operands[1]; 15921 15922 emit_insn (unpack (dest, src)); 15923 } 15924 15925 /* This function performs the same task as ix86_expand_sse_unpack, 15926 but with sse5 instructions. */ 15927 15928 void 15929 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p) 15930 { 15931 enum machine_mode imode = GET_MODE (operands[1]); 15932 int pperm_bytes[16]; 15933 int i; 15934 int h = (high_p) ? 8 : 0; 15935 int h2; 15936 int sign_extend; 15937 rtvec v = rtvec_alloc (16); 15938 rtvec vs; 15939 rtx x, p; 15940 rtx op0 = operands[0], op1 = operands[1]; 15941 15942 switch (imode) 15943 { 15944 case V16QImode: 15945 vs = rtvec_alloc (8); 15946 h2 = (high_p) ? 8 : 0; 15947 for (i = 0; i < 8; i++) 15948 { 15949 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h; 15950 pperm_bytes[2*i+1] = ((unsigned_p) 15951 ? PPERM_ZERO 15952 : PPERM_SIGN | PPERM_SRC2 | i | h); 15953 } 15954 15955 for (i = 0; i < 16; i++) 15956 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); 15957 15958 for (i = 0; i < 8; i++) 15959 RTVEC_ELT (vs, i) = GEN_INT (i + h2); 15960 15961 p = gen_rtx_PARALLEL (VOIDmode, vs); 15962 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); 15963 if (unsigned_p) 15964 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x)); 15965 else 15966 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x)); 15967 break; 15968 15969 case V8HImode: 15970 vs = rtvec_alloc (4); 15971 h2 = (high_p) ? 4 : 0; 15972 for (i = 0; i < 4; i++) 15973 { 15974 sign_extend = ((unsigned_p) 15975 ? PPERM_ZERO 15976 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h)); 15977 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h); 15978 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h); 15979 pperm_bytes[4*i+2] = sign_extend; 15980 pperm_bytes[4*i+3] = sign_extend; 15981 } 15982 15983 for (i = 0; i < 16; i++) 15984 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); 15985 15986 for (i = 0; i < 4; i++) 15987 RTVEC_ELT (vs, i) = GEN_INT (i + h2); 15988 15989 p = gen_rtx_PARALLEL (VOIDmode, vs); 15990 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); 15991 if (unsigned_p) 15992 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x)); 15993 else 15994 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x)); 15995 break; 15996 15997 case V4SImode: 15998 vs = rtvec_alloc (2); 15999 h2 = (high_p) ? 2 : 0; 16000 for (i = 0; i < 2; i++) 16001 { 16002 sign_extend = ((unsigned_p) 16003 ? 
PPERM_ZERO 16004 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h)); 16005 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h); 16006 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h); 16007 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h); 16008 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h); 16009 pperm_bytes[8*i+4] = sign_extend; 16010 pperm_bytes[8*i+5] = sign_extend; 16011 pperm_bytes[8*i+6] = sign_extend; 16012 pperm_bytes[8*i+7] = sign_extend; 16013 } 16014 16015 for (i = 0; i < 16; i++) 16016 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); 16017 16018 for (i = 0; i < 2; i++) 16019 RTVEC_ELT (vs, i) = GEN_INT (i + h2); 16020 16021 p = gen_rtx_PARALLEL (VOIDmode, vs); 16022 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); 16023 if (unsigned_p) 16024 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x)); 16025 else 16026 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x)); 16027 break; 16028 16029 default: 16030 gcc_unreachable (); 16031 } 16032 16033 return; 16034 } 16035 16036 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the 16037 next narrower integer vector type */ 16038 void 16039 ix86_expand_sse5_pack (rtx operands[3]) 16040 { 16041 enum machine_mode imode = GET_MODE (operands[0]); 16042 int pperm_bytes[16]; 16043 int i; 16044 rtvec v = rtvec_alloc (16); 16045 rtx x; 16046 rtx op0 = operands[0]; 16047 rtx op1 = operands[1]; 16048 rtx op2 = operands[2]; 16049 16050 switch (imode) 16051 { 16052 case V16QImode: 16053 for (i = 0; i < 8; i++) 16054 { 16055 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2); 16056 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2); 16057 } 16058 16059 for (i = 0; i < 16; i++) 16060 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); 16061 16062 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); 16063 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x)); 16064 break; 16065 16066 case V8HImode: 16067 for (i = 0; i < 4; i++) 16068 { 16069 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0); 16070 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1); 16071 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0); 16072 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1); 16073 } 16074 16075 for (i = 0; i < 16; i++) 16076 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); 16077 16078 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); 16079 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x)); 16080 break; 16081 16082 case V4SImode: 16083 for (i = 0; i < 2; i++) 16084 { 16085 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0); 16086 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1); 16087 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2); 16088 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3); 16089 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0); 16090 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1); 16091 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2); 16092 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3); 16093 } 16094 16095 for (i = 0; i < 16; i++) 16096 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); 16097 16098 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); 16099 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x)); 16100 break; 16101 16102 default: 16103 gcc_unreachable (); 16104 } 16105 16106 return; 16107 } 16108 16109 /* Expand conditional increment or decrement using adb/sbb 
instructions. 16110 The default case using setcc followed by the conditional move can be 16111 done by generic code. */ 16112 int 16113 ix86_expand_int_addcc (rtx operands[]) 16114 { 16115 enum rtx_code code = GET_CODE (operands[1]); 16116 rtx compare_op; 16117 rtx val = const0_rtx; 16118 bool fpcmp = false; 16119 enum machine_mode mode = GET_MODE (operands[0]); 16120 16121 if (operands[3] != const1_rtx 16122 && operands[3] != constm1_rtx) 16123 return 0; 16124 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0, 16125 ix86_compare_op1, &compare_op)) 16126 return 0; 16127 code = GET_CODE (compare_op); 16128 16129 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode 16130 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) 16131 { 16132 fpcmp = true; 16133 code = ix86_fp_compare_code_to_integer (code); 16134 } 16135 16136 if (code != LTU) 16137 { 16138 val = constm1_rtx; 16139 if (fpcmp) 16140 PUT_CODE (compare_op, 16141 reverse_condition_maybe_unordered 16142 (GET_CODE (compare_op))); 16143 else 16144 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); 16145 } 16146 PUT_MODE (compare_op, mode); 16147 16148 /* Construct either adc or sbb insn. */ 16149 if ((code == LTU) == (operands[3] == constm1_rtx)) 16150 { 16151 switch (GET_MODE (operands[0])) 16152 { 16153 case QImode: 16154 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op)); 16155 break; 16156 case HImode: 16157 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op)); 16158 break; 16159 case SImode: 16160 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op)); 16161 break; 16162 case DImode: 16163 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op)); 16164 break; 16165 default: 16166 gcc_unreachable (); 16167 } 16168 } 16169 else 16170 { 16171 switch (GET_MODE (operands[0])) 16172 { 16173 case QImode: 16174 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op)); 16175 break; 16176 case HImode: 16177 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op)); 16178 break; 16179 case SImode: 16180 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op)); 16181 break; 16182 case DImode: 16183 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op)); 16184 break; 16185 default: 16186 gcc_unreachable (); 16187 } 16188 } 16189 return 1; /* DONE */ 16190 } 16191 16192 16193 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but 16194 works for floating pointer parameters and nonoffsetable memories. 16195 For pushes, it returns just stack offsets; the values will be saved 16196 in the right order. Maximally three parts are generated. */ 16197 16198 static int 16199 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode) 16200 { 16201 int size; 16202 16203 if (!TARGET_64BIT) 16204 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4; 16205 else 16206 size = (GET_MODE_SIZE (mode) + 4) / 8; 16207 16208 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand))); 16209 gcc_assert (size >= 2 && size <= 4); 16210 16211 /* Optimize constant pool reference to immediates. This is used by fp 16212 moves, that force all constants to memory to allow combining. */ 16213 if (MEM_P (operand) && MEM_READONLY_P (operand)) 16214 { 16215 rtx tmp = maybe_get_pool_constant (operand); 16216 if (tmp) 16217 operand = tmp; 16218 } 16219 16220 if (MEM_P (operand) && !offsettable_memref_p (operand)) 16221 { 16222 /* The only non-offsetable memories we handle are pushes. 
*/ 16223 int ok = push_operand (operand, VOIDmode); 16224 16225 gcc_assert (ok); 16226 16227 operand = copy_rtx (operand); 16228 PUT_MODE (operand, Pmode); 16229 parts[0] = parts[1] = parts[2] = parts[3] = operand; 16230 return size; 16231 } 16232 16233 if (GET_CODE (operand) == CONST_VECTOR) 16234 { 16235 enum machine_mode imode = int_mode_for_mode (mode); 16236 /* Caution: if we looked through a constant pool memory above, 16237 the operand may actually have a different mode now. That's 16238 ok, since we want to pun this all the way back to an integer. */ 16239 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0); 16240 gcc_assert (operand != NULL); 16241 mode = imode; 16242 } 16243 16244 if (!TARGET_64BIT) 16245 { 16246 if (mode == DImode) 16247 split_di (&operand, 1, &parts[0], &parts[1]); 16248 else 16249 { 16250 int i; 16251 16252 if (REG_P (operand)) 16253 { 16254 gcc_assert (reload_completed); 16255 for (i = 0; i < size; i++) 16256 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i); 16257 } 16258 else if (offsettable_memref_p (operand)) 16259 { 16260 operand = adjust_address (operand, SImode, 0); 16261 parts[0] = operand; 16262 for (i = 1; i < size; i++) 16263 parts[i] = adjust_address (operand, SImode, 4 * i); 16264 } 16265 else if (GET_CODE (operand) == CONST_DOUBLE) 16266 { 16267 REAL_VALUE_TYPE r; 16268 long l[4]; 16269 16270 REAL_VALUE_FROM_CONST_DOUBLE (r, operand); 16271 switch (mode) 16272 { 16273 case TFmode: 16274 real_to_target (l, &r, mode); 16275 parts[3] = gen_int_mode (l[3], SImode); 16276 parts[2] = gen_int_mode (l[2], SImode); 16277 break; 16278 case XFmode: 16279 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l); 16280 parts[2] = gen_int_mode (l[2], SImode); 16281 break; 16282 case DFmode: 16283 REAL_VALUE_TO_TARGET_DOUBLE (r, l); 16284 break; 16285 default: 16286 gcc_unreachable (); 16287 } 16288 parts[1] = gen_int_mode (l[1], SImode); 16289 parts[0] = gen_int_mode (l[0], SImode); 16290 } 16291 else 16292 gcc_unreachable (); 16293 } 16294 } 16295 else 16296 { 16297 if (mode == TImode) 16298 split_ti (&operand, 1, &parts[0], &parts[1]); 16299 if (mode == XFmode || mode == TFmode) 16300 { 16301 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode; 16302 if (REG_P (operand)) 16303 { 16304 gcc_assert (reload_completed); 16305 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0); 16306 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1); 16307 } 16308 else if (offsettable_memref_p (operand)) 16309 { 16310 operand = adjust_address (operand, DImode, 0); 16311 parts[0] = operand; 16312 parts[1] = adjust_address (operand, upper_mode, 8); 16313 } 16314 else if (GET_CODE (operand) == CONST_DOUBLE) 16315 { 16316 REAL_VALUE_TYPE r; 16317 long l[4]; 16318 16319 REAL_VALUE_FROM_CONST_DOUBLE (r, operand); 16320 real_to_target (l, &r, mode); 16321 16322 /* Do not use shift by 32 to avoid warning on 32bit systems. 
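Writing the constant shift as (x << 31) << 1 computes the same value as
x << 32, but keeps each shift count smaller than the width of
HOST_WIDE_INT on hosts where it is only 32 bits wide; the branch is
never taken there, but the expression is still compiled and would
otherwise warn.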
*/ 16323 if (HOST_BITS_PER_WIDE_INT >= 64) 16324 parts[0] 16325 = gen_int_mode 16326 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1)) 16327 + ((((HOST_WIDE_INT) l[1]) << 31) << 1), 16328 DImode); 16329 else 16330 parts[0] = immed_double_const (l[0], l[1], DImode); 16331 16332 if (upper_mode == SImode) 16333 parts[1] = gen_int_mode (l[2], SImode); 16334 else if (HOST_BITS_PER_WIDE_INT >= 64) 16335 parts[1] 16336 = gen_int_mode 16337 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1)) 16338 + ((((HOST_WIDE_INT) l[3]) << 31) << 1), 16339 DImode); 16340 else 16341 parts[1] = immed_double_const (l[2], l[3], DImode); 16342 } 16343 else 16344 gcc_unreachable (); 16345 } 16346 } 16347 16348 return size; 16349 } 16350 16351 /* Emit insns to perform a move or push of DI, DF, XF, and TF values. 16352 Return false when normal moves are needed; true when all required 16353 insns have been emitted. Operands 2-4 contain the input values 16354 int the correct order; operands 5-7 contain the output values. */ 16355 16356 void 16357 ix86_split_long_move (rtx operands[]) 16358 { 16359 rtx part[2][4]; 16360 int nparts, i, j; 16361 int push = 0; 16362 int collisions = 0; 16363 enum machine_mode mode = GET_MODE (operands[0]); 16364 bool collisionparts[4]; 16365 16366 /* The DFmode expanders may ask us to move double. 16367 For 64bit target this is single move. By hiding the fact 16368 here we simplify i386.md splitters. */ 16369 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT) 16370 { 16371 /* Optimize constant pool reference to immediates. This is used by 16372 fp moves, that force all constants to memory to allow combining. */ 16373 16374 if (MEM_P (operands[1]) 16375 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF 16376 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))) 16377 operands[1] = get_pool_constant (XEXP (operands[1], 0)); 16378 if (push_operand (operands[0], VOIDmode)) 16379 { 16380 operands[0] = copy_rtx (operands[0]); 16381 PUT_MODE (operands[0], Pmode); 16382 } 16383 else 16384 operands[0] = gen_lowpart (DImode, operands[0]); 16385 operands[1] = gen_lowpart (DImode, operands[1]); 16386 emit_move_insn (operands[0], operands[1]); 16387 return; 16388 } 16389 16390 /* The only non-offsettable memory we handle is push. */ 16391 if (push_operand (operands[0], VOIDmode)) 16392 push = 1; 16393 else 16394 gcc_assert (!MEM_P (operands[0]) 16395 || offsettable_memref_p (operands[0])); 16396 16397 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0])); 16398 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0])); 16399 16400 /* When emitting push, take care for source operands on the stack. */ 16401 if (push && MEM_P (operands[1]) 16402 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1])) 16403 { 16404 rtx src_base = XEXP (part[1][nparts - 1], 0); 16405 16406 /* Compensate for the stack decrement by 4. */ 16407 if (!TARGET_64BIT && nparts == 3 16408 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE) 16409 src_base = plus_constant (src_base, 4); 16410 16411 /* src_base refers to the stack pointer and is 16412 automatically decreased by emitted push. */ 16413 for (i = 0; i < nparts; i++) 16414 part[1][i] = change_address (part[1][i], 16415 GET_MODE (part[1][i]), src_base); 16416 } 16417 16418 /* We need to do copy in the right order in case an address register 16419 of the source overlaps the destination. 
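For example, when a DImode value is loaded from memory addressed by a
register that is also one of the destination registers, the word whose
destination is that register must be moved last, or the address would
be clobbered before the remaining words are read; with several such
collisions the address is first copied into the last destination
register and the loads are rewritten to use it, as done below.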
*/ 16420 if (REG_P (part[0][0]) && MEM_P (part[1][0])) 16421 { 16422 rtx tmp; 16423 16424 for (i = 0; i < nparts; i++) 16425 { 16426 collisionparts[i] 16427 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0)); 16428 if (collisionparts[i]) 16429 collisions++; 16430 } 16431 16432 /* Collision in the middle part can be handled by reordering. */ 16433 if (collisions == 1 && nparts == 3 && collisionparts [1]) 16434 { 16435 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp; 16436 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp; 16437 } 16438 else if (collisions == 1 16439 && nparts == 4 16440 && (collisionparts [1] || collisionparts [2])) 16441 { 16442 if (collisionparts [1]) 16443 { 16444 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp; 16445 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp; 16446 } 16447 else 16448 { 16449 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp; 16450 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp; 16451 } 16452 } 16453 16454 /* If there are more collisions, we can't handle it by reordering. 16455 Do an lea to the last part and use only one colliding move. */ 16456 else if (collisions > 1) 16457 { 16458 rtx base; 16459 16460 collisions = 1; 16461 16462 base = part[0][nparts - 1]; 16463 16464 /* Handle the case when the last part isn't valid for lea. 16465 Happens in 64-bit mode storing the 12-byte XFmode. */ 16466 if (GET_MODE (base) != Pmode) 16467 base = gen_rtx_REG (Pmode, REGNO (base)); 16468 16469 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0))); 16470 part[1][0] = replace_equiv_address (part[1][0], base); 16471 for (i = 1; i < nparts; i++) 16472 { 16473 tmp = plus_constant (base, UNITS_PER_WORD * i); 16474 part[1][i] = replace_equiv_address (part[1][i], tmp); 16475 } 16476 } 16477 } 16478 16479 if (push) 16480 { 16481 if (!TARGET_64BIT) 16482 { 16483 if (nparts == 3) 16484 { 16485 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode) 16486 emit_insn (gen_addsi3 (stack_pointer_rtx, 16487 stack_pointer_rtx, GEN_INT (-4))); 16488 emit_move_insn (part[0][2], part[1][2]); 16489 } 16490 else if (nparts == 4) 16491 { 16492 emit_move_insn (part[0][3], part[1][3]); 16493 emit_move_insn (part[0][2], part[1][2]); 16494 } 16495 } 16496 else 16497 { 16498 /* In 64bit mode we don't have 32bit push available. In case this is 16499 register, it is OK - we will just use larger counterpart. We also 16500 retype memory - these comes from attempt to avoid REX prefix on 16501 moving of second half of TFmode value. */ 16502 if (GET_MODE (part[1][1]) == SImode) 16503 { 16504 switch (GET_CODE (part[1][1])) 16505 { 16506 case MEM: 16507 part[1][1] = adjust_address (part[1][1], DImode, 0); 16508 break; 16509 16510 case REG: 16511 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1])); 16512 break; 16513 16514 default: 16515 gcc_unreachable (); 16516 } 16517 16518 if (GET_MODE (part[1][0]) == SImode) 16519 part[1][0] = part[1][1]; 16520 } 16521 } 16522 emit_move_insn (part[0][1], part[1][1]); 16523 emit_move_insn (part[0][0], part[1][0]); 16524 return; 16525 } 16526 16527 /* Choose correct order to not overwrite the source before it is copied. 
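E.g. for a register-to-register DImode move with the source in
%eax/%edx and the destination in %edx/%ecx, copying the low word first
would overwrite %edx while it still holds the source high word, so the
parts are emitted in the reverse (high-to-low) order instead.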
*/ 16528 if ((REG_P (part[0][0]) 16529 && REG_P (part[1][1]) 16530 && (REGNO (part[0][0]) == REGNO (part[1][1]) 16531 || (nparts == 3 16532 && REGNO (part[0][0]) == REGNO (part[1][2])) 16533 || (nparts == 4 16534 && REGNO (part[0][0]) == REGNO (part[1][3])))) 16535 || (collisions > 0 16536 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))) 16537 { 16538 for (i = 0, j = nparts - 1; i < nparts; i++, j--) 16539 { 16540 operands[2 + i] = part[0][j]; 16541 operands[6 + i] = part[1][j]; 16542 } 16543 } 16544 else 16545 { 16546 for (i = 0; i < nparts; i++) 16547 { 16548 operands[2 + i] = part[0][i]; 16549 operands[6 + i] = part[1][i]; 16550 } 16551 } 16552 16553 /* If optimizing for size, attempt to locally unCSE nonzero constants. */ 16554 if (optimize_insn_for_size_p ()) 16555 { 16556 for (j = 0; j < nparts - 1; j++) 16557 if (CONST_INT_P (operands[6 + j]) 16558 && operands[6 + j] != const0_rtx 16559 && REG_P (operands[2 + j])) 16560 for (i = j; i < nparts - 1; i++) 16561 if (CONST_INT_P (operands[7 + i]) 16562 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j])) 16563 operands[7 + i] = operands[2 + j]; 16564 } 16565 16566 for (i = 0; i < nparts; i++) 16567 emit_move_insn (operands[2 + i], operands[6 + i]); 16568 16569 return; 16570 } 16571 16572 /* Helper function of ix86_split_ashl used to generate an SImode/DImode 16573 left shift by a constant, either using a single shift or 16574 a sequence of add instructions. */ 16575 16576 static void 16577 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode) 16578 { 16579 if (count == 1) 16580 { 16581 emit_insn ((mode == DImode 16582 ? gen_addsi3 16583 : gen_adddi3) (operand, operand, operand)); 16584 } 16585 else if (!optimize_insn_for_size_p () 16586 && count * ix86_cost->add <= ix86_cost->shift_const) 16587 { 16588 int i; 16589 for (i=0; i<count; i++) 16590 { 16591 emit_insn ((mode == DImode 16592 ? gen_addsi3 16593 : gen_adddi3) (operand, operand, operand)); 16594 } 16595 } 16596 else 16597 emit_insn ((mode == DImode 16598 ? gen_ashlsi3 16599 : gen_ashldi3) (operand, operand, GEN_INT (count))); 16600 } 16601 16602 void 16603 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode) 16604 { 16605 rtx low[2], high[2]; 16606 int count; 16607 const int single_width = mode == DImode ? 32 : 64; 16608 16609 if (CONST_INT_P (operands[2])) 16610 { 16611 (mode == DImode ? split_di : split_ti) (operands, 2, low, high); 16612 count = INTVAL (operands[2]) & (single_width * 2 - 1); 16613 16614 if (count >= single_width) 16615 { 16616 emit_move_insn (high[0], low[1]); 16617 emit_move_insn (low[0], const0_rtx); 16618 16619 if (count > single_width) 16620 ix86_expand_ashl_const (high[0], count - single_width, mode); 16621 } 16622 else 16623 { 16624 if (!rtx_equal_p (operands[0], operands[1])) 16625 emit_move_insn (operands[0], operands[1]); 16626 emit_insn ((mode == DImode 16627 ? gen_x86_shld 16628 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count))); 16629 ix86_expand_ashl_const (low[0], count, mode); 16630 } 16631 return; 16632 } 16633 16634 (mode == DImode ? split_di : split_ti) (operands, 1, low, high); 16635 16636 if (operands[1] == const1_rtx) 16637 { 16638 /* Assuming we've chosen a QImode capable registers, then 1 << N 16639 can be done with two 32/64-bit shifts, no branches, no cmoves. 
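Roughly: both halves are cleared, a setcc on (count & 32/64) deposits
the single 1 bit into whichever half should receive it, and the final
pair of shifts relies on the hardware truncating the shift count
modulo the word size; e.g. for a DImode shift by 35 on ia32 the bit
lands in the high word and the count is truncated to 3, giving 1 << 35.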
*/ 16640 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0])) 16641 { 16642 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG); 16643 16644 ix86_expand_clear (low[0]); 16645 ix86_expand_clear (high[0]); 16646 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width))); 16647 16648 d = gen_lowpart (QImode, low[0]); 16649 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); 16650 s = gen_rtx_EQ (QImode, flags, const0_rtx); 16651 emit_insn (gen_rtx_SET (VOIDmode, d, s)); 16652 16653 d = gen_lowpart (QImode, high[0]); 16654 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); 16655 s = gen_rtx_NE (QImode, flags, const0_rtx); 16656 emit_insn (gen_rtx_SET (VOIDmode, d, s)); 16657 } 16658 16659 /* Otherwise, we can get the same results by manually performing 16660 a bit extract operation on bit 5/6, and then performing the two 16661 shifts. The two methods of getting 0/1 into low/high are exactly 16662 the same size. Avoiding the shift in the bit extract case helps 16663 pentium4 a bit; no one else seems to care much either way. */ 16664 else 16665 { 16666 rtx x; 16667 16668 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ()) 16669 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]); 16670 else 16671 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]); 16672 emit_insn (gen_rtx_SET (VOIDmode, high[0], x)); 16673 16674 emit_insn ((mode == DImode 16675 ? gen_lshrsi3 16676 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6))); 16677 emit_insn ((mode == DImode 16678 ? gen_andsi3 16679 : gen_anddi3) (high[0], high[0], GEN_INT (1))); 16680 emit_move_insn (low[0], high[0]); 16681 emit_insn ((mode == DImode 16682 ? gen_xorsi3 16683 : gen_xordi3) (low[0], low[0], GEN_INT (1))); 16684 } 16685 16686 emit_insn ((mode == DImode 16687 ? gen_ashlsi3 16688 : gen_ashldi3) (low[0], low[0], operands[2])); 16689 emit_insn ((mode == DImode 16690 ? gen_ashlsi3 16691 : gen_ashldi3) (high[0], high[0], operands[2])); 16692 return; 16693 } 16694 16695 if (operands[1] == constm1_rtx) 16696 { 16697 /* For -1 << N, we can avoid the shld instruction, because we 16698 know that we're shifting 0...31/63 ones into a -1. */ 16699 emit_move_insn (low[0], constm1_rtx); 16700 if (optimize_insn_for_size_p ()) 16701 emit_move_insn (high[0], low[0]); 16702 else 16703 emit_move_insn (high[0], constm1_rtx); 16704 } 16705 else 16706 { 16707 if (!rtx_equal_p (operands[0], operands[1])) 16708 emit_move_insn (operands[0], operands[1]); 16709 16710 (mode == DImode ? split_di : split_ti) (operands, 1, low, high); 16711 emit_insn ((mode == DImode 16712 ? gen_x86_shld 16713 : gen_x86_64_shld) (high[0], low[0], operands[2])); 16714 } 16715 16716 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2])); 16717 16718 if (TARGET_CMOVE && scratch) 16719 { 16720 ix86_expand_clear (scratch); 16721 emit_insn ((mode == DImode 16722 ? gen_x86_shift_adj_1 16723 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2], 16724 scratch)); 16725 } 16726 else 16727 emit_insn ((mode == DImode 16728 ? gen_x86_shift_adj_2 16729 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2])); 16730 } 16731 16732 void 16733 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode) 16734 { 16735 rtx low[2], high[2]; 16736 int count; 16737 const int single_width = mode == DImode ? 32 : 64; 16738 16739 if (CONST_INT_P (operands[2])) 16740 { 16741 (mode == DImode ? 
split_di : split_ti) (operands, 2, low, high); 16742 count = INTVAL (operands[2]) & (single_width * 2 - 1); 16743 16744 if (count == single_width * 2 - 1) 16745 { 16746 emit_move_insn (high[0], high[1]); 16747 emit_insn ((mode == DImode 16748 ? gen_ashrsi3 16749 : gen_ashrdi3) (high[0], high[0], 16750 GEN_INT (single_width - 1))); 16751 emit_move_insn (low[0], high[0]); 16752 16753 } 16754 else if (count >= single_width) 16755 { 16756 emit_move_insn (low[0], high[1]); 16757 emit_move_insn (high[0], low[0]); 16758 emit_insn ((mode == DImode 16759 ? gen_ashrsi3 16760 : gen_ashrdi3) (high[0], high[0], 16761 GEN_INT (single_width - 1))); 16762 if (count > single_width) 16763 emit_insn ((mode == DImode 16764 ? gen_ashrsi3 16765 : gen_ashrdi3) (low[0], low[0], 16766 GEN_INT (count - single_width))); 16767 } 16768 else 16769 { 16770 if (!rtx_equal_p (operands[0], operands[1])) 16771 emit_move_insn (operands[0], operands[1]); 16772 emit_insn ((mode == DImode 16773 ? gen_x86_shrd 16774 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count))); 16775 emit_insn ((mode == DImode 16776 ? gen_ashrsi3 16777 : gen_ashrdi3) (high[0], high[0], GEN_INT (count))); 16778 } 16779 } 16780 else 16781 { 16782 if (!rtx_equal_p (operands[0], operands[1])) 16783 emit_move_insn (operands[0], operands[1]); 16784 16785 (mode == DImode ? split_di : split_ti) (operands, 1, low, high); 16786 16787 emit_insn ((mode == DImode 16788 ? gen_x86_shrd 16789 : gen_x86_64_shrd) (low[0], high[0], operands[2])); 16790 emit_insn ((mode == DImode 16791 ? gen_ashrsi3 16792 : gen_ashrdi3) (high[0], high[0], operands[2])); 16793 16794 if (TARGET_CMOVE && scratch) 16795 { 16796 emit_move_insn (scratch, high[0]); 16797 emit_insn ((mode == DImode 16798 ? gen_ashrsi3 16799 : gen_ashrdi3) (scratch, scratch, 16800 GEN_INT (single_width - 1))); 16801 emit_insn ((mode == DImode 16802 ? gen_x86_shift_adj_1 16803 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2], 16804 scratch)); 16805 } 16806 else 16807 emit_insn ((mode == DImode 16808 ? gen_x86_shift_adj_3 16809 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2])); 16810 } 16811 } 16812 16813 void 16814 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode) 16815 { 16816 rtx low[2], high[2]; 16817 int count; 16818 const int single_width = mode == DImode ? 32 : 64; 16819 16820 if (CONST_INT_P (operands[2])) 16821 { 16822 (mode == DImode ? split_di : split_ti) (operands, 2, low, high); 16823 count = INTVAL (operands[2]) & (single_width * 2 - 1); 16824 16825 if (count >= single_width) 16826 { 16827 emit_move_insn (low[0], high[1]); 16828 ix86_expand_clear (high[0]); 16829 16830 if (count > single_width) 16831 emit_insn ((mode == DImode 16832 ? gen_lshrsi3 16833 : gen_lshrdi3) (low[0], low[0], 16834 GEN_INT (count - single_width))); 16835 } 16836 else 16837 { 16838 if (!rtx_equal_p (operands[0], operands[1])) 16839 emit_move_insn (operands[0], operands[1]); 16840 emit_insn ((mode == DImode 16841 ? gen_x86_shrd 16842 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count))); 16843 emit_insn ((mode == DImode 16844 ? gen_lshrsi3 16845 : gen_lshrdi3) (high[0], high[0], GEN_INT (count))); 16846 } 16847 } 16848 else 16849 { 16850 if (!rtx_equal_p (operands[0], operands[1])) 16851 emit_move_insn (operands[0], operands[1]); 16852 16853 (mode == DImode ? split_di : split_ti) (operands, 1, low, high); 16854 16855 emit_insn ((mode == DImode 16856 ? gen_x86_shrd 16857 : gen_x86_64_shrd) (low[0], high[0], operands[2])); 16858 emit_insn ((mode == DImode 16859 ? 
gen_lshrsi3
16860 : gen_lshrdi3) (high[0], high[0], operands[2]));
16861
16862 /* Heh. By reversing the arguments, we can reuse this pattern. */
16863 if (TARGET_CMOVE && scratch)
16864 {
16865 ix86_expand_clear (scratch);
16866 emit_insn ((mode == DImode
16867 ? gen_x86_shift_adj_1
16868 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16869 scratch));
16870 }
16871 else
16872 emit_insn ((mode == DImode
16873 ? gen_x86_shift_adj_2
16874 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
16875 }
16876 }
16877
16878 /* Predict that the just-emitted jump instruction will be taken with probability PROB. */
16879 static void
16880 predict_jump (int prob)
16881 {
16882 rtx insn = get_last_insn ();
16883 gcc_assert (JUMP_P (insn));
16884 REG_NOTES (insn)
16885 = gen_rtx_EXPR_LIST (REG_BR_PROB,
16886 GEN_INT (prob),
16887 REG_NOTES (insn));
16888 }
16889
16890 /* Helper function for the string operations below. Test VARIABLE for whether
16891 it is aligned to VALUE bytes; if it is, jump to the returned label. */
16892 static rtx
16893 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
16894 {
16895 rtx label = gen_label_rtx ();
16896 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
16897 if (GET_MODE (variable) == DImode)
16898 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
16899 else
16900 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
16901 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
16902 1, label);
16903 if (epilogue)
16904 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16905 else
16906 predict_jump (REG_BR_PROB_BASE * 90 / 100);
16907 return label;
16908 }
16909
16910 /* Decrease COUNTREG by VALUE. */
16911 static void
16912 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
16913 {
16914 if (GET_MODE (countreg) == DImode)
16915 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
16916 else
16917 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
16918 }
16919
16920 /* Zero-extend EXP, which may be in SImode, into a Pmode register. */
16921 rtx
16922 ix86_zero_extend_to_Pmode (rtx exp)
16923 {
16924 rtx r;
16925 if (GET_MODE (exp) == VOIDmode)
16926 return force_reg (Pmode, exp);
16927 if (GET_MODE (exp) == Pmode)
16928 return copy_to_mode_reg (Pmode, exp);
16929 r = gen_reg_rtx (Pmode);
16930 emit_insn (gen_zero_extendsidi2 (r, exp));
16931 return r;
16932 }
16933
16934 /* Divide COUNTREG by SCALE. */
16935 static rtx
16936 scale_counter (rtx countreg, int scale)
16937 {
16938 rtx sc;
16939 rtx piece_size_mask;
16940
16941 if (scale == 1)
16942 return countreg;
16943 if (CONST_INT_P (countreg))
16944 return GEN_INT (INTVAL (countreg) / scale);
16945 gcc_assert (REG_P (countreg));
16946
16947 piece_size_mask = GEN_INT (scale - 1);
16948 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
16949 GEN_INT (exact_log2 (scale)),
16950 NULL, 1, OPTAB_DIRECT);
16951 return sc;
16952 }
16953
16954 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
16955 DImode for constant loop counts.
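(An SImode counter suffices whenever the constant count fits in 32 bits,
and 32-bit counter updates are shorter to encode in 64-bit mode.)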
*/
16956
16957 static enum machine_mode
16958 counter_mode (rtx count_exp)
16959 {
16960 if (GET_MODE (count_exp) != VOIDmode)
16961 return GET_MODE (count_exp);
16962 if (GET_CODE (count_exp) != CONST_INT)
16963 return Pmode;
16964 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
16965 return DImode;
16966 return SImode;
16967 }
16968
16969 /* When SRCPTR is non-NULL, output a simple loop that moves the memory
16970 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
16971 the overall size is COUNT bytes. When SRCPTR is NULL, output the
16972 equivalent loop that sets the memory to VALUE (expected to be in MODE).
16973
16974 The size is rounded down to a whole number of chunks moved at once.
16975 SRCMEM and DESTMEM provide the MEM rtxes used to carry the proper aliasing info. */
16976
16977
16978 static void
16979 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
16980 rtx destptr, rtx srcptr, rtx value,
16981 rtx count, enum machine_mode mode, int unroll,
16982 int expected_size)
16983 {
16984 rtx out_label, top_label, iter, tmp;
16985 enum machine_mode iter_mode = counter_mode (count);
16986 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
16987 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
16988 rtx size;
16989 rtx x_addr;
16990 rtx y_addr;
16991 int i;
16992
16993 top_label = gen_label_rtx ();
16994 out_label = gen_label_rtx ();
16995 iter = gen_reg_rtx (iter_mode);
16996
16997 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
16998 NULL, 1, OPTAB_DIRECT);
16999 /* Those two should combine. */
17000 if (piece_size == const1_rtx)
17001 {
17002 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17003 true, out_label);
17004 predict_jump (REG_BR_PROB_BASE * 10 / 100);
17005 }
17006 emit_move_insn (iter, const0_rtx);
17007
17008 emit_label (top_label);
17009
17010 tmp = convert_modes (Pmode, iter_mode, iter, true);
17011 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17012 destmem = change_address (destmem, mode, x_addr);
17013
17014 if (srcmem)
17015 {
17016 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17017 srcmem = change_address (srcmem, mode, y_addr);
17018
17019 /* When unrolling for chips that reorder memory reads and writes,
17020 we can save registers by using a single temporary.
17021 Also, using 4 temporaries is overkill in 32-bit mode.
*/ 17022 if (!TARGET_64BIT && 0) 17023 { 17024 for (i = 0; i < unroll; i++) 17025 { 17026 if (i) 17027 { 17028 destmem = 17029 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode)); 17030 srcmem = 17031 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode)); 17032 } 17033 emit_move_insn (destmem, srcmem); 17034 } 17035 } 17036 else 17037 { 17038 rtx tmpreg[4]; 17039 gcc_assert (unroll <= 4); 17040 for (i = 0; i < unroll; i++) 17041 { 17042 tmpreg[i] = gen_reg_rtx (mode); 17043 if (i) 17044 { 17045 srcmem = 17046 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode)); 17047 } 17048 emit_move_insn (tmpreg[i], srcmem); 17049 } 17050 for (i = 0; i < unroll; i++) 17051 { 17052 if (i) 17053 { 17054 destmem = 17055 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode)); 17056 } 17057 emit_move_insn (destmem, tmpreg[i]); 17058 } 17059 } 17060 } 17061 else 17062 for (i = 0; i < unroll; i++) 17063 { 17064 if (i) 17065 destmem = 17066 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode)); 17067 emit_move_insn (destmem, value); 17068 } 17069 17070 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter, 17071 true, OPTAB_LIB_WIDEN); 17072 if (tmp != iter) 17073 emit_move_insn (iter, tmp); 17074 17075 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode, 17076 true, top_label); 17077 if (expected_size != -1) 17078 { 17079 expected_size /= GET_MODE_SIZE (mode) * unroll; 17080 if (expected_size == 0) 17081 predict_jump (0); 17082 else if (expected_size > REG_BR_PROB_BASE) 17083 predict_jump (REG_BR_PROB_BASE - 1); 17084 else 17085 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size); 17086 } 17087 else 17088 predict_jump (REG_BR_PROB_BASE * 80 / 100); 17089 iter = ix86_zero_extend_to_Pmode (iter); 17090 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr, 17091 true, OPTAB_LIB_WIDEN); 17092 if (tmp != destptr) 17093 emit_move_insn (destptr, tmp); 17094 if (srcptr) 17095 { 17096 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr, 17097 true, OPTAB_LIB_WIDEN); 17098 if (tmp != srcptr) 17099 emit_move_insn (srcptr, tmp); 17100 } 17101 emit_label (out_label); 17102 } 17103 17104 /* Output "rep; mov" instruction. 17105 Arguments have same meaning as for previous function */ 17106 static void 17107 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem, 17108 rtx destptr, rtx srcptr, 17109 rtx count, 17110 enum machine_mode mode) 17111 { 17112 rtx destexp; 17113 rtx srcexp; 17114 rtx countreg; 17115 17116 /* If the size is known, it is shorter to use rep movs. 
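In particular, a QImode copy whose constant byte count is a multiple of 4 is promoted to SImode below, so each rep iteration moves four bytes instead of one.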
*/ 17117 if (mode == QImode && CONST_INT_P (count) 17118 && !(INTVAL (count) & 3)) 17119 mode = SImode; 17120 17121 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode) 17122 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0); 17123 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode) 17124 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0); 17125 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode))); 17126 if (mode != QImode) 17127 { 17128 destexp = gen_rtx_ASHIFT (Pmode, countreg, 17129 GEN_INT (exact_log2 (GET_MODE_SIZE (mode)))); 17130 destexp = gen_rtx_PLUS (Pmode, destexp, destptr); 17131 srcexp = gen_rtx_ASHIFT (Pmode, countreg, 17132 GEN_INT (exact_log2 (GET_MODE_SIZE (mode)))); 17133 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr); 17134 } 17135 else 17136 { 17137 destexp = gen_rtx_PLUS (Pmode, destptr, countreg); 17138 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg); 17139 } 17140 if (CONST_INT_P (count)) 17141 { 17142 count = GEN_INT (INTVAL (count) 17143 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1)); 17144 destmem = shallow_copy_rtx (destmem); 17145 srcmem = shallow_copy_rtx (srcmem); 17146 set_mem_size (destmem, count); 17147 set_mem_size (srcmem, count); 17148 } 17149 else 17150 { 17151 if (MEM_SIZE (destmem)) 17152 set_mem_size (destmem, NULL_RTX); 17153 if (MEM_SIZE (srcmem)) 17154 set_mem_size (srcmem, NULL_RTX); 17155 } 17156 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg, 17157 destexp, srcexp)); 17158 } 17159 17160 /* Output "rep; stos" instruction. 17161 Arguments have same meaning as for previous function */ 17162 static void 17163 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value, 17164 rtx count, enum machine_mode mode, 17165 rtx orig_value) 17166 { 17167 rtx destexp; 17168 rtx countreg; 17169 17170 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode) 17171 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0); 17172 value = force_reg (mode, gen_lowpart (mode, value)); 17173 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode))); 17174 if (mode != QImode) 17175 { 17176 destexp = gen_rtx_ASHIFT (Pmode, countreg, 17177 GEN_INT (exact_log2 (GET_MODE_SIZE (mode)))); 17178 destexp = gen_rtx_PLUS (Pmode, destexp, destptr); 17179 } 17180 else 17181 destexp = gen_rtx_PLUS (Pmode, destptr, countreg); 17182 if (orig_value == const0_rtx && CONST_INT_P (count)) 17183 { 17184 count = GEN_INT (INTVAL (count) 17185 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1)); 17186 destmem = shallow_copy_rtx (destmem); 17187 set_mem_size (destmem, count); 17188 } 17189 else if (MEM_SIZE (destmem)) 17190 set_mem_size (destmem, NULL_RTX); 17191 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp)); 17192 } 17193 17194 static void 17195 emit_strmov (rtx destmem, rtx srcmem, 17196 rtx destptr, rtx srcptr, enum machine_mode mode, int offset) 17197 { 17198 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset); 17199 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset); 17200 emit_insn (gen_strmov (destptr, dest, srcptr, src)); 17201 } 17202 17203 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. 
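For a constant COUNT the tail is emitted as straight-line moves in decreasing power-of-two chunks; e.g. with MAX_SIZE == 16 and COUNT == 23 (illustrative values) the remaining 7 bytes are copied by one 4-byte, one 2-byte and one 1-byte move.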
*/ 17204 static void 17205 expand_movmem_epilogue (rtx destmem, rtx srcmem, 17206 rtx destptr, rtx srcptr, rtx count, int max_size) 17207 { 17208 rtx src, dest; 17209 if (CONST_INT_P (count)) 17210 { 17211 HOST_WIDE_INT countval = INTVAL (count); 17212 int offset = 0; 17213 17214 if ((countval & 0x10) && max_size > 16) 17215 { 17216 if (TARGET_64BIT) 17217 { 17218 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset); 17219 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8); 17220 } 17221 else 17222 gcc_unreachable (); 17223 offset += 16; 17224 } 17225 if ((countval & 0x08) && max_size > 8) 17226 { 17227 if (TARGET_64BIT) 17228 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset); 17229 else 17230 { 17231 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset); 17232 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4); 17233 } 17234 offset += 8; 17235 } 17236 if ((countval & 0x04) && max_size > 4) 17237 { 17238 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset); 17239 offset += 4; 17240 } 17241 if ((countval & 0x02) && max_size > 2) 17242 { 17243 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset); 17244 offset += 2; 17245 } 17246 if ((countval & 0x01) && max_size > 1) 17247 { 17248 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset); 17249 offset += 1; 17250 } 17251 return; 17252 } 17253 if (max_size > 8) 17254 { 17255 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1), 17256 count, 1, OPTAB_DIRECT); 17257 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL, 17258 count, QImode, 1, 4); 17259 return; 17260 } 17261 17262 /* When there are stringops, we can cheaply increase dest and src pointers. 17263 Otherwise we save code size by maintaining offset (zero is readily 17264 available from preceding rep operation) and using x86 addressing modes. 
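In the latter case the tail moves address (srcptr + offset) and (destptr + offset) directly and only the OFFSET pseudo is advanced, rather than emitting movs-style instructions that would bump both pointers.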
17265 */ 17266 if (TARGET_SINGLE_STRINGOP) 17267 { 17268 if (max_size > 4) 17269 { 17270 rtx label = ix86_expand_aligntest (count, 4, true); 17271 src = change_address (srcmem, SImode, srcptr); 17272 dest = change_address (destmem, SImode, destptr); 17273 emit_insn (gen_strmov (destptr, dest, srcptr, src)); 17274 emit_label (label); 17275 LABEL_NUSES (label) = 1; 17276 } 17277 if (max_size > 2) 17278 { 17279 rtx label = ix86_expand_aligntest (count, 2, true); 17280 src = change_address (srcmem, HImode, srcptr); 17281 dest = change_address (destmem, HImode, destptr); 17282 emit_insn (gen_strmov (destptr, dest, srcptr, src)); 17283 emit_label (label); 17284 LABEL_NUSES (label) = 1; 17285 } 17286 if (max_size > 1) 17287 { 17288 rtx label = ix86_expand_aligntest (count, 1, true); 17289 src = change_address (srcmem, QImode, srcptr); 17290 dest = change_address (destmem, QImode, destptr); 17291 emit_insn (gen_strmov (destptr, dest, srcptr, src)); 17292 emit_label (label); 17293 LABEL_NUSES (label) = 1; 17294 } 17295 } 17296 else 17297 { 17298 rtx offset = force_reg (Pmode, const0_rtx); 17299 rtx tmp; 17300 17301 if (max_size > 4) 17302 { 17303 rtx label = ix86_expand_aligntest (count, 4, true); 17304 src = change_address (srcmem, SImode, srcptr); 17305 dest = change_address (destmem, SImode, destptr); 17306 emit_move_insn (dest, src); 17307 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL, 17308 true, OPTAB_LIB_WIDEN); 17309 if (tmp != offset) 17310 emit_move_insn (offset, tmp); 17311 emit_label (label); 17312 LABEL_NUSES (label) = 1; 17313 } 17314 if (max_size > 2) 17315 { 17316 rtx label = ix86_expand_aligntest (count, 2, true); 17317 tmp = gen_rtx_PLUS (Pmode, srcptr, offset); 17318 src = change_address (srcmem, HImode, tmp); 17319 tmp = gen_rtx_PLUS (Pmode, destptr, offset); 17320 dest = change_address (destmem, HImode, tmp); 17321 emit_move_insn (dest, src); 17322 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp, 17323 true, OPTAB_LIB_WIDEN); 17324 if (tmp != offset) 17325 emit_move_insn (offset, tmp); 17326 emit_label (label); 17327 LABEL_NUSES (label) = 1; 17328 } 17329 if (max_size > 1) 17330 { 17331 rtx label = ix86_expand_aligntest (count, 1, true); 17332 tmp = gen_rtx_PLUS (Pmode, srcptr, offset); 17333 src = change_address (srcmem, QImode, tmp); 17334 tmp = gen_rtx_PLUS (Pmode, destptr, offset); 17335 dest = change_address (destmem, QImode, tmp); 17336 emit_move_insn (dest, src); 17337 emit_label (label); 17338 LABEL_NUSES (label) = 1; 17339 } 17340 } 17341 } 17342 17343 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */ 17344 static void 17345 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value, 17346 rtx count, int max_size) 17347 { 17348 count = 17349 expand_simple_binop (counter_mode (count), AND, count, 17350 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT); 17351 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL, 17352 gen_lowpart (QImode, value), count, QImode, 17353 1, max_size / 2); 17354 } 17355 17356 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. 
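The structure mirrors expand_movmem_epilogue above: constant counts get straight-line stores in decreasing power-of-two chunks, a MAX_SIZE above 32 falls back to a small byte loop, and smaller variable tails use alignment-test guarded stores.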
*/ 17357 static void 17358 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size) 17359 { 17360 rtx dest; 17361 17362 if (CONST_INT_P (count)) 17363 { 17364 HOST_WIDE_INT countval = INTVAL (count); 17365 int offset = 0; 17366 17367 if ((countval & 0x10) && max_size > 16) 17368 { 17369 if (TARGET_64BIT) 17370 { 17371 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset); 17372 emit_insn (gen_strset (destptr, dest, value)); 17373 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8); 17374 emit_insn (gen_strset (destptr, dest, value)); 17375 } 17376 else 17377 gcc_unreachable (); 17378 offset += 16; 17379 } 17380 if ((countval & 0x08) && max_size > 8) 17381 { 17382 if (TARGET_64BIT) 17383 { 17384 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset); 17385 emit_insn (gen_strset (destptr, dest, value)); 17386 } 17387 else 17388 { 17389 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset); 17390 emit_insn (gen_strset (destptr, dest, value)); 17391 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4); 17392 emit_insn (gen_strset (destptr, dest, value)); 17393 } 17394 offset += 8; 17395 } 17396 if ((countval & 0x04) && max_size > 4) 17397 { 17398 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset); 17399 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value))); 17400 offset += 4; 17401 } 17402 if ((countval & 0x02) && max_size > 2) 17403 { 17404 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset); 17405 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value))); 17406 offset += 2; 17407 } 17408 if ((countval & 0x01) && max_size > 1) 17409 { 17410 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset); 17411 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value))); 17412 offset += 1; 17413 } 17414 return; 17415 } 17416 if (max_size > 32) 17417 { 17418 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size); 17419 return; 17420 } 17421 if (max_size > 16) 17422 { 17423 rtx label = ix86_expand_aligntest (count, 16, true); 17424 if (TARGET_64BIT) 17425 { 17426 dest = change_address (destmem, DImode, destptr); 17427 emit_insn (gen_strset (destptr, dest, value)); 17428 emit_insn (gen_strset (destptr, dest, value)); 17429 } 17430 else 17431 { 17432 dest = change_address (destmem, SImode, destptr); 17433 emit_insn (gen_strset (destptr, dest, value)); 17434 emit_insn (gen_strset (destptr, dest, value)); 17435 emit_insn (gen_strset (destptr, dest, value)); 17436 emit_insn (gen_strset (destptr, dest, value)); 17437 } 17438 emit_label (label); 17439 LABEL_NUSES (label) = 1; 17440 } 17441 if (max_size > 8) 17442 { 17443 rtx label = ix86_expand_aligntest (count, 8, true); 17444 if (TARGET_64BIT) 17445 { 17446 dest = change_address (destmem, DImode, destptr); 17447 emit_insn (gen_strset (destptr, dest, value)); 17448 } 17449 else 17450 { 17451 dest = change_address (destmem, SImode, destptr); 17452 emit_insn (gen_strset (destptr, dest, value)); 17453 emit_insn (gen_strset (destptr, dest, value)); 17454 } 17455 emit_label (label); 17456 LABEL_NUSES (label) = 1; 17457 } 17458 if (max_size > 4) 17459 { 17460 rtx label = ix86_expand_aligntest (count, 4, true); 17461 dest = change_address (destmem, SImode, destptr); 17462 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value))); 17463 emit_label (label); 17464 LABEL_NUSES (label) = 1; 17465 } 17466 if (max_size > 2) 17467 { 17468 
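/* Store the trailing 2-byte chunk when bit 1 of COUNT is set.  */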
rtx label = ix86_expand_aligntest (count, 2, true); 17469 dest = change_address (destmem, HImode, destptr); 17470 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value))); 17471 emit_label (label); 17472 LABEL_NUSES (label) = 1; 17473 } 17474 if (max_size > 1) 17475 { 17476 rtx label = ix86_expand_aligntest (count, 1, true); 17477 dest = change_address (destmem, QImode, destptr); 17478 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value))); 17479 emit_label (label); 17480 LABEL_NUSES (label) = 1; 17481 } 17482 } 17483 17484 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to 17485 DESIRED_ALIGNMENT. */ 17486 static void 17487 expand_movmem_prologue (rtx destmem, rtx srcmem, 17488 rtx destptr, rtx srcptr, rtx count, 17489 int align, int desired_alignment) 17490 { 17491 if (align <= 1 && desired_alignment > 1) 17492 { 17493 rtx label = ix86_expand_aligntest (destptr, 1, false); 17494 srcmem = change_address (srcmem, QImode, srcptr); 17495 destmem = change_address (destmem, QImode, destptr); 17496 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem)); 17497 ix86_adjust_counter (count, 1); 17498 emit_label (label); 17499 LABEL_NUSES (label) = 1; 17500 } 17501 if (align <= 2 && desired_alignment > 2) 17502 { 17503 rtx label = ix86_expand_aligntest (destptr, 2, false); 17504 srcmem = change_address (srcmem, HImode, srcptr); 17505 destmem = change_address (destmem, HImode, destptr); 17506 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem)); 17507 ix86_adjust_counter (count, 2); 17508 emit_label (label); 17509 LABEL_NUSES (label) = 1; 17510 } 17511 if (align <= 4 && desired_alignment > 4) 17512 { 17513 rtx label = ix86_expand_aligntest (destptr, 4, false); 17514 srcmem = change_address (srcmem, SImode, srcptr); 17515 destmem = change_address (destmem, SImode, destptr); 17516 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem)); 17517 ix86_adjust_counter (count, 4); 17518 emit_label (label); 17519 LABEL_NUSES (label) = 1; 17520 } 17521 gcc_assert (desired_alignment <= 8); 17522 } 17523 17524 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN. 17525 ALIGN_BYTES is how many bytes need to be copied. 
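For example (illustrative values): with DESIRED_ALIGN == 8 and a destination 3 bytes past an 8-byte boundary, ALIGN_BYTES is 5 and the prologue emits one QImode and one SImode move; the source MEM's alignment is only bumped when its misalignment provably matches the destination's.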
*/ 17526 static rtx 17527 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg, 17528 int desired_align, int align_bytes) 17529 { 17530 rtx src = *srcp; 17531 rtx src_size, dst_size; 17532 int off = 0; 17533 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT); 17534 if (src_align_bytes >= 0) 17535 src_align_bytes = desired_align - src_align_bytes; 17536 src_size = MEM_SIZE (src); 17537 dst_size = MEM_SIZE (dst); 17538 if (align_bytes & 1) 17539 { 17540 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0); 17541 src = adjust_automodify_address_nv (src, QImode, srcreg, 0); 17542 off = 1; 17543 emit_insn (gen_strmov (destreg, dst, srcreg, src)); 17544 } 17545 if (align_bytes & 2) 17546 { 17547 dst = adjust_automodify_address_nv (dst, HImode, destreg, off); 17548 src = adjust_automodify_address_nv (src, HImode, srcreg, off); 17549 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT) 17550 set_mem_align (dst, 2 * BITS_PER_UNIT); 17551 if (src_align_bytes >= 0 17552 && (src_align_bytes & 1) == (align_bytes & 1) 17553 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT) 17554 set_mem_align (src, 2 * BITS_PER_UNIT); 17555 off = 2; 17556 emit_insn (gen_strmov (destreg, dst, srcreg, src)); 17557 } 17558 if (align_bytes & 4) 17559 { 17560 dst = adjust_automodify_address_nv (dst, SImode, destreg, off); 17561 src = adjust_automodify_address_nv (src, SImode, srcreg, off); 17562 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT) 17563 set_mem_align (dst, 4 * BITS_PER_UNIT); 17564 if (src_align_bytes >= 0) 17565 { 17566 unsigned int src_align = 0; 17567 if ((src_align_bytes & 3) == (align_bytes & 3)) 17568 src_align = 4; 17569 else if ((src_align_bytes & 1) == (align_bytes & 1)) 17570 src_align = 2; 17571 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT) 17572 set_mem_align (src, src_align * BITS_PER_UNIT); 17573 } 17574 off = 4; 17575 emit_insn (gen_strmov (destreg, dst, srcreg, src)); 17576 } 17577 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off); 17578 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off); 17579 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT) 17580 set_mem_align (dst, desired_align * BITS_PER_UNIT); 17581 if (src_align_bytes >= 0) 17582 { 17583 unsigned int src_align = 0; 17584 if ((src_align_bytes & 7) == (align_bytes & 7)) 17585 src_align = 8; 17586 else if ((src_align_bytes & 3) == (align_bytes & 3)) 17587 src_align = 4; 17588 else if ((src_align_bytes & 1) == (align_bytes & 1)) 17589 src_align = 2; 17590 if (src_align > (unsigned int) desired_align) 17591 src_align = desired_align; 17592 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT) 17593 set_mem_align (src, src_align * BITS_PER_UNIT); 17594 } 17595 if (dst_size) 17596 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes)); 17597 if (src_size) 17598 set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes)); 17599 *srcp = src; 17600 return dst; 17601 } 17602 17603 /* Set enough from DEST to align DEST known to by aligned by ALIGN to 17604 DESIRED_ALIGNMENT. 
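Each step tests the low bits of DESTPTR at run time, so going from 1-byte to 8-byte alignment can emit up to one QImode, one HImode and one SImode store, each guarded by its own alignment test, and COUNT is decreased accordingly.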
*/ 17605 static void 17606 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count, 17607 int align, int desired_alignment) 17608 { 17609 if (align <= 1 && desired_alignment > 1) 17610 { 17611 rtx label = ix86_expand_aligntest (destptr, 1, false); 17612 destmem = change_address (destmem, QImode, destptr); 17613 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value))); 17614 ix86_adjust_counter (count, 1); 17615 emit_label (label); 17616 LABEL_NUSES (label) = 1; 17617 } 17618 if (align <= 2 && desired_alignment > 2) 17619 { 17620 rtx label = ix86_expand_aligntest (destptr, 2, false); 17621 destmem = change_address (destmem, HImode, destptr); 17622 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value))); 17623 ix86_adjust_counter (count, 2); 17624 emit_label (label); 17625 LABEL_NUSES (label) = 1; 17626 } 17627 if (align <= 4 && desired_alignment > 4) 17628 { 17629 rtx label = ix86_expand_aligntest (destptr, 4, false); 17630 destmem = change_address (destmem, SImode, destptr); 17631 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value))); 17632 ix86_adjust_counter (count, 4); 17633 emit_label (label); 17634 LABEL_NUSES (label) = 1; 17635 } 17636 gcc_assert (desired_alignment <= 8); 17637 } 17638 17639 /* Set enough from DST to align DST known to by aligned by ALIGN to 17640 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */ 17641 static rtx 17642 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value, 17643 int desired_align, int align_bytes) 17644 { 17645 int off = 0; 17646 rtx dst_size = MEM_SIZE (dst); 17647 if (align_bytes & 1) 17648 { 17649 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0); 17650 off = 1; 17651 emit_insn (gen_strset (destreg, dst, 17652 gen_lowpart (QImode, value))); 17653 } 17654 if (align_bytes & 2) 17655 { 17656 dst = adjust_automodify_address_nv (dst, HImode, destreg, off); 17657 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT) 17658 set_mem_align (dst, 2 * BITS_PER_UNIT); 17659 off = 2; 17660 emit_insn (gen_strset (destreg, dst, 17661 gen_lowpart (HImode, value))); 17662 } 17663 if (align_bytes & 4) 17664 { 17665 dst = adjust_automodify_address_nv (dst, SImode, destreg, off); 17666 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT) 17667 set_mem_align (dst, 4 * BITS_PER_UNIT); 17668 off = 4; 17669 emit_insn (gen_strset (destreg, dst, 17670 gen_lowpart (SImode, value))); 17671 } 17672 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off); 17673 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT) 17674 set_mem_align (dst, desired_align * BITS_PER_UNIT); 17675 if (dst_size) 17676 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes)); 17677 return dst; 17678 } 17679 17680 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */ 17681 static enum stringop_alg 17682 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset, 17683 int *dynamic_check) 17684 { 17685 const struct stringop_algs * algs; 17686 bool optimize_for_speed; 17687 /* Algorithms using the rep prefix want at least edi and ecx; 17688 additionally, memset wants eax and memcpy wants esi. Don't 17689 consider such algorithms if the user has appropriated those 17690 registers for their own purposes. */ 17691 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG] 17692 || (memset 17693 ? 
fixed_regs[AX_REG] : fixed_regs[SI_REG])); 17694 17695 #define ALG_USABLE_P(alg) (rep_prefix_usable \ 17696 || (alg != rep_prefix_1_byte \ 17697 && alg != rep_prefix_4_byte \ 17698 && alg != rep_prefix_8_byte)) 17699 const struct processor_costs *cost; 17700 17701 /* Even if the string operation call is cold, we still might spend a lot 17702 of time processing large blocks. */ 17703 if (optimize_function_for_size_p (cfun) 17704 || (optimize_insn_for_size_p () 17705 && expected_size != -1 && expected_size < 256)) 17706 optimize_for_speed = false; 17707 else 17708 optimize_for_speed = true; 17709 17710 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost; 17711 17712 *dynamic_check = -1; 17713 if (memset) 17714 algs = &cost->memset[TARGET_64BIT != 0]; 17715 else 17716 algs = &cost->memcpy[TARGET_64BIT != 0]; 17717 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg)) 17718 return stringop_alg; 17719 /* rep; movq or rep; movl is the smallest variant. */ 17720 else if (!optimize_for_speed) 17721 { 17722 if (!count || (count & 3)) 17723 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte; 17724 else 17725 return rep_prefix_usable ? rep_prefix_4_byte : loop; 17726 } 17727 /* Very tiny blocks are best handled via the loop, REP is expensive to setup. 17728 */ 17729 else if (expected_size != -1 && expected_size < 4) 17730 return loop_1_byte; 17731 else if (expected_size != -1) 17732 { 17733 unsigned int i; 17734 enum stringop_alg alg = libcall; 17735 for (i = 0; i < NAX_STRINGOP_ALGS; i++) 17736 { 17737 /* We get here if the algorithms that were not libcall-based 17738 were rep-prefix based and we are unable to use rep prefixes 17739 based on global register usage. Break out of the loop and 17740 use the heuristic below. */ 17741 if (algs->size[i].max == 0) 17742 break; 17743 if (algs->size[i].max >= expected_size || algs->size[i].max == -1) 17744 { 17745 enum stringop_alg candidate = algs->size[i].alg; 17746 17747 if (candidate != libcall && ALG_USABLE_P (candidate)) 17748 alg = candidate; 17749 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking 17750 last non-libcall inline algorithm. */ 17751 if (TARGET_INLINE_ALL_STRINGOPS) 17752 { 17753 /* When the current size is best to be copied by a libcall, 17754 but we are still forced to inline, run the heuristic below 17755 that will pick code for medium sized blocks. */ 17756 if (alg != libcall) 17757 return alg; 17758 break; 17759 } 17760 else if (ALG_USABLE_P (candidate)) 17761 return candidate; 17762 } 17763 } 17764 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable); 17765 } 17766 /* When asked to inline the call anyway, try to pick meaningful choice. 17767 We look for maximal size of block that is faster to copy by hand and 17768 take blocks of at most of that size guessing that average size will 17769 be roughly half of the block. 17770 17771 If this turns out to be bad, we might simply specify the preferred 17772 choice in ix86_costs. 
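For instance (illustrative numbers), if the largest usable non-libcall entry in the cost table tops out at 4096 bytes, the code below recurses with an expected size of 2048 to pick the algorithm, and with -minline-stringops-dynamically it also requests a run-time size check at 4096 bytes so larger blocks still go through the library call.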
*/ 17773 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY) 17774 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size))) 17775 { 17776 int max = -1; 17777 enum stringop_alg alg; 17778 int i; 17779 bool any_alg_usable_p = true; 17780 17781 for (i = 0; i < NAX_STRINGOP_ALGS; i++) 17782 { 17783 enum stringop_alg candidate = algs->size[i].alg; 17784 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate); 17785 17786 if (candidate != libcall && candidate 17787 && ALG_USABLE_P (candidate)) 17788 max = algs->size[i].max; 17789 } 17790 /* If there aren't any usable algorithms, then recursing on 17791 smaller sizes isn't going to find anything. Just return the 17792 simple byte-at-a-time copy loop. */ 17793 if (!any_alg_usable_p) 17794 { 17795 /* Pick something reasonable. */ 17796 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY) 17797 *dynamic_check = 128; 17798 return loop_1_byte; 17799 } 17800 if (max == -1) 17801 max = 4096; 17802 alg = decide_alg (count, max / 2, memset, dynamic_check); 17803 gcc_assert (*dynamic_check == -1); 17804 gcc_assert (alg != libcall); 17805 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY) 17806 *dynamic_check = max; 17807 return alg; 17808 } 17809 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall; 17810 #undef ALG_USABLE_P 17811 } 17812 17813 /* Decide on alignment. We know that the operand is already aligned to ALIGN 17814 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */ 17815 static int 17816 decide_alignment (int align, 17817 enum stringop_alg alg, 17818 int expected_size) 17819 { 17820 int desired_align = 0; 17821 switch (alg) 17822 { 17823 case no_stringop: 17824 gcc_unreachable (); 17825 case loop: 17826 case unrolled_loop: 17827 desired_align = GET_MODE_SIZE (Pmode); 17828 break; 17829 case rep_prefix_8_byte: 17830 desired_align = 8; 17831 break; 17832 case rep_prefix_4_byte: 17833 /* PentiumPro has special logic triggering for 8 byte aligned blocks. 17834 copying whole cacheline at once. */ 17835 if (TARGET_PENTIUMPRO) 17836 desired_align = 8; 17837 else 17838 desired_align = 4; 17839 break; 17840 case rep_prefix_1_byte: 17841 /* PentiumPro has special logic triggering for 8 byte aligned blocks. 17842 copying whole cacheline at once. */ 17843 if (TARGET_PENTIUMPRO) 17844 desired_align = 8; 17845 else 17846 desired_align = 1; 17847 break; 17848 case loop_1_byte: 17849 desired_align = 1; 17850 break; 17851 case libcall: 17852 return 0; 17853 } 17854 17855 if (optimize_size) 17856 desired_align = 1; 17857 if (desired_align < align) 17858 desired_align = align; 17859 if (expected_size != -1 && expected_size < 4) 17860 desired_align = align; 17861 return desired_align; 17862 } 17863 17864 /* Return the smallest power of 2 greater than VAL. */ 17865 static int 17866 smallest_pow2_greater_than (int val) 17867 { 17868 int ret = 1; 17869 while (ret <= val) 17870 ret <<= 1; 17871 return ret; 17872 } 17873 17874 /* Expand string move (memcpy) operation. Use i386 string operations when 17875 profitable. expand_setmem contains similar code. The code depends upon 17876 architecture, block size and alignment, but always has the same 17877 overall structure: 17878 17879 1) Prologue guard: Conditional that jumps up to epilogues for small 17880 blocks that can be handled by epilogue alone. This is faster but 17881 also needed for correctness, since prologue assume the block is larger 17882 than the desired alignment. 
17883 17884 Optional dynamic check for size and libcall for large 17885 blocks is emitted here too, with -minline-stringops-dynamically. 17886 17887 2) Prologue: copy first few bytes in order to get destination aligned 17888 to DESIRED_ALIGN. It is emitted only when ALIGN is less than 17889 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied. 17890 We emit either a jump tree on power of two sized blocks, or a byte loop. 17891 17892 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks 17893 with specified algorithm. 17894 17895 4) Epilogue: code copying tail of the block that is too small to be 17896 handled by main body (or up to size guarded by prologue guard). */ 17897 17898 int 17899 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp, 17900 rtx expected_align_exp, rtx expected_size_exp) 17901 { 17902 rtx destreg; 17903 rtx srcreg; 17904 rtx label = NULL; 17905 rtx tmp; 17906 rtx jump_around_label = NULL; 17907 HOST_WIDE_INT align = 1; 17908 unsigned HOST_WIDE_INT count = 0; 17909 HOST_WIDE_INT expected_size = -1; 17910 int size_needed = 0, epilogue_size_needed; 17911 int desired_align = 0, align_bytes = 0; 17912 enum stringop_alg alg; 17913 int dynamic_check; 17914 bool need_zero_guard = false; 17915 17916 if (CONST_INT_P (align_exp)) 17917 align = INTVAL (align_exp); 17918 /* i386 can do misaligned access on reasonably increased cost. */ 17919 if (CONST_INT_P (expected_align_exp) 17920 && INTVAL (expected_align_exp) > align) 17921 align = INTVAL (expected_align_exp); 17922 /* ALIGN is the minimum of destination and source alignment, but we care here 17923 just about destination alignment. */ 17924 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT) 17925 align = MEM_ALIGN (dst) / BITS_PER_UNIT; 17926 17927 if (CONST_INT_P (count_exp)) 17928 count = expected_size = INTVAL (count_exp); 17929 if (CONST_INT_P (expected_size_exp) && count == 0) 17930 expected_size = INTVAL (expected_size_exp); 17931 17932 /* Make sure we don't need to care about overflow later on. */ 17933 if (count > ((unsigned HOST_WIDE_INT) 1 << 30)) 17934 return 0; 17935 17936 /* Step 0: Decide on preferred algorithm, desired alignment and 17937 size of chunks to be copied by main loop. */ 17938 17939 alg = decide_alg (count, expected_size, false, &dynamic_check); 17940 desired_align = decide_alignment (align, alg, expected_size); 17941 17942 if (!TARGET_ALIGN_STRINGOPS) 17943 align = desired_align; 17944 17945 if (alg == libcall) 17946 return 0; 17947 gcc_assert (alg != no_stringop); 17948 if (!count) 17949 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp); 17950 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0)); 17951 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0)); 17952 switch (alg) 17953 { 17954 case libcall: 17955 case no_stringop: 17956 gcc_unreachable (); 17957 case loop: 17958 need_zero_guard = true; 17959 size_needed = GET_MODE_SIZE (Pmode); 17960 break; 17961 case unrolled_loop: 17962 need_zero_guard = true; 17963 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2); 17964 break; 17965 case rep_prefix_8_byte: 17966 size_needed = 8; 17967 break; 17968 case rep_prefix_4_byte: 17969 size_needed = 4; 17970 break; 17971 case rep_prefix_1_byte: 17972 size_needed = 1; 17973 break; 17974 case loop_1_byte: 17975 need_zero_guard = true; 17976 size_needed = 1; 17977 break; 17978 } 17979 17980 epilogue_size_needed = size_needed; 17981 17982 /* Step 1: Prologue guard. 
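The guard compares the byte count against EPILOGUE_SIZE_NEEDED and, for possibly-small blocks, branches past the alignment prologue and main loop so the tail is handled by the epilogue alone.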
*/ 17983 17984 /* Alignment code needs count to be in register. */ 17985 if (CONST_INT_P (count_exp) && desired_align > align) 17986 { 17987 if (INTVAL (count_exp) > desired_align 17988 && INTVAL (count_exp) > size_needed) 17989 { 17990 align_bytes 17991 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT); 17992 if (align_bytes <= 0) 17993 align_bytes = 0; 17994 else 17995 align_bytes = desired_align - align_bytes; 17996 } 17997 if (align_bytes == 0) 17998 count_exp = force_reg (counter_mode (count_exp), count_exp); 17999 } 18000 gcc_assert (desired_align >= 1 && align >= 1); 18001 18002 /* Ensure that alignment prologue won't copy past end of block. */ 18003 if (size_needed > 1 || (desired_align > 1 && desired_align > align)) 18004 { 18005 epilogue_size_needed = MAX (size_needed - 1, desired_align - align); 18006 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes. 18007 Make sure it is power of 2. */ 18008 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed); 18009 18010 if (count) 18011 { 18012 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed) 18013 { 18014 /* If main algorithm works on QImode, no epilogue is needed. 18015 For small sizes just don't align anything. */ 18016 if (size_needed == 1) 18017 desired_align = align; 18018 else 18019 goto epilogue; 18020 } 18021 } 18022 else 18023 { 18024 label = gen_label_rtx (); 18025 emit_cmp_and_jump_insns (count_exp, 18026 GEN_INT (epilogue_size_needed), 18027 LTU, 0, counter_mode (count_exp), 1, label); 18028 if (expected_size == -1 || expected_size < epilogue_size_needed) 18029 predict_jump (REG_BR_PROB_BASE * 60 / 100); 18030 else 18031 predict_jump (REG_BR_PROB_BASE * 20 / 100); 18032 } 18033 } 18034 18035 /* Emit code to decide on runtime whether library call or inline should be 18036 used. */ 18037 if (dynamic_check != -1) 18038 { 18039 if (CONST_INT_P (count_exp)) 18040 { 18041 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check) 18042 { 18043 emit_block_move_via_libcall (dst, src, count_exp, false); 18044 count_exp = const0_rtx; 18045 goto epilogue; 18046 } 18047 } 18048 else 18049 { 18050 rtx hot_label = gen_label_rtx (); 18051 jump_around_label = gen_label_rtx (); 18052 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1), 18053 LEU, 0, GET_MODE (count_exp), 1, hot_label); 18054 predict_jump (REG_BR_PROB_BASE * 90 / 100); 18055 emit_block_move_via_libcall (dst, src, count_exp, false); 18056 emit_jump (jump_around_label); 18057 emit_label (hot_label); 18058 } 18059 } 18060 18061 /* Step 2: Alignment prologue. */ 18062 18063 if (desired_align > align) 18064 { 18065 if (align_bytes == 0) 18066 { 18067 /* Except for the first move in epilogue, we no longer know 18068 constant offset in aliasing info. It don't seems to worth 18069 the pain to maintain it for the first move, so throw away 18070 the info early. */ 18071 src = change_address (src, BLKmode, srcreg); 18072 dst = change_address (dst, BLKmode, destreg); 18073 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align, 18074 desired_align); 18075 } 18076 else 18077 { 18078 /* If we know how many bytes need to be stored before dst is 18079 sufficiently aligned, maintain aliasing info accurately. 
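In that case COUNT_EXP and COUNT are also decreased just below by the ALIGN_BYTES already copied by the prologue.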
*/ 18080 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg, 18081 desired_align, align_bytes); 18082 count_exp = plus_constant (count_exp, -align_bytes); 18083 count -= align_bytes; 18084 } 18085 if (need_zero_guard 18086 && (count < (unsigned HOST_WIDE_INT) size_needed 18087 || (align_bytes == 0 18088 && count < ((unsigned HOST_WIDE_INT) size_needed 18089 + desired_align - align)))) 18090 { 18091 /* It is possible that we copied enough so the main loop will not 18092 execute. */ 18093 gcc_assert (size_needed > 1); 18094 if (label == NULL_RTX) 18095 label = gen_label_rtx (); 18096 emit_cmp_and_jump_insns (count_exp, 18097 GEN_INT (size_needed), 18098 LTU, 0, counter_mode (count_exp), 1, label); 18099 if (expected_size == -1 18100 || expected_size < (desired_align - align) / 2 + size_needed) 18101 predict_jump (REG_BR_PROB_BASE * 20 / 100); 18102 else 18103 predict_jump (REG_BR_PROB_BASE * 60 / 100); 18104 } 18105 } 18106 if (label && size_needed == 1) 18107 { 18108 emit_label (label); 18109 LABEL_NUSES (label) = 1; 18110 label = NULL; 18111 epilogue_size_needed = 1; 18112 } 18113 else if (label == NULL_RTX) 18114 epilogue_size_needed = size_needed; 18115 18116 /* Step 3: Main loop. */ 18117 18118 switch (alg) 18119 { 18120 case libcall: 18121 case no_stringop: 18122 gcc_unreachable (); 18123 case loop_1_byte: 18124 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL, 18125 count_exp, QImode, 1, expected_size); 18126 break; 18127 case loop: 18128 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL, 18129 count_exp, Pmode, 1, expected_size); 18130 break; 18131 case unrolled_loop: 18132 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough 18133 registers for 4 temporaries anyway. */ 18134 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL, 18135 count_exp, Pmode, TARGET_64BIT ? 4 : 2, 18136 expected_size); 18137 break; 18138 case rep_prefix_8_byte: 18139 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp, 18140 DImode); 18141 break; 18142 case rep_prefix_4_byte: 18143 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp, 18144 SImode); 18145 break; 18146 case rep_prefix_1_byte: 18147 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp, 18148 QImode); 18149 break; 18150 } 18151 /* Adjust properly the offset of src and dest memory for aliasing. */ 18152 if (CONST_INT_P (count_exp)) 18153 { 18154 src = adjust_automodify_address_nv (src, BLKmode, srcreg, 18155 (count / size_needed) * size_needed); 18156 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, 18157 (count / size_needed) * size_needed); 18158 } 18159 else 18160 { 18161 src = change_address (src, BLKmode, srcreg); 18162 dst = change_address (dst, BLKmode, destreg); 18163 } 18164 18165 /* Step 4: Epilogue to copy the remaining bytes. */ 18166 epilogue: 18167 if (label) 18168 { 18169 /* When the main loop is done, COUNT_EXP might hold original count, 18170 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes. 18171 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED 18172 bytes. Compensate if needed. 
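Concretely (illustrative values): with SIZE_NEEDED == 4, ALIGN == 1 and DESIRED_ALIGN == 8, EPILOGUE_SIZE_NEEDED was rounded up to 8, so COUNT_EXP is masked with SIZE_NEEDED - 1 here to keep the epilogue from re-copying bytes the main loop already handled.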
*/ 18173 18174 if (size_needed < epilogue_size_needed) 18175 { 18176 tmp = 18177 expand_simple_binop (counter_mode (count_exp), AND, count_exp, 18178 GEN_INT (size_needed - 1), count_exp, 1, 18179 OPTAB_DIRECT); 18180 if (tmp != count_exp) 18181 emit_move_insn (count_exp, tmp); 18182 } 18183 emit_label (label); 18184 LABEL_NUSES (label) = 1; 18185 } 18186 18187 if (count_exp != const0_rtx && epilogue_size_needed > 1) 18188 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp, 18189 epilogue_size_needed); 18190 if (jump_around_label) 18191 emit_label (jump_around_label); 18192 return 1; 18193 } 18194 18195 /* Helper function for memcpy. For QImode value 0xXY produce 18196 0xXYXYXYXY of wide specified by MODE. This is essentially 18197 a * 0x10101010, but we can do slightly better than 18198 synth_mult by unwinding the sequence by hand on CPUs with 18199 slow multiply. */ 18200 static rtx 18201 promote_duplicated_reg (enum machine_mode mode, rtx val) 18202 { 18203 enum machine_mode valmode = GET_MODE (val); 18204 rtx tmp; 18205 int nops = mode == DImode ? 3 : 2; 18206 18207 gcc_assert (mode == SImode || mode == DImode); 18208 if (val == const0_rtx) 18209 return copy_to_mode_reg (mode, const0_rtx); 18210 if (CONST_INT_P (val)) 18211 { 18212 HOST_WIDE_INT v = INTVAL (val) & 255; 18213 18214 v |= v << 8; 18215 v |= v << 16; 18216 if (mode == DImode) 18217 v |= (v << 16) << 16; 18218 return copy_to_mode_reg (mode, gen_int_mode (v, mode)); 18219 } 18220 18221 if (valmode == VOIDmode) 18222 valmode = QImode; 18223 if (valmode != QImode) 18224 val = gen_lowpart (QImode, val); 18225 if (mode == QImode) 18226 return val; 18227 if (!TARGET_PARTIAL_REG_STALL) 18228 nops--; 18229 if (ix86_cost->mult_init[mode == DImode ? 3 : 2] 18230 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4) 18231 <= (ix86_cost->shift_const + ix86_cost->add) * nops 18232 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0))) 18233 { 18234 rtx reg = convert_modes (mode, QImode, val, true); 18235 tmp = promote_duplicated_reg (mode, const1_rtx); 18236 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1, 18237 OPTAB_DIRECT); 18238 } 18239 else 18240 { 18241 rtx reg = convert_modes (mode, QImode, val, true); 18242 18243 if (!TARGET_PARTIAL_REG_STALL) 18244 if (mode == SImode) 18245 emit_insn (gen_movsi_insv_1 (reg, reg)); 18246 else 18247 emit_insn (gen_movdi_insv_1_rex64 (reg, reg)); 18248 else 18249 { 18250 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8), 18251 NULL, 1, OPTAB_DIRECT); 18252 reg = 18253 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); 18254 } 18255 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16), 18256 NULL, 1, OPTAB_DIRECT); 18257 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); 18258 if (mode == SImode) 18259 return reg; 18260 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32), 18261 NULL, 1, OPTAB_DIRECT); 18262 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); 18263 return reg; 18264 } 18265 } 18266 18267 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will 18268 be needed by main loop copying SIZE_NEEDED chunks and prologue getting 18269 alignment from ALIGN to DESIRED_ALIGN. 
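E.g. a rep stosq main loop (SIZE_NEEDED == 8) on a 64-bit target gets VAL replicated into a DImode register, a 4-byte loop gets an SImode replica, and a pure byte loop with no alignment prologue keeps VAL unchanged.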
*/ 18270 static rtx 18271 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align) 18272 { 18273 rtx promoted_val; 18274 18275 if (TARGET_64BIT 18276 && (size_needed > 4 || (desired_align > align && desired_align > 4))) 18277 promoted_val = promote_duplicated_reg (DImode, val); 18278 else if (size_needed > 2 || (desired_align > align && desired_align > 2)) 18279 promoted_val = promote_duplicated_reg (SImode, val); 18280 else if (size_needed > 1 || (desired_align > align && desired_align > 1)) 18281 promoted_val = promote_duplicated_reg (HImode, val); 18282 else 18283 promoted_val = val; 18284 18285 return promoted_val; 18286 } 18287 18288 /* Expand string clear operation (bzero). Use i386 string operations when 18289 profitable. See expand_movmem comment for explanation of individual 18290 steps performed. */ 18291 int 18292 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp, 18293 rtx expected_align_exp, rtx expected_size_exp) 18294 { 18295 rtx destreg; 18296 rtx label = NULL; 18297 rtx tmp; 18298 rtx jump_around_label = NULL; 18299 HOST_WIDE_INT align = 1; 18300 unsigned HOST_WIDE_INT count = 0; 18301 HOST_WIDE_INT expected_size = -1; 18302 int size_needed = 0, epilogue_size_needed; 18303 int desired_align = 0, align_bytes = 0; 18304 enum stringop_alg alg; 18305 rtx promoted_val = NULL; 18306 bool force_loopy_epilogue = false; 18307 int dynamic_check; 18308 bool need_zero_guard = false; 18309 18310 if (CONST_INT_P (align_exp)) 18311 align = INTVAL (align_exp); 18312 /* i386 can do misaligned access on reasonably increased cost. */ 18313 if (CONST_INT_P (expected_align_exp) 18314 && INTVAL (expected_align_exp) > align) 18315 align = INTVAL (expected_align_exp); 18316 if (CONST_INT_P (count_exp)) 18317 count = expected_size = INTVAL (count_exp); 18318 if (CONST_INT_P (expected_size_exp) && count == 0) 18319 expected_size = INTVAL (expected_size_exp); 18320 18321 /* Make sure we don't need to care about overflow later on. */ 18322 if (count > ((unsigned HOST_WIDE_INT) 1 << 30)) 18323 return 0; 18324 18325 /* Step 0: Decide on preferred algorithm, desired alignment and 18326 size of chunks to be copied by main loop. */ 18327 18328 alg = decide_alg (count, expected_size, true, &dynamic_check); 18329 desired_align = decide_alignment (align, alg, expected_size); 18330 18331 if (!TARGET_ALIGN_STRINGOPS) 18332 align = desired_align; 18333 18334 if (alg == libcall) 18335 return 0; 18336 gcc_assert (alg != no_stringop); 18337 if (!count) 18338 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp); 18339 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0)); 18340 switch (alg) 18341 { 18342 case libcall: 18343 case no_stringop: 18344 gcc_unreachable (); 18345 case loop: 18346 need_zero_guard = true; 18347 size_needed = GET_MODE_SIZE (Pmode); 18348 break; 18349 case unrolled_loop: 18350 need_zero_guard = true; 18351 size_needed = GET_MODE_SIZE (Pmode) * 4; 18352 break; 18353 case rep_prefix_8_byte: 18354 size_needed = 8; 18355 break; 18356 case rep_prefix_4_byte: 18357 size_needed = 4; 18358 break; 18359 case rep_prefix_1_byte: 18360 size_needed = 1; 18361 break; 18362 case loop_1_byte: 18363 need_zero_guard = true; 18364 size_needed = 1; 18365 break; 18366 } 18367 epilogue_size_needed = size_needed; 18368 18369 /* Step 1: Prologue guard. */ 18370 18371 /* Alignment code needs count to be in register. 
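For a constant count the register is created in SImode, or in DImode when the value does not fit in 32 bits on a 64-bit target.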
*/ 18372 if (CONST_INT_P (count_exp) && desired_align > align) 18373 { 18374 if (INTVAL (count_exp) > desired_align 18375 && INTVAL (count_exp) > size_needed) 18376 { 18377 align_bytes 18378 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT); 18379 if (align_bytes <= 0) 18380 align_bytes = 0; 18381 else 18382 align_bytes = desired_align - align_bytes; 18383 } 18384 if (align_bytes == 0) 18385 { 18386 enum machine_mode mode = SImode; 18387 if (TARGET_64BIT && (count & ~0xffffffff)) 18388 mode = DImode; 18389 count_exp = force_reg (mode, count_exp); 18390 } 18391 } 18392 /* Do the cheap promotion to allow better CSE across the 18393 main loop and epilogue (ie one load of the big constant in the 18394 front of all code. */ 18395 if (CONST_INT_P (val_exp)) 18396 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed, 18397 desired_align, align); 18398 /* Ensure that alignment prologue won't copy past end of block. */ 18399 if (size_needed > 1 || (desired_align > 1 && desired_align > align)) 18400 { 18401 epilogue_size_needed = MAX (size_needed - 1, desired_align - align); 18402 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes. 18403 Make sure it is power of 2. */ 18404 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed); 18405 18406 /* To improve performance of small blocks, we jump around the VAL 18407 promoting mode. This mean that if the promoted VAL is not constant, 18408 we might not use it in the epilogue and have to use byte 18409 loop variant. */ 18410 if (epilogue_size_needed > 2 && !promoted_val) 18411 force_loopy_epilogue = true; 18412 if (count) 18413 { 18414 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed) 18415 { 18416 /* If main algorithm works on QImode, no epilogue is needed. 18417 For small sizes just don't align anything. */ 18418 if (size_needed == 1) 18419 desired_align = align; 18420 else 18421 goto epilogue; 18422 } 18423 } 18424 else 18425 { 18426 label = gen_label_rtx (); 18427 emit_cmp_and_jump_insns (count_exp, 18428 GEN_INT (epilogue_size_needed), 18429 LTU, 0, counter_mode (count_exp), 1, label); 18430 if (expected_size == -1 || expected_size <= epilogue_size_needed) 18431 predict_jump (REG_BR_PROB_BASE * 60 / 100); 18432 else 18433 predict_jump (REG_BR_PROB_BASE * 20 / 100); 18434 } 18435 } 18436 if (dynamic_check != -1) 18437 { 18438 rtx hot_label = gen_label_rtx (); 18439 jump_around_label = gen_label_rtx (); 18440 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1), 18441 LEU, 0, counter_mode (count_exp), 1, hot_label); 18442 predict_jump (REG_BR_PROB_BASE * 90 / 100); 18443 set_storage_via_libcall (dst, count_exp, val_exp, false); 18444 emit_jump (jump_around_label); 18445 emit_label (hot_label); 18446 } 18447 18448 /* Step 2: Alignment prologue. */ 18449 18450 /* Do the expensive promotion once we branched off the small blocks. */ 18451 if (!promoted_val) 18452 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed, 18453 desired_align, align); 18454 gcc_assert (desired_align >= 1 && align >= 1); 18455 18456 if (desired_align > align) 18457 { 18458 if (align_bytes == 0) 18459 { 18460 /* Except for the first move in epilogue, we no longer know 18461 constant offset in aliasing info. It don't seems to worth 18462 the pain to maintain it for the first move, so throw away 18463 the info early. 
*/ 18464 dst = change_address (dst, BLKmode, destreg); 18465 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align, 18466 desired_align); 18467 } 18468 else 18469 { 18470 /* If we know how many bytes need to be stored before dst is 18471 sufficiently aligned, maintain aliasing info accurately. */ 18472 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val, 18473 desired_align, align_bytes); 18474 count_exp = plus_constant (count_exp, -align_bytes); 18475 count -= align_bytes; 18476 } 18477 if (need_zero_guard 18478 && (count < (unsigned HOST_WIDE_INT) size_needed 18479 || (align_bytes == 0 18480 && count < ((unsigned HOST_WIDE_INT) size_needed 18481 + desired_align - align)))) 18482 { 18483 /* It is possible that we copied enough so the main loop will not 18484 execute. */ 18485 gcc_assert (size_needed > 1); 18486 if (label == NULL_RTX) 18487 label = gen_label_rtx (); 18488 emit_cmp_and_jump_insns (count_exp, 18489 GEN_INT (size_needed), 18490 LTU, 0, counter_mode (count_exp), 1, label); 18491 if (expected_size == -1 18492 || expected_size < (desired_align - align) / 2 + size_needed) 18493 predict_jump (REG_BR_PROB_BASE * 20 / 100); 18494 else 18495 predict_jump (REG_BR_PROB_BASE * 60 / 100); 18496 } 18497 } 18498 if (label && size_needed == 1) 18499 { 18500 emit_label (label); 18501 LABEL_NUSES (label) = 1; 18502 label = NULL; 18503 promoted_val = val_exp; 18504 epilogue_size_needed = 1; 18505 } 18506 else if (label == NULL_RTX) 18507 epilogue_size_needed = size_needed; 18508 18509 /* Step 3: Main loop. */ 18510 18511 switch (alg) 18512 { 18513 case libcall: 18514 case no_stringop: 18515 gcc_unreachable (); 18516 case loop_1_byte: 18517 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val, 18518 count_exp, QImode, 1, expected_size); 18519 break; 18520 case loop: 18521 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val, 18522 count_exp, Pmode, 1, expected_size); 18523 break; 18524 case unrolled_loop: 18525 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val, 18526 count_exp, Pmode, 4, expected_size); 18527 break; 18528 case rep_prefix_8_byte: 18529 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp, 18530 DImode, val_exp); 18531 break; 18532 case rep_prefix_4_byte: 18533 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp, 18534 SImode, val_exp); 18535 break; 18536 case rep_prefix_1_byte: 18537 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp, 18538 QImode, val_exp); 18539 break; 18540 } 18541 /* Adjust properly the offset of src and dest memory for aliasing. */ 18542 if (CONST_INT_P (count_exp)) 18543 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, 18544 (count / size_needed) * size_needed); 18545 else 18546 dst = change_address (dst, BLKmode, destreg); 18547 18548 /* Step 4: Epilogue to copy the remaining bytes. */ 18549 18550 if (label) 18551 { 18552 /* When the main loop is done, COUNT_EXP might hold original count, 18553 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes. 18554 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED 18555 bytes. Compensate if needed. 
*/ 18556 18557 if (size_needed < epilogue_size_needed) 18558 { 18559 tmp = 18560 expand_simple_binop (counter_mode (count_exp), AND, count_exp, 18561 GEN_INT (size_needed - 1), count_exp, 1, 18562 OPTAB_DIRECT); 18563 if (tmp != count_exp) 18564 emit_move_insn (count_exp, tmp); 18565 } 18566 emit_label (label); 18567 LABEL_NUSES (label) = 1; 18568 } 18569 epilogue: 18570 if (count_exp != const0_rtx && epilogue_size_needed > 1) 18571 { 18572 if (force_loopy_epilogue) 18573 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp, 18574 epilogue_size_needed); 18575 else 18576 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp, 18577 epilogue_size_needed); 18578 } 18579 if (jump_around_label) 18580 emit_label (jump_around_label); 18581 return 1; 18582 } 18583 18584 /* Expand the appropriate insns for doing strlen if not just doing 18585 repnz; scasb 18586 18587 out = result, initialized with the start address 18588 align_rtx = alignment of the address. 18589 scratch = scratch register, initialized with the startaddress when 18590 not aligned, otherwise undefined 18591 18592 This is just the body. It needs the initializations mentioned above and 18593 some address computing at the end. These things are done in i386.md. */ 18594 18595 static void 18596 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx) 18597 { 18598 int align; 18599 rtx tmp; 18600 rtx align_2_label = NULL_RTX; 18601 rtx align_3_label = NULL_RTX; 18602 rtx align_4_label = gen_label_rtx (); 18603 rtx end_0_label = gen_label_rtx (); 18604 rtx mem; 18605 rtx tmpreg = gen_reg_rtx (SImode); 18606 rtx scratch = gen_reg_rtx (SImode); 18607 rtx cmp; 18608 18609 align = 0; 18610 if (CONST_INT_P (align_rtx)) 18611 align = INTVAL (align_rtx); 18612 18613 /* Loop to check 1..3 bytes for null to get an aligned pointer. */ 18614 18615 /* Is there a known alignment and is it less than 4? */ 18616 if (align < 4) 18617 { 18618 rtx scratch1 = gen_reg_rtx (Pmode); 18619 emit_move_insn (scratch1, out); 18620 /* Is there a known alignment and is it not 2? */ 18621 if (align != 2) 18622 { 18623 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */ 18624 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */ 18625 18626 /* Leave just the 3 lower bits. */ 18627 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3), 18628 NULL_RTX, 0, OPTAB_WIDEN); 18629 18630 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, 18631 Pmode, 1, align_4_label); 18632 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL, 18633 Pmode, 1, align_2_label); 18634 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL, 18635 Pmode, 1, align_3_label); 18636 } 18637 else 18638 { 18639 /* Since the alignment is 2, we have to check 2 or 0 bytes; 18640 check if is aligned to 4 - byte. */ 18641 18642 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx, 18643 NULL_RTX, 0, OPTAB_WIDEN); 18644 18645 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, 18646 Pmode, 1, align_4_label); 18647 } 18648 18649 mem = change_address (src, QImode, out); 18650 18651 /* Now compare the bytes. */ 18652 18653 /* Compare the first n unaligned byte on a byte per byte basis. */ 18654 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, 18655 QImode, 1, end_0_label); 18656 18657 /* Increment the address. 
*/ 18658 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx)); 18659 18660 /* Not needed with an alignment of 2 */ 18661 if (align != 2) 18662 { 18663 emit_label (align_2_label); 18664 18665 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, 18666 end_0_label); 18667 18668 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx)); 18669 18670 emit_label (align_3_label); 18671 } 18672 18673 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, 18674 end_0_label); 18675 18676 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx)); 18677 } 18678 18679 /* Generate loop to check 4 bytes at a time. It is not a good idea to 18680 align this loop. It gives only huge programs, but does not help to 18681 speed up. */ 18682 emit_label (align_4_label); 18683 18684 mem = change_address (src, SImode, out); 18685 emit_move_insn (scratch, mem); 18686 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4))); 18687 18688 /* This formula yields a nonzero result iff one of the bytes is zero. 18689 This saves three branches inside loop and many cycles. */ 18690 18691 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101))); 18692 emit_insn (gen_one_cmplsi2 (scratch, scratch)); 18693 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch)); 18694 emit_insn (gen_andsi3 (tmpreg, tmpreg, 18695 gen_int_mode (0x80808080, SImode))); 18696 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 18697 align_4_label); 18698 18699 if (TARGET_CMOVE) 18700 { 18701 rtx reg = gen_reg_rtx (SImode); 18702 rtx reg2 = gen_reg_rtx (Pmode); 18703 emit_move_insn (reg, tmpreg); 18704 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16))); 18705 18706 /* If zero is not in the first two bytes, move two bytes forward. */ 18707 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); 18708 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 18709 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); 18710 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, 18711 gen_rtx_IF_THEN_ELSE (SImode, tmp, 18712 reg, 18713 tmpreg))); 18714 /* Emit lea manually to avoid clobbering of flags. */ 18715 emit_insn (gen_rtx_SET (SImode, reg2, 18716 gen_rtx_PLUS (Pmode, out, const2_rtx))); 18717 18718 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 18719 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); 18720 emit_insn (gen_rtx_SET (VOIDmode, out, 18721 gen_rtx_IF_THEN_ELSE (Pmode, tmp, 18722 reg2, 18723 out))); 18724 18725 } 18726 else 18727 { 18728 rtx end_2_label = gen_label_rtx (); 18729 /* Is zero in the first two bytes? */ 18730 18731 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); 18732 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 18733 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx); 18734 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 18735 gen_rtx_LABEL_REF (VOIDmode, end_2_label), 18736 pc_rtx); 18737 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); 18738 JUMP_LABEL (tmp) = end_2_label; 18739 18740 /* Not in the first two. Move two bytes forward. */ 18741 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16))); 18742 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx)); 18743 18744 emit_label (end_2_label); 18745 18746 } 18747 18748 /* Avoid branch in fixing the byte. */ 18749 tmpreg = gen_lowpart (QImode, tmpreg); 18750 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg)); 18751 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx); 18752 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp)); 18753 18754 emit_label (end_0_label); 18755 } 18756 18757 /* Expand strlen. 
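The unrolled path (ix86_expand_strlensi_unroll_1 above) scans a word at a time using the classic zero-byte test (x - 0x01010101) & ~x & 0x80808080; e.g. for x == 0x61620063 (illustrative value, second-lowest byte zero) it yields 0x00008000, leaving only the 0x80 bit of the zero byte set. The generic path below emits repnz scasb and recovers the length by complementing and adjusting the resulting count.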
*/ 18758 18759 int 18760 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align) 18761 { 18762 rtx addr, scratch1, scratch2, scratch3, scratch4; 18763 18764 /* The generic case of strlen expander is long. Avoid it's 18765 expanding unless TARGET_INLINE_ALL_STRINGOPS. */ 18766 18767 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 18768 && !TARGET_INLINE_ALL_STRINGOPS 18769 && !optimize_insn_for_size_p () 18770 && (!CONST_INT_P (align) || INTVAL (align) < 4)) 18771 return 0; 18772 18773 addr = force_reg (Pmode, XEXP (src, 0)); 18774 scratch1 = gen_reg_rtx (Pmode); 18775 18776 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 18777 && !optimize_insn_for_size_p ()) 18778 { 18779 /* Well it seems that some optimizer does not combine a call like 18780 foo(strlen(bar), strlen(bar)); 18781 when the move and the subtraction is done here. It does calculate 18782 the length just once when these instructions are done inside of 18783 output_strlen_unroll(). But I think since &bar[strlen(bar)] is 18784 often used and I use one fewer register for the lifetime of 18785 output_strlen_unroll() this is better. */ 18786 18787 emit_move_insn (out, addr); 18788 18789 ix86_expand_strlensi_unroll_1 (out, src, align); 18790 18791 /* strlensi_unroll_1 returns the address of the zero at the end of 18792 the string, like memchr(), so compute the length by subtracting 18793 the start address. */ 18794 emit_insn ((*ix86_gen_sub3) (out, out, addr)); 18795 } 18796 else 18797 { 18798 rtx unspec; 18799 18800 /* Can't use this if the user has appropriated eax, ecx, or edi. */ 18801 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG]) 18802 return false; 18803 18804 scratch2 = gen_reg_rtx (Pmode); 18805 scratch3 = gen_reg_rtx (Pmode); 18806 scratch4 = force_reg (Pmode, constm1_rtx); 18807 18808 emit_move_insn (scratch3, addr); 18809 eoschar = force_reg (QImode, eoschar); 18810 18811 src = replace_equiv_address_nv (src, scratch3); 18812 18813 /* If .md starts supporting :P, this can be done in .md. */ 18814 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align, 18815 scratch4), UNSPEC_SCAS); 18816 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec)); 18817 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1)); 18818 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx)); 18819 } 18820 return 1; 18821 } 18822 18823 /* For given symbol (function) construct code to compute address of it's PLT 18824 entry in large x86-64 PIC model. */ 18825 rtx 18826 construct_plt_address (rtx symbol) 18827 { 18828 rtx tmp = gen_reg_rtx (Pmode); 18829 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF); 18830 18831 gcc_assert (GET_CODE (symbol) == SYMBOL_REF); 18832 gcc_assert (ix86_cmodel == CM_LARGE_PIC); 18833 18834 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec)); 18835 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx)); 18836 return tmp; 18837 } 18838 18839 void 18840 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, 18841 rtx callarg2, 18842 rtx pop, int sibcall) 18843 { 18844 rtx use = NULL, call; 18845 18846 if (pop == const0_rtx) 18847 pop = NULL; 18848 gcc_assert (!TARGET_64BIT || !pop); 18849 18850 if (TARGET_MACHO && !TARGET_64BIT) 18851 { 18852 #if TARGET_MACHO 18853 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) 18854 fnaddr = machopic_indirect_call_target (fnaddr); 18855 #endif 18856 } 18857 else 18858 { 18859 /* Static functions and indirect calls don't need the pic register. 
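Only a call that goes through the PLT needs the GOT pointer in the PIC register; local symbols are not called through the PLT and indirect calls already carry the complete address, so the PIC register need not be marked as used for them.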
*/ 18860 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC) 18861 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF 18862 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))) 18863 use_reg (&use, pic_offset_table_rtx); 18864 } 18865 18866 if (TARGET_64BIT && INTVAL (callarg2) >= 0) 18867 { 18868 rtx al = gen_rtx_REG (QImode, AX_REG); 18869 emit_move_insn (al, callarg2); 18870 use_reg (&use, al); 18871 } 18872 18873 if (ix86_cmodel == CM_LARGE_PIC 18874 && GET_CODE (fnaddr) == MEM 18875 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF 18876 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode)) 18877 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0))); 18878 else if (sibcall 18879 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode) 18880 : !call_insn_operand (XEXP (fnaddr, 0), Pmode)) 18881 { 18882 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0)); 18883 fnaddr = gen_rtx_MEM (QImode, fnaddr); 18884 } 18885 18886 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); 18887 if (retval) 18888 call = gen_rtx_SET (VOIDmode, retval, call); 18889 if (pop) 18890 { 18891 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop); 18892 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop); 18893 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop)); 18894 } 18895 if (TARGET_64BIT 18896 && ix86_cfun_abi () == MS_ABI 18897 && (!callarg2 || INTVAL (callarg2) != -2)) 18898 { 18899 /* We need to represent that SI and DI registers are clobbered 18900 by SYSV calls. */ 18901 static int clobbered_registers[] = { 18902 XMM6_REG, XMM7_REG, XMM8_REG, 18903 XMM9_REG, XMM10_REG, XMM11_REG, 18904 XMM12_REG, XMM13_REG, XMM14_REG, 18905 XMM15_REG, SI_REG, DI_REG 18906 }; 18907 unsigned int i; 18908 rtx vec[ARRAY_SIZE (clobbered_registers) + 2]; 18909 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), 18910 UNSPEC_MS_TO_SYSV_CALL); 18911 18912 vec[0] = call; 18913 vec[1] = unspec; 18914 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++) 18915 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i]) 18916 ? TImode : DImode, 18917 gen_rtx_REG 18918 (SSE_REGNO_P (clobbered_registers[i]) 18919 ? TImode : DImode, 18920 clobbered_registers[i])); 18921 18922 call = gen_rtx_PARALLEL (VOIDmode, 18923 gen_rtvec_v (ARRAY_SIZE (clobbered_registers) 18924 + 2, vec)); 18925 } 18926 18927 call = emit_call_insn (call); 18928 if (use) 18929 CALL_INSN_FUNCTION_USAGE (call) = use; 18930 } 18931 18932 18933 /* Clear stack slot assignments remembered from previous functions. 18934 This is called from INIT_EXPANDERS once before RTL is emitted for each 18935 function. */ 18936 18937 static struct machine_function * 18938 ix86_init_machine_status (void) 18939 { 18940 struct machine_function *f; 18941 18942 f = GGC_CNEW (struct machine_function); 18943 f->use_fast_prologue_epilogue_nregs = -1; 18944 f->tls_descriptor_call_expanded_p = 0; 18945 f->call_abi = DEFAULT_ABI; 18946 18947 return f; 18948 } 18949 18950 /* Return a MEM corresponding to a stack slot with mode MODE. 18951 Allocate a new slot if necessary. 18952 18953 The RTL for a function can have several slots available: N is 18954 which slot to use. */ 18955 18956 rtx 18957 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n) 18958 { 18959 struct stack_local_entry *s; 18960 18961 gcc_assert (n < MAX_386_STACK_LOCALS); 18962 18963 /* Virtual slot is valid only before vregs are instantiated. 
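In other words SLOT_VIRTUAL may only be requested during initial RTL expansion, and once instantiate_virtual_regs has run callers must ask for one of the concrete slots instead.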
*/ 18964 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated); 18965 18966 for (s = ix86_stack_locals; s; s = s->next) 18967 if (s->mode == mode && s->n == n) 18968 return copy_rtx (s->rtl); 18969 18970 s = (struct stack_local_entry *) 18971 ggc_alloc (sizeof (struct stack_local_entry)); 18972 s->n = n; 18973 s->mode = mode; 18974 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0); 18975 18976 s->next = ix86_stack_locals; 18977 ix86_stack_locals = s; 18978 return s->rtl; 18979 } 18980 18981 /* Construct the SYMBOL_REF for the tls_get_addr function. */ 18982 18983 static GTY(()) rtx ix86_tls_symbol; 18984 rtx 18985 ix86_tls_get_addr (void) 18986 { 18987 18988 if (!ix86_tls_symbol) 18989 { 18990 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, 18991 (TARGET_ANY_GNU_TLS 18992 && !TARGET_64BIT) 18993 ? "___tls_get_addr" 18994 : "__tls_get_addr"); 18995 } 18996 18997 return ix86_tls_symbol; 18998 } 18999 19000 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */ 19001 19002 static GTY(()) rtx ix86_tls_module_base_symbol; 19003 rtx 19004 ix86_tls_module_base (void) 19005 { 19006 19007 if (!ix86_tls_module_base_symbol) 19008 { 19009 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode, 19010 "_TLS_MODULE_BASE_"); 19011 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol) 19012 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; 19013 } 19014 19015 return ix86_tls_module_base_symbol; 19016 } 19017 19018 /* Calculate the length of the memory address in the instruction 19019 encoding. Does not include the one-byte modrm, opcode, or prefix. */ 19020 19021 int 19022 memory_address_length (rtx addr) 19023 { 19024 struct ix86_address parts; 19025 rtx base, index, disp; 19026 int len; 19027 int ok; 19028 19029 if (GET_CODE (addr) == PRE_DEC 19030 || GET_CODE (addr) == POST_INC 19031 || GET_CODE (addr) == PRE_MODIFY 19032 || GET_CODE (addr) == POST_MODIFY) 19033 return 0; 19034 19035 ok = ix86_decompose_address (addr, &parts); 19036 gcc_assert (ok); 19037 19038 if (parts.base && GET_CODE (parts.base) == SUBREG) 19039 parts.base = SUBREG_REG (parts.base); 19040 if (parts.index && GET_CODE (parts.index) == SUBREG) 19041 parts.index = SUBREG_REG (parts.index); 19042 19043 base = parts.base; 19044 index = parts.index; 19045 disp = parts.disp; 19046 len = 0; 19047 19048 /* Rule of thumb: 19049 - esp as the base always wants an index, 19050 - ebp as the base always wants a displacement. */ 19051 19052 /* Register Indirect. */ 19053 if (base && !index && !disp) 19054 { 19055 /* esp (for its index) and ebp (for its displacement) need 19056 the two-byte modrm form. */ 19057 if (addr == stack_pointer_rtx 19058 || addr == arg_pointer_rtx 19059 || addr == frame_pointer_rtx 19060 || addr == hard_frame_pointer_rtx) 19061 len = 1; 19062 } 19063 19064 /* Direct Addressing. */ 19065 else if (disp && !base && !index) 19066 len = 4; 19067 19068 else 19069 { 19070 /* Find the length of the displacement constant. */ 19071 if (disp) 19072 { 19073 if (base && satisfies_constraint_K (disp)) 19074 len = 1; 19075 else 19076 len = 4; 19077 } 19078 /* ebp always wants a displacement. */ 19079 else if (base == hard_frame_pointer_rtx) 19080 len = 1; 19081 19082 /* An index requires the two-byte modrm form.... */ 19083 if (index 19084 /* ...like esp, which always wants an index. */ 19085 || base == stack_pointer_rtx 19086 || base == arg_pointer_rtx 19087 || base == frame_pointer_rtx) 19088 len += 1; 19089 } 19090 19091 return len; 19092 } 19093 19094 /* Compute default value for "length_immediate" attribute. 
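The value is the number of bytes occupied by the immediate operand in the encoded instruction; insns without a constant operand get 0.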
When SHORTFORM 19095 is set, expect that insn have 8bit immediate alternative. */ 19096 int 19097 ix86_attr_length_immediate_default (rtx insn, int shortform) 19098 { 19099 int len = 0; 19100 int i; 19101 extract_insn_cached (insn); 19102 for (i = recog_data.n_operands - 1; i >= 0; --i) 19103 if (CONSTANT_P (recog_data.operand[i])) 19104 { 19105 gcc_assert (!len); 19106 if (shortform && satisfies_constraint_K (recog_data.operand[i])) 19107 len = 1; 19108 else 19109 { 19110 switch (get_attr_mode (insn)) 19111 { 19112 case MODE_QI: 19113 len+=1; 19114 break; 19115 case MODE_HI: 19116 len+=2; 19117 break; 19118 case MODE_SI: 19119 len+=4; 19120 break; 19121 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */ 19122 case MODE_DI: 19123 len+=4; 19124 break; 19125 default: 19126 fatal_insn ("unknown insn mode", insn); 19127 } 19128 } 19129 } 19130 return len; 19131 } 19132 /* Compute default value for "length_address" attribute. */ 19133 int 19134 ix86_attr_length_address_default (rtx insn) 19135 { 19136 int i; 19137 19138 if (get_attr_type (insn) == TYPE_LEA) 19139 { 19140 rtx set = PATTERN (insn); 19141 19142 if (GET_CODE (set) == PARALLEL) 19143 set = XVECEXP (set, 0, 0); 19144 19145 gcc_assert (GET_CODE (set) == SET); 19146 19147 return memory_address_length (SET_SRC (set)); 19148 } 19149 19150 extract_insn_cached (insn); 19151 for (i = recog_data.n_operands - 1; i >= 0; --i) 19152 if (MEM_P (recog_data.operand[i])) 19153 { 19154 return memory_address_length (XEXP (recog_data.operand[i], 0)); 19155 break; 19156 } 19157 return 0; 19158 } 19159 19160 /* Compute default value for "length_vex" attribute. It includes 19161 2 or 3 byte VEX prefix and 1 opcode byte. */ 19162 19163 int 19164 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode, 19165 int has_vex_w) 19166 { 19167 int i; 19168 19169 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3 19170 byte VEX prefix. */ 19171 if (!has_0f_opcode || has_vex_w) 19172 return 3 + 1; 19173 19174 /* We can always use 2 byte VEX prefix in 32bit. */ 19175 if (!TARGET_64BIT) 19176 return 2 + 1; 19177 19178 extract_insn_cached (insn); 19179 19180 for (i = recog_data.n_operands - 1; i >= 0; --i) 19181 if (REG_P (recog_data.operand[i])) 19182 { 19183 /* REX.W bit uses 3 byte VEX prefix. */ 19184 if (GET_MODE (recog_data.operand[i]) == DImode) 19185 return 3 + 1; 19186 } 19187 else 19188 { 19189 /* REX.X or REX.B bits use 3 byte VEX prefix. */ 19190 if (MEM_P (recog_data.operand[i]) 19191 && x86_extended_reg_mentioned_p (recog_data.operand[i])) 19192 return 3 + 1; 19193 } 19194 19195 return 2 + 1; 19196 } 19197 19198 /* Return the maximum number of instructions a cpu can issue. */ 19199 19200 static int 19201 ix86_issue_rate (void) 19202 { 19203 switch (ix86_tune) 19204 { 19205 case PROCESSOR_PENTIUM: 19206 case PROCESSOR_K6: 19207 return 2; 19208 19209 case PROCESSOR_PENTIUMPRO: 19210 case PROCESSOR_PENTIUM4: 19211 case PROCESSOR_ATHLON: 19212 case PROCESSOR_K8: 19213 case PROCESSOR_AMDFAM10: 19214 case PROCESSOR_NOCONA: 19215 case PROCESSOR_GENERIC32: 19216 case PROCESSOR_GENERIC64: 19217 return 3; 19218 19219 case PROCESSOR_CORE2: 19220 return 4; 19221 19222 default: 19223 return 1; 19224 } 19225 } 19226 19227 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set 19228 by DEP_INSN and nothing set by DEP_INSN. 
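The interesting producers are compare-like insns whose only live result is the flags; ix86_adjust_cost uses this on the Pentium, where a compare is expected to pair with the conditional jump or setcc that consumes its flags, and drops the dependence cost to zero in that case.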
*/ 19229 19230 static int 19231 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) 19232 { 19233 rtx set, set2; 19234 19235 /* Simplify the test for uninteresting insns. */ 19236 if (insn_type != TYPE_SETCC 19237 && insn_type != TYPE_ICMOV 19238 && insn_type != TYPE_FCMOV 19239 && insn_type != TYPE_IBR) 19240 return 0; 19241 19242 if ((set = single_set (dep_insn)) != 0) 19243 { 19244 set = SET_DEST (set); 19245 set2 = NULL_RTX; 19246 } 19247 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL 19248 && XVECLEN (PATTERN (dep_insn), 0) == 2 19249 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET 19250 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET) 19251 { 19252 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 19253 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 19254 } 19255 else 19256 return 0; 19257 19258 if (!REG_P (set) || REGNO (set) != FLAGS_REG) 19259 return 0; 19260 19261 /* This test is true if the dependent insn reads the flags but 19262 not any other potentially set register. */ 19263 if (!reg_overlap_mentioned_p (set, PATTERN (insn))) 19264 return 0; 19265 19266 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn))) 19267 return 0; 19268 19269 return 1; 19270 } 19271 19272 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory 19273 address with operands set by DEP_INSN. */ 19274 19275 static int 19276 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) 19277 { 19278 rtx addr; 19279 19280 if (insn_type == TYPE_LEA 19281 && TARGET_PENTIUM) 19282 { 19283 addr = PATTERN (insn); 19284 19285 if (GET_CODE (addr) == PARALLEL) 19286 addr = XVECEXP (addr, 0, 0); 19287 19288 gcc_assert (GET_CODE (addr) == SET); 19289 19290 addr = SET_SRC (addr); 19291 } 19292 else 19293 { 19294 int i; 19295 extract_insn_cached (insn); 19296 for (i = recog_data.n_operands - 1; i >= 0; --i) 19297 if (MEM_P (recog_data.operand[i])) 19298 { 19299 addr = XEXP (recog_data.operand[i], 0); 19300 goto found; 19301 } 19302 return 0; 19303 found:; 19304 } 19305 19306 return modified_in_p (addr, dep_insn); 19307 } 19308 19309 static int 19310 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) 19311 { 19312 enum attr_type insn_type, dep_insn_type; 19313 enum attr_memory memory; 19314 rtx set, set2; 19315 int dep_insn_code_number; 19316 19317 /* Anti and output dependencies have zero cost on all CPUs. */ 19318 if (REG_NOTE_KIND (link) != 0) 19319 return 0; 19320 19321 dep_insn_code_number = recog_memoized (dep_insn); 19322 19323 /* If we can't recognize the insns, we can't really do anything. */ 19324 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0) 19325 return cost; 19326 19327 insn_type = get_attr_type (insn); 19328 dep_insn_type = get_attr_type (dep_insn); 19329 19330 switch (ix86_tune) 19331 { 19332 case PROCESSOR_PENTIUM: 19333 /* Address Generation Interlock adds a cycle of latency. */ 19334 if (ix86_agi_dependent (insn, dep_insn, insn_type)) 19335 cost += 1; 19336 19337 /* ??? Compares pair with jump/setcc. */ 19338 if (ix86_flags_dependent (insn, dep_insn, insn_type)) 19339 cost = 0; 19340 19341 /* Floating point stores require value to be ready one cycle earlier. */ 19342 if (insn_type == TYPE_FMOV 19343 && get_attr_memory (insn) == MEMORY_STORE 19344 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 19345 cost += 1; 19346 break; 19347 19348 case PROCESSOR_PENTIUMPRO: 19349 memory = get_attr_memory (insn); 19350 19351 /* INT->FP conversion is expensive. 
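The producer here is typically a fild or cvtsi2ss-style insn whose latency is well above that of ordinary integer ops, hence the flat five-cycle penalty added below.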
*/ 19352 if (get_attr_fp_int_src (dep_insn)) 19353 cost += 5; 19354 19355 /* There is one cycle extra latency between an FP op and a store. */ 19356 if (insn_type == TYPE_FMOV 19357 && (set = single_set (dep_insn)) != NULL_RTX 19358 && (set2 = single_set (insn)) != NULL_RTX 19359 && rtx_equal_p (SET_DEST (set), SET_SRC (set2)) 19360 && MEM_P (SET_DEST (set2))) 19361 cost += 1; 19362 19363 /* Show ability of reorder buffer to hide latency of load by executing 19364 in parallel with previous instruction in case 19365 previous instruction is not needed to compute the address. */ 19366 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 19367 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 19368 { 19369 /* Claim moves to take one cycle, as core can issue one load 19370 at time and the next load can start cycle later. */ 19371 if (dep_insn_type == TYPE_IMOV 19372 || dep_insn_type == TYPE_FMOV) 19373 cost = 1; 19374 else if (cost > 1) 19375 cost--; 19376 } 19377 break; 19378 19379 case PROCESSOR_K6: 19380 memory = get_attr_memory (insn); 19381 19382 /* The esp dependency is resolved before the instruction is really 19383 finished. */ 19384 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) 19385 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) 19386 return 1; 19387 19388 /* INT->FP conversion is expensive. */ 19389 if (get_attr_fp_int_src (dep_insn)) 19390 cost += 5; 19391 19392 /* Show ability of reorder buffer to hide latency of load by executing 19393 in parallel with previous instruction in case 19394 previous instruction is not needed to compute the address. */ 19395 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 19396 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 19397 { 19398 /* Claim moves to take one cycle, as core can issue one load 19399 at time and the next load can start cycle later. */ 19400 if (dep_insn_type == TYPE_IMOV 19401 || dep_insn_type == TYPE_FMOV) 19402 cost = 1; 19403 else if (cost > 2) 19404 cost -= 2; 19405 else 19406 cost = 1; 19407 } 19408 break; 19409 19410 case PROCESSOR_ATHLON: 19411 case PROCESSOR_K8: 19412 case PROCESSOR_AMDFAM10: 19413 case PROCESSOR_GENERIC32: 19414 case PROCESSOR_GENERIC64: 19415 memory = get_attr_memory (insn); 19416 19417 /* Show ability of reorder buffer to hide latency of load by executing 19418 in parallel with previous instruction in case 19419 previous instruction is not needed to compute the address. */ 19420 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 19421 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 19422 { 19423 enum attr_unit unit = get_attr_unit (insn); 19424 int loadcost = 3; 19425 19426 /* Because of the difference between the length of integer and 19427 floating unit pipeline preparation stages, the memory operands 19428 for floating point are cheaper. 19429 19430 ??? For Athlon it the difference is most probably 2. */ 19431 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN) 19432 loadcost = 3; 19433 else 19434 loadcost = TARGET_ATHLON ? 2 : 0; 19435 19436 if (cost >= loadcost) 19437 cost -= loadcost; 19438 else 19439 cost = 0; 19440 } 19441 19442 default: 19443 break; 19444 } 19445 19446 return cost; 19447 } 19448 19449 /* How many alternative schedules to try. This should be as wide as the 19450 scheduling freedom in the DFA, but no wider. Making this value too 19451 large results extra work for the scheduler. 
*/ 19452 19453 static int 19454 ia32_multipass_dfa_lookahead (void) 19455 { 19456 switch (ix86_tune) 19457 { 19458 case PROCESSOR_PENTIUM: 19459 return 2; 19460 19461 case PROCESSOR_PENTIUMPRO: 19462 case PROCESSOR_K6: 19463 return 1; 19464 19465 default: 19466 return 0; 19467 } 19468 } 19469 19470 19471 /* Compute the alignment given to a constant that is being placed in memory. 19472 EXP is the constant and ALIGN is the alignment that the object would 19473 ordinarily have. 19474 The value of this function is used instead of that alignment to align 19475 the object. */ 19476 19477 int 19478 ix86_constant_alignment (tree exp, int align) 19479 { 19480 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST 19481 || TREE_CODE (exp) == INTEGER_CST) 19482 { 19483 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64) 19484 return 64; 19485 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128) 19486 return 128; 19487 } 19488 else if (!optimize_size && TREE_CODE (exp) == STRING_CST 19489 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD) 19490 return BITS_PER_WORD; 19491 19492 return align; 19493 } 19494 19495 /* Compute the alignment for a static variable. 19496 TYPE is the data type, and ALIGN is the alignment that 19497 the object would ordinarily have. The value of this function is used 19498 instead of that alignment to align the object. */ 19499 19500 int 19501 ix86_data_alignment (tree type, int align) 19502 { 19503 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT); 19504 19505 if (AGGREGATE_TYPE_P (type) 19506 && TYPE_SIZE (type) 19507 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 19508 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align 19509 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) 19510 && align < max_align) 19511 align = max_align; 19512 19513 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned 19514 to 16byte boundary. */ 19515 if (TARGET_64BIT) 19516 { 19517 if (AGGREGATE_TYPE_P (type) 19518 && TYPE_SIZE (type) 19519 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 19520 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128 19521 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) 19522 return 128; 19523 } 19524 19525 if (TREE_CODE (type) == ARRAY_TYPE) 19526 { 19527 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) 19528 return 64; 19529 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) 19530 return 128; 19531 } 19532 else if (TREE_CODE (type) == COMPLEX_TYPE) 19533 { 19534 19535 if (TYPE_MODE (type) == DCmode && align < 64) 19536 return 64; 19537 if ((TYPE_MODE (type) == XCmode 19538 || TYPE_MODE (type) == TCmode) && align < 128) 19539 return 128; 19540 } 19541 else if ((TREE_CODE (type) == RECORD_TYPE 19542 || TREE_CODE (type) == UNION_TYPE 19543 || TREE_CODE (type) == QUAL_UNION_TYPE) 19544 && TYPE_FIELDS (type)) 19545 { 19546 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) 19547 return 64; 19548 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) 19549 return 128; 19550 } 19551 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE 19552 || TREE_CODE (type) == INTEGER_TYPE) 19553 { 19554 if (TYPE_MODE (type) == DFmode && align < 64) 19555 return 64; 19556 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) 19557 return 128; 19558 } 19559 19560 return align; 19561 } 19562 19563 /* Compute the alignment for a local variable or a stack slot. 
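As in ix86_data_alignment above, doubles and 128-bit modes are bumped to 64-bit and 128-bit alignment respectively when the alignment they would otherwise get is smaller.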
EXP is 19564 the data type or decl itself, MODE is the widest mode available and 19565 ALIGN is the alignment that the object would ordinarily have. The 19566 value of this macro is used instead of that alignment to align the 19567 object. */ 19568 19569 unsigned int 19570 ix86_local_alignment (tree exp, enum machine_mode mode, 19571 unsigned int align) 19572 { 19573 tree type, decl; 19574 19575 if (exp && DECL_P (exp)) 19576 { 19577 type = TREE_TYPE (exp); 19578 decl = exp; 19579 } 19580 else 19581 { 19582 type = exp; 19583 decl = NULL; 19584 } 19585 19586 /* Don't do dynamic stack realignment for long long objects with 19587 -mpreferred-stack-boundary=2. */ 19588 if (!TARGET_64BIT 19589 && align == 64 19590 && ix86_preferred_stack_boundary < 64 19591 && (mode == DImode || (type && TYPE_MODE (type) == DImode)) 19592 && (!type || !TYPE_USER_ALIGN (type)) 19593 && (!decl || !DECL_USER_ALIGN (decl))) 19594 align = 32; 19595 19596 /* If TYPE is NULL, we are allocating a stack slot for caller-save 19597 register in MODE. We will return the largest alignment of XF 19598 and DF. */ 19599 if (!type) 19600 { 19601 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode)) 19602 align = GET_MODE_ALIGNMENT (DFmode); 19603 return align; 19604 } 19605 19606 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned 19607 to 16byte boundary. */ 19608 if (TARGET_64BIT) 19609 { 19610 if (AGGREGATE_TYPE_P (type) 19611 && TYPE_SIZE (type) 19612 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 19613 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16 19614 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) 19615 return 128; 19616 } 19617 if (TREE_CODE (type) == ARRAY_TYPE) 19618 { 19619 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) 19620 return 64; 19621 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) 19622 return 128; 19623 } 19624 else if (TREE_CODE (type) == COMPLEX_TYPE) 19625 { 19626 if (TYPE_MODE (type) == DCmode && align < 64) 19627 return 64; 19628 if ((TYPE_MODE (type) == XCmode 19629 || TYPE_MODE (type) == TCmode) && align < 128) 19630 return 128; 19631 } 19632 else if ((TREE_CODE (type) == RECORD_TYPE 19633 || TREE_CODE (type) == UNION_TYPE 19634 || TREE_CODE (type) == QUAL_UNION_TYPE) 19635 && TYPE_FIELDS (type)) 19636 { 19637 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) 19638 return 64; 19639 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) 19640 return 128; 19641 } 19642 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE 19643 || TREE_CODE (type) == INTEGER_TYPE) 19644 { 19645 19646 if (TYPE_MODE (type) == DFmode && align < 64) 19647 return 64; 19648 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) 19649 return 128; 19650 } 19651 return align; 19652 } 19653 19654 /* Compute the minimum required alignment for dynamic stack realignment 19655 purposes for a local variable, parameter or a stack slot. EXP is 19656 the data type or decl itself, MODE is its mode and ALIGN is the 19657 alignment that the object would ordinarily have. 
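The only reduction made is for 32-bit code with -mpreferred-stack-boundary=2, where long long (DImode) objects are left at 32-bit alignment so that they alone do not force dynamic realignment of the stack frame.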
*/ 19658 19659 unsigned int 19660 ix86_minimum_alignment (tree exp, enum machine_mode mode, 19661 unsigned int align) 19662 { 19663 tree type, decl; 19664 19665 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64) 19666 return align; 19667 19668 if (exp && DECL_P (exp)) 19669 { 19670 type = TREE_TYPE (exp); 19671 decl = exp; 19672 } 19673 else 19674 { 19675 type = exp; 19676 decl = NULL; 19677 } 19678 19679 /* Don't do dynamic stack realignment for long long objects with 19680 -mpreferred-stack-boundary=2. */ 19681 if ((mode == DImode || (type && TYPE_MODE (type) == DImode)) 19682 && (!type || !TYPE_USER_ALIGN (type)) 19683 && (!decl || !DECL_USER_ALIGN (decl))) 19684 return 32; 19685 19686 return align; 19687 } 19688 19689 /* Emit RTL insns to initialize the variable parts of a trampoline. 19690 FNADDR is an RTX for the address of the function's pure code. 19691 CXT is an RTX for the static chain value for the function. */ 19692 void 19693 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt) 19694 { 19695 if (!TARGET_64BIT) 19696 { 19697 /* Compute offset from the end of the jmp to the target function. */ 19698 rtx disp = expand_binop (SImode, sub_optab, fnaddr, 19699 plus_constant (tramp, 10), 19700 NULL_RTX, 1, OPTAB_DIRECT); 19701 emit_move_insn (gen_rtx_MEM (QImode, tramp), 19702 gen_int_mode (0xb9, QImode)); 19703 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt); 19704 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)), 19705 gen_int_mode (0xe9, QImode)); 19706 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp); 19707 } 19708 else 19709 { 19710 int offset = 0; 19711 /* Try to load address using shorter movl instead of movabs. 19712 We may want to support movq for kernel mode, but kernel does not use 19713 trampolines at the moment. */ 19714 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode)) 19715 { 19716 fnaddr = copy_to_mode_reg (DImode, fnaddr); 19717 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 19718 gen_int_mode (0xbb41, HImode)); 19719 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)), 19720 gen_lowpart (SImode, fnaddr)); 19721 offset += 6; 19722 } 19723 else 19724 { 19725 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 19726 gen_int_mode (0xbb49, HImode)); 19727 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)), 19728 fnaddr); 19729 offset += 10; 19730 } 19731 /* Load static chain using movabs to r10. */ 19732 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 19733 gen_int_mode (0xba49, HImode)); 19734 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)), 19735 cxt); 19736 offset += 10; 19737 /* Jump to the r11 */ 19738 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 19739 gen_int_mode (0xff49, HImode)); 19740 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)), 19741 gen_int_mode (0xe3, QImode)); 19742 offset += 3; 19743 gcc_assert (offset <= TRAMPOLINE_SIZE); 19744 } 19745 19746 #ifdef ENABLE_EXECUTE_STACK 19747 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), 19748 LCT_NORMAL, VOIDmode, 1, tramp, Pmode); 19749 #endif 19750 } 19751 19752 /* Codes for all the SSE/MMX builtins. 
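Each enumerator identifies one ix86 builtin; the value is recorded as the builtin decl's function code and is used to index the ix86_builtins and ix86_builtins_isa tables defined after the enum.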
*/ 19753 enum ix86_builtins 19754 { 19755 IX86_BUILTIN_ADDPS, 19756 IX86_BUILTIN_ADDSS, 19757 IX86_BUILTIN_DIVPS, 19758 IX86_BUILTIN_DIVSS, 19759 IX86_BUILTIN_MULPS, 19760 IX86_BUILTIN_MULSS, 19761 IX86_BUILTIN_SUBPS, 19762 IX86_BUILTIN_SUBSS, 19763 19764 IX86_BUILTIN_CMPEQPS, 19765 IX86_BUILTIN_CMPLTPS, 19766 IX86_BUILTIN_CMPLEPS, 19767 IX86_BUILTIN_CMPGTPS, 19768 IX86_BUILTIN_CMPGEPS, 19769 IX86_BUILTIN_CMPNEQPS, 19770 IX86_BUILTIN_CMPNLTPS, 19771 IX86_BUILTIN_CMPNLEPS, 19772 IX86_BUILTIN_CMPNGTPS, 19773 IX86_BUILTIN_CMPNGEPS, 19774 IX86_BUILTIN_CMPORDPS, 19775 IX86_BUILTIN_CMPUNORDPS, 19776 IX86_BUILTIN_CMPEQSS, 19777 IX86_BUILTIN_CMPLTSS, 19778 IX86_BUILTIN_CMPLESS, 19779 IX86_BUILTIN_CMPNEQSS, 19780 IX86_BUILTIN_CMPNLTSS, 19781 IX86_BUILTIN_CMPNLESS, 19782 IX86_BUILTIN_CMPNGTSS, 19783 IX86_BUILTIN_CMPNGESS, 19784 IX86_BUILTIN_CMPORDSS, 19785 IX86_BUILTIN_CMPUNORDSS, 19786 19787 IX86_BUILTIN_COMIEQSS, 19788 IX86_BUILTIN_COMILTSS, 19789 IX86_BUILTIN_COMILESS, 19790 IX86_BUILTIN_COMIGTSS, 19791 IX86_BUILTIN_COMIGESS, 19792 IX86_BUILTIN_COMINEQSS, 19793 IX86_BUILTIN_UCOMIEQSS, 19794 IX86_BUILTIN_UCOMILTSS, 19795 IX86_BUILTIN_UCOMILESS, 19796 IX86_BUILTIN_UCOMIGTSS, 19797 IX86_BUILTIN_UCOMIGESS, 19798 IX86_BUILTIN_UCOMINEQSS, 19799 19800 IX86_BUILTIN_CVTPI2PS, 19801 IX86_BUILTIN_CVTPS2PI, 19802 IX86_BUILTIN_CVTSI2SS, 19803 IX86_BUILTIN_CVTSI642SS, 19804 IX86_BUILTIN_CVTSS2SI, 19805 IX86_BUILTIN_CVTSS2SI64, 19806 IX86_BUILTIN_CVTTPS2PI, 19807 IX86_BUILTIN_CVTTSS2SI, 19808 IX86_BUILTIN_CVTTSS2SI64, 19809 19810 IX86_BUILTIN_MAXPS, 19811 IX86_BUILTIN_MAXSS, 19812 IX86_BUILTIN_MINPS, 19813 IX86_BUILTIN_MINSS, 19814 19815 IX86_BUILTIN_LOADUPS, 19816 IX86_BUILTIN_STOREUPS, 19817 IX86_BUILTIN_MOVSS, 19818 19819 IX86_BUILTIN_MOVHLPS, 19820 IX86_BUILTIN_MOVLHPS, 19821 IX86_BUILTIN_LOADHPS, 19822 IX86_BUILTIN_LOADLPS, 19823 IX86_BUILTIN_STOREHPS, 19824 IX86_BUILTIN_STORELPS, 19825 19826 IX86_BUILTIN_MASKMOVQ, 19827 IX86_BUILTIN_MOVMSKPS, 19828 IX86_BUILTIN_PMOVMSKB, 19829 19830 IX86_BUILTIN_MOVNTPS, 19831 IX86_BUILTIN_MOVNTQ, 19832 19833 IX86_BUILTIN_LOADDQU, 19834 IX86_BUILTIN_STOREDQU, 19835 19836 IX86_BUILTIN_PACKSSWB, 19837 IX86_BUILTIN_PACKSSDW, 19838 IX86_BUILTIN_PACKUSWB, 19839 19840 IX86_BUILTIN_PADDB, 19841 IX86_BUILTIN_PADDW, 19842 IX86_BUILTIN_PADDD, 19843 IX86_BUILTIN_PADDQ, 19844 IX86_BUILTIN_PADDSB, 19845 IX86_BUILTIN_PADDSW, 19846 IX86_BUILTIN_PADDUSB, 19847 IX86_BUILTIN_PADDUSW, 19848 IX86_BUILTIN_PSUBB, 19849 IX86_BUILTIN_PSUBW, 19850 IX86_BUILTIN_PSUBD, 19851 IX86_BUILTIN_PSUBQ, 19852 IX86_BUILTIN_PSUBSB, 19853 IX86_BUILTIN_PSUBSW, 19854 IX86_BUILTIN_PSUBUSB, 19855 IX86_BUILTIN_PSUBUSW, 19856 19857 IX86_BUILTIN_PAND, 19858 IX86_BUILTIN_PANDN, 19859 IX86_BUILTIN_POR, 19860 IX86_BUILTIN_PXOR, 19861 19862 IX86_BUILTIN_PAVGB, 19863 IX86_BUILTIN_PAVGW, 19864 19865 IX86_BUILTIN_PCMPEQB, 19866 IX86_BUILTIN_PCMPEQW, 19867 IX86_BUILTIN_PCMPEQD, 19868 IX86_BUILTIN_PCMPGTB, 19869 IX86_BUILTIN_PCMPGTW, 19870 IX86_BUILTIN_PCMPGTD, 19871 19872 IX86_BUILTIN_PMADDWD, 19873 19874 IX86_BUILTIN_PMAXSW, 19875 IX86_BUILTIN_PMAXUB, 19876 IX86_BUILTIN_PMINSW, 19877 IX86_BUILTIN_PMINUB, 19878 19879 IX86_BUILTIN_PMULHUW, 19880 IX86_BUILTIN_PMULHW, 19881 IX86_BUILTIN_PMULLW, 19882 19883 IX86_BUILTIN_PSADBW, 19884 IX86_BUILTIN_PSHUFW, 19885 19886 IX86_BUILTIN_PSLLW, 19887 IX86_BUILTIN_PSLLD, 19888 IX86_BUILTIN_PSLLQ, 19889 IX86_BUILTIN_PSRAW, 19890 IX86_BUILTIN_PSRAD, 19891 IX86_BUILTIN_PSRLW, 19892 IX86_BUILTIN_PSRLD, 19893 IX86_BUILTIN_PSRLQ, 19894 IX86_BUILTIN_PSLLWI, 19895 IX86_BUILTIN_PSLLDI, 19896 
IX86_BUILTIN_PSLLQI, 19897 IX86_BUILTIN_PSRAWI, 19898 IX86_BUILTIN_PSRADI, 19899 IX86_BUILTIN_PSRLWI, 19900 IX86_BUILTIN_PSRLDI, 19901 IX86_BUILTIN_PSRLQI, 19902 19903 IX86_BUILTIN_PUNPCKHBW, 19904 IX86_BUILTIN_PUNPCKHWD, 19905 IX86_BUILTIN_PUNPCKHDQ, 19906 IX86_BUILTIN_PUNPCKLBW, 19907 IX86_BUILTIN_PUNPCKLWD, 19908 IX86_BUILTIN_PUNPCKLDQ, 19909 19910 IX86_BUILTIN_SHUFPS, 19911 19912 IX86_BUILTIN_RCPPS, 19913 IX86_BUILTIN_RCPSS, 19914 IX86_BUILTIN_RSQRTPS, 19915 IX86_BUILTIN_RSQRTPS_NR, 19916 IX86_BUILTIN_RSQRTSS, 19917 IX86_BUILTIN_RSQRTF, 19918 IX86_BUILTIN_SQRTPS, 19919 IX86_BUILTIN_SQRTPS_NR, 19920 IX86_BUILTIN_SQRTSS, 19921 19922 IX86_BUILTIN_UNPCKHPS, 19923 IX86_BUILTIN_UNPCKLPS, 19924 19925 IX86_BUILTIN_ANDPS, 19926 IX86_BUILTIN_ANDNPS, 19927 IX86_BUILTIN_ORPS, 19928 IX86_BUILTIN_XORPS, 19929 19930 IX86_BUILTIN_EMMS, 19931 IX86_BUILTIN_LDMXCSR, 19932 IX86_BUILTIN_STMXCSR, 19933 IX86_BUILTIN_SFENCE, 19934 19935 /* 3DNow! Original */ 19936 IX86_BUILTIN_FEMMS, 19937 IX86_BUILTIN_PAVGUSB, 19938 IX86_BUILTIN_PF2ID, 19939 IX86_BUILTIN_PFACC, 19940 IX86_BUILTIN_PFADD, 19941 IX86_BUILTIN_PFCMPEQ, 19942 IX86_BUILTIN_PFCMPGE, 19943 IX86_BUILTIN_PFCMPGT, 19944 IX86_BUILTIN_PFMAX, 19945 IX86_BUILTIN_PFMIN, 19946 IX86_BUILTIN_PFMUL, 19947 IX86_BUILTIN_PFRCP, 19948 IX86_BUILTIN_PFRCPIT1, 19949 IX86_BUILTIN_PFRCPIT2, 19950 IX86_BUILTIN_PFRSQIT1, 19951 IX86_BUILTIN_PFRSQRT, 19952 IX86_BUILTIN_PFSUB, 19953 IX86_BUILTIN_PFSUBR, 19954 IX86_BUILTIN_PI2FD, 19955 IX86_BUILTIN_PMULHRW, 19956 19957 /* 3DNow! Athlon Extensions */ 19958 IX86_BUILTIN_PF2IW, 19959 IX86_BUILTIN_PFNACC, 19960 IX86_BUILTIN_PFPNACC, 19961 IX86_BUILTIN_PI2FW, 19962 IX86_BUILTIN_PSWAPDSI, 19963 IX86_BUILTIN_PSWAPDSF, 19964 19965 /* SSE2 */ 19966 IX86_BUILTIN_ADDPD, 19967 IX86_BUILTIN_ADDSD, 19968 IX86_BUILTIN_DIVPD, 19969 IX86_BUILTIN_DIVSD, 19970 IX86_BUILTIN_MULPD, 19971 IX86_BUILTIN_MULSD, 19972 IX86_BUILTIN_SUBPD, 19973 IX86_BUILTIN_SUBSD, 19974 19975 IX86_BUILTIN_CMPEQPD, 19976 IX86_BUILTIN_CMPLTPD, 19977 IX86_BUILTIN_CMPLEPD, 19978 IX86_BUILTIN_CMPGTPD, 19979 IX86_BUILTIN_CMPGEPD, 19980 IX86_BUILTIN_CMPNEQPD, 19981 IX86_BUILTIN_CMPNLTPD, 19982 IX86_BUILTIN_CMPNLEPD, 19983 IX86_BUILTIN_CMPNGTPD, 19984 IX86_BUILTIN_CMPNGEPD, 19985 IX86_BUILTIN_CMPORDPD, 19986 IX86_BUILTIN_CMPUNORDPD, 19987 IX86_BUILTIN_CMPEQSD, 19988 IX86_BUILTIN_CMPLTSD, 19989 IX86_BUILTIN_CMPLESD, 19990 IX86_BUILTIN_CMPNEQSD, 19991 IX86_BUILTIN_CMPNLTSD, 19992 IX86_BUILTIN_CMPNLESD, 19993 IX86_BUILTIN_CMPORDSD, 19994 IX86_BUILTIN_CMPUNORDSD, 19995 19996 IX86_BUILTIN_COMIEQSD, 19997 IX86_BUILTIN_COMILTSD, 19998 IX86_BUILTIN_COMILESD, 19999 IX86_BUILTIN_COMIGTSD, 20000 IX86_BUILTIN_COMIGESD, 20001 IX86_BUILTIN_COMINEQSD, 20002 IX86_BUILTIN_UCOMIEQSD, 20003 IX86_BUILTIN_UCOMILTSD, 20004 IX86_BUILTIN_UCOMILESD, 20005 IX86_BUILTIN_UCOMIGTSD, 20006 IX86_BUILTIN_UCOMIGESD, 20007 IX86_BUILTIN_UCOMINEQSD, 20008 20009 IX86_BUILTIN_MAXPD, 20010 IX86_BUILTIN_MAXSD, 20011 IX86_BUILTIN_MINPD, 20012 IX86_BUILTIN_MINSD, 20013 20014 IX86_BUILTIN_ANDPD, 20015 IX86_BUILTIN_ANDNPD, 20016 IX86_BUILTIN_ORPD, 20017 IX86_BUILTIN_XORPD, 20018 20019 IX86_BUILTIN_SQRTPD, 20020 IX86_BUILTIN_SQRTSD, 20021 20022 IX86_BUILTIN_UNPCKHPD, 20023 IX86_BUILTIN_UNPCKLPD, 20024 20025 IX86_BUILTIN_SHUFPD, 20026 20027 IX86_BUILTIN_LOADUPD, 20028 IX86_BUILTIN_STOREUPD, 20029 IX86_BUILTIN_MOVSD, 20030 20031 IX86_BUILTIN_LOADHPD, 20032 IX86_BUILTIN_LOADLPD, 20033 20034 IX86_BUILTIN_CVTDQ2PD, 20035 IX86_BUILTIN_CVTDQ2PS, 20036 20037 IX86_BUILTIN_CVTPD2DQ, 20038 IX86_BUILTIN_CVTPD2PI, 20039 
IX86_BUILTIN_CVTPD2PS, 20040 IX86_BUILTIN_CVTTPD2DQ, 20041 IX86_BUILTIN_CVTTPD2PI, 20042 20043 IX86_BUILTIN_CVTPI2PD, 20044 IX86_BUILTIN_CVTSI2SD, 20045 IX86_BUILTIN_CVTSI642SD, 20046 20047 IX86_BUILTIN_CVTSD2SI, 20048 IX86_BUILTIN_CVTSD2SI64, 20049 IX86_BUILTIN_CVTSD2SS, 20050 IX86_BUILTIN_CVTSS2SD, 20051 IX86_BUILTIN_CVTTSD2SI, 20052 IX86_BUILTIN_CVTTSD2SI64, 20053 20054 IX86_BUILTIN_CVTPS2DQ, 20055 IX86_BUILTIN_CVTPS2PD, 20056 IX86_BUILTIN_CVTTPS2DQ, 20057 20058 IX86_BUILTIN_MOVNTI, 20059 IX86_BUILTIN_MOVNTPD, 20060 IX86_BUILTIN_MOVNTDQ, 20061 20062 IX86_BUILTIN_MOVQ128, 20063 20064 /* SSE2 MMX */ 20065 IX86_BUILTIN_MASKMOVDQU, 20066 IX86_BUILTIN_MOVMSKPD, 20067 IX86_BUILTIN_PMOVMSKB128, 20068 20069 IX86_BUILTIN_PACKSSWB128, 20070 IX86_BUILTIN_PACKSSDW128, 20071 IX86_BUILTIN_PACKUSWB128, 20072 20073 IX86_BUILTIN_PADDB128, 20074 IX86_BUILTIN_PADDW128, 20075 IX86_BUILTIN_PADDD128, 20076 IX86_BUILTIN_PADDQ128, 20077 IX86_BUILTIN_PADDSB128, 20078 IX86_BUILTIN_PADDSW128, 20079 IX86_BUILTIN_PADDUSB128, 20080 IX86_BUILTIN_PADDUSW128, 20081 IX86_BUILTIN_PSUBB128, 20082 IX86_BUILTIN_PSUBW128, 20083 IX86_BUILTIN_PSUBD128, 20084 IX86_BUILTIN_PSUBQ128, 20085 IX86_BUILTIN_PSUBSB128, 20086 IX86_BUILTIN_PSUBSW128, 20087 IX86_BUILTIN_PSUBUSB128, 20088 IX86_BUILTIN_PSUBUSW128, 20089 20090 IX86_BUILTIN_PAND128, 20091 IX86_BUILTIN_PANDN128, 20092 IX86_BUILTIN_POR128, 20093 IX86_BUILTIN_PXOR128, 20094 20095 IX86_BUILTIN_PAVGB128, 20096 IX86_BUILTIN_PAVGW128, 20097 20098 IX86_BUILTIN_PCMPEQB128, 20099 IX86_BUILTIN_PCMPEQW128, 20100 IX86_BUILTIN_PCMPEQD128, 20101 IX86_BUILTIN_PCMPGTB128, 20102 IX86_BUILTIN_PCMPGTW128, 20103 IX86_BUILTIN_PCMPGTD128, 20104 20105 IX86_BUILTIN_PMADDWD128, 20106 20107 IX86_BUILTIN_PMAXSW128, 20108 IX86_BUILTIN_PMAXUB128, 20109 IX86_BUILTIN_PMINSW128, 20110 IX86_BUILTIN_PMINUB128, 20111 20112 IX86_BUILTIN_PMULUDQ, 20113 IX86_BUILTIN_PMULUDQ128, 20114 IX86_BUILTIN_PMULHUW128, 20115 IX86_BUILTIN_PMULHW128, 20116 IX86_BUILTIN_PMULLW128, 20117 20118 IX86_BUILTIN_PSADBW128, 20119 IX86_BUILTIN_PSHUFHW, 20120 IX86_BUILTIN_PSHUFLW, 20121 IX86_BUILTIN_PSHUFD, 20122 20123 IX86_BUILTIN_PSLLDQI128, 20124 IX86_BUILTIN_PSLLWI128, 20125 IX86_BUILTIN_PSLLDI128, 20126 IX86_BUILTIN_PSLLQI128, 20127 IX86_BUILTIN_PSRAWI128, 20128 IX86_BUILTIN_PSRADI128, 20129 IX86_BUILTIN_PSRLDQI128, 20130 IX86_BUILTIN_PSRLWI128, 20131 IX86_BUILTIN_PSRLDI128, 20132 IX86_BUILTIN_PSRLQI128, 20133 20134 IX86_BUILTIN_PSLLDQ128, 20135 IX86_BUILTIN_PSLLW128, 20136 IX86_BUILTIN_PSLLD128, 20137 IX86_BUILTIN_PSLLQ128, 20138 IX86_BUILTIN_PSRAW128, 20139 IX86_BUILTIN_PSRAD128, 20140 IX86_BUILTIN_PSRLW128, 20141 IX86_BUILTIN_PSRLD128, 20142 IX86_BUILTIN_PSRLQ128, 20143 20144 IX86_BUILTIN_PUNPCKHBW128, 20145 IX86_BUILTIN_PUNPCKHWD128, 20146 IX86_BUILTIN_PUNPCKHDQ128, 20147 IX86_BUILTIN_PUNPCKHQDQ128, 20148 IX86_BUILTIN_PUNPCKLBW128, 20149 IX86_BUILTIN_PUNPCKLWD128, 20150 IX86_BUILTIN_PUNPCKLDQ128, 20151 IX86_BUILTIN_PUNPCKLQDQ128, 20152 20153 IX86_BUILTIN_CLFLUSH, 20154 IX86_BUILTIN_MFENCE, 20155 IX86_BUILTIN_LFENCE, 20156 20157 /* SSE3. */ 20158 IX86_BUILTIN_ADDSUBPS, 20159 IX86_BUILTIN_HADDPS, 20160 IX86_BUILTIN_HSUBPS, 20161 IX86_BUILTIN_MOVSHDUP, 20162 IX86_BUILTIN_MOVSLDUP, 20163 IX86_BUILTIN_ADDSUBPD, 20164 IX86_BUILTIN_HADDPD, 20165 IX86_BUILTIN_HSUBPD, 20166 IX86_BUILTIN_LDDQU, 20167 20168 IX86_BUILTIN_MONITOR, 20169 IX86_BUILTIN_MWAIT, 20170 20171 /* SSSE3. 
*/ 20172 IX86_BUILTIN_PHADDW, 20173 IX86_BUILTIN_PHADDD, 20174 IX86_BUILTIN_PHADDSW, 20175 IX86_BUILTIN_PHSUBW, 20176 IX86_BUILTIN_PHSUBD, 20177 IX86_BUILTIN_PHSUBSW, 20178 IX86_BUILTIN_PMADDUBSW, 20179 IX86_BUILTIN_PMULHRSW, 20180 IX86_BUILTIN_PSHUFB, 20181 IX86_BUILTIN_PSIGNB, 20182 IX86_BUILTIN_PSIGNW, 20183 IX86_BUILTIN_PSIGND, 20184 IX86_BUILTIN_PALIGNR, 20185 IX86_BUILTIN_PABSB, 20186 IX86_BUILTIN_PABSW, 20187 IX86_BUILTIN_PABSD, 20188 20189 IX86_BUILTIN_PHADDW128, 20190 IX86_BUILTIN_PHADDD128, 20191 IX86_BUILTIN_PHADDSW128, 20192 IX86_BUILTIN_PHSUBW128, 20193 IX86_BUILTIN_PHSUBD128, 20194 IX86_BUILTIN_PHSUBSW128, 20195 IX86_BUILTIN_PMADDUBSW128, 20196 IX86_BUILTIN_PMULHRSW128, 20197 IX86_BUILTIN_PSHUFB128, 20198 IX86_BUILTIN_PSIGNB128, 20199 IX86_BUILTIN_PSIGNW128, 20200 IX86_BUILTIN_PSIGND128, 20201 IX86_BUILTIN_PALIGNR128, 20202 IX86_BUILTIN_PABSB128, 20203 IX86_BUILTIN_PABSW128, 20204 IX86_BUILTIN_PABSD128, 20205 20206 /* AMDFAM10 - SSE4A New Instructions. */ 20207 IX86_BUILTIN_MOVNTSD, 20208 IX86_BUILTIN_MOVNTSS, 20209 IX86_BUILTIN_EXTRQI, 20210 IX86_BUILTIN_EXTRQ, 20211 IX86_BUILTIN_INSERTQI, 20212 IX86_BUILTIN_INSERTQ, 20213 20214 /* SSE4.1. */ 20215 IX86_BUILTIN_BLENDPD, 20216 IX86_BUILTIN_BLENDPS, 20217 IX86_BUILTIN_BLENDVPD, 20218 IX86_BUILTIN_BLENDVPS, 20219 IX86_BUILTIN_PBLENDVB128, 20220 IX86_BUILTIN_PBLENDW128, 20221 20222 IX86_BUILTIN_DPPD, 20223 IX86_BUILTIN_DPPS, 20224 20225 IX86_BUILTIN_INSERTPS128, 20226 20227 IX86_BUILTIN_MOVNTDQA, 20228 IX86_BUILTIN_MPSADBW128, 20229 IX86_BUILTIN_PACKUSDW128, 20230 IX86_BUILTIN_PCMPEQQ, 20231 IX86_BUILTIN_PHMINPOSUW128, 20232 20233 IX86_BUILTIN_PMAXSB128, 20234 IX86_BUILTIN_PMAXSD128, 20235 IX86_BUILTIN_PMAXUD128, 20236 IX86_BUILTIN_PMAXUW128, 20237 20238 IX86_BUILTIN_PMINSB128, 20239 IX86_BUILTIN_PMINSD128, 20240 IX86_BUILTIN_PMINUD128, 20241 IX86_BUILTIN_PMINUW128, 20242 20243 IX86_BUILTIN_PMOVSXBW128, 20244 IX86_BUILTIN_PMOVSXBD128, 20245 IX86_BUILTIN_PMOVSXBQ128, 20246 IX86_BUILTIN_PMOVSXWD128, 20247 IX86_BUILTIN_PMOVSXWQ128, 20248 IX86_BUILTIN_PMOVSXDQ128, 20249 20250 IX86_BUILTIN_PMOVZXBW128, 20251 IX86_BUILTIN_PMOVZXBD128, 20252 IX86_BUILTIN_PMOVZXBQ128, 20253 IX86_BUILTIN_PMOVZXWD128, 20254 IX86_BUILTIN_PMOVZXWQ128, 20255 IX86_BUILTIN_PMOVZXDQ128, 20256 20257 IX86_BUILTIN_PMULDQ128, 20258 IX86_BUILTIN_PMULLD128, 20259 20260 IX86_BUILTIN_ROUNDPD, 20261 IX86_BUILTIN_ROUNDPS, 20262 IX86_BUILTIN_ROUNDSD, 20263 IX86_BUILTIN_ROUNDSS, 20264 20265 IX86_BUILTIN_PTESTZ, 20266 IX86_BUILTIN_PTESTC, 20267 IX86_BUILTIN_PTESTNZC, 20268 20269 IX86_BUILTIN_VEC_INIT_V2SI, 20270 IX86_BUILTIN_VEC_INIT_V4HI, 20271 IX86_BUILTIN_VEC_INIT_V8QI, 20272 IX86_BUILTIN_VEC_EXT_V2DF, 20273 IX86_BUILTIN_VEC_EXT_V2DI, 20274 IX86_BUILTIN_VEC_EXT_V4SF, 20275 IX86_BUILTIN_VEC_EXT_V4SI, 20276 IX86_BUILTIN_VEC_EXT_V8HI, 20277 IX86_BUILTIN_VEC_EXT_V2SI, 20278 IX86_BUILTIN_VEC_EXT_V4HI, 20279 IX86_BUILTIN_VEC_EXT_V16QI, 20280 IX86_BUILTIN_VEC_SET_V2DI, 20281 IX86_BUILTIN_VEC_SET_V4SF, 20282 IX86_BUILTIN_VEC_SET_V4SI, 20283 IX86_BUILTIN_VEC_SET_V8HI, 20284 IX86_BUILTIN_VEC_SET_V4HI, 20285 IX86_BUILTIN_VEC_SET_V16QI, 20286 20287 IX86_BUILTIN_VEC_PACK_SFIX, 20288 20289 /* SSE4.2. 
*/ 20290 IX86_BUILTIN_CRC32QI, 20291 IX86_BUILTIN_CRC32HI, 20292 IX86_BUILTIN_CRC32SI, 20293 IX86_BUILTIN_CRC32DI, 20294 20295 IX86_BUILTIN_PCMPESTRI128, 20296 IX86_BUILTIN_PCMPESTRM128, 20297 IX86_BUILTIN_PCMPESTRA128, 20298 IX86_BUILTIN_PCMPESTRC128, 20299 IX86_BUILTIN_PCMPESTRO128, 20300 IX86_BUILTIN_PCMPESTRS128, 20301 IX86_BUILTIN_PCMPESTRZ128, 20302 IX86_BUILTIN_PCMPISTRI128, 20303 IX86_BUILTIN_PCMPISTRM128, 20304 IX86_BUILTIN_PCMPISTRA128, 20305 IX86_BUILTIN_PCMPISTRC128, 20306 IX86_BUILTIN_PCMPISTRO128, 20307 IX86_BUILTIN_PCMPISTRS128, 20308 IX86_BUILTIN_PCMPISTRZ128, 20309 20310 IX86_BUILTIN_PCMPGTQ, 20311 20312 /* AES instructions */ 20313 IX86_BUILTIN_AESENC128, 20314 IX86_BUILTIN_AESENCLAST128, 20315 IX86_BUILTIN_AESDEC128, 20316 IX86_BUILTIN_AESDECLAST128, 20317 IX86_BUILTIN_AESIMC128, 20318 IX86_BUILTIN_AESKEYGENASSIST128, 20319 20320 /* PCLMUL instruction */ 20321 IX86_BUILTIN_PCLMULQDQ128, 20322 20323 /* AVX */ 20324 IX86_BUILTIN_ADDPD256, 20325 IX86_BUILTIN_ADDPS256, 20326 IX86_BUILTIN_ADDSUBPD256, 20327 IX86_BUILTIN_ADDSUBPS256, 20328 IX86_BUILTIN_ANDPD256, 20329 IX86_BUILTIN_ANDPS256, 20330 IX86_BUILTIN_ANDNPD256, 20331 IX86_BUILTIN_ANDNPS256, 20332 IX86_BUILTIN_BLENDPD256, 20333 IX86_BUILTIN_BLENDPS256, 20334 IX86_BUILTIN_BLENDVPD256, 20335 IX86_BUILTIN_BLENDVPS256, 20336 IX86_BUILTIN_DIVPD256, 20337 IX86_BUILTIN_DIVPS256, 20338 IX86_BUILTIN_DPPS256, 20339 IX86_BUILTIN_HADDPD256, 20340 IX86_BUILTIN_HADDPS256, 20341 IX86_BUILTIN_HSUBPD256, 20342 IX86_BUILTIN_HSUBPS256, 20343 IX86_BUILTIN_MAXPD256, 20344 IX86_BUILTIN_MAXPS256, 20345 IX86_BUILTIN_MINPD256, 20346 IX86_BUILTIN_MINPS256, 20347 IX86_BUILTIN_MULPD256, 20348 IX86_BUILTIN_MULPS256, 20349 IX86_BUILTIN_ORPD256, 20350 IX86_BUILTIN_ORPS256, 20351 IX86_BUILTIN_SHUFPD256, 20352 IX86_BUILTIN_SHUFPS256, 20353 IX86_BUILTIN_SUBPD256, 20354 IX86_BUILTIN_SUBPS256, 20355 IX86_BUILTIN_XORPD256, 20356 IX86_BUILTIN_XORPS256, 20357 IX86_BUILTIN_CMPSD, 20358 IX86_BUILTIN_CMPSS, 20359 IX86_BUILTIN_CMPPD, 20360 IX86_BUILTIN_CMPPS, 20361 IX86_BUILTIN_CMPPD256, 20362 IX86_BUILTIN_CMPPS256, 20363 IX86_BUILTIN_CVTDQ2PD256, 20364 IX86_BUILTIN_CVTDQ2PS256, 20365 IX86_BUILTIN_CVTPD2PS256, 20366 IX86_BUILTIN_CVTPS2DQ256, 20367 IX86_BUILTIN_CVTPS2PD256, 20368 IX86_BUILTIN_CVTTPD2DQ256, 20369 IX86_BUILTIN_CVTPD2DQ256, 20370 IX86_BUILTIN_CVTTPS2DQ256, 20371 IX86_BUILTIN_EXTRACTF128PD256, 20372 IX86_BUILTIN_EXTRACTF128PS256, 20373 IX86_BUILTIN_EXTRACTF128SI256, 20374 IX86_BUILTIN_VZEROALL, 20375 IX86_BUILTIN_VZEROUPPER, 20376 IX86_BUILTIN_VZEROUPPER_REX64, 20377 IX86_BUILTIN_VPERMILVARPD, 20378 IX86_BUILTIN_VPERMILVARPS, 20379 IX86_BUILTIN_VPERMILVARPD256, 20380 IX86_BUILTIN_VPERMILVARPS256, 20381 IX86_BUILTIN_VPERMILPD, 20382 IX86_BUILTIN_VPERMILPS, 20383 IX86_BUILTIN_VPERMILPD256, 20384 IX86_BUILTIN_VPERMILPS256, 20385 IX86_BUILTIN_VPERM2F128PD256, 20386 IX86_BUILTIN_VPERM2F128PS256, 20387 IX86_BUILTIN_VPERM2F128SI256, 20388 IX86_BUILTIN_VBROADCASTSS, 20389 IX86_BUILTIN_VBROADCASTSD256, 20390 IX86_BUILTIN_VBROADCASTSS256, 20391 IX86_BUILTIN_VBROADCASTPD256, 20392 IX86_BUILTIN_VBROADCASTPS256, 20393 IX86_BUILTIN_VINSERTF128PD256, 20394 IX86_BUILTIN_VINSERTF128PS256, 20395 IX86_BUILTIN_VINSERTF128SI256, 20396 IX86_BUILTIN_LOADUPD256, 20397 IX86_BUILTIN_LOADUPS256, 20398 IX86_BUILTIN_STOREUPD256, 20399 IX86_BUILTIN_STOREUPS256, 20400 IX86_BUILTIN_LDDQU256, 20401 IX86_BUILTIN_MOVNTDQ256, 20402 IX86_BUILTIN_MOVNTPD256, 20403 IX86_BUILTIN_MOVNTPS256, 20404 IX86_BUILTIN_LOADDQU256, 20405 IX86_BUILTIN_STOREDQU256, 20406 IX86_BUILTIN_MASKLOADPD, 20407 
IX86_BUILTIN_MASKLOADPS, 20408 IX86_BUILTIN_MASKSTOREPD, 20409 IX86_BUILTIN_MASKSTOREPS, 20410 IX86_BUILTIN_MASKLOADPD256, 20411 IX86_BUILTIN_MASKLOADPS256, 20412 IX86_BUILTIN_MASKSTOREPD256, 20413 IX86_BUILTIN_MASKSTOREPS256, 20414 IX86_BUILTIN_MOVSHDUP256, 20415 IX86_BUILTIN_MOVSLDUP256, 20416 IX86_BUILTIN_MOVDDUP256, 20417 20418 IX86_BUILTIN_SQRTPD256, 20419 IX86_BUILTIN_SQRTPS256, 20420 IX86_BUILTIN_SQRTPS_NR256, 20421 IX86_BUILTIN_RSQRTPS256, 20422 IX86_BUILTIN_RSQRTPS_NR256, 20423 20424 IX86_BUILTIN_RCPPS256, 20425 20426 IX86_BUILTIN_ROUNDPD256, 20427 IX86_BUILTIN_ROUNDPS256, 20428 20429 IX86_BUILTIN_UNPCKHPD256, 20430 IX86_BUILTIN_UNPCKLPD256, 20431 IX86_BUILTIN_UNPCKHPS256, 20432 IX86_BUILTIN_UNPCKLPS256, 20433 20434 IX86_BUILTIN_SI256_SI, 20435 IX86_BUILTIN_PS256_PS, 20436 IX86_BUILTIN_PD256_PD, 20437 IX86_BUILTIN_SI_SI256, 20438 IX86_BUILTIN_PS_PS256, 20439 IX86_BUILTIN_PD_PD256, 20440 20441 IX86_BUILTIN_VTESTZPD, 20442 IX86_BUILTIN_VTESTCPD, 20443 IX86_BUILTIN_VTESTNZCPD, 20444 IX86_BUILTIN_VTESTZPS, 20445 IX86_BUILTIN_VTESTCPS, 20446 IX86_BUILTIN_VTESTNZCPS, 20447 IX86_BUILTIN_VTESTZPD256, 20448 IX86_BUILTIN_VTESTCPD256, 20449 IX86_BUILTIN_VTESTNZCPD256, 20450 IX86_BUILTIN_VTESTZPS256, 20451 IX86_BUILTIN_VTESTCPS256, 20452 IX86_BUILTIN_VTESTNZCPS256, 20453 IX86_BUILTIN_PTESTZ256, 20454 IX86_BUILTIN_PTESTC256, 20455 IX86_BUILTIN_PTESTNZC256, 20456 20457 IX86_BUILTIN_MOVMSKPD256, 20458 IX86_BUILTIN_MOVMSKPS256, 20459 20460 /* TFmode support builtins. */ 20461 IX86_BUILTIN_INFQ, 20462 IX86_BUILTIN_FABSQ, 20463 IX86_BUILTIN_COPYSIGNQ, 20464 20465 /* SSE5 instructions */ 20466 IX86_BUILTIN_FMADDSS, 20467 IX86_BUILTIN_FMADDSD, 20468 IX86_BUILTIN_FMADDPS, 20469 IX86_BUILTIN_FMADDPD, 20470 IX86_BUILTIN_FMSUBSS, 20471 IX86_BUILTIN_FMSUBSD, 20472 IX86_BUILTIN_FMSUBPS, 20473 IX86_BUILTIN_FMSUBPD, 20474 IX86_BUILTIN_FNMADDSS, 20475 IX86_BUILTIN_FNMADDSD, 20476 IX86_BUILTIN_FNMADDPS, 20477 IX86_BUILTIN_FNMADDPD, 20478 IX86_BUILTIN_FNMSUBSS, 20479 IX86_BUILTIN_FNMSUBSD, 20480 IX86_BUILTIN_FNMSUBPS, 20481 IX86_BUILTIN_FNMSUBPD, 20482 IX86_BUILTIN_PCMOV, 20483 IX86_BUILTIN_PCMOV_V2DI, 20484 IX86_BUILTIN_PCMOV_V4SI, 20485 IX86_BUILTIN_PCMOV_V8HI, 20486 IX86_BUILTIN_PCMOV_V16QI, 20487 IX86_BUILTIN_PCMOV_V4SF, 20488 IX86_BUILTIN_PCMOV_V2DF, 20489 IX86_BUILTIN_PPERM, 20490 IX86_BUILTIN_PERMPS, 20491 IX86_BUILTIN_PERMPD, 20492 IX86_BUILTIN_PMACSSWW, 20493 IX86_BUILTIN_PMACSWW, 20494 IX86_BUILTIN_PMACSSWD, 20495 IX86_BUILTIN_PMACSWD, 20496 IX86_BUILTIN_PMACSSDD, 20497 IX86_BUILTIN_PMACSDD, 20498 IX86_BUILTIN_PMACSSDQL, 20499 IX86_BUILTIN_PMACSSDQH, 20500 IX86_BUILTIN_PMACSDQL, 20501 IX86_BUILTIN_PMACSDQH, 20502 IX86_BUILTIN_PMADCSSWD, 20503 IX86_BUILTIN_PMADCSWD, 20504 IX86_BUILTIN_PHADDBW, 20505 IX86_BUILTIN_PHADDBD, 20506 IX86_BUILTIN_PHADDBQ, 20507 IX86_BUILTIN_PHADDWD, 20508 IX86_BUILTIN_PHADDWQ, 20509 IX86_BUILTIN_PHADDDQ, 20510 IX86_BUILTIN_PHADDUBW, 20511 IX86_BUILTIN_PHADDUBD, 20512 IX86_BUILTIN_PHADDUBQ, 20513 IX86_BUILTIN_PHADDUWD, 20514 IX86_BUILTIN_PHADDUWQ, 20515 IX86_BUILTIN_PHADDUDQ, 20516 IX86_BUILTIN_PHSUBBW, 20517 IX86_BUILTIN_PHSUBWD, 20518 IX86_BUILTIN_PHSUBDQ, 20519 IX86_BUILTIN_PROTB, 20520 IX86_BUILTIN_PROTW, 20521 IX86_BUILTIN_PROTD, 20522 IX86_BUILTIN_PROTQ, 20523 IX86_BUILTIN_PROTB_IMM, 20524 IX86_BUILTIN_PROTW_IMM, 20525 IX86_BUILTIN_PROTD_IMM, 20526 IX86_BUILTIN_PROTQ_IMM, 20527 IX86_BUILTIN_PSHLB, 20528 IX86_BUILTIN_PSHLW, 20529 IX86_BUILTIN_PSHLD, 20530 IX86_BUILTIN_PSHLQ, 20531 IX86_BUILTIN_PSHAB, 20532 IX86_BUILTIN_PSHAW, 20533 IX86_BUILTIN_PSHAD, 20534 
IX86_BUILTIN_PSHAQ, 20535 IX86_BUILTIN_FRCZSS, 20536 IX86_BUILTIN_FRCZSD, 20537 IX86_BUILTIN_FRCZPS, 20538 IX86_BUILTIN_FRCZPD, 20539 IX86_BUILTIN_CVTPH2PS, 20540 IX86_BUILTIN_CVTPS2PH, 20541 20542 IX86_BUILTIN_COMEQSS, 20543 IX86_BUILTIN_COMNESS, 20544 IX86_BUILTIN_COMLTSS, 20545 IX86_BUILTIN_COMLESS, 20546 IX86_BUILTIN_COMGTSS, 20547 IX86_BUILTIN_COMGESS, 20548 IX86_BUILTIN_COMUEQSS, 20549 IX86_BUILTIN_COMUNESS, 20550 IX86_BUILTIN_COMULTSS, 20551 IX86_BUILTIN_COMULESS, 20552 IX86_BUILTIN_COMUGTSS, 20553 IX86_BUILTIN_COMUGESS, 20554 IX86_BUILTIN_COMORDSS, 20555 IX86_BUILTIN_COMUNORDSS, 20556 IX86_BUILTIN_COMFALSESS, 20557 IX86_BUILTIN_COMTRUESS, 20558 20559 IX86_BUILTIN_COMEQSD, 20560 IX86_BUILTIN_COMNESD, 20561 IX86_BUILTIN_COMLTSD, 20562 IX86_BUILTIN_COMLESD, 20563 IX86_BUILTIN_COMGTSD, 20564 IX86_BUILTIN_COMGESD, 20565 IX86_BUILTIN_COMUEQSD, 20566 IX86_BUILTIN_COMUNESD, 20567 IX86_BUILTIN_COMULTSD, 20568 IX86_BUILTIN_COMULESD, 20569 IX86_BUILTIN_COMUGTSD, 20570 IX86_BUILTIN_COMUGESD, 20571 IX86_BUILTIN_COMORDSD, 20572 IX86_BUILTIN_COMUNORDSD, 20573 IX86_BUILTIN_COMFALSESD, 20574 IX86_BUILTIN_COMTRUESD, 20575 20576 IX86_BUILTIN_COMEQPS, 20577 IX86_BUILTIN_COMNEPS, 20578 IX86_BUILTIN_COMLTPS, 20579 IX86_BUILTIN_COMLEPS, 20580 IX86_BUILTIN_COMGTPS, 20581 IX86_BUILTIN_COMGEPS, 20582 IX86_BUILTIN_COMUEQPS, 20583 IX86_BUILTIN_COMUNEPS, 20584 IX86_BUILTIN_COMULTPS, 20585 IX86_BUILTIN_COMULEPS, 20586 IX86_BUILTIN_COMUGTPS, 20587 IX86_BUILTIN_COMUGEPS, 20588 IX86_BUILTIN_COMORDPS, 20589 IX86_BUILTIN_COMUNORDPS, 20590 IX86_BUILTIN_COMFALSEPS, 20591 IX86_BUILTIN_COMTRUEPS, 20592 20593 IX86_BUILTIN_COMEQPD, 20594 IX86_BUILTIN_COMNEPD, 20595 IX86_BUILTIN_COMLTPD, 20596 IX86_BUILTIN_COMLEPD, 20597 IX86_BUILTIN_COMGTPD, 20598 IX86_BUILTIN_COMGEPD, 20599 IX86_BUILTIN_COMUEQPD, 20600 IX86_BUILTIN_COMUNEPD, 20601 IX86_BUILTIN_COMULTPD, 20602 IX86_BUILTIN_COMULEPD, 20603 IX86_BUILTIN_COMUGTPD, 20604 IX86_BUILTIN_COMUGEPD, 20605 IX86_BUILTIN_COMORDPD, 20606 IX86_BUILTIN_COMUNORDPD, 20607 IX86_BUILTIN_COMFALSEPD, 20608 IX86_BUILTIN_COMTRUEPD, 20609 20610 IX86_BUILTIN_PCOMEQUB, 20611 IX86_BUILTIN_PCOMNEUB, 20612 IX86_BUILTIN_PCOMLTUB, 20613 IX86_BUILTIN_PCOMLEUB, 20614 IX86_BUILTIN_PCOMGTUB, 20615 IX86_BUILTIN_PCOMGEUB, 20616 IX86_BUILTIN_PCOMFALSEUB, 20617 IX86_BUILTIN_PCOMTRUEUB, 20618 IX86_BUILTIN_PCOMEQUW, 20619 IX86_BUILTIN_PCOMNEUW, 20620 IX86_BUILTIN_PCOMLTUW, 20621 IX86_BUILTIN_PCOMLEUW, 20622 IX86_BUILTIN_PCOMGTUW, 20623 IX86_BUILTIN_PCOMGEUW, 20624 IX86_BUILTIN_PCOMFALSEUW, 20625 IX86_BUILTIN_PCOMTRUEUW, 20626 IX86_BUILTIN_PCOMEQUD, 20627 IX86_BUILTIN_PCOMNEUD, 20628 IX86_BUILTIN_PCOMLTUD, 20629 IX86_BUILTIN_PCOMLEUD, 20630 IX86_BUILTIN_PCOMGTUD, 20631 IX86_BUILTIN_PCOMGEUD, 20632 IX86_BUILTIN_PCOMFALSEUD, 20633 IX86_BUILTIN_PCOMTRUEUD, 20634 IX86_BUILTIN_PCOMEQUQ, 20635 IX86_BUILTIN_PCOMNEUQ, 20636 IX86_BUILTIN_PCOMLTUQ, 20637 IX86_BUILTIN_PCOMLEUQ, 20638 IX86_BUILTIN_PCOMGTUQ, 20639 IX86_BUILTIN_PCOMGEUQ, 20640 IX86_BUILTIN_PCOMFALSEUQ, 20641 IX86_BUILTIN_PCOMTRUEUQ, 20642 20643 IX86_BUILTIN_PCOMEQB, 20644 IX86_BUILTIN_PCOMNEB, 20645 IX86_BUILTIN_PCOMLTB, 20646 IX86_BUILTIN_PCOMLEB, 20647 IX86_BUILTIN_PCOMGTB, 20648 IX86_BUILTIN_PCOMGEB, 20649 IX86_BUILTIN_PCOMFALSEB, 20650 IX86_BUILTIN_PCOMTRUEB, 20651 IX86_BUILTIN_PCOMEQW, 20652 IX86_BUILTIN_PCOMNEW, 20653 IX86_BUILTIN_PCOMLTW, 20654 IX86_BUILTIN_PCOMLEW, 20655 IX86_BUILTIN_PCOMGTW, 20656 IX86_BUILTIN_PCOMGEW, 20657 IX86_BUILTIN_PCOMFALSEW, 20658 IX86_BUILTIN_PCOMTRUEW, 20659 IX86_BUILTIN_PCOMEQD, 20660 IX86_BUILTIN_PCOMNED, 20661 
IX86_BUILTIN_PCOMLTD, 20662 IX86_BUILTIN_PCOMLED, 20663 IX86_BUILTIN_PCOMGTD, 20664 IX86_BUILTIN_PCOMGED, 20665 IX86_BUILTIN_PCOMFALSED, 20666 IX86_BUILTIN_PCOMTRUED, 20667 IX86_BUILTIN_PCOMEQQ, 20668 IX86_BUILTIN_PCOMNEQ, 20669 IX86_BUILTIN_PCOMLTQ, 20670 IX86_BUILTIN_PCOMLEQ, 20671 IX86_BUILTIN_PCOMGTQ, 20672 IX86_BUILTIN_PCOMGEQ, 20673 IX86_BUILTIN_PCOMFALSEQ, 20674 IX86_BUILTIN_PCOMTRUEQ, 20675 20676 IX86_BUILTIN_MAX 20677 }; 20678 20679 /* Table for the ix86 builtin decls. */ 20680 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX]; 20681 20682 /* Table of all of the builtin functions that are possible with different ISA's 20683 but are waiting to be built until a function is declared to use that 20684 ISA. */ 20685 struct builtin_isa GTY(()) 20686 { 20687 tree type; /* builtin type to use in the declaration */ 20688 const char *name; /* function name */ 20689 int isa; /* isa_flags this builtin is defined for */ 20690 bool const_p; /* true if the declaration is constant */ 20691 }; 20692 20693 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX]; 20694 20695 20696 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK 20697 * of which isa_flags to use in the ix86_builtins_isa array. Stores the 20698 * function decl in the ix86_builtins array. Returns the function decl or 20699 * NULL_TREE, if the builtin was not added. 20700 * 20701 * If the front end has a special hook for builtin functions, delay adding 20702 * builtin functions that aren't in the current ISA until the ISA is changed 20703 * with function specific optimization. Doing so, can save about 300K for the 20704 * default compiler. When the builtin is expanded, check at that time whether 20705 * it is valid. 20706 * 20707 * If the front end doesn't have a special hook, record all builtins, even if 20708 * it isn't an instruction set in the current ISA in case the user uses 20709 * function specific options for a different ISA, so that we don't get scope 20710 * errors if a builtin is added in the middle of a function scope. */ 20711 20712 static inline tree 20713 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code) 20714 { 20715 tree decl = NULL_TREE; 20716 20717 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT) 20718 { 20719 ix86_builtins_isa[(int) code].isa = mask; 20720 20721 if ((mask & ix86_isa_flags) != 0 20722 || (lang_hooks.builtin_function 20723 == lang_hooks.builtin_function_ext_scope)) 20724 20725 { 20726 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, 20727 NULL_TREE); 20728 ix86_builtins[(int) code] = decl; 20729 ix86_builtins_isa[(int) code].type = NULL_TREE; 20730 } 20731 else 20732 { 20733 ix86_builtins[(int) code] = NULL_TREE; 20734 ix86_builtins_isa[(int) code].const_p = false; 20735 ix86_builtins_isa[(int) code].type = type; 20736 ix86_builtins_isa[(int) code].name = name; 20737 } 20738 } 20739 20740 return decl; 20741 } 20742 20743 /* Like def_builtin, but also marks the function decl "const". */ 20744 20745 static inline tree 20746 def_builtin_const (int mask, const char *name, tree type, 20747 enum ix86_builtins code) 20748 { 20749 tree decl = def_builtin (mask, name, type, code); 20750 if (decl) 20751 TREE_READONLY (decl) = 1; 20752 else 20753 ix86_builtins_isa[(int) code].const_p = true; 20754 20755 return decl; 20756 } 20757 20758 /* Add any new builtin functions for a given ISA that may not have been 20759 declared. 
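This is invoked when the set of enabled ISAs changes, typically through the target attribute or pragma handling, so that builtins deferred by def_builtin above become declared at that point.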
This saves a bit of space compared to adding all of the 20760 declarations to the tree, even if we didn't use them. */ 20761 20762 static void 20763 ix86_add_new_builtins (int isa) 20764 { 20765 int i; 20766 tree decl; 20767 20768 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++) 20769 { 20770 if ((ix86_builtins_isa[i].isa & isa) != 0 20771 && ix86_builtins_isa[i].type != NULL_TREE) 20772 { 20773 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name, 20774 ix86_builtins_isa[i].type, 20775 i, BUILT_IN_MD, NULL, 20776 NULL_TREE); 20777 20778 ix86_builtins[i] = decl; 20779 ix86_builtins_isa[i].type = NULL_TREE; 20780 if (ix86_builtins_isa[i].const_p) 20781 TREE_READONLY (decl) = 1; 20782 } 20783 } 20784 } 20785 20786 /* Bits for builtin_description.flag. */ 20787 20788 /* Set when we don't support the comparison natively, and should 20789 swap_comparison in order to support it. */ 20790 #define BUILTIN_DESC_SWAP_OPERANDS 1 20791 20792 struct builtin_description 20793 { 20794 const unsigned int mask; 20795 const enum insn_code icode; 20796 const char *const name; 20797 const enum ix86_builtins code; 20798 const enum rtx_code comparison; 20799 const int flag; 20800 }; 20801 20802 static const struct builtin_description bdesc_comi[] = 20803 { 20804 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 }, 20805 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 }, 20806 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 }, 20807 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 }, 20808 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 }, 20809 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 }, 20810 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 }, 20811 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 }, 20812 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 }, 20813 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 }, 20814 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 }, 20815 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 }, 20816 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 }, 20817 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 }, 20818 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 }, 20819 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 }, 20820 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 }, 20821 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 }, 20822 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 }, 20823 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 }, 20824 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, 
UNLE, 0 }, 20825 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 }, 20826 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 }, 20827 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 }, 20828 }; 20829 20830 static const struct builtin_description bdesc_pcmpestr[] = 20831 { 20832 /* SSE4.2 */ 20833 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 }, 20834 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 }, 20835 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode }, 20836 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode }, 20837 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode }, 20838 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode }, 20839 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode }, 20840 }; 20841 20842 static const struct builtin_description bdesc_pcmpistr[] = 20843 { 20844 /* SSE4.2 */ 20845 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 }, 20846 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 }, 20847 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode }, 20848 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode }, 20849 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode }, 20850 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode }, 20851 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode }, 20852 }; 20853 20854 /* Special builtin types */ 20855 enum ix86_special_builtin_type 20856 { 20857 SPECIAL_FTYPE_UNKNOWN, 20858 VOID_FTYPE_VOID, 20859 V32QI_FTYPE_PCCHAR, 20860 V16QI_FTYPE_PCCHAR, 20861 V8SF_FTYPE_PCV4SF, 20862 V8SF_FTYPE_PCFLOAT, 20863 V4DF_FTYPE_PCV2DF, 20864 V4DF_FTYPE_PCDOUBLE, 20865 V4SF_FTYPE_PCFLOAT, 20866 V2DF_FTYPE_PCDOUBLE, 20867 V8SF_FTYPE_PCV8SF_V8SF, 20868 V4DF_FTYPE_PCV4DF_V4DF, 20869 V4SF_FTYPE_V4SF_PCV2SF, 20870 V4SF_FTYPE_PCV4SF_V4SF, 20871 V2DF_FTYPE_V2DF_PCDOUBLE, 20872 V2DF_FTYPE_PCV2DF_V2DF, 20873 V2DI_FTYPE_PV2DI, 20874 VOID_FTYPE_PV2SF_V4SF, 20875 VOID_FTYPE_PV4DI_V4DI, 20876 VOID_FTYPE_PV2DI_V2DI, 20877 VOID_FTYPE_PCHAR_V32QI, 20878 VOID_FTYPE_PCHAR_V16QI, 20879 VOID_FTYPE_PFLOAT_V8SF, 20880 VOID_FTYPE_PFLOAT_V4SF, 20881 VOID_FTYPE_PDOUBLE_V4DF, 20882 VOID_FTYPE_PDOUBLE_V2DF, 20883 VOID_FTYPE_PDI_DI, 20884 VOID_FTYPE_PINT_INT, 20885 VOID_FTYPE_PV8SF_V8SF_V8SF, 20886 VOID_FTYPE_PV4DF_V4DF_V4DF, 20887 VOID_FTYPE_PV4SF_V4SF_V4SF, 20888 VOID_FTYPE_PV2DF_V2DF_V2DF 20889 }; 20890 20891 /* 
Builtin types */ 20892 enum ix86_builtin_type 20893 { 20894 FTYPE_UNKNOWN, 20895 FLOAT128_FTYPE_FLOAT128, 20896 FLOAT_FTYPE_FLOAT, 20897 FLOAT128_FTYPE_FLOAT128_FLOAT128, 20898 INT_FTYPE_V8SF_V8SF_PTEST, 20899 INT_FTYPE_V4DI_V4DI_PTEST, 20900 INT_FTYPE_V4DF_V4DF_PTEST, 20901 INT_FTYPE_V4SF_V4SF_PTEST, 20902 INT_FTYPE_V2DI_V2DI_PTEST, 20903 INT_FTYPE_V2DF_V2DF_PTEST, 20904 INT64_FTYPE_V4SF, 20905 INT64_FTYPE_V2DF, 20906 INT_FTYPE_V16QI, 20907 INT_FTYPE_V8QI, 20908 INT_FTYPE_V8SF, 20909 INT_FTYPE_V4DF, 20910 INT_FTYPE_V4SF, 20911 INT_FTYPE_V2DF, 20912 V16QI_FTYPE_V16QI, 20913 V8SI_FTYPE_V8SF, 20914 V8SI_FTYPE_V4SI, 20915 V8HI_FTYPE_V8HI, 20916 V8HI_FTYPE_V16QI, 20917 V8QI_FTYPE_V8QI, 20918 V8SF_FTYPE_V8SF, 20919 V8SF_FTYPE_V8SI, 20920 V8SF_FTYPE_V4SF, 20921 V4SI_FTYPE_V4SI, 20922 V4SI_FTYPE_V16QI, 20923 V4SI_FTYPE_V8SI, 20924 V4SI_FTYPE_V8HI, 20925 V4SI_FTYPE_V4DF, 20926 V4SI_FTYPE_V4SF, 20927 V4SI_FTYPE_V2DF, 20928 V4HI_FTYPE_V4HI, 20929 V4DF_FTYPE_V4DF, 20930 V4DF_FTYPE_V4SI, 20931 V4DF_FTYPE_V4SF, 20932 V4DF_FTYPE_V2DF, 20933 V4SF_FTYPE_V4DF, 20934 V4SF_FTYPE_V4SF, 20935 V4SF_FTYPE_V4SF_VEC_MERGE, 20936 V4SF_FTYPE_V8SF, 20937 V4SF_FTYPE_V4SI, 20938 V4SF_FTYPE_V2DF, 20939 V2DI_FTYPE_V2DI, 20940 V2DI_FTYPE_V16QI, 20941 V2DI_FTYPE_V8HI, 20942 V2DI_FTYPE_V4SI, 20943 V2DF_FTYPE_V2DF, 20944 V2DF_FTYPE_V2DF_VEC_MERGE, 20945 V2DF_FTYPE_V4SI, 20946 V2DF_FTYPE_V4DF, 20947 V2DF_FTYPE_V4SF, 20948 V2DF_FTYPE_V2SI, 20949 V2SI_FTYPE_V2SI, 20950 V2SI_FTYPE_V4SF, 20951 V2SI_FTYPE_V2SF, 20952 V2SI_FTYPE_V2DF, 20953 V2SF_FTYPE_V2SF, 20954 V2SF_FTYPE_V2SI, 20955 V16QI_FTYPE_V16QI_V16QI, 20956 V16QI_FTYPE_V8HI_V8HI, 20957 V8QI_FTYPE_V8QI_V8QI, 20958 V8QI_FTYPE_V4HI_V4HI, 20959 V8HI_FTYPE_V8HI_V8HI, 20960 V8HI_FTYPE_V8HI_V8HI_COUNT, 20961 V8HI_FTYPE_V16QI_V16QI, 20962 V8HI_FTYPE_V4SI_V4SI, 20963 V8HI_FTYPE_V8HI_SI_COUNT, 20964 V8SF_FTYPE_V8SF_V8SF, 20965 V8SF_FTYPE_V8SF_V8SI, 20966 V4SI_FTYPE_V4SI_V4SI, 20967 V4SI_FTYPE_V4SI_V4SI_COUNT, 20968 V4SI_FTYPE_V8HI_V8HI, 20969 V4SI_FTYPE_V4SF_V4SF, 20970 V4SI_FTYPE_V2DF_V2DF, 20971 V4SI_FTYPE_V4SI_SI_COUNT, 20972 V4HI_FTYPE_V4HI_V4HI, 20973 V4HI_FTYPE_V4HI_V4HI_COUNT, 20974 V4HI_FTYPE_V8QI_V8QI, 20975 V4HI_FTYPE_V2SI_V2SI, 20976 V4HI_FTYPE_V4HI_SI_COUNT, 20977 V4DF_FTYPE_V4DF_V4DF, 20978 V4DF_FTYPE_V4DF_V4DI, 20979 V4SF_FTYPE_V4SF_V4SF, 20980 V4SF_FTYPE_V4SF_V4SF_SWAP, 20981 V4SF_FTYPE_V4SF_V4SI, 20982 V4SF_FTYPE_V4SF_V2SI, 20983 V4SF_FTYPE_V4SF_V2DF, 20984 V4SF_FTYPE_V4SF_DI, 20985 V4SF_FTYPE_V4SF_SI, 20986 V2DI_FTYPE_V2DI_V2DI, 20987 V2DI_FTYPE_V2DI_V2DI_COUNT, 20988 V2DI_FTYPE_V16QI_V16QI, 20989 V2DI_FTYPE_V4SI_V4SI, 20990 V2DI_FTYPE_V2DI_V16QI, 20991 V2DI_FTYPE_V2DF_V2DF, 20992 V2DI_FTYPE_V2DI_SI_COUNT, 20993 V2SI_FTYPE_V2SI_V2SI, 20994 V2SI_FTYPE_V2SI_V2SI_COUNT, 20995 V2SI_FTYPE_V4HI_V4HI, 20996 V2SI_FTYPE_V2SF_V2SF, 20997 V2SI_FTYPE_V2SI_SI_COUNT, 20998 V2DF_FTYPE_V2DF_V2DF, 20999 V2DF_FTYPE_V2DF_V2DF_SWAP, 21000 V2DF_FTYPE_V2DF_V4SF, 21001 V2DF_FTYPE_V2DF_V2DI, 21002 V2DF_FTYPE_V2DF_DI, 21003 V2DF_FTYPE_V2DF_SI, 21004 V2SF_FTYPE_V2SF_V2SF, 21005 V1DI_FTYPE_V1DI_V1DI, 21006 V1DI_FTYPE_V1DI_V1DI_COUNT, 21007 V1DI_FTYPE_V8QI_V8QI, 21008 V1DI_FTYPE_V2SI_V2SI, 21009 V1DI_FTYPE_V1DI_SI_COUNT, 21010 UINT64_FTYPE_UINT64_UINT64, 21011 UINT_FTYPE_UINT_UINT, 21012 UINT_FTYPE_UINT_USHORT, 21013 UINT_FTYPE_UINT_UCHAR, 21014 V8HI_FTYPE_V8HI_INT, 21015 V4SI_FTYPE_V4SI_INT, 21016 V4HI_FTYPE_V4HI_INT, 21017 V8SF_FTYPE_V8SF_INT, 21018 V4SI_FTYPE_V8SI_INT, 21019 V4SF_FTYPE_V8SF_INT, 21020 V2DF_FTYPE_V4DF_INT, 21021 V4DF_FTYPE_V4DF_INT, 21022 V4SF_FTYPE_V4SF_INT, 21023 
V2DI_FTYPE_V2DI_INT, 21024 V2DI2TI_FTYPE_V2DI_INT, 21025 V2DF_FTYPE_V2DF_INT, 21026 V16QI_FTYPE_V16QI_V16QI_V16QI, 21027 V8SF_FTYPE_V8SF_V8SF_V8SF, 21028 V4DF_FTYPE_V4DF_V4DF_V4DF, 21029 V4SF_FTYPE_V4SF_V4SF_V4SF, 21030 V2DF_FTYPE_V2DF_V2DF_V2DF, 21031 V16QI_FTYPE_V16QI_V16QI_INT, 21032 V8SI_FTYPE_V8SI_V8SI_INT, 21033 V8SI_FTYPE_V8SI_V4SI_INT, 21034 V8HI_FTYPE_V8HI_V8HI_INT, 21035 V8SF_FTYPE_V8SF_V8SF_INT, 21036 V8SF_FTYPE_V8SF_V4SF_INT, 21037 V4SI_FTYPE_V4SI_V4SI_INT, 21038 V4DF_FTYPE_V4DF_V4DF_INT, 21039 V4DF_FTYPE_V4DF_V2DF_INT, 21040 V4SF_FTYPE_V4SF_V4SF_INT, 21041 V2DI_FTYPE_V2DI_V2DI_INT, 21042 V2DI2TI_FTYPE_V2DI_V2DI_INT, 21043 V1DI2DI_FTYPE_V1DI_V1DI_INT, 21044 V2DF_FTYPE_V2DF_V2DF_INT, 21045 V2DI_FTYPE_V2DI_UINT_UINT, 21046 V2DI_FTYPE_V2DI_V2DI_UINT_UINT 21047 }; 21048 21049 /* Special builtins with variable number of arguments. */ 21050 static const struct builtin_description bdesc_special_args[] = 21051 { 21052 /* MMX */ 21053 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID }, 21054 21055 /* 3DNow! */ 21056 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID }, 21057 21058 /* SSE */ 21059 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, 21060 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, 21061 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT }, 21062 21063 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF }, 21064 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF }, 21065 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF }, 21066 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF }, 21067 21068 /* SSE or 3DNow!A */ 21069 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, 21070 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI }, 21071 21072 /* SSE2 */ 21073 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, 21074 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, 21075 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, 21076 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI }, 21077 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, 21078 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI }, 21079 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) 
VOID_FTYPE_PINT_INT }, 21080 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE }, 21081 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR }, 21082 21083 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE }, 21084 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE }, 21085 21086 /* SSE3 */ 21087 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR }, 21088 21089 /* SSE4.1 */ 21090 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI }, 21091 21092 /* SSE4A */ 21093 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, 21094 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, 21095 21096 /* AVX */ 21097 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID }, 21098 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID }, 21099 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID }, 21100 21101 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT }, 21102 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE }, 21103 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT }, 21104 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF }, 21105 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF }, 21106 21107 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE }, 21108 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT }, 21109 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF }, 21110 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF }, 21111 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR }, 21112 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI }, 21113 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR }, 21114 21115 { OPTION_MASK_ISA_AVX, 
CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI }, 21116 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF }, 21117 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF }, 21118 21119 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF }, 21120 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF }, 21121 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF }, 21122 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF }, 21123 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF }, 21124 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF }, 21125 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF }, 21126 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF }, 21127 }; 21128 21129 /* Builtins with variable number of arguments. */ 21130 static const struct builtin_description bdesc_args[] = 21131 { 21132 /* MMX */ 21133 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21134 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21135 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21136 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21137 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21138 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21139 21140 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21141 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21142 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21143 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21144 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21145 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21146 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, 
"__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21147 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21148 21149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21150 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21151 21152 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21153 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21154 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21155 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21156 21157 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21158 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21159 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21160 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21161 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21162 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21163 21164 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21165 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21166 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21167 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21168 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI}, 21169 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI}, 21170 21171 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI }, 21172 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI }, 21173 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI }, 21174 21175 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI }, 21176 21177 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT }, 21178 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 
"__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT }, 21179 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT }, 21180 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT }, 21181 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT }, 21182 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT }, 21183 21184 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT }, 21185 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT }, 21186 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT }, 21187 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT }, 21188 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT }, 21189 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT }, 21190 21191 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT }, 21192 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT }, 21193 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT }, 21194 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT }, 21195 21196 /* 3DNow! 
*/ 21197 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF }, 21198 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI }, 21199 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF }, 21200 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF }, 21201 21202 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21203 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21204 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21205 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF }, 21206 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF }, 21207 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF }, 21208 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21209 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21210 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21211 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21212 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21213 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21214 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21215 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21216 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21217 21218 /* 3DNow!A */ 21219 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF }, 21220 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI }, 21221 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI }, 21222 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF }, 21223 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21224 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21225 21226 /* SSE */ 21227 { 
OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF }, 21228 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF }, 21229 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF }, 21230 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF }, 21231 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF }, 21232 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF }, 21233 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF }, 21234 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF }, 21235 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF }, 21236 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF }, 21237 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF }, 21238 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF }, 21239 21240 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, 21241 21242 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21243 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21244 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21245 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21246 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21247 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21248 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21249 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21250 21251 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF }, 21252 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF }, 21253 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF }, 21254 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, 21255 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", 
IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, 21256 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF }, 21257 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF }, 21258 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF }, 21259 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF }, 21260 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, 21261 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP}, 21262 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF }, 21263 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF }, 21264 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF }, 21265 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF }, 21266 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF }, 21267 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF }, 21268 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF }, 21269 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF }, 21270 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, 21271 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, 21272 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF }, 21273 21274 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21275 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21276 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21277 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21278 21279 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21280 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21281 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21282 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", 
IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21283 21284 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21285 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21286 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21287 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21288 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21289 21290 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI }, 21291 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI }, 21292 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI }, 21293 21294 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT }, 21295 21296 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE }, 21297 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE }, 21298 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE }, 21299 21300 /* SSE MMX or 3Dnow!A */ 21301 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21302 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21303 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21304 21305 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21306 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21307 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21308 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21309 21310 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI }, 21311 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI }, 21312 21313 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT }, 21314 21315 /* SSE2 */ 21316 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, 
"__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, 21317 21318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF }, 21319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI }, 21320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF }, 21321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI }, 21322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI }, 21323 21324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF }, 21325 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF }, 21326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF }, 21327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF }, 21328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF }, 21329 21330 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI }, 21331 21332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF }, 21333 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF }, 21334 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF }, 21335 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF }, 21336 21337 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF }, 21338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF }, 21339 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF }, 21340 21341 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21342 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21343 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21344 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21345 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21346 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21347 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, 
"__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21348 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21349 21350 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF }, 21351 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF }, 21352 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF }, 21353 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP }, 21354 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP}, 21355 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF }, 21356 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF }, 21357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF }, 21358 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF }, 21359 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP }, 21360 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP }, 21361 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF }, 21362 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF }, 21363 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF }, 21364 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF }, 21365 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF }, 21366 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF }, 21367 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF }, 21368 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF }, 21369 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF }, 21370 21371 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21372 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21373 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21374 { 
OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21375 21376 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21377 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21378 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21379 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21380 21381 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21382 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21384 21385 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF }, 21386 21387 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21388 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21389 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21390 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21391 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21392 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21393 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21394 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21395 21396 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21397 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21398 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", 
IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21404 21405 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21406 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI }, 21407 21408 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21410 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21411 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21412 21413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21415 21416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21422 21423 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21424 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21425 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21427 21428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", 
IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21436 21437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI }, 21438 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI }, 21439 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI }, 21440 21441 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21442 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI }, 21443 21444 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI }, 21445 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI }, 21446 21447 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI }, 21448 21449 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI }, 21450 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI }, 21451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF }, 21452 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF }, 21453 21454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT }, 21455 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT }, 21456 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT }, 21457 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT }, 21458 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT }, 21459 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT }, 21460 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT }, 21461 21462 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT }, 21463 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT }, 21464 { OPTION_MASK_ISA_SSE2, 
CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT }, 21465 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT }, 21466 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT }, 21467 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT }, 21468 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT }, 21469 21470 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT }, 21471 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT }, 21472 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT }, 21473 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT }, 21474 21475 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT }, 21476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT }, 21477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT }, 21478 21479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE }, 21480 21481 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 }, 21482 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 }, 21483 21484 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI }, 21485 21486 /* SSE2 MMX */ 21487 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI }, 21488 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI }, 21489 21490 /* SSE3 */ 21491 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF}, 21492 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF }, 21493 21494 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21495 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21496 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21497 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21498 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF 
}, 21499 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21500 21501 /* SSSE3 */ 21502 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI }, 21503 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI }, 21504 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI }, 21505 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI }, 21506 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI }, 21507 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI }, 21508 21509 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21510 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21511 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21512 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21513 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21514 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21515 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21516 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21517 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21518 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21519 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21520 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21521 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI }, 21522 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI }, 21523 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21524 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21525 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21526 { OPTION_MASK_ISA_SSSE3, 
CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21527 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21528 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21529 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21530 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21531 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21532 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21533 21534 /* SSSE3. */ 21535 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT }, 21536 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT }, 21537 21538 /* SSE4.1 */ 21539 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, 21540 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, 21541 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF }, 21542 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF }, 21543 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, 21544 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, 21545 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, 21546 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT }, 21547 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI }, 21548 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT }, 21549 21550 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI }, 21551 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI }, 21552 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI }, 21553 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI }, 21554 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 
"__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI }, 21555 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI }, 21556 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI }, 21557 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI }, 21558 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI }, 21559 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI }, 21560 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI }, 21561 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI }, 21562 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI }, 21563 21564 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI }, 21565 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21566 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21567 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21568 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21569 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21570 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21571 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21572 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21573 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21574 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI }, 21575 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21576 21577 /* SSE4.1 and SSE5 */ 21578 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT }, 21579 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT }, 21580 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) 
V2DF_FTYPE_V2DF_V2DF_INT }, 21581 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, 21582 21583 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST }, 21584 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST }, 21585 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST }, 21586 21587 /* SSE4.2 */ 21588 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21589 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR }, 21590 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT }, 21591 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT }, 21592 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, 21593 21594 /* SSE4A */ 21595 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT }, 21596 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI }, 21597 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT }, 21598 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21599 21600 /* AES */ 21601 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT }, 21602 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI }, 21603 21604 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21605 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21606 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21607 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21608 21609 /* PCLMUL */ 21610 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT }, 21611 21612 /* AVX */ 21613 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21614 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21615 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21616 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21617 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", 
IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21618 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21619 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21620 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21621 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21622 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21623 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21624 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21625 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21626 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21627 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21628 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21629 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21630 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21631 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21632 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21633 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21634 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21635 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21636 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21637 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21638 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21639 21640 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI }, 21641 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI }, 21642 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI }, 21643 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, 
"__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI }, 21644 21645 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT }, 21646 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, 21647 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF }, 21648 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF }, 21649 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, 21650 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT }, 21651 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, 21652 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, 21653 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, 21654 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, 21655 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, 21656 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT }, 21657 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, 21658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT }, 21659 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT }, 21660 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT }, 21661 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI }, 21662 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI }, 21663 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF }, 21664 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF }, 21665 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF }, 21666 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF }, 21667 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF }, 21668 { 
OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF }, 21669 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT }, 21670 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, 21671 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT }, 21672 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT }, 21673 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT }, 21674 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT }, 21675 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT }, 21676 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT }, 21677 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT }, 21678 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT }, 21679 21680 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, 21681 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, 21682 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF }, 21683 21684 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF }, 21685 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, 21686 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, 21687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, 21688 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, 21689 21690 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, 21691 21692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT }, 21693 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT }, 21694 21695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, 
"__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21699 21700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI }, 21701 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF }, 21702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF }, 21703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI }, 21704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF }, 21705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF }, 21706 21707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST }, 21708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST }, 21709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST }, 21710 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST }, 21711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST }, 21712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST }, 21713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST }, 21714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST }, 21715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST }, 21716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST }, 21717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST }, 21718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST }, 21719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST }, 21720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST }, 21721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST }, 21722 21723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) 
INT_FTYPE_V4DF }, 21724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF }, 21725 }; 21726 21727 /* SSE5 */ 21728 enum multi_arg_type { 21729 MULTI_ARG_UNKNOWN, 21730 MULTI_ARG_3_SF, 21731 MULTI_ARG_3_DF, 21732 MULTI_ARG_3_DI, 21733 MULTI_ARG_3_SI, 21734 MULTI_ARG_3_SI_DI, 21735 MULTI_ARG_3_HI, 21736 MULTI_ARG_3_HI_SI, 21737 MULTI_ARG_3_QI, 21738 MULTI_ARG_3_PERMPS, 21739 MULTI_ARG_3_PERMPD, 21740 MULTI_ARG_2_SF, 21741 MULTI_ARG_2_DF, 21742 MULTI_ARG_2_DI, 21743 MULTI_ARG_2_SI, 21744 MULTI_ARG_2_HI, 21745 MULTI_ARG_2_QI, 21746 MULTI_ARG_2_DI_IMM, 21747 MULTI_ARG_2_SI_IMM, 21748 MULTI_ARG_2_HI_IMM, 21749 MULTI_ARG_2_QI_IMM, 21750 MULTI_ARG_2_SF_CMP, 21751 MULTI_ARG_2_DF_CMP, 21752 MULTI_ARG_2_DI_CMP, 21753 MULTI_ARG_2_SI_CMP, 21754 MULTI_ARG_2_HI_CMP, 21755 MULTI_ARG_2_QI_CMP, 21756 MULTI_ARG_2_DI_TF, 21757 MULTI_ARG_2_SI_TF, 21758 MULTI_ARG_2_HI_TF, 21759 MULTI_ARG_2_QI_TF, 21760 MULTI_ARG_2_SF_TF, 21761 MULTI_ARG_2_DF_TF, 21762 MULTI_ARG_1_SF, 21763 MULTI_ARG_1_DF, 21764 MULTI_ARG_1_DI, 21765 MULTI_ARG_1_SI, 21766 MULTI_ARG_1_HI, 21767 MULTI_ARG_1_QI, 21768 MULTI_ARG_1_SI_DI, 21769 MULTI_ARG_1_HI_DI, 21770 MULTI_ARG_1_HI_SI, 21771 MULTI_ARG_1_QI_DI, 21772 MULTI_ARG_1_QI_SI, 21773 MULTI_ARG_1_QI_HI, 21774 MULTI_ARG_1_PH2PS, 21775 MULTI_ARG_1_PS2PH 21776 }; 21777 21778 static const struct builtin_description bdesc_multi_arg[] = 21779 { 21780 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF }, 21781 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF }, 21782 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF }, 21783 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF }, 21784 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF }, 21785 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF }, 21786 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF }, 21787 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF }, 21788 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF }, 21789 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF }, 21790 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF }, 21791 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF }, 21792 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF }, 21793 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF }, 21794 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF }, 21795 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 
0, (int)MULTI_ARG_3_DF }, 21796 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI }, 21797 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI }, 21798 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI }, 21799 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI }, 21800 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI }, 21801 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF }, 21802 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF }, 21803 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI }, 21804 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS }, 21805 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD }, 21806 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI }, 21807 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI }, 21808 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI }, 21809 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI }, 21810 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI }, 21811 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI }, 21812 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI }, 21813 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI }, 21814 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI }, 21815 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI }, 21816 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI }, 21817 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI }, 21818 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI }, 21819 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI }, 21820 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI }, 21821 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI }, 21822 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, 
(int)MULTI_ARG_2_DI_IMM }, 21823 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM }, 21824 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM }, 21825 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM }, 21826 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI }, 21827 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI }, 21828 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI }, 21829 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI }, 21830 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI }, 21831 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI }, 21832 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI }, 21833 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI }, 21834 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF }, 21835 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF }, 21836 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF }, 21837 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF }, 21838 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS }, 21839 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH }, 21840 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI }, 21841 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI }, 21842 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI }, 21843 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI }, 21844 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI }, 21845 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI }, 21846 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI }, 21847 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI }, 21848 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI }, 21849 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI }, 21850 { OPTION_MASK_ISA_SSE5, 
CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI }, 21851 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI }, 21852 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI }, 21853 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI }, 21854 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI }, 21855 21856 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP }, 21857 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP }, 21858 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP }, 21859 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP }, 21860 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP }, 21861 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP }, 21862 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP }, 21863 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP }, 21864 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP }, 21865 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP }, 21866 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP }, 21867 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP }, 21868 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP }, 21869 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP }, 21870 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP }, 21871 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP }, 21872 21873 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP }, 21874 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP }, 21875 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP }, 21876 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP }, 21877 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, 
"__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP }, 21878 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP }, 21879 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP }, 21880 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP }, 21881 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP }, 21882 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP }, 21883 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP }, 21884 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP }, 21885 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP }, 21886 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP }, 21887 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP }, 21888 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP }, 21889 21890 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP }, 21891 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP }, 21892 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP }, 21893 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP }, 21894 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP }, 21895 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP }, 21896 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP }, 21897 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP }, 21898 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP }, 21899 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP }, 21900 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP }, 21901 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP }, 21902 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP }, 21903 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, 
"__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP }, 21904 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP }, 21905 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP }, 21906 21907 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP }, 21908 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP }, 21909 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP }, 21910 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP }, 21911 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP }, 21912 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP }, 21913 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP }, 21914 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP }, 21915 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP }, 21916 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP }, 21917 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP }, 21918 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP }, 21919 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP }, 21920 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP }, 21921 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP }, 21922 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP }, 21923 21924 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP }, 21925 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP }, 21926 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP }, 21927 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP }, 21928 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP }, 21929 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP }, 21930 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", 
IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP }, 21931 21932 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP }, 21933 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP }, 21934 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP }, 21935 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP }, 21936 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP }, 21937 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP }, 21938 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP }, 21939 21940 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP }, 21941 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP }, 21942 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP }, 21943 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP }, 21944 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP }, 21945 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP }, 21946 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP }, 21947 21948 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP }, 21949 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP }, 21950 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP }, 21951 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP }, 21952 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP }, 21953 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP }, 21954 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP }, 21955 21956 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP }, 21957 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP }, 21958 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP }, 21959 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP }, 21960 { 
OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP }, 21961 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP }, 21962 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP }, 21963 21964 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP }, 21965 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP }, 21966 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP }, 21967 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP }, 21968 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP }, 21969 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP }, 21970 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP }, 21971 21972 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP }, 21973 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP }, 21974 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP }, 21975 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP }, 21976 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP }, 21977 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP }, 21978 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP }, 21979 21980 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP }, 21981 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP }, 21982 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP }, 21983 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP }, 21984 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP }, 21985 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP }, 21986 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP }, 21987 21988 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, 
"__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF }, 21989 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF }, 21990 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF }, 21991 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF }, 21992 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF }, 21993 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF }, 21994 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF }, 21995 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF }, 21996 21997 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF }, 21998 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF }, 21999 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF }, 22000 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF }, 22001 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF }, 22002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF }, 22003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF }, 22004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF }, 22005 22006 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, 22007 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, 22008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, 22009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, 22010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, 22011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, 22012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, 22013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, 22014 }; 22015 22016 /* 
Set up all the MMX/SSE builtins, even builtins for instructions that are not 22017 in the current target ISA to allow the user to compile particular modules 22018 with different target specific options that differ from the command line 22019 options. */ 22020 static void 22021 ix86_init_mmx_sse_builtins (void) 22022 { 22023 const struct builtin_description * d; 22024 size_t i; 22025 22026 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode); 22027 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); 22028 tree V1DI_type_node 22029 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode); 22030 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode); 22031 tree V2DI_type_node 22032 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode); 22033 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode); 22034 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode); 22035 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode); 22036 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode); 22037 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode); 22038 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode); 22039 22040 tree pchar_type_node = build_pointer_type (char_type_node); 22041 tree pcchar_type_node 22042 = build_pointer_type (build_type_variant (char_type_node, 1, 0)); 22043 tree pfloat_type_node = build_pointer_type (float_type_node); 22044 tree pcfloat_type_node 22045 = build_pointer_type (build_type_variant (float_type_node, 1, 0)); 22046 tree pv2sf_type_node = build_pointer_type (V2SF_type_node); 22047 tree pcv2sf_type_node 22048 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0)); 22049 tree pv2di_type_node = build_pointer_type (V2DI_type_node); 22050 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node); 22051 22052 /* Comparisons. */ 22053 tree int_ftype_v4sf_v4sf 22054 = build_function_type_list (integer_type_node, 22055 V4SF_type_node, V4SF_type_node, NULL_TREE); 22056 tree v4si_ftype_v4sf_v4sf 22057 = build_function_type_list (V4SI_type_node, 22058 V4SF_type_node, V4SF_type_node, NULL_TREE); 22059 /* MMX/SSE/integer conversions. */ 22060 tree int_ftype_v4sf 22061 = build_function_type_list (integer_type_node, 22062 V4SF_type_node, NULL_TREE); 22063 tree int64_ftype_v4sf 22064 = build_function_type_list (long_long_integer_type_node, 22065 V4SF_type_node, NULL_TREE); 22066 tree int_ftype_v8qi 22067 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE); 22068 tree v4sf_ftype_v4sf_int 22069 = build_function_type_list (V4SF_type_node, 22070 V4SF_type_node, integer_type_node, NULL_TREE); 22071 tree v4sf_ftype_v4sf_int64 22072 = build_function_type_list (V4SF_type_node, 22073 V4SF_type_node, long_long_integer_type_node, 22074 NULL_TREE); 22075 tree v4sf_ftype_v4sf_v2si 22076 = build_function_type_list (V4SF_type_node, 22077 V4SF_type_node, V2SI_type_node, NULL_TREE); 22078 22079 /* Miscellaneous. 
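   These cover builtin signatures that do not fit the plain unop/binop
   groups below: the MMX pack types such as (V4HI, V4HI) -> V8QI, the
   shift-by-immediate types such as (V4HI, int) -> V4HI, and the
   three-operand (V4SF, V4SF, int) type used by immediate-selector
   builtins such as __builtin_ia32_shufps.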
*/ 22080 tree v8qi_ftype_v4hi_v4hi 22081 = build_function_type_list (V8QI_type_node, 22082 V4HI_type_node, V4HI_type_node, NULL_TREE); 22083 tree v4hi_ftype_v2si_v2si 22084 = build_function_type_list (V4HI_type_node, 22085 V2SI_type_node, V2SI_type_node, NULL_TREE); 22086 tree v4sf_ftype_v4sf_v4sf_int 22087 = build_function_type_list (V4SF_type_node, 22088 V4SF_type_node, V4SF_type_node, 22089 integer_type_node, NULL_TREE); 22090 tree v2si_ftype_v4hi_v4hi 22091 = build_function_type_list (V2SI_type_node, 22092 V4HI_type_node, V4HI_type_node, NULL_TREE); 22093 tree v4hi_ftype_v4hi_int 22094 = build_function_type_list (V4HI_type_node, 22095 V4HI_type_node, integer_type_node, NULL_TREE); 22096 tree v2si_ftype_v2si_int 22097 = build_function_type_list (V2SI_type_node, 22098 V2SI_type_node, integer_type_node, NULL_TREE); 22099 tree v1di_ftype_v1di_int 22100 = build_function_type_list (V1DI_type_node, 22101 V1DI_type_node, integer_type_node, NULL_TREE); 22102 22103 tree void_ftype_void 22104 = build_function_type (void_type_node, void_list_node); 22105 tree void_ftype_unsigned 22106 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE); 22107 tree void_ftype_unsigned_unsigned 22108 = build_function_type_list (void_type_node, unsigned_type_node, 22109 unsigned_type_node, NULL_TREE); 22110 tree void_ftype_pcvoid_unsigned_unsigned 22111 = build_function_type_list (void_type_node, const_ptr_type_node, 22112 unsigned_type_node, unsigned_type_node, 22113 NULL_TREE); 22114 tree unsigned_ftype_void 22115 = build_function_type (unsigned_type_node, void_list_node); 22116 tree v2si_ftype_v4sf 22117 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE); 22118 /* Loads/stores. */ 22119 tree void_ftype_v8qi_v8qi_pchar 22120 = build_function_type_list (void_type_node, 22121 V8QI_type_node, V8QI_type_node, 22122 pchar_type_node, NULL_TREE); 22123 tree v4sf_ftype_pcfloat 22124 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE); 22125 tree v4sf_ftype_v4sf_pcv2sf 22126 = build_function_type_list (V4SF_type_node, 22127 V4SF_type_node, pcv2sf_type_node, NULL_TREE); 22128 tree void_ftype_pv2sf_v4sf 22129 = build_function_type_list (void_type_node, 22130 pv2sf_type_node, V4SF_type_node, NULL_TREE); 22131 tree void_ftype_pfloat_v4sf 22132 = build_function_type_list (void_type_node, 22133 pfloat_type_node, V4SF_type_node, NULL_TREE); 22134 tree void_ftype_pdi_di 22135 = build_function_type_list (void_type_node, 22136 pdi_type_node, long_long_unsigned_type_node, 22137 NULL_TREE); 22138 tree void_ftype_pv2di_v2di 22139 = build_function_type_list (void_type_node, 22140 pv2di_type_node, V2DI_type_node, NULL_TREE); 22141 /* Normal vector unops. */ 22142 tree v4sf_ftype_v4sf 22143 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); 22144 tree v16qi_ftype_v16qi 22145 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE); 22146 tree v8hi_ftype_v8hi 22147 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE); 22148 tree v4si_ftype_v4si 22149 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE); 22150 tree v8qi_ftype_v8qi 22151 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE); 22152 tree v4hi_ftype_v4hi 22153 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE); 22154 22155 /* Normal vector binops. 
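   Most of these describe two-operand builtins taking and returning full
   vector values; v4sf_ftype_v4sf_v4sf, for instance, backs builtins such
   as __builtin_ia32_addps.  A minimal user-level sketch, assuming the
   relevant ISA is enabled (e.g. -msse); the typedef and function name
   are illustrative only, not part of this file:

     typedef float v4sf __attribute__ ((vector_size (16)));

     v4sf
     add4 (v4sf a, v4sf b)
     {
       return __builtin_ia32_addps (a, b);
     }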
*/ 22156 tree v4sf_ftype_v4sf_v4sf 22157 = build_function_type_list (V4SF_type_node, 22158 V4SF_type_node, V4SF_type_node, NULL_TREE); 22159 tree v8qi_ftype_v8qi_v8qi 22160 = build_function_type_list (V8QI_type_node, 22161 V8QI_type_node, V8QI_type_node, NULL_TREE); 22162 tree v4hi_ftype_v4hi_v4hi 22163 = build_function_type_list (V4HI_type_node, 22164 V4HI_type_node, V4HI_type_node, NULL_TREE); 22165 tree v2si_ftype_v2si_v2si 22166 = build_function_type_list (V2SI_type_node, 22167 V2SI_type_node, V2SI_type_node, NULL_TREE); 22168 tree v1di_ftype_v1di_v1di 22169 = build_function_type_list (V1DI_type_node, 22170 V1DI_type_node, V1DI_type_node, NULL_TREE); 22171 tree v1di_ftype_v1di_v1di_int 22172 = build_function_type_list (V1DI_type_node, 22173 V1DI_type_node, V1DI_type_node, 22174 integer_type_node, NULL_TREE); 22175 tree v2si_ftype_v2sf 22176 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE); 22177 tree v2sf_ftype_v2si 22178 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE); 22179 tree v2si_ftype_v2si 22180 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE); 22181 tree v2sf_ftype_v2sf 22182 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE); 22183 tree v2sf_ftype_v2sf_v2sf 22184 = build_function_type_list (V2SF_type_node, 22185 V2SF_type_node, V2SF_type_node, NULL_TREE); 22186 tree v2si_ftype_v2sf_v2sf 22187 = build_function_type_list (V2SI_type_node, 22188 V2SF_type_node, V2SF_type_node, NULL_TREE); 22189 tree pint_type_node = build_pointer_type (integer_type_node); 22190 tree pdouble_type_node = build_pointer_type (double_type_node); 22191 tree pcdouble_type_node = build_pointer_type ( 22192 build_type_variant (double_type_node, 1, 0)); 22193 tree int_ftype_v2df_v2df 22194 = build_function_type_list (integer_type_node, 22195 V2DF_type_node, V2DF_type_node, NULL_TREE); 22196 22197 tree void_ftype_pcvoid 22198 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE); 22199 tree v4sf_ftype_v4si 22200 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE); 22201 tree v4si_ftype_v4sf 22202 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE); 22203 tree v2df_ftype_v4si 22204 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE); 22205 tree v4si_ftype_v2df 22206 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE); 22207 tree v4si_ftype_v2df_v2df 22208 = build_function_type_list (V4SI_type_node, 22209 V2DF_type_node, V2DF_type_node, NULL_TREE); 22210 tree v2si_ftype_v2df 22211 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE); 22212 tree v4sf_ftype_v2df 22213 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE); 22214 tree v2df_ftype_v2si 22215 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE); 22216 tree v2df_ftype_v4sf 22217 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE); 22218 tree int_ftype_v2df 22219 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE); 22220 tree int64_ftype_v2df 22221 = build_function_type_list (long_long_integer_type_node, 22222 V2DF_type_node, NULL_TREE); 22223 tree v2df_ftype_v2df_int 22224 = build_function_type_list (V2DF_type_node, 22225 V2DF_type_node, integer_type_node, NULL_TREE); 22226 tree v2df_ftype_v2df_int64 22227 = build_function_type_list (V2DF_type_node, 22228 V2DF_type_node, long_long_integer_type_node, 22229 NULL_TREE); 22230 tree v4sf_ftype_v4sf_v2df 22231 = 
build_function_type_list (V4SF_type_node, 22232 V4SF_type_node, V2DF_type_node, NULL_TREE); 22233 tree v2df_ftype_v2df_v4sf 22234 = build_function_type_list (V2DF_type_node, 22235 V2DF_type_node, V4SF_type_node, NULL_TREE); 22236 tree v2df_ftype_v2df_v2df_int 22237 = build_function_type_list (V2DF_type_node, 22238 V2DF_type_node, V2DF_type_node, 22239 integer_type_node, 22240 NULL_TREE); 22241 tree v2df_ftype_v2df_pcdouble 22242 = build_function_type_list (V2DF_type_node, 22243 V2DF_type_node, pcdouble_type_node, NULL_TREE); 22244 tree void_ftype_pdouble_v2df 22245 = build_function_type_list (void_type_node, 22246 pdouble_type_node, V2DF_type_node, NULL_TREE); 22247 tree void_ftype_pint_int 22248 = build_function_type_list (void_type_node, 22249 pint_type_node, integer_type_node, NULL_TREE); 22250 tree void_ftype_v16qi_v16qi_pchar 22251 = build_function_type_list (void_type_node, 22252 V16QI_type_node, V16QI_type_node, 22253 pchar_type_node, NULL_TREE); 22254 tree v2df_ftype_pcdouble 22255 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE); 22256 tree v2df_ftype_v2df_v2df 22257 = build_function_type_list (V2DF_type_node, 22258 V2DF_type_node, V2DF_type_node, NULL_TREE); 22259 tree v16qi_ftype_v16qi_v16qi 22260 = build_function_type_list (V16QI_type_node, 22261 V16QI_type_node, V16QI_type_node, NULL_TREE); 22262 tree v8hi_ftype_v8hi_v8hi 22263 = build_function_type_list (V8HI_type_node, 22264 V8HI_type_node, V8HI_type_node, NULL_TREE); 22265 tree v4si_ftype_v4si_v4si 22266 = build_function_type_list (V4SI_type_node, 22267 V4SI_type_node, V4SI_type_node, NULL_TREE); 22268 tree v2di_ftype_v2di_v2di 22269 = build_function_type_list (V2DI_type_node, 22270 V2DI_type_node, V2DI_type_node, NULL_TREE); 22271 tree v2di_ftype_v2df_v2df 22272 = build_function_type_list (V2DI_type_node, 22273 V2DF_type_node, V2DF_type_node, NULL_TREE); 22274 tree v2df_ftype_v2df 22275 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE); 22276 tree v2di_ftype_v2di_int 22277 = build_function_type_list (V2DI_type_node, 22278 V2DI_type_node, integer_type_node, NULL_TREE); 22279 tree v2di_ftype_v2di_v2di_int 22280 = build_function_type_list (V2DI_type_node, V2DI_type_node, 22281 V2DI_type_node, integer_type_node, NULL_TREE); 22282 tree v4si_ftype_v4si_int 22283 = build_function_type_list (V4SI_type_node, 22284 V4SI_type_node, integer_type_node, NULL_TREE); 22285 tree v8hi_ftype_v8hi_int 22286 = build_function_type_list (V8HI_type_node, 22287 V8HI_type_node, integer_type_node, NULL_TREE); 22288 tree v4si_ftype_v8hi_v8hi 22289 = build_function_type_list (V4SI_type_node, 22290 V8HI_type_node, V8HI_type_node, NULL_TREE); 22291 tree v1di_ftype_v8qi_v8qi 22292 = build_function_type_list (V1DI_type_node, 22293 V8QI_type_node, V8QI_type_node, NULL_TREE); 22294 tree v1di_ftype_v2si_v2si 22295 = build_function_type_list (V1DI_type_node, 22296 V2SI_type_node, V2SI_type_node, NULL_TREE); 22297 tree v2di_ftype_v16qi_v16qi 22298 = build_function_type_list (V2DI_type_node, 22299 V16QI_type_node, V16QI_type_node, NULL_TREE); 22300 tree v2di_ftype_v4si_v4si 22301 = build_function_type_list (V2DI_type_node, 22302 V4SI_type_node, V4SI_type_node, NULL_TREE); 22303 tree int_ftype_v16qi 22304 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE); 22305 tree v16qi_ftype_pcchar 22306 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE); 22307 tree void_ftype_pchar_v16qi 22308 = build_function_type_list (void_type_node, 22309 pchar_type_node, V16QI_type_node, 
NULL_TREE); 22310 22311 tree v2di_ftype_v2di_unsigned_unsigned 22312 = build_function_type_list (V2DI_type_node, V2DI_type_node, 22313 unsigned_type_node, unsigned_type_node, 22314 NULL_TREE); 22315 tree v2di_ftype_v2di_v2di_unsigned_unsigned 22316 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node, 22317 unsigned_type_node, unsigned_type_node, 22318 NULL_TREE); 22319 tree v2di_ftype_v2di_v16qi 22320 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node, 22321 NULL_TREE); 22322 tree v2df_ftype_v2df_v2df_v2df 22323 = build_function_type_list (V2DF_type_node, 22324 V2DF_type_node, V2DF_type_node, 22325 V2DF_type_node, NULL_TREE); 22326 tree v4sf_ftype_v4sf_v4sf_v4sf 22327 = build_function_type_list (V4SF_type_node, 22328 V4SF_type_node, V4SF_type_node, 22329 V4SF_type_node, NULL_TREE); 22330 tree v8hi_ftype_v16qi 22331 = build_function_type_list (V8HI_type_node, V16QI_type_node, 22332 NULL_TREE); 22333 tree v4si_ftype_v16qi 22334 = build_function_type_list (V4SI_type_node, V16QI_type_node, 22335 NULL_TREE); 22336 tree v2di_ftype_v16qi 22337 = build_function_type_list (V2DI_type_node, V16QI_type_node, 22338 NULL_TREE); 22339 tree v4si_ftype_v8hi 22340 = build_function_type_list (V4SI_type_node, V8HI_type_node, 22341 NULL_TREE); 22342 tree v2di_ftype_v8hi 22343 = build_function_type_list (V2DI_type_node, V8HI_type_node, 22344 NULL_TREE); 22345 tree v2di_ftype_v4si 22346 = build_function_type_list (V2DI_type_node, V4SI_type_node, 22347 NULL_TREE); 22348 tree v2di_ftype_pv2di 22349 = build_function_type_list (V2DI_type_node, pv2di_type_node, 22350 NULL_TREE); 22351 tree v16qi_ftype_v16qi_v16qi_int 22352 = build_function_type_list (V16QI_type_node, V16QI_type_node, 22353 V16QI_type_node, integer_type_node, 22354 NULL_TREE); 22355 tree v16qi_ftype_v16qi_v16qi_v16qi 22356 = build_function_type_list (V16QI_type_node, V16QI_type_node, 22357 V16QI_type_node, V16QI_type_node, 22358 NULL_TREE); 22359 tree v8hi_ftype_v8hi_v8hi_int 22360 = build_function_type_list (V8HI_type_node, V8HI_type_node, 22361 V8HI_type_node, integer_type_node, 22362 NULL_TREE); 22363 tree v4si_ftype_v4si_v4si_int 22364 = build_function_type_list (V4SI_type_node, V4SI_type_node, 22365 V4SI_type_node, integer_type_node, 22366 NULL_TREE); 22367 tree int_ftype_v2di_v2di 22368 = build_function_type_list (integer_type_node, 22369 V2DI_type_node, V2DI_type_node, 22370 NULL_TREE); 22371 tree int_ftype_v16qi_int_v16qi_int_int 22372 = build_function_type_list (integer_type_node, 22373 V16QI_type_node, 22374 integer_type_node, 22375 V16QI_type_node, 22376 integer_type_node, 22377 integer_type_node, 22378 NULL_TREE); 22379 tree v16qi_ftype_v16qi_int_v16qi_int_int 22380 = build_function_type_list (V16QI_type_node, 22381 V16QI_type_node, 22382 integer_type_node, 22383 V16QI_type_node, 22384 integer_type_node, 22385 integer_type_node, 22386 NULL_TREE); 22387 tree int_ftype_v16qi_v16qi_int 22388 = build_function_type_list (integer_type_node, 22389 V16QI_type_node, 22390 V16QI_type_node, 22391 integer_type_node, 22392 NULL_TREE); 22393 22394 /* SSE5 instructions */ 22395 tree v2di_ftype_v2di_v2di_v2di 22396 = build_function_type_list (V2DI_type_node, 22397 V2DI_type_node, 22398 V2DI_type_node, 22399 V2DI_type_node, 22400 NULL_TREE); 22401 22402 tree v4si_ftype_v4si_v4si_v4si 22403 = build_function_type_list (V4SI_type_node, 22404 V4SI_type_node, 22405 V4SI_type_node, 22406 V4SI_type_node, 22407 NULL_TREE); 22408 22409 tree v4si_ftype_v4si_v4si_v2di 22410 = build_function_type_list (V4SI_type_node, 
22411 V4SI_type_node, 22412 V4SI_type_node, 22413 V2DI_type_node, 22414 NULL_TREE); 22415 22416 tree v8hi_ftype_v8hi_v8hi_v8hi 22417 = build_function_type_list (V8HI_type_node, 22418 V8HI_type_node, 22419 V8HI_type_node, 22420 V8HI_type_node, 22421 NULL_TREE); 22422 22423 tree v8hi_ftype_v8hi_v8hi_v4si 22424 = build_function_type_list (V8HI_type_node, 22425 V8HI_type_node, 22426 V8HI_type_node, 22427 V4SI_type_node, 22428 NULL_TREE); 22429 22430 tree v2df_ftype_v2df_v2df_v16qi 22431 = build_function_type_list (V2DF_type_node, 22432 V2DF_type_node, 22433 V2DF_type_node, 22434 V16QI_type_node, 22435 NULL_TREE); 22436 22437 tree v4sf_ftype_v4sf_v4sf_v16qi 22438 = build_function_type_list (V4SF_type_node, 22439 V4SF_type_node, 22440 V4SF_type_node, 22441 V16QI_type_node, 22442 NULL_TREE); 22443 22444 tree v2di_ftype_v2di_si 22445 = build_function_type_list (V2DI_type_node, 22446 V2DI_type_node, 22447 integer_type_node, 22448 NULL_TREE); 22449 22450 tree v4si_ftype_v4si_si 22451 = build_function_type_list (V4SI_type_node, 22452 V4SI_type_node, 22453 integer_type_node, 22454 NULL_TREE); 22455 22456 tree v8hi_ftype_v8hi_si 22457 = build_function_type_list (V8HI_type_node, 22458 V8HI_type_node, 22459 integer_type_node, 22460 NULL_TREE); 22461 22462 tree v16qi_ftype_v16qi_si 22463 = build_function_type_list (V16QI_type_node, 22464 V16QI_type_node, 22465 integer_type_node, 22466 NULL_TREE); 22467 tree v4sf_ftype_v4hi 22468 = build_function_type_list (V4SF_type_node, 22469 V4HI_type_node, 22470 NULL_TREE); 22471 22472 tree v4hi_ftype_v4sf 22473 = build_function_type_list (V4HI_type_node, 22474 V4SF_type_node, 22475 NULL_TREE); 22476 22477 tree v2di_ftype_v2di 22478 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); 22479 22480 tree v16qi_ftype_v8hi_v8hi 22481 = build_function_type_list (V16QI_type_node, 22482 V8HI_type_node, V8HI_type_node, 22483 NULL_TREE); 22484 tree v8hi_ftype_v4si_v4si 22485 = build_function_type_list (V8HI_type_node, 22486 V4SI_type_node, V4SI_type_node, 22487 NULL_TREE); 22488 tree v8hi_ftype_v16qi_v16qi 22489 = build_function_type_list (V8HI_type_node, 22490 V16QI_type_node, V16QI_type_node, 22491 NULL_TREE); 22492 tree v4hi_ftype_v8qi_v8qi 22493 = build_function_type_list (V4HI_type_node, 22494 V8QI_type_node, V8QI_type_node, 22495 NULL_TREE); 22496 tree unsigned_ftype_unsigned_uchar 22497 = build_function_type_list (unsigned_type_node, 22498 unsigned_type_node, 22499 unsigned_char_type_node, 22500 NULL_TREE); 22501 tree unsigned_ftype_unsigned_ushort 22502 = build_function_type_list (unsigned_type_node, 22503 unsigned_type_node, 22504 short_unsigned_type_node, 22505 NULL_TREE); 22506 tree unsigned_ftype_unsigned_unsigned 22507 = build_function_type_list (unsigned_type_node, 22508 unsigned_type_node, 22509 unsigned_type_node, 22510 NULL_TREE); 22511 tree uint64_ftype_uint64_uint64 22512 = build_function_type_list (long_long_unsigned_type_node, 22513 long_long_unsigned_type_node, 22514 long_long_unsigned_type_node, 22515 NULL_TREE); 22516 tree float_ftype_float 22517 = build_function_type_list (float_type_node, 22518 float_type_node, 22519 NULL_TREE); 22520 22521 /* AVX builtins */ 22522 tree V32QI_type_node = build_vector_type_for_mode (char_type_node, 22523 V32QImode); 22524 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node, 22525 V8SImode); 22526 tree V8SF_type_node = build_vector_type_for_mode (float_type_node, 22527 V8SFmode); 22528 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node, 22529 V4DImode); 22530 
tree V4DF_type_node = build_vector_type_for_mode (double_type_node, 22531 V4DFmode); 22532 tree v8sf_ftype_v8sf 22533 = build_function_type_list (V8SF_type_node, 22534 V8SF_type_node, 22535 NULL_TREE); 22536 tree v8si_ftype_v8sf 22537 = build_function_type_list (V8SI_type_node, 22538 V8SF_type_node, 22539 NULL_TREE); 22540 tree v8sf_ftype_v8si 22541 = build_function_type_list (V8SF_type_node, 22542 V8SI_type_node, 22543 NULL_TREE); 22544 tree v4si_ftype_v4df 22545 = build_function_type_list (V4SI_type_node, 22546 V4DF_type_node, 22547 NULL_TREE); 22548 tree v4df_ftype_v4df 22549 = build_function_type_list (V4DF_type_node, 22550 V4DF_type_node, 22551 NULL_TREE); 22552 tree v4df_ftype_v4si 22553 = build_function_type_list (V4DF_type_node, 22554 V4SI_type_node, 22555 NULL_TREE); 22556 tree v4df_ftype_v4sf 22557 = build_function_type_list (V4DF_type_node, 22558 V4SF_type_node, 22559 NULL_TREE); 22560 tree v4sf_ftype_v4df 22561 = build_function_type_list (V4SF_type_node, 22562 V4DF_type_node, 22563 NULL_TREE); 22564 tree v8sf_ftype_v8sf_v8sf 22565 = build_function_type_list (V8SF_type_node, 22566 V8SF_type_node, V8SF_type_node, 22567 NULL_TREE); 22568 tree v4df_ftype_v4df_v4df 22569 = build_function_type_list (V4DF_type_node, 22570 V4DF_type_node, V4DF_type_node, 22571 NULL_TREE); 22572 tree v8sf_ftype_v8sf_int 22573 = build_function_type_list (V8SF_type_node, 22574 V8SF_type_node, integer_type_node, 22575 NULL_TREE); 22576 tree v4si_ftype_v8si_int 22577 = build_function_type_list (V4SI_type_node, 22578 V8SI_type_node, integer_type_node, 22579 NULL_TREE); 22580 tree v4df_ftype_v4df_int 22581 = build_function_type_list (V4DF_type_node, 22582 V4DF_type_node, integer_type_node, 22583 NULL_TREE); 22584 tree v4sf_ftype_v8sf_int 22585 = build_function_type_list (V4SF_type_node, 22586 V8SF_type_node, integer_type_node, 22587 NULL_TREE); 22588 tree v2df_ftype_v4df_int 22589 = build_function_type_list (V2DF_type_node, 22590 V4DF_type_node, integer_type_node, 22591 NULL_TREE); 22592 tree v8sf_ftype_v8sf_v8sf_int 22593 = build_function_type_list (V8SF_type_node, 22594 V8SF_type_node, V8SF_type_node, 22595 integer_type_node, 22596 NULL_TREE); 22597 tree v8sf_ftype_v8sf_v8sf_v8sf 22598 = build_function_type_list (V8SF_type_node, 22599 V8SF_type_node, V8SF_type_node, 22600 V8SF_type_node, 22601 NULL_TREE); 22602 tree v4df_ftype_v4df_v4df_v4df 22603 = build_function_type_list (V4DF_type_node, 22604 V4DF_type_node, V4DF_type_node, 22605 V4DF_type_node, 22606 NULL_TREE); 22607 tree v8si_ftype_v8si_v8si_int 22608 = build_function_type_list (V8SI_type_node, 22609 V8SI_type_node, V8SI_type_node, 22610 integer_type_node, 22611 NULL_TREE); 22612 tree v4df_ftype_v4df_v4df_int 22613 = build_function_type_list (V4DF_type_node, 22614 V4DF_type_node, V4DF_type_node, 22615 integer_type_node, 22616 NULL_TREE); 22617 tree v8sf_ftype_pcfloat 22618 = build_function_type_list (V8SF_type_node, 22619 pcfloat_type_node, 22620 NULL_TREE); 22621 tree v4df_ftype_pcdouble 22622 = build_function_type_list (V4DF_type_node, 22623 pcdouble_type_node, 22624 NULL_TREE); 22625 tree pcv4sf_type_node 22626 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0)); 22627 tree pcv2df_type_node 22628 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0)); 22629 tree v8sf_ftype_pcv4sf 22630 = build_function_type_list (V8SF_type_node, 22631 pcv4sf_type_node, 22632 NULL_TREE); 22633 tree v4df_ftype_pcv2df 22634 = build_function_type_list (V4DF_type_node, 22635 pcv2df_type_node, 22636 NULL_TREE); 22637 tree v32qi_ftype_pcchar 22638 
= build_function_type_list (V32QI_type_node, 22639 pcchar_type_node, 22640 NULL_TREE); 22641 tree void_ftype_pchar_v32qi 22642 = build_function_type_list (void_type_node, 22643 pchar_type_node, V32QI_type_node, 22644 NULL_TREE); 22645 tree v8si_ftype_v8si_v4si_int 22646 = build_function_type_list (V8SI_type_node, 22647 V8SI_type_node, V4SI_type_node, 22648 integer_type_node, 22649 NULL_TREE); 22650 tree pv4di_type_node = build_pointer_type (V4DI_type_node); 22651 tree void_ftype_pv4di_v4di 22652 = build_function_type_list (void_type_node, 22653 pv4di_type_node, V4DI_type_node, 22654 NULL_TREE); 22655 tree v8sf_ftype_v8sf_v4sf_int 22656 = build_function_type_list (V8SF_type_node, 22657 V8SF_type_node, V4SF_type_node, 22658 integer_type_node, 22659 NULL_TREE); 22660 tree v4df_ftype_v4df_v2df_int 22661 = build_function_type_list (V4DF_type_node, 22662 V4DF_type_node, V2DF_type_node, 22663 integer_type_node, 22664 NULL_TREE); 22665 tree void_ftype_pfloat_v8sf 22666 = build_function_type_list (void_type_node, 22667 pfloat_type_node, V8SF_type_node, 22668 NULL_TREE); 22669 tree void_ftype_pdouble_v4df 22670 = build_function_type_list (void_type_node, 22671 pdouble_type_node, V4DF_type_node, 22672 NULL_TREE); 22673 tree pv8sf_type_node = build_pointer_type (V8SF_type_node); 22674 tree pv4sf_type_node = build_pointer_type (V4SF_type_node); 22675 tree pv4df_type_node = build_pointer_type (V4DF_type_node); 22676 tree pv2df_type_node = build_pointer_type (V2DF_type_node); 22677 tree pcv8sf_type_node 22678 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0)); 22679 tree pcv4df_type_node 22680 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0)); 22681 tree v8sf_ftype_pcv8sf_v8sf 22682 = build_function_type_list (V8SF_type_node, 22683 pcv8sf_type_node, V8SF_type_node, 22684 NULL_TREE); 22685 tree v4df_ftype_pcv4df_v4df 22686 = build_function_type_list (V4DF_type_node, 22687 pcv4df_type_node, V4DF_type_node, 22688 NULL_TREE); 22689 tree v4sf_ftype_pcv4sf_v4sf 22690 = build_function_type_list (V4SF_type_node, 22691 pcv4sf_type_node, V4SF_type_node, 22692 NULL_TREE); 22693 tree v2df_ftype_pcv2df_v2df 22694 = build_function_type_list (V2DF_type_node, 22695 pcv2df_type_node, V2DF_type_node, 22696 NULL_TREE); 22697 tree void_ftype_pv8sf_v8sf_v8sf 22698 = build_function_type_list (void_type_node, 22699 pv8sf_type_node, V8SF_type_node, 22700 V8SF_type_node, 22701 NULL_TREE); 22702 tree void_ftype_pv4df_v4df_v4df 22703 = build_function_type_list (void_type_node, 22704 pv4df_type_node, V4DF_type_node, 22705 V4DF_type_node, 22706 NULL_TREE); 22707 tree void_ftype_pv4sf_v4sf_v4sf 22708 = build_function_type_list (void_type_node, 22709 pv4sf_type_node, V4SF_type_node, 22710 V4SF_type_node, 22711 NULL_TREE); 22712 tree void_ftype_pv2df_v2df_v2df 22713 = build_function_type_list (void_type_node, 22714 pv2df_type_node, V2DF_type_node, 22715 V2DF_type_node, 22716 NULL_TREE); 22717 tree v4df_ftype_v2df 22718 = build_function_type_list (V4DF_type_node, 22719 V2DF_type_node, 22720 NULL_TREE); 22721 tree v8sf_ftype_v4sf 22722 = build_function_type_list (V8SF_type_node, 22723 V4SF_type_node, 22724 NULL_TREE); 22725 tree v8si_ftype_v4si 22726 = build_function_type_list (V8SI_type_node, 22727 V4SI_type_node, 22728 NULL_TREE); 22729 tree v2df_ftype_v4df 22730 = build_function_type_list (V2DF_type_node, 22731 V4DF_type_node, 22732 NULL_TREE); 22733 tree v4sf_ftype_v8sf 22734 = build_function_type_list (V4SF_type_node, 22735 V8SF_type_node, 22736 NULL_TREE); 22737 tree v4si_ftype_v8si 22738 = 
build_function_type_list (V4SI_type_node, 22739 V8SI_type_node, 22740 NULL_TREE); 22741 tree int_ftype_v4df 22742 = build_function_type_list (integer_type_node, 22743 V4DF_type_node, 22744 NULL_TREE); 22745 tree int_ftype_v8sf 22746 = build_function_type_list (integer_type_node, 22747 V8SF_type_node, 22748 NULL_TREE); 22749 tree int_ftype_v8sf_v8sf 22750 = build_function_type_list (integer_type_node, 22751 V8SF_type_node, V8SF_type_node, 22752 NULL_TREE); 22753 tree int_ftype_v4di_v4di 22754 = build_function_type_list (integer_type_node, 22755 V4DI_type_node, V4DI_type_node, 22756 NULL_TREE); 22757 tree int_ftype_v4df_v4df 22758 = build_function_type_list (integer_type_node, 22759 V4DF_type_node, V4DF_type_node, 22760 NULL_TREE); 22761 tree v8sf_ftype_v8sf_v8si 22762 = build_function_type_list (V8SF_type_node, 22763 V8SF_type_node, V8SI_type_node, 22764 NULL_TREE); 22765 tree v4df_ftype_v4df_v4di 22766 = build_function_type_list (V4DF_type_node, 22767 V4DF_type_node, V4DI_type_node, 22768 NULL_TREE); 22769 tree v4sf_ftype_v4sf_v4si 22770 = build_function_type_list (V4SF_type_node, 22771 V4SF_type_node, V4SI_type_node, NULL_TREE); 22772 tree v2df_ftype_v2df_v2di 22773 = build_function_type_list (V2DF_type_node, 22774 V2DF_type_node, V2DI_type_node, NULL_TREE); 22775 22776 tree ftype; 22777 22778 /* Add all special builtins with variable number of operands. */ 22779 for (i = 0, d = bdesc_special_args; 22780 i < ARRAY_SIZE (bdesc_special_args); 22781 i++, d++) 22782 { 22783 tree type; 22784 22785 if (d->name == 0) 22786 continue; 22787 22788 switch ((enum ix86_special_builtin_type) d->flag) 22789 { 22790 case VOID_FTYPE_VOID: 22791 type = void_ftype_void; 22792 break; 22793 case V32QI_FTYPE_PCCHAR: 22794 type = v32qi_ftype_pcchar; 22795 break; 22796 case V16QI_FTYPE_PCCHAR: 22797 type = v16qi_ftype_pcchar; 22798 break; 22799 case V8SF_FTYPE_PCV4SF: 22800 type = v8sf_ftype_pcv4sf; 22801 break; 22802 case V8SF_FTYPE_PCFLOAT: 22803 type = v8sf_ftype_pcfloat; 22804 break; 22805 case V4DF_FTYPE_PCV2DF: 22806 type = v4df_ftype_pcv2df; 22807 break; 22808 case V4DF_FTYPE_PCDOUBLE: 22809 type = v4df_ftype_pcdouble; 22810 break; 22811 case V4SF_FTYPE_PCFLOAT: 22812 type = v4sf_ftype_pcfloat; 22813 break; 22814 case V2DI_FTYPE_PV2DI: 22815 type = v2di_ftype_pv2di; 22816 break; 22817 case V2DF_FTYPE_PCDOUBLE: 22818 type = v2df_ftype_pcdouble; 22819 break; 22820 case V8SF_FTYPE_PCV8SF_V8SF: 22821 type = v8sf_ftype_pcv8sf_v8sf; 22822 break; 22823 case V4DF_FTYPE_PCV4DF_V4DF: 22824 type = v4df_ftype_pcv4df_v4df; 22825 break; 22826 case V4SF_FTYPE_V4SF_PCV2SF: 22827 type = v4sf_ftype_v4sf_pcv2sf; 22828 break; 22829 case V4SF_FTYPE_PCV4SF_V4SF: 22830 type = v4sf_ftype_pcv4sf_v4sf; 22831 break; 22832 case V2DF_FTYPE_V2DF_PCDOUBLE: 22833 type = v2df_ftype_v2df_pcdouble; 22834 break; 22835 case V2DF_FTYPE_PCV2DF_V2DF: 22836 type = v2df_ftype_pcv2df_v2df; 22837 break; 22838 case VOID_FTYPE_PV2SF_V4SF: 22839 type = void_ftype_pv2sf_v4sf; 22840 break; 22841 case VOID_FTYPE_PV4DI_V4DI: 22842 type = void_ftype_pv4di_v4di; 22843 break; 22844 case VOID_FTYPE_PV2DI_V2DI: 22845 type = void_ftype_pv2di_v2di; 22846 break; 22847 case VOID_FTYPE_PCHAR_V32QI: 22848 type = void_ftype_pchar_v32qi; 22849 break; 22850 case VOID_FTYPE_PCHAR_V16QI: 22851 type = void_ftype_pchar_v16qi; 22852 break; 22853 case VOID_FTYPE_PFLOAT_V8SF: 22854 type = void_ftype_pfloat_v8sf; 22855 break; 22856 case VOID_FTYPE_PFLOAT_V4SF: 22857 type = void_ftype_pfloat_v4sf; 22858 break; 22859 case VOID_FTYPE_PDOUBLE_V4DF: 22860 type = 
void_ftype_pdouble_v4df; 22861 break; 22862 case VOID_FTYPE_PDOUBLE_V2DF: 22863 type = void_ftype_pdouble_v2df; 22864 break; 22865 case VOID_FTYPE_PDI_DI: 22866 type = void_ftype_pdi_di; 22867 break; 22868 case VOID_FTYPE_PINT_INT: 22869 type = void_ftype_pint_int; 22870 break; 22871 case VOID_FTYPE_PV8SF_V8SF_V8SF: 22872 type = void_ftype_pv8sf_v8sf_v8sf; 22873 break; 22874 case VOID_FTYPE_PV4DF_V4DF_V4DF: 22875 type = void_ftype_pv4df_v4df_v4df; 22876 break; 22877 case VOID_FTYPE_PV4SF_V4SF_V4SF: 22878 type = void_ftype_pv4sf_v4sf_v4sf; 22879 break; 22880 case VOID_FTYPE_PV2DF_V2DF_V2DF: 22881 type = void_ftype_pv2df_v2df_v2df; 22882 break; 22883 default: 22884 gcc_unreachable (); 22885 } 22886 22887 def_builtin (d->mask, d->name, type, d->code); 22888 } 22889 22890 /* Add all builtins with variable number of operands. */ 22891 for (i = 0, d = bdesc_args; 22892 i < ARRAY_SIZE (bdesc_args); 22893 i++, d++) 22894 { 22895 tree type; 22896 22897 if (d->name == 0) 22898 continue; 22899 22900 switch ((enum ix86_builtin_type) d->flag) 22901 { 22902 case FLOAT_FTYPE_FLOAT: 22903 type = float_ftype_float; 22904 break; 22905 case INT_FTYPE_V8SF_V8SF_PTEST: 22906 type = int_ftype_v8sf_v8sf; 22907 break; 22908 case INT_FTYPE_V4DI_V4DI_PTEST: 22909 type = int_ftype_v4di_v4di; 22910 break; 22911 case INT_FTYPE_V4DF_V4DF_PTEST: 22912 type = int_ftype_v4df_v4df; 22913 break; 22914 case INT_FTYPE_V4SF_V4SF_PTEST: 22915 type = int_ftype_v4sf_v4sf; 22916 break; 22917 case INT_FTYPE_V2DI_V2DI_PTEST: 22918 type = int_ftype_v2di_v2di; 22919 break; 22920 case INT_FTYPE_V2DF_V2DF_PTEST: 22921 type = int_ftype_v2df_v2df; 22922 break; 22923 case INT64_FTYPE_V4SF: 22924 type = int64_ftype_v4sf; 22925 break; 22926 case INT64_FTYPE_V2DF: 22927 type = int64_ftype_v2df; 22928 break; 22929 case INT_FTYPE_V16QI: 22930 type = int_ftype_v16qi; 22931 break; 22932 case INT_FTYPE_V8QI: 22933 type = int_ftype_v8qi; 22934 break; 22935 case INT_FTYPE_V8SF: 22936 type = int_ftype_v8sf; 22937 break; 22938 case INT_FTYPE_V4DF: 22939 type = int_ftype_v4df; 22940 break; 22941 case INT_FTYPE_V4SF: 22942 type = int_ftype_v4sf; 22943 break; 22944 case INT_FTYPE_V2DF: 22945 type = int_ftype_v2df; 22946 break; 22947 case V16QI_FTYPE_V16QI: 22948 type = v16qi_ftype_v16qi; 22949 break; 22950 case V8SI_FTYPE_V8SF: 22951 type = v8si_ftype_v8sf; 22952 break; 22953 case V8SI_FTYPE_V4SI: 22954 type = v8si_ftype_v4si; 22955 break; 22956 case V8HI_FTYPE_V8HI: 22957 type = v8hi_ftype_v8hi; 22958 break; 22959 case V8HI_FTYPE_V16QI: 22960 type = v8hi_ftype_v16qi; 22961 break; 22962 case V8QI_FTYPE_V8QI: 22963 type = v8qi_ftype_v8qi; 22964 break; 22965 case V8SF_FTYPE_V8SF: 22966 type = v8sf_ftype_v8sf; 22967 break; 22968 case V8SF_FTYPE_V8SI: 22969 type = v8sf_ftype_v8si; 22970 break; 22971 case V8SF_FTYPE_V4SF: 22972 type = v8sf_ftype_v4sf; 22973 break; 22974 case V4SI_FTYPE_V4DF: 22975 type = v4si_ftype_v4df; 22976 break; 22977 case V4SI_FTYPE_V4SI: 22978 type = v4si_ftype_v4si; 22979 break; 22980 case V4SI_FTYPE_V16QI: 22981 type = v4si_ftype_v16qi; 22982 break; 22983 case V4SI_FTYPE_V8SI: 22984 type = v4si_ftype_v8si; 22985 break; 22986 case V4SI_FTYPE_V8HI: 22987 type = v4si_ftype_v8hi; 22988 break; 22989 case V4SI_FTYPE_V4SF: 22990 type = v4si_ftype_v4sf; 22991 break; 22992 case V4SI_FTYPE_V2DF: 22993 type = v4si_ftype_v2df; 22994 break; 22995 case V4HI_FTYPE_V4HI: 22996 type = v4hi_ftype_v4hi; 22997 break; 22998 case V4DF_FTYPE_V4DF: 22999 type = v4df_ftype_v4df; 23000 break; 23001 case V4DF_FTYPE_V4SI: 23002 type = v4df_ftype_v4si; 23003 
break; 23004 case V4DF_FTYPE_V4SF: 23005 type = v4df_ftype_v4sf; 23006 break; 23007 case V4DF_FTYPE_V2DF: 23008 type = v4df_ftype_v2df; 23009 break; 23010 case V4SF_FTYPE_V4SF: 23011 case V4SF_FTYPE_V4SF_VEC_MERGE: 23012 type = v4sf_ftype_v4sf; 23013 break; 23014 case V4SF_FTYPE_V8SF: 23015 type = v4sf_ftype_v8sf; 23016 break; 23017 case V4SF_FTYPE_V4SI: 23018 type = v4sf_ftype_v4si; 23019 break; 23020 case V4SF_FTYPE_V4DF: 23021 type = v4sf_ftype_v4df; 23022 break; 23023 case V4SF_FTYPE_V2DF: 23024 type = v4sf_ftype_v2df; 23025 break; 23026 case V2DI_FTYPE_V2DI: 23027 type = v2di_ftype_v2di; 23028 break; 23029 case V2DI_FTYPE_V16QI: 23030 type = v2di_ftype_v16qi; 23031 break; 23032 case V2DI_FTYPE_V8HI: 23033 type = v2di_ftype_v8hi; 23034 break; 23035 case V2DI_FTYPE_V4SI: 23036 type = v2di_ftype_v4si; 23037 break; 23038 case V2SI_FTYPE_V2SI: 23039 type = v2si_ftype_v2si; 23040 break; 23041 case V2SI_FTYPE_V4SF: 23042 type = v2si_ftype_v4sf; 23043 break; 23044 case V2SI_FTYPE_V2DF: 23045 type = v2si_ftype_v2df; 23046 break; 23047 case V2SI_FTYPE_V2SF: 23048 type = v2si_ftype_v2sf; 23049 break; 23050 case V2DF_FTYPE_V4DF: 23051 type = v2df_ftype_v4df; 23052 break; 23053 case V2DF_FTYPE_V4SF: 23054 type = v2df_ftype_v4sf; 23055 break; 23056 case V2DF_FTYPE_V2DF: 23057 case V2DF_FTYPE_V2DF_VEC_MERGE: 23058 type = v2df_ftype_v2df; 23059 break; 23060 case V2DF_FTYPE_V2SI: 23061 type = v2df_ftype_v2si; 23062 break; 23063 case V2DF_FTYPE_V4SI: 23064 type = v2df_ftype_v4si; 23065 break; 23066 case V2SF_FTYPE_V2SF: 23067 type = v2sf_ftype_v2sf; 23068 break; 23069 case V2SF_FTYPE_V2SI: 23070 type = v2sf_ftype_v2si; 23071 break; 23072 case V16QI_FTYPE_V16QI_V16QI: 23073 type = v16qi_ftype_v16qi_v16qi; 23074 break; 23075 case V16QI_FTYPE_V8HI_V8HI: 23076 type = v16qi_ftype_v8hi_v8hi; 23077 break; 23078 case V8QI_FTYPE_V8QI_V8QI: 23079 type = v8qi_ftype_v8qi_v8qi; 23080 break; 23081 case V8QI_FTYPE_V4HI_V4HI: 23082 type = v8qi_ftype_v4hi_v4hi; 23083 break; 23084 case V8HI_FTYPE_V8HI_V8HI: 23085 case V8HI_FTYPE_V8HI_V8HI_COUNT: 23086 type = v8hi_ftype_v8hi_v8hi; 23087 break; 23088 case V8HI_FTYPE_V16QI_V16QI: 23089 type = v8hi_ftype_v16qi_v16qi; 23090 break; 23091 case V8HI_FTYPE_V4SI_V4SI: 23092 type = v8hi_ftype_v4si_v4si; 23093 break; 23094 case V8HI_FTYPE_V8HI_SI_COUNT: 23095 type = v8hi_ftype_v8hi_int; 23096 break; 23097 case V8SF_FTYPE_V8SF_V8SF: 23098 type = v8sf_ftype_v8sf_v8sf; 23099 break; 23100 case V8SF_FTYPE_V8SF_V8SI: 23101 type = v8sf_ftype_v8sf_v8si; 23102 break; 23103 case V4SI_FTYPE_V4SI_V4SI: 23104 case V4SI_FTYPE_V4SI_V4SI_COUNT: 23105 type = v4si_ftype_v4si_v4si; 23106 break; 23107 case V4SI_FTYPE_V8HI_V8HI: 23108 type = v4si_ftype_v8hi_v8hi; 23109 break; 23110 case V4SI_FTYPE_V4SF_V4SF: 23111 type = v4si_ftype_v4sf_v4sf; 23112 break; 23113 case V4SI_FTYPE_V2DF_V2DF: 23114 type = v4si_ftype_v2df_v2df; 23115 break; 23116 case V4SI_FTYPE_V4SI_SI_COUNT: 23117 type = v4si_ftype_v4si_int; 23118 break; 23119 case V4HI_FTYPE_V4HI_V4HI: 23120 case V4HI_FTYPE_V4HI_V4HI_COUNT: 23121 type = v4hi_ftype_v4hi_v4hi; 23122 break; 23123 case V4HI_FTYPE_V8QI_V8QI: 23124 type = v4hi_ftype_v8qi_v8qi; 23125 break; 23126 case V4HI_FTYPE_V2SI_V2SI: 23127 type = v4hi_ftype_v2si_v2si; 23128 break; 23129 case V4HI_FTYPE_V4HI_SI_COUNT: 23130 type = v4hi_ftype_v4hi_int; 23131 break; 23132 case V4DF_FTYPE_V4DF_V4DF: 23133 type = v4df_ftype_v4df_v4df; 23134 break; 23135 case V4DF_FTYPE_V4DF_V4DI: 23136 type = v4df_ftype_v4df_v4di; 23137 break; 23138 case V4SF_FTYPE_V4SF_V4SF: 23139 case 
V4SF_FTYPE_V4SF_V4SF_SWAP: 23140 type = v4sf_ftype_v4sf_v4sf; 23141 break; 23142 case V4SF_FTYPE_V4SF_V4SI: 23143 type = v4sf_ftype_v4sf_v4si; 23144 break; 23145 case V4SF_FTYPE_V4SF_V2SI: 23146 type = v4sf_ftype_v4sf_v2si; 23147 break; 23148 case V4SF_FTYPE_V4SF_V2DF: 23149 type = v4sf_ftype_v4sf_v2df; 23150 break; 23151 case V4SF_FTYPE_V4SF_DI: 23152 type = v4sf_ftype_v4sf_int64; 23153 break; 23154 case V4SF_FTYPE_V4SF_SI: 23155 type = v4sf_ftype_v4sf_int; 23156 break; 23157 case V2DI_FTYPE_V2DI_V2DI: 23158 case V2DI_FTYPE_V2DI_V2DI_COUNT: 23159 type = v2di_ftype_v2di_v2di; 23160 break; 23161 case V2DI_FTYPE_V16QI_V16QI: 23162 type = v2di_ftype_v16qi_v16qi; 23163 break; 23164 case V2DI_FTYPE_V4SI_V4SI: 23165 type = v2di_ftype_v4si_v4si; 23166 break; 23167 case V2DI_FTYPE_V2DI_V16QI: 23168 type = v2di_ftype_v2di_v16qi; 23169 break; 23170 case V2DI_FTYPE_V2DF_V2DF: 23171 type = v2di_ftype_v2df_v2df; 23172 break; 23173 case V2DI_FTYPE_V2DI_SI_COUNT: 23174 type = v2di_ftype_v2di_int; 23175 break; 23176 case V2SI_FTYPE_V2SI_V2SI: 23177 case V2SI_FTYPE_V2SI_V2SI_COUNT: 23178 type = v2si_ftype_v2si_v2si; 23179 break; 23180 case V2SI_FTYPE_V4HI_V4HI: 23181 type = v2si_ftype_v4hi_v4hi; 23182 break; 23183 case V2SI_FTYPE_V2SF_V2SF: 23184 type = v2si_ftype_v2sf_v2sf; 23185 break; 23186 case V2SI_FTYPE_V2SI_SI_COUNT: 23187 type = v2si_ftype_v2si_int; 23188 break; 23189 case V2DF_FTYPE_V2DF_V2DF: 23190 case V2DF_FTYPE_V2DF_V2DF_SWAP: 23191 type = v2df_ftype_v2df_v2df; 23192 break; 23193 case V2DF_FTYPE_V2DF_V4SF: 23194 type = v2df_ftype_v2df_v4sf; 23195 break; 23196 case V2DF_FTYPE_V2DF_V2DI: 23197 type = v2df_ftype_v2df_v2di; 23198 break; 23199 case V2DF_FTYPE_V2DF_DI: 23200 type = v2df_ftype_v2df_int64; 23201 break; 23202 case V2DF_FTYPE_V2DF_SI: 23203 type = v2df_ftype_v2df_int; 23204 break; 23205 case V2SF_FTYPE_V2SF_V2SF: 23206 type = v2sf_ftype_v2sf_v2sf; 23207 break; 23208 case V1DI_FTYPE_V1DI_V1DI: 23209 case V1DI_FTYPE_V1DI_V1DI_COUNT: 23210 type = v1di_ftype_v1di_v1di; 23211 break; 23212 case V1DI_FTYPE_V8QI_V8QI: 23213 type = v1di_ftype_v8qi_v8qi; 23214 break; 23215 case V1DI_FTYPE_V2SI_V2SI: 23216 type = v1di_ftype_v2si_v2si; 23217 break; 23218 case V1DI_FTYPE_V1DI_SI_COUNT: 23219 type = v1di_ftype_v1di_int; 23220 break; 23221 case UINT64_FTYPE_UINT64_UINT64: 23222 type = uint64_ftype_uint64_uint64; 23223 break; 23224 case UINT_FTYPE_UINT_UINT: 23225 type = unsigned_ftype_unsigned_unsigned; 23226 break; 23227 case UINT_FTYPE_UINT_USHORT: 23228 type = unsigned_ftype_unsigned_ushort; 23229 break; 23230 case UINT_FTYPE_UINT_UCHAR: 23231 type = unsigned_ftype_unsigned_uchar; 23232 break; 23233 case V8HI_FTYPE_V8HI_INT: 23234 type = v8hi_ftype_v8hi_int; 23235 break; 23236 case V8SF_FTYPE_V8SF_INT: 23237 type = v8sf_ftype_v8sf_int; 23238 break; 23239 case V4SI_FTYPE_V4SI_INT: 23240 type = v4si_ftype_v4si_int; 23241 break; 23242 case V4SI_FTYPE_V8SI_INT: 23243 type = v4si_ftype_v8si_int; 23244 break; 23245 case V4HI_FTYPE_V4HI_INT: 23246 type = v4hi_ftype_v4hi_int; 23247 break; 23248 case V4DF_FTYPE_V4DF_INT: 23249 type = v4df_ftype_v4df_int; 23250 break; 23251 case V4SF_FTYPE_V4SF_INT: 23252 type = v4sf_ftype_v4sf_int; 23253 break; 23254 case V4SF_FTYPE_V8SF_INT: 23255 type = v4sf_ftype_v8sf_int; 23256 break; 23257 case V2DI_FTYPE_V2DI_INT: 23258 case V2DI2TI_FTYPE_V2DI_INT: 23259 type = v2di_ftype_v2di_int; 23260 break; 23261 case V2DF_FTYPE_V2DF_INT: 23262 type = v2df_ftype_v2df_int; 23263 break; 23264 case V2DF_FTYPE_V4DF_INT: 23265 type = v2df_ftype_v4df_int; 23266 break; 23267 case 
V16QI_FTYPE_V16QI_V16QI_V16QI: 23268 type = v16qi_ftype_v16qi_v16qi_v16qi; 23269 break; 23270 case V8SF_FTYPE_V8SF_V8SF_V8SF: 23271 type = v8sf_ftype_v8sf_v8sf_v8sf; 23272 break; 23273 case V4DF_FTYPE_V4DF_V4DF_V4DF: 23274 type = v4df_ftype_v4df_v4df_v4df; 23275 break; 23276 case V4SF_FTYPE_V4SF_V4SF_V4SF: 23277 type = v4sf_ftype_v4sf_v4sf_v4sf; 23278 break; 23279 case V2DF_FTYPE_V2DF_V2DF_V2DF: 23280 type = v2df_ftype_v2df_v2df_v2df; 23281 break; 23282 case V16QI_FTYPE_V16QI_V16QI_INT: 23283 type = v16qi_ftype_v16qi_v16qi_int; 23284 break; 23285 case V8SI_FTYPE_V8SI_V8SI_INT: 23286 type = v8si_ftype_v8si_v8si_int; 23287 break; 23288 case V8SI_FTYPE_V8SI_V4SI_INT: 23289 type = v8si_ftype_v8si_v4si_int; 23290 break; 23291 case V8HI_FTYPE_V8HI_V8HI_INT: 23292 type = v8hi_ftype_v8hi_v8hi_int; 23293 break; 23294 case V8SF_FTYPE_V8SF_V8SF_INT: 23295 type = v8sf_ftype_v8sf_v8sf_int; 23296 break; 23297 case V8SF_FTYPE_V8SF_V4SF_INT: 23298 type = v8sf_ftype_v8sf_v4sf_int; 23299 break; 23300 case V4SI_FTYPE_V4SI_V4SI_INT: 23301 type = v4si_ftype_v4si_v4si_int; 23302 break; 23303 case V4DF_FTYPE_V4DF_V4DF_INT: 23304 type = v4df_ftype_v4df_v4df_int; 23305 break; 23306 case V4DF_FTYPE_V4DF_V2DF_INT: 23307 type = v4df_ftype_v4df_v2df_int; 23308 break; 23309 case V4SF_FTYPE_V4SF_V4SF_INT: 23310 type = v4sf_ftype_v4sf_v4sf_int; 23311 break; 23312 case V2DI_FTYPE_V2DI_V2DI_INT: 23313 case V2DI2TI_FTYPE_V2DI_V2DI_INT: 23314 type = v2di_ftype_v2di_v2di_int; 23315 break; 23316 case V2DF_FTYPE_V2DF_V2DF_INT: 23317 type = v2df_ftype_v2df_v2df_int; 23318 break; 23319 case V2DI_FTYPE_V2DI_UINT_UINT: 23320 type = v2di_ftype_v2di_unsigned_unsigned; 23321 break; 23322 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT: 23323 type = v2di_ftype_v2di_v2di_unsigned_unsigned; 23324 break; 23325 case V1DI2DI_FTYPE_V1DI_V1DI_INT: 23326 type = v1di_ftype_v1di_v1di_int; 23327 break; 23328 default: 23329 gcc_unreachable (); 23330 } 23331 23332 def_builtin_const (d->mask, d->name, type, d->code); 23333 } 23334 23335 /* pcmpestr[im] insns. */ 23336 for (i = 0, d = bdesc_pcmpestr; 23337 i < ARRAY_SIZE (bdesc_pcmpestr); 23338 i++, d++) 23339 { 23340 if (d->code == IX86_BUILTIN_PCMPESTRM128) 23341 ftype = v16qi_ftype_v16qi_int_v16qi_int_int; 23342 else 23343 ftype = int_ftype_v16qi_int_v16qi_int_int; 23344 def_builtin_const (d->mask, d->name, ftype, d->code); 23345 } 23346 23347 /* pcmpistr[im] insns. */ 23348 for (i = 0, d = bdesc_pcmpistr; 23349 i < ARRAY_SIZE (bdesc_pcmpistr); 23350 i++, d++) 23351 { 23352 if (d->code == IX86_BUILTIN_PCMPISTRM128) 23353 ftype = v16qi_ftype_v16qi_v16qi_int; 23354 else 23355 ftype = int_ftype_v16qi_v16qi_int; 23356 def_builtin_const (d->mask, d->name, ftype, d->code); 23357 } 23358 23359 /* comi/ucomi insns. 
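   The loop below gives the SSE2 (double precision) entries the
   (V2DF, V2DF) -> int signature and every other entry the
   (V4SF, V4SF) -> int signature; each builtin compares the low elements
   of its operands and returns a flag.  A minimal user-level sketch,
   assuming -msse; the names are illustrative only, not part of this
   file:

     typedef float v4sf __attribute__ ((vector_size (16)));

     int
     low_lanes_equal (v4sf a, v4sf b)
     {
       return __builtin_ia32_comieq (a, b);
     }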
*/ 23360 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) 23361 if (d->mask == OPTION_MASK_ISA_SSE2) 23362 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code); 23363 else 23364 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code); 23365 23366 /* SSE */ 23367 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); 23368 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); 23369 23370 /* SSE or 3DNow!A */ 23371 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); 23372 23373 /* SSE2 */ 23374 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU); 23375 23376 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH); 23377 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE); 23378 23379 /* SSE3. */ 23380 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR); 23381 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT); 23382 23383 /* AES */ 23384 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128); 23385 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128); 23386 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128); 23387 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128); 23388 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128); 23389 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128); 23390 23391 /* PCLMUL */ 23392 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128); 23393 23394 /* AVX */ 23395 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void, 23396 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER); 23397 23398 /* Access to the vec_init patterns. */ 23399 ftype = build_function_type_list (V2SI_type_node, integer_type_node, 23400 integer_type_node, NULL_TREE); 23401 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI); 23402 23403 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node, 23404 short_integer_type_node, 23405 short_integer_type_node, 23406 short_integer_type_node, NULL_TREE); 23407 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI); 23408 23409 ftype = build_function_type_list (V8QI_type_node, char_type_node, 23410 char_type_node, char_type_node, 23411 char_type_node, char_type_node, 23412 char_type_node, char_type_node, 23413 char_type_node, NULL_TREE); 23414 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI); 23415 23416 /* Access to the vec_extract patterns. 
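   Each builtin below extracts one element from a vector operand; the
   element index is expected to be a compile-time integer constant.
   A minimal user-level sketch, assuming -msse; the names are
   illustrative only, not part of this file:

     typedef float v4sf __attribute__ ((vector_size (16)));

     float
     third_lane (v4sf v)
     {
       return __builtin_ia32_vec_ext_v4sf (v, 2);
     }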
*/ 23417 ftype = build_function_type_list (double_type_node, V2DF_type_node, 23418 integer_type_node, NULL_TREE); 23419 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF); 23420 23421 ftype = build_function_type_list (long_long_integer_type_node, 23422 V2DI_type_node, integer_type_node, 23423 NULL_TREE); 23424 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI); 23425 23426 ftype = build_function_type_list (float_type_node, V4SF_type_node, 23427 integer_type_node, NULL_TREE); 23428 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF); 23429 23430 ftype = build_function_type_list (intSI_type_node, V4SI_type_node, 23431 integer_type_node, NULL_TREE); 23432 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI); 23433 23434 ftype = build_function_type_list (intHI_type_node, V8HI_type_node, 23435 integer_type_node, NULL_TREE); 23436 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI); 23437 23438 ftype = build_function_type_list (intHI_type_node, V4HI_type_node, 23439 integer_type_node, NULL_TREE); 23440 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI); 23441 23442 ftype = build_function_type_list (intSI_type_node, V2SI_type_node, 23443 integer_type_node, NULL_TREE); 23444 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI); 23445 23446 ftype = build_function_type_list (intQI_type_node, V16QI_type_node, 23447 integer_type_node, NULL_TREE); 23448 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI); 23449 23450 /* Access to the vec_set patterns. 
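   These are the inverse of the vec_extract builtins above: each returns
   a copy of the vector with one element replaced by a scalar, again
   using a constant element index.  A minimal user-level sketch, assuming
   -msse2; the names are illustrative only, not part of this file:

     typedef short v8hi __attribute__ ((vector_size (16)));

     v8hi
     set_first_lane (v8hi v, short x)
     {
       return __builtin_ia32_vec_set_v8hi (v, x, 0);
     }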
*/ 23451 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node, 23452 intDI_type_node, 23453 integer_type_node, NULL_TREE); 23454 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI); 23455 23456 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node, 23457 float_type_node, 23458 integer_type_node, NULL_TREE); 23459 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF); 23460 23461 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node, 23462 intSI_type_node, 23463 integer_type_node, NULL_TREE); 23464 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI); 23465 23466 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node, 23467 intHI_type_node, 23468 integer_type_node, NULL_TREE); 23469 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI); 23470 23471 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node, 23472 intHI_type_node, 23473 integer_type_node, NULL_TREE); 23474 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI); 23475 23476 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node, 23477 intQI_type_node, 23478 integer_type_node, NULL_TREE); 23479 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI); 23480 23481 /* Add SSE5 multi-arg argument instructions */ 23482 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++) 23483 { 23484 tree mtype = NULL_TREE; 23485 23486 if (d->name == 0) 23487 continue; 23488 23489 switch ((enum multi_arg_type)d->flag) 23490 { 23491 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break; 23492 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break; 23493 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break; 23494 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break; 23495 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break; 23496 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break; 23497 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break; 23498 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break; 23499 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break; 23500 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break; 23501 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break; 23502 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break; 23503 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break; 23504 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break; 23505 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break; 23506 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break; 23507 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break; 23508 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break; 23509 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break; 23510 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break; 23511 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break; 23512 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break; 23513 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break; 23514 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break; 23515 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break; 
23516 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break; 23517 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break; 23518 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break; 23519 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break; 23520 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break; 23521 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break; 23522 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break; 23523 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break; 23524 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break; 23525 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break; 23526 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break; 23527 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break; 23528 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break; 23529 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break; 23530 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break; 23531 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break; 23532 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break; 23533 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break; 23534 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break; 23535 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break; 23536 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break; 23537 case MULTI_ARG_UNKNOWN: 23538 default: 23539 gcc_unreachable (); 23540 } 23541 23542 if (mtype) 23543 def_builtin_const (d->mask, d->name, mtype, d->code); 23544 } 23545 } 23546 23547 /* Internal method for ix86_init_builtins. */ 23548 23549 static void 23550 ix86_init_builtins_va_builtins_abi (void) 23551 { 23552 tree ms_va_ref, sysv_va_ref; 23553 tree fnvoid_va_end_ms, fnvoid_va_end_sysv; 23554 tree fnvoid_va_start_ms, fnvoid_va_start_sysv; 23555 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv; 23556 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE; 23557 23558 if (!TARGET_64BIT) 23559 return; 23560 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE); 23561 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE); 23562 ms_va_ref = build_reference_type (ms_va_list_type_node); 23563 sysv_va_ref = 23564 build_pointer_type (TREE_TYPE (sysv_va_list_type_node)); 23565 23566 fnvoid_va_end_ms = 23567 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE); 23568 fnvoid_va_start_ms = 23569 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE); 23570 fnvoid_va_end_sysv = 23571 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE); 23572 fnvoid_va_start_sysv = 23573 build_varargs_function_type_list (void_type_node, sysv_va_ref, 23574 NULL_TREE); 23575 fnvoid_va_copy_ms = 23576 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node, 23577 NULL_TREE); 23578 fnvoid_va_copy_sysv = 23579 build_function_type_list (void_type_node, sysv_va_ref, 23580 sysv_va_ref, NULL_TREE); 23581 23582 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms, 23583 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms); 23584 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms, 23585 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms); 23586 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms, 23587 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms); 23588 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv, 23589 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv); 23590 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv, 23591 BUILT_IN_VA_END, BUILT_IN_NORMAL, 
NULL, fnattr_sysv); 23592 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv, 23593 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv); 23594 } 23595 23596 static void 23597 ix86_init_builtins (void) 23598 { 23599 tree float128_type_node = make_node (REAL_TYPE); 23600 tree ftype, decl; 23601 23602 /* The __float80 type. */ 23603 if (TYPE_MODE (long_double_type_node) == XFmode) 23604 (*lang_hooks.types.register_builtin_type) (long_double_type_node, 23605 "__float80"); 23606 else 23607 { 23608 /* The __float80 type. */ 23609 tree float80_type_node = make_node (REAL_TYPE); 23610 23611 TYPE_PRECISION (float80_type_node) = 80; 23612 layout_type (float80_type_node); 23613 (*lang_hooks.types.register_builtin_type) (float80_type_node, 23614 "__float80"); 23615 } 23616 23617 /* The __float128 type. */ 23618 TYPE_PRECISION (float128_type_node) = 128; 23619 layout_type (float128_type_node); 23620 (*lang_hooks.types.register_builtin_type) (float128_type_node, 23621 "__float128"); 23622 23623 /* TFmode support builtins. */ 23624 ftype = build_function_type (float128_type_node, void_list_node); 23625 decl = add_builtin_function ("__builtin_infq", ftype, 23626 IX86_BUILTIN_INFQ, BUILT_IN_MD, 23627 NULL, NULL_TREE); 23628 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl; 23629 23630 /* We will expand them to normal call if SSE2 isn't available since 23631 they are used by libgcc. */ 23632 ftype = build_function_type_list (float128_type_node, 23633 float128_type_node, 23634 NULL_TREE); 23635 decl = add_builtin_function ("__builtin_fabsq", ftype, 23636 IX86_BUILTIN_FABSQ, BUILT_IN_MD, 23637 "__fabstf2", NULL_TREE); 23638 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl; 23639 TREE_READONLY (decl) = 1; 23640 23641 ftype = build_function_type_list (float128_type_node, 23642 float128_type_node, 23643 float128_type_node, 23644 NULL_TREE); 23645 decl = add_builtin_function ("__builtin_copysignq", ftype, 23646 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD, 23647 "__copysigntf3", NULL_TREE); 23648 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl; 23649 TREE_READONLY (decl) = 1; 23650 23651 ix86_init_mmx_sse_builtins (); 23652 if (TARGET_64BIT) 23653 ix86_init_builtins_va_builtins_abi (); 23654 } 23655 23656 /* Errors in the source file can cause expand_expr to return const0_rtx 23657 where we expect a vector. To avoid crashing, use one of the vector 23658 clear instructions. */ 23659 static rtx 23660 safe_vector_operand (rtx x, enum machine_mode mode) 23661 { 23662 if (x == const0_rtx) 23663 x = CONST0_RTX (mode); 23664 return x; 23665 } 23666 23667 /* Subroutine of ix86_expand_builtin to take care of binop insns. */ 23668 23669 static rtx 23670 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target) 23671 { 23672 rtx pat; 23673 tree arg0 = CALL_EXPR_ARG (exp, 0); 23674 tree arg1 = CALL_EXPR_ARG (exp, 1); 23675 rtx op0 = expand_normal (arg0); 23676 rtx op1 = expand_normal (arg1); 23677 enum machine_mode tmode = insn_data[icode].operand[0].mode; 23678 enum machine_mode mode0 = insn_data[icode].operand[1].mode; 23679 enum machine_mode mode1 = insn_data[icode].operand[2].mode; 23680 23681 if (VECTOR_MODE_P (mode0)) 23682 op0 = safe_vector_operand (op0, mode0); 23683 if (VECTOR_MODE_P (mode1)) 23684 op1 = safe_vector_operand (op1, mode1); 23685 23686 if (optimize || !target 23687 || GET_MODE (target) != tmode 23688 || ! 
(*insn_data[icode].operand[0].predicate) (target, tmode)) 23689 target = gen_reg_rtx (tmode); 23690 23691 if (GET_MODE (op1) == SImode && mode1 == TImode) 23692 { 23693 rtx x = gen_reg_rtx (V4SImode); 23694 emit_insn (gen_sse2_loadd (x, op1)); 23695 op1 = gen_lowpart (TImode, x); 23696 } 23697 23698 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0)) 23699 op0 = copy_to_mode_reg (mode0, op0); 23700 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) 23701 op1 = copy_to_mode_reg (mode1, op1); 23702 23703 pat = GEN_FCN (icode) (target, op0, op1); 23704 if (! pat) 23705 return 0; 23706 23707 emit_insn (pat); 23708 23709 return target; 23710 } 23711 23712 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */ 23713 23714 static rtx 23715 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, 23716 enum multi_arg_type m_type, 23717 enum insn_code sub_code) 23718 { 23719 rtx pat; 23720 int i; 23721 int nargs; 23722 bool comparison_p = false; 23723 bool tf_p = false; 23724 bool last_arg_constant = false; 23725 int num_memory = 0; 23726 struct { 23727 rtx op; 23728 enum machine_mode mode; 23729 } args[4]; 23730 23731 enum machine_mode tmode = insn_data[icode].operand[0].mode; 23732 23733 switch (m_type) 23734 { 23735 case MULTI_ARG_3_SF: 23736 case MULTI_ARG_3_DF: 23737 case MULTI_ARG_3_DI: 23738 case MULTI_ARG_3_SI: 23739 case MULTI_ARG_3_SI_DI: 23740 case MULTI_ARG_3_HI: 23741 case MULTI_ARG_3_HI_SI: 23742 case MULTI_ARG_3_QI: 23743 case MULTI_ARG_3_PERMPS: 23744 case MULTI_ARG_3_PERMPD: 23745 nargs = 3; 23746 break; 23747 23748 case MULTI_ARG_2_SF: 23749 case MULTI_ARG_2_DF: 23750 case MULTI_ARG_2_DI: 23751 case MULTI_ARG_2_SI: 23752 case MULTI_ARG_2_HI: 23753 case MULTI_ARG_2_QI: 23754 nargs = 2; 23755 break; 23756 23757 case MULTI_ARG_2_DI_IMM: 23758 case MULTI_ARG_2_SI_IMM: 23759 case MULTI_ARG_2_HI_IMM: 23760 case MULTI_ARG_2_QI_IMM: 23761 nargs = 2; 23762 last_arg_constant = true; 23763 break; 23764 23765 case MULTI_ARG_1_SF: 23766 case MULTI_ARG_1_DF: 23767 case MULTI_ARG_1_DI: 23768 case MULTI_ARG_1_SI: 23769 case MULTI_ARG_1_HI: 23770 case MULTI_ARG_1_QI: 23771 case MULTI_ARG_1_SI_DI: 23772 case MULTI_ARG_1_HI_DI: 23773 case MULTI_ARG_1_HI_SI: 23774 case MULTI_ARG_1_QI_DI: 23775 case MULTI_ARG_1_QI_SI: 23776 case MULTI_ARG_1_QI_HI: 23777 case MULTI_ARG_1_PH2PS: 23778 case MULTI_ARG_1_PS2PH: 23779 nargs = 1; 23780 break; 23781 23782 case MULTI_ARG_2_SF_CMP: 23783 case MULTI_ARG_2_DF_CMP: 23784 case MULTI_ARG_2_DI_CMP: 23785 case MULTI_ARG_2_SI_CMP: 23786 case MULTI_ARG_2_HI_CMP: 23787 case MULTI_ARG_2_QI_CMP: 23788 nargs = 2; 23789 comparison_p = true; 23790 break; 23791 23792 case MULTI_ARG_2_SF_TF: 23793 case MULTI_ARG_2_DF_TF: 23794 case MULTI_ARG_2_DI_TF: 23795 case MULTI_ARG_2_SI_TF: 23796 case MULTI_ARG_2_HI_TF: 23797 case MULTI_ARG_2_QI_TF: 23798 nargs = 2; 23799 tf_p = true; 23800 break; 23801 23802 case MULTI_ARG_UNKNOWN: 23803 default: 23804 gcc_unreachable (); 23805 } 23806 23807 if (optimize || !target 23808 || GET_MODE (target) != tmode 23809 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 23810 target = gen_reg_rtx (tmode); 23811 23812 gcc_assert (nargs <= 4); 23813 23814 for (i = 0; i < nargs; i++) 23815 { 23816 tree arg = CALL_EXPR_ARG (exp, i); 23817 rtx op = expand_normal (arg); 23818 int adjust = (comparison_p) ? 
1 : 0; 23819 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode; 23820 23821 if (last_arg_constant && i == nargs-1) 23822 { 23823 if (GET_CODE (op) != CONST_INT) 23824 { 23825 error ("last argument must be an immediate"); 23826 return gen_reg_rtx (tmode); 23827 } 23828 } 23829 else 23830 { 23831 if (VECTOR_MODE_P (mode)) 23832 op = safe_vector_operand (op, mode); 23833 23834 /* If we aren't optimizing, only allow one memory operand to be 23835 generated. */ 23836 if (memory_operand (op, mode)) 23837 num_memory++; 23838 23839 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode); 23840 23841 if (optimize 23842 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode) 23843 || num_memory > 1) 23844 op = force_reg (mode, op); 23845 } 23846 23847 args[i].op = op; 23848 args[i].mode = mode; 23849 } 23850 23851 switch (nargs) 23852 { 23853 case 1: 23854 pat = GEN_FCN (icode) (target, args[0].op); 23855 break; 23856 23857 case 2: 23858 if (tf_p) 23859 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, 23860 GEN_INT ((int)sub_code)); 23861 else if (! comparison_p) 23862 pat = GEN_FCN (icode) (target, args[0].op, args[1].op); 23863 else 23864 { 23865 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target), 23866 args[0].op, 23867 args[1].op); 23868 23869 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op); 23870 } 23871 break; 23872 23873 case 3: 23874 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op); 23875 break; 23876 23877 default: 23878 gcc_unreachable (); 23879 } 23880 23881 if (! pat) 23882 return 0; 23883 23884 emit_insn (pat); 23885 return target; 23886 } 23887 23888 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop 23889 insns with vec_merge. */ 23890 23891 static rtx 23892 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp, 23893 rtx target) 23894 { 23895 rtx pat; 23896 tree arg0 = CALL_EXPR_ARG (exp, 0); 23897 rtx op1, op0 = expand_normal (arg0); 23898 enum machine_mode tmode = insn_data[icode].operand[0].mode; 23899 enum machine_mode mode0 = insn_data[icode].operand[1].mode; 23900 23901 if (optimize || !target 23902 || GET_MODE (target) != tmode 23903 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 23904 target = gen_reg_rtx (tmode); 23905 23906 if (VECTOR_MODE_P (mode0)) 23907 op0 = safe_vector_operand (op0, mode0); 23908 23909 if ((optimize && !register_operand (op0, mode0)) 23910 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 23911 op0 = copy_to_mode_reg (mode0, op0); 23912 23913 op1 = op0; 23914 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0)) 23915 op1 = copy_to_mode_reg (mode0, op1); 23916 23917 pat = GEN_FCN (icode) (target, op0, op1); 23918 if (! pat) 23919 return 0; 23920 emit_insn (pat); 23921 return target; 23922 } 23923 23924 /* Subroutine of ix86_expand_builtin to take care of comparison insns. 
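   These are the cmpps/cmppd style predicates: each element of the
   result is all ones where the comparison holds and zero otherwise.
   The SWAP forms exist because the hardware only encodes EQ, LT, LE,
   UNORD and their negations, so a greater-than builtin is emitted as a
   less-than with the operands exchanged.  For illustration (a sketch,
   assuming the __builtin_ia32_cmpgtps builtin defined elsewhere in
   this file and -msse):

     typedef float v4sf __attribute__ ((vector_size (16)));

     v4sf
     greater (v4sf a, v4sf b)
     {
       return __builtin_ia32_cmpgtps (a, b);
     }

   ends up as cmpltps with a and b swapped.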
*/ 23925 23926 static rtx 23927 ix86_expand_sse_compare (const struct builtin_description *d, 23928 tree exp, rtx target, bool swap) 23929 { 23930 rtx pat; 23931 tree arg0 = CALL_EXPR_ARG (exp, 0); 23932 tree arg1 = CALL_EXPR_ARG (exp, 1); 23933 rtx op0 = expand_normal (arg0); 23934 rtx op1 = expand_normal (arg1); 23935 rtx op2; 23936 enum machine_mode tmode = insn_data[d->icode].operand[0].mode; 23937 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode; 23938 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode; 23939 enum rtx_code comparison = d->comparison; 23940 23941 if (VECTOR_MODE_P (mode0)) 23942 op0 = safe_vector_operand (op0, mode0); 23943 if (VECTOR_MODE_P (mode1)) 23944 op1 = safe_vector_operand (op1, mode1); 23945 23946 /* Swap operands if we have a comparison that isn't available in 23947 hardware. */ 23948 if (swap) 23949 { 23950 rtx tmp = gen_reg_rtx (mode1); 23951 emit_move_insn (tmp, op1); 23952 op1 = op0; 23953 op0 = tmp; 23954 } 23955 23956 if (optimize || !target 23957 || GET_MODE (target) != tmode 23958 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode)) 23959 target = gen_reg_rtx (tmode); 23960 23961 if ((optimize && !register_operand (op0, mode0)) 23962 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0)) 23963 op0 = copy_to_mode_reg (mode0, op0); 23964 if ((optimize && !register_operand (op1, mode1)) 23965 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1)) 23966 op1 = copy_to_mode_reg (mode1, op1); 23967 23968 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); 23969 pat = GEN_FCN (d->icode) (target, op0, op1, op2); 23970 if (! pat) 23971 return 0; 23972 emit_insn (pat); 23973 return target; 23974 } 23975 23976 /* Subroutine of ix86_expand_builtin to take care of comi insns. */ 23977 23978 static rtx 23979 ix86_expand_sse_comi (const struct builtin_description *d, tree exp, 23980 rtx target) 23981 { 23982 rtx pat; 23983 tree arg0 = CALL_EXPR_ARG (exp, 0); 23984 tree arg1 = CALL_EXPR_ARG (exp, 1); 23985 rtx op0 = expand_normal (arg0); 23986 rtx op1 = expand_normal (arg1); 23987 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; 23988 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; 23989 enum rtx_code comparison = d->comparison; 23990 23991 if (VECTOR_MODE_P (mode0)) 23992 op0 = safe_vector_operand (op0, mode0); 23993 if (VECTOR_MODE_P (mode1)) 23994 op1 = safe_vector_operand (op1, mode1); 23995 23996 /* Swap operands if we have a comparison that isn't available in 23997 hardware. */ 23998 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) 23999 { 24000 rtx tmp = op1; 24001 op1 = op0; 24002 op0 = tmp; 24003 } 24004 24005 target = gen_reg_rtx (SImode); 24006 emit_move_insn (target, const0_rtx); 24007 target = gen_rtx_SUBREG (QImode, target, 0); 24008 24009 if ((optimize && !register_operand (op0, mode0)) 24010 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0)) 24011 op0 = copy_to_mode_reg (mode0, op0); 24012 if ((optimize && !register_operand (op1, mode1)) 24013 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1)) 24014 op1 = copy_to_mode_reg (mode1, op1); 24015 24016 pat = GEN_FCN (d->icode) (op0, op1); 24017 if (! pat) 24018 return 0; 24019 emit_insn (pat); 24020 emit_insn (gen_rtx_SET (VOIDmode, 24021 gen_rtx_STRICT_LOW_PART (VOIDmode, target), 24022 gen_rtx_fmt_ee (comparison, QImode, 24023 SET_DEST (pat), 24024 const0_rtx))); 24025 24026 return SUBREG_REG (target); 24027 } 24028 24029 /* Subroutine of ix86_expand_builtin to take care of ptest insns. 
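   ptest only sets the flags (ZF when the AND of the operands is zero,
   CF when the AND-NOT is zero), so the code below reads the relevant
   flag back into the low byte of an SImode pseudo and returns that as
   the builtin's int result.  For illustration (a sketch, assuming the
   __builtin_ia32_ptestz128 builtin defined elsewhere in this file and
   -msse4.1):

     typedef long long v2di __attribute__ ((vector_size (16)));

     int
     disjoint (v2di a, v2di b)
     {
       return __builtin_ia32_ptestz128 (a, b);
     }

   returns 1 exactly when a and b have no bits in common.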
*/ 24030 24031 static rtx 24032 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp, 24033 rtx target) 24034 { 24035 rtx pat; 24036 tree arg0 = CALL_EXPR_ARG (exp, 0); 24037 tree arg1 = CALL_EXPR_ARG (exp, 1); 24038 rtx op0 = expand_normal (arg0); 24039 rtx op1 = expand_normal (arg1); 24040 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; 24041 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; 24042 enum rtx_code comparison = d->comparison; 24043 24044 if (VECTOR_MODE_P (mode0)) 24045 op0 = safe_vector_operand (op0, mode0); 24046 if (VECTOR_MODE_P (mode1)) 24047 op1 = safe_vector_operand (op1, mode1); 24048 24049 target = gen_reg_rtx (SImode); 24050 emit_move_insn (target, const0_rtx); 24051 target = gen_rtx_SUBREG (QImode, target, 0); 24052 24053 if ((optimize && !register_operand (op0, mode0)) 24054 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0)) 24055 op0 = copy_to_mode_reg (mode0, op0); 24056 if ((optimize && !register_operand (op1, mode1)) 24057 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1)) 24058 op1 = copy_to_mode_reg (mode1, op1); 24059 24060 pat = GEN_FCN (d->icode) (op0, op1); 24061 if (! pat) 24062 return 0; 24063 emit_insn (pat); 24064 emit_insn (gen_rtx_SET (VOIDmode, 24065 gen_rtx_STRICT_LOW_PART (VOIDmode, target), 24066 gen_rtx_fmt_ee (comparison, QImode, 24067 SET_DEST (pat), 24068 const0_rtx))); 24069 24070 return SUBREG_REG (target); 24071 } 24072 24073 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */ 24074 24075 static rtx 24076 ix86_expand_sse_pcmpestr (const struct builtin_description *d, 24077 tree exp, rtx target) 24078 { 24079 rtx pat; 24080 tree arg0 = CALL_EXPR_ARG (exp, 0); 24081 tree arg1 = CALL_EXPR_ARG (exp, 1); 24082 tree arg2 = CALL_EXPR_ARG (exp, 2); 24083 tree arg3 = CALL_EXPR_ARG (exp, 3); 24084 tree arg4 = CALL_EXPR_ARG (exp, 4); 24085 rtx scratch0, scratch1; 24086 rtx op0 = expand_normal (arg0); 24087 rtx op1 = expand_normal (arg1); 24088 rtx op2 = expand_normal (arg2); 24089 rtx op3 = expand_normal (arg3); 24090 rtx op4 = expand_normal (arg4); 24091 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm; 24092 24093 tmode0 = insn_data[d->icode].operand[0].mode; 24094 tmode1 = insn_data[d->icode].operand[1].mode; 24095 modev2 = insn_data[d->icode].operand[2].mode; 24096 modei3 = insn_data[d->icode].operand[3].mode; 24097 modev4 = insn_data[d->icode].operand[4].mode; 24098 modei5 = insn_data[d->icode].operand[5].mode; 24099 modeimm = insn_data[d->icode].operand[6].mode; 24100 24101 if (VECTOR_MODE_P (modev2)) 24102 op0 = safe_vector_operand (op0, modev2); 24103 if (VECTOR_MODE_P (modev4)) 24104 op2 = safe_vector_operand (op2, modev4); 24105 24106 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2)) 24107 op0 = copy_to_mode_reg (modev2, op0); 24108 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3)) 24109 op1 = copy_to_mode_reg (modei3, op1); 24110 if ((optimize && !register_operand (op2, modev4)) 24111 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4)) 24112 op2 = copy_to_mode_reg (modev4, op2); 24113 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5)) 24114 op3 = copy_to_mode_reg (modei5, op3); 24115 24116 if (! 
(*insn_data[d->icode].operand[6].predicate) (op4, modeimm)) 24117 { 24118 error ("the fifth argument must be an 8-bit immediate"); 24119 return const0_rtx; 24120 } 24121 24122 if (d->code == IX86_BUILTIN_PCMPESTRI128) 24123 { 24124 if (optimize || !target 24125 || GET_MODE (target) != tmode0 24126 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0)) 24127 target = gen_reg_rtx (tmode0); 24128 24129 scratch1 = gen_reg_rtx (tmode1); 24130 24131 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4); 24132 } 24133 else if (d->code == IX86_BUILTIN_PCMPESTRM128) 24134 { 24135 if (optimize || !target 24136 || GET_MODE (target) != tmode1 24137 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1)) 24138 target = gen_reg_rtx (tmode1); 24139 24140 scratch0 = gen_reg_rtx (tmode0); 24141 24142 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4); 24143 } 24144 else 24145 { 24146 gcc_assert (d->flag); 24147 24148 scratch0 = gen_reg_rtx (tmode0); 24149 scratch1 = gen_reg_rtx (tmode1); 24150 24151 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4); 24152 } 24153 24154 if (! pat) 24155 return 0; 24156 24157 emit_insn (pat); 24158 24159 if (d->flag) 24160 { 24161 target = gen_reg_rtx (SImode); 24162 emit_move_insn (target, const0_rtx); 24163 target = gen_rtx_SUBREG (QImode, target, 0); 24164 24165 emit_insn 24166 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target), 24167 gen_rtx_fmt_ee (EQ, QImode, 24168 gen_rtx_REG ((enum machine_mode) d->flag, 24169 FLAGS_REG), 24170 const0_rtx))); 24171 return SUBREG_REG (target); 24172 } 24173 else 24174 return target; 24175 } 24176 24177 24178 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */ 24179 24180 static rtx 24181 ix86_expand_sse_pcmpistr (const struct builtin_description *d, 24182 tree exp, rtx target) 24183 { 24184 rtx pat; 24185 tree arg0 = CALL_EXPR_ARG (exp, 0); 24186 tree arg1 = CALL_EXPR_ARG (exp, 1); 24187 tree arg2 = CALL_EXPR_ARG (exp, 2); 24188 rtx scratch0, scratch1; 24189 rtx op0 = expand_normal (arg0); 24190 rtx op1 = expand_normal (arg1); 24191 rtx op2 = expand_normal (arg2); 24192 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm; 24193 24194 tmode0 = insn_data[d->icode].operand[0].mode; 24195 tmode1 = insn_data[d->icode].operand[1].mode; 24196 modev2 = insn_data[d->icode].operand[2].mode; 24197 modev3 = insn_data[d->icode].operand[3].mode; 24198 modeimm = insn_data[d->icode].operand[4].mode; 24199 24200 if (VECTOR_MODE_P (modev2)) 24201 op0 = safe_vector_operand (op0, modev2); 24202 if (VECTOR_MODE_P (modev3)) 24203 op1 = safe_vector_operand (op1, modev3); 24204 24205 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2)) 24206 op0 = copy_to_mode_reg (modev2, op0); 24207 if ((optimize && !register_operand (op1, modev3)) 24208 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3)) 24209 op1 = copy_to_mode_reg (modev3, op1); 24210 24211 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm)) 24212 { 24213 error ("the third argument must be an 8-bit immediate"); 24214 return const0_rtx; 24215 } 24216 24217 if (d->code == IX86_BUILTIN_PCMPISTRI128) 24218 { 24219 if (optimize || !target 24220 || GET_MODE (target) != tmode0 24221 || !
(*insn_data[d->icode].operand[0].predicate) (target, tmode0)) 24222 target = gen_reg_rtx (tmode0); 24223 24224 scratch1 = gen_reg_rtx (tmode1); 24225 24226 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2); 24227 } 24228 else if (d->code == IX86_BUILTIN_PCMPISTRM128) 24229 { 24230 if (optimize || !target 24231 || GET_MODE (target) != tmode1 24232 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1)) 24233 target = gen_reg_rtx (tmode1); 24234 24235 scratch0 = gen_reg_rtx (tmode0); 24236 24237 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2); 24238 } 24239 else 24240 { 24241 gcc_assert (d->flag); 24242 24243 scratch0 = gen_reg_rtx (tmode0); 24244 scratch1 = gen_reg_rtx (tmode1); 24245 24246 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2); 24247 } 24248 24249 if (! pat) 24250 return 0; 24251 24252 emit_insn (pat); 24253 24254 if (d->flag) 24255 { 24256 target = gen_reg_rtx (SImode); 24257 emit_move_insn (target, const0_rtx); 24258 target = gen_rtx_SUBREG (QImode, target, 0); 24259 24260 emit_insn 24261 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target), 24262 gen_rtx_fmt_ee (EQ, QImode, 24263 gen_rtx_REG ((enum machine_mode) d->flag, 24264 FLAGS_REG), 24265 const0_rtx))); 24266 return SUBREG_REG (target); 24267 } 24268 else 24269 return target; 24270 } 24271 24272 /* Subroutine of ix86_expand_builtin to take care of insns with 24273 variable number of operands. */ 24274 24275 static rtx 24276 ix86_expand_args_builtin (const struct builtin_description *d, 24277 tree exp, rtx target) 24278 { 24279 rtx pat, real_target; 24280 unsigned int i, nargs; 24281 unsigned int nargs_constant = 0; 24282 int num_memory = 0; 24283 struct 24284 { 24285 rtx op; 24286 enum machine_mode mode; 24287 } args[4]; 24288 bool last_arg_count = false; 24289 enum insn_code icode = d->icode; 24290 const struct insn_data *insn_p = &insn_data[icode]; 24291 enum machine_mode tmode = insn_p->operand[0].mode; 24292 enum machine_mode rmode = VOIDmode; 24293 bool swap = false; 24294 enum rtx_code comparison = d->comparison; 24295 24296 switch ((enum ix86_builtin_type) d->flag) 24297 { 24298 case INT_FTYPE_V8SF_V8SF_PTEST: 24299 case INT_FTYPE_V4DI_V4DI_PTEST: 24300 case INT_FTYPE_V4DF_V4DF_PTEST: 24301 case INT_FTYPE_V4SF_V4SF_PTEST: 24302 case INT_FTYPE_V2DI_V2DI_PTEST: 24303 case INT_FTYPE_V2DF_V2DF_PTEST: 24304 return ix86_expand_sse_ptest (d, exp, target); 24305 case FLOAT128_FTYPE_FLOAT128: 24306 case FLOAT_FTYPE_FLOAT: 24307 case INT64_FTYPE_V4SF: 24308 case INT64_FTYPE_V2DF: 24309 case INT_FTYPE_V16QI: 24310 case INT_FTYPE_V8QI: 24311 case INT_FTYPE_V8SF: 24312 case INT_FTYPE_V4DF: 24313 case INT_FTYPE_V4SF: 24314 case INT_FTYPE_V2DF: 24315 case V16QI_FTYPE_V16QI: 24316 case V8SI_FTYPE_V8SF: 24317 case V8SI_FTYPE_V4SI: 24318 case V8HI_FTYPE_V8HI: 24319 case V8HI_FTYPE_V16QI: 24320 case V8QI_FTYPE_V8QI: 24321 case V8SF_FTYPE_V8SF: 24322 case V8SF_FTYPE_V8SI: 24323 case V8SF_FTYPE_V4SF: 24324 case V4SI_FTYPE_V4SI: 24325 case V4SI_FTYPE_V16QI: 24326 case V4SI_FTYPE_V4SF: 24327 case V4SI_FTYPE_V8SI: 24328 case V4SI_FTYPE_V8HI: 24329 case V4SI_FTYPE_V4DF: 24330 case V4SI_FTYPE_V2DF: 24331 case V4HI_FTYPE_V4HI: 24332 case V4DF_FTYPE_V4DF: 24333 case V4DF_FTYPE_V4SI: 24334 case V4DF_FTYPE_V4SF: 24335 case V4DF_FTYPE_V2DF: 24336 case V4SF_FTYPE_V4SF: 24337 case V4SF_FTYPE_V4SI: 24338 case V4SF_FTYPE_V8SF: 24339 case V4SF_FTYPE_V4DF: 24340 case V4SF_FTYPE_V2DF: 24341 case V2DI_FTYPE_V2DI: 24342 case V2DI_FTYPE_V16QI: 24343 case V2DI_FTYPE_V8HI: 24344 case 
V2DI_FTYPE_V4SI: 24345 case V2DF_FTYPE_V2DF: 24346 case V2DF_FTYPE_V4SI: 24347 case V2DF_FTYPE_V4DF: 24348 case V2DF_FTYPE_V4SF: 24349 case V2DF_FTYPE_V2SI: 24350 case V2SI_FTYPE_V2SI: 24351 case V2SI_FTYPE_V4SF: 24352 case V2SI_FTYPE_V2SF: 24353 case V2SI_FTYPE_V2DF: 24354 case V2SF_FTYPE_V2SF: 24355 case V2SF_FTYPE_V2SI: 24356 nargs = 1; 24357 break; 24358 case V4SF_FTYPE_V4SF_VEC_MERGE: 24359 case V2DF_FTYPE_V2DF_VEC_MERGE: 24360 return ix86_expand_unop_vec_merge_builtin (icode, exp, target); 24361 case FLOAT128_FTYPE_FLOAT128_FLOAT128: 24362 case V16QI_FTYPE_V16QI_V16QI: 24363 case V16QI_FTYPE_V8HI_V8HI: 24364 case V8QI_FTYPE_V8QI_V8QI: 24365 case V8QI_FTYPE_V4HI_V4HI: 24366 case V8HI_FTYPE_V8HI_V8HI: 24367 case V8HI_FTYPE_V16QI_V16QI: 24368 case V8HI_FTYPE_V4SI_V4SI: 24369 case V8SF_FTYPE_V8SF_V8SF: 24370 case V8SF_FTYPE_V8SF_V8SI: 24371 case V4SI_FTYPE_V4SI_V4SI: 24372 case V4SI_FTYPE_V8HI_V8HI: 24373 case V4SI_FTYPE_V4SF_V4SF: 24374 case V4SI_FTYPE_V2DF_V2DF: 24375 case V4HI_FTYPE_V4HI_V4HI: 24376 case V4HI_FTYPE_V8QI_V8QI: 24377 case V4HI_FTYPE_V2SI_V2SI: 24378 case V4DF_FTYPE_V4DF_V4DF: 24379 case V4DF_FTYPE_V4DF_V4DI: 24380 case V4SF_FTYPE_V4SF_V4SF: 24381 case V4SF_FTYPE_V4SF_V4SI: 24382 case V4SF_FTYPE_V4SF_V2SI: 24383 case V4SF_FTYPE_V4SF_V2DF: 24384 case V4SF_FTYPE_V4SF_DI: 24385 case V4SF_FTYPE_V4SF_SI: 24386 case V2DI_FTYPE_V2DI_V2DI: 24387 case V2DI_FTYPE_V16QI_V16QI: 24388 case V2DI_FTYPE_V4SI_V4SI: 24389 case V2DI_FTYPE_V2DI_V16QI: 24390 case V2DI_FTYPE_V2DF_V2DF: 24391 case V2SI_FTYPE_V2SI_V2SI: 24392 case V2SI_FTYPE_V4HI_V4HI: 24393 case V2SI_FTYPE_V2SF_V2SF: 24394 case V2DF_FTYPE_V2DF_V2DF: 24395 case V2DF_FTYPE_V2DF_V4SF: 24396 case V2DF_FTYPE_V2DF_V2DI: 24397 case V2DF_FTYPE_V2DF_DI: 24398 case V2DF_FTYPE_V2DF_SI: 24399 case V2SF_FTYPE_V2SF_V2SF: 24400 case V1DI_FTYPE_V1DI_V1DI: 24401 case V1DI_FTYPE_V8QI_V8QI: 24402 case V1DI_FTYPE_V2SI_V2SI: 24403 if (comparison == UNKNOWN) 24404 return ix86_expand_binop_builtin (icode, exp, target); 24405 nargs = 2; 24406 break; 24407 case V4SF_FTYPE_V4SF_V4SF_SWAP: 24408 case V2DF_FTYPE_V2DF_V2DF_SWAP: 24409 gcc_assert (comparison != UNKNOWN); 24410 nargs = 2; 24411 swap = true; 24412 break; 24413 case V8HI_FTYPE_V8HI_V8HI_COUNT: 24414 case V8HI_FTYPE_V8HI_SI_COUNT: 24415 case V4SI_FTYPE_V4SI_V4SI_COUNT: 24416 case V4SI_FTYPE_V4SI_SI_COUNT: 24417 case V4HI_FTYPE_V4HI_V4HI_COUNT: 24418 case V4HI_FTYPE_V4HI_SI_COUNT: 24419 case V2DI_FTYPE_V2DI_V2DI_COUNT: 24420 case V2DI_FTYPE_V2DI_SI_COUNT: 24421 case V2SI_FTYPE_V2SI_V2SI_COUNT: 24422 case V2SI_FTYPE_V2SI_SI_COUNT: 24423 case V1DI_FTYPE_V1DI_V1DI_COUNT: 24424 case V1DI_FTYPE_V1DI_SI_COUNT: 24425 nargs = 2; 24426 last_arg_count = true; 24427 break; 24428 case UINT64_FTYPE_UINT64_UINT64: 24429 case UINT_FTYPE_UINT_UINT: 24430 case UINT_FTYPE_UINT_USHORT: 24431 case UINT_FTYPE_UINT_UCHAR: 24432 nargs = 2; 24433 break; 24434 case V2DI2TI_FTYPE_V2DI_INT: 24435 nargs = 2; 24436 rmode = V2DImode; 24437 nargs_constant = 1; 24438 break; 24439 case V8HI_FTYPE_V8HI_INT: 24440 case V8SF_FTYPE_V8SF_INT: 24441 case V4SI_FTYPE_V4SI_INT: 24442 case V4SI_FTYPE_V8SI_INT: 24443 case V4HI_FTYPE_V4HI_INT: 24444 case V4DF_FTYPE_V4DF_INT: 24445 case V4SF_FTYPE_V4SF_INT: 24446 case V4SF_FTYPE_V8SF_INT: 24447 case V2DI_FTYPE_V2DI_INT: 24448 case V2DF_FTYPE_V2DF_INT: 24449 case V2DF_FTYPE_V4DF_INT: 24450 nargs = 2; 24451 nargs_constant = 1; 24452 break; 24453 case V16QI_FTYPE_V16QI_V16QI_V16QI: 24454 case V8SF_FTYPE_V8SF_V8SF_V8SF: 24455 case V4DF_FTYPE_V4DF_V4DF_V4DF: 24456 case 
V4SF_FTYPE_V4SF_V4SF_V4SF: 24457 case V2DF_FTYPE_V2DF_V2DF_V2DF: 24458 nargs = 3; 24459 break; 24460 case V16QI_FTYPE_V16QI_V16QI_INT: 24461 case V8HI_FTYPE_V8HI_V8HI_INT: 24462 case V8SI_FTYPE_V8SI_V8SI_INT: 24463 case V8SI_FTYPE_V8SI_V4SI_INT: 24464 case V8SF_FTYPE_V8SF_V8SF_INT: 24465 case V8SF_FTYPE_V8SF_V4SF_INT: 24466 case V4SI_FTYPE_V4SI_V4SI_INT: 24467 case V4DF_FTYPE_V4DF_V4DF_INT: 24468 case V4DF_FTYPE_V4DF_V2DF_INT: 24469 case V4SF_FTYPE_V4SF_V4SF_INT: 24470 case V2DI_FTYPE_V2DI_V2DI_INT: 24471 case V2DF_FTYPE_V2DF_V2DF_INT: 24472 nargs = 3; 24473 nargs_constant = 1; 24474 break; 24475 case V2DI2TI_FTYPE_V2DI_V2DI_INT: 24476 nargs = 3; 24477 rmode = V2DImode; 24478 nargs_constant = 1; 24479 break; 24480 case V1DI2DI_FTYPE_V1DI_V1DI_INT: 24481 nargs = 3; 24482 rmode = DImode; 24483 nargs_constant = 1; 24484 break; 24485 case V2DI_FTYPE_V2DI_UINT_UINT: 24486 nargs = 3; 24487 nargs_constant = 2; 24488 break; 24489 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT: 24490 nargs = 4; 24491 nargs_constant = 2; 24492 break; 24493 default: 24494 gcc_unreachable (); 24495 } 24496 24497 gcc_assert (nargs <= ARRAY_SIZE (args)); 24498 24499 if (comparison != UNKNOWN) 24500 { 24501 gcc_assert (nargs == 2); 24502 return ix86_expand_sse_compare (d, exp, target, swap); 24503 } 24504 24505 if (rmode == VOIDmode || rmode == tmode) 24506 { 24507 if (optimize 24508 || target == 0 24509 || GET_MODE (target) != tmode 24510 || ! (*insn_p->operand[0].predicate) (target, tmode)) 24511 target = gen_reg_rtx (tmode); 24512 real_target = target; 24513 } 24514 else 24515 { 24516 target = gen_reg_rtx (rmode); 24517 real_target = simplify_gen_subreg (tmode, target, rmode, 0); 24518 } 24519 24520 for (i = 0; i < nargs; i++) 24521 { 24522 tree arg = CALL_EXPR_ARG (exp, i); 24523 rtx op = expand_normal (arg); 24524 enum machine_mode mode = insn_p->operand[i + 1].mode; 24525 bool match = (*insn_p->operand[i + 1].predicate) (op, mode); 24526 24527 if (last_arg_count && (i + 1) == nargs) 24528 { 24529 /* SIMD shift insns take either an 8-bit immediate or 24530 register as count. But builtin functions take int as 24531 count. If count doesn't match, we put it in register. 
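   For illustration (a sketch, assuming the __builtin_ia32_pslldi128
   builtin defined elsewhere in this file and -msse2):

     typedef int v4si __attribute__ ((vector_size (16)));

     v4si
     shift_each (v4si x, int n)
     {
       return __builtin_ia32_pslldi128 (x, n);
     }

   With a literal n the count stays an immediate; with a variable n the
   predicate check fails and the count is copied into a register below.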
*/ 24532 if (!match) 24533 { 24534 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0); 24535 if (!(*insn_p->operand[i + 1].predicate) (op, mode)) 24536 op = copy_to_reg (op); 24537 } 24538 } 24539 else if ((nargs - i) <= nargs_constant) 24540 { 24541 if (!match) 24542 switch (icode) 24543 { 24544 case CODE_FOR_sse4_1_roundpd: 24545 case CODE_FOR_sse4_1_roundps: 24546 case CODE_FOR_sse4_1_roundsd: 24547 case CODE_FOR_sse4_1_roundss: 24548 case CODE_FOR_sse4_1_blendps: 24549 case CODE_FOR_avx_blendpd256: 24550 case CODE_FOR_avx_vpermilv4df: 24551 case CODE_FOR_avx_roundpd256: 24552 case CODE_FOR_avx_roundps256: 24553 error ("the last argument must be a 4-bit immediate"); 24554 return const0_rtx; 24555 24556 case CODE_FOR_sse4_1_blendpd: 24557 case CODE_FOR_avx_vpermilv2df: 24558 error ("the last argument must be a 2-bit immediate"); 24559 return const0_rtx; 24560 24561 case CODE_FOR_avx_vextractf128v4df: 24562 case CODE_FOR_avx_vextractf128v8sf: 24563 case CODE_FOR_avx_vextractf128v8si: 24564 case CODE_FOR_avx_vinsertf128v4df: 24565 case CODE_FOR_avx_vinsertf128v8sf: 24566 case CODE_FOR_avx_vinsertf128v8si: 24567 error ("the last argument must be a 1-bit immediate"); 24568 return const0_rtx; 24569 24570 case CODE_FOR_avx_cmpsdv2df3: 24571 case CODE_FOR_avx_cmpssv4sf3: 24572 case CODE_FOR_avx_cmppdv2df3: 24573 case CODE_FOR_avx_cmppsv4sf3: 24574 case CODE_FOR_avx_cmppdv4df3: 24575 case CODE_FOR_avx_cmppsv8sf3: 24576 error ("the last argument must be a 5-bit immediate"); 24577 return const0_rtx; 24578 24579 default: 24580 switch (nargs_constant) 24581 { 24582 case 2: 24583 if ((nargs - i) == nargs_constant) 24584 { 24585 error ("the next to last argument must be an 8-bit immediate"); 24586 break; 24587 } 24588 case 1: 24589 error ("the last argument must be an 8-bit immediate"); 24590 break; 24591 default: 24592 gcc_unreachable (); 24593 } 24594 return const0_rtx; 24595 } 24596 } 24597 else 24598 { 24599 if (VECTOR_MODE_P (mode)) 24600 op = safe_vector_operand (op, mode); 24601 24602 /* If we aren't optimizing, only allow one memory operand to 24603 be generated. */ 24604 if (memory_operand (op, mode)) 24605 num_memory++; 24606 24607 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) 24608 { 24609 if (optimize || !match || num_memory > 1) 24610 op = copy_to_mode_reg (mode, op); 24611 } 24612 else 24613 { 24614 op = copy_to_reg (op); 24615 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0); 24616 } 24617 } 24618 24619 args[i].op = op; 24620 args[i].mode = mode; 24621 } 24622 24623 switch (nargs) 24624 { 24625 case 1: 24626 pat = GEN_FCN (icode) (real_target, args[0].op); 24627 break; 24628 case 2: 24629 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op); 24630 break; 24631 case 3: 24632 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, 24633 args[2].op); 24634 break; 24635 case 4: 24636 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, 24637 args[2].op, args[3].op); 24638 break; 24639 default: 24640 gcc_unreachable (); 24641 } 24642 24643 if (! pat) 24644 return 0; 24645 24646 emit_insn (pat); 24647 return target; 24648 } 24649 24650 /* Subroutine of ix86_expand_builtin to take care of special insns 24651 with variable number of operands. 
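   Most of these are loads and stores: one argument is a pointer that
   becomes the memory operand, and KLASS below distinguishes loads,
   which return a value, from stores, which return nothing.  For
   illustration (a sketch, assuming the __builtin_ia32_loadups builtin
   registered elsewhere in this file with type V4SF_FTYPE_PCFLOAT and
   -msse):

     typedef float v4sf __attribute__ ((vector_size (16)));

     v4sf
     load4 (const float *p)
     {
       return __builtin_ia32_loadups (p);
     }

   The pointer is the designated memory operand, so it is wrapped in a
   MEM of the insn's mode rather than copied into a register, and the
   unaligned movups pattern does the load.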
*/ 24652 24653 static rtx 24654 ix86_expand_special_args_builtin (const struct builtin_description *d, 24655 tree exp, rtx target) 24656 { 24657 tree arg; 24658 rtx pat, op; 24659 unsigned int i, nargs, arg_adjust, memory; 24660 struct 24661 { 24662 rtx op; 24663 enum machine_mode mode; 24664 } args[2]; 24665 enum insn_code icode = d->icode; 24666 bool last_arg_constant = false; 24667 const struct insn_data *insn_p = &insn_data[icode]; 24668 enum machine_mode tmode = insn_p->operand[0].mode; 24669 enum { load, store } klass; 24670 24671 switch ((enum ix86_special_builtin_type) d->flag) 24672 { 24673 case VOID_FTYPE_VOID: 24674 emit_insn (GEN_FCN (icode) (target)); 24675 return 0; 24676 case V2DI_FTYPE_PV2DI: 24677 case V32QI_FTYPE_PCCHAR: 24678 case V16QI_FTYPE_PCCHAR: 24679 case V8SF_FTYPE_PCV4SF: 24680 case V8SF_FTYPE_PCFLOAT: 24681 case V4SF_FTYPE_PCFLOAT: 24682 case V4DF_FTYPE_PCV2DF: 24683 case V4DF_FTYPE_PCDOUBLE: 24684 case V2DF_FTYPE_PCDOUBLE: 24685 nargs = 1; 24686 klass = load; 24687 memory = 0; 24688 break; 24689 case VOID_FTYPE_PV2SF_V4SF: 24690 case VOID_FTYPE_PV4DI_V4DI: 24691 case VOID_FTYPE_PV2DI_V2DI: 24692 case VOID_FTYPE_PCHAR_V32QI: 24693 case VOID_FTYPE_PCHAR_V16QI: 24694 case VOID_FTYPE_PFLOAT_V8SF: 24695 case VOID_FTYPE_PFLOAT_V4SF: 24696 case VOID_FTYPE_PDOUBLE_V4DF: 24697 case VOID_FTYPE_PDOUBLE_V2DF: 24698 case VOID_FTYPE_PDI_DI: 24699 case VOID_FTYPE_PINT_INT: 24700 nargs = 1; 24701 klass = store; 24702 /* Reserve memory operand for target. */ 24703 memory = ARRAY_SIZE (args); 24704 break; 24705 case V4SF_FTYPE_V4SF_PCV2SF: 24706 case V2DF_FTYPE_V2DF_PCDOUBLE: 24707 nargs = 2; 24708 klass = load; 24709 memory = 1; 24710 break; 24711 case V8SF_FTYPE_PCV8SF_V8SF: 24712 case V4DF_FTYPE_PCV4DF_V4DF: 24713 case V4SF_FTYPE_PCV4SF_V4SF: 24714 case V2DF_FTYPE_PCV2DF_V2DF: 24715 nargs = 2; 24716 klass = load; 24717 memory = 0; 24718 break; 24719 case VOID_FTYPE_PV8SF_V8SF_V8SF: 24720 case VOID_FTYPE_PV4DF_V4DF_V4DF: 24721 case VOID_FTYPE_PV4SF_V4SF_V4SF: 24722 case VOID_FTYPE_PV2DF_V2DF_V2DF: 24723 nargs = 2; 24724 klass = store; 24725 /* Reserve memory operand for target. */ 24726 memory = ARRAY_SIZE (args); 24727 break; 24728 default: 24729 gcc_unreachable (); 24730 } 24731 24732 gcc_assert (nargs <= ARRAY_SIZE (args)); 24733 24734 if (klass == store) 24735 { 24736 arg = CALL_EXPR_ARG (exp, 0); 24737 op = expand_normal (arg); 24738 gcc_assert (target == 0); 24739 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op)); 24740 arg_adjust = 1; 24741 } 24742 else 24743 { 24744 arg_adjust = 0; 24745 if (optimize 24746 || target == 0 24747 || GET_MODE (target) != tmode 24748 || ! (*insn_p->operand[0].predicate) (target, tmode)) 24749 target = gen_reg_rtx (tmode); 24750 } 24751 24752 for (i = 0; i < nargs; i++) 24753 { 24754 enum machine_mode mode = insn_p->operand[i + 1].mode; 24755 bool match; 24756 24757 arg = CALL_EXPR_ARG (exp, i + arg_adjust); 24758 op = expand_normal (arg); 24759 match = (*insn_p->operand[i + 1].predicate) (op, mode); 24760 24761 if (last_arg_constant && (i + 1) == nargs) 24762 { 24763 if (!match) 24764 switch (icode) 24765 { 24766 default: 24767 error ("the last argument must be an 8-bit immediate"); 24768 return const0_rtx; 24769 } 24770 } 24771 else 24772 { 24773 if (i == memory) 24774 { 24775 /* This must be the memory operand. */ 24776 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op)); 24777 gcc_assert (GET_MODE (op) == mode 24778 || GET_MODE (op) == VOIDmode); 24779 } 24780 else 24781 { 24782 /* This must be register. 
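   Only the operand slot selected by MEMORY above may remain a memory
   reference; every other argument is copied into a register so that at
   most one memory operand reaches the insn pattern.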
*/ 24783 if (VECTOR_MODE_P (mode)) 24784 op = safe_vector_operand (op, mode); 24785 24786 gcc_assert (GET_MODE (op) == mode 24787 || GET_MODE (op) == VOIDmode); 24788 op = copy_to_mode_reg (mode, op); 24789 } 24790 } 24791 24792 args[i].op = op; 24793 args[i].mode = mode; 24794 } 24795 24796 switch (nargs) 24797 { 24798 case 1: 24799 pat = GEN_FCN (icode) (target, args[0].op); 24800 break; 24801 case 2: 24802 pat = GEN_FCN (icode) (target, args[0].op, args[1].op); 24803 break; 24804 default: 24805 gcc_unreachable (); 24806 } 24807 24808 if (! pat) 24809 return 0; 24810 emit_insn (pat); 24811 return klass == store ? 0 : target; 24812 } 24813 24814 /* Return the integer constant in ARG. Constrain it to be in the range 24815 of the subparts of VEC_TYPE; issue an error if not. */ 24816 24817 static int 24818 get_element_number (tree vec_type, tree arg) 24819 { 24820 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; 24821 24822 if (!host_integerp (arg, 1) 24823 || (elt = tree_low_cst (arg, 1), elt > max)) 24824 { 24825 error ("selector must be an integer constant in the range 0..%wi", max); 24826 return 0; 24827 } 24828 24829 return elt; 24830 } 24831 24832 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around 24833 ix86_expand_vector_init. We DO have language-level syntax for this, in 24834 the form of (type){ init-list }. Except that since we can't place emms 24835 instructions from inside the compiler, we can't allow the use of MMX 24836 registers unless the user explicitly asks for it. So we do *not* define 24837 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead 24838 we have builtins invoked by mmintrin.h that gives us license to emit 24839 these sorts of instructions. */ 24840 24841 static rtx 24842 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target) 24843 { 24844 enum machine_mode tmode = TYPE_MODE (type); 24845 enum machine_mode inner_mode = GET_MODE_INNER (tmode); 24846 int i, n_elt = GET_MODE_NUNITS (tmode); 24847 rtvec v = rtvec_alloc (n_elt); 24848 24849 gcc_assert (VECTOR_MODE_P (tmode)); 24850 gcc_assert (call_expr_nargs (exp) == n_elt); 24851 24852 for (i = 0; i < n_elt; ++i) 24853 { 24854 rtx x = expand_normal (CALL_EXPR_ARG (exp, i)); 24855 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); 24856 } 24857 24858 if (!target || !register_operand (target, tmode)) 24859 target = gen_reg_rtx (tmode); 24860 24861 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v)); 24862 return target; 24863 } 24864 24865 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around 24866 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we 24867 had a language-level syntax for referencing vector elements. 
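   For illustration (a sketch using the __builtin_ia32_vec_ext_v4sf
   builtin registered earlier in this file; this is essentially what
   the element-extract wrappers in the intrinsic headers reduce to):

     typedef float v4sf __attribute__ ((vector_size (16)));

     float
     first_lane (v4sf x)
     {
       return __builtin_ia32_vec_ext_v4sf (x, 0);
     }

   The selector must be an integer constant in range, which
   get_element_number below enforces.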
*/ 24868 24869 static rtx 24870 ix86_expand_vec_ext_builtin (tree exp, rtx target) 24871 { 24872 enum machine_mode tmode, mode0; 24873 tree arg0, arg1; 24874 int elt; 24875 rtx op0; 24876 24877 arg0 = CALL_EXPR_ARG (exp, 0); 24878 arg1 = CALL_EXPR_ARG (exp, 1); 24879 24880 op0 = expand_normal (arg0); 24881 elt = get_element_number (TREE_TYPE (arg0), arg1); 24882 24883 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); 24884 mode0 = TYPE_MODE (TREE_TYPE (arg0)); 24885 gcc_assert (VECTOR_MODE_P (mode0)); 24886 24887 op0 = force_reg (mode0, op0); 24888 24889 if (optimize || !target || !register_operand (target, tmode)) 24890 target = gen_reg_rtx (tmode); 24891 24892 ix86_expand_vector_extract (true, target, op0, elt); 24893 24894 return target; 24895 } 24896 24897 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around 24898 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had 24899 a language-level syntax for referencing vector elements. */ 24900 24901 static rtx 24902 ix86_expand_vec_set_builtin (tree exp) 24903 { 24904 enum machine_mode tmode, mode1; 24905 tree arg0, arg1, arg2; 24906 int elt; 24907 rtx op0, op1, target; 24908 24909 arg0 = CALL_EXPR_ARG (exp, 0); 24910 arg1 = CALL_EXPR_ARG (exp, 1); 24911 arg2 = CALL_EXPR_ARG (exp, 2); 24912 24913 tmode = TYPE_MODE (TREE_TYPE (arg0)); 24914 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); 24915 gcc_assert (VECTOR_MODE_P (tmode)); 24916 24917 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL); 24918 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL); 24919 elt = get_element_number (TREE_TYPE (arg0), arg2); 24920 24921 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode) 24922 op1 = convert_modes (mode1, GET_MODE (op1), op1, true); 24923 24924 op0 = force_reg (tmode, op0); 24925 op1 = force_reg (mode1, op1); 24926 24927 /* OP0 is the source of these builtin functions and shouldn't be 24928 modified. Create a copy, use it and return it as target. */ 24929 target = gen_reg_rtx (tmode); 24930 emit_move_insn (target, op0); 24931 ix86_expand_vector_set (true, target, op1, elt); 24932 24933 return target; 24934 } 24935 24936 /* Expand an expression EXP that calls a built-in function, 24937 with result going to TARGET if that's convenient 24938 (and in mode MODE if that's convenient). 24939 SUBTARGET may be used as the target for computing one of EXP's operands. 24940 IGNORE is nonzero if the value is to be ignored. */ 24941 24942 static rtx 24943 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, 24944 enum machine_mode mode ATTRIBUTE_UNUSED, 24945 int ignore ATTRIBUTE_UNUSED) 24946 { 24947 const struct builtin_description *d; 24948 size_t i; 24949 enum insn_code icode; 24950 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); 24951 tree arg0, arg1, arg2; 24952 rtx op0, op1, op2, pat; 24953 enum machine_mode mode0, mode1, mode2; 24954 unsigned int fcode = DECL_FUNCTION_CODE (fndecl); 24955 24956 /* Determine whether the builtin function is available under the current ISA. 24957 Originally the builtin was not created if it wasn't applicable to the 24958 current ISA based on the command line switches. With function specific 24959 options, we need to check in the context of the function making the call 24960 whether it is supported. 
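   For instance (a sketch, assuming the target attribute and the
   __builtin_ia32_pmaxsd128 builtin registered elsewhere in this file),
   a unit compiled without -msse4.1 may still contain

     typedef int v4si __attribute__ ((vector_size (16)));

     __attribute__ ((target ("sse4.1")))
     v4si
     vmax (v4si a, v4si b)
     {
       return __builtin_ia32_pmaxsd128 (a, b);
     }

   The builtin exists either way; the check below rejects the call only
   when the ISA flags of the function containing it lack SSE4.1.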
*/ 24961 if (ix86_builtins_isa[fcode].isa 24962 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags)) 24963 { 24964 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL, 24965 NULL, NULL, false); 24966 24967 if (!opts) 24968 error ("%qE needs unknown isa option", fndecl); 24969 else 24970 { 24971 gcc_assert (opts != NULL); 24972 error ("%qE needs isa option %s", fndecl, opts); 24973 free (opts); 24974 } 24975 return const0_rtx; 24976 } 24977 24978 switch (fcode) 24979 { 24980 case IX86_BUILTIN_MASKMOVQ: 24981 case IX86_BUILTIN_MASKMOVDQU: 24982 icode = (fcode == IX86_BUILTIN_MASKMOVQ 24983 ? CODE_FOR_mmx_maskmovq 24984 : CODE_FOR_sse2_maskmovdqu); 24985 /* Note the arg order is different from the operand order. */ 24986 arg1 = CALL_EXPR_ARG (exp, 0); 24987 arg2 = CALL_EXPR_ARG (exp, 1); 24988 arg0 = CALL_EXPR_ARG (exp, 2); 24989 op0 = expand_normal (arg0); 24990 op1 = expand_normal (arg1); 24991 op2 = expand_normal (arg2); 24992 mode0 = insn_data[icode].operand[0].mode; 24993 mode1 = insn_data[icode].operand[1].mode; 24994 mode2 = insn_data[icode].operand[2].mode; 24995 24996 op0 = force_reg (Pmode, op0); 24997 op0 = gen_rtx_MEM (mode1, op0); 24998 24999 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) 25000 op0 = copy_to_mode_reg (mode0, op0); 25001 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) 25002 op1 = copy_to_mode_reg (mode1, op1); 25003 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2)) 25004 op2 = copy_to_mode_reg (mode2, op2); 25005 pat = GEN_FCN (icode) (op0, op1, op2); 25006 if (! pat) 25007 return 0; 25008 emit_insn (pat); 25009 return 0; 25010 25011 case IX86_BUILTIN_LDMXCSR: 25012 op0 = expand_normal (CALL_EXPR_ARG (exp, 0)); 25013 target = assign_386_stack_local (SImode, SLOT_VIRTUAL); 25014 emit_move_insn (target, op0); 25015 emit_insn (gen_sse_ldmxcsr (target)); 25016 return 0; 25017 25018 case IX86_BUILTIN_STMXCSR: 25019 target = assign_386_stack_local (SImode, SLOT_VIRTUAL); 25020 emit_insn (gen_sse_stmxcsr (target)); 25021 return copy_to_mode_reg (SImode, target); 25022 25023 case IX86_BUILTIN_CLFLUSH: 25024 arg0 = CALL_EXPR_ARG (exp, 0); 25025 op0 = expand_normal (arg0); 25026 icode = CODE_FOR_sse2_clflush; 25027 if (! 
(*insn_data[icode].operand[0].predicate) (op0, Pmode)) 25028 op0 = copy_to_mode_reg (Pmode, op0); 25029 25030 emit_insn (gen_sse2_clflush (op0)); 25031 return 0; 25032 25033 case IX86_BUILTIN_MONITOR: 25034 arg0 = CALL_EXPR_ARG (exp, 0); 25035 arg1 = CALL_EXPR_ARG (exp, 1); 25036 arg2 = CALL_EXPR_ARG (exp, 2); 25037 op0 = expand_normal (arg0); 25038 op1 = expand_normal (arg1); 25039 op2 = expand_normal (arg2); 25040 if (!REG_P (op0)) 25041 op0 = copy_to_mode_reg (Pmode, op0); 25042 if (!REG_P (op1)) 25043 op1 = copy_to_mode_reg (SImode, op1); 25044 if (!REG_P (op2)) 25045 op2 = copy_to_mode_reg (SImode, op2); 25046 emit_insn ((*ix86_gen_monitor) (op0, op1, op2)); 25047 return 0; 25048 25049 case IX86_BUILTIN_MWAIT: 25050 arg0 = CALL_EXPR_ARG (exp, 0); 25051 arg1 = CALL_EXPR_ARG (exp, 1); 25052 op0 = expand_normal (arg0); 25053 op1 = expand_normal (arg1); 25054 if (!REG_P (op0)) 25055 op0 = copy_to_mode_reg (SImode, op0); 25056 if (!REG_P (op1)) 25057 op1 = copy_to_mode_reg (SImode, op1); 25058 emit_insn (gen_sse3_mwait (op0, op1)); 25059 return 0; 25060 25061 case IX86_BUILTIN_VEC_INIT_V2SI: 25062 case IX86_BUILTIN_VEC_INIT_V4HI: 25063 case IX86_BUILTIN_VEC_INIT_V8QI: 25064 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target); 25065 25066 case IX86_BUILTIN_VEC_EXT_V2DF: 25067 case IX86_BUILTIN_VEC_EXT_V2DI: 25068 case IX86_BUILTIN_VEC_EXT_V4SF: 25069 case IX86_BUILTIN_VEC_EXT_V4SI: 25070 case IX86_BUILTIN_VEC_EXT_V8HI: 25071 case IX86_BUILTIN_VEC_EXT_V2SI: 25072 case IX86_BUILTIN_VEC_EXT_V4HI: 25073 case IX86_BUILTIN_VEC_EXT_V16QI: 25074 return ix86_expand_vec_ext_builtin (exp, target); 25075 25076 case IX86_BUILTIN_VEC_SET_V2DI: 25077 case IX86_BUILTIN_VEC_SET_V4SF: 25078 case IX86_BUILTIN_VEC_SET_V4SI: 25079 case IX86_BUILTIN_VEC_SET_V8HI: 25080 case IX86_BUILTIN_VEC_SET_V4HI: 25081 case IX86_BUILTIN_VEC_SET_V16QI: 25082 return ix86_expand_vec_set_builtin (exp); 25083 25084 case IX86_BUILTIN_INFQ: 25085 { 25086 REAL_VALUE_TYPE inf; 25087 rtx tmp; 25088 25089 real_inf (&inf); 25090 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode); 25091 25092 tmp = validize_mem (force_const_mem (mode, tmp)); 25093 25094 if (target == 0) 25095 target = gen_reg_rtx (mode); 25096 25097 emit_move_insn (target, tmp); 25098 return target; 25099 } 25100 25101 default: 25102 break; 25103 } 25104 25105 for (i = 0, d = bdesc_special_args; 25106 i < ARRAY_SIZE (bdesc_special_args); 25107 i++, d++) 25108 if (d->code == fcode) 25109 return ix86_expand_special_args_builtin (d, exp, target); 25110 25111 for (i = 0, d = bdesc_args; 25112 i < ARRAY_SIZE (bdesc_args); 25113 i++, d++) 25114 if (d->code == fcode) 25115 switch (fcode) 25116 { 25117 case IX86_BUILTIN_FABSQ: 25118 case IX86_BUILTIN_COPYSIGNQ: 25119 if (!TARGET_SSE2) 25120 /* Emit a normal call if SSE2 isn't available. 
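   expand_call then emits an ordinary call to the assembler names
   registered above, __fabstf2 and __copysigntf3, instead of
   open-coding an SSE sequence.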
*/ 25121 return expand_call (exp, target, ignore); 25122 default: 25123 return ix86_expand_args_builtin (d, exp, target); 25124 } 25125 25126 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) 25127 if (d->code == fcode) 25128 return ix86_expand_sse_comi (d, exp, target); 25129 25130 for (i = 0, d = bdesc_pcmpestr; 25131 i < ARRAY_SIZE (bdesc_pcmpestr); 25132 i++, d++) 25133 if (d->code == fcode) 25134 return ix86_expand_sse_pcmpestr (d, exp, target); 25135 25136 for (i = 0, d = bdesc_pcmpistr; 25137 i < ARRAY_SIZE (bdesc_pcmpistr); 25138 i++, d++) 25139 if (d->code == fcode) 25140 return ix86_expand_sse_pcmpistr (d, exp, target); 25141 25142 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++) 25143 if (d->code == fcode) 25144 return ix86_expand_multi_arg_builtin (d->icode, exp, target, 25145 (enum multi_arg_type)d->flag, 25146 d->comparison); 25147 25148 gcc_unreachable (); 25149 } 25150 25151 /* Returns a function decl for a vectorized version of the builtin function 25152 with builtin function code FN and the result vector type TYPE, or NULL_TREE 25153 if it is not available. */ 25154 25155 static tree 25156 ix86_builtin_vectorized_function (unsigned int fn, tree type_out, 25157 tree type_in) 25158 { 25159 enum machine_mode in_mode, out_mode; 25160 int in_n, out_n; 25161 25162 if (TREE_CODE (type_out) != VECTOR_TYPE 25163 || TREE_CODE (type_in) != VECTOR_TYPE) 25164 return NULL_TREE; 25165 25166 out_mode = TYPE_MODE (TREE_TYPE (type_out)); 25167 out_n = TYPE_VECTOR_SUBPARTS (type_out); 25168 in_mode = TYPE_MODE (TREE_TYPE (type_in)); 25169 in_n = TYPE_VECTOR_SUBPARTS (type_in); 25170 25171 switch (fn) 25172 { 25173 case BUILT_IN_SQRT: 25174 if (out_mode == DFmode && out_n == 2 25175 && in_mode == DFmode && in_n == 2) 25176 return ix86_builtins[IX86_BUILTIN_SQRTPD]; 25177 break; 25178 25179 case BUILT_IN_SQRTF: 25180 if (out_mode == SFmode && out_n == 4 25181 && in_mode == SFmode && in_n == 4) 25182 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR]; 25183 break; 25184 25185 case BUILT_IN_LRINT: 25186 if (out_mode == SImode && out_n == 4 25187 && in_mode == DFmode && in_n == 2) 25188 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX]; 25189 break; 25190 25191 case BUILT_IN_LRINTF: 25192 if (out_mode == SImode && out_n == 4 25193 && in_mode == SFmode && in_n == 4) 25194 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ]; 25195 break; 25196 25197 default: 25198 ; 25199 } 25200 25201 /* Dispatch to a handler for a vectorization library. */ 25202 if (ix86_veclib_handler) 25203 return (*ix86_veclib_handler)(fn, type_out, type_in); 25204 25205 return NULL_TREE; 25206 } 25207 25208 /* Handler for an SVML-style interface to 25209 a library with vectorized intrinsics. */ 25210 25211 static tree 25212 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in) 25213 { 25214 char name[20]; 25215 tree fntype, new_fndecl, args; 25216 unsigned arity; 25217 const char *bname; 25218 enum machine_mode el_mode, in_mode; 25219 int n, in_n; 25220 25221 /* The SVML is suitable for unsafe math only. 
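   As a worked example of the name mangling done below: with
   -mveclibabi=svml and -funsafe-math-optimizations, a vectorized V4SF
   sinf call is redirected to vmlsSin4 and a V2DF sin call to vmldSin2,
   while log and logf are special-cased as vmldLn2 and vmlsLn4.  A loop
   such as (a sketch, assuming vectorization is enabled)

     void
     vsin (float *a, const float *b, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = __builtin_sinf (b[i]);
     }

   would then be expected to call vmlsSin4 from its vectorized body.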
*/ 25222 if (!flag_unsafe_math_optimizations) 25223 return NULL_TREE; 25224 25225 el_mode = TYPE_MODE (TREE_TYPE (type_out)); 25226 n = TYPE_VECTOR_SUBPARTS (type_out); 25227 in_mode = TYPE_MODE (TREE_TYPE (type_in)); 25228 in_n = TYPE_VECTOR_SUBPARTS (type_in); 25229 if (el_mode != in_mode 25230 || n != in_n) 25231 return NULL_TREE; 25232 25233 switch (fn) 25234 { 25235 case BUILT_IN_EXP: 25236 case BUILT_IN_LOG: 25237 case BUILT_IN_LOG10: 25238 case BUILT_IN_POW: 25239 case BUILT_IN_TANH: 25240 case BUILT_IN_TAN: 25241 case BUILT_IN_ATAN: 25242 case BUILT_IN_ATAN2: 25243 case BUILT_IN_ATANH: 25244 case BUILT_IN_CBRT: 25245 case BUILT_IN_SINH: 25246 case BUILT_IN_SIN: 25247 case BUILT_IN_ASINH: 25248 case BUILT_IN_ASIN: 25249 case BUILT_IN_COSH: 25250 case BUILT_IN_COS: 25251 case BUILT_IN_ACOSH: 25252 case BUILT_IN_ACOS: 25253 if (el_mode != DFmode || n != 2) 25254 return NULL_TREE; 25255 break; 25256 25257 case BUILT_IN_EXPF: 25258 case BUILT_IN_LOGF: 25259 case BUILT_IN_LOG10F: 25260 case BUILT_IN_POWF: 25261 case BUILT_IN_TANHF: 25262 case BUILT_IN_TANF: 25263 case BUILT_IN_ATANF: 25264 case BUILT_IN_ATAN2F: 25265 case BUILT_IN_ATANHF: 25266 case BUILT_IN_CBRTF: 25267 case BUILT_IN_SINHF: 25268 case BUILT_IN_SINF: 25269 case BUILT_IN_ASINHF: 25270 case BUILT_IN_ASINF: 25271 case BUILT_IN_COSHF: 25272 case BUILT_IN_COSF: 25273 case BUILT_IN_ACOSHF: 25274 case BUILT_IN_ACOSF: 25275 if (el_mode != SFmode || n != 4) 25276 return NULL_TREE; 25277 break; 25278 25279 default: 25280 return NULL_TREE; 25281 } 25282 25283 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn])); 25284 25285 if (fn == BUILT_IN_LOGF) 25286 strcpy (name, "vmlsLn4"); 25287 else if (fn == BUILT_IN_LOG) 25288 strcpy (name, "vmldLn2"); 25289 else if (n == 4) 25290 { 25291 sprintf (name, "vmls%s", bname+10); 25292 name[strlen (name)-1] = '4'; 25293 } 25294 else 25295 sprintf (name, "vmld%s2", bname+10); 25296 25297 /* Convert to uppercase. */ 25298 name[4] &= ~0x20; 25299 25300 arity = 0; 25301 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args; 25302 args = TREE_CHAIN (args)) 25303 arity++; 25304 25305 if (arity == 1) 25306 fntype = build_function_type_list (type_out, type_in, NULL); 25307 else 25308 fntype = build_function_type_list (type_out, type_in, type_in, NULL); 25309 25310 /* Build a function declaration for the vectorized function. */ 25311 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype); 25312 TREE_PUBLIC (new_fndecl) = 1; 25313 DECL_EXTERNAL (new_fndecl) = 1; 25314 DECL_IS_NOVOPS (new_fndecl) = 1; 25315 TREE_READONLY (new_fndecl) = 1; 25316 25317 return new_fndecl; 25318 } 25319 25320 /* Handler for an ACML-style interface to 25321 a library with vectorized intrinsics. */ 25322 25323 static tree 25324 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in) 25325 { 25326 char name[20] = "__vr.._"; 25327 tree fntype, new_fndecl, args; 25328 unsigned arity; 25329 const char *bname; 25330 enum machine_mode el_mode, in_mode; 25331 int n, in_n; 25332 25333 /* The ACML is 64bits only and suitable for unsafe math only as 25334 it does not correctly support parts of IEEE with the required 25335 precision such as denormals. 
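   As a worked example of the sprintf mangling below: with
   -mveclibabi=acml a V2DF sin becomes __vrd2_sin and a V4SF sinf
   becomes __vrs4_sinf.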
*/ 25336 if (!TARGET_64BIT 25337 || !flag_unsafe_math_optimizations) 25338 return NULL_TREE; 25339 25340 el_mode = TYPE_MODE (TREE_TYPE (type_out)); 25341 n = TYPE_VECTOR_SUBPARTS (type_out); 25342 in_mode = TYPE_MODE (TREE_TYPE (type_in)); 25343 in_n = TYPE_VECTOR_SUBPARTS (type_in); 25344 if (el_mode != in_mode 25345 || n != in_n) 25346 return NULL_TREE; 25347 25348 switch (fn) 25349 { 25350 case BUILT_IN_SIN: 25351 case BUILT_IN_COS: 25352 case BUILT_IN_EXP: 25353 case BUILT_IN_LOG: 25354 case BUILT_IN_LOG2: 25355 case BUILT_IN_LOG10: 25356 name[4] = 'd'; 25357 name[5] = '2'; 25358 if (el_mode != DFmode 25359 || n != 2) 25360 return NULL_TREE; 25361 break; 25362 25363 case BUILT_IN_SINF: 25364 case BUILT_IN_COSF: 25365 case BUILT_IN_EXPF: 25366 case BUILT_IN_POWF: 25367 case BUILT_IN_LOGF: 25368 case BUILT_IN_LOG2F: 25369 case BUILT_IN_LOG10F: 25370 name[4] = 's'; 25371 name[5] = '4'; 25372 if (el_mode != SFmode 25373 || n != 4) 25374 return NULL_TREE; 25375 break; 25376 25377 default: 25378 return NULL_TREE; 25379 } 25380 25381 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn])); 25382 sprintf (name + 7, "%s", bname+10); 25383 25384 arity = 0; 25385 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args; 25386 args = TREE_CHAIN (args)) 25387 arity++; 25388 25389 if (arity == 1) 25390 fntype = build_function_type_list (type_out, type_in, NULL); 25391 else 25392 fntype = build_function_type_list (type_out, type_in, type_in, NULL); 25393 25394 /* Build a function declaration for the vectorized function. */ 25395 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype); 25396 TREE_PUBLIC (new_fndecl) = 1; 25397 DECL_EXTERNAL (new_fndecl) = 1; 25398 DECL_IS_NOVOPS (new_fndecl) = 1; 25399 TREE_READONLY (new_fndecl) = 1; 25400 25401 return new_fndecl; 25402 } 25403 25404 25405 /* Returns a decl of a function that implements conversion of an integer vector 25406 into a floating-point vector, or vice-versa. TYPE is the type of the integer 25407 side of the conversion. 25408 Return NULL_TREE if it is not available. */ 25409 25410 static tree 25411 ix86_vectorize_builtin_conversion (unsigned int code, tree type) 25412 { 25413 if (!TARGET_SSE2 || TREE_CODE (type) != VECTOR_TYPE 25414 /* There are only conversions from/to signed integers. */ 25415 || TYPE_UNSIGNED (TREE_TYPE (type))) 25416 return NULL_TREE; 25417 25418 switch (code) 25419 { 25420 case FLOAT_EXPR: 25421 switch (TYPE_MODE (type)) 25422 { 25423 case V4SImode: 25424 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS]; 25425 default: 25426 return NULL_TREE; 25427 } 25428 25429 case FIX_TRUNC_EXPR: 25430 switch (TYPE_MODE (type)) 25431 { 25432 case V4SImode: 25433 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]; 25434 default: 25435 return NULL_TREE; 25436 } 25437 default: 25438 return NULL_TREE; 25439 25440 } 25441 } 25442 25443 /* Returns a code for a target-specific builtin that implements 25444 reciprocal of the function, or NULL_TREE if not available. */ 25445 25446 static tree 25447 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, 25448 bool sqrt ATTRIBUTE_UNUSED) 25449 { 25450 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p () 25451 && flag_finite_math_only && !flag_trapping_math 25452 && flag_unsafe_math_optimizations)) 25453 return NULL_TREE; 25454 25455 if (md_fn) 25456 /* Machine dependent builtins. */ 25457 switch (fn) 25458 { 25459 /* Vectorized version of sqrt to rsqrt conversion. 
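   That is, once the vectorizer has produced SQRTPS_NR, the reciprocal
   pass can substitute RSQRTPS_NR, so rsqrtps plus a Newton-Raphson
   step replaces sqrtps followed by a divide.  A loop that would be
   expected to benefit (a sketch, assuming -O2 -ffast-math -mrecip
   -mfpmath=sse and vectorization):

     void
     rvsqrt (float *a, const float *b, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = 1.0f / __builtin_sqrtf (b[i]);
     }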
*/ 25460 case IX86_BUILTIN_SQRTPS_NR: 25461 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR]; 25462 25463 default: 25464 return NULL_TREE; 25465 } 25466 else 25467 /* Normal builtins. */ 25468 switch (fn) 25469 { 25470 /* Sqrt to rsqrt conversion. */ 25471 case BUILT_IN_SQRTF: 25472 return ix86_builtins[IX86_BUILTIN_RSQRTF]; 25473 25474 default: 25475 return NULL_TREE; 25476 } 25477 } 25478 25479 /* Store OPERAND to the memory after reload is completed. This means 25480 that we can't easily use assign_stack_local. */ 25481 rtx 25482 ix86_force_to_memory (enum machine_mode mode, rtx operand) 25483 { 25484 rtx result; 25485 25486 gcc_assert (reload_completed); 25487 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE) 25488 { 25489 result = gen_rtx_MEM (mode, 25490 gen_rtx_PLUS (Pmode, 25491 stack_pointer_rtx, 25492 GEN_INT (-RED_ZONE_SIZE))); 25493 emit_move_insn (result, operand); 25494 } 25495 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT) 25496 { 25497 switch (mode) 25498 { 25499 case HImode: 25500 case SImode: 25501 operand = gen_lowpart (DImode, operand); 25502 /* FALLTHRU */ 25503 case DImode: 25504 emit_insn ( 25505 gen_rtx_SET (VOIDmode, 25506 gen_rtx_MEM (DImode, 25507 gen_rtx_PRE_DEC (DImode, 25508 stack_pointer_rtx)), 25509 operand)); 25510 break; 25511 default: 25512 gcc_unreachable (); 25513 } 25514 result = gen_rtx_MEM (mode, stack_pointer_rtx); 25515 } 25516 else 25517 { 25518 switch (mode) 25519 { 25520 case DImode: 25521 { 25522 rtx operands[2]; 25523 split_di (&operand, 1, operands, operands + 1); 25524 emit_insn ( 25525 gen_rtx_SET (VOIDmode, 25526 gen_rtx_MEM (SImode, 25527 gen_rtx_PRE_DEC (Pmode, 25528 stack_pointer_rtx)), 25529 operands[1])); 25530 emit_insn ( 25531 gen_rtx_SET (VOIDmode, 25532 gen_rtx_MEM (SImode, 25533 gen_rtx_PRE_DEC (Pmode, 25534 stack_pointer_rtx)), 25535 operands[0])); 25536 } 25537 break; 25538 case HImode: 25539 /* Store HImodes as SImodes. */ 25540 operand = gen_lowpart (SImode, operand); 25541 /* FALLTHRU */ 25542 case SImode: 25543 emit_insn ( 25544 gen_rtx_SET (VOIDmode, 25545 gen_rtx_MEM (GET_MODE (operand), 25546 gen_rtx_PRE_DEC (SImode, 25547 stack_pointer_rtx)), 25548 operand)); 25549 break; 25550 default: 25551 gcc_unreachable (); 25552 } 25553 result = gen_rtx_MEM (mode, stack_pointer_rtx); 25554 } 25555 return result; 25556 } 25557 25558 /* Free operand from the memory. */ 25559 void 25560 ix86_free_from_memory (enum machine_mode mode) 25561 { 25562 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI) 25563 { 25564 int size; 25565 25566 if (mode == DImode || TARGET_64BIT) 25567 size = 8; 25568 else 25569 size = 4; 25570 /* Use LEA to deallocate stack space. In peephole2 it will be converted 25571 to pop or add instruction if registers are available. */ 25572 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, 25573 gen_rtx_PLUS (Pmode, stack_pointer_rtx, 25574 GEN_INT (size)))); 25575 } 25576 } 25577 25578 /* Put float CONST_DOUBLE in the constant pool instead of fp regs. 25579 QImode must go into class Q_REGS. 25580 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and 25581 movdf to do mem-to-mem moves through integer regs. */ 25582 enum reg_class 25583 ix86_preferred_reload_class (rtx x, enum reg_class regclass) 25584 { 25585 enum machine_mode mode = GET_MODE (x); 25586 25587 /* We're only allowed to return a subclass of CLASS. Many of the 25588 following checks fail for NO_REGS, so eliminate that early. */ 25589 if (regclass == NO_REGS) 25590 return NO_REGS; 25591 25592 /* All classes can load zeros. 
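     MMX and SSE registers can be zeroed directly (pxor/xorps), so the constant-pool restriction below only matters for nonzero constants.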
*/ 25593 if (x == CONST0_RTX (mode)) 25594 return regclass; 25595 25596 /* Force constants into memory if we are loading a (nonzero) constant into 25597 an MMX or SSE register. This is because there are no MMX/SSE instructions 25598 to load from a constant. */ 25599 if (CONSTANT_P (x) 25600 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass))) 25601 return NO_REGS; 25602 25603 /* Prefer SSE regs only, if we can use them for math. */ 25604 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode)) 25605 return SSE_CLASS_P (regclass) ? regclass : NO_REGS; 25606 25607 /* Floating-point constants need more complex checks. */ 25608 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode) 25609 { 25610 /* General regs can load everything. */ 25611 if (reg_class_subset_p (regclass, GENERAL_REGS)) 25612 return regclass; 25613 25614 /* Floats can load 0 and 1 plus some others. Note that we eliminated 25615 zero above. We only want to wind up preferring 80387 registers if 25616 we plan on doing computation with them. */ 25617 if (TARGET_80387 25618 && standard_80387_constant_p (x)) 25619 { 25620 /* Limit class to non-sse. */ 25621 if (regclass == FLOAT_SSE_REGS) 25622 return FLOAT_REGS; 25623 if (regclass == FP_TOP_SSE_REGS) 25624 return FP_TOP_REG; 25625 if (regclass == FP_SECOND_SSE_REGS) 25626 return FP_SECOND_REG; 25627 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS) 25628 return regclass; 25629 } 25630 25631 return NO_REGS; 25632 } 25633 25634 /* Generally when we see PLUS here, it's the function invariant 25635 (plus soft-fp const_int). Which can only be computed into general 25636 regs. */ 25637 if (GET_CODE (x) == PLUS) 25638 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS; 25639 25640 /* QImode constants are easy to load, but non-constant QImode data 25641 must go into Q_REGS. */ 25642 if (GET_MODE (x) == QImode && !CONSTANT_P (x)) 25643 { 25644 if (reg_class_subset_p (regclass, Q_REGS)) 25645 return regclass; 25646 if (reg_class_subset_p (Q_REGS, regclass)) 25647 return Q_REGS; 25648 return NO_REGS; 25649 } 25650 25651 return regclass; 25652 } 25653 25654 /* Discourage putting floating-point values in SSE registers unless 25655 SSE math is being used, and likewise for the 387 registers. */ 25656 enum reg_class 25657 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass) 25658 { 25659 enum machine_mode mode = GET_MODE (x); 25660 25661 /* Restrict the output reload class to the register bank that we are doing 25662 math on. If we would like not to return a subset of CLASS, reject this 25663 alternative: if reload cannot do this, it will still use its choice. */ 25664 mode = GET_MODE (x); 25665 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) 25666 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS; 25667 25668 if (X87_FLOAT_MODE_P (mode)) 25669 { 25670 if (regclass == FP_TOP_SSE_REGS) 25671 return FP_TOP_REG; 25672 else if (regclass == FP_SECOND_SSE_REGS) 25673 return FP_SECOND_REG; 25674 else 25675 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS; 25676 } 25677 25678 return regclass; 25679 } 25680 25681 static enum reg_class 25682 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass, 25683 enum machine_mode mode, 25684 secondary_reload_info *sri ATTRIBUTE_UNUSED) 25685 { 25686 /* QImode spills from non-QI registers require 25687 intermediate register on 32bit targets. 
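     For example, a QImode value held in %esi or %edi has no byte subregister in 32-bit mode, so storing it to memory must go through one of %eax-%edx; Q_REGS is requested below when the operand is in memory.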
*/ 25688 if (!in_p && mode == QImode && !TARGET_64BIT 25689 && (rclass == GENERAL_REGS 25690 || rclass == LEGACY_REGS 25691 || rclass == INDEX_REGS)) 25692 { 25693 int regno; 25694 25695 if (REG_P (x)) 25696 regno = REGNO (x); 25697 else 25698 regno = -1; 25699 25700 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG) 25701 regno = true_regnum (x); 25702 25703 /* Return Q_REGS if the operand is in memory. */ 25704 if (regno == -1) 25705 return Q_REGS; 25706 } 25707 25708 return NO_REGS; 25709 } 25710 25711 /* If we are copying between general and FP registers, we need a memory 25712 location. The same is true for SSE and MMX registers. 25713 25714 To optimize register_move_cost performance, allow inline variant. 25715 25716 The macro can't work reliably when one of the CLASSES is class containing 25717 registers from multiple units (SSE, MMX, integer). We avoid this by never 25718 combining those units in single alternative in the machine description. 25719 Ensure that this constraint holds to avoid unexpected surprises. 25720 25721 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not 25722 enforce these sanity checks. */ 25723 25724 static inline int 25725 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2, 25726 enum machine_mode mode, int strict) 25727 { 25728 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1) 25729 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2) 25730 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) 25731 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2) 25732 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1) 25733 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)) 25734 { 25735 gcc_assert (!strict); 25736 return true; 25737 } 25738 25739 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)) 25740 return true; 25741 25742 /* ??? This is a lie. We do have moves between mmx/general, and for 25743 mmx/sse2. But by saying we need secondary memory we discourage the 25744 register allocator from using the mmx registers unless needed. */ 25745 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)) 25746 return true; 25747 25748 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) 25749 { 25750 /* SSE1 doesn't have any direct moves from other classes. */ 25751 if (!TARGET_SSE2) 25752 return true; 25753 25754 /* If the target says that inter-unit moves are more expensive 25755 than moving through memory, then don't generate them. */ 25756 if (!TARGET_INTER_UNIT_MOVES) 25757 return true; 25758 25759 /* Between SSE and general, we have moves no larger than word size. */ 25760 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) 25761 return true; 25762 } 25763 25764 return false; 25765 } 25766 25767 int 25768 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2, 25769 enum machine_mode mode, int strict) 25770 { 25771 return inline_secondary_memory_needed (class1, class2, mode, strict); 25772 } 25773 25774 /* Return true if the registers in CLASS cannot represent the change from 25775 modes FROM to TO. */ 25776 25777 bool 25778 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to, 25779 enum reg_class regclass) 25780 { 25781 if (from == to) 25782 return false; 25783 25784 /* x87 registers can't do subreg at all, as all values are reformatted 25785 to extended precision. 
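     For example, (subreg:SI (reg:DF)) would have to expose the stored bit pattern, but a value in an x87 stack register is kept in 80-bit extended format, so the subreg cannot be honoured.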
*/ 25786 if (MAYBE_FLOAT_CLASS_P (regclass)) 25787 return true; 25788 25789 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass)) 25790 { 25791 /* Vector registers do not support QI or HImode loads. If we don't 25792 disallow a change to these modes, reload will assume it's ok to 25793 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects 25794 the vec_dupv4hi pattern. */ 25795 if (GET_MODE_SIZE (from) < 4) 25796 return true; 25797 25798 /* Vector registers do not support subreg with nonzero offsets, which 25799 are otherwise valid for integer registers. Since we can't see 25800 whether we have a nonzero offset from here, prohibit all 25801 nonparadoxical subregs changing size. */ 25802 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from)) 25803 return true; 25804 } 25805 25806 return false; 25807 } 25808 25809 /* Return the cost of moving data of mode M between a 25810 register and memory. A value of 2 is the default; this cost is 25811 relative to those in `REGISTER_MOVE_COST'. 25812 25813 This function is used extensively by register_move_cost that is used to 25814 build tables at startup. Make it inline in this case. 25815 When IN is 2, return maximum of in and out move cost. 25816 25817 If moving between registers and memory is more expensive than 25818 between two registers, you should define this macro to express the 25819 relative cost. 25820 25821 Model also increased moving costs of QImode registers in non 25822 Q_REGS classes. 25823 */ 25824 static inline int 25825 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass, 25826 int in) 25827 { 25828 int cost; 25829 if (FLOAT_CLASS_P (regclass)) 25830 { 25831 int index; 25832 switch (mode) 25833 { 25834 case SFmode: 25835 index = 0; 25836 break; 25837 case DFmode: 25838 index = 1; 25839 break; 25840 case XFmode: 25841 index = 2; 25842 break; 25843 default: 25844 return 100; 25845 } 25846 if (in == 2) 25847 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]); 25848 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index]; 25849 } 25850 if (SSE_CLASS_P (regclass)) 25851 { 25852 int index; 25853 switch (GET_MODE_SIZE (mode)) 25854 { 25855 case 4: 25856 index = 0; 25857 break; 25858 case 8: 25859 index = 1; 25860 break; 25861 case 16: 25862 index = 2; 25863 break; 25864 default: 25865 return 100; 25866 } 25867 if (in == 2) 25868 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]); 25869 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index]; 25870 } 25871 if (MMX_CLASS_P (regclass)) 25872 { 25873 int index; 25874 switch (GET_MODE_SIZE (mode)) 25875 { 25876 case 4: 25877 index = 0; 25878 break; 25879 case 8: 25880 index = 1; 25881 break; 25882 default: 25883 return 100; 25884 } 25885 if (in) 25886 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]); 25887 return in ? 
ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index]; 25888 } 25889 switch (GET_MODE_SIZE (mode)) 25890 { 25891 case 1: 25892 if (Q_CLASS_P (regclass) || TARGET_64BIT) 25893 { 25894 if (!in) 25895 return ix86_cost->int_store[0]; 25896 if (TARGET_PARTIAL_REG_DEPENDENCY 25897 && optimize_function_for_speed_p (cfun)) 25898 cost = ix86_cost->movzbl_load; 25899 else 25900 cost = ix86_cost->int_load[0]; 25901 if (in == 2) 25902 return MAX (cost, ix86_cost->int_store[0]); 25903 return cost; 25904 } 25905 else 25906 { 25907 if (in == 2) 25908 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4); 25909 if (in) 25910 return ix86_cost->movzbl_load; 25911 else 25912 return ix86_cost->int_store[0] + 4; 25913 } 25914 break; 25915 case 2: 25916 if (in == 2) 25917 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]); 25918 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1]; 25919 default: 25920 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */ 25921 if (mode == TFmode) 25922 mode = XFmode; 25923 if (in == 2) 25924 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]); 25925 else if (in) 25926 cost = ix86_cost->int_load[2]; 25927 else 25928 cost = ix86_cost->int_store[2]; 25929 return (cost * (((int) GET_MODE_SIZE (mode) 25930 + UNITS_PER_WORD - 1) / UNITS_PER_WORD)); 25931 } 25932 } 25933 25934 int 25935 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in) 25936 { 25937 return inline_memory_move_cost (mode, regclass, in); 25938 } 25939 25940 25941 /* Return the cost of moving data from a register in class CLASS1 to 25942 one in class CLASS2. 25943 25944 It is not required that the cost always equal 2 when FROM is the same as TO; 25945 on some machines it is expensive to move between registers if they are not 25946 general registers. */ 25947 25948 int 25949 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1, 25950 enum reg_class class2) 25951 { 25952 /* In case we require secondary memory, compute cost of the store followed 25953 by load. In order to avoid bad register allocation choices, we need 25954 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */ 25955 25956 if (inline_secondary_memory_needed (class1, class2, mode, 0)) 25957 { 25958 int cost = 1; 25959 25960 cost += inline_memory_move_cost (mode, class1, 2); 25961 cost += inline_memory_move_cost (mode, class2, 2); 25962 25963 /* In case of copying from general_purpose_register we may emit multiple 25964 stores followed by single load causing memory size mismatch stall. 25965 Count this as arbitrarily high cost of 20. */ 25966 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode)) 25967 cost += 20; 25968 25969 /* In the case of FP/MMX moves, the registers actually overlap, and we 25970 have to switch modes in order to treat them differently. */ 25971 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2)) 25972 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1))) 25973 cost += 20; 25974 25975 return cost; 25976 } 25977 25978 /* Moves between SSE/MMX and integer unit are expensive. */ 25979 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2) 25980 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) 25981 25982 /* ??? By keeping returned value relatively high, we limit the number 25983 of moves between integer and MMX/SSE registers for all targets. 
25984 Additionally, high value prevents problem with x86_modes_tieable_p(), 25985 where integer modes in MMX/SSE registers are not tieable 25986 because of missing QImode and HImode moves to, from or between 25987 MMX/SSE registers. */ 25988 return MAX (8, ix86_cost->mmxsse_to_integer); 25989 25990 if (MAYBE_FLOAT_CLASS_P (class1)) 25991 return ix86_cost->fp_move; 25992 if (MAYBE_SSE_CLASS_P (class1)) 25993 return ix86_cost->sse_move; 25994 if (MAYBE_MMX_CLASS_P (class1)) 25995 return ix86_cost->mmx_move; 25996 return 2; 25997 } 25998 25999 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */ 26000 26001 bool 26002 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode) 26003 { 26004 /* Flags and only flags can only hold CCmode values. */ 26005 if (CC_REGNO_P (regno)) 26006 return GET_MODE_CLASS (mode) == MODE_CC; 26007 if (GET_MODE_CLASS (mode) == MODE_CC 26008 || GET_MODE_CLASS (mode) == MODE_RANDOM 26009 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) 26010 return 0; 26011 if (FP_REGNO_P (regno)) 26012 return VALID_FP_MODE_P (mode); 26013 if (SSE_REGNO_P (regno)) 26014 { 26015 /* We implement the move patterns for all vector modes into and 26016 out of SSE registers, even when no operation instructions 26017 are available. OImode move is available only when AVX is 26018 enabled. */ 26019 return ((TARGET_AVX && mode == OImode) 26020 || VALID_AVX256_REG_MODE (mode) 26021 || VALID_SSE_REG_MODE (mode) 26022 || VALID_SSE2_REG_MODE (mode) 26023 || VALID_MMX_REG_MODE (mode) 26024 || VALID_MMX_REG_MODE_3DNOW (mode)); 26025 } 26026 if (MMX_REGNO_P (regno)) 26027 { 26028 /* We implement the move patterns for 3DNOW modes even in MMX mode, 26029 so if the register is available at all, then we can move data of 26030 the given mode into or out of it. */ 26031 return (VALID_MMX_REG_MODE (mode) 26032 || VALID_MMX_REG_MODE_3DNOW (mode)); 26033 } 26034 26035 if (mode == QImode) 26036 { 26037 /* Take care for QImode values - they can be in non-QI regs, 26038 but then they do cause partial register stalls. */ 26039 if (regno <= BX_REG || TARGET_64BIT) 26040 return 1; 26041 if (!TARGET_PARTIAL_REG_STALL) 26042 return 1; 26043 return reload_in_progress || reload_completed; 26044 } 26045 /* We handle both integer and floats in the general purpose registers. */ 26046 else if (VALID_INT_MODE_P (mode)) 26047 return 1; 26048 else if (VALID_FP_MODE_P (mode)) 26049 return 1; 26050 else if (VALID_DFP_MODE_P (mode)) 26051 return 1; 26052 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go 26053 on to use that value in smaller contexts, this can easily force a 26054 pseudo to be allocated to GENERAL_REGS. Since this is no worse than 26055 supporting DImode, allow it. */ 26056 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode)) 26057 return 1; 26058 26059 return 0; 26060 } 26061 26062 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a 26063 tieable integer mode. */ 26064 26065 static bool 26066 ix86_tieable_integer_mode_p (enum machine_mode mode) 26067 { 26068 switch (mode) 26069 { 26070 case HImode: 26071 case SImode: 26072 return true; 26073 26074 case QImode: 26075 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL; 26076 26077 case DImode: 26078 return TARGET_64BIT; 26079 26080 default: 26081 return false; 26082 } 26083 } 26084 26085 /* Return true if MODE1 is accessible in a register that can hold MODE2 26086 without copying. That is, all register classes that can hold MODE2 26087 can also hold MODE1. 
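   For example, HImode and SImode tie in the general registers, and SFmode ties with XFmode on the FP stack, but TFmode is deliberately not tied with XFmode (see below).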
*/ 26088 26089 bool 26090 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) 26091 { 26092 if (mode1 == mode2) 26093 return true; 26094 26095 if (ix86_tieable_integer_mode_p (mode1) 26096 && ix86_tieable_integer_mode_p (mode2)) 26097 return true; 26098 26099 /* MODE2 being XFmode implies fp stack or general regs, which means we 26100 can tie any smaller floating point modes to it. Note that we do not 26101 tie this with TFmode. */ 26102 if (mode2 == XFmode) 26103 return mode1 == SFmode || mode1 == DFmode; 26104 26105 /* MODE2 being DFmode implies fp stack, general or sse regs, which means 26106 that we can tie it with SFmode. */ 26107 if (mode2 == DFmode) 26108 return mode1 == SFmode; 26109 26110 /* If MODE2 is only appropriate for an SSE register, then tie with 26111 any other mode acceptable to SSE registers. */ 26112 if (GET_MODE_SIZE (mode2) == 16 26113 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) 26114 return (GET_MODE_SIZE (mode1) == 16 26115 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); 26116 26117 /* If MODE2 is appropriate for an MMX register, then tie 26118 with any other mode acceptable to MMX registers. */ 26119 if (GET_MODE_SIZE (mode2) == 8 26120 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2)) 26121 return (GET_MODE_SIZE (mode1) == 8 26122 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1)); 26123 26124 return false; 26125 } 26126 26127 /* Compute a (partial) cost for rtx X. Return true if the complete 26128 cost has been computed, and false if subexpressions should be 26129 scanned. In either case, *TOTAL contains the cost result. */ 26130 26131 static bool 26132 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed) 26133 { 26134 enum rtx_code outer_code = (enum rtx_code) outer_code_i; 26135 enum machine_mode mode = GET_MODE (x); 26136 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost; 26137 26138 switch (code) 26139 { 26140 case CONST_INT: 26141 case CONST: 26142 case LABEL_REF: 26143 case SYMBOL_REF: 26144 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode)) 26145 *total = 3; 26146 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode)) 26147 *total = 2; 26148 else if (flag_pic && SYMBOLIC_CONST (x) 26149 && (!TARGET_64BIT 26150 || (!GET_CODE (x) != LABEL_REF 26151 && (GET_CODE (x) != SYMBOL_REF 26152 || !SYMBOL_REF_LOCAL_P (x))))) 26153 *total = 1; 26154 else 26155 *total = 0; 26156 return true; 26157 26158 case CONST_DOUBLE: 26159 if (mode == VOIDmode) 26160 *total = 0; 26161 else 26162 switch (standard_80387_constant_p (x)) 26163 { 26164 case 1: /* 0.0 */ 26165 *total = 1; 26166 break; 26167 default: /* Other constants */ 26168 *total = 2; 26169 break; 26170 case 0: 26171 case -1: 26172 /* Start with (MEM (SYMBOL_REF)), since that's where 26173 it'll probably end up. Add a penalty for size. */ 26174 *total = (COSTS_N_INSNS (1) 26175 + (flag_pic != 0 && !TARGET_64BIT) 26176 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2)); 26177 break; 26178 } 26179 return true; 26180 26181 case ZERO_EXTEND: 26182 /* The zero extensions is often completely free on x86_64, so make 26183 it as cheap as possible. 
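	 (Writing a 32-bit register implicitly clears the upper 32 bits, so SImode to DImode zero extension usually needs no instruction at all.)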
*/ 26184 if (TARGET_64BIT && mode == DImode 26185 && GET_MODE (XEXP (x, 0)) == SImode) 26186 *total = 1; 26187 else if (TARGET_ZERO_EXTEND_WITH_AND) 26188 *total = cost->add; 26189 else 26190 *total = cost->movzx; 26191 return false; 26192 26193 case SIGN_EXTEND: 26194 *total = cost->movsx; 26195 return false; 26196 26197 case ASHIFT: 26198 if (CONST_INT_P (XEXP (x, 1)) 26199 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT)) 26200 { 26201 HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); 26202 if (value == 1) 26203 { 26204 *total = cost->add; 26205 return false; 26206 } 26207 if ((value == 2 || value == 3) 26208 && cost->lea <= cost->shift_const) 26209 { 26210 *total = cost->lea; 26211 return false; 26212 } 26213 } 26214 /* FALLTHRU */ 26215 26216 case ROTATE: 26217 case ASHIFTRT: 26218 case LSHIFTRT: 26219 case ROTATERT: 26220 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode) 26221 { 26222 if (CONST_INT_P (XEXP (x, 1))) 26223 { 26224 if (INTVAL (XEXP (x, 1)) > 32) 26225 *total = cost->shift_const + COSTS_N_INSNS (2); 26226 else 26227 *total = cost->shift_const * 2; 26228 } 26229 else 26230 { 26231 if (GET_CODE (XEXP (x, 1)) == AND) 26232 *total = cost->shift_var * 2; 26233 else 26234 *total = cost->shift_var * 6 + COSTS_N_INSNS (2); 26235 } 26236 } 26237 else 26238 { 26239 if (CONST_INT_P (XEXP (x, 1))) 26240 *total = cost->shift_const; 26241 else 26242 *total = cost->shift_var; 26243 } 26244 return false; 26245 26246 case MULT: 26247 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) 26248 { 26249 /* ??? SSE scalar cost should be used here. */ 26250 *total = cost->fmul; 26251 return false; 26252 } 26253 else if (X87_FLOAT_MODE_P (mode)) 26254 { 26255 *total = cost->fmul; 26256 return false; 26257 } 26258 else if (FLOAT_MODE_P (mode)) 26259 { 26260 /* ??? SSE vector cost should be used here. */ 26261 *total = cost->fmul; 26262 return false; 26263 } 26264 else 26265 { 26266 rtx op0 = XEXP (x, 0); 26267 rtx op1 = XEXP (x, 1); 26268 int nbits; 26269 if (CONST_INT_P (XEXP (x, 1))) 26270 { 26271 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); 26272 for (nbits = 0; value != 0; value &= value - 1) 26273 nbits++; 26274 } 26275 else 26276 /* This is arbitrary. */ 26277 nbits = 7; 26278 26279 /* Compute costs correctly for widening multiplication. */ 26280 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND) 26281 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2 26282 == GET_MODE_SIZE (mode)) 26283 { 26284 int is_mulwiden = 0; 26285 enum machine_mode inner_mode = GET_MODE (op0); 26286 26287 if (GET_CODE (op0) == GET_CODE (op1)) 26288 is_mulwiden = 1, op1 = XEXP (op1, 0); 26289 else if (CONST_INT_P (op1)) 26290 { 26291 if (GET_CODE (op0) == SIGN_EXTEND) 26292 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode) 26293 == INTVAL (op1); 26294 else 26295 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode)); 26296 } 26297 26298 if (is_mulwiden) 26299 op0 = XEXP (op0, 0), mode = GET_MODE (op0); 26300 } 26301 26302 *total = (cost->mult_init[MODE_INDEX (mode)] 26303 + nbits * cost->mult_bit 26304 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed)); 26305 26306 return true; 26307 } 26308 26309 case DIV: 26310 case UDIV: 26311 case MOD: 26312 case UMOD: 26313 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) 26314 /* ??? SSE cost should be used here. */ 26315 *total = cost->fdiv; 26316 else if (X87_FLOAT_MODE_P (mode)) 26317 *total = cost->fdiv; 26318 else if (FLOAT_MODE_P (mode)) 26319 /* ??? SSE vector cost should be used here. 
*/ 26320 *total = cost->fdiv; 26321 else 26322 *total = cost->divide[MODE_INDEX (mode)]; 26323 return false; 26324 26325 case PLUS: 26326 if (GET_MODE_CLASS (mode) == MODE_INT 26327 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode)) 26328 { 26329 if (GET_CODE (XEXP (x, 0)) == PLUS 26330 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT 26331 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)) 26332 && CONSTANT_P (XEXP (x, 1))) 26333 { 26334 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); 26335 if (val == 2 || val == 4 || val == 8) 26336 { 26337 *total = cost->lea; 26338 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed); 26339 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), 26340 outer_code, speed); 26341 *total += rtx_cost (XEXP (x, 1), outer_code, speed); 26342 return true; 26343 } 26344 } 26345 else if (GET_CODE (XEXP (x, 0)) == MULT 26346 && CONST_INT_P (XEXP (XEXP (x, 0), 1))) 26347 { 26348 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); 26349 if (val == 2 || val == 4 || val == 8) 26350 { 26351 *total = cost->lea; 26352 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed); 26353 *total += rtx_cost (XEXP (x, 1), outer_code, speed); 26354 return true; 26355 } 26356 } 26357 else if (GET_CODE (XEXP (x, 0)) == PLUS) 26358 { 26359 *total = cost->lea; 26360 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed); 26361 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed); 26362 *total += rtx_cost (XEXP (x, 1), outer_code, speed); 26363 return true; 26364 } 26365 } 26366 /* FALLTHRU */ 26367 26368 case MINUS: 26369 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) 26370 { 26371 /* ??? SSE cost should be used here. */ 26372 *total = cost->fadd; 26373 return false; 26374 } 26375 else if (X87_FLOAT_MODE_P (mode)) 26376 { 26377 *total = cost->fadd; 26378 return false; 26379 } 26380 else if (FLOAT_MODE_P (mode)) 26381 { 26382 /* ??? SSE vector cost should be used here. */ 26383 *total = cost->fadd; 26384 return false; 26385 } 26386 /* FALLTHRU */ 26387 26388 case AND: 26389 case IOR: 26390 case XOR: 26391 if (!TARGET_64BIT && mode == DImode) 26392 { 26393 *total = (cost->add * 2 26394 + (rtx_cost (XEXP (x, 0), outer_code, speed) 26395 << (GET_MODE (XEXP (x, 0)) != DImode)) 26396 + (rtx_cost (XEXP (x, 1), outer_code, speed) 26397 << (GET_MODE (XEXP (x, 1)) != DImode))); 26398 return true; 26399 } 26400 /* FALLTHRU */ 26401 26402 case NEG: 26403 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) 26404 { 26405 /* ??? SSE cost should be used here. */ 26406 *total = cost->fchs; 26407 return false; 26408 } 26409 else if (X87_FLOAT_MODE_P (mode)) 26410 { 26411 *total = cost->fchs; 26412 return false; 26413 } 26414 else if (FLOAT_MODE_P (mode)) 26415 { 26416 /* ??? SSE vector cost should be used here. */ 26417 *total = cost->fchs; 26418 return false; 26419 } 26420 /* FALLTHRU */ 26421 26422 case NOT: 26423 if (!TARGET_64BIT && mode == DImode) 26424 *total = cost->add * 2; 26425 else 26426 *total = cost->add; 26427 return false; 26428 26429 case COMPARE: 26430 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT 26431 && XEXP (XEXP (x, 0), 1) == const1_rtx 26432 && CONST_INT_P (XEXP (XEXP (x, 0), 2)) 26433 && XEXP (x, 1) == const0_rtx) 26434 { 26435 /* This kind of construct is implemented using test[bwl]. 26436 Treat it as if we had an AND. 
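	     For instance, testing a single bit of a register becomes something like testb $0x8, %al instead of a shift-and-compare sequence.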
*/ 26437 *total = (cost->add 26438 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed) 26439 + rtx_cost (const1_rtx, outer_code, speed)); 26440 return true; 26441 } 26442 return false; 26443 26444 case FLOAT_EXTEND: 26445 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) 26446 *total = 0; 26447 return false; 26448 26449 case ABS: 26450 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) 26451 /* ??? SSE cost should be used here. */ 26452 *total = cost->fabs; 26453 else if (X87_FLOAT_MODE_P (mode)) 26454 *total = cost->fabs; 26455 else if (FLOAT_MODE_P (mode)) 26456 /* ??? SSE vector cost should be used here. */ 26457 *total = cost->fabs; 26458 return false; 26459 26460 case SQRT: 26461 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) 26462 /* ??? SSE cost should be used here. */ 26463 *total = cost->fsqrt; 26464 else if (X87_FLOAT_MODE_P (mode)) 26465 *total = cost->fsqrt; 26466 else if (FLOAT_MODE_P (mode)) 26467 /* ??? SSE vector cost should be used here. */ 26468 *total = cost->fsqrt; 26469 return false; 26470 26471 case UNSPEC: 26472 if (XINT (x, 1) == UNSPEC_TP) 26473 *total = 0; 26474 return false; 26475 26476 default: 26477 return false; 26478 } 26479 } 26480 26481 #if TARGET_MACHO 26482 26483 static int current_machopic_label_num; 26484 26485 /* Given a symbol name and its associated stub, write out the 26486 definition of the stub. */ 26487 26488 void 26489 machopic_output_stub (FILE *file, const char *symb, const char *stub) 26490 { 26491 unsigned int length; 26492 char *binder_name, *symbol_name, lazy_ptr_name[32]; 26493 int label = ++current_machopic_label_num; 26494 26495 /* For 64-bit we shouldn't get here. */ 26496 gcc_assert (!TARGET_64BIT); 26497 26498 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */ 26499 symb = (*targetm.strip_name_encoding) (symb); 26500 26501 length = strlen (stub); 26502 binder_name = XALLOCAVEC (char, length + 32); 26503 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length); 26504 26505 length = strlen (symb); 26506 symbol_name = XALLOCAVEC (char, length + 32); 26507 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); 26508 26509 sprintf (lazy_ptr_name, "L%d$lz", label); 26510 26511 if (MACHOPIC_PURE) 26512 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]); 26513 else 26514 switch_to_section (darwin_sections[machopic_symbol_stub_section]); 26515 26516 fprintf (file, "%s:\n", stub); 26517 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 26518 26519 if (MACHOPIC_PURE) 26520 { 26521 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label); 26522 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label); 26523 fprintf (file, "\tjmp\t*%%edx\n"); 26524 } 26525 else 26526 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name); 26527 26528 fprintf (file, "%s:\n", binder_name); 26529 26530 if (MACHOPIC_PURE) 26531 { 26532 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label); 26533 fprintf (file, "\tpushl\t%%eax\n"); 26534 } 26535 else 26536 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name); 26537 26538 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n"); 26539 26540 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]); 26541 fprintf (file, "%s:\n", lazy_ptr_name); 26542 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 26543 fprintf (file, "\t.long %s\n", binder_name); 26544 } 26545 26546 void 26547 darwin_x86_file_end (void) 26548 { 26549 darwin_file_end (); 26550 ix86_file_end (); 26551 } 26552 #endif /* TARGET_MACHO */ 26553 26554 /* Order the 
registers for register allocator. */ 26555 26556 void 26557 x86_order_regs_for_local_alloc (void) 26558 { 26559 int pos = 0; 26560 int i; 26561 26562 /* First allocate the local general purpose registers. */ 26563 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 26564 if (GENERAL_REGNO_P (i) && call_used_regs[i]) 26565 reg_alloc_order [pos++] = i; 26566 26567 /* Global general purpose registers. */ 26568 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 26569 if (GENERAL_REGNO_P (i) && !call_used_regs[i]) 26570 reg_alloc_order [pos++] = i; 26571 26572 /* x87 registers come first in case we are doing FP math 26573 using them. */ 26574 if (!TARGET_SSE_MATH) 26575 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) 26576 reg_alloc_order [pos++] = i; 26577 26578 /* SSE registers. */ 26579 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) 26580 reg_alloc_order [pos++] = i; 26581 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) 26582 reg_alloc_order [pos++] = i; 26583 26584 /* x87 registers. */ 26585 if (TARGET_SSE_MATH) 26586 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) 26587 reg_alloc_order [pos++] = i; 26588 26589 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) 26590 reg_alloc_order [pos++] = i; 26591 26592 /* Initialize the rest of array as we do not allocate some registers 26593 at all. */ 26594 while (pos < FIRST_PSEUDO_REGISTER) 26595 reg_alloc_order [pos++] = 0; 26596 } 26597 26598 /* Handle a "ms_abi" or "sysv" attribute; arguments as in 26599 struct attribute_spec.handler. */ 26600 static tree 26601 ix86_handle_abi_attribute (tree *node, tree name, 26602 tree args ATTRIBUTE_UNUSED, 26603 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 26604 { 26605 if (TREE_CODE (*node) != FUNCTION_TYPE 26606 && TREE_CODE (*node) != METHOD_TYPE 26607 && TREE_CODE (*node) != FIELD_DECL 26608 && TREE_CODE (*node) != TYPE_DECL) 26609 { 26610 warning (OPT_Wattributes, "%qs attribute only applies to functions", 26611 IDENTIFIER_POINTER (name)); 26612 *no_add_attrs = true; 26613 return NULL_TREE; 26614 } 26615 if (!TARGET_64BIT) 26616 { 26617 warning (OPT_Wattributes, "%qs attribute only available for 64-bit", 26618 IDENTIFIER_POINTER (name)); 26619 *no_add_attrs = true; 26620 return NULL_TREE; 26621 } 26622 26623 /* Can combine regparm with all attributes but fastcall. */ 26624 if (is_attribute_p ("ms_abi", name)) 26625 { 26626 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node))) 26627 { 26628 error ("ms_abi and sysv_abi attributes are not compatible"); 26629 } 26630 26631 return NULL_TREE; 26632 } 26633 else if (is_attribute_p ("sysv_abi", name)) 26634 { 26635 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node))) 26636 { 26637 error ("ms_abi and sysv_abi attributes are not compatible"); 26638 } 26639 26640 return NULL_TREE; 26641 } 26642 26643 return NULL_TREE; 26644 } 26645 26646 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in 26647 struct attribute_spec.handler. 
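   A minimal usage sketch: struct __attribute__ ((ms_struct)) S { int i : 3; }; requests the MS bitfield layout for S independently of the -mms-bitfields default.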
*/ 26648 static tree 26649 ix86_handle_struct_attribute (tree *node, tree name, 26650 tree args ATTRIBUTE_UNUSED, 26651 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 26652 { 26653 tree *type = NULL; 26654 if (DECL_P (*node)) 26655 { 26656 if (TREE_CODE (*node) == TYPE_DECL) 26657 type = &TREE_TYPE (*node); 26658 } 26659 else 26660 type = node; 26661 26662 if (!(type && (TREE_CODE (*type) == RECORD_TYPE 26663 || TREE_CODE (*type) == UNION_TYPE))) 26664 { 26665 warning (OPT_Wattributes, "%qs attribute ignored", 26666 IDENTIFIER_POINTER (name)); 26667 *no_add_attrs = true; 26668 } 26669 26670 else if ((is_attribute_p ("ms_struct", name) 26671 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type))) 26672 || ((is_attribute_p ("gcc_struct", name) 26673 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))) 26674 { 26675 warning (OPT_Wattributes, "%qs incompatible attribute ignored", 26676 IDENTIFIER_POINTER (name)); 26677 *no_add_attrs = true; 26678 } 26679 26680 return NULL_TREE; 26681 } 26682 26683 static bool 26684 ix86_ms_bitfield_layout_p (const_tree record_type) 26685 { 26686 return (TARGET_MS_BITFIELD_LAYOUT && 26687 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type))) 26688 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)); 26689 } 26690 26691 /* Returns an expression indicating where the this parameter is 26692 located on entry to the FUNCTION. */ 26693 26694 static rtx 26695 x86_this_parameter (tree function) 26696 { 26697 tree type = TREE_TYPE (function); 26698 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0; 26699 int nregs; 26700 26701 if (TARGET_64BIT) 26702 { 26703 const int *parm_regs; 26704 26705 if (ix86_function_type_abi (type) == MS_ABI) 26706 parm_regs = x86_64_ms_abi_int_parameter_registers; 26707 else 26708 parm_regs = x86_64_int_parameter_registers; 26709 return gen_rtx_REG (DImode, parm_regs[aggr]); 26710 } 26711 26712 nregs = ix86_function_regparm (type, function); 26713 26714 if (nregs > 0 && !stdarg_p (type)) 26715 { 26716 int regno; 26717 26718 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))) 26719 regno = aggr ? DX_REG : CX_REG; 26720 else 26721 { 26722 regno = AX_REG; 26723 if (aggr) 26724 { 26725 regno = DX_REG; 26726 if (nregs == 1) 26727 return gen_rtx_MEM (SImode, 26728 plus_constant (stack_pointer_rtx, 4)); 26729 } 26730 } 26731 return gen_rtx_REG (SImode, regno); 26732 } 26733 26734 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4)); 26735 } 26736 26737 /* Determine whether x86_output_mi_thunk can succeed. */ 26738 26739 static bool 26740 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED, 26741 HOST_WIDE_INT delta ATTRIBUTE_UNUSED, 26742 HOST_WIDE_INT vcall_offset, const_tree function) 26743 { 26744 /* 64-bit can handle anything. */ 26745 if (TARGET_64BIT) 26746 return true; 26747 26748 /* For 32-bit, everything's fine if we have one free register. */ 26749 if (ix86_function_regparm (TREE_TYPE (function), function) < 3) 26750 return true; 26751 26752 /* Need a free register for vcall_offset. */ 26753 if (vcall_offset) 26754 return false; 26755 26756 /* Need a free register for GOT references. */ 26757 if (flag_pic && !(*targetm.binds_local_p) (function)) 26758 return false; 26759 26760 /* Otherwise ok. */ 26761 return true; 26762 } 26763 26764 /* Output the assembler code for a thunk function. THUNK_DECL is the 26765 declaration for the thunk function itself, FUNCTION is the decl for 26766 the target function. 
DELTA is an immediate constant offset to be 26767 added to THIS. If VCALL_OFFSET is nonzero, the word at 26768 *(*this + vcall_offset) should be added to THIS. */ 26769 26770 static void 26771 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, 26772 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta, 26773 HOST_WIDE_INT vcall_offset, tree function) 26774 { 26775 rtx xops[3]; 26776 rtx this_param = x86_this_parameter (function); 26777 rtx this_reg, tmp; 26778 26779 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well 26780 pull it in now and let DELTA benefit. */ 26781 if (REG_P (this_param)) 26782 this_reg = this_param; 26783 else if (vcall_offset) 26784 { 26785 /* Put the this parameter into %eax. */ 26786 xops[0] = this_param; 26787 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG); 26788 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops); 26789 } 26790 else 26791 this_reg = NULL_RTX; 26792 26793 /* Adjust the this parameter by a fixed constant. */ 26794 if (delta) 26795 { 26796 xops[0] = GEN_INT (delta); 26797 xops[1] = this_reg ? this_reg : this_param; 26798 if (TARGET_64BIT) 26799 { 26800 if (!x86_64_general_operand (xops[0], DImode)) 26801 { 26802 tmp = gen_rtx_REG (DImode, R10_REG); 26803 xops[1] = tmp; 26804 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops); 26805 xops[0] = tmp; 26806 xops[1] = this_param; 26807 } 26808 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); 26809 } 26810 else 26811 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); 26812 } 26813 26814 /* Adjust the this parameter by a value stored in the vtable. */ 26815 if (vcall_offset) 26816 { 26817 if (TARGET_64BIT) 26818 tmp = gen_rtx_REG (DImode, R10_REG); 26819 else 26820 { 26821 int tmp_regno = CX_REG; 26822 if (lookup_attribute ("fastcall", 26823 TYPE_ATTRIBUTES (TREE_TYPE (function)))) 26824 tmp_regno = AX_REG; 26825 tmp = gen_rtx_REG (SImode, tmp_regno); 26826 } 26827 26828 xops[0] = gen_rtx_MEM (Pmode, this_reg); 26829 xops[1] = tmp; 26830 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops); 26831 26832 /* Adjust the this parameter. */ 26833 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset)); 26834 if (TARGET_64BIT && !memory_operand (xops[0], Pmode)) 26835 { 26836 rtx tmp2 = gen_rtx_REG (DImode, R11_REG); 26837 xops[0] = GEN_INT (vcall_offset); 26838 xops[1] = tmp2; 26839 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops); 26840 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2)); 26841 } 26842 xops[1] = this_reg; 26843 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops); 26844 } 26845 26846 /* If necessary, drop THIS back to its stack slot. */ 26847 if (this_reg && this_reg != this_param) 26848 { 26849 xops[0] = this_reg; 26850 xops[1] = this_param; 26851 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops); 26852 } 26853 26854 xops[0] = XEXP (DECL_RTL (function), 0); 26855 if (TARGET_64BIT) 26856 { 26857 if (!flag_pic || (*targetm.binds_local_p) (function)) 26858 output_asm_insn ("jmp\t%P0", xops); 26859 /* All thunks should be in the same object as their target, 26860 and thus binds_local_p should be true. 
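	 The MS-ABI branch below is therefore expected to be unreachable, while the SysV path still knows how to fall back to an indirect jump through the GOT.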
*/ 26861 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI) 26862 gcc_unreachable (); 26863 else 26864 { 26865 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL); 26866 tmp = gen_rtx_CONST (Pmode, tmp); 26867 tmp = gen_rtx_MEM (QImode, tmp); 26868 xops[0] = tmp; 26869 output_asm_insn ("jmp\t%A0", xops); 26870 } 26871 } 26872 else 26873 { 26874 if (!flag_pic || (*targetm.binds_local_p) (function)) 26875 output_asm_insn ("jmp\t%P0", xops); 26876 else 26877 #if TARGET_MACHO 26878 if (TARGET_MACHO) 26879 { 26880 rtx sym_ref = XEXP (DECL_RTL (function), 0); 26881 tmp = (gen_rtx_SYMBOL_REF 26882 (Pmode, 26883 machopic_indirection_name (sym_ref, /*stub_p=*/true))); 26884 tmp = gen_rtx_MEM (QImode, tmp); 26885 xops[0] = tmp; 26886 output_asm_insn ("jmp\t%0", xops); 26887 } 26888 else 26889 #endif /* TARGET_MACHO */ 26890 { 26891 tmp = gen_rtx_REG (SImode, CX_REG); 26892 output_set_got (tmp, NULL_RTX); 26893 26894 xops[1] = tmp; 26895 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops); 26896 output_asm_insn ("jmp\t{*}%1", xops); 26897 } 26898 } 26899 } 26900 26901 static void 26902 x86_file_start (void) 26903 { 26904 default_file_start (); 26905 #if TARGET_MACHO 26906 darwin_file_start (); 26907 #endif 26908 if (X86_FILE_START_VERSION_DIRECTIVE) 26909 fputs ("\t.version\t\"01.01\"\n", asm_out_file); 26910 if (X86_FILE_START_FLTUSED) 26911 fputs ("\t.global\t__fltused\n", asm_out_file); 26912 if (ix86_asm_dialect == ASM_INTEL) 26913 fputs ("\t.intel_syntax noprefix\n", asm_out_file); 26914 } 26915 26916 int 26917 x86_field_alignment (tree field, int computed) 26918 { 26919 enum machine_mode mode; 26920 tree type = TREE_TYPE (field); 26921 26922 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) 26923 return computed; 26924 mode = TYPE_MODE (strip_array_types (type)); 26925 if (mode == DFmode || mode == DCmode 26926 || GET_MODE_CLASS (mode) == MODE_INT 26927 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) 26928 return MIN (32, computed); 26929 return computed; 26930 } 26931 26932 /* Output assembler code to FILE to increment profiler label # LABELNO 26933 for profiling a function entry. */ 26934 void 26935 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) 26936 { 26937 if (TARGET_64BIT) 26938 { 26939 #ifndef NO_PROFILE_COUNTERS 26940 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno); 26941 #endif 26942 26943 if (DEFAULT_ABI == SYSV_ABI && flag_pic) 26944 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME); 26945 else 26946 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); 26947 } 26948 else if (flag_pic) 26949 { 26950 #ifndef NO_PROFILE_COUNTERS 26951 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n", 26952 LPREFIX, labelno, PROFILE_COUNT_REGISTER); 26953 #endif 26954 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME); 26955 } 26956 else 26957 { 26958 #ifndef NO_PROFILE_COUNTERS 26959 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno, 26960 PROFILE_COUNT_REGISTER); 26961 #endif 26962 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); 26963 } 26964 } 26965 26966 /* We don't have exact information about the insn sizes, but we may assume 26967 quite safely that we are informed about all 1 byte insns and memory 26968 address sizes. This is enough to eliminate unnecessary padding in 26969 99% of cases. */ 26970 26971 static int 26972 min_insn_size (rtx insn) 26973 { 26974 int l = 0; 26975 26976 if (!INSN_P (insn) || !active_insn_p (insn)) 26977 return 0; 26978 26979 /* Discard alignments we've emit and jump instructions. 
*/ 26980 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE 26981 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN) 26982 return 0; 26983 if (JUMP_P (insn) 26984 && (GET_CODE (PATTERN (insn)) == ADDR_VEC 26985 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)) 26986 return 0; 26987 26988 /* Important case - calls are always 5 bytes. 26989 It is common to have many calls in the row. */ 26990 if (CALL_P (insn) 26991 && symbolic_reference_mentioned_p (PATTERN (insn)) 26992 && !SIBLING_CALL_P (insn)) 26993 return 5; 26994 if (get_attr_length (insn) <= 1) 26995 return 1; 26996 26997 /* For normal instructions we may rely on the sizes of addresses 26998 and the presence of symbol to require 4 bytes of encoding. 26999 This is not the case for jumps where references are PC relative. */ 27000 if (!JUMP_P (insn)) 27001 { 27002 l = get_attr_length_address (insn); 27003 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn))) 27004 l = 4; 27005 } 27006 if (l) 27007 return 1+l; 27008 else 27009 return 2; 27010 } 27011 27012 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte 27013 window. */ 27014 27015 static void 27016 ix86_avoid_jump_misspredicts (void) 27017 { 27018 rtx insn, start = get_insns (); 27019 int nbytes = 0, njumps = 0; 27020 int isjump = 0; 27021 27022 /* Look for all minimal intervals of instructions containing 4 jumps. 27023 The intervals are bounded by START and INSN. NBYTES is the total 27024 size of instructions in the interval including INSN and not including 27025 START. When the NBYTES is smaller than 16 bytes, it is possible 27026 that the end of START and INSN ends up in the same 16byte page. 27027 27028 The smallest offset in the page INSN can start is the case where START 27029 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN). 27030 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN). 27031 */ 27032 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 27033 { 27034 27035 nbytes += min_insn_size (insn); 27036 if (dump_file) 27037 fprintf(dump_file, "Insn %i estimated to %i bytes\n", 27038 INSN_UID (insn), min_insn_size (insn)); 27039 if ((JUMP_P (insn) 27040 && GET_CODE (PATTERN (insn)) != ADDR_VEC 27041 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC) 27042 || CALL_P (insn)) 27043 njumps++; 27044 else 27045 continue; 27046 27047 while (njumps > 3) 27048 { 27049 start = NEXT_INSN (start); 27050 if ((JUMP_P (start) 27051 && GET_CODE (PATTERN (start)) != ADDR_VEC 27052 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC) 27053 || CALL_P (start)) 27054 njumps--, isjump = 1; 27055 else 27056 isjump = 0; 27057 nbytes -= min_insn_size (start); 27058 } 27059 gcc_assert (njumps >= 0); 27060 if (dump_file) 27061 fprintf (dump_file, "Interval %i to %i has %i bytes\n", 27062 INSN_UID (start), INSN_UID (insn), nbytes); 27063 27064 if (njumps == 3 && isjump && nbytes < 16) 27065 { 27066 int padsize = 15 - nbytes + min_insn_size (insn); 27067 27068 if (dump_file) 27069 fprintf (dump_file, "Padding insn %i by %i bytes!\n", 27070 INSN_UID (insn), padsize); 27071 emit_insn_before (gen_align (GEN_INT (padsize)), insn); 27072 } 27073 } 27074 } 27075 27076 /* AMD Athlon works faster 27077 when RET is not destination of conditional jump or directly preceded 27078 by other jump instruction. We avoid the penalty by inserting NOP just 27079 before the RET instructions in such cases. 
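   (In the current implementation this is done by switching to return_internal_long, in practice a rep-prefixed ret, rather than literally emitting a NOP.)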
*/ 27080 static void 27081 ix86_pad_returns (void) 27082 { 27083 edge e; 27084 edge_iterator ei; 27085 27086 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) 27087 { 27088 basic_block bb = e->src; 27089 rtx ret = BB_END (bb); 27090 rtx prev; 27091 bool replace = false; 27092 27093 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN 27094 || optimize_bb_for_size_p (bb)) 27095 continue; 27096 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev)) 27097 if (active_insn_p (prev) || LABEL_P (prev)) 27098 break; 27099 if (prev && LABEL_P (prev)) 27100 { 27101 edge e; 27102 edge_iterator ei; 27103 27104 FOR_EACH_EDGE (e, ei, bb->preds) 27105 if (EDGE_FREQUENCY (e) && e->src->index >= 0 27106 && !(e->flags & EDGE_FALLTHRU)) 27107 replace = true; 27108 } 27109 if (!replace) 27110 { 27111 prev = prev_active_insn (ret); 27112 if (prev 27113 && ((JUMP_P (prev) && any_condjump_p (prev)) 27114 || CALL_P (prev))) 27115 replace = true; 27116 /* Empty functions get branch mispredict even when the jump destination 27117 is not visible to us. */ 27118 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED) 27119 replace = true; 27120 } 27121 if (replace) 27122 { 27123 emit_insn_before (gen_return_internal_long (), ret); 27124 delete_insn (ret); 27125 } 27126 } 27127 } 27128 27129 /* Implement machine specific optimizations. We implement padding of returns 27130 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */ 27131 static void 27132 ix86_reorg (void) 27133 { 27134 if (TARGET_PAD_RETURNS && optimize 27135 && optimize_function_for_speed_p (cfun)) 27136 ix86_pad_returns (); 27137 if (TARGET_FOUR_JUMP_LIMIT && optimize 27138 && optimize_function_for_speed_p (cfun)) 27139 ix86_avoid_jump_misspredicts (); 27140 } 27141 27142 /* Return nonzero when QImode register that must be represented via REX prefix 27143 is used. */ 27144 bool 27145 x86_extended_QIreg_mentioned_p (rtx insn) 27146 { 27147 int i; 27148 extract_insn_cached (insn); 27149 for (i = 0; i < recog_data.n_operands; i++) 27150 if (REG_P (recog_data.operand[i]) 27151 && REGNO (recog_data.operand[i]) > BX_REG) 27152 return true; 27153 return false; 27154 } 27155 27156 /* Return nonzero when P points to register encoded via REX prefix. 27157 Called via for_each_rtx. */ 27158 static int 27159 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED) 27160 { 27161 unsigned int regno; 27162 if (!REG_P (*p)) 27163 return 0; 27164 regno = REGNO (*p); 27165 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno); 27166 } 27167 27168 /* Return true when INSN mentions register that must be encoded using REX 27169 prefix. */ 27170 bool 27171 x86_extended_reg_mentioned_p (rtx insn) 27172 { 27173 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn, 27174 extended_reg_mentioned_1, NULL); 27175 } 27176 27177 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code 27178 optabs would emit if we didn't have TFmode patterns. 
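   For a negative (high-bit-set) input the code below halves the value while folding the low bit back in (i0 = (in >> 1) | (in & 1)), converts that as a signed number, and doubles the result; keeping the low bit makes the final doubling come out correctly rounded.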
*/ 27179 27180 void 27181 x86_emit_floatuns (rtx operands[2]) 27182 { 27183 rtx neglab, donelab, i0, i1, f0, in, out; 27184 enum machine_mode mode, inmode; 27185 27186 inmode = GET_MODE (operands[1]); 27187 gcc_assert (inmode == SImode || inmode == DImode); 27188 27189 out = operands[0]; 27190 in = force_reg (inmode, operands[1]); 27191 mode = GET_MODE (out); 27192 neglab = gen_label_rtx (); 27193 donelab = gen_label_rtx (); 27194 f0 = gen_reg_rtx (mode); 27195 27196 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab); 27197 27198 expand_float (out, in, 0); 27199 27200 emit_jump_insn (gen_jump (donelab)); 27201 emit_barrier (); 27202 27203 emit_label (neglab); 27204 27205 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL, 27206 1, OPTAB_DIRECT); 27207 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL, 27208 1, OPTAB_DIRECT); 27209 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT); 27210 27211 expand_float (f0, i0, 0); 27212 27213 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0))); 27214 27215 emit_label (donelab); 27216 } 27217 27218 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector 27219 with all elements equal to VAR. Return true if successful. */ 27220 27221 static bool 27222 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, 27223 rtx target, rtx val) 27224 { 27225 enum machine_mode hmode, smode, wsmode, wvmode; 27226 rtx x; 27227 27228 switch (mode) 27229 { 27230 case V2SImode: 27231 case V2SFmode: 27232 if (!mmx_ok) 27233 return false; 27234 /* FALLTHRU */ 27235 27236 case V2DFmode: 27237 case V2DImode: 27238 case V4SFmode: 27239 case V4SImode: 27240 val = force_reg (GET_MODE_INNER (mode), val); 27241 x = gen_rtx_VEC_DUPLICATE (mode, val); 27242 emit_insn (gen_rtx_SET (VOIDmode, target, x)); 27243 return true; 27244 27245 case V4HImode: 27246 if (!mmx_ok) 27247 return false; 27248 if (TARGET_SSE || TARGET_3DNOW_A) 27249 { 27250 val = gen_lowpart (SImode, val); 27251 x = gen_rtx_TRUNCATE (HImode, val); 27252 x = gen_rtx_VEC_DUPLICATE (mode, x); 27253 emit_insn (gen_rtx_SET (VOIDmode, target, x)); 27254 return true; 27255 } 27256 else 27257 { 27258 smode = HImode; 27259 wsmode = SImode; 27260 wvmode = V2SImode; 27261 goto widen; 27262 } 27263 27264 case V8QImode: 27265 if (!mmx_ok) 27266 return false; 27267 smode = QImode; 27268 wsmode = HImode; 27269 wvmode = V4HImode; 27270 goto widen; 27271 case V8HImode: 27272 if (TARGET_SSE2) 27273 { 27274 rtx tmp1, tmp2; 27275 /* Extend HImode to SImode using a paradoxical SUBREG. */ 27276 tmp1 = gen_reg_rtx (SImode); 27277 emit_move_insn (tmp1, gen_lowpart (SImode, val)); 27278 /* Insert the SImode value as low element of V4SImode vector. */ 27279 tmp2 = gen_reg_rtx (V4SImode); 27280 tmp1 = gen_rtx_VEC_MERGE (V4SImode, 27281 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1), 27282 CONST0_RTX (V4SImode), 27283 const1_rtx); 27284 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1)); 27285 /* Cast the V4SImode vector back to a V8HImode vector. */ 27286 tmp1 = gen_reg_rtx (V8HImode); 27287 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2)); 27288 /* Duplicate the low short through the whole low SImode word. */ 27289 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1)); 27290 /* Cast the V8HImode vector back to a V4SImode vector. */ 27291 tmp2 = gen_reg_rtx (V4SImode); 27292 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1)); 27293 /* Replicate the low element of the V4SImode vector. 
*/ 27294 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx)); 27295 /* Cast the V2SImode back to V8HImode, and store in target. */ 27296 emit_move_insn (target, gen_lowpart (V8HImode, tmp2)); 27297 return true; 27298 } 27299 smode = HImode; 27300 wsmode = SImode; 27301 wvmode = V4SImode; 27302 goto widen; 27303 case V16QImode: 27304 if (TARGET_SSE2) 27305 { 27306 rtx tmp1, tmp2; 27307 /* Extend QImode to SImode using a paradoxical SUBREG. */ 27308 tmp1 = gen_reg_rtx (SImode); 27309 emit_move_insn (tmp1, gen_lowpart (SImode, val)); 27310 /* Insert the SImode value as low element of V4SImode vector. */ 27311 tmp2 = gen_reg_rtx (V4SImode); 27312 tmp1 = gen_rtx_VEC_MERGE (V4SImode, 27313 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1), 27314 CONST0_RTX (V4SImode), 27315 const1_rtx); 27316 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1)); 27317 /* Cast the V4SImode vector back to a V16QImode vector. */ 27318 tmp1 = gen_reg_rtx (V16QImode); 27319 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2)); 27320 /* Duplicate the low byte through the whole low SImode word. */ 27321 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1)); 27322 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1)); 27323 /* Cast the V16QImode vector back to a V4SImode vector. */ 27324 tmp2 = gen_reg_rtx (V4SImode); 27325 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1)); 27326 /* Replicate the low element of the V4SImode vector. */ 27327 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx)); 27328 /* Cast the V2SImode back to V16QImode, and store in target. */ 27329 emit_move_insn (target, gen_lowpart (V16QImode, tmp2)); 27330 return true; 27331 } 27332 smode = QImode; 27333 wsmode = HImode; 27334 wvmode = V8HImode; 27335 goto widen; 27336 widen: 27337 /* Replicate the value once into the next wider mode and recurse. */ 27338 val = convert_modes (wsmode, smode, val, true); 27339 x = expand_simple_binop (wsmode, ASHIFT, val, 27340 GEN_INT (GET_MODE_BITSIZE (smode)), 27341 NULL_RTX, 1, OPTAB_LIB_WIDEN); 27342 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN); 27343 27344 x = gen_reg_rtx (wvmode); 27345 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val)) 27346 gcc_unreachable (); 27347 emit_move_insn (target, gen_lowpart (mode, x)); 27348 return true; 27349 27350 case V4DFmode: 27351 hmode = V2DFmode; 27352 goto half; 27353 case V4DImode: 27354 hmode = V2DImode; 27355 goto half; 27356 case V8SFmode: 27357 hmode = V4SFmode; 27358 goto half; 27359 case V8SImode: 27360 hmode = V4SImode; 27361 goto half; 27362 case V16HImode: 27363 hmode = V8HImode; 27364 goto half; 27365 case V32QImode: 27366 hmode = V16QImode; 27367 goto half; 27368 half: 27369 { 27370 rtx tmp = gen_reg_rtx (hmode); 27371 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val); 27372 emit_insn (gen_rtx_SET (VOIDmode, target, 27373 gen_rtx_VEC_CONCAT (mode, tmp, tmp))); 27374 } 27375 return true; 27376 27377 default: 27378 return false; 27379 } 27380 } 27381 27382 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector 27383 whose ONE_VAR element is VAR, and other elements are zero. Return true 27384 if successful. */ 27385 27386 static bool 27387 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode, 27388 rtx target, rtx var, int one_var) 27389 { 27390 enum machine_mode vsimode; 27391 rtx new_target; 27392 rtx x, tmp; 27393 bool use_vector_set = false; 27394 27395 switch (mode) 27396 { 27397 case V2DImode: 27398 /* For SSE4.1, we normally use vector set. 
But if the second 27399 element is zero and inter-unit moves are OK, we use movq 27400 instead. */ 27401 use_vector_set = (TARGET_64BIT 27402 && TARGET_SSE4_1 27403 && !(TARGET_INTER_UNIT_MOVES 27404 && one_var == 0)); 27405 break; 27406 case V16QImode: 27407 case V4SImode: 27408 case V4SFmode: 27409 use_vector_set = TARGET_SSE4_1; 27410 break; 27411 case V8HImode: 27412 use_vector_set = TARGET_SSE2; 27413 break; 27414 case V4HImode: 27415 use_vector_set = TARGET_SSE || TARGET_3DNOW_A; 27416 break; 27417 case V32QImode: 27418 case V16HImode: 27419 case V8SImode: 27420 case V8SFmode: 27421 case V4DFmode: 27422 use_vector_set = TARGET_AVX; 27423 break; 27424 case V4DImode: 27425 /* Use ix86_expand_vector_set in 64bit mode only. */ 27426 use_vector_set = TARGET_AVX && TARGET_64BIT; 27427 break; 27428 default: 27429 break; 27430 } 27431 27432 if (use_vector_set) 27433 { 27434 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode))); 27435 var = force_reg (GET_MODE_INNER (mode), var); 27436 ix86_expand_vector_set (mmx_ok, target, var, one_var); 27437 return true; 27438 } 27439 27440 switch (mode) 27441 { 27442 case V2SFmode: 27443 case V2SImode: 27444 if (!mmx_ok) 27445 return false; 27446 /* FALLTHRU */ 27447 27448 case V2DFmode: 27449 case V2DImode: 27450 if (one_var != 0) 27451 return false; 27452 var = force_reg (GET_MODE_INNER (mode), var); 27453 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode))); 27454 emit_insn (gen_rtx_SET (VOIDmode, target, x)); 27455 return true; 27456 27457 case V4SFmode: 27458 case V4SImode: 27459 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER) 27460 new_target = gen_reg_rtx (mode); 27461 else 27462 new_target = target; 27463 var = force_reg (GET_MODE_INNER (mode), var); 27464 x = gen_rtx_VEC_DUPLICATE (mode, var); 27465 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx); 27466 emit_insn (gen_rtx_SET (VOIDmode, new_target, x)); 27467 if (one_var != 0) 27468 { 27469 /* We need to shuffle the value to the correct position, so 27470 create a new pseudo to store the intermediate result. */ 27471 27472 /* With SSE2, we can use the integer shuffle insns. */ 27473 if (mode != V4SFmode && TARGET_SSE2) 27474 { 27475 emit_insn (gen_sse2_pshufd_1 (new_target, new_target, 27476 GEN_INT (1), 27477 GEN_INT (one_var == 1 ? 0 : 1), 27478 GEN_INT (one_var == 2 ? 0 : 1), 27479 GEN_INT (one_var == 3 ? 0 : 1))); 27480 if (target != new_target) 27481 emit_move_insn (target, new_target); 27482 return true; 27483 } 27484 27485 /* Otherwise convert the intermediate result to V4SFmode and 27486 use the SSE1 shuffle instructions. */ 27487 if (mode != V4SFmode) 27488 { 27489 tmp = gen_reg_rtx (V4SFmode); 27490 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target)); 27491 } 27492 else 27493 tmp = new_target; 27494 27495 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp, 27496 GEN_INT (1), 27497 GEN_INT (one_var == 1 ? 0 : 1), 27498 GEN_INT (one_var == 2 ? 0+4 : 1+4), 27499 GEN_INT (one_var == 3 ? 
0+4 : 1+4))); 27500 27501 if (mode != V4SFmode) 27502 emit_move_insn (target, gen_lowpart (V4SImode, tmp)); 27503 else if (tmp != target) 27504 emit_move_insn (target, tmp); 27505 } 27506 else if (target != new_target) 27507 emit_move_insn (target, new_target); 27508 return true; 27509 27510 case V8HImode: 27511 case V16QImode: 27512 vsimode = V4SImode; 27513 goto widen; 27514 case V4HImode: 27515 case V8QImode: 27516 if (!mmx_ok) 27517 return false; 27518 vsimode = V2SImode; 27519 goto widen; 27520 widen: 27521 if (one_var != 0) 27522 return false; 27523 27524 /* Zero extend the variable element to SImode and recurse. */ 27525 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true); 27526 27527 x = gen_reg_rtx (vsimode); 27528 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x, 27529 var, one_var)) 27530 gcc_unreachable (); 27531 27532 emit_move_insn (target, gen_lowpart (mode, x)); 27533 return true; 27534 27535 default: 27536 return false; 27537 } 27538 } 27539 27540 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector 27541 consisting of the values in VALS. It is known that all elements 27542 except ONE_VAR are constants. Return true if successful. */ 27543 27544 static bool 27545 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode, 27546 rtx target, rtx vals, int one_var) 27547 { 27548 rtx var = XVECEXP (vals, 0, one_var); 27549 enum machine_mode wmode; 27550 rtx const_vec, x; 27551 27552 const_vec = copy_rtx (vals); 27553 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode)); 27554 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0)); 27555 27556 switch (mode) 27557 { 27558 case V2DFmode: 27559 case V2DImode: 27560 case V2SFmode: 27561 case V2SImode: 27562 /* For the two element vectors, it's just as easy to use 27563 the general case. */ 27564 return false; 27565 27566 case V4DImode: 27567 /* Use ix86_expand_vector_set in 64bit mode only. */ 27568 if (!TARGET_64BIT) 27569 return false; 27570 case V4DFmode: 27571 case V8SFmode: 27572 case V8SImode: 27573 case V16HImode: 27574 case V32QImode: 27575 case V4SFmode: 27576 case V4SImode: 27577 case V8HImode: 27578 case V4HImode: 27579 break; 27580 27581 case V16QImode: 27582 if (TARGET_SSE4_1) 27583 break; 27584 wmode = V8HImode; 27585 goto widen; 27586 case V8QImode: 27587 wmode = V4HImode; 27588 goto widen; 27589 widen: 27590 /* There's no way to set one QImode entry easily. Combine 27591 the variable value with its adjacent constant value, and 27592 promote to an HImode set. 
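For example, with one_var == 3 the variable byte is paired with the constant byte at index 2, giving var = (var << 8) | (vals[2] & 0xff), and the combined HImode value is stored at element one_var >> 1 == 1. When one_var is even the variable byte instead ends up in the low half of the pair.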
*/ 27593 x = XVECEXP (vals, 0, one_var ^ 1); 27594 if (one_var & 1) 27595 { 27596 var = convert_modes (HImode, QImode, var, true); 27597 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8), 27598 NULL_RTX, 1, OPTAB_LIB_WIDEN); 27599 x = GEN_INT (INTVAL (x) & 0xff); 27600 } 27601 else 27602 { 27603 var = convert_modes (HImode, QImode, var, true); 27604 x = gen_int_mode (INTVAL (x) << 8, HImode); 27605 } 27606 if (x != const0_rtx) 27607 var = expand_simple_binop (HImode, IOR, var, x, var, 27608 1, OPTAB_LIB_WIDEN); 27609 27610 x = gen_reg_rtx (wmode); 27611 emit_move_insn (x, gen_lowpart (wmode, const_vec)); 27612 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1); 27613 27614 emit_move_insn (target, gen_lowpart (mode, x)); 27615 return true; 27616 27617 default: 27618 return false; 27619 } 27620 27621 emit_move_insn (target, const_vec); 27622 ix86_expand_vector_set (mmx_ok, target, var, one_var); 27623 return true; 27624 } 27625 27626 /* A subroutine of ix86_expand_vector_init_general. Use vector 27627 concatenate to handle the most general case: all values variable, 27628 and none identical. */ 27629 27630 static void 27631 ix86_expand_vector_init_concat (enum machine_mode mode, 27632 rtx target, rtx *ops, int n) 27633 { 27634 enum machine_mode cmode, hmode = VOIDmode; 27635 rtx first[8], second[4]; 27636 rtvec v; 27637 int i, j; 27638 27639 switch (n) 27640 { 27641 case 2: 27642 switch (mode) 27643 { 27644 case V8SImode: 27645 cmode = V4SImode; 27646 break; 27647 case V8SFmode: 27648 cmode = V4SFmode; 27649 break; 27650 case V4DImode: 27651 cmode = V2DImode; 27652 break; 27653 case V4DFmode: 27654 cmode = V2DFmode; 27655 break; 27656 case V4SImode: 27657 cmode = V2SImode; 27658 break; 27659 case V4SFmode: 27660 cmode = V2SFmode; 27661 break; 27662 case V2DImode: 27663 cmode = DImode; 27664 break; 27665 case V2SImode: 27666 cmode = SImode; 27667 break; 27668 case V2DFmode: 27669 cmode = DFmode; 27670 break; 27671 case V2SFmode: 27672 cmode = SFmode; 27673 break; 27674 default: 27675 gcc_unreachable (); 27676 } 27677 27678 if (!register_operand (ops[1], cmode)) 27679 ops[1] = force_reg (cmode, ops[1]); 27680 if (!register_operand (ops[0], cmode)) 27681 ops[0] = force_reg (cmode, ops[0]); 27682 emit_insn (gen_rtx_SET (VOIDmode, target, 27683 gen_rtx_VEC_CONCAT (mode, ops[0], 27684 ops[1]))); 27685 break; 27686 27687 case 4: 27688 switch (mode) 27689 { 27690 case V4DImode: 27691 cmode = V2DImode; 27692 break; 27693 case V4DFmode: 27694 cmode = V2DFmode; 27695 break; 27696 case V4SImode: 27697 cmode = V2SImode; 27698 break; 27699 case V4SFmode: 27700 cmode = V2SFmode; 27701 break; 27702 default: 27703 gcc_unreachable (); 27704 } 27705 goto half; 27706 27707 case 8: 27708 switch (mode) 27709 { 27710 case V8SImode: 27711 cmode = V2SImode; 27712 hmode = V4SImode; 27713 break; 27714 case V8SFmode: 27715 cmode = V2SFmode; 27716 hmode = V4SFmode; 27717 break; 27718 default: 27719 gcc_unreachable (); 27720 } 27721 goto half; 27722 27723 half: 27724 /* FIXME: We process inputs backward to help RA. PR 36222. 
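Adjacent inputs are paired into CMODE vectors, and for the eight element case those are paired again into HMODE halves before the final concatenation. E.g. for scalar inputs e0..e7 (built in backward order): first[3] = {e6,e7} ... first[0] = {e0,e1}; second[0] = {e0,e1,e2,e3}, second[1] = {e4,e5,e6,e7}; target = {e0,...,e7}.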
*/ 27725 i = n - 1; 27726 j = (n >> 1) - 1; 27727 for (; i > 0; i -= 2, j--) 27728 { 27729 first[j] = gen_reg_rtx (cmode); 27730 v = gen_rtvec (2, ops[i - 1], ops[i]); 27731 ix86_expand_vector_init (false, first[j], 27732 gen_rtx_PARALLEL (cmode, v)); 27733 } 27734 27735 n >>= 1; 27736 if (n > 2) 27737 { 27738 gcc_assert (hmode != VOIDmode); 27739 for (i = j = 0; i < n; i += 2, j++) 27740 { 27741 second[j] = gen_reg_rtx (hmode); 27742 ix86_expand_vector_init_concat (hmode, second [j], 27743 &first [i], 2); 27744 } 27745 n >>= 1; 27746 ix86_expand_vector_init_concat (mode, target, second, n); 27747 } 27748 else 27749 ix86_expand_vector_init_concat (mode, target, first, n); 27750 break; 27751 27752 default: 27753 gcc_unreachable (); 27754 } 27755 } 27756 27757 /* A subroutine of ix86_expand_vector_init_general. Use vector 27758 interleave to handle the most general case: all values variable, 27759 and none identical. */ 27760 27761 static void 27762 ix86_expand_vector_init_interleave (enum machine_mode mode, 27763 rtx target, rtx *ops, int n) 27764 { 27765 enum machine_mode first_imode, second_imode, third_imode, inner_mode; 27766 int i, j; 27767 rtx op0, op1; 27768 rtx (*gen_load_even) (rtx, rtx, rtx); 27769 rtx (*gen_interleave_first_low) (rtx, rtx, rtx); 27770 rtx (*gen_interleave_second_low) (rtx, rtx, rtx); 27771 27772 switch (mode) 27773 { 27774 case V8HImode: 27775 gen_load_even = gen_vec_setv8hi; 27776 gen_interleave_first_low = gen_vec_interleave_lowv4si; 27777 gen_interleave_second_low = gen_vec_interleave_lowv2di; 27778 inner_mode = HImode; 27779 first_imode = V4SImode; 27780 second_imode = V2DImode; 27781 third_imode = VOIDmode; 27782 break; 27783 case V16QImode: 27784 gen_load_even = gen_vec_setv16qi; 27785 gen_interleave_first_low = gen_vec_interleave_lowv8hi; 27786 gen_interleave_second_low = gen_vec_interleave_lowv4si; 27787 inner_mode = QImode; 27788 first_imode = V8HImode; 27789 second_imode = V4SImode; 27790 third_imode = V2DImode; 27791 break; 27792 default: 27793 gcc_unreachable (); 27794 } 27795 27796 for (i = 0; i < n; i++) 27797 { 27798 /* Extend the odd elment to SImode using a paradoxical SUBREG. */ 27799 op0 = gen_reg_rtx (SImode); 27800 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i])); 27801 27802 /* Insert the SImode value as low element of V4SImode vector. */ 27803 op1 = gen_reg_rtx (V4SImode); 27804 op0 = gen_rtx_VEC_MERGE (V4SImode, 27805 gen_rtx_VEC_DUPLICATE (V4SImode, 27806 op0), 27807 CONST0_RTX (V4SImode), 27808 const1_rtx); 27809 emit_insn (gen_rtx_SET (VOIDmode, op1, op0)); 27810 27811 /* Cast the V4SImode vector back to a vector in orignal mode. */ 27812 op0 = gen_reg_rtx (mode); 27813 emit_move_insn (op0, gen_lowpart (mode, op1)); 27814 27815 /* Load even elements into the second positon. */ 27816 emit_insn ((*gen_load_even) (op0, 27817 force_reg (inner_mode, 27818 ops [i + i + 1]), 27819 const1_rtx)); 27820 27821 /* Cast vector to FIRST_IMODE vector. */ 27822 ops[i] = gen_reg_rtx (first_imode); 27823 emit_move_insn (ops[i], gen_lowpart (first_imode, op0)); 27824 } 27825 27826 /* Interleave low FIRST_IMODE vectors. */ 27827 for (i = j = 0; i < n; i += 2, j++) 27828 { 27829 op0 = gen_reg_rtx (first_imode); 27830 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1])); 27831 27832 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */ 27833 ops[j] = gen_reg_rtx (second_imode); 27834 emit_move_insn (ops[j], gen_lowpart (second_imode, op0)); 27835 } 27836 27837 /* Interleave low SECOND_IMODE vectors. 
*/ 27838 switch (second_imode) 27839 { 27840 case V4SImode: 27841 for (i = j = 0; i < n / 2; i += 2, j++) 27842 { 27843 op0 = gen_reg_rtx (second_imode); 27844 emit_insn ((*gen_interleave_second_low) (op0, ops[i], 27845 ops[i + 1])); 27846 27847 /* Cast the SECOND_IMODE vector to the THIRD_IMODE 27848 vector. */ 27849 ops[j] = gen_reg_rtx (third_imode); 27850 emit_move_insn (ops[j], gen_lowpart (third_imode, op0)); 27851 } 27852 second_imode = V2DImode; 27853 gen_interleave_second_low = gen_vec_interleave_lowv2di; 27854 /* FALLTHRU */ 27855 27856 case V2DImode: 27857 op0 = gen_reg_rtx (second_imode); 27858 emit_insn ((*gen_interleave_second_low) (op0, ops[0], 27859 ops[1])); 27860 27861 /* Cast the SECOND_IMODE vector back to a vector on original 27862 mode. */ 27863 emit_insn (gen_rtx_SET (VOIDmode, target, 27864 gen_lowpart (mode, op0))); 27865 break; 27866 27867 default: 27868 gcc_unreachable (); 27869 } 27870 } 27871 27872 /* A subroutine of ix86_expand_vector_init. Handle the most general case: 27873 all values variable, and none identical. */ 27874 27875 static void 27876 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode, 27877 rtx target, rtx vals) 27878 { 27879 rtx ops[32], op0, op1; 27880 enum machine_mode half_mode = VOIDmode; 27881 int n, i; 27882 27883 switch (mode) 27884 { 27885 case V2SFmode: 27886 case V2SImode: 27887 if (!mmx_ok && !TARGET_SSE) 27888 break; 27889 /* FALLTHRU */ 27890 27891 case V8SFmode: 27892 case V8SImode: 27893 case V4DFmode: 27894 case V4DImode: 27895 case V4SFmode: 27896 case V4SImode: 27897 case V2DFmode: 27898 case V2DImode: 27899 n = GET_MODE_NUNITS (mode); 27900 for (i = 0; i < n; i++) 27901 ops[i] = XVECEXP (vals, 0, i); 27902 ix86_expand_vector_init_concat (mode, target, ops, n); 27903 return; 27904 27905 case V32QImode: 27906 half_mode = V16QImode; 27907 goto half; 27908 27909 case V16HImode: 27910 half_mode = V8HImode; 27911 goto half; 27912 27913 half: 27914 n = GET_MODE_NUNITS (mode); 27915 for (i = 0; i < n; i++) 27916 ops[i] = XVECEXP (vals, 0, i); 27917 op0 = gen_reg_rtx (half_mode); 27918 op1 = gen_reg_rtx (half_mode); 27919 ix86_expand_vector_init_interleave (half_mode, op0, ops, 27920 n >> 2); 27921 ix86_expand_vector_init_interleave (half_mode, op1, 27922 &ops [n >> 1], n >> 2); 27923 emit_insn (gen_rtx_SET (VOIDmode, target, 27924 gen_rtx_VEC_CONCAT (mode, op0, op1))); 27925 return; 27926 27927 case V16QImode: 27928 if (!TARGET_SSE4_1) 27929 break; 27930 /* FALLTHRU */ 27931 27932 case V8HImode: 27933 if (!TARGET_SSE2) 27934 break; 27935 27936 /* Don't use ix86_expand_vector_init_interleave if we can't 27937 move from GPR to SSE register directly. 
*/ 27938 if (!TARGET_INTER_UNIT_MOVES) 27939 break; 27940 27941 n = GET_MODE_NUNITS (mode); 27942 for (i = 0; i < n; i++) 27943 ops[i] = XVECEXP (vals, 0, i); 27944 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1); 27945 return; 27946 27947 case V4HImode: 27948 case V8QImode: 27949 break; 27950 27951 default: 27952 gcc_unreachable (); 27953 } 27954 27955 { 27956 int i, j, n_elts, n_words, n_elt_per_word; 27957 enum machine_mode inner_mode; 27958 rtx words[4], shift; 27959 27960 inner_mode = GET_MODE_INNER (mode); 27961 n_elts = GET_MODE_NUNITS (mode); 27962 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD; 27963 n_elt_per_word = n_elts / n_words; 27964 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode)); 27965 27966 for (i = 0; i < n_words; ++i) 27967 { 27968 rtx word = NULL_RTX; 27969 27970 for (j = 0; j < n_elt_per_word; ++j) 27971 { 27972 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1); 27973 elt = convert_modes (word_mode, inner_mode, elt, true); 27974 27975 if (j == 0) 27976 word = elt; 27977 else 27978 { 27979 word = expand_simple_binop (word_mode, ASHIFT, word, shift, 27980 word, 1, OPTAB_LIB_WIDEN); 27981 word = expand_simple_binop (word_mode, IOR, word, elt, 27982 word, 1, OPTAB_LIB_WIDEN); 27983 } 27984 } 27985 27986 words[i] = word; 27987 } 27988 27989 if (n_words == 1) 27990 emit_move_insn (target, gen_lowpart (mode, words[0])); 27991 else if (n_words == 2) 27992 { 27993 rtx tmp = gen_reg_rtx (mode); 27994 emit_clobber (tmp); 27995 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]); 27996 emit_move_insn (gen_highpart (word_mode, tmp), words[1]); 27997 emit_move_insn (target, tmp); 27998 } 27999 else if (n_words == 4) 28000 { 28001 rtx tmp = gen_reg_rtx (V4SImode); 28002 gcc_assert (word_mode == SImode); 28003 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words)); 28004 ix86_expand_vector_init_general (false, V4SImode, tmp, vals); 28005 emit_move_insn (target, gen_lowpart (mode, tmp)); 28006 } 28007 else 28008 gcc_unreachable (); 28009 } 28010 } 28011 28012 /* Initialize vector TARGET via VALS. Suppress the use of MMX 28013 instructions unless MMX_OK is true. */ 28014 28015 void 28016 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) 28017 { 28018 enum machine_mode mode = GET_MODE (target); 28019 enum machine_mode inner_mode = GET_MODE_INNER (mode); 28020 int n_elts = GET_MODE_NUNITS (mode); 28021 int n_var = 0, one_var = -1; 28022 bool all_same = true, all_const_zero = true; 28023 int i; 28024 rtx x; 28025 28026 for (i = 0; i < n_elts; ++i) 28027 { 28028 x = XVECEXP (vals, 0, i); 28029 if (!(CONST_INT_P (x) 28030 || GET_CODE (x) == CONST_DOUBLE 28031 || GET_CODE (x) == CONST_FIXED)) 28032 n_var++, one_var = i; 28033 else if (x != CONST0_RTX (inner_mode)) 28034 all_const_zero = false; 28035 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) 28036 all_same = false; 28037 } 28038 28039 /* Constants are best loaded from the constant pool. */ 28040 if (n_var == 0) 28041 { 28042 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); 28043 return; 28044 } 28045 28046 /* If all values are identical, broadcast the value. */ 28047 if (all_same 28048 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target, 28049 XVECEXP (vals, 0, 0))) 28050 return; 28051 28052 /* Values where only one field is non-constant are best loaded from 28053 the pool and overwritten via move later. 
*/ 28054 if (n_var == 1) 28055 { 28056 if (all_const_zero 28057 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target, 28058 XVECEXP (vals, 0, one_var), 28059 one_var)) 28060 return; 28061 28062 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var)) 28063 return; 28064 } 28065 28066 ix86_expand_vector_init_general (mmx_ok, mode, target, vals); 28067 } 28068 28069 void 28070 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) 28071 { 28072 enum machine_mode mode = GET_MODE (target); 28073 enum machine_mode inner_mode = GET_MODE_INNER (mode); 28074 enum machine_mode half_mode; 28075 bool use_vec_merge = false; 28076 rtx tmp; 28077 static rtx (*gen_extract[6][2]) (rtx, rtx) 28078 = { 28079 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi }, 28080 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi }, 28081 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si }, 28082 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di }, 28083 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf }, 28084 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df } 28085 }; 28086 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx) 28087 = { 28088 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi }, 28089 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi }, 28090 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si }, 28091 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di }, 28092 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf }, 28093 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df } 28094 }; 28095 int i, j, n; 28096 28097 switch (mode) 28098 { 28099 case V2SFmode: 28100 case V2SImode: 28101 if (mmx_ok) 28102 { 28103 tmp = gen_reg_rtx (GET_MODE_INNER (mode)); 28104 ix86_expand_vector_extract (true, tmp, target, 1 - elt); 28105 if (elt == 0) 28106 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val); 28107 else 28108 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp); 28109 emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); 28110 return; 28111 } 28112 break; 28113 28114 case V2DImode: 28115 use_vec_merge = TARGET_SSE4_1; 28116 if (use_vec_merge) 28117 break; 28118 28119 case V2DFmode: 28120 { 28121 rtx op0, op1; 28122 28123 /* For the two element vectors, we implement a VEC_CONCAT with 28124 the extraction of the other element. 
*/ 28125 28126 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt))); 28127 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp); 28128 28129 if (elt == 0) 28130 op0 = val, op1 = tmp; 28131 else 28132 op0 = tmp, op1 = val; 28133 28134 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1); 28135 emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); 28136 } 28137 return; 28138 28139 case V4SFmode: 28140 use_vec_merge = TARGET_SSE4_1; 28141 if (use_vec_merge) 28142 break; 28143 28144 switch (elt) 28145 { 28146 case 0: 28147 use_vec_merge = true; 28148 break; 28149 28150 case 1: 28151 /* tmp = target = A B C D */ 28152 tmp = copy_to_reg (target); 28153 /* target = A A B B */ 28154 emit_insn (gen_sse_unpcklps (target, target, target)); 28155 /* target = X A B B */ 28156 ix86_expand_vector_set (false, target, val, 0); 28157 /* target = A X C D */ 28158 emit_insn (gen_sse_shufps_v4sf (target, target, tmp, 28159 GEN_INT (1), GEN_INT (0), 28160 GEN_INT (2+4), GEN_INT (3+4))); 28161 return; 28162 28163 case 2: 28164 /* tmp = target = A B C D */ 28165 tmp = copy_to_reg (target); 28166 /* tmp = X B C D */ 28167 ix86_expand_vector_set (false, tmp, val, 0); 28168 /* target = A B X D */ 28169 emit_insn (gen_sse_shufps_v4sf (target, target, tmp, 28170 GEN_INT (0), GEN_INT (1), 28171 GEN_INT (0+4), GEN_INT (3+4))); 28172 return; 28173 28174 case 3: 28175 /* tmp = target = A B C D */ 28176 tmp = copy_to_reg (target); 28177 /* tmp = X B C D */ 28178 ix86_expand_vector_set (false, tmp, val, 0); 28179 /* target = A B X D */ 28180 emit_insn (gen_sse_shufps_v4sf (target, target, tmp, 28181 GEN_INT (0), GEN_INT (1), 28182 GEN_INT (2+4), GEN_INT (0+4))); 28183 return; 28184 28185 default: 28186 gcc_unreachable (); 28187 } 28188 break; 28189 28190 case V4SImode: 28191 use_vec_merge = TARGET_SSE4_1; 28192 if (use_vec_merge) 28193 break; 28194 28195 /* Element 0 handled by vec_merge below. */ 28196 if (elt == 0) 28197 { 28198 use_vec_merge = true; 28199 break; 28200 } 28201 28202 if (TARGET_SSE2) 28203 { 28204 /* With SSE2, use integer shuffles to swap element 0 and ELT, 28205 store into element 0, then shuffle them back. */ 28206 28207 rtx order[4]; 28208 28209 order[0] = GEN_INT (elt); 28210 order[1] = const1_rtx; 28211 order[2] = const2_rtx; 28212 order[3] = GEN_INT (3); 28213 order[elt] = const0_rtx; 28214 28215 emit_insn (gen_sse2_pshufd_1 (target, target, order[0], 28216 order[1], order[2], order[3])); 28217 28218 ix86_expand_vector_set (false, target, val, 0); 28219 28220 emit_insn (gen_sse2_pshufd_1 (target, target, order[0], 28221 order[1], order[2], order[3])); 28222 } 28223 else 28224 { 28225 /* For SSE1, we have to reuse the V4SF code. 
*/ 28226 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target), 28227 gen_lowpart (SFmode, val), elt); 28228 } 28229 return; 28230 28231 case V8HImode: 28232 use_vec_merge = TARGET_SSE2; 28233 break; 28234 case V4HImode: 28235 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); 28236 break; 28237 28238 case V16QImode: 28239 use_vec_merge = TARGET_SSE4_1; 28240 break; 28241 28242 case V8QImode: 28243 break; 28244 28245 case V32QImode: 28246 half_mode = V16QImode; 28247 j = 0; 28248 n = 16; 28249 goto half; 28250 28251 case V16HImode: 28252 half_mode = V8HImode; 28253 j = 1; 28254 n = 8; 28255 goto half; 28256 28257 case V8SImode: 28258 half_mode = V4SImode; 28259 j = 2; 28260 n = 4; 28261 goto half; 28262 28263 case V4DImode: 28264 half_mode = V2DImode; 28265 j = 3; 28266 n = 2; 28267 goto half; 28268 28269 case V8SFmode: 28270 half_mode = V4SFmode; 28271 j = 4; 28272 n = 4; 28273 goto half; 28274 28275 case V4DFmode: 28276 half_mode = V2DFmode; 28277 j = 5; 28278 n = 2; 28279 goto half; 28280 28281 half: 28282 /* Compute offset. */ 28283 i = elt / n; 28284 elt %= n; 28285 28286 gcc_assert (i <= 1); 28287 28288 /* Extract the half. */ 28289 tmp = gen_reg_rtx (half_mode); 28290 emit_insn ((*gen_extract[j][i]) (tmp, target)); 28291 28292 /* Put val in tmp at elt. */ 28293 ix86_expand_vector_set (false, tmp, val, elt); 28294 28295 /* Put it back. */ 28296 emit_insn ((*gen_insert[j][i]) (target, target, tmp)); 28297 return; 28298 28299 default: 28300 break; 28301 } 28302 28303 if (use_vec_merge) 28304 { 28305 tmp = gen_rtx_VEC_DUPLICATE (mode, val); 28306 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt)); 28307 emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); 28308 } 28309 else 28310 { 28311 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false); 28312 28313 emit_move_insn (mem, target); 28314 28315 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode)); 28316 emit_move_insn (tmp, val); 28317 28318 emit_move_insn (target, mem); 28319 } 28320 } 28321 28322 void 28323 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) 28324 { 28325 enum machine_mode mode = GET_MODE (vec); 28326 enum machine_mode inner_mode = GET_MODE_INNER (mode); 28327 bool use_vec_extr = false; 28328 rtx tmp; 28329 28330 switch (mode) 28331 { 28332 case V2SImode: 28333 case V2SFmode: 28334 if (!mmx_ok) 28335 break; 28336 /* FALLTHRU */ 28337 28338 case V2DFmode: 28339 case V2DImode: 28340 use_vec_extr = true; 28341 break; 28342 28343 case V4SFmode: 28344 use_vec_extr = TARGET_SSE4_1; 28345 if (use_vec_extr) 28346 break; 28347 28348 switch (elt) 28349 { 28350 case 0: 28351 tmp = vec; 28352 break; 28353 28354 case 1: 28355 case 3: 28356 tmp = gen_reg_rtx (mode); 28357 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec, 28358 GEN_INT (elt), GEN_INT (elt), 28359 GEN_INT (elt+4), GEN_INT (elt+4))); 28360 break; 28361 28362 case 2: 28363 tmp = gen_reg_rtx (mode); 28364 emit_insn (gen_sse_unpckhps (tmp, vec, vec)); 28365 break; 28366 28367 default: 28368 gcc_unreachable (); 28369 } 28370 vec = tmp; 28371 use_vec_extr = true; 28372 elt = 0; 28373 break; 28374 28375 case V4SImode: 28376 use_vec_extr = TARGET_SSE4_1; 28377 if (use_vec_extr) 28378 break; 28379 28380 if (TARGET_SSE2) 28381 { 28382 switch (elt) 28383 { 28384 case 0: 28385 tmp = vec; 28386 break; 28387 28388 case 1: 28389 case 3: 28390 tmp = gen_reg_rtx (mode); 28391 emit_insn (gen_sse2_pshufd_1 (tmp, vec, 28392 GEN_INT (elt), GEN_INT (elt), 28393 GEN_INT (elt), GEN_INT (elt))); 28394 break; 28395 28396 case 
2: 28397 tmp = gen_reg_rtx (mode); 28398 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec)); 28399 break; 28400 28401 default: 28402 gcc_unreachable (); 28403 } 28404 vec = tmp; 28405 use_vec_extr = true; 28406 elt = 0; 28407 } 28408 else 28409 { 28410 /* For SSE1, we have to reuse the V4SF code. */ 28411 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target), 28412 gen_lowpart (V4SFmode, vec), elt); 28413 return; 28414 } 28415 break; 28416 28417 case V8HImode: 28418 use_vec_extr = TARGET_SSE2; 28419 break; 28420 case V4HImode: 28421 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); 28422 break; 28423 28424 case V16QImode: 28425 use_vec_extr = TARGET_SSE4_1; 28426 break; 28427 28428 case V8QImode: 28429 /* ??? Could extract the appropriate HImode element and shift. */ 28430 default: 28431 break; 28432 } 28433 28434 if (use_vec_extr) 28435 { 28436 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt))); 28437 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp); 28438 28439 /* Let the rtl optimizers know about the zero extension performed. */ 28440 if (inner_mode == QImode || inner_mode == HImode) 28441 { 28442 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp); 28443 target = gen_lowpart (SImode, target); 28444 } 28445 28446 emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); 28447 } 28448 else 28449 { 28450 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false); 28451 28452 emit_move_insn (mem, vec); 28453 28454 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode)); 28455 emit_move_insn (target, tmp); 28456 } 28457 } 28458 28459 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary 28460 pattern to reduce; DEST is the destination; IN is the input vector. */ 28461 28462 void 28463 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) 28464 { 28465 rtx tmp1, tmp2, tmp3; 28466 28467 tmp1 = gen_reg_rtx (V4SFmode); 28468 tmp2 = gen_reg_rtx (V4SFmode); 28469 tmp3 = gen_reg_rtx (V4SFmode); 28470 28471 emit_insn (gen_sse_movhlps (tmp1, in, in)); 28472 emit_insn (fn (tmp2, tmp1, in)); 28473 28474 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2, 28475 GEN_INT (1), GEN_INT (1), 28476 GEN_INT (1+4), GEN_INT (1+4))); 28477 emit_insn (fn (dest, tmp2, tmp3)); 28478 } 28479 28480 /* Target hook for scalar_mode_supported_p. */ 28481 static bool 28482 ix86_scalar_mode_supported_p (enum machine_mode mode) 28483 { 28484 if (DECIMAL_FLOAT_MODE_P (mode)) 28485 return true; 28486 else if (mode == TFmode) 28487 return true; 28488 else 28489 return default_scalar_mode_supported_p (mode); 28490 } 28491 28492 /* Implements target hook vector_mode_supported_p. */ 28493 static bool 28494 ix86_vector_mode_supported_p (enum machine_mode mode) 28495 { 28496 if (TARGET_SSE && VALID_SSE_REG_MODE (mode)) 28497 return true; 28498 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) 28499 return true; 28500 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) 28501 return true; 28502 if (TARGET_MMX && VALID_MMX_REG_MODE (mode)) 28503 return true; 28504 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode)) 28505 return true; 28506 return false; 28507 } 28508 28509 /* Target hook for c_mode_for_suffix. */ 28510 static enum machine_mode 28511 ix86_c_mode_for_suffix (char suffix) 28512 { 28513 if (suffix == 'q') 28514 return TFmode; 28515 if (suffix == 'w') 28516 return XFmode; 28517 28518 return VOIDmode; 28519 } 28520 28521 /* Worker function for TARGET_MD_ASM_CLOBBERS. 
28522 28523 We do this in the new i386 backend to maintain source compatibility 28524 with the old cc0-based compiler. */ 28525 28526 static tree 28527 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED, 28528 tree inputs ATTRIBUTE_UNUSED, 28529 tree clobbers) 28530 { 28531 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"), 28532 clobbers); 28533 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"), 28534 clobbers); 28535 return clobbers; 28536 } 28537 28538 /* Implements target vector targetm.asm.encode_section_info. This 28539 is not used by netware. */ 28540 28541 static void ATTRIBUTE_UNUSED 28542 ix86_encode_section_info (tree decl, rtx rtl, int first) 28543 { 28544 default_encode_section_info (decl, rtl, first); 28545 28546 if (TREE_CODE (decl) == VAR_DECL 28547 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)) 28548 && ix86_in_large_data_p (decl)) 28549 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR; 28550 } 28551 28552 /* Worker function for REVERSE_CONDITION. */ 28553 28554 enum rtx_code 28555 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode) 28556 { 28557 return (mode != CCFPmode && mode != CCFPUmode 28558 ? reverse_condition (code) 28559 : reverse_condition_maybe_unordered (code)); 28560 } 28561 28562 /* Output code to perform an x87 FP register move, from OPERANDS[1] 28563 to OPERANDS[0]. */ 28564 28565 const char * 28566 output_387_reg_move (rtx insn, rtx *operands) 28567 { 28568 if (REG_P (operands[0])) 28569 { 28570 if (REG_P (operands[1]) 28571 && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) 28572 { 28573 if (REGNO (operands[0]) == FIRST_STACK_REG) 28574 return output_387_ffreep (operands, 0); 28575 return "fstp\t%y0"; 28576 } 28577 if (STACK_TOP_P (operands[0])) 28578 return "fld%z1\t%y1"; 28579 return "fst\t%y0"; 28580 } 28581 else if (MEM_P (operands[0])) 28582 { 28583 gcc_assert (REG_P (operands[1])); 28584 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) 28585 return "fstp%z0\t%y0"; 28586 else 28587 { 28588 /* There is no non-popping store to memory for XFmode. 28589 So if we need one, follow the store with a load. */ 28590 if (GET_MODE (operands[0]) == XFmode) 28591 return "fstp%z0\t%y0\n\tfld%z0\t%y0"; 28592 else 28593 return "fst%z0\t%y0"; 28594 } 28595 } 28596 else 28597 gcc_unreachable(); 28598 } 28599 28600 /* Output code to perform a conditional jump to LABEL, if C2 flag in 28601 FP status register is set. */ 28602 28603 void 28604 ix86_emit_fp_unordered_jump (rtx label) 28605 { 28606 rtx reg = gen_reg_rtx (HImode); 28607 rtx temp; 28608 28609 emit_insn (gen_x86_fnstsw_1 (reg)); 28610 28611 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())) 28612 { 28613 emit_insn (gen_x86_sahf_1 (reg)); 28614 28615 temp = gen_rtx_REG (CCmode, FLAGS_REG); 28616 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx); 28617 } 28618 else 28619 { 28620 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04))); 28621 28622 temp = gen_rtx_REG (CCNOmode, FLAGS_REG); 28623 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx); 28624 } 28625 28626 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp, 28627 gen_rtx_LABEL_REF (VOIDmode, label), 28628 pc_rtx); 28629 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp); 28630 28631 emit_jump_insn (temp); 28632 predict_jump (REG_BR_PROB_BASE * 10 / 100); 28633 } 28634 28635 /* Output code to perform a log1p XFmode calculation. 
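The magic constant below is 1 - sqrt(2)/2. For |op1| below that bound fyl2xp1 computes log2(1 + op1) directly, avoiding the cancellation that forming 1 + op1 explicitly would cause for tiny op1; otherwise 1 + op1 is formed and fyl2x is used. In both cases the y operand is ln(2), loaded with fldln2, which turns the base-2 logarithm into a natural logarithm.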
*/ 28636 28637 void ix86_emit_i387_log1p (rtx op0, rtx op1) 28638 { 28639 rtx label1 = gen_label_rtx (); 28640 rtx label2 = gen_label_rtx (); 28641 28642 rtx tmp = gen_reg_rtx (XFmode); 28643 rtx tmp2 = gen_reg_rtx (XFmode); 28644 28645 emit_insn (gen_absxf2 (tmp, op1)); 28646 emit_insn (gen_cmpxf (tmp, 28647 CONST_DOUBLE_FROM_REAL_VALUE ( 28648 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode), 28649 XFmode))); 28650 emit_jump_insn (gen_bge (label1)); 28651 28652 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */ 28653 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2)); 28654 emit_jump (label2); 28655 28656 emit_label (label1); 28657 emit_move_insn (tmp, CONST1_RTX (XFmode)); 28658 emit_insn (gen_addxf3 (tmp, op1, tmp)); 28659 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */ 28660 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2)); 28661 28662 emit_label (label2); 28663 } 28664 28665 /* Output code to perform a Newton-Rhapson approximation of a single precision 28666 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */ 28667 28668 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode) 28669 { 28670 rtx x0, x1, e0, e1, two; 28671 28672 x0 = gen_reg_rtx (mode); 28673 e0 = gen_reg_rtx (mode); 28674 e1 = gen_reg_rtx (mode); 28675 x1 = gen_reg_rtx (mode); 28676 28677 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode); 28678 28679 if (VECTOR_MODE_P (mode)) 28680 two = ix86_build_const_vector (SFmode, true, two); 28681 28682 two = force_reg (mode, two); 28683 28684 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */ 28685 28686 /* x0 = rcp(b) estimate */ 28687 emit_insn (gen_rtx_SET (VOIDmode, x0, 28688 gen_rtx_UNSPEC (mode, gen_rtvec (1, b), 28689 UNSPEC_RCP))); 28690 /* e0 = x0 * b */ 28691 emit_insn (gen_rtx_SET (VOIDmode, e0, 28692 gen_rtx_MULT (mode, x0, b))); 28693 /* e1 = 2. - e0 */ 28694 emit_insn (gen_rtx_SET (VOIDmode, e1, 28695 gen_rtx_MINUS (mode, two, e0))); 28696 /* x1 = x0 * e1 */ 28697 emit_insn (gen_rtx_SET (VOIDmode, x1, 28698 gen_rtx_MULT (mode, x0, e1))); 28699 /* res = a * x1 */ 28700 emit_insn (gen_rtx_SET (VOIDmode, res, 28701 gen_rtx_MULT (mode, a, x1))); 28702 } 28703 28704 /* Output code to perform a Newton-Rhapson approximation of a 28705 single precision floating point [reciprocal] square root. */ 28706 28707 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode, 28708 bool recip) 28709 { 28710 rtx x0, e0, e1, e2, e3, mthree, mhalf; 28711 REAL_VALUE_TYPE r; 28712 28713 x0 = gen_reg_rtx (mode); 28714 e0 = gen_reg_rtx (mode); 28715 e1 = gen_reg_rtx (mode); 28716 e2 = gen_reg_rtx (mode); 28717 e3 = gen_reg_rtx (mode); 28718 28719 real_from_integer (&r, VOIDmode, -3, -1, 0); 28720 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode); 28721 28722 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL); 28723 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode); 28724 28725 if (VECTOR_MODE_P (mode)) 28726 { 28727 mthree = ix86_build_const_vector (SFmode, true, mthree); 28728 mhalf = ix86_build_const_vector (SFmode, true, mhalf); 28729 } 28730 28731 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) 28732 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */ 28733 28734 /* x0 = rsqrt(a) estimate */ 28735 emit_insn (gen_rtx_SET (VOIDmode, x0, 28736 gen_rtx_UNSPEC (mode, gen_rtvec (1, a), 28737 UNSPEC_RSQRT))); 28738 28739 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). 
*/ 28740 if (!recip) 28741 { 28742 rtx zero, mask; 28743 28744 zero = gen_reg_rtx (mode); 28745 mask = gen_reg_rtx (mode); 28746 28747 zero = force_reg (mode, CONST0_RTX(mode)); 28748 emit_insn (gen_rtx_SET (VOIDmode, mask, 28749 gen_rtx_NE (mode, zero, a))); 28750 28751 emit_insn (gen_rtx_SET (VOIDmode, x0, 28752 gen_rtx_AND (mode, x0, mask))); 28753 } 28754 28755 /* e0 = x0 * a */ 28756 emit_insn (gen_rtx_SET (VOIDmode, e0, 28757 gen_rtx_MULT (mode, x0, a))); 28758 /* e1 = e0 * x0 */ 28759 emit_insn (gen_rtx_SET (VOIDmode, e1, 28760 gen_rtx_MULT (mode, e0, x0))); 28761 28762 /* e2 = e1 - 3. */ 28763 mthree = force_reg (mode, mthree); 28764 emit_insn (gen_rtx_SET (VOIDmode, e2, 28765 gen_rtx_PLUS (mode, e1, mthree))); 28766 28767 mhalf = force_reg (mode, mhalf); 28768 if (recip) 28769 /* e3 = -.5 * x0 */ 28770 emit_insn (gen_rtx_SET (VOIDmode, e3, 28771 gen_rtx_MULT (mode, x0, mhalf))); 28772 else 28773 /* e3 = -.5 * e0 */ 28774 emit_insn (gen_rtx_SET (VOIDmode, e3, 28775 gen_rtx_MULT (mode, e0, mhalf))); 28776 /* ret = e2 * e3 */ 28777 emit_insn (gen_rtx_SET (VOIDmode, res, 28778 gen_rtx_MULT (mode, e2, e3))); 28779 } 28780 28781 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */ 28782 28783 static void ATTRIBUTE_UNUSED 28784 i386_solaris_elf_named_section (const char *name, unsigned int flags, 28785 tree decl) 28786 { 28787 /* With Binutils 2.15, the "@unwind" marker must be specified on 28788 every occurrence of the ".eh_frame" section, not just the first 28789 one. */ 28790 if (TARGET_64BIT 28791 && strcmp (name, ".eh_frame") == 0) 28792 { 28793 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name, 28794 flags & SECTION_WRITE ? "aw" : "a"); 28795 return; 28796 } 28797 default_elf_asm_named_section (name, flags, decl); 28798 } 28799 28800 /* Return the mangling of TYPE if it is an extended fundamental type. */ 28801 28802 static const char * 28803 ix86_mangle_type (const_tree type) 28804 { 28805 type = TYPE_MAIN_VARIANT (type); 28806 28807 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE 28808 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) 28809 return NULL; 28810 28811 switch (TYPE_MODE (type)) 28812 { 28813 case TFmode: 28814 /* __float128 is "g". */ 28815 return "g"; 28816 case XFmode: 28817 /* "long double" or __float80 is "e". */ 28818 return "e"; 28819 default: 28820 return NULL; 28821 } 28822 } 28823 28824 /* For 32-bit code we can save PIC register setup by using 28825 __stack_chk_fail_local hidden function instead of calling 28826 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC 28827 register, so it is better to call __stack_chk_fail directly. */ 28828 28829 static tree 28830 ix86_stack_protect_fail (void) 28831 { 28832 return TARGET_64BIT 28833 ? default_external_stack_protect_fail () 28834 : default_hidden_stack_protect_fail (); 28835 } 28836 28837 /* Select a format to encode pointers in exception handling data. CODE 28838 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is 28839 true if the symbol may be affected by dynamic relocations. 28840 28841 ??? All x86 object file formats are capable of representing this. 28842 After all, the relocation needed is the same as for the call insn. 28843 Whether or not a particular assembler allows us to enter such, I 28844 guess we'll have to see. 
*/ 28845 int 28846 asm_preferred_eh_data_format (int code, int global) 28847 { 28848 if (flag_pic) 28849 { 28850 int type = DW_EH_PE_sdata8; 28851 if (!TARGET_64BIT 28852 || ix86_cmodel == CM_SMALL_PIC 28853 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code))) 28854 type = DW_EH_PE_sdata4; 28855 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; 28856 } 28857 if (ix86_cmodel == CM_SMALL 28858 || (ix86_cmodel == CM_MEDIUM && code)) 28859 return DW_EH_PE_udata4; 28860 return DW_EH_PE_absptr; 28861 } 28862 28863 /* Expand copysign from SIGN to the positive value ABS_VALUE 28864 storing in RESULT. If MASK is non-null, it shall be a mask to mask out 28865 the sign-bit. */ 28866 static void 28867 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask) 28868 { 28869 enum machine_mode mode = GET_MODE (sign); 28870 rtx sgn = gen_reg_rtx (mode); 28871 if (mask == NULL_RTX) 28872 { 28873 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false); 28874 if (!VECTOR_MODE_P (mode)) 28875 { 28876 /* We need to generate a scalar mode mask in this case. */ 28877 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); 28878 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp); 28879 mask = gen_reg_rtx (mode); 28880 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp)); 28881 } 28882 } 28883 else 28884 mask = gen_rtx_NOT (mode, mask); 28885 emit_insn (gen_rtx_SET (VOIDmode, sgn, 28886 gen_rtx_AND (mode, mask, sign))); 28887 emit_insn (gen_rtx_SET (VOIDmode, result, 28888 gen_rtx_IOR (mode, abs_value, sgn))); 28889 } 28890 28891 /* Expand fabs (OP0) and return a new rtx that holds the result. The 28892 mask for masking out the sign-bit is stored in *SMASK, if that is 28893 non-null. */ 28894 static rtx 28895 ix86_expand_sse_fabs (rtx op0, rtx *smask) 28896 { 28897 enum machine_mode mode = GET_MODE (op0); 28898 rtx xa, mask; 28899 28900 xa = gen_reg_rtx (mode); 28901 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true); 28902 if (!VECTOR_MODE_P (mode)) 28903 { 28904 /* We need to generate a scalar mode mask in this case. */ 28905 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); 28906 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp); 28907 mask = gen_reg_rtx (mode); 28908 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp)); 28909 } 28910 emit_insn (gen_rtx_SET (VOIDmode, xa, 28911 gen_rtx_AND (mode, op0, mask))); 28912 28913 if (smask) 28914 *smask = mask; 28915 28916 return xa; 28917 } 28918 28919 /* Expands a comparison of OP0 with OP1 using comparison code CODE, 28920 swapping the operands if SWAP_OPERANDS is true. The expanded 28921 code is a forward jump to a newly created label in case the 28922 comparison is true. The generated label rtx is returned. 
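The comparison is emitted in CCFPUmode, so unordered codes such as UNLE and UNGT are usable; the callers below rely on that to have the jump also taken when one of the operands is a NaN.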
*/ 28923 static rtx 28924 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1, 28925 bool swap_operands) 28926 { 28927 rtx label, tmp; 28928 28929 if (swap_operands) 28930 { 28931 tmp = op0; 28932 op0 = op1; 28933 op1 = tmp; 28934 } 28935 28936 label = gen_label_rtx (); 28937 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG); 28938 emit_insn (gen_rtx_SET (VOIDmode, tmp, 28939 gen_rtx_COMPARE (CCFPUmode, op0, op1))); 28940 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx); 28941 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 28942 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx); 28943 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); 28944 JUMP_LABEL (tmp) = label; 28945 28946 return label; 28947 } 28948 28949 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1 28950 using comparison code CODE. Operands are swapped for the comparison if 28951 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */ 28952 static rtx 28953 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1, 28954 bool swap_operands) 28955 { 28956 enum machine_mode mode = GET_MODE (op0); 28957 rtx mask = gen_reg_rtx (mode); 28958 28959 if (swap_operands) 28960 { 28961 rtx tmp = op0; 28962 op0 = op1; 28963 op1 = tmp; 28964 } 28965 28966 if (mode == DFmode) 28967 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1, 28968 gen_rtx_fmt_ee (code, mode, op0, op1))); 28969 else 28970 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1, 28971 gen_rtx_fmt_ee (code, mode, op0, op1))); 28972 28973 return mask; 28974 } 28975 28976 /* Generate and return a rtx of mode MODE for 2**n where n is the number 28977 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */ 28978 static rtx 28979 ix86_gen_TWO52 (enum machine_mode mode) 28980 { 28981 REAL_VALUE_TYPE TWO52r; 28982 rtx TWO52; 28983 28984 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23); 28985 TWO52 = const_double_from_real_value (TWO52r, mode); 28986 TWO52 = force_reg (mode, TWO52); 28987 28988 return TWO52; 28989 } 28990 28991 /* Expand SSE sequence for computing lround from OP1 storing 28992 into OP0. */ 28993 void 28994 ix86_expand_lround (rtx op0, rtx op1) 28995 { 28996 /* C code for the stuff we're doing below: 28997 tmp = op1 + copysign (nextafter (0.5, 0.0), op1) 28998 return (long)tmp; 28999 */ 29000 enum machine_mode mode = GET_MODE (op1); 29001 const struct real_format *fmt; 29002 REAL_VALUE_TYPE pred_half, half_minus_pred_half; 29003 rtx adj; 29004 29005 /* load nextafter (0.5, 0.0) */ 29006 fmt = REAL_MODE_FORMAT (mode); 29007 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); 29008 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half); 29009 29010 /* adj = copysign (0.5, op1) */ 29011 adj = force_reg (mode, const_double_from_real_value (pred_half, mode)); 29012 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX); 29013 29014 /* adj = op1 + adj */ 29015 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT); 29016 29017 /* op0 = (imode)adj */ 29018 expand_fix (op0, adj, 0); 29019 } 29020 29021 /* Expand SSE2 sequence for computing lround from OPERAND1 storing 29022 into OPERAND0. */ 29023 void 29024 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor) 29025 { 29026 /* C code for the stuff we're doing below (for do_floor): 29027 xi = (long)op1; 29028 xi -= (double)xi > op1 ? 
1 : 0; 29029 return xi; 29030 */ 29031 enum machine_mode fmode = GET_MODE (op1); 29032 enum machine_mode imode = GET_MODE (op0); 29033 rtx ireg, freg, label, tmp; 29034 29035 /* reg = (long)op1 */ 29036 ireg = gen_reg_rtx (imode); 29037 expand_fix (ireg, op1, 0); 29038 29039 /* freg = (double)reg */ 29040 freg = gen_reg_rtx (fmode); 29041 expand_float (freg, ireg, 0); 29042 29043 /* ireg = (freg > op1) ? ireg - 1 : ireg */ 29044 label = ix86_expand_sse_compare_and_jump (UNLE, 29045 freg, op1, !do_floor); 29046 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS, 29047 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT); 29048 emit_move_insn (ireg, tmp); 29049 29050 emit_label (label); 29051 LABEL_NUSES (label) = 1; 29052 29053 emit_move_insn (op0, ireg); 29054 } 29055 29056 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the 29057 result in OPERAND0. */ 29058 void 29059 ix86_expand_rint (rtx operand0, rtx operand1) 29060 { 29061 /* C code for the stuff we're doing below: 29062 xa = fabs (operand1); 29063 if (!isless (xa, 2**52)) 29064 return operand1; 29065 xa = xa + 2**52 - 2**52; 29066 return copysign (xa, operand1); 29067 */ 29068 enum machine_mode mode = GET_MODE (operand0); 29069 rtx res, xa, label, TWO52, mask; 29070 29071 res = gen_reg_rtx (mode); 29072 emit_move_insn (res, operand1); 29073 29074 /* xa = abs (operand1) */ 29075 xa = ix86_expand_sse_fabs (res, &mask); 29076 29077 /* if (!isless (xa, TWO52)) goto label; */ 29078 TWO52 = ix86_gen_TWO52 (mode); 29079 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); 29080 29081 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); 29082 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT); 29083 29084 ix86_sse_copysign_to_positive (res, xa, res, mask); 29085 29086 emit_label (label); 29087 LABEL_NUSES (label) = 1; 29088 29089 emit_move_insn (operand0, res); 29090 } 29091 29092 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing 29093 into OPERAND0. */ 29094 void 29095 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor) 29096 { 29097 /* C code for the stuff we expand below. 29098 double xa = fabs (x), x2; 29099 if (!isless (xa, TWO52)) 29100 return x; 29101 xa = xa + TWO52 - TWO52; 29102 x2 = copysign (xa, x); 29103 Compensate. Floor: 29104 if (x2 > x) 29105 x2 -= 1; 29106 Compensate. Ceil: 29107 if (x2 < x) 29108 x2 -= -1; 29109 return x2; 29110 */ 29111 enum machine_mode mode = GET_MODE (operand0); 29112 rtx xa, TWO52, tmp, label, one, res, mask; 29113 29114 TWO52 = ix86_gen_TWO52 (mode); 29115 29116 /* Temporary for holding the result, initialized to the input 29117 operand to ease control flow. */ 29118 res = gen_reg_rtx (mode); 29119 emit_move_insn (res, operand1); 29120 29121 /* xa = abs (operand1) */ 29122 xa = ix86_expand_sse_fabs (res, &mask); 29123 29124 /* if (!isless (xa, TWO52)) goto label; */ 29125 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); 29126 29127 /* xa = xa + TWO52 - TWO52; */ 29128 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); 29129 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT); 29130 29131 /* xa = copysign (xa, operand1) */ 29132 ix86_sse_copysign_to_positive (xa, xa, res, mask); 29133 29134 /* generate 1.0 or -1.0 */ 29135 one = force_reg (mode, 29136 const_double_from_real_value (do_floor 29137 ? dconst1 : dconstm1, mode)); 29138 29139 /* Compensate: xa = xa - (xa > operand1 ? 
1 : 0) */ 29140 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor); 29141 emit_insn (gen_rtx_SET (VOIDmode, tmp, 29142 gen_rtx_AND (mode, one, tmp))); 29143 /* We always need to subtract here to preserve signed zero. */ 29144 tmp = expand_simple_binop (mode, MINUS, 29145 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT); 29146 emit_move_insn (res, tmp); 29147 29148 emit_label (label); 29149 LABEL_NUSES (label) = 1; 29150 29151 emit_move_insn (operand0, res); 29152 } 29153 29154 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing 29155 into OPERAND0. */ 29156 void 29157 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor) 29158 { 29159 /* C code for the stuff we expand below. 29160 double xa = fabs (x), x2; 29161 if (!isless (xa, TWO52)) 29162 return x; 29163 x2 = (double)(long)x; 29164 Compensate. Floor: 29165 if (x2 > x) 29166 x2 -= 1; 29167 Compensate. Ceil: 29168 if (x2 < x) 29169 x2 += 1; 29170 if (HONOR_SIGNED_ZEROS (mode)) 29171 return copysign (x2, x); 29172 return x2; 29173 */ 29174 enum machine_mode mode = GET_MODE (operand0); 29175 rtx xa, xi, TWO52, tmp, label, one, res, mask; 29176 29177 TWO52 = ix86_gen_TWO52 (mode); 29178 29179 /* Temporary for holding the result, initialized to the input 29180 operand to ease control flow. */ 29181 res = gen_reg_rtx (mode); 29182 emit_move_insn (res, operand1); 29183 29184 /* xa = abs (operand1) */ 29185 xa = ix86_expand_sse_fabs (res, &mask); 29186 29187 /* if (!isless (xa, TWO52)) goto label; */ 29188 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); 29189 29190 /* xa = (double)(long)x */ 29191 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); 29192 expand_fix (xi, res, 0); 29193 expand_float (xa, xi, 0); 29194 29195 /* generate 1.0 */ 29196 one = force_reg (mode, const_double_from_real_value (dconst1, mode)); 29197 29198 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */ 29199 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor); 29200 emit_insn (gen_rtx_SET (VOIDmode, tmp, 29201 gen_rtx_AND (mode, one, tmp))); 29202 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS, 29203 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT); 29204 emit_move_insn (res, tmp); 29205 29206 if (HONOR_SIGNED_ZEROS (mode)) 29207 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask); 29208 29209 emit_label (label); 29210 LABEL_NUSES (label) = 1; 29211 29212 emit_move_insn (operand0, res); 29213 } 29214 29215 /* Expand SSE sequence for computing round from OPERAND1 storing 29216 into OPERAND0. Sequence that works without relying on DImode truncation 29217 via cvttsd2siq that is only available on 64bit targets. */ 29218 void 29219 ix86_expand_rounddf_32 (rtx operand0, rtx operand1) 29220 { 29221 /* C code for the stuff we expand below. 29222 double xa = fabs (x), xa2, x2; 29223 if (!isless (xa, TWO52)) 29224 return x; 29225 Using the absolute value and copying back sign makes 29226 -0.0 -> -0.0 correct. 29227 xa2 = xa + TWO52 - TWO52; 29228 Compensate. 29229 dxa = xa2 - xa; 29230 if (dxa <= -0.5) 29231 xa2 += 1; 29232 else if (dxa > 0.5) 29233 xa2 -= 1; 29234 x2 = copysign (xa2, x); 29235 return x2; 29236 */ 29237 enum machine_mode mode = GET_MODE (operand0); 29238 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask; 29239 29240 TWO52 = ix86_gen_TWO52 (mode); 29241 29242 /* Temporary for holding the result, initialized to the input 29243 operand to ease control flow. 
*/ 29244 res = gen_reg_rtx (mode); 29245 emit_move_insn (res, operand1); 29246 29247 /* xa = abs (operand1) */ 29248 xa = ix86_expand_sse_fabs (res, &mask); 29249 29250 /* if (!isless (xa, TWO52)) goto label; */ 29251 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); 29252 29253 /* xa2 = xa + TWO52 - TWO52; */ 29254 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); 29255 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT); 29256 29257 /* dxa = xa2 - xa; */ 29258 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT); 29259 29260 /* generate 0.5, 1.0 and -0.5 */ 29261 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode)); 29262 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT); 29263 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX, 29264 0, OPTAB_DIRECT); 29265 29266 /* Compensate. */ 29267 tmp = gen_reg_rtx (mode); 29268 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */ 29269 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false); 29270 emit_insn (gen_rtx_SET (VOIDmode, tmp, 29271 gen_rtx_AND (mode, one, tmp))); 29272 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); 29273 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */ 29274 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false); 29275 emit_insn (gen_rtx_SET (VOIDmode, tmp, 29276 gen_rtx_AND (mode, one, tmp))); 29277 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); 29278 29279 /* res = copysign (xa2, operand1) */ 29280 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask); 29281 29282 emit_label (label); 29283 LABEL_NUSES (label) = 1; 29284 29285 emit_move_insn (operand0, res); 29286 } 29287 29288 /* Expand SSE sequence for computing trunc from OPERAND1 storing 29289 into OPERAND0. */ 29290 void 29291 ix86_expand_trunc (rtx operand0, rtx operand1) 29292 { 29293 /* C code for SSE variant we expand below. 29294 double xa = fabs (x), x2; 29295 if (!isless (xa, TWO52)) 29296 return x; 29297 x2 = (double)(long)x; 29298 if (HONOR_SIGNED_ZEROS (mode)) 29299 return copysign (x2, x); 29300 return x2; 29301 */ 29302 enum machine_mode mode = GET_MODE (operand0); 29303 rtx xa, xi, TWO52, label, res, mask; 29304 29305 TWO52 = ix86_gen_TWO52 (mode); 29306 29307 /* Temporary for holding the result, initialized to the input 29308 operand to ease control flow. */ 29309 res = gen_reg_rtx (mode); 29310 emit_move_insn (res, operand1); 29311 29312 /* xa = abs (operand1) */ 29313 xa = ix86_expand_sse_fabs (res, &mask); 29314 29315 /* if (!isless (xa, TWO52)) goto label; */ 29316 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); 29317 29318 /* x = (double)(long)x */ 29319 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); 29320 expand_fix (xi, res, 0); 29321 expand_float (res, xi, 0); 29322 29323 if (HONOR_SIGNED_ZEROS (mode)) 29324 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask); 29325 29326 emit_label (label); 29327 LABEL_NUSES (label) = 1; 29328 29329 emit_move_insn (operand0, res); 29330 } 29331 29332 /* Expand SSE sequence for computing trunc from OPERAND1 storing 29333 into OPERAND0. */ 29334 void 29335 ix86_expand_truncdf_32 (rtx operand0, rtx operand1) 29336 { 29337 enum machine_mode mode = GET_MODE (operand0); 29338 rtx xa, mask, TWO52, label, one, res, smask, tmp; 29339 29340 /* C code for SSE variant we expand below. 
29341 double xa = fabs (x), x2; 29342 if (!isless (xa, TWO52)) 29343 return x; 29344 xa2 = xa + TWO52 - TWO52; 29345 Compensate: 29346 if (xa2 > xa) 29347 xa2 -= 1.0; 29348 x2 = copysign (xa2, x); 29349 return x2; 29350 */ 29351 29352 TWO52 = ix86_gen_TWO52 (mode); 29353 29354 /* Temporary for holding the result, initialized to the input 29355 operand to ease control flow. */ 29356 res = gen_reg_rtx (mode); 29357 emit_move_insn (res, operand1); 29358 29359 /* xa = abs (operand1) */ 29360 xa = ix86_expand_sse_fabs (res, &smask); 29361 29362 /* if (!isless (xa, TWO52)) goto label; */ 29363 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); 29364 29365 /* res = xa + TWO52 - TWO52; */ 29366 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); 29367 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT); 29368 emit_move_insn (res, tmp); 29369 29370 /* generate 1.0 */ 29371 one = force_reg (mode, const_double_from_real_value (dconst1, mode)); 29372 29373 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */ 29374 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false); 29375 emit_insn (gen_rtx_SET (VOIDmode, mask, 29376 gen_rtx_AND (mode, mask, one))); 29377 tmp = expand_simple_binop (mode, MINUS, 29378 res, mask, NULL_RTX, 0, OPTAB_DIRECT); 29379 emit_move_insn (res, tmp); 29380 29381 /* res = copysign (res, operand1) */ 29382 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask); 29383 29384 emit_label (label); 29385 LABEL_NUSES (label) = 1; 29386 29387 emit_move_insn (operand0, res); 29388 } 29389 29390 /* Expand SSE sequence for computing round from OPERAND1 storing 29391 into OPERAND0. */ 29392 void 29393 ix86_expand_round (rtx operand0, rtx operand1) 29394 { 29395 /* C code for the stuff we're doing below: 29396 double xa = fabs (x); 29397 if (!isless (xa, TWO52)) 29398 return x; 29399 xa = (double)(long)(xa + nextafter (0.5, 0.0)); 29400 return copysign (xa, x); 29401 */ 29402 enum machine_mode mode = GET_MODE (operand0); 29403 rtx res, TWO52, xa, label, xi, half, mask; 29404 const struct real_format *fmt; 29405 REAL_VALUE_TYPE pred_half, half_minus_pred_half; 29406 29407 /* Temporary for holding the result, initialized to the input 29408 operand to ease control flow. */ 29409 res = gen_reg_rtx (mode); 29410 emit_move_insn (res, operand1); 29411 29412 TWO52 = ix86_gen_TWO52 (mode); 29413 xa = ix86_expand_sse_fabs (res, &mask); 29414 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); 29415 29416 /* load nextafter (0.5, 0.0) */ 29417 fmt = REAL_MODE_FORMAT (mode); 29418 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); 29419 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half); 29420 29421 /* xa = xa + 0.5 */ 29422 half = force_reg (mode, const_double_from_real_value (pred_half, mode)); 29423 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT); 29424 29425 /* xa = (double)(int64_t)xa */ 29426 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); 29427 expand_fix (xi, xa, 0); 29428 expand_float (xa, xi, 0); 29429 29430 /* res = copysign (xa, operand1) */ 29431 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask); 29432 29433 emit_label (label); 29434 LABEL_NUSES (label) = 1; 29435 29436 emit_move_insn (operand0, res); 29437 } 29438 29439 29440 /* Validate whether a SSE5 instruction is valid or not. 29441 OPERANDS is the array of operands. 29442 NUM is the number of operands. 

/* Validate whether an SSE5 instruction is valid or not.
   OPERANDS is the array of operands.
   NUM is the number of operands.
   USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
   NUM_MEMORY is the maximum number of memory operands to accept.
   When COMMUTATIVE is set, operands 1 and 2 can be swapped.  */

bool
ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
                      bool uses_oc0, int num_memory, bool commutative)
{
  int mem_mask;
  int mem_count;
  int i;

  /* Count the number of memory arguments.  */
  mem_mask = 0;
  mem_count = 0;
  for (i = 0; i < num; i++)
    {
      enum machine_mode mode = GET_MODE (operands[i]);
      if (register_operand (operands[i], mode))
        ;

      else if (memory_operand (operands[i], mode))
        {
          mem_mask |= (1 << i);
          mem_count++;
        }

      else
        {
          rtx pattern = PATTERN (insn);

          /* allow 0 for pcmov */
          if (GET_CODE (pattern) != SET
              || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
              || i < 2
              || operands[i] != CONST0_RTX (mode))
            return false;
        }
    }

  /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
     a memory operation.  */
  if (num_memory < 0)
    {
      num_memory = -num_memory;
      if ((mem_mask & (1 << (num-1))) != 0)
        {
          mem_mask &= ~(1 << (num-1));
          mem_count--;
        }
    }

  /* If there were no memory operations, allow the insn.  */
  if (mem_mask == 0)
    return true;

  /* Do not allow the destination register to be a memory operand.  */
  else if (mem_mask & (1 << 0))
    return false;

  /* If there are too many memory operations, disallow the instruction.
     While the hardware only allows 1 memory reference, we sometimes allow
     two memory operations on certain insns before register allocation so
     that code like the following can be optimized:

        float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }

     or similar cases that are vectorized into using the fmaddss
     instruction.  */
  else if (mem_count > num_memory)
    return false;

  /* Don't allow more than one memory operation if not optimizing.  */
  else if (mem_count > 1 && !optimize)
    return false;

  else if (num == 4 && mem_count == 1)
    {
      /* formats (destination is the first argument), example fmaddss:
         xmm1, xmm1, xmm2, xmm3/mem
         xmm1, xmm1, xmm2/mem, xmm3
         xmm1, xmm2, xmm3/mem, xmm1
         xmm1, xmm2/mem, xmm3, xmm1 */
      if (uses_oc0)
        return ((mem_mask == (1 << 1))
                || (mem_mask == (1 << 2))
                || (mem_mask == (1 << 3)));

      /* format, example pmacsdd:
         xmm1, xmm2, xmm3/mem, xmm1 */
      if (commutative)
        return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
      else
        return (mem_mask == (1 << 2));
    }
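
  /* Worked example for the branch above (purely illustrative): a four
     operand fmaddss-style insn with operands = { xmm1, xmm1, xmm2, mem }
     gives mem_mask == (1 << 3) and mem_count == 1, so the USES_OC0 test
     accepts it; operands = { xmm1, mem, xmm2, xmm3 } gives
     mem_mask == (1 << 1) and is likewise accepted, while a memory
     destination was already rejected by the (1 << 0) check above.  */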

  else if (num == 4 && num_memory == 2)
    {
      /* If there are two memory operations, we can load one of the memory
         ops into the destination register.  This is for optimizing the
         multiply/add ops, for which the combiner has rewritten both the
         multiply and the add insns to have a memory operation.  We have to
         be careful that the destination doesn't overlap with the inputs.  */
      rtx op0 = operands[0];

      if (reg_mentioned_p (op0, operands[1])
          || reg_mentioned_p (op0, operands[2])
          || reg_mentioned_p (op0, operands[3]))
        return false;

      /* formats (destination is the first argument), example fmaddss:
         xmm1, xmm1, xmm2, xmm3/mem
         xmm1, xmm1, xmm2/mem, xmm3
         xmm1, xmm2, xmm3/mem, xmm1
         xmm1, xmm2/mem, xmm3, xmm1

         For the oc0 case, we will load either operands[1] or operands[3]
         into operands[0], so any combination of 2 memory operands is ok.  */
      if (uses_oc0)
        return true;

      /* format, example pmacsdd:
         xmm1, xmm2, xmm3/mem, xmm1

         For the integer multiply/add instructions, be more restrictive and
         require operands[2] and operands[3] to be the memory operands.  */
      if (commutative)
        return (mem_mask == ((1 << 1) | (1 << 3))
                || mem_mask == ((1 << 2) | (1 << 3)));
      else
        return (mem_mask == ((1 << 2) | (1 << 3)));
    }

  else if (num == 3 && num_memory == 1)
    {
      /* formats, example protb:
         xmm1, xmm2, xmm3/mem
         xmm1, xmm2/mem, xmm3 */
      if (uses_oc0)
        return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));

      /* format, example comeq:
         xmm1, xmm2, xmm3/mem */
      else
        return (mem_mask == (1 << 2));
    }

  else
    gcc_unreachable ();

  return false;
}

/* Fixup an SSE5 instruction that has 2 memory input references into a form
   the hardware will allow by using the destination register to load one of
   the memory operations.  Presently this is used by the multiply/add
   routines to allow 2 memory references.  */

void
ix86_expand_sse5_multiple_memory (rtx operands[],
                                  int num,
                                  enum machine_mode mode)
{
  rtx op0 = operands[0];
  if (num != 4
      || memory_operand (op0, mode)
      || reg_mentioned_p (op0, operands[1])
      || reg_mentioned_p (op0, operands[2])
      || reg_mentioned_p (op0, operands[3]))
    gcc_unreachable ();

  /* For 2 memory operands, pick either operands[1] or operands[3] to move
     into the destination register.  */
  if (memory_operand (operands[1], mode))
    {
      emit_move_insn (op0, operands[1]);
      operands[1] = op0;
    }
  else if (memory_operand (operands[3], mode))
    {
      emit_move_insn (op0, operands[3]);
      operands[3] = op0;
    }
  else
    gcc_unreachable ();

  return;
}
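
/* Illustrative example of how the two routines above cooperate (a sketch
   of the intended flow, not code from this file): when the combiner
   produces a multiply/add whose multiply and add inputs are both memory
   references,

       op0 = (op1 * mem2) + mem3          with mem_mask == ((1 << 2) | (1 << 3)),

   ix86_sse5_valid_op_p accepts the combination before register allocation
   under NUM_MEMORY == 2, and a splitter can then call
   ix86_expand_sse5_multiple_memory to emit "op0 = mem3" and substitute op0
   for operands[3], leaving the single memory reference the hardware
   actually allows.  */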

/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true, true, ix86_handle_cconv_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* ms_abi and sysv_abi calling convention function attributes.  */
  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
  /* End element.  */
  { NULL, 0, 0, false, false, false, NULL }
};

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
x86_builtin_vectorization_cost (bool runtime_test)
{
  /* If the branch of the runtime test is taken - i.e. - the vectorized
     version is skipped - this incurs a misprediction cost (because the
     vectorized version is expected to be the fall-through).  So we subtract
     the latency of a mispredicted branch from the costs that are incurred
     when the vectorized version is executed.

     TODO: The values in individual target tables have to be tuned or new
     fields may be needed.  For example, on K8 the default branch path is
     the not-taken path.  If the taken path is predicted correctly, the
     minimum penalty of going down the taken-path is 1 cycle.  If the
     taken-path is not predicted correctly, then the minimum penalty is
     10 cycles.  */

  if (runtime_test)
    {
      return (-(ix86_cost->cond_taken_branch_cost));
    }
  else
    return 0;
}

/* This function returns the calling ABI specific va_list type node.
   It returns the FNDECL specific va_list type.  */

tree
ix86_fn_abi_va_list (tree fndecl)
{
  int abi;

  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);
  abi = ix86_function_abi ((const_tree) fndecl);

  if (abi == MS_ABI)
    return ms_va_list_type_node;
  else
    return sysv_va_list_type_node;
}
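
/* Illustrative use of the ms_abi/sysv_abi attributes handled above together
   with the per-function va_list selection (an example, not code from this
   file):

     void log_win (const char *fmt, ...) __attribute__ ((ms_abi));

   On a 64-bit target ix86_function_abi returns MS_ABI for log_win, so
   ix86_fn_abi_va_list hands back ms_va_list_type_node (a plain pointer into
   the argument area) rather than the SysV one-element array of the
   four-field va_list record used by the default ABI.  */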

/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

tree
ix86_canonical_va_list_type (tree type)
{
  tree wtype, htype;

  /* Resolve references and pointers to va_list type.  */
  if (INDIRECT_REF_P (type))
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
    type = TREE_TYPE (type);

  if (TARGET_64BIT)
    {
      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return va_list_type_node;
      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return sysv_va_list_type_node;
      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return ms_va_list_type_node;
      return NULL_TREE;
    }
  return std_canonical_va_list_type (type);
}

/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its name.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */

int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (!TARGET_64BIT)
    return 0;
  switch (idx) {
  case 0:
    *ptree = ms_va_list_type_node;
    *pname = "__builtin_ms_va_list";
    break;
  case 1:
    *ptree = sysv_va_list_type_node;
    *pname = "__builtin_sysv_va_list";
    break;
  default:
    return 0;
  }
  return 1;
}
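
/* Sketch of how a front end consumer is expected to drive the enumerator
   above (illustrative only; register_va_list_name is a stand-in for
   whatever routine the front end uses to record a builtin type):

     const char *name;
     tree type;
     int i;

     for (i = 0; ix86_enum_va_list (i, &name, &type) != 0; i++)
       register_va_list_name (name, type);

   so on 64-bit targets both __builtin_ms_va_list and __builtin_sysv_va_list
   become visible, while 32-bit targets get nothing beyond plain va_list.  */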

/* Initialize the GCC target structure.  */

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS \
  (TARGET_DEFAULT \
   | TARGET_SUBTARGET_DEFAULT \
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_CAN_INLINE_P
#define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"
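
/* All of the TARGET_* macros defined above feed the TARGET_INITIALIZER
   expansion from target-def.h, so the rest of the compiler reaches the
   i386 back end only through the targetm vector defined above.
   Illustrative call sites found elsewhere in GCC (not code from this
   file, shown only to make the mechanism concrete):

     cost = targetm.vectorize.builtin_vectorization_cost (runtime_test);
     if (targetm.calls.return_in_memory (type, fntype))
       ...

   Adding a new hook therefore requires both an ix86_* implementation and
   a matching #undef/#define pair in the block above.  */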