/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "c-common.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"

static int x86_builtin_vectorization_cost (bool);
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return the index of the given mode in the multiply and divide cost
   tables.  */
#define MODE_INDEX(mode)                                        \
  ((mode) == QImode ? 0                                         \
   : (mode) == HImode ? 1                                       \
   : (mode) == SImode ? 2                                       \
   : (mode) == DImode ? 3                                       \
   : 4)
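
/* For example, MODE_INDEX (SImode) == 2, so indexing one of the
   five-element multiply or divide cost arrays below with
   MODE_INDEX (mode) yields the entry for that mode; any mode not
   handled above (e.g. TImode) falls through to the trailing "other"
   slot at index 4.  A sketch, assuming the mult_init/divide field
   names used by GCC of this era:

       cost->mult_init[MODE_INDEX (mode)]   -- multiply startup cost
       cost->divide[MODE_INDEX (mode)]      -- divide/mod cost  */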

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
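
/* Worked example (under the stated assumption that COSTS_N_INSNS (N)
   is (N) * 4 and an add is 2 bytes): COSTS_N_BYTES (2) == 4
   == COSTS_N_INSNS (1), i.e. a two-byte instruction costs exactly one
   add.  This keeps the size-tuned table below comparable with the
   speed-tuned tables that use COSTS_N_INSNS.  */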

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
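
/* How to read the stringop descriptors in the tables below (an
   editorial reading inferred from the initializers, not guaranteed to
   match the struct stringop_algs documentation exactly): the first
   field is the algorithm used when the block size is unknown at
   compile time, followed by {max, alg} pairs selecting an algorithm
   for known sizes of at most max bytes, where max == -1 means "no
   upper bound".  For instance

       {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}

   reads: unknown size -> libcall, known size <= 256 -> rep movsl,
   anything larger -> libcall.  Each table carries a pair of such
   descriptors for memcpy and a pair for memset; the second element of
   each pair apparently covers 64-bit targets, which is why processors
   without 64-bit support use DUMMY_STRINGOP_ALGS there.  */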

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),                    /* cost of an add instruction */
  COSTS_N_BYTES (3),                    /* cost of a lea instruction */
  COSTS_N_BYTES (2),                    /* variable shift costs */
  COSTS_N_BYTES (3),                    /* constant shift costs */
  {COSTS_N_BYTES (3),                   /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),                   /*                               HI */
   COSTS_N_BYTES (3),                   /*                               SI */
   COSTS_N_BYTES (3),                   /*                               DI */
   COSTS_N_BYTES (5)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),                   /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),                   /*                          HI */
   COSTS_N_BYTES (3),                   /*                          SI */
   COSTS_N_BYTES (3),                   /*                          DI */
   COSTS_N_BYTES (5)},                  /*                       other */
  COSTS_N_BYTES (3),                    /* cost of movsx */
  COSTS_N_BYTES (3),                    /* cost of movzx */
  0,                                    /* "large" insn */
  2,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {2, 2, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 2, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 2},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {2, 2, 2},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  3,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {3, 3},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  3,                                    /* cost of moving SSE register */
  {3, 3, 3},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {3, 3, 3},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of l1 cache  */
  0,                                    /* size of l2 cache  */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_BYTES (2),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),                    /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar_load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  1,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  1,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (3),                    /* variable shift costs */
  COSTS_N_INSNS (2),                    /* constant shift costs */
  {COSTS_N_INSNS (6),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),                   /*                               HI */
   COSTS_N_INSNS (6),                   /*                               SI */
   COSTS_N_INSNS (6),                   /*                               DI */
   COSTS_N_INSNS (6)},                  /*                               other */
  COSTS_N_INSNS (1),                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),                  /*                          HI */
   COSTS_N_INSNS (23),                  /*                          SI */
   COSTS_N_INSNS (23),                  /*                          DI */
   COSTS_N_INSNS (23)},                 /*                          other */
  COSTS_N_INSNS (3),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  15,                                   /* "large" insn */
  3,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {8, 8, 8},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {8, 8, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of l1 cache  */
  0,                                    /* size of l2 cache  */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (23),                   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),                   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),                   /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),                   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),                  /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar_load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (3),                    /* variable shift costs */
  COSTS_N_INSNS (2),                    /* constant shift costs */
  {COSTS_N_INSNS (12),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),                  /*                               HI */
   COSTS_N_INSNS (12),                  /*                               SI */
   COSTS_N_INSNS (12),                  /*                               DI */
   COSTS_N_INSNS (12)},                 /*                               other */
  1,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),                  /*                          HI */
   COSTS_N_INSNS (40),                  /*                          SI */
   COSTS_N_INSNS (40),                  /*                          DI */
   COSTS_N_INSNS (40)},                 /*                          other */
  COSTS_N_INSNS (3),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  15,                                   /* "large" insn */
  3,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {8, 8, 8},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {8, 8, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  4,                                    /* size of l1 cache.  486 has an 8kB
                                           cache shared between code and data,
                                           so 4kB is not really precise.  */
  4,                                    /* size of l2 cache  */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),                   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),                   /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar_load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (4),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (11),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),                  /*                               HI */
   COSTS_N_INSNS (11),                  /*                               SI */
   COSTS_N_INSNS (11),                  /*                               DI */
   COSTS_N_INSNS (11)},                 /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),                  /*                          HI */
   COSTS_N_INSNS (25),                  /*                          SI */
   COSTS_N_INSNS (25),                  /*                          DI */
   COSTS_N_INSNS (25)},                 /*                          other */
  COSTS_N_INSNS (3),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  8,                                    /* "large" insn */
  6,                                    /* MOVE_RATIO */
  6,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  8,                                    /* cost of moving MMX register */
  {8, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {8, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  8,                                    /* size of l2 cache  */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),                   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar_load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (4),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (4),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (4)},                  /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),                  /*                          HI */
   COSTS_N_INSNS (17),                  /*                          SI */
   COSTS_N_INSNS (17),                  /*                          DI */
   COSTS_N_INSNS (17)},                 /*                          other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  6,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 2, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {2, 2, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  256,                                  /* size of l2 cache  */
  32,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar_load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (2),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (7),                   /*                               SI */
   COSTS_N_INSNS (7),                   /*                               DI */
   COSTS_N_INSNS (7)},                  /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),                  /*                          HI */
   COSTS_N_INSNS (39),                  /*                          SI */
   COSTS_N_INSNS (39),                  /*                          DI */
   COSTS_N_INSNS (39)},                 /*                          other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  4,                                    /* MOVE_RATIO */
  1,                                    /* cost for loading QImode using movzbl */
  {1, 1, 1},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {1, 1, 1},                            /* cost of storing integer registers */
  1,                                    /* cost of reg,reg fld/fst */
  {1, 1, 1},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 6, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  1,                                    /* cost of moving MMX register */
  {1, 1},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {1, 1},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  1,                                    /* cost of moving SSE register */
  {1, 1, 1},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {1, 1, 1},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  1,                                    /* MMX or SSE register to integer */
  64,                                   /* size of l1 cache.  */
  128,                                  /* size of l2 cache.  */
  32,                                   /* size of prefetch block */
  1,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),                   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),                   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar_load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (3),                   /*                               DI */
   COSTS_N_INSNS (3)},                  /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),                  /*                          HI */
   COSTS_N_INSNS (18),                  /*                          SI */
   COSTS_N_INSNS (18),                  /*                          DI */
   COSTS_N_INSNS (18)},                 /*                          other */
  COSTS_N_INSNS (2),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  8,                                    /* "large" insn */
  4,                                    /* MOVE_RATIO */
  3,                                    /* cost for loading QImode using movzbl */
  {4, 5, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 3, 2},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {6, 6, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 4},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {2, 2, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  6,                                    /* MMX or SSE register to integer */
  32,                                   /* size of l1 cache.  */
  32,                                   /* size of l2 cache.  Some models
                                           have integrated l2 cache, but
                                           optimizing for k6 is not important
                                           enough to worry about that.  */
  32,                                   /* size of prefetch block */
  1,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (2),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar_load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (5),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),                   /*                               HI */
   COSTS_N_INSNS (5),                   /*                               SI */
   COSTS_N_INSNS (5),                   /*                               DI */
   COSTS_N_INSNS (5)},                  /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /*                          HI */
   COSTS_N_INSNS (42),                  /*                          SI */
   COSTS_N_INSNS (74),                  /*                          DI */
   COSTS_N_INSNS (74)},                 /*                          other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 4},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 6},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  64,                                   /* size of l1 cache.  */
  256,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  5,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8 does.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar_load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (5)},                  /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /*                          HI */
   COSTS_N_INSNS (42),                  /*                          SI */
   COSTS_N_INSNS (74),                  /*                          DI */
   COSTS_N_INSNS (74)},                 /*                          other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 3, 6},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  64,                                   /* size of l1 cache.  */
  512,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave prefetches entirely unlimited, since their execution
     also takes some time).  */
  100,                                  /* number of parallel prefetches */
  3,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                                    /* scalar_stmt_cost.  */
  2,                                    /* scalar_load_cost.  */
  2,                                    /* scalar_store_cost.  */
  5,                                    /* vec_stmt_cost.  */
  0,                                    /* vec_to_scalar_cost.  */
  2,                                    /* scalar_to_vec_cost.  */
  2,                                    /* vec_align_load_cost.  */
  3,                                    /* vec_unalign_load_cost.  */
  3,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  2,                                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (5)},                  /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),                  /*                          HI */
   COSTS_N_INSNS (51),                  /*                          SI */
   COSTS_N_INSNS (83),                  /*                          DI */
   COSTS_N_INSNS (83)},                 /*                          other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 3},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
                                        /* On K8
                                            MOVD reg64, xmmreg  Double  FSTORE 4
                                            MOVD reg32, xmmreg  Double  FSTORE 4
                                           On AMDFAM10
                                            MOVD reg64, xmmreg  Double  FADD 3
                                                                1/1  1/1
                                            MOVD reg32, xmmreg  Double  FADD 3
                                                                1/1  1/1 */
  64,                                   /* size of l1 cache.  */
  512,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave prefetches entirely unlimited, since their execution
     also takes some time).  */
  100,                                  /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */

  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                                    /* scalar_stmt_cost.  */
  2,                                    /* scalar_load_cost.  */
  2,                                    /* scalar_store_cost.  */
  6,                                    /* vec_stmt_cost.  */
  0,                                    /* vec_to_scalar_cost.  */
  2,                                    /* scalar_to_vec_cost.  */
  2,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  2,                                    /* vec_store_cost.  */
  2,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (3),                    /* cost of a lea instruction */
  COSTS_N_INSNS (4),                    /* variable shift costs */
  COSTS_N_INSNS (4),                    /* constant shift costs */
  {COSTS_N_INSNS (15),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),                  /*                               HI */
   COSTS_N_INSNS (15),                  /*                               SI */
   COSTS_N_INSNS (15),                  /*                               DI */
   COSTS_N_INSNS (15)},                 /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),                  /*                          HI */
   COSTS_N_INSNS (56),                  /*                          SI */
   COSTS_N_INSNS (56),                  /*                          DI */
   COSTS_N_INSNS (56)},                 /*                          other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  16,                                   /* "large" insn */
  6,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {4, 5, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 3, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  12,                                   /* cost of moving SSE register */
  {12, 12, 12},                         /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  10,                                   /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  256,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (5),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),                   /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar_load_cost.  */
 882   1,                                    /* scalar_store_cost.  */
 883   1,                                    /* vec_stmt_cost.  */
 884   1,                                    /* vec_to_scalar_cost.  */
 885   1,                                    /* scalar_to_vec_cost.  */
 886   1,                                    /* vec_align_load_cost.  */
 887   2,                                    /* vec_unalign_load_cost.  */
 888   1,                                    /* vec_store_cost.  */
 889   3,                                    /* cond_taken_branch_cost.  */
 890   1,                                    /* cond_not_taken_branch_cost.  */
 891 };
 892 
 893 static const
 894 struct processor_costs nocona_cost = {
 895   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 896   COSTS_N_INSNS (1),                    /* cost of a lea instruction */
 897   COSTS_N_INSNS (1),                    /* variable shift costs */
 898   COSTS_N_INSNS (1),                    /* constant shift costs */
 899   {COSTS_N_INSNS (10),                  /* cost of starting multiply for QI */
 900    COSTS_N_INSNS (10),                  /*                               HI */
 901    COSTS_N_INSNS (10),                  /*                               SI */
 902    COSTS_N_INSNS (10),                  /*                               DI */
 903    COSTS_N_INSNS (10)},                 /*                               other */
 904   0,                                    /* cost of multiply per each bit set */
 905   {COSTS_N_INSNS (66),                  /* cost of a divide/mod for QI */
 906    COSTS_N_INSNS (66),                  /*                          HI */
 907    COSTS_N_INSNS (66),                  /*                          SI */
 908    COSTS_N_INSNS (66),                  /*                          DI */
 909    COSTS_N_INSNS (66)},                 /*                          other */
 910   COSTS_N_INSNS (1),                    /* cost of movsx */
 911   COSTS_N_INSNS (1),                    /* cost of movzx */
 912   16,                                   /* "large" insn */
 913   17,                                   /* MOVE_RATIO */
 914   4,                                    /* cost for loading QImode using movzbl */
 915   {4, 4, 4},                            /* cost of loading integer registers
 916                                            in QImode, HImode and SImode.
 917                                            Relative to reg-reg move (2).  */
 918   {4, 4, 4},                            /* cost of storing integer registers */
 919   3,                                    /* cost of reg,reg fld/fst */
 920   {12, 12, 12},                         /* cost of loading fp registers
 921                                            in SFmode, DFmode and XFmode */
 922   {4, 4, 4},                            /* cost of storing fp registers
 923                                            in SFmode, DFmode and XFmode */
 924   6,                                    /* cost of moving MMX register */
 925   {12, 12},                             /* cost of loading MMX registers
 926                                            in SImode and DImode */
 927   {12, 12},                             /* cost of storing MMX registers
 928                                            in SImode and DImode */
 929   6,                                    /* cost of moving SSE register */
 930   {12, 12, 12},                         /* cost of loading SSE registers
 931                                            in SImode, DImode and TImode */
 932   {12, 12, 12},                         /* cost of storing SSE registers
 933                                            in SImode, DImode and TImode */
 934   8,                                    /* MMX or SSE register to integer */
 935   8,                                    /* size of l1 cache.  */
 936   1024,                                 /* size of l2 cache.  */
 937   128,                                  /* size of prefetch block */
 938   8,                                    /* number of parallel prefetches */
 939   1,                                    /* Branch cost */
 940   COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
 941   COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
 942   COSTS_N_INSNS (40),                   /* cost of FDIV instruction.  */
 943   COSTS_N_INSNS (3),                    /* cost of FABS instruction.  */
 944   COSTS_N_INSNS (3),                    /* cost of FCHS instruction.  */
 945   COSTS_N_INSNS (44),                   /* cost of FSQRT instruction.  */
 946   {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
 947    {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
 948               {100000, unrolled_loop}, {-1, libcall}}}},
 949   {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
 950    {-1, libcall}}},
 951    {libcall, {{24, loop}, {64, unrolled_loop},
 952               {8192, rep_prefix_8_byte}, {-1, libcall}}}},
 953   1,                                    /* scalar_stmt_cost.  */
 954   1,                                    /* scalar load_cost.  */
 955   1,                                    /* scalar_store_cost.  */
 956   1,                                    /* vec_stmt_cost.  */
 957   1,                                    /* vec_to_scalar_cost.  */
 958   1,                                    /* scalar_to_vec_cost.  */
 959   1,                                    /* vec_align_load_cost.  */
 960   2,                                    /* vec_unalign_load_cost.  */
 961   1,                                    /* vec_store_cost.  */
 962   3,                                    /* cond_taken_branch_cost.  */
 963   1,                                    /* cond_not_taken_branch_cost.  */
 964 };
 965 
 966 static const
 967 struct processor_costs core2_cost = {
 968   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 969   COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
 970   COSTS_N_INSNS (1),                    /* variable shift costs */
 971   COSTS_N_INSNS (1),                    /* constant shift costs */
 972   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
 973    COSTS_N_INSNS (3),                   /*                               HI */
 974    COSTS_N_INSNS (3),                   /*                               SI */
 975    COSTS_N_INSNS (3),                   /*                               DI */
 976    COSTS_N_INSNS (3)},                  /*                               other */
 977   0,                                    /* cost of multiply per each bit set */
 978   {COSTS_N_INSNS (22),                  /* cost of a divide/mod for QI */
 979    COSTS_N_INSNS (22),                  /*                          HI */
 980    COSTS_N_INSNS (22),                  /*                          SI */
 981    COSTS_N_INSNS (22),                  /*                          DI */
 982    COSTS_N_INSNS (22)},                 /*                          other */
 983   COSTS_N_INSNS (1),                    /* cost of movsx */
 984   COSTS_N_INSNS (1),                    /* cost of movzx */
 985   8,                                    /* "large" insn */
 986   16,                                   /* MOVE_RATIO */
 987   2,                                    /* cost for loading QImode using movzbl */
 988   {6, 6, 6},                            /* cost of loading integer registers
 989                                            in QImode, HImode and SImode.
 990                                            Relative to reg-reg move (2).  */
 991   {4, 4, 4},                            /* cost of storing integer registers */
 992   2,                                    /* cost of reg,reg fld/fst */
 993   {6, 6, 6},                            /* cost of loading fp registers
 994                                            in SFmode, DFmode and XFmode */
 995   {4, 4, 4},                            /* cost of storing fp registers
 996                                            in SFmode, DFmode and XFmode */
 997   2,                                    /* cost of moving MMX register */
 998   {6, 6},                               /* cost of loading MMX registers
 999                                            in SImode and DImode */
1000   {4, 4},                               /* cost of storing MMX registers
1001                                            in SImode and DImode */
1002   2,                                    /* cost of moving SSE register */
1003   {6, 6, 6},                            /* cost of loading SSE registers
1004                                            in SImode, DImode and TImode */
1005   {4, 4, 4},                            /* cost of storing SSE registers
1006                                            in SImode, DImode and TImode */
1007   2,                                    /* MMX or SSE register to integer */
1008   32,                                   /* size of l1 cache.  */
1009   2048,                                 /* size of l2 cache.  */
1010   128,                                  /* size of prefetch block */
1011   8,                                    /* number of parallel prefetches */
1012   3,                                    /* Branch cost */
1013   COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
1014   COSTS_N_INSNS (5),                    /* cost of FMUL instruction.  */
1015   COSTS_N_INSNS (32),                   /* cost of FDIV instruction.  */
1016   COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
1017   COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
1018   COSTS_N_INSNS (58),                   /* cost of FSQRT instruction.  */
1019   {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1020    {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1021               {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1022   {{libcall, {{8, loop}, {15, unrolled_loop},
1023               {2048, rep_prefix_4_byte}, {-1, libcall}}},
1024    {libcall, {{24, loop}, {32, unrolled_loop},
1025               {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1026   1,                                    /* scalar_stmt_cost.  */
1027   1,                                    /* scalar load_cost.  */
1028   1,                                    /* scalar_store_cost.  */
1029   1,                                    /* vec_stmt_cost.  */
1030   1,                                    /* vec_to_scalar_cost.  */
1031   1,                                    /* scalar_to_vec_cost.  */
1032   1,                                    /* vec_align_load_cost.  */
1033   2,                                    /* vec_unalign_load_cost.  */
1034   1,                                    /* vec_store_cost.  */
1035   3,                                    /* cond_taken_branch_cost.  */
1036   1,                                    /* cond_not_taken_branch_cost.  */
1037 };
1038 
1039 /* Generic64 should produce code tuned for Nocona and K8.  */
1040 static const
1041 struct processor_costs generic64_cost = {
1042   COSTS_N_INSNS (1),                    /* cost of an add instruction */
  /* On all chips taken into consideration, lea takes 2 cycles or more.
     With that cost, however, our current implementation of synth_mult
     uses unnecessary temporary registers, causing regressions on several
     SPECfp benchmarks; see the worked example after this structure.  */
1047   COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
1048   COSTS_N_INSNS (1),                    /* variable shift costs */
1049   COSTS_N_INSNS (1),                    /* constant shift costs */
1050   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
1051    COSTS_N_INSNS (4),                   /*                               HI */
1052    COSTS_N_INSNS (3),                   /*                               SI */
1053    COSTS_N_INSNS (4),                   /*                               DI */
1054    COSTS_N_INSNS (2)},                  /*                               other */
1055   0,                                    /* cost of multiply per each bit set */
1056   {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
1057    COSTS_N_INSNS (26),                  /*                          HI */
1058    COSTS_N_INSNS (42),                  /*                          SI */
1059    COSTS_N_INSNS (74),                  /*                          DI */
1060    COSTS_N_INSNS (74)},                 /*                          other */
1061   COSTS_N_INSNS (1),                    /* cost of movsx */
1062   COSTS_N_INSNS (1),                    /* cost of movzx */
1063   8,                                    /* "large" insn */
1064   17,                                   /* MOVE_RATIO */
1065   4,                                    /* cost for loading QImode using movzbl */
1066   {4, 4, 4},                            /* cost of loading integer registers
1067                                            in QImode, HImode and SImode.
1068                                            Relative to reg-reg move (2).  */
1069   {4, 4, 4},                            /* cost of storing integer registers */
1070   4,                                    /* cost of reg,reg fld/fst */
1071   {12, 12, 12},                         /* cost of loading fp registers
1072                                            in SFmode, DFmode and XFmode */
1073   {6, 6, 8},                            /* cost of storing fp registers
1074                                            in SFmode, DFmode and XFmode */
1075   2,                                    /* cost of moving MMX register */
1076   {8, 8},                               /* cost of loading MMX registers
1077                                            in SImode and DImode */
1078   {8, 8},                               /* cost of storing MMX registers
1079                                            in SImode and DImode */
1080   2,                                    /* cost of moving SSE register */
1081   {8, 8, 8},                            /* cost of loading SSE registers
1082                                            in SImode, DImode and TImode */
1083   {8, 8, 8},                            /* cost of storing SSE registers
1084                                            in SImode, DImode and TImode */
1085   5,                                    /* MMX or SSE register to integer */
1086   32,                                   /* size of l1 cache.  */
1087   512,                                  /* size of l2 cache.  */
1088   64,                                   /* size of prefetch block */
1089   6,                                    /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
1092   3,                                    /* Branch cost */
1093   COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
1094   COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
1095   COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
1096   COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
1097   COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
1098   COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
1099   {DUMMY_STRINGOP_ALGS,
1100    {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101   {DUMMY_STRINGOP_ALGS,
1102    {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1103   1,                                    /* scalar_stmt_cost.  */
1104   1,                                    /* scalar load_cost.  */
1105   1,                                    /* scalar_store_cost.  */
1106   1,                                    /* vec_stmt_cost.  */
1107   1,                                    /* vec_to_scalar_cost.  */
1108   1,                                    /* scalar_to_vec_cost.  */
1109   1,                                    /* vec_align_load_cost.  */
1110   2,                                    /* vec_unalign_load_cost.  */
1111   1,                                    /* vec_store_cost.  */
1112   3,                                    /* cond_taken_branch_cost.  */
1113   1,                                    /* cond_not_taken_branch_cost.  */
1114 };
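
/* A worked example for the lea cost above, assuming the usual rtl.h
   definition COSTS_N_INSNS (N) == (N) * 4: COSTS_N_INSNS (1) + 1
   evaluates to 5, a quarter of an instruction more than an add at
   COSTS_N_INSNS (1) == 4.  That small penalty is enough to bias
   synth_mult away from lea-heavy multiply sequences (and the temporary
   registers they tend to require) whenever a shift/add sequence of the
   same nominal length exists, without ruling lea out entirely.  */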
1115 
1116 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8.  */
1117 static const
1118 struct processor_costs generic32_cost = {
1119   COSTS_N_INSNS (1),                    /* cost of an add instruction */
1120   COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
1121   COSTS_N_INSNS (1),                    /* variable shift costs */
1122   COSTS_N_INSNS (1),                    /* constant shift costs */
1123   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
1124    COSTS_N_INSNS (4),                   /*                               HI */
1125    COSTS_N_INSNS (3),                   /*                               SI */
1126    COSTS_N_INSNS (4),                   /*                               DI */
1127    COSTS_N_INSNS (2)},                  /*                               other */
1128   0,                                    /* cost of multiply per each bit set */
1129   {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
1130    COSTS_N_INSNS (26),                  /*                          HI */
1131    COSTS_N_INSNS (42),                  /*                          SI */
1132    COSTS_N_INSNS (74),                  /*                          DI */
1133    COSTS_N_INSNS (74)},                 /*                          other */
1134   COSTS_N_INSNS (1),                    /* cost of movsx */
1135   COSTS_N_INSNS (1),                    /* cost of movzx */
1136   8,                                    /* "large" insn */
1137   17,                                   /* MOVE_RATIO */
1138   4,                                    /* cost for loading QImode using movzbl */
1139   {4, 4, 4},                            /* cost of loading integer registers
1140                                            in QImode, HImode and SImode.
1141                                            Relative to reg-reg move (2).  */
1142   {4, 4, 4},                            /* cost of storing integer registers */
1143   4,                                    /* cost of reg,reg fld/fst */
1144   {12, 12, 12},                         /* cost of loading fp registers
1145                                            in SFmode, DFmode and XFmode */
1146   {6, 6, 8},                            /* cost of storing fp registers
1147                                            in SFmode, DFmode and XFmode */
1148   2,                                    /* cost of moving MMX register */
1149   {8, 8},                               /* cost of loading MMX registers
1150                                            in SImode and DImode */
1151   {8, 8},                               /* cost of storing MMX registers
1152                                            in SImode and DImode */
1153   2,                                    /* cost of moving SSE register */
1154   {8, 8, 8},                            /* cost of loading SSE registers
1155                                            in SImode, DImode and TImode */
1156   {8, 8, 8},                            /* cost of storing SSE registers
1157                                            in SImode, DImode and TImode */
1158   5,                                    /* MMX or SSE register to integer */
1159   32,                                   /* size of l1 cache.  */
1160   256,                                  /* size of l2 cache.  */
1161   64,                                   /* size of prefetch block */
1162   6,                                    /* number of parallel prefetches */
1163   3,                                    /* Branch cost */
1164   COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
1165   COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
1166   COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
1167   COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
1168   COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
1169   COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
1170   {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171    DUMMY_STRINGOP_ALGS},
1172   {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1173    DUMMY_STRINGOP_ALGS},
1174   1,                                    /* scalar_stmt_cost.  */
1175   1,                                    /* scalar load_cost.  */
1176   1,                                    /* scalar_store_cost.  */
1177   1,                                    /* vec_stmt_cost.  */
1178   1,                                    /* vec_to_scalar_cost.  */
1179   1,                                    /* scalar_to_vec_cost.  */
1180   1,                                    /* vec_align_load_cost.  */
1181   2,                                    /* vec_unalign_load_cost.  */
1182   1,                                    /* vec_store_cost.  */
1183   3,                                    /* cond_taken_branch_cost.  */
1184   1,                                    /* cond_not_taken_branch_cost.  */
1185 };
1186 
1187 const struct processor_costs *ix86_cost = &pentium_cost;
1188 
1189 /* Processor feature/optimization bitmasks.  */
1190 #define m_386 (1<<PROCESSOR_I386)
1191 #define m_486 (1<<PROCESSOR_I486)
1192 #define m_PENT (1<<PROCESSOR_PENTIUM)
1193 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1194 #define m_PENT4  (1<<PROCESSOR_PENTIUM4)
1195 #define m_NOCONA  (1<<PROCESSOR_NOCONA)
1196 #define m_CORE2  (1<<PROCESSOR_CORE2)
1197 
1198 #define m_GEODE  (1<<PROCESSOR_GEODE)
1199 #define m_K6  (1<<PROCESSOR_K6)
1200 #define m_K6_GEODE  (m_K6 | m_GEODE)
1201 #define m_K8  (1<<PROCESSOR_K8)
1202 #define m_ATHLON  (1<<PROCESSOR_ATHLON)
1203 #define m_ATHLON_K8  (m_K8 | m_ATHLON)
1204 #define m_AMDFAM10  (1<<PROCESSOR_AMDFAM10)
1205 #define m_AMD_MULTIPLE  (m_K8 | m_ATHLON | m_AMDFAM10)
1206 
1207 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1208 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1209 
/* Generic instruction choice should be a common subset of the supported
   CPUs (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
1212 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
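
/* Illustrative note on the masks above (the numeric value is an assumption
   for the example, not the real enum layout): if PROCESSOR_K8 were 8, m_K8
   would be the single bit 1 << 8, and m_AMD_MULTIPLE the OR of the K8,
   Athlon and AMDFAM10 bits.  Each tuning entry below is thus a bitmask over
   processors, and processor P is a member iff (mask >> P) & 1 is set.  */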
1213 
1214 /* Feature tests against the various tunings.  */
1215 unsigned char ix86_tune_features[X86_TUNE_LAST];
1216 
1217 /* Feature tests against the various tunings used to create ix86_tune_features
1218    based on the processor mask.  */
1219 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling it for Generic64 seems like a good code-size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro-based chips.  */
1224   m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1225 
1226   /* X86_TUNE_PUSH_MEMORY */
1227   m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1228   | m_NOCONA | m_CORE2 | m_GENERIC,
1229 
1230   /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1231   m_486 | m_PENT,
1232 
1233   /* X86_TUNE_UNROLL_STRLEN */
1234   m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1235 
1236   /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1237   m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1238 
  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were added to the P4
     based on simulation results, but once the P4 shipped no performance
     benefit was observed from them, and they increase code size.  As a
     result, icc never generates branch hints.  */
1243   0,
1244 
1245   /* X86_TUNE_DOUBLE_WITH_ADD */
1246   ~m_386,
1247 
1248   /* X86_TUNE_USE_SAHF */
1249   m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1250   | m_NOCONA | m_CORE2 | m_GENERIC,
1251 
1252   /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1253      partial dependencies.  */
1254   m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1255   | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1256 
  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls in the Generic32 compilation setting as well.  However,
     the current implementation does not eliminate partial register stalls
     very well - they can be introduced via subregs synthesized by combine
     and can occur in caller/callee saving sequences.  Because this option
     pays back little on PPro-based chips and conflicts with the partial-reg
     dependencies used by Athlon/P4-based chips, it is better to leave it
     off for generic32 for now.  */
1265   m_PPRO,
1266 
1267   /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1268   m_CORE2 | m_GENERIC,
1269 
1270   /* X86_TUNE_USE_HIMODE_FIOP */
1271   m_386 | m_486 | m_K6_GEODE,
1272 
1273   /* X86_TUNE_USE_SIMODE_FIOP */
1274   ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1275 
1276   /* X86_TUNE_USE_MOV0 */
1277   m_K6,
1278 
1279   /* X86_TUNE_USE_CLTD */
1280   ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1281 
1282   /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
1283   m_PENT4,
1284 
1285   /* X86_TUNE_SPLIT_LONG_MOVES */
1286   m_PPRO,
1287 
1288   /* X86_TUNE_READ_MODIFY_WRITE */
1289   ~m_PENT,
1290 
1291   /* X86_TUNE_READ_MODIFY */
1292   ~(m_PENT | m_PPRO),
1293 
1294   /* X86_TUNE_PROMOTE_QIMODE */
1295   m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1296   | m_GENERIC /* | m_PENT4 ? */,
1297 
1298   /* X86_TUNE_FAST_PREFIX */
1299   ~(m_PENT | m_486 | m_386),
1300 
1301   /* X86_TUNE_SINGLE_STRINGOP */
1302   m_386 | m_PENT4 | m_NOCONA,
1303 
1304   /* X86_TUNE_QIMODE_MATH */
1305   ~0,
1306 
1307   /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1308      register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
1309      might be considered for Generic32 if our scheme for avoiding partial
1310      stalls was more effective.  */
1311   ~m_PPRO,
1312 
1313   /* X86_TUNE_PROMOTE_QI_REGS */
1314   0,
1315 
1316   /* X86_TUNE_PROMOTE_HI_REGS */
1317   m_PPRO,
1318 
1319   /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
1320   m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1321 
1322   /* X86_TUNE_ADD_ESP_8 */
1323   m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1324   | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1325 
1326   /* X86_TUNE_SUB_ESP_4 */
1327   m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1328 
1329   /* X86_TUNE_SUB_ESP_8 */
1330   m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1331   | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1332 
1333   /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1334      for DFmode copies */
1335   ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1336     | m_GENERIC | m_GEODE),
1337 
1338   /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1339   m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1340 
  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here between PPro/Pentium4-based chips, which treat 128bit
     SSE registers as single units, and K8-based chips, which divide SSE
     registers into two 64bit halves.  This knob promotes all store
     destinations to 128bit to allow register renaming on 128bit SSE units,
     but usually results in one extra microop on 64bit SSE units.
     Experimental results show that disabling this option on P4 causes over
     a 20% SPECfp regression, while enabling it on K8 causes roughly a 2.4%
     regression that can be partly masked by careful scheduling of moves.  */
1350   m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1351 
1352   /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1353   m_AMDFAM10,
1354 
  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just the lower part of scalar values in the proper format,
     leaving the upper part undefined.  */
1359   m_ATHLON_K8,
1360 
1361   /* X86_TUNE_SSE_TYPELESS_STORES */
1362   m_AMD_MULTIPLE,
1363 
1364   /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1365   m_PPRO | m_PENT4 | m_NOCONA,
1366 
1367   /* X86_TUNE_MEMORY_MISMATCH_STALL */
1368   m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1369 
1370   /* X86_TUNE_PROLOGUE_USING_MOVE */
1371   m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1372 
1373   /* X86_TUNE_EPILOGUE_USING_MOVE */
1374   m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1375 
1376   /* X86_TUNE_SHIFT1 */
1377   ~m_486,
1378 
1379   /* X86_TUNE_USE_FFREEP */
1380   m_AMD_MULTIPLE,
1381 
1382   /* X86_TUNE_INTER_UNIT_MOVES */
1383   ~(m_AMD_MULTIPLE | m_GENERIC),
1384 
1385   /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1386   ~(m_AMDFAM10),
1387 
1388   /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1389      than 4 branch instructions in the 16 byte window.  */
1390   m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1391 
1392   /* X86_TUNE_SCHEDULE */
1393   m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1394 
1395   /* X86_TUNE_USE_BT */
1396   m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1397 
1398   /* X86_TUNE_USE_INCDEC */
1399   ~(m_PENT4 | m_NOCONA | m_GENERIC),
1400 
1401   /* X86_TUNE_PAD_RETURNS */
1402   m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1403 
1404   /* X86_TUNE_EXT_80387_CONSTANTS */
1405   m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1406 
1407   /* X86_TUNE_SHORTEN_X87_SSE */
1408   ~m_K8,
1409 
1410   /* X86_TUNE_AVOID_VECTOR_DECODE */
1411   m_K8 | m_GENERIC64,
1412 
  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
     HImode and SImode multiply, but the 386 and 486 do HImode multiply
     faster.  */
1415   ~(m_386 | m_486),
1416 
1417   /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1418      vector path on AMD machines.  */
1419   m_K8 | m_GENERIC64 | m_AMDFAM10,
1420 
1421   /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1422      machines.  */
1423   m_K8 | m_GENERIC64 | m_AMDFAM10,
1424 
1425   /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1426      than a MOV.  */
1427   m_PENT,
1428 
1429   /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1430      but one byte longer.  */
1431   m_PENT,
1432 
  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well.  */
1436   m_K6,
1437 
1438   /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1439      from FP to FP. */
1440   m_AMDFAM10 | m_GENERIC,
1441 
1442   /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1443      from integer to FP. */
1444   m_AMDFAM10,
1445 
1446   /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1447      with a subsequent conditional jump instruction into a single
1448      compare-and-branch uop.  */
1449   m_CORE2,
1450 };
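
/* Roughly how the table above is folded into ix86_tune_features when
   options are processed (a sketch of the override_options logic; local
   names are illustrative):

     unsigned int i, ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

   initial_ix86_arch_features below is reduced to ix86_arch_features the
   same way, keyed by ix86_arch instead of ix86_tune.  */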
1451 
1452 /* Feature tests against the various architecture variations.  */
1453 unsigned char ix86_arch_features[X86_ARCH_LAST];
1454 
1455 /* Feature tests against the various architecture variations, used to create
1456    ix86_arch_features based on the processor mask.  */
1457 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1458   /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro.  */
1459   ~(m_386 | m_486 | m_PENT | m_K6),
1460 
1461   /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
1462   ~m_386,
1463 
1464   /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1465   ~(m_386 | m_486),
1466 
1467   /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
1468   ~m_386,
1469 
1470   /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
1471   ~m_386,
1472 };
1473 
1474 static const unsigned int x86_accumulate_outgoing_args
1475   = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1476 
1477 static const unsigned int x86_arch_always_fancy_math_387
1478   = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1479     | m_NOCONA | m_CORE2 | m_GENERIC;
1480 
1481 static enum stringop_alg stringop_alg = no_stringop;
1482 
/* If the average insn count for a single function invocation is lower
   than this constant, emit fast (but longer) prologue and epilogue
   code.  */
1486 #define FAST_PROLOGUE_INSN_COUNT 20
1487 
/* Names for the 8-bit (low), 8-bit (high), and 16-bit registers.  */
1489 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1490 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1491 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1492 
1493 /* Array of the smallest class containing reg number REGNO, indexed by
1494    REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
1495 
1496 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1497 {
1498   /* ax, dx, cx, bx */
1499   AREG, DREG, CREG, BREG,
1500   /* si, di, bp, sp */
1501   SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1502   /* FP registers */
1503   FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1504   FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1505   /* arg pointer */
1506   NON_Q_REGS,
1507   /* flags, fpsr, fpcr, frame */
1508   NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1509   /* SSE registers */
1510   SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1511   SSE_REGS, SSE_REGS,
1512   /* MMX registers */
1513   MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1514   MMX_REGS, MMX_REGS,
1515   /* REX registers */
1516   NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1517   NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1518   /* SSE REX registers */
1519   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1520   SSE_REGS, SSE_REGS,
1521 };
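
/* Example, reading directly off the table above: REGNO_REG_CLASS (0) is
   AREG for %eax, while REGNO_REG_CLASS (7) is NON_Q_REGS for %esp, which
   has no 8bit counterpart in 32bit code.  */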
1522 
1523 /* The "default" register map used in 32bit mode.  */
1524 
1525 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1526 {
1527   0, 2, 1, 3, 6, 7, 4, 5,               /* general regs */
1528   12, 13, 14, 15, 16, 17, 18, 19,       /* fp regs */
1529   -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, fpcr, frame */
1530   21, 22, 23, 24, 25, 26, 27, 28,       /* SSE */
1531   29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
1532   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
1533   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
1534 };
1535 
1536 /* The "default" register map used in 64bit mode.  */
1537 
1538 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1539 {
1540   0, 1, 2, 3, 4, 5, 6, 7,               /* general regs */
1541   33, 34, 35, 36, 37, 38, 39, 40,       /* fp regs */
1542   -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, fpcr, frame */
1543   17, 18, 19, 20, 21, 22, 23, 24,       /* SSE */
1544   41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
1545   8,9,10,11,12,13,14,15,                /* extended integer registers */
1546   25, 26, 27, 28, 29, 30, 31, 32,       /* extended SSE registers */
1547 };
1548 
1549 /* Define the register numbers to be used in Dwarf debugging information.
1550    The SVR4 reference port C compiler uses the following register numbers
1551    in its Dwarf output code:
1552         0 for %eax (gcc regno = 0)
1553         1 for %ecx (gcc regno = 2)
1554         2 for %edx (gcc regno = 1)
1555         3 for %ebx (gcc regno = 3)
1556         4 for %esp (gcc regno = 7)
1557         5 for %ebp (gcc regno = 6)
1558         6 for %esi (gcc regno = 4)
1559         7 for %edi (gcc regno = 5)
1560    The following three DWARF register numbers are never generated by
1561    the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1562    believes these numbers have these meanings.
1563         8  for %eip    (no gcc equivalent)
1564         9  for %eflags (gcc regno = 17)
1565         10 for %trapno (no gcc equivalent)
1566    It is not at all clear how we should number the FP stack registers
1567    for the x86 architecture.  If the version of SDB on x86/svr4 were
1568    a bit less brain dead with respect to floating-point then we would
1569    have a precedent to follow with respect to DWARF register numbers
1570    for x86 FP registers, but the SDB on x86/svr4 is so completely
1571    broken with respect to FP registers that it is hardly worth thinking
1572    of it as something to strive for compatibility with.
1573    The version of x86/svr4 SDB I have at the moment does (partially)
1574    seem to believe that DWARF register number 11 is associated with
1575    the x86 register %st(0), but that's about all.  Higher DWARF
1576    register numbers don't seem to be associated with anything in
1577    particular, and even for DWARF regno 11, SDB only seems to under-
1578    stand that it should say that a variable lives in %st(0) (when
1579    asked via an `=' command) if we said it was in DWARF regno 11,
1580    but SDB still prints garbage when asked for the value of the
1581    variable in question (via a `/' command).
1582    (Also note that the labels SDB prints for various FP stack regs
1583    when doing an `x' command are all wrong.)
1584    Note that these problems generally don't affect the native SVR4
1585    C compiler because it doesn't allow the use of -O with -g and
1586    because when it is *not* optimizing, it allocates a memory
1587    location for each floating-point variable, and the memory
1588    location is what gets described in the DWARF AT_location
1589    attribute for the variable in question.
1590    Regardless of the severe mental illness of the x86/svr4 SDB, we
1591    do something sensible here and we use the following DWARF
1592    register numbers.  Note that these are all stack-top-relative
1593    numbers.
1594         11 for %st(0) (gcc regno = 8)
1595         12 for %st(1) (gcc regno = 9)
1596         13 for %st(2) (gcc regno = 10)
1597         14 for %st(3) (gcc regno = 11)
1598         15 for %st(4) (gcc regno = 12)
1599         16 for %st(5) (gcc regno = 13)
1600         17 for %st(6) (gcc regno = 14)
1601         18 for %st(7) (gcc regno = 15)
1602 */
1603 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1604 {
1605   0, 2, 1, 3, 6, 7, 5, 4,               /* general regs */
1606   11, 12, 13, 14, 15, 16, 17, 18,       /* fp regs */
1607   -1, 9, -1, -1, -1,                    /* arg, flags, fpsr, fpcr, frame */
1608   21, 22, 23, 24, 25, 26, 27, 28,       /* SSE registers */
1609   29, 30, 31, 32, 33, 34, 35, 36,       /* MMX registers */
1610   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
1611   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
1612 };
1613 
1614 /* Test and compare insns in i386.md store the information needed to
1615    generate branch and scc insns here.  */
1616 
1617 rtx ix86_compare_op0 = NULL_RTX;
1618 rtx ix86_compare_op1 = NULL_RTX;
1619 rtx ix86_compare_emitted = NULL_RTX;
1620 
1621 /* Define parameter passing and return registers.  */
1622 
1623 static int const x86_64_int_parameter_registers[6] =
1624 {
1625   DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1626 };
1627 
1628 static int const x86_64_ms_abi_int_parameter_registers[4] =
1629 {
1630   CX_REG, DX_REG, R8_REG, R9_REG
1631 };
1632 
1633 static int const x86_64_int_return_registers[4] =
1634 {
1635   AX_REG, DX_REG, DI_REG, SI_REG
1636 };
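
/* Example of the orderings above: for a call f (a, b, c) with three
   integer arguments, the SysV table passes a in %rdi, b in %rsi and c in
   %rdx, while the ms_abi table uses %rcx, %rdx and %r8 for the same
   call.  */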
1637 
1638 /* Define the structure for the machine field in struct function.  */
1639 
1640 struct stack_local_entry GTY(())
1641 {
1642   unsigned short mode;
1643   unsigned short n;
1644   rtx rtl;
1645   struct stack_local_entry *next;
1646 };
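
/* Sketch of how this list is consulted (modelled on assign_386_stack_local
   later in the file; illustrative rather than the exact code): scan the
   per-function list for a slot with the requested mode and slot number,
   creating and linking a new entry only on a miss:

     for (s = ix86_stack_locals; s; s = s->next)
       if (s->mode == mode && s->n == n)
         return s->rtl;

   The linked list lets each function lazily allocate only the scratch
   stack slots it actually uses.  */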
1647 
1648 /* Structure describing stack frame layout.
1649    Stack grows downward:
1650 
1651    [arguments]
1652                                               <- ARG_POINTER
1653    saved pc
1654 
1655    saved frame pointer if frame_pointer_needed
1656                                               <- HARD_FRAME_POINTER
1657    [-msave-args]
1658 
1659    [padding0]
1660 
1661    [saved regs]
1662 
1663    [padding05]
1664 
1665    [saved SSE regs]
1666 
1667    [padding1]          \
1668                         )
1669    [va_arg registers]  (
1670                         > to_allocate              <- FRAME_POINTER
1671    [frame]             (
1672                         )
1673    [padding2]          /
1674   */
1675 struct ix86_frame
1676 {
1677   int nmsave_args;
1678   int padding0;
1679   int nsseregs;
1680   int padding05;
1681   int nregs;
1682   int padding1;
1683   int va_arg_size;
1684   HOST_WIDE_INT frame;
1685   int padding2;
1686   int outgoing_arguments_size;
1687   int red_zone_size;
1688 
1689   HOST_WIDE_INT to_allocate;
1690   /* The offsets relative to ARG_POINTER.  */
1691   HOST_WIDE_INT frame_pointer_offset;
1692   HOST_WIDE_INT hard_frame_pointer_offset;
1693   HOST_WIDE_INT stack_pointer_offset;
1694 
1695   /* When save_regs_using_mov is set, emit prologue using
1696      move instead of push instructions.  */
1697   bool save_regs_using_mov;
1698 };
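
/* Typical use (a sketch; ix86_compute_frame_layout, declared further
   below, fills in every field):

     struct ix86_frame frame;
     ix86_compute_frame_layout (&frame);
     if (frame.save_regs_using_mov)
       ...emit register saves as moves rather than pushes...

   The prologue and epilogue expanders then consume to_allocate and the
   *_offset fields instead of recomputing the layout by hand.  */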
1699 
1700 /* Code model option.  */
1701 enum cmodel ix86_cmodel;
1702 /* Asm dialect.  */
1703 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1704 /* TLS dialects.  */
1705 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1706 
1707 /* Which unit we are generating floating point math for.  */
1708 enum fpmath_unit ix86_fpmath;
1709 
1710 /* Which cpu are we scheduling for.  */
1711 enum attr_cpu ix86_schedule;
1712 
1713 /* Which cpu are we optimizing for.  */
1714 enum processor_type ix86_tune;
1715 
1716 /* Which instruction set architecture to use.  */
1717 enum processor_type ix86_arch;
1718 
/* True if the SSE prefetch instruction is not a NOOP.  */
1720 int x86_prefetch_sse;
1721 
1722 /* ix86_regparm_string as a number */
1723 static int ix86_regparm;
1724 
1725 /* -mstackrealign option */
1726 extern int ix86_force_align_arg_pointer;
1727 static const char ix86_force_align_arg_pointer_string[]
1728   = "force_align_arg_pointer";
1729 
1730 static rtx (*ix86_gen_leave) (void);
1731 static rtx (*ix86_gen_pop1) (rtx);
1732 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1733 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1734 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1735 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1736 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1737 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
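
/* These hooks let callers emit word-size-independent RTL.  They are aimed
   at the DImode or SImode generators during option processing, roughly (a
   sketch of the override_options logic):

     if (TARGET_64BIT)
       ix86_gen_add3 = gen_adddi3;
     else
       ix86_gen_add3 = gen_addsi3;

   so code can say ix86_gen_add3 (dst, a, b) without testing TARGET_64BIT
   at every call site.  */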
1738 
1739 /* Preferred alignment for stack boundary in bits.  */
1740 unsigned int ix86_preferred_stack_boundary;
1741 
1742 /* Alignment for incoming stack boundary in bits specified at
1743    command line.  */
1744 static unsigned int ix86_user_incoming_stack_boundary;
1745 
1746 /* Default alignment for incoming stack boundary in bits.  */
1747 static unsigned int ix86_default_incoming_stack_boundary;
1748 
1749 /* Alignment for incoming stack boundary in bits.  */
1750 unsigned int ix86_incoming_stack_boundary;
1751 
1752 /* Values 1-5: see jump.c */
1753 int ix86_branch_cost;
1754 
1755 /* Calling abi specific va_list type nodes.  */
1756 static GTY(()) tree sysv_va_list_type_node;
1757 static GTY(()) tree ms_va_list_type_node;
1758 
1759 /* Variables which are this size or smaller are put in the data/bss
1760    or ldata/lbss sections.  */
1761 
1762 int ix86_section_threshold = 65536;
1763 
1764 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
1765 char internal_label_prefix[16];
1766 int internal_label_prefix_len;
1767 
1768 /* Fence to use after loop using movnt.  */
1769 tree x86_mfence;
1770 
1771 static int ix86_nsaved_args (void);
1772 
/* Register class used for passing a given 64bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class; gcc
   just uses SFmode or DFmode moves instead of DImode to avoid reformatting
   penalties.

   Similarly, we play games with INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half does contain padding).  */
1780 enum x86_64_reg_class
1781   {
1782     X86_64_NO_CLASS,
1783     X86_64_INTEGER_CLASS,
1784     X86_64_INTEGERSI_CLASS,
1785     X86_64_SSE_CLASS,
1786     X86_64_SSESF_CLASS,
1787     X86_64_SSEDF_CLASS,
1788     X86_64_SSEUP_CLASS,
1789     X86_64_X87_CLASS,
1790     X86_64_X87UP_CLASS,
1791     X86_64_COMPLEX_X87_CLASS,
1792     X86_64_MEMORY_CLASS
1793   };
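
/* Classification example (illustrative; the actual logic lives in
   classify_argument further below): a struct { long l; double d; }
   argument occupies two eightbytes, classified X86_64_INTEGER_CLASS and
   X86_64_SSEDF_CLASS respectively, so l is passed in an integer register
   and d in an SSE register via a DFmode move.  */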
1794 
1795 #define MAX_CLASSES 4
1796 
1797 /* Table of constants used by fldpi, fldln2, etc....  */
1798 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1799 static bool ext_80387_constants_init = 0;
1800 
1801 
1802 static struct machine_function * ix86_init_machine_status (void);
1803 static rtx ix86_function_value (const_tree, const_tree, bool);
1804 static int ix86_function_regparm (const_tree, const_tree);
1805 static void ix86_compute_frame_layout (struct ix86_frame *);
1806 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1807                                                  rtx, rtx, int);
1808 static void ix86_add_new_builtins (int);
1809 
1810 enum ix86_function_specific_strings
1811 {
1812   IX86_FUNCTION_SPECIFIC_ARCH,
1813   IX86_FUNCTION_SPECIFIC_TUNE,
1814   IX86_FUNCTION_SPECIFIC_FPMATH,
1815   IX86_FUNCTION_SPECIFIC_MAX
1816 };
1817 
1818 static char *ix86_target_string (int, int, const char *, const char *,
1819                                  const char *, bool);
1820 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1821 static void ix86_function_specific_save (struct cl_target_option *);
1822 static void ix86_function_specific_restore (struct cl_target_option *);
1823 static void ix86_function_specific_print (FILE *, int,
1824                                           struct cl_target_option *);
1825 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1826 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1827 static bool ix86_can_inline_p (tree, tree);
1828 static void ix86_set_current_function (tree);
1829 
1830 static void pro_epilogue_adjust_stack (rtx, rtx, rtx, int);
1831 
1832 
1833 /* The svr4 ABI for the i386 says that records and unions are returned
1834    in memory.  */
1835 #ifndef DEFAULT_PCC_STRUCT_RETURN
1836 #define DEFAULT_PCC_STRUCT_RETURN 1
1837 #endif
1838 
1839 /* Whether -mtune= or -march= were specified */
1840 static int ix86_tune_defaulted;
1841 static int ix86_arch_specified;
1842 
1843 /* Bit flags that specify the ISA we are compiling for.  */
1844 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1845 
1846 /* A mask of ix86_isa_flags that includes bit X if X
1847    was set or cleared on the command line.  */
1848 static int ix86_isa_flags_explicit;
1849 
1850 /* Define a set of ISAs which are available when a given ISA is
1851    enabled.  MMX and SSE ISAs are handled separately.  */
1852 
1853 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1854 #define OPTION_MASK_ISA_3DNOW_SET \
1855   (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1856 
1857 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1858 #define OPTION_MASK_ISA_SSE2_SET \
1859   (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1860 #define OPTION_MASK_ISA_SSE3_SET \
1861   (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1862 #define OPTION_MASK_ISA_SSSE3_SET \
1863   (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1864 #define OPTION_MASK_ISA_SSE4_1_SET \
1865   (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1866 #define OPTION_MASK_ISA_SSE4_2_SET \
1867   (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1868 #define OPTION_MASK_ISA_AVX_SET \
1869   (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1870 #define OPTION_MASK_ISA_FMA_SET \
1871   (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1872 
1873 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1874    as -msse4.2.  */
1875 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1876 
1877 #define OPTION_MASK_ISA_SSE4A_SET \
1878   (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1879 #define OPTION_MASK_ISA_SSE5_SET \
1880   (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1881 
/* AES and PCLMUL need SSE2 because they use xmm registers.  */
1883 #define OPTION_MASK_ISA_AES_SET \
1884   (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1885 #define OPTION_MASK_ISA_PCLMUL_SET \
1886   (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1887 
1888 #define OPTION_MASK_ISA_ABM_SET \
1889   (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1890 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1891 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1892 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
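
/* Worked example of the closure encoded above: OPTION_MASK_ISA_SSE4_2_SET
   expands through the SSE4.1, SSSE3, SSE3, SSE2 and SSE SET masks, so the
   single statement

     ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;

   leaves the SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 bits all set.  */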
1893 
1894 /* Define a set of ISAs which aren't available when a given ISA is
1895    disabled.  MMX and SSE ISAs are handled separately.  */
1896 
1897 #define OPTION_MASK_ISA_MMX_UNSET \
1898   (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1899 #define OPTION_MASK_ISA_3DNOW_UNSET \
1900   (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1901 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1902 
1903 #define OPTION_MASK_ISA_SSE_UNSET \
1904   (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1905 #define OPTION_MASK_ISA_SSE2_UNSET \
1906   (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1907 #define OPTION_MASK_ISA_SSE3_UNSET \
1908   (OPTION_MASK_ISA_SSE3 \
1909    | OPTION_MASK_ISA_SSSE3_UNSET \
1910    | OPTION_MASK_ISA_SSE4A_UNSET )
1911 #define OPTION_MASK_ISA_SSSE3_UNSET \
1912   (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1913 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1914   (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1915 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1916   (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1917 #define OPTION_MASK_ISA_AVX_UNSET \
1918   (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
1919 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
1920 
/* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
   as -mno-sse4.1.  */
1923 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1924 
1925 #define OPTION_MASK_ISA_SSE4A_UNSET \
1926   (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1927 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1928 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
1929 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
1930 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
1931 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
1932 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
1933 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
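
/* Worked example of the UNSET closure: OPTION_MASK_ISA_SSE2_UNSET pulls in
   SSE3_UNSET, which in turn covers SSSE3, SSE4.1, SSE4.2, AVX, FMA and the
   SSE4A/SSE5 chain, so -mno-sse2 disables everything built on SSE2.
   Combining both directions, the command line

     -msse4.1 -mno-ssse3

   first enables SSE through SSE4.1 via the SET closure, then clears SSSE3
   and everything above it via SSSE3_UNSET, leaving only SSE, SSE2 and
   SSE3 enabled.  */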
1934 
1935 /* Vectorization library interface and handlers.  */
1936 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1937 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1938 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1939 
/* Processor target table, indexed by processor number.  */
1941 struct ptt
1942 {
1943   const struct processor_costs *cost;           /* Processor costs */
1944   const int align_loop;                         /* Default alignments.  */
1945   const int align_loop_max_skip;
1946   const int align_jump;
1947   const int align_jump_max_skip;
1948   const int align_func;
1949 };
1950 
1951 static const struct ptt processor_target_table[PROCESSOR_max] =
1952 {
1953   {&i386_cost, 4, 3, 4, 3, 4},
1954   {&i486_cost, 16, 15, 16, 15, 16},
1955   {&pentium_cost, 16, 7, 16, 7, 16},
1956   {&pentiumpro_cost, 16, 15, 16, 10, 16},
1957   {&geode_cost, 0, 0, 0, 0, 0},
1958   {&k6_cost, 32, 7, 32, 7, 32},
1959   {&athlon_cost, 16, 7, 16, 7, 16},
1960   {&pentium4_cost, 0, 0, 0, 0, 0},
1961   {&k8_cost, 16, 7, 16, 7, 16},
1962   {&nocona_cost, 0, 0, 0, 0, 0},
1963   {&core2_cost, 16, 10, 16, 10, 16},
1964   {&generic32_cost, 16, 7, 16, 7, 16},
1965   {&generic64_cost, 16, 10, 16, 10, 16},
1966   {&amdfam10_cost, 32, 24, 32, 7, 32}
1967 };
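
/* Sketch of how this table is consumed during option processing (modelled
   on override_options; surrounding details omitted):

     ix86_cost = processor_target_table[ix86_tune].cost;
     align_functions = processor_target_table[ix86_tune].align_func;

   i.e. selecting a tuning picks both the cost model and the default
   alignment parameters in a single lookup.  */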
1968 
1969 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
1970 {
1971   "generic",
1972   "i386",
1973   "i486",
1974   "pentium",
1975   "pentium-mmx",
1976   "pentiumpro",
1977   "pentium2",
1978   "pentium3",
1979   "pentium4",
1980   "pentium-m",
1981   "prescott",
1982   "nocona",
1983   "core2",
1984   "geode",
1985   "k6",
1986   "k6-2",
1987   "k6-3",
1988   "athlon",
1989   "athlon-4",
1990   "k8",
1991   "amdfam10"
1992 };
1993 
1994 /* Implement TARGET_HANDLE_OPTION.  */
1995 
1996 static bool
1997 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1998 {
1999   switch (code)
2000     {
2001     case OPT_mmmx:
2002       if (value)
2003         {
2004           ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2005           ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2006         }
2007       else
2008         {
2009           ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2010           ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2011         }
2012       return true;
2013 
2014     case OPT_m3dnow:
2015       if (value)
2016         {
2017           ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2018           ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2019         }
2020       else
2021         {
2022           ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2023           ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2024         }
2025       return true;
2026 
2027     case OPT_m3dnowa:
2028       return false;
2029 
2030     case OPT_msse:
2031       if (value)
2032         {
2033           ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2034           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2035         }
2036       else
2037         {
2038           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2039           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2040         }
2041       return true;
2042 
2043     case OPT_msse2:
2044       if (value)
2045         {
2046           ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2047           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2048         }
2049       else
2050         {
2051           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2052           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2053         }
2054       return true;
2055 
2056     case OPT_msse3:
2057       if (value)
2058         {
2059           ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2060           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2061         }
2062       else
2063         {
2064           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2065           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2066         }
2067       return true;
2068 
2069     case OPT_mssse3:
2070       if (value)
2071         {
2072           ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2073           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2074         }
2075       else
2076         {
2077           ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2078           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2079         }
2080       return true;
2081 
2082     case OPT_msse4_1:
2083       if (value)
2084         {
2085           ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2086           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2087         }
2088       else
2089         {
2090           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2091           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2092         }
2093       return true;
2094 
2095     case OPT_msse4_2:
2096       if (value)
2097         {
2098           ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2099           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2100         }
2101       else
2102         {
2103           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2104           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2105         }
2106       return true;
2107 
2108     case OPT_mavx:
2109       if (value)
2110         {
2111           ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2112           ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2113         }
2114       else
2115         {
2116           ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2117           ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2118         }
2119       return true;
2120 
2121     case OPT_mfma:
2122       if (value)
2123         {
2124           ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2125           ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2126         }
2127       else
2128         {
2129           ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2130           ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2131         }
2132       return true;
2133 
2134     case OPT_msse4:
2135       ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2136       ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2137       return true;
2138 
2139     case OPT_mno_sse4:
2140       ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2141       ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2142       return true;
2143 
2144     case OPT_msse4a:
2145       if (value)
2146         {
2147           ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2148           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2149         }
2150       else
2151         {
2152           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2153           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2154         }
2155       return true;
2156 
2157     case OPT_msse5:
2158       if (value)
2159         {
2160           ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2161           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2162         }
2163       else
2164         {
2165           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2166           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2167         }
2168       return true;
2169 
2170     case OPT_mabm:
2171       if (value)
2172         {
2173           ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2174           ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2175         }
2176       else
2177         {
2178           ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2179           ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2180         }
2181       return true;
2182 
2183     case OPT_mpopcnt:
2184       if (value)
2185         {
2186           ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2187           ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2188         }
2189       else
2190         {
2191           ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2192           ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2193         }
2194       return true;
2195 
2196     case OPT_msahf:
2197       if (value)
2198         {
2199           ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2200           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2201         }
2202       else
2203         {
2204           ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2205           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2206         }
2207       return true;
2208 
2209     case OPT_mcx16:
2210       if (value)
2211         {
2212           ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2213           ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2214         }
2215       else
2216         {
2217           ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2218           ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2219         }
2220       return true;
2221 
2222     case OPT_maes:
2223       if (value)
2224         {
2225           ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2226           ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2227         }
2228       else
2229         {
2230           ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2231           ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2232         }
2233       return true;
2234 
2235     case OPT_mpclmul:
2236       if (value)
2237         {
2238           ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2239           ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2240         }
2241       else
2242         {
2243           ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2244           ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2245         }
2246       return true;
2247 
2248     default:
2249       return true;
2250     }
2251 }
2252 
/* Return a string that documents the current -m options.  The caller is
   responsible for freeing the string.  */
2255 
2256 static char *
2257 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2258                     const char *fpmath, bool add_nl_p)
2259 {
2260   struct ix86_target_opts
2261   {
2262     const char *option;         /* option string */
2263     int mask;                   /* isa mask options */
2264   };
2265 
  /* This table is ordered so that options like -msse5 or -msse4.2 that
     imply preceding options are matched first.  */
2268   static struct ix86_target_opts isa_opts[] =
2269   {
2270     { "-m64",           OPTION_MASK_ISA_64BIT },
2271     { "-msse5",         OPTION_MASK_ISA_SSE5 },
2272     { "-msse4a",        OPTION_MASK_ISA_SSE4A },
2273     { "-msse4.2",       OPTION_MASK_ISA_SSE4_2 },
2274     { "-msse4.1",       OPTION_MASK_ISA_SSE4_1 },
2275     { "-mssse3",        OPTION_MASK_ISA_SSSE3 },
2276     { "-msse3",         OPTION_MASK_ISA_SSE3 },
2277     { "-msse2",         OPTION_MASK_ISA_SSE2 },
2278     { "-msse",          OPTION_MASK_ISA_SSE },
2279     { "-m3dnow",        OPTION_MASK_ISA_3DNOW },
2280     { "-m3dnowa",       OPTION_MASK_ISA_3DNOW_A },
2281     { "-mmmx",          OPTION_MASK_ISA_MMX },
2282     { "-mabm",          OPTION_MASK_ISA_ABM },
2283     { "-mpopcnt",       OPTION_MASK_ISA_POPCNT },
2284     { "-maes",          OPTION_MASK_ISA_AES },
2285     { "-mpclmul",       OPTION_MASK_ISA_PCLMUL },
2286   };
2287 
2288   /* Flag options.  */
2289   static struct ix86_target_opts flag_opts[] =
2290   {
2291     { "-m128bit-long-double",           MASK_128BIT_LONG_DOUBLE },
2292     { "-m80387",                        MASK_80387 },
2293     { "-maccumulate-outgoing-args",     MASK_ACCUMULATE_OUTGOING_ARGS },
2294     { "-malign-double",                 MASK_ALIGN_DOUBLE },
2295     { "-mcld",                          MASK_CLD },
2296     { "-mfp-ret-in-387",                MASK_FLOAT_RETURNS },
2297     { "-mieee-fp",                      MASK_IEEE_FP },
2298     { "-minline-all-stringops",         MASK_INLINE_ALL_STRINGOPS },
2299     { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2300     { "-mms-bitfields",                 MASK_MS_BITFIELD_LAYOUT },
2301     { "-mno-align-stringops",           MASK_NO_ALIGN_STRINGOPS },
2302     { "-mno-fancy-math-387",            MASK_NO_FANCY_MATH_387 },
2303     { "-mno-fused-madd",                MASK_NO_FUSED_MADD },
2304     { "-mno-push-args",                 MASK_NO_PUSH_ARGS },
2305     { "-mno-red-zone",                  MASK_NO_RED_ZONE },
2306     { "-momit-leaf-frame-pointer",      MASK_OMIT_LEAF_FRAME_POINTER },
2307     { "-mrecip",                        MASK_RECIP },
2308     { "-mrtd",                          MASK_RTD },
2309     { "-msseregparm",                   MASK_SSEREGPARM },
2310     { "-mstack-arg-probe",              MASK_STACK_PROBE },
2311     { "-mtls-direct-seg-refs",          MASK_TLS_DIRECT_SEG_REFS },
2312   };
2313 
2314   const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2315 
2316   char isa_other[40];
2317   char target_other[40];
2318   unsigned num = 0;
2319   unsigned i, j;
2320   char *ret;
2321   char *ptr;
2322   size_t len;
2323   size_t line_len;
2324   size_t sep_len;
2325 
2326   memset (opts, '\0', sizeof (opts));
2327 
2328   /* Add -march= option.  */
2329   if (arch)
2330     {
2331       opts[num][0] = "-march=";
2332       opts[num++][1] = arch;
2333     }
2334 
2335   /* Add -mtune= option.  */
2336   if (tune)
2337     {
2338       opts[num][0] = "-mtune=";
2339       opts[num++][1] = tune;
2340     }
2341 
2342   /* Pick out the options in isa options.  */
2343   for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2344     {
2345       if ((isa & isa_opts[i].mask) != 0)
2346         {
2347           opts[num++][0] = isa_opts[i].option;
2348           isa &= ~ isa_opts[i].mask;
2349         }
2350     }
2351 
2352   if (isa && add_nl_p)
2353     {
2354       opts[num++][0] = isa_other;
2355       sprintf (isa_other, "(other isa: 0x%x)", isa);
2356     }
2357 
2358   /* Add flag options.  */
2359   for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2360     {
2361       if ((flags & flag_opts[i].mask) != 0)
2362         {
2363           opts[num++][0] = flag_opts[i].option;
2364           flags &= ~ flag_opts[i].mask;
2365         }
2366     }
2367 
2368   if (flags && add_nl_p)
2369     {
2370       opts[num++][0] = target_other;
      sprintf (target_other, "(other flags: 0x%x)", flags);
2372     }
2373 
2374   /* Add -fpmath= option.  */
2375   if (fpmath)
2376     {
2377       opts[num][0] = "-mfpmath=";
2378       opts[num++][1] = fpmath;
2379     }
2380 
2381   /* Any options?  */
2382   if (num == 0)
2383     return NULL;
2384 
2385   gcc_assert (num < ARRAY_SIZE (opts));
2386 
2387   /* Size the string.  */
2388   len = 0;
2389   sep_len = (add_nl_p) ? 3 : 1;
2390   for (i = 0; i < num; i++)
2391     {
2392       len += sep_len;
2393       for (j = 0; j < 2; j++)
2394         if (opts[i][j])
2395           len += strlen (opts[i][j]);
2396     }
2397 
2398   /* Build the string.  */
2399   ret = ptr = (char *) xmalloc (len);
2400   line_len = 0;
2401 
2402   for (i = 0; i < num; i++)
2403     {
2404       size_t len2[2];
2405 
2406       for (j = 0; j < 2; j++)
2407         len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2408 
2409       if (i != 0)
2410         {
2411           *ptr++ = ' ';
2412           line_len++;
2413 
2414           if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2415             {
2416               *ptr++ = '\\';
2417               *ptr++ = '\n';
2418               line_len = 0;
2419             }
2420         }
2421 
2422       for (j = 0; j < 2; j++)
2423         if (opts[i][j])
2424           {
2425             memcpy (ptr, opts[i][j], len2[j]);
2426             ptr += len2[j];
2427             line_len += len2[j];
2428           }
2429     }
2430 
2431   *ptr = '\0';
2432   gcc_assert (ret + len >= ptr);
2433 
2434   return ret;
2435 }
2436 
2437 /* Function that is callable from the debugger to print the current
2438    options.  */
2439 void
2440 ix86_debug_options (void)
2441 {
2442   char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2443                                    ix86_arch_string, ix86_tune_string,
2444                                    ix86_fpmath_string, true);
2445 
2446   if (opts)
2447     {
2448       fprintf (stderr, "%s\n\n", opts);
2449       free (opts);
2450     }
2451   else
2452     fprintf (stderr, "<no options>\n\n");
2453 
2454   return;
2455 }
2456 
2457 /* Sometimes certain combinations of command options do not make
2458    sense on a particular target machine.  You can define a macro
2459    `OVERRIDE_OPTIONS' to take account of this.  This macro, if
2460    defined, is executed once just after all the command options have
2461    been parsed.
2462 
2463    Don't use this macro to turn on various extra optimizations for
2464    `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
2465 
2466 void
2467 override_options (bool main_args_p)
2468 {
2469   int i;
2470   unsigned int ix86_arch_mask, ix86_tune_mask;
2471   const char *prefix;
2472   const char *suffix;
2473   const char *sw;
2474 
2475   /* Comes from final.c -- no real reason to change it.  */
2476 #define MAX_CODE_ALIGN 16
2477 
2478   enum pta_flags
2479     {
2480       PTA_SSE = 1 << 0,
2481       PTA_SSE2 = 1 << 1,
2482       PTA_SSE3 = 1 << 2,
2483       PTA_MMX = 1 << 3,
2484       PTA_PREFETCH_SSE = 1 << 4,
2485       PTA_3DNOW = 1 << 5,
2486       PTA_3DNOW_A = 1 << 6,
2487       PTA_64BIT = 1 << 7,
2488       PTA_SSSE3 = 1 << 8,
2489       PTA_CX16 = 1 << 9,
2490       PTA_POPCNT = 1 << 10,
2491       PTA_ABM = 1 << 11,
2492       PTA_SSE4A = 1 << 12,
2493       PTA_NO_SAHF = 1 << 13,
2494       PTA_SSE4_1 = 1 << 14,
2495       PTA_SSE4_2 = 1 << 15,
2496       PTA_SSE5 = 1 << 16,
2497       PTA_AES = 1 << 17,
2498       PTA_PCLMUL = 1 << 18,
2499       PTA_AVX = 1 << 19,
2500       PTA_FMA = 1 << 20 
2501     };
2502 
2503   static struct pta
2504     {
2505       const char *const name;           /* processor name or nickname.  */
2506       const enum processor_type processor;
2507       const enum attr_cpu schedule;
2508       const unsigned /*enum pta_flags*/ flags;
2509     }
2510   const processor_alias_table[] =
2511     {
2512       {"i386", PROCESSOR_I386, CPU_NONE, 0},
2513       {"i486", PROCESSOR_I486, CPU_NONE, 0},
2514       {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2515       {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2516       {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2517       {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2518       {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2519       {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2520       {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2521       {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2522       {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2523       {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2524       {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2525         PTA_MMX | PTA_SSE},
2526       {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2527         PTA_MMX | PTA_SSE},
2528       {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2529         PTA_MMX | PTA_SSE | PTA_SSE2},
2530       {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
        PTA_MMX | PTA_SSE | PTA_SSE2},
2532       {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2533         PTA_MMX | PTA_SSE | PTA_SSE2},
2534       {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2535         PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2536       {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2537         PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2538         | PTA_CX16 | PTA_NO_SAHF},
2539       {"core2", PROCESSOR_CORE2, CPU_CORE2,
2540         PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2541         | PTA_SSSE3 | PTA_CX16},
2542       {"geode", PROCESSOR_GEODE, CPU_GEODE,
        PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2544       {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2545       {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2546       {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2547       {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2548         PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2549       {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2550         PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2551       {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2552         PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2553       {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2554         PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2555       {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2556         PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2557       {"x86-64", PROCESSOR_K8, CPU_K8,
2558         PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2559       {"k8", PROCESSOR_K8, CPU_K8,
2560         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2561         | PTA_SSE2 | PTA_NO_SAHF},
2562       {"k8-sse3", PROCESSOR_K8, CPU_K8,
2563         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2564         | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2565       {"opteron", PROCESSOR_K8, CPU_K8,
2566         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2567         | PTA_SSE2 | PTA_NO_SAHF},
2568       {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2569         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2570         | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2571       {"athlon64", PROCESSOR_K8, CPU_K8,
2572         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2573         | PTA_SSE2 | PTA_NO_SAHF},
2574       {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2575         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2576         | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2577       {"athlon-fx", PROCESSOR_K8, CPU_K8,
2578         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2579         | PTA_SSE2 | PTA_NO_SAHF},
2580       {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2581         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2582         | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2583       {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2584         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2585         | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2586       {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2587         0 /* flags are only used for -march switch.  */ },
2588       {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2589         PTA_64BIT /* flags are only used for -march switch.  */ },
2590     };
2591 
2592   int const pta_size = ARRAY_SIZE (processor_alias_table);
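
  /* Reading the table: entries are matched by name against -march=/-mtune=
     below.  E.g. -march=k8-sse3 selects PROCESSOR_K8 with the CPU_K8
     scheduling model and, unless explicitly overridden by the user, enables
     PTA_MMX, PTA_3DNOW, PTA_3DNOW_A, PTA_SSE, PTA_SSE2 and PTA_SSE3, while
     PTA_NO_SAHF records that SAHF must not be assumed in 64-bit mode.  */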
2593 
  /* Set up prefix/suffix so the error messages refer to either the
     command-line argument or the attribute(target).  */
2596   if (main_args_p)
2597     {
2598       prefix = "-m";
2599       suffix = "";
2600       sw = "switch";
2601     }
2602   else
2603     {
2604       prefix = "option(\"";
2605       suffix = "\")";
2606       sw = "attribute";
2607     }
2608 
2609 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2610   SUBTARGET_OVERRIDE_OPTIONS;
2611 #endif
2612 
2613 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2614   SUBSUBTARGET_OVERRIDE_OPTIONS;
2615 #endif
2616 
  /* -fPIC is the default for 64-bit Mach-O.  */
2618   if (TARGET_MACHO && TARGET_64BIT)
2619     flag_pic = 2;
2620 
  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overridden by command line options.  */
2623   if (TARGET_64BIT)
2624     {
2625       /* Mach-O doesn't support omitting the frame pointer for now.  */
2626       if (flag_omit_frame_pointer == 2)
2627         flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2628       if (flag_asynchronous_unwind_tables == 2)
2629         flag_asynchronous_unwind_tables = 1;
2630       if (flag_pcc_struct_return == 2)
2631         flag_pcc_struct_return = 0;
2632     }
2633   else
2634     {
2635       if (flag_omit_frame_pointer == 2)
2636         flag_omit_frame_pointer = 0;
2637       if (flag_asynchronous_unwind_tables == 2)
2638         flag_asynchronous_unwind_tables = 0;
2639       if (flag_pcc_struct_return == 2)
2640         flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2641     }
2642 
2643   /* Need to check -mtune=generic first.  */
2644   if (ix86_tune_string)
2645     {
2646       if (!strcmp (ix86_tune_string, "generic")
2647           || !strcmp (ix86_tune_string, "i686")
          /* As special support for cross compilers we read -mtune=native
             as -mtune=generic.  With native compilers we won't see
             -mtune=native, as the driver will have rewritten it.  */
2651           || !strcmp (ix86_tune_string, "native"))
2652         {
2653           if (TARGET_64BIT)
2654             ix86_tune_string = "generic64";
2655           else
2656             ix86_tune_string = "generic32";
2657         }
2658       /* If this call is for setting the option attribute, allow the
2659          generic32/generic64 that was previously set.  */
2660       else if (!main_args_p
2661                && (!strcmp (ix86_tune_string, "generic32")
2662                    || !strcmp (ix86_tune_string, "generic64")))
2663         ;
2664       else if (!strncmp (ix86_tune_string, "generic", 7))
2665         error ("bad value (%s) for %stune=%s %s",
2666                ix86_tune_string, prefix, suffix, sw);
2667     }
2668   else
2669     {
2670       if (ix86_arch_string)
2671         ix86_tune_string = ix86_arch_string;
2672       if (!ix86_tune_string)
2673         {
2674           ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2675           ix86_tune_defaulted = 1;
2676         }
2677 
2678       /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
2679          need to use a sensible tune option.  */
2680       if (!strcmp (ix86_tune_string, "generic")
2681           || !strcmp (ix86_tune_string, "x86-64")
2682           || !strcmp (ix86_tune_string, "i686"))
2683         {
2684           if (TARGET_64BIT)
2685             ix86_tune_string = "generic64";
2686           else
2687             ix86_tune_string = "generic32";
2688         }
2689     }
2690   if (ix86_stringop_string)
2691     {
2692       if (!strcmp (ix86_stringop_string, "rep_byte"))
2693         stringop_alg = rep_prefix_1_byte;
2694       else if (!strcmp (ix86_stringop_string, "libcall"))
2695         stringop_alg = libcall;
2696       else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2697         stringop_alg = rep_prefix_4_byte;
2698       else if (!strcmp (ix86_stringop_string, "rep_8byte")
2699                && TARGET_64BIT)
2700         /* rep; movq isn't available in 32-bit code.  */
2701         stringop_alg = rep_prefix_8_byte;
2702       else if (!strcmp (ix86_stringop_string, "byte_loop"))
2703         stringop_alg = loop_1_byte;
2704       else if (!strcmp (ix86_stringop_string, "loop"))
2705         stringop_alg = loop;
2706       else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2707         stringop_alg = unrolled_loop;
2708       else
2709         error ("bad value (%s) for %sstringop-strategy=%s %s",
2710                ix86_stringop_string, prefix, suffix, sw);
2711     }
2712   if (!strcmp (ix86_tune_string, "x86-64"))
2713     warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated.  Use "
2714              "%stune=k8%s or %stune=generic%s instead as appropriate.",
2715              prefix, suffix, prefix, suffix, prefix, suffix);
2716 
2717   if (!ix86_arch_string)
2718     ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2719   else
2720     ix86_arch_specified = 1;
2721 
2722   if (!strcmp (ix86_arch_string, "generic"))
2723     error ("generic CPU can be used only for %stune=%s %s",
2724            prefix, suffix, sw);
2725   if (!strncmp (ix86_arch_string, "generic", 7))
2726     error ("bad value (%s) for %sarch=%s %s",
2727            ix86_arch_string, prefix, suffix, sw);
2728 
2729   if (ix86_cmodel_string != 0)
2730     {
2731       if (!strcmp (ix86_cmodel_string, "small"))
2732         ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2733       else if (!strcmp (ix86_cmodel_string, "medium"))
2734         ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2735       else if (!strcmp (ix86_cmodel_string, "large"))
2736         ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2737       else if (flag_pic)
2738         error ("code model %s does not support PIC mode", ix86_cmodel_string);
2739       else if (!strcmp (ix86_cmodel_string, "32"))
2740         ix86_cmodel = CM_32;
2741       else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2742         ix86_cmodel = CM_KERNEL;
2743       else
2744         error ("bad value (%s) for %scmodel=%s %s",
2745                ix86_cmodel_string, prefix, suffix, sw);
2746     }
2747   else
2748     {
2749       /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2750          use of rip-relative addressing.  This eliminates fixups that
2751          would otherwise be needed if this object is to be placed in a
2752          DLL, and is essentially just as efficient as direct addressing.  */
2753       if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2754         ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2755       else if (TARGET_64BIT)
2756         ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2757       else
2758         ix86_cmodel = CM_32;
2759     }
2760   if (ix86_asm_string != 0)
2761     {
2762       if (! TARGET_MACHO
2763           && !strcmp (ix86_asm_string, "intel"))
2764         ix86_asm_dialect = ASM_INTEL;
2765       else if (!strcmp (ix86_asm_string, "att"))
2766         ix86_asm_dialect = ASM_ATT;
2767       else
2768         error ("bad value (%s) for %sasm=%s %s",
2769                ix86_asm_string, prefix, suffix, sw);
2770     }
2771   if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2772     error ("code model %qs not supported in the %s bit mode",
2773            ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2774   if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2775     sorry ("%i-bit mode not compiled in",
2776            (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2777 
2778   for (i = 0; i < pta_size; i++)
2779     if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2780       {
2781         ix86_schedule = processor_alias_table[i].schedule;
2782         ix86_arch = processor_alias_table[i].processor;
2783         /* Default cpu tuning to the architecture.  */
2784         ix86_tune = ix86_arch;
2785 
2786         if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2787           error ("CPU you selected does not support x86-64 "
2788                  "instruction set");
2789 
2790         if (processor_alias_table[i].flags & PTA_MMX
2791             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2792           ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2793         if (processor_alias_table[i].flags & PTA_3DNOW
2794             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2795           ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2796         if (processor_alias_table[i].flags & PTA_3DNOW_A
2797             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2798           ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2799         if (processor_alias_table[i].flags & PTA_SSE
2800             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2801           ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2802         if (processor_alias_table[i].flags & PTA_SSE2
2803             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2804           ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2805         if (processor_alias_table[i].flags & PTA_SSE3
2806             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2807           ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2808         if (processor_alias_table[i].flags & PTA_SSSE3
2809             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2810           ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2811         if (processor_alias_table[i].flags & PTA_SSE4_1
2812             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2813           ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2814         if (processor_alias_table[i].flags & PTA_SSE4_2
2815             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2816           ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2817         if (processor_alias_table[i].flags & PTA_AVX
2818             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2819           ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2820         if (processor_alias_table[i].flags & PTA_FMA
2821             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2822           ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2823         if (processor_alias_table[i].flags & PTA_SSE4A
2824             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2825           ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2826         if (processor_alias_table[i].flags & PTA_SSE5
2827             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2828           ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2829         if (processor_alias_table[i].flags & PTA_ABM
2830             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2831           ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2832         if (processor_alias_table[i].flags & PTA_CX16
2833             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2834           ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2835         if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2836             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2837           ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2838         if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2839             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2840           ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2841         if (processor_alias_table[i].flags & PTA_AES
2842             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2843           ix86_isa_flags |= OPTION_MASK_ISA_AES;
2844         if (processor_alias_table[i].flags & PTA_PCLMUL
2845             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2846           ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2847         if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2848           x86_prefetch_sse = true;
2849 
2850         break;
2851       }
2852 
2853   if (i == pta_size)
2854     error ("bad value (%s) for %sarch=%s %s",
2855            ix86_arch_string, prefix, suffix, sw);
2856 
2857   ix86_arch_mask = 1u << ix86_arch;
2858   for (i = 0; i < X86_ARCH_LAST; ++i)
2859     ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
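
  /* For instance, with ix86_arch == PROCESSOR_CORE2 the mask is
     1u << PROCESSOR_CORE2, so a given ix86_arch_features[] entry becomes 1
     exactly when the matching initial_ix86_arch_features[] entry has the
     PROCESSOR_CORE2 bit set.  */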
2860 
2861   for (i = 0; i < pta_size; i++)
2862     if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2863       {
2864         ix86_schedule = processor_alias_table[i].schedule;
2865         ix86_tune = processor_alias_table[i].processor;
2866         if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2867           {
2868             if (ix86_tune_defaulted)
2869               {
2870                 ix86_tune_string = "x86-64";
2871                 for (i = 0; i < pta_size; i++)
2872                   if (! strcmp (ix86_tune_string,
2873                                 processor_alias_table[i].name))
2874                     break;
2875                 ix86_schedule = processor_alias_table[i].schedule;
2876                 ix86_tune = processor_alias_table[i].processor;
2877               }
2878             else
2879               error ("CPU you selected does not support x86-64 "
2880                      "instruction set");
2881           }
2882 
        /* Intel CPUs have always interpreted SSE prefetch instructions as
           NOPs, so we can enable SSE prefetch instructions even when -mtune
           (rather than -march) points us to a processor that has them.
           However, the VIA C3 gives a SIGILL, so we only do that for i686
           and higher processors.  */
2888         if (TARGET_CMOVE
2889             && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2890           x86_prefetch_sse = true;
2891         break;
2892       }
2893   if (i == pta_size)
2894     error ("bad value (%s) for %stune=%s %s",
2895            ix86_tune_string, prefix, suffix, sw);
2896 
2897   ix86_tune_mask = 1u << ix86_tune;
2898   for (i = 0; i < X86_TUNE_LAST; ++i)
2899     ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2900 
2901   if (optimize_size)
2902     ix86_cost = &ix86_size_cost;
2903   else
2904     ix86_cost = processor_target_table[ix86_tune].cost;
2905 
2906   /* Arrange to set up i386_stack_locals for all functions.  */
2907   init_machine_status = ix86_init_machine_status;
2908 
2909   /* Validate -mregparm= value.  */
2910   if (ix86_regparm_string)
2911     {
2912       if (TARGET_64BIT)
2913         warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
2914       i = atoi (ix86_regparm_string);
2915       if (i < 0 || i > REGPARM_MAX)
2916         error ("%sregparm=%d%s is not between 0 and %d",
2917                prefix, i, suffix, REGPARM_MAX);
2918       else
2919         ix86_regparm = i;
2920     }
2921   if (TARGET_64BIT)
2922     ix86_regparm = REGPARM_MAX;
2923 
2924   /* If the user has provided any of the -malign-* options,
2925      warn and use that value only if -falign-* is not set.
2926      Remove this code in GCC 3.2 or later.  */
2927   if (ix86_align_loops_string)
2928     {
2929       warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
2930                prefix, suffix, suffix);
2931       if (align_loops == 0)
2932         {
2933           i = atoi (ix86_align_loops_string);
2934           if (i < 0 || i > MAX_CODE_ALIGN)
2935             error ("%salign-loops=%d%s is not between 0 and %d",
2936                    prefix, i, suffix, MAX_CODE_ALIGN);
2937           else
2938             align_loops = 1 << i;
2939         }
2940     }
2941 
2942   if (ix86_align_jumps_string)
2943     {
2944       warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
2945                prefix, suffix, suffix);
2946       if (align_jumps == 0)
2947         {
2948           i = atoi (ix86_align_jumps_string);
2949           if (i < 0 || i > MAX_CODE_ALIGN)
2950             error ("%salign-loops=%d%s is not between 0 and %d",
2951                    prefix, i, suffix, MAX_CODE_ALIGN);
2952           else
2953             align_jumps = 1 << i;
2954         }
2955     }
2956 
2957   if (ix86_align_funcs_string)
2958     {
2959       warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
2960                prefix, suffix, suffix);
2961       if (align_functions == 0)
2962         {
2963           i = atoi (ix86_align_funcs_string);
2964           if (i < 0 || i > MAX_CODE_ALIGN)
2965             error ("%salign-loops=%d%s is not between 0 and %d",
2966                    prefix, i, suffix, MAX_CODE_ALIGN);
2967           else
2968             align_functions = 1 << i;
2969         }
2970     }
2971 
2972   /* Default align_* from the processor table.  */
2973   if (align_loops == 0)
2974     {
2975       align_loops = processor_target_table[ix86_tune].align_loop;
2976       align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2977     }
2978   if (align_jumps == 0)
2979     {
2980       align_jumps = processor_target_table[ix86_tune].align_jump;
2981       align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2982     }
2983   if (align_functions == 0)
2984     {
2985       align_functions = processor_target_table[ix86_tune].align_func;
2986     }
2987 
2988   /* Validate -mbranch-cost= value, or provide default.  */
2989   ix86_branch_cost = ix86_cost->branch_cost;
2990   if (ix86_branch_cost_string)
2991     {
2992       i = atoi (ix86_branch_cost_string);
2993       if (i < 0 || i > 5)
2994         error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
2995       else
2996         ix86_branch_cost = i;
2997     }
2998   if (ix86_section_threshold_string)
2999     {
3000       i = atoi (ix86_section_threshold_string);
3001       if (i < 0)
3002         error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3003       else
3004         ix86_section_threshold = i;
3005     }
3006 
3007   if (ix86_tls_dialect_string)
3008     {
3009       if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3010         ix86_tls_dialect = TLS_DIALECT_GNU;
3011       else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3012         ix86_tls_dialect = TLS_DIALECT_GNU2;
3013       else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3014         ix86_tls_dialect = TLS_DIALECT_SUN;
3015       else
3016         error ("bad value (%s) for %stls-dialect=%s %s",
3017                ix86_tls_dialect_string, prefix, suffix, sw);
3018     }
3019 
3020   if (ix87_precision_string)
3021     {
3022       i = atoi (ix87_precision_string);
3023       if (i != 32 && i != 64 && i != 80)
3024         error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3025     }
3026 
3027   if (TARGET_64BIT)
3028     {
3029       target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3030 
3031       /* Enable by default the SSE and MMX builtins.  Do allow the user to
3032          explicitly disable any of these.  In particular, disabling SSE and
3033          MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
        ix86_isa_flags
          |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
               | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3038 
3039       if (TARGET_RTD)
3040         warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3041     }
3042   else
3043     {
3044       target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3045 
      if (!ix86_arch_specified)
        ix86_isa_flags
          |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3049 
      /* The i386 ABI does not specify a red zone.  It still makes sense
         to use one when the programmer takes care to keep the stack from
         being destroyed.  */
3052       if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3053         target_flags |= MASK_NO_RED_ZONE;
3054     }
3055 
3056   /* Keep nonleaf frame pointers.  */
3057   if (flag_omit_frame_pointer)
3058     target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3059   else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3060     flag_omit_frame_pointer = 1;
3061 
3062   /* If we're doing fast math, we don't care about comparison order
3063      wrt NaNs.  This lets us use a shorter comparison sequence.  */
3064   if (flag_finite_math_only)
3065     target_flags &= ~MASK_IEEE_FP;
3066 
3067   /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3068      since the insns won't need emulation.  */
3069   if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3070     target_flags &= ~MASK_NO_FANCY_MATH_387;
3071 
3072   /* Likewise, if the target doesn't have a 387, or we've specified
3073      software floating point, don't use 387 inline intrinsics.  */
3074   if (!TARGET_80387)
3075     target_flags |= MASK_NO_FANCY_MATH_387;
3076 
3077   /* Turn on MMX builtins for -msse.  */
3078   if (TARGET_SSE)
3079     {
3080       ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3081       x86_prefetch_sse = true;
3082     }
3083 
3084   /* Turn on popcnt instruction for -msse4.2 or -mabm.  */
3085   if (TARGET_SSE4_2 || TARGET_ABM)
3086     ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3087 
  if (!TARGET_64BIT && TARGET_SAVE_ARGS)
    error ("-msave-args makes no sense in 32-bit mode");
3090 
3091   /* Validate -mpreferred-stack-boundary= value or default it to
3092      PREFERRED_STACK_BOUNDARY_DEFAULT.  */
3093   ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3094   if (ix86_preferred_stack_boundary_string)
3095     {
3096       i = atoi (ix86_preferred_stack_boundary_string);
3097       if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3098         error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3099                prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3100       else
3101         ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3102     }
3103 
3104   /* Set the default value for -mstackrealign.  */
3105   if (ix86_force_align_arg_pointer == -1)
3106     ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3107 
3108   /* Validate -mincoming-stack-boundary= value or default it to
3109      MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
3110   if (ix86_force_align_arg_pointer)
3111     ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3112   else
3113     ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3114   ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3115   if (ix86_incoming_stack_boundary_string)
3116     {
3117       i = atoi (ix86_incoming_stack_boundary_string);
3118       if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3119         error ("-mincoming-stack-boundary=%d is not between %d and 12",
3120                i, TARGET_64BIT ? 4 : 2);
3121       else
3122         {
3123           ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3124           ix86_incoming_stack_boundary
3125             = ix86_user_incoming_stack_boundary;
3126         }
3127     }
3128 
3129   /* Accept -msseregparm only if at least SSE support is enabled.  */
3130   if (TARGET_SSEREGPARM
3131       && ! TARGET_SSE)
3132     error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3133 
3134   ix86_fpmath = TARGET_FPMATH_DEFAULT;
3135   if (ix86_fpmath_string != 0)
3136     {
3137       if (! strcmp (ix86_fpmath_string, "387"))
3138         ix86_fpmath = FPMATH_387;
3139       else if (! strcmp (ix86_fpmath_string, "sse"))
3140         {
3141           if (!TARGET_SSE)
3142             {
3143               warning (0, "SSE instruction set disabled, using 387 arithmetics");
3144               ix86_fpmath = FPMATH_387;
3145             }
3146           else
3147             ix86_fpmath = FPMATH_SSE;
3148         }
3149       else if (! strcmp (ix86_fpmath_string, "387,sse")
3150                || ! strcmp (ix86_fpmath_string, "387+sse")
3151                || ! strcmp (ix86_fpmath_string, "sse,387")
3152                || ! strcmp (ix86_fpmath_string, "sse+387")
3153                || ! strcmp (ix86_fpmath_string, "both"))
3154         {
3155           if (!TARGET_SSE)
3156             {
3157               warning (0, "SSE instruction set disabled, using 387 arithmetics");
3158               ix86_fpmath = FPMATH_387;
3159             }
3160           else if (!TARGET_80387)
3161             {
3162               warning (0, "387 instruction set disabled, using SSE arithmetics");
3163               ix86_fpmath = FPMATH_SSE;
3164             }
3165           else
3166             ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3167         }
3168       else
3169         error ("bad value (%s) for %sfpmath=%s %s",
3170                ix86_fpmath_string, prefix, suffix, sw);
3171     }
3172 
3173   /* If the i387 is disabled, then do not return values in it. */
3174   if (!TARGET_80387)
3175     target_flags &= ~MASK_FLOAT_RETURNS;
3176 
3177   /* Use external vectorized library in vectorizing intrinsics.  */
3178   if (ix86_veclibabi_string)
3179     {
3180       if (strcmp (ix86_veclibabi_string, "svml") == 0)
3181         ix86_veclib_handler = ix86_veclibabi_svml;
3182       else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3183         ix86_veclib_handler = ix86_veclibabi_acml;
3184       else
3185         error ("unknown vectorization library ABI type (%s) for "
3186                "%sveclibabi=%s %s", ix86_veclibabi_string,
3187                prefix, suffix, sw);
3188     }
3189 
3190   if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3191       && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3192       && !optimize_size)
3193     target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3194 
3195   /* ??? Unwind info is not correct around the CFG unless either a frame
3196      pointer is present or M_A_O_A is set.  Fixing this requires rewriting
3197      unwind info generation to be aware of the CFG and propagating states
3198      around edges.  */
3199   if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3200        || flag_exceptions || flag_non_call_exceptions)
3201       && flag_omit_frame_pointer
3202       && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3203     {
3204       if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3205         warning (0, "unwind tables currently require either a frame pointer "
3206                  "or %saccumulate-outgoing-args%s for correctness",
3207                  prefix, suffix);
3208       target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3209     }
3210 
3211   /* If stack probes are required, the space used for large function
3212      arguments on the stack must also be probed, so enable
3213      -maccumulate-outgoing-args so this happens in the prologue.  */
3214   if (TARGET_STACK_PROBE
3215       && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3216     {
3217       if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3218         warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3219                  "for correctness", prefix, suffix);
3220       target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3221     }
3222 
  /* For sane SSE instruction set generation we need the fcomi instruction.
     It is safe to enable all CMOVE instructions.  */
3225   if (TARGET_SSE)
3226     TARGET_CMOVE = 1;
3227 
3228   /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
3229   {
3230     char *p;
3231     ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3232     p = strchr (internal_label_prefix, 'X');
3233     internal_label_prefix_len = p - internal_label_prefix;
3234     *p = '\0';
3235   }
3236 
  /* When no scheduling description is available, disable the scheduler
     passes so they don't slow down compilation and make x87 code slower.  */
3239   if (!TARGET_SCHEDULE)
3240     flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3241 
3242   if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3243     set_param_value ("simultaneous-prefetches",
3244                      ix86_cost->simultaneous_prefetches);
3245   if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3246     set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3247   if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3248     set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3249   if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3250     set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3251 
3252   /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3253      can be optimized to ap = __builtin_next_arg (0).  */
3254   if (!TARGET_64BIT)
3255     targetm.expand_builtin_va_start = NULL;
3256 
3257   if (TARGET_64BIT)
3258     {
3259       ix86_gen_leave = gen_leave_rex64;
3260       ix86_gen_pop1 = gen_popdi1;
3261       ix86_gen_add3 = gen_adddi3;
3262       ix86_gen_sub3 = gen_subdi3;
3263       ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3264       ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3265       ix86_gen_monitor = gen_sse3_monitor64;
3266       ix86_gen_andsp = gen_anddi3;
3267     }
3268   else
3269     {
3270       ix86_gen_leave = gen_leave;
3271       ix86_gen_pop1 = gen_popsi1;
3272       ix86_gen_add3 = gen_addsi3;
3273       ix86_gen_sub3 = gen_subsi3;
3274       ix86_gen_sub3_carry = gen_subsi3_carry;
3275       ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3276       ix86_gen_monitor = gen_sse3_monitor;
3277       ix86_gen_andsp = gen_andsi3;
3278     }
3279 
3280 #ifdef USE_IX86_CLD
3281   /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
3282   if (!TARGET_64BIT)
3283     target_flags |= MASK_CLD & ~target_flags_explicit;
3284 #endif
3285 
  /* Save the initial options in case the user uses function-specific
     options.  */
3287   if (main_args_p)
3288     target_option_default_node = target_option_current_node
3289       = build_target_option_node ();
3290 }
3291 
3292 /* Update register usage after having seen the compiler flags.  */
3293 
3294 void
3295 ix86_conditional_register_usage (void)
3296 {
3297   int i;
3298   unsigned int j;
3299 
3300   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3301     {
3302       if (fixed_regs[i] > 1)
3303         fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3304       if (call_used_regs[i] > 1)
3305         call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3306     }
3307 
3308   /* The PIC register, if it exists, is fixed.  */
3309   j = PIC_OFFSET_TABLE_REGNUM;
3310   if (j != INVALID_REGNUM)
3311     fixed_regs[j] = call_used_regs[j] = 1;
3312 
3313   /* The MS_ABI changes the set of call-used registers.  */
3314   if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3315     {
3316       call_used_regs[SI_REG] = 0;
3317       call_used_regs[DI_REG] = 0;
3318       call_used_regs[XMM6_REG] = 0;
3319       call_used_regs[XMM7_REG] = 0;
3320       for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3321         call_used_regs[i] = 0;
3322     }
3323 
3324   /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3325      other call-clobbered regs for 64-bit.  */
3326   if (TARGET_64BIT)
3327     {
3328       CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3329 
3330       for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3331         if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3332             && call_used_regs[i])
3333           SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3334     }
3335 
3336   /* If MMX is disabled, squash the registers.  */
3337   if (! TARGET_MMX)
3338     for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3339       if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3340         fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3341 
3342   /* If SSE is disabled, squash the registers.  */
3343   if (! TARGET_SSE)
3344     for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3345       if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3346         fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3347 
3348   /* If the FPU is disabled, squash the registers.  */
3349   if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3350     for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3351       if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3352         fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3353 
3354   /* If 32-bit, squash the 64-bit registers.  */
3355   if (! TARGET_64BIT)
3356     {
3357       for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3358         reg_names[i] = "";
3359       for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3360         reg_names[i] = "";
3361     }
3362 }
3363 
3364 
/* Save the current options.  */
3366 
3367 static void
3368 ix86_function_specific_save (struct cl_target_option *ptr)
3369 {
3370   gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3371   gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3372   gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3373   gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3374   gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
3375 
3376   ptr->arch = ix86_arch;
3377   ptr->schedule = ix86_schedule;
3378   ptr->tune = ix86_tune;
3379   ptr->fpmath = ix86_fpmath;
3380   ptr->branch_cost = ix86_branch_cost;
3381   ptr->tune_defaulted = ix86_tune_defaulted;
3382   ptr->arch_specified = ix86_arch_specified;
3383   ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3384   ptr->target_flags_explicit = target_flags_explicit;
3385 }
3386 
/* Restore the current options.  */
3388 
3389 static void
3390 ix86_function_specific_restore (struct cl_target_option *ptr)
3391 {
3392   enum processor_type old_tune = ix86_tune;
3393   enum processor_type old_arch = ix86_arch;
3394   unsigned int ix86_arch_mask, ix86_tune_mask;
3395   int i;
3396 
3397   ix86_arch = ptr->arch;
3398   ix86_schedule = ptr->schedule;
3399   ix86_tune = ptr->tune;
3400   ix86_fpmath = ptr->fpmath;
3401   ix86_branch_cost = ptr->branch_cost;
3402   ix86_tune_defaulted = ptr->tune_defaulted;
3403   ix86_arch_specified = ptr->arch_specified;
3404   ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3405   target_flags_explicit = ptr->target_flags_explicit;
3406 
  /* Recreate the arch feature tests if the arch changed.  */
3408   if (old_arch != ix86_arch)
3409     {
3410       ix86_arch_mask = 1u << ix86_arch;
3411       for (i = 0; i < X86_ARCH_LAST; ++i)
3412         ix86_arch_features[i]
3413           = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3414     }
3415 
  /* Recreate the tune optimization tests if the tune changed.  */
3417   if (old_tune != ix86_tune)
3418     {
3419       ix86_tune_mask = 1u << ix86_tune;
3420       for (i = 0; i < X86_TUNE_LAST; ++i)
3421         ix86_tune_features[i]
3422           = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3423     }
3424 }
3425 
/* Print the current options.  */
3427 
3428 static void
3429 ix86_function_specific_print (FILE *file, int indent,
3430                               struct cl_target_option *ptr)
3431 {
3432   char *target_string
3433     = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3434                           NULL, NULL, NULL, false);
3435 
3436   fprintf (file, "%*sarch = %d (%s)\n",
3437            indent, "",
3438            ptr->arch,
3439            ((ptr->arch < TARGET_CPU_DEFAULT_max)
3440             ? cpu_names[ptr->arch]
3441             : "<unknown>"));
3442 
3443   fprintf (file, "%*stune = %d (%s)\n",
3444            indent, "",
3445            ptr->tune,
3446            ((ptr->tune < TARGET_CPU_DEFAULT_max)
3447             ? cpu_names[ptr->tune]
3448             : "<unknown>"));
3449 
3450   fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3451            (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3452            (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3453   fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3454 
3455   if (target_string)
3456     {
3457       fprintf (file, "%*s%s\n", indent, "", target_string);
3458       free (target_string);
3459     }
3460 }
3461 
3462 
/* Inner function to process the attribute((target(...))): take an argument
   and set the current options from it.  If we have a list, recursively go
   over the list.  */
3466 
3467 static bool
3468 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3469 {
3470   char *next_optstr;
3471   bool ret = true;
3472 
3473 #define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3474 #define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3475 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3476 #define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }
3477 
3478   enum ix86_opt_type
3479   {
3480     ix86_opt_unknown,
3481     ix86_opt_yes,
3482     ix86_opt_no,
3483     ix86_opt_str,
3484     ix86_opt_isa
3485   };
3486 
3487   static const struct
3488   {
3489     const char *string;
3490     size_t len;
3491     enum ix86_opt_type type;
3492     int opt;
3493     int mask;
3494   } attrs[] = {
3495     /* isa options */
3496     IX86_ATTR_ISA ("3dnow",     OPT_m3dnow),
3497     IX86_ATTR_ISA ("abm",       OPT_mabm),
3498     IX86_ATTR_ISA ("aes",       OPT_maes),
3499     IX86_ATTR_ISA ("avx",       OPT_mavx),
3500     IX86_ATTR_ISA ("mmx",       OPT_mmmx),
3501     IX86_ATTR_ISA ("pclmul",    OPT_mpclmul),
3502     IX86_ATTR_ISA ("popcnt",    OPT_mpopcnt),
3503     IX86_ATTR_ISA ("sse",       OPT_msse),
3504     IX86_ATTR_ISA ("sse2",      OPT_msse2),
3505     IX86_ATTR_ISA ("sse3",      OPT_msse3),
3506     IX86_ATTR_ISA ("sse4",      OPT_msse4),
3507     IX86_ATTR_ISA ("sse4.1",    OPT_msse4_1),
3508     IX86_ATTR_ISA ("sse4.2",    OPT_msse4_2),
3509     IX86_ATTR_ISA ("sse4a",     OPT_msse4a),
3510     IX86_ATTR_ISA ("sse5",      OPT_msse5),
3511     IX86_ATTR_ISA ("ssse3",     OPT_mssse3),
3512 
3513     /* string options */
3514     IX86_ATTR_STR ("arch=",     IX86_FUNCTION_SPECIFIC_ARCH),
3515     IX86_ATTR_STR ("fpmath=",   IX86_FUNCTION_SPECIFIC_FPMATH),
3516     IX86_ATTR_STR ("tune=",     IX86_FUNCTION_SPECIFIC_TUNE),
3517 
3518     /* flag options */
3519     IX86_ATTR_YES ("cld",
3520                    OPT_mcld,
3521                    MASK_CLD),
3522 
3523     IX86_ATTR_NO ("fancy-math-387",
3524                   OPT_mfancy_math_387,
3525                   MASK_NO_FANCY_MATH_387),
3526 
3527     IX86_ATTR_NO ("fused-madd",
3528                   OPT_mfused_madd,
3529                   MASK_NO_FUSED_MADD),
3530 
3531     IX86_ATTR_YES ("ieee-fp",
3532                    OPT_mieee_fp,
3533                    MASK_IEEE_FP),
3534 
3535     IX86_ATTR_YES ("inline-all-stringops",
3536                    OPT_minline_all_stringops,
3537                    MASK_INLINE_ALL_STRINGOPS),
3538 
3539     IX86_ATTR_YES ("inline-stringops-dynamically",
3540                    OPT_minline_stringops_dynamically,
3541                    MASK_INLINE_STRINGOPS_DYNAMICALLY),
3542 
3543     IX86_ATTR_NO ("align-stringops",
3544                   OPT_mno_align_stringops,
3545                   MASK_NO_ALIGN_STRINGOPS),
3546 
3547     IX86_ATTR_YES ("recip",
3548                    OPT_mrecip,
3549                    MASK_RECIP),
3550 
3551   };
3552 
3553   /* If this is a list, recurse to get the options.  */
3554   if (TREE_CODE (args) == TREE_LIST)
3555     {
3556       bool ret = true;
3557 
3558       for (; args; args = TREE_CHAIN (args))
3559         if (TREE_VALUE (args)
3560             && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3561           ret = false;
3562 
3563       return ret;
3564     }
3565 
3566   else if (TREE_CODE (args) != STRING_CST)
3567     gcc_unreachable ();
3568 
3569   /* Handle multiple arguments separated by commas.  */
3570   next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3571 
3572   while (next_optstr && *next_optstr != '\0')
3573     {
3574       char *p = next_optstr;
3575       char *orig_p = p;
3576       char *comma = strchr (next_optstr, ',');
3577       const char *opt_string;
3578       size_t len, opt_len;
3579       int opt;
3580       bool opt_set_p;
3581       char ch;
3582       unsigned i;
3583       enum ix86_opt_type type = ix86_opt_unknown;
3584       int mask = 0;
3585 
3586       if (comma)
3587         {
3588           *comma = '\0';
3589           len = comma - next_optstr;
3590           next_optstr = comma + 1;
3591         }
3592       else
3593         {
3594           len = strlen (p);
3595           next_optstr = NULL;
3596         }
3597 
3598       /* Recognize no-xxx.  */
3599       if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3600         {
3601           opt_set_p = false;
3602           p += 3;
3603           len -= 3;
3604         }
3605       else
3606         opt_set_p = true;
3607 
3608       /* Find the option.  */
3609       ch = *p;
3610       opt = N_OPTS;
3611       for (i = 0; i < ARRAY_SIZE (attrs); i++)
3612         {
3613           type = attrs[i].type;
3614           opt_len = attrs[i].len;
3615           if (ch == attrs[i].string[0]
3616               && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3617               && memcmp (p, attrs[i].string, opt_len) == 0)
3618             {
3619               opt = attrs[i].opt;
3620               mask = attrs[i].mask;
3621               opt_string = attrs[i].string;
3622               break;
3623             }
3624         }
3625 
3626       /* Process the option.  */
3627       if (opt == N_OPTS)
3628         {
3629           error ("attribute(target(\"%s\")) is unknown", orig_p);
3630           ret = false;
3631         }
3632 
3633       else if (type == ix86_opt_isa)
3634         ix86_handle_option (opt, p, opt_set_p);
3635 
3636       else if (type == ix86_opt_yes || type == ix86_opt_no)
3637         {
3638           if (type == ix86_opt_no)
3639             opt_set_p = !opt_set_p;
3640 
3641           if (opt_set_p)
3642             target_flags |= mask;
3643           else
3644             target_flags &= ~mask;
3645         }
3646 
3647       else if (type == ix86_opt_str)
3648         {
3649           if (p_strings[opt])
3650             {
3651               error ("option(\"%s\") was already specified", opt_string);
3652               ret = false;
3653             }
3654           else
3655             p_strings[opt] = xstrdup (p + opt_len);
3656         }
3657 
3658       else
3659         gcc_unreachable ();
3660     }
3661 
3662   return ret;
3663 }
3664 
3665 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
3666 
3667 tree
3668 ix86_valid_target_attribute_tree (tree args)
3669 {
3670   const char *orig_arch_string = ix86_arch_string;
3671   const char *orig_tune_string = ix86_tune_string;
3672   const char *orig_fpmath_string = ix86_fpmath_string;
3673   int orig_tune_defaulted = ix86_tune_defaulted;
3674   int orig_arch_specified = ix86_arch_specified;
3675   char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3676   tree t = NULL_TREE;
3677   int i;
3678   struct cl_target_option *def
3679     = TREE_TARGET_OPTION (target_option_default_node);
3680 
3681   /* Process each of the options on the chain.  */
3682   if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3683     return NULL_TREE;
3684 
  /* If the changed options are different from the defaults, rerun
     override_options, and then save the options away.  The string options
     are attribute options, and will be undone when we copy the save
     structure.  */
3688   if (ix86_isa_flags != def->ix86_isa_flags
3689       || target_flags != def->target_flags
3690       || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3691       || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3692       || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3693     {
3694       /* If we are using the default tune= or arch=, undo the string assigned,
3695          and use the default.  */
3696       if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3697         ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3698       else if (!orig_arch_specified)
3699         ix86_arch_string = NULL;
3700 
3701       if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3702         ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3703       else if (orig_tune_defaulted)
3704         ix86_tune_string = NULL;
3705 
3706       /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
3707       if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3708         ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3709       else if (!TARGET_64BIT && TARGET_SSE)
3710         ix86_fpmath_string = "sse,387";
3711 
3712       /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
3713       override_options (false);
3714 
      /* Add any builtin functions for the new ISA, if any.  */
3716       ix86_add_new_builtins (ix86_isa_flags);
3717 
3718       /* Save the current options unless we are validating options for
3719          #pragma.  */
3720       t = build_target_option_node ();
3721 
3722       ix86_arch_string = orig_arch_string;
3723       ix86_tune_string = orig_tune_string;
3724       ix86_fpmath_string = orig_fpmath_string;
3725 
      /* Free up memory allocated to hold the strings.  */
3727       for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3728         if (option_strings[i])
3729           free (option_strings[i]);
3730     }
3731 
3732   return t;
3733 }
3734 
3735 /* Hook to validate attribute((target("string"))).  */
3736 
3737 static bool
3738 ix86_valid_target_attribute_p (tree fndecl,
3739                                tree ARG_UNUSED (name),
3740                                tree args,
3741                                int ARG_UNUSED (flags))
3742 {
3743   struct cl_target_option cur_target;
3744   bool ret = true;
3745   tree old_optimize = build_optimization_node ();
3746   tree new_target, new_optimize;
3747   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3748 
3749   /* If the function changed the optimization levels as well as setting target
3750      options, start with the optimizations specified.  */
3751   if (func_optimize && func_optimize != old_optimize)
3752     cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3753 
3754   /* The target attributes may also change some optimization flags, so update
3755      the optimization options if necessary.  */
3756   cl_target_option_save (&cur_target);
3757   new_target = ix86_valid_target_attribute_tree (args);
3758   new_optimize = build_optimization_node ();
3759 
3760   if (!new_target)
3761     ret = false;
3762 
3763   else if (fndecl)
3764     {
3765       DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3766 
3767       if (old_optimize != new_optimize)
3768         DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
3769     }
3770 
3771   cl_target_option_restore (&cur_target);
3772 
3773   if (old_optimize != new_optimize)
3774     cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3775 
3776   return ret;
3777 }
3778 
3779 
3780 /* Hook to determine if one function can safely inline another.  */
3781 
3782 static bool
3783 ix86_can_inline_p (tree caller, tree callee)
3784 {
3785   bool ret = false;
3786   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3787   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3788 
3789   /* If callee has no option attributes, then it is ok to inline.  */
3790   if (!callee_tree)
3791     ret = true;
3792 
  /* If the caller has no option attributes but the callee does, then it is
     not ok to inline.  */
3795   else if (!caller_tree)
3796     ret = false;
3797 
3798   else
3799     {
3800       struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3801       struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3802 
      /* The callee's isa options must be a subset of the caller's, i.e. an
         SSE5 function can inline an SSE2 function, but an SSE2 function
         can't inline an SSE5 function.  */
3806       if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3807           != callee_opts->ix86_isa_flags)
3808         ret = false;
3809 
3810       /* See if we have the same non-isa options.  */
3811       else if (caller_opts->target_flags != callee_opts->target_flags)
3812         ret = false;
3813 
3814       /* See if arch, tune, etc. are the same.  */
3815       else if (caller_opts->arch != callee_opts->arch)
3816         ret = false;
3817 
3818       else if (caller_opts->tune != callee_opts->tune)
3819         ret = false;
3820 
3821       else if (caller_opts->fpmath != callee_opts->fpmath)
3822         ret = false;
3823 
3824       else if (caller_opts->branch_cost != callee_opts->branch_cost)
3825         ret = false;
3826 
3827       else
3828         ret = true;
3829     }
3830 
3831   return ret;
3832 }
3833 
3834 
3835 /* Remember the last target of ix86_set_current_function.  */
3836 static GTY(()) tree ix86_previous_fndecl;
3837 
3838 /* Establish appropriate back-end context for processing the function
3839    FNDECL.  The argument might be NULL to indicate processing at top
3840    level, outside of any function scope.  */
3841 static void
3842 ix86_set_current_function (tree fndecl)
3843 {
3844   /* Only change the context if the function changes.  This hook is called
3845      several times in the course of compiling a function, and we don't want to
3846      slow things down too much or call target_reinit when it isn't safe.  */
3847   if (fndecl && fndecl != ix86_previous_fndecl)
3848     {
3849       tree old_tree = (ix86_previous_fndecl
3850                        ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3851                        : NULL_TREE);
3852 
3853       tree new_tree = (fndecl
3854                        ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3855                        : NULL_TREE);
3856 
3857       ix86_previous_fndecl = fndecl;
3858       if (old_tree == new_tree)
3859         ;
3860 
3861       else if (new_tree)
3862         {
3863           cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
3864           target_reinit ();
3865         }
3866 
3867       else if (old_tree)
3868         {
3869           struct cl_target_option *def
3870             = TREE_TARGET_OPTION (target_option_current_node);
3871 
3872           cl_target_option_restore (def);
3873           target_reinit ();
3874         }
3875     }
3876 }
3877 
3878 
3879 /* Return true if this goes in large data/bss.  */
3880 
3881 static bool
3882 ix86_in_large_data_p (tree exp)
3883 {
3884   if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3885     return false;
3886 
3887   /* Functions are never large data.  */
3888   if (TREE_CODE (exp) == FUNCTION_DECL)
3889     return false;
3890 
3891   if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3892     {
3893       const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3894       if (strcmp (section, ".ldata") == 0
3895           || strcmp (section, ".lbss") == 0)
3896         return true;
3897       return false;
3898     }
3899   else
3900     {
3901       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3902 
3903       /* If this is an incomplete type with size 0, then we can't put it
3904          in data because it might be too big when completed.  */
3905       if (!size || size > ix86_section_threshold)
3906         return true;
3907     }
3908 
3909   return false;
3910 }
3911 
3912 /* Switch to the appropriate section for output of DECL.
3913    DECL is either a `VAR_DECL' node or a constant of some sort.
3914    RELOC indicates whether forming the initial value of DECL requires
3915    link-time relocations.  */
3916 
3917 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
3918         ATTRIBUTE_UNUSED;
3919 
3920 static section *
3921 x86_64_elf_select_section (tree decl, int reloc,
3922                            unsigned HOST_WIDE_INT align)
3923 {
3924   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3925       && ix86_in_large_data_p (decl))
3926     {
3927       const char *sname = NULL;
3928       unsigned int flags = SECTION_WRITE;
3929       switch (categorize_decl_for_section (decl, reloc))
3930         {
3931         case SECCAT_DATA:
3932           sname = ".ldata";
3933           break;
3934         case SECCAT_DATA_REL:
3935           sname = ".ldata.rel";
3936           break;
3937         case SECCAT_DATA_REL_LOCAL:
3938           sname = ".ldata.rel.local";
3939           break;
3940         case SECCAT_DATA_REL_RO:
3941           sname = ".ldata.rel.ro";
3942           break;
3943         case SECCAT_DATA_REL_RO_LOCAL:
3944           sname = ".ldata.rel.ro.local";
3945           break;
3946         case SECCAT_BSS:
3947           sname = ".lbss";
3948           flags |= SECTION_BSS;
3949           break;
3950         case SECCAT_RODATA:
3951         case SECCAT_RODATA_MERGE_STR:
3952         case SECCAT_RODATA_MERGE_STR_INIT:
3953         case SECCAT_RODATA_MERGE_CONST:
3954           sname = ".lrodata";
3955           flags = 0;
3956           break;
3957         case SECCAT_SRODATA:
3958         case SECCAT_SDATA:
3959         case SECCAT_SBSS:
3960           gcc_unreachable ();
3961         case SECCAT_TEXT:
3962         case SECCAT_TDATA:
3963         case SECCAT_TBSS:
          /* We don't split these for the medium model.  Place them into
             default sections and hope for the best.  */
3966           break;
3967         case SECCAT_EMUTLS_VAR:
3968         case SECCAT_EMUTLS_TMPL:
3969           gcc_unreachable ();
3970         }
3971       if (sname)
3972         {
3973           /* We might get called with string constants, but get_named_section
3974              doesn't like them as they are not DECLs.  Also, we need to set
3975              flags in that case.  */
3976           if (!DECL_P (decl))
3977             return get_section (sname, flags, NULL);
3978           return get_named_section (decl, sname, reloc);
3979         }
3980     }
3981   return default_elf_select_section (decl, reloc, align);
3982 }
3983 
3984 /* Build up a unique section name, expressed as a
3985    STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
3986    RELOC indicates whether the initial value of EXP requires
3987    link-time relocations.  */
3988 
3989 static void ATTRIBUTE_UNUSED
3990 x86_64_elf_unique_section (tree decl, int reloc)
3991 {
3992   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3993       && ix86_in_large_data_p (decl))
3994     {
3995       const char *prefix = NULL;
3996       /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
3997       bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
3998 
3999       switch (categorize_decl_for_section (decl, reloc))
4000         {
4001         case SECCAT_DATA:
4002         case SECCAT_DATA_REL:
4003         case SECCAT_DATA_REL_LOCAL:
4004         case SECCAT_DATA_REL_RO:
4005         case SECCAT_DATA_REL_RO_LOCAL:
4006           prefix = one_only ? ".ld" : ".ldata";
4007           break;
4008         case SECCAT_BSS:
4009           prefix = one_only ? ".lb" : ".lbss";
4010           break;
4011         case SECCAT_RODATA:
4012         case SECCAT_RODATA_MERGE_STR:
4013         case SECCAT_RODATA_MERGE_STR_INIT:
4014         case SECCAT_RODATA_MERGE_CONST:
4015           prefix = one_only ? ".lr" : ".lrodata";
4016           break;
4017         case SECCAT_SRODATA:
4018         case SECCAT_SDATA:
4019         case SECCAT_SBSS:
4020           gcc_unreachable ();
4021         case SECCAT_TEXT:
4022         case SECCAT_TDATA:
4023         case SECCAT_TBSS:
          /* We don't split these for the medium model.  Place them into
             default sections and hope for the best.  */
4026           break;
4027         case SECCAT_EMUTLS_VAR:
4028           prefix = targetm.emutls.var_section;
4029           break;
4030         case SECCAT_EMUTLS_TMPL:
4031           prefix = targetm.emutls.tmpl_section;
4032           break;
4033         }
4034       if (prefix)
4035         {
4036           const char *name, *linkonce;
4037           char *string;
4038 
4039           name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4040           name = targetm.strip_name_encoding (name);
4041           
4042           /* If we're using one_only, then there needs to be a .gnu.linkonce
4043              prefix to the section name.  */
4044           linkonce = one_only ? ".gnu.linkonce" : "";
4045   
4046           string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4047           
4048           DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4049           return;
4050         }
4051     }
4052   default_unique_section (decl, reloc);
4053 }
4054 
4055 #ifdef COMMON_ASM_OP
4056 /* This says how to output assembler code to declare an
4057    uninitialized external linkage data object.
4058 
   For medium model x86-64 we need to use the .largecomm directive for
   large objects.  */
4061 void
4062 x86_elf_aligned_common (FILE *file,
4063                         const char *name, unsigned HOST_WIDE_INT size,
4064                         int align)
4065 {
4066   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4067       && size > (unsigned int)ix86_section_threshold)
4068     fprintf (file, ".largecomm\t");
4069   else
4070     fprintf (file, "%s", COMMON_ASM_OP);
4071   assemble_name (file, name);
4072   fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4073            size, align / BITS_PER_UNIT);
4074 }
4075 #endif
4076 
4077 /* Utility function for targets to use in implementing
4078    ASM_OUTPUT_ALIGNED_BSS.  */
4079 
4080 void
4081 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4082                         const char *name, unsigned HOST_WIDE_INT size,
4083                         int align)
4084 {
4085   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4086       && size > (unsigned int)ix86_section_threshold)
4087     switch_to_section (get_named_section (decl, ".lbss", 0));
4088   else
4089     switch_to_section (bss_section);
4090   ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4091 #ifdef ASM_DECLARE_OBJECT_NAME
4092   last_assemble_variable_decl = decl;
4093   ASM_DECLARE_OBJECT_NAME (file, name, decl);
4094 #else
  /* The standard thing is to just output a label for the object.  */
4096   ASM_OUTPUT_LABEL (file, name);
4097 #endif /* ASM_DECLARE_OBJECT_NAME */
4098   ASM_OUTPUT_SKIP (file, size ? size : 1);
4099 }
4100 
4101 void
4102 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4103 {
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem of too few registers even worse.  */
4106 #ifdef INSN_SCHEDULING
4107   if (level > 1)
4108     flag_schedule_insns = 0;
4109 #endif
4110 
4111   if (TARGET_MACHO)
4112     /* The Darwin libraries never set errno, so we might as well
4113        avoid calling them when that's the only reason we would.  */
4114     flag_errno_math = 0;
4115 
  /* The default values of these switches depend on TARGET_64BIT, which is
     not known at this point.  Mark these values with 2 and let the user
     override them.  In case there is no command line option specifying
     them, we will set the defaults in override_options.  */
4120   if (optimize >= 1)
4121     flag_omit_frame_pointer = 2;
4122   flag_pcc_struct_return = 2;
4123   flag_asynchronous_unwind_tables = 2;
4124   flag_vect_cost_model = 1;
4125 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4126   SUBTARGET_OPTIMIZATION_OPTIONS;
4127 #endif
4128 }
4129 
4130 /* Decide whether we can make a sibling call to a function.  DECL is the
4131    declaration of the function being targeted by the call and EXP is the
4132    CALL_EXPR representing the call.  */
4133 
4134 static bool
4135 ix86_function_ok_for_sibcall (tree decl, tree exp)
4136 {
4137   tree type, decl_or_type;
4138   rtx a, b;
4139 
4140   /* If we are generating position-independent code, we cannot sibcall
4141      optimize any indirect call, or a direct call to a global function,
4142      as the PLT requires %ebx be live.  */
4143   if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4144     return false;
4145 
4146   /* If we need to align the outgoing stack, then sibcalling would
4147      unalign the stack, which may break the called function.  */
4148   if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4149     return false;
4150 
4151   if (decl)
4152     {
4153       decl_or_type = decl;
4154       type = TREE_TYPE (decl);
4155     }
4156   else
4157     {
4158       /* We're looking at the CALL_EXPR, we need the type of the function.  */
4159       type = CALL_EXPR_FN (exp);                /* pointer expression */
4160       type = TREE_TYPE (type);                  /* pointer type */
4161       type = TREE_TYPE (type);                  /* function type */
4162       decl_or_type = type;
4163     }
4164 
  /* Check that the return value locations are the same.  For example, if
     we are returning floats on the 80387 register stack, we cannot
4167      make a sibcall from a function that doesn't return a float to a
4168      function that does or, conversely, from a function that does return
4169      a float to a function that doesn't; the necessary stack adjustment
4170      would not be executed.  This is also the place we notice
4171      differences in the return value ABI.  Note that it is ok for one
4172      of the functions to have void return type as long as the return
4173      value of the other is passed in a register.  */
4174   a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4175   b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4176                            cfun->decl, false);
4177   if (STACK_REG_P (a) || STACK_REG_P (b))
4178     {
4179       if (!rtx_equal_p (a, b))
4180         return false;
4181     }
4182   else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4183     ;
4184   else if (!rtx_equal_p (a, b))
4185     return false;
4186 
4187   if (TARGET_64BIT)
4188     {
4189       /* The SYSV ABI has more call-clobbered registers;
4190          disallow sibcalls from MS to SYSV.  */
4191       if (cfun->machine->call_abi == MS_ABI
4192           && ix86_function_type_abi (type) == SYSV_ABI)
4193         return false;
4194     }
4195   else
4196     {
4197       /* If this call is indirect, we'll need to be able to use a
4198          call-clobbered register for the address of the target function.
4199          Make sure that all such registers are not used for passing
4200          parameters.  Note that DLLIMPORT functions are indirect.  */
4201       if (!decl
4202           || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4203         {
4204           if (ix86_function_regparm (type, NULL) >= 3)
4205             {
4206               /* ??? Need to count the actual number of registers to be used,
4207                  not the possible number of registers.  Fix later.  */
4208               return false;
4209             }
4210         }
4211     }
4212 
4213   /* Otherwise okay.  That also includes certain types of indirect calls.  */
4214   return true;
4215 }
4216 
4217 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4218    calling convention attributes;
4219    arguments as in struct attribute_spec.handler.  */
4220 
4221 static tree
4222 ix86_handle_cconv_attribute (tree *node, tree name,
4223                                    tree args,
4224                                    int flags ATTRIBUTE_UNUSED,
4225                                    bool *no_add_attrs)
4226 {
4227   if (TREE_CODE (*node) != FUNCTION_TYPE
4228       && TREE_CODE (*node) != METHOD_TYPE
4229       && TREE_CODE (*node) != FIELD_DECL
4230       && TREE_CODE (*node) != TYPE_DECL)
4231     {
4232       warning (OPT_Wattributes, "%qs attribute only applies to functions",
4233                IDENTIFIER_POINTER (name));
4234       *no_add_attrs = true;
4235       return NULL_TREE;
4236     }
4237 
4238   /* Can combine regparm with all attributes but fastcall.  */
4239   if (is_attribute_p ("regparm", name))
4240     {
4241       tree cst;
4242 
4243       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4244         {
4245           error ("fastcall and regparm attributes are not compatible");
4246         }
4247 
4248       cst = TREE_VALUE (args);
4249       if (TREE_CODE (cst) != INTEGER_CST)
4250         {
4251           warning (OPT_Wattributes,
4252                    "%qs attribute requires an integer constant argument",
4253                    IDENTIFIER_POINTER (name));
4254           *no_add_attrs = true;
4255         }
4256       else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4257         {
4258           warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4259                    IDENTIFIER_POINTER (name), REGPARM_MAX);
4260           *no_add_attrs = true;
4261         }
4262 
4263       return NULL_TREE;
4264     }
4265 
4266   if (TARGET_64BIT)
4267     {
4268       /* Do not warn when emulating the MS ABI.  */
4269       if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4270         warning (OPT_Wattributes, "%qs attribute ignored",
4271                  IDENTIFIER_POINTER (name));
4272       *no_add_attrs = true;
4273       return NULL_TREE;
4274     }
4275 
  /* Can combine fastcall with sseregparm.  */
4277   if (is_attribute_p ("fastcall", name))
4278     {
4279       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4280         {
4281           error ("fastcall and cdecl attributes are not compatible");
4282         }
4283       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4284         {
4285           error ("fastcall and stdcall attributes are not compatible");
4286         }
4287       if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4288         {
4289           error ("fastcall and regparm attributes are not compatible");
4290         }
4291     }
4292 
  /* Can combine stdcall with regparm and sseregparm.  */
4295   else if (is_attribute_p ("stdcall", name))
4296     {
4297       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4298         {
4299           error ("stdcall and cdecl attributes are not compatible");
4300         }
4301       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4302         {
4303           error ("stdcall and fastcall attributes are not compatible");
4304         }
4305     }
4306 
4307   /* Can combine cdecl with regparm and sseregparm.  */
4308   else if (is_attribute_p ("cdecl", name))
4309     {
4310       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4311         {
4312           error ("stdcall and cdecl attributes are not compatible");
4313         }
4314       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4315         {
4316           error ("fastcall and cdecl attributes are not compatible");
4317         }
4318     }
4319 
4320   /* Can combine sseregparm with all attributes.  */
4321 
4322   return NULL_TREE;
4323 }
4324 
4325 /* Return 0 if the attributes for two types are incompatible, 1 if they
4326    are compatible, and 2 if they are nearly compatible (which causes a
4327    warning to be generated).  */
4328 
4329 static int
4330 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4331 {
4332   /* Check for mismatch of non-default calling convention.  */
4333   const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4334 
4335   if (TREE_CODE (type1) != FUNCTION_TYPE
4336       && TREE_CODE (type1) != METHOD_TYPE)
4337     return 1;
4338 
4339   /* Check for mismatched fastcall/regparm types.  */
4340   if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4341        != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4342       || (ix86_function_regparm (type1, NULL)
4343           != ix86_function_regparm (type2, NULL)))
4344     return 0;
4345 
4346   /* Check for mismatched sseregparm types.  */
4347   if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4348       != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4349     return 0;
4350 
4351   /* Check for mismatched return types (cdecl vs stdcall).  */
4352   if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4353       != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4354     return 0;
4355 
4356   return 1;
4357 }
4358 
4359 /* Return the regparm value for a function with the indicated TYPE and DECL.
4360    DECL may be NULL when calling function indirectly
4361    or considering a libcall.  */
4362 
4363 static int
4364 ix86_function_regparm (const_tree type, const_tree decl)
4365 {
4366   tree attr;
4367   int regparm;
4368 
4369   static bool error_issued;
4370 
4371   if (TARGET_64BIT)
4372     return (ix86_function_type_abi (type) == SYSV_ABI
4373             ? X86_64_REGPARM_MAX : X64_REGPARM_MAX);
4374 
4375   regparm = ix86_regparm;
4376   attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4377   if (attr)
4378     {
4379       regparm
4380         = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4381 
4382       if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4383         {
          /* We can't use regparm(3) for nested functions because these
             pass the static chain pointer in the %ecx register.  */
4386           if (!error_issued && regparm == 3
4387               && decl_function_context (decl)
4388               && !DECL_NO_STATIC_CHAIN (decl))
4389             {
4390               error ("nested functions are limited to 2 register parameters");
4391               error_issued = true;
4392               return 0;
4393             }
4394         }
4395 
4396       return regparm;
4397     }
4398 
4399   if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4400     return 2;
4401 
4402   /* Use register calling convention for local functions when possible.  */
4403   if (decl
4404       && TREE_CODE (decl) == FUNCTION_DECL
4405       && optimize
4406       && (TARGET_64BIT || !flag_strict_calling_conventions)
4407       && !profile_flag)
4408     {
4409       /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
4410       struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4411       if (i && i->local)
4412         {
4413           int local_regparm, globals = 0, regno;
4414           struct function *f;
4415 
4416           /* Make sure no regparm register is taken by a
4417              fixed register variable.  */
4418           for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4419             if (fixed_regs[local_regparm])
4420               break;
4421 
          /* We can't use regparm(3) for nested functions as these use the
             static chain pointer in the third argument.  */
4424           if (local_regparm == 3
4425               && decl_function_context (decl)
4426               && !DECL_NO_STATIC_CHAIN (decl))
4427             local_regparm = 2;
4428 
          /* If the function realigns its stack pointer, the prologue will
             clobber %ecx.  If we've already generated code for the callee,
             the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
             scanning the attributes for the self-realigning property.  */
4433           f = DECL_STRUCT_FUNCTION (decl);
          /* The current internal arg pointer won't conflict with the
             parameter passing regs, so there is no need to change stack
             realignment or adjust the regparm number.

             Each fixed register used increases register pressure, so
             fewer registers should be used for argument passing.  This
             behavior can be overridden by an explicit regparm value.  */
4442           for (regno = 0; regno <= DI_REG; regno++)
4443             if (fixed_regs[regno])
4444               globals++;
4445 
4446           local_regparm
4447             = globals < local_regparm ? local_regparm - globals : 0;
4448 
4449           if (local_regparm > regparm)
4450             regparm = local_regparm;
4451         }
4452     }
4453 
4454   return regparm;
4455 }
4456 
4457 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4458    DFmode (2) arguments in SSE registers for a function with the
4459    indicated TYPE and DECL.  DECL may be NULL when calling function
4460    indirectly or considering a libcall.  Otherwise return 0.  */
4461 
4462 static int
4463 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4464 {
4465   gcc_assert (!TARGET_64BIT);
4466 
4467   /* Use SSE registers to pass SFmode and DFmode arguments if requested
4468      by the sseregparm attribute.  */
4469   if (TARGET_SSEREGPARM
4470       || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4471     {
4472       if (!TARGET_SSE)
4473         {
4474           if (warn)
4475             {
4476               if (decl)
4477                 error ("Calling %qD with attribute sseregparm without "
4478                        "SSE/SSE2 enabled", decl);
4479               else
4480                 error ("Calling %qT with attribute sseregparm without "
4481                        "SSE/SSE2 enabled", type);
4482             }
4483           return 0;
4484         }
4485 
4486       return 2;
4487     }
4488 
4489   /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4490      (and DFmode for SSE2) arguments in SSE registers.  */
4491   if (decl && TARGET_SSE_MATH && optimize && !profile_flag &&
4492     (TARGET_64BIT || !flag_strict_calling_conventions))
4493     {
4494       /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
4495       struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4496       if (i && i->local)
4497         return TARGET_SSE2 ? 2 : 1;
4498     }
4499 
4500   return 0;
4501 }
4502 
4503 /* Return true if EAX is live at the start of the function.  Used by
4504    ix86_expand_prologue to determine if we need special help before
4505    calling allocate_stack_worker.  */
4506 
4507 static bool
4508 ix86_eax_live_at_start_p (void)
4509 {
4510   /* Cheat.  Don't bother working forward from ix86_function_regparm
4511      to the function type to whether an actual argument is located in
4512      eax.  Instead just look at cfg info, which is still close enough
4513      to correct at this point.  This gives false positives for broken
4514      functions that might use uninitialized data that happens to be
4515      allocated in eax, but who cares?  */
4516   return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4517 }
4518 
4519 /* Value is the number of bytes of arguments automatically
4520    popped when returning from a subroutine call.
4521    FUNDECL is the declaration node of the function (as a tree),
4522    FUNTYPE is the data type of the function (as a tree),
4523    or for a library call it is an identifier node for the subroutine name.
4524    SIZE is the number of bytes of arguments passed on the stack.
4525 
4526    On the 80386, the RTD insn may be used to pop them if the number
4527      of args is fixed, but if the number is variable then the caller
4528      must pop them all.  RTD can't be used for library calls now
4529      because the library is compiled with the Unix compiler.
4530    Use of RTD is a selectable option, since it is incompatible with
4531    standard Unix calling sequences.  If the option is not selected,
4532    the caller must always pop the args.
4533 
4534    The attribute stdcall is equivalent to RTD on a per module basis.  */
4535 
4536 int
4537 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4538 {
4539   int rtd;
4540 
4541   /* None of the 64-bit ABIs pop arguments.  */
4542   if (TARGET_64BIT)
4543     return 0;
4544 
4545   rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4546 
4547   /* Cdecl functions override -mrtd, and never pop the stack.  */
4548   if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4549     {
      /* Stdcall and fastcall functions will pop the stack if not
         variadic.  */
4552       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4553           || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4554         rtd = 1;
4555 
4556       if (rtd && ! stdarg_p (funtype))
4557         return size;
4558     }
4559 
4560   /* Lose any fake structure return argument if it is passed on the stack.  */
4561   if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4562       && !KEEP_AGGREGATE_RETURN_POINTER)
4563     {
4564       int nregs = ix86_function_regparm (funtype, fundecl);
4565       if (nregs == 0)
4566         return GET_MODE_SIZE (Pmode);
4567     }
4568 
4569   return 0;
4570 }
4571 
4572 /* Argument support functions.  */
4573 
/* Return true when REGNO may be used to pass function parameters.  */
4575 bool
4576 ix86_function_arg_regno_p (int regno)
4577 {
4578   int i;
4579   const int *parm_regs;
4580 
4581   if (!TARGET_64BIT)
4582     {
4583       if (TARGET_MACHO)
4584         return (regno < REGPARM_MAX
4585                 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4586       else
4587         return (regno < REGPARM_MAX
4588                 || (TARGET_MMX && MMX_REGNO_P (regno)
4589                     && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4590                 || (TARGET_SSE && SSE_REGNO_P (regno)
4591                     && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4592     }
4593 
4594   if (TARGET_MACHO)
4595     {
4596       if (SSE_REGNO_P (regno) && TARGET_SSE)
4597         return true;
4598     }
4599   else
4600     {
4601       if (TARGET_SSE && SSE_REGNO_P (regno)
4602           && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4603         return true;
4604     }
4605 
  /* TODO: The function should depend on the current function's ABI, but
     builtins.c would need updating then.  Therefore we use the default
     ABI.  */
4609 
4610   /* RAX is used as hidden argument to va_arg functions.  */
4611   if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
4612     return true;
4613 
4614   if (DEFAULT_ABI == MS_ABI)
4615     parm_regs = x86_64_ms_abi_int_parameter_registers;
4616   else
4617     parm_regs = x86_64_int_parameter_registers;
4618   for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
4619                                          : X86_64_REGPARM_MAX); i++)
4620     if (regno == parm_regs[i])
4621       return true;
4622   return false;
4623 }
4624 
/* Return true if we do not know how to pass TYPE solely in registers.  */
4626 
4627 static bool
4628 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4629 {
4630   if (must_pass_in_stack_var_size_or_pad (mode, type))
4631     return true;
4632 
4633   /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
4634      The layout_type routine is crafty and tries to trick us into passing
4635      currently unsupported vector types on the stack by using TImode.  */
4636   return (!TARGET_64BIT && mode == TImode
4637           && type && TREE_CODE (type) != VECTOR_TYPE);
4638 }
4639 
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, depending on the
   ABI format used.  */
4643 int
4644 ix86_reg_parm_stack_space (const_tree fndecl)
4645 {
4646   int call_abi = SYSV_ABI;
4647   if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4648     call_abi = ix86_function_abi (fndecl);
4649   else
4650     call_abi = ix86_function_type_abi (fndecl);
4651   if (call_abi == MS_ABI)
4652     return 32;
4653   return 0;
4654 }
4655 
/* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the call
   ABI used.  */
4658 int
4659 ix86_function_type_abi (const_tree fntype)
4660 {
4661   if (TARGET_64BIT && fntype != NULL)
4662     {
4663       int abi;
4664       if (DEFAULT_ABI == SYSV_ABI)
4665         abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
4666       else
4667         abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
4668 
4669       return abi;
4670     }
4671   return DEFAULT_ABI;
4672 }
4673 
4674 int
4675 ix86_function_abi (const_tree fndecl)
4676 {
4677   if (! fndecl)
4678     return DEFAULT_ABI;
4679   return ix86_function_type_abi (TREE_TYPE (fndecl));
4680 }
4681 
/* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the call ABI
   used.  */
4684 int
4685 ix86_cfun_abi (void)
4686 {
4687   if (! cfun || ! TARGET_64BIT)
4688     return DEFAULT_ABI;
4689   return cfun->machine->call_abi;
4690 }
4691 
4692 /* regclass.c  */
4693 extern void init_regs (void);
4694 
/* Implementation of the call ABI switching target hook.  The call
   register sets specific to FNDECL are installed.  See also
   CONDITIONAL_REGISTER_USAGE for more details.  */
4698 void
4699 ix86_call_abi_override (const_tree fndecl)
4700 {
4701   if (fndecl == NULL_TREE)
4702     cfun->machine->call_abi = DEFAULT_ABI;
4703   else
4704     cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4705 }
4706 
/* The MS and SYSV ABIs have different sets of call-used registers.  Avoid
   expensive re-initialization of init_regs each time we switch function
   context, since this is needed only during RTL expansion.  */
4710 static void
4711 ix86_maybe_switch_abi (void)
4712 {
4713   if (TARGET_64BIT &&
4714       call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4715     reinit_regs ();
4716 }
4717 
4718 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4719    for a call to a function whose data type is FNTYPE.
4720    For a library call, FNTYPE is 0.  */
4721 
4722 void
4723 init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
4724                       tree fntype,      /* tree ptr for function decl */
4725                       rtx libname,      /* SYMBOL_REF of library name or 0 */
4726                       tree fndecl)
4727 {
4728   struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4729   memset (cum, 0, sizeof (*cum));
4730 
4731   if (fndecl)
4732    cum->call_abi = ix86_function_abi (fndecl);
4733   else
4734    cum->call_abi = ix86_function_type_abi (fntype);
4735   /* Set up the number of registers to use for passing arguments.  */
4736 
4737   if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4738     sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
4739            "or subtarget optimization implying it");
4740   cum->nregs = ix86_regparm;
4741   if (TARGET_64BIT)
4742     {
4743       if (cum->call_abi != DEFAULT_ABI)
4744         cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
4745                                              : X64_REGPARM_MAX;
4746     }
4747   if (TARGET_SSE)
4748     {
4749       cum->sse_nregs = SSE_REGPARM_MAX;
4750       if (TARGET_64BIT)
4751         {
4752           if (cum->call_abi != DEFAULT_ABI)
4753             cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4754                                                      : X64_SSE_REGPARM_MAX;
4755         }
4756     }
4757   if (TARGET_MMX)
4758     cum->mmx_nregs = MMX_REGPARM_MAX;
4759   cum->warn_avx = true;
4760   cum->warn_sse = true;
4761   cum->warn_mmx = true;
4762 
  /* Because the type might mismatch between caller and callee, we need to
     use the actual type of the function for local calls.
     FIXME: cgraph_analyze can be told to actually record if the function
     uses va_start, so for local functions maybe_vaarg can be made more
     aggressive, helping K&R code.
     FIXME: once the type system is fixed, we won't need this code
     anymore.  */
4769   if (i && i->local)
4770     fntype = TREE_TYPE (fndecl);
4771   cum->maybe_vaarg = (fntype
4772                       ? (!prototype_p (fntype) || stdarg_p (fntype))
4773                       : !libname);
4774 
4775   if (!TARGET_64BIT)
4776     {
4777       /* If there are variable arguments, then we won't pass anything
4778          in registers in 32-bit mode. */
4779       if (stdarg_p (fntype))
4780         {
4781           cum->nregs = 0;
4782           cum->sse_nregs = 0;
4783           cum->mmx_nregs = 0;
4784           cum->warn_avx = 0;
4785           cum->warn_sse = 0;
4786           cum->warn_mmx = 0;
4787           return;
4788         }
4789 
4790       /* Use ecx and edx registers if function has fastcall attribute,
4791          else look for regparm information.  */
4792       if (fntype)
4793         {
4794           if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4795             {
4796               cum->nregs = 2;
4797               cum->fastcall = 1;
4798             }
4799           else
4800             cum->nregs = ix86_function_regparm (fntype, fndecl);
4801         }
4802 
4803       /* Set up the number of SSE registers used for passing SFmode
4804          and DFmode arguments.  Warn for mismatching ABI.  */
4805       cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4806     }
4807 }
4808 
4809 /* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
4810    But in the case of vector types, it is some vector mode.
4811 
4812    When we have only some of our vector isa extensions enabled, then there
4813    are some modes for which vector_mode_supported_p is false.  For these
4814    modes, the generic vector support in gcc will choose some non-vector mode
4815    in order to implement the type.  By computing the natural mode, we'll
4816    select the proper ABI location for the operand and not depend on whatever
4817    the middle-end decides to do with these vector types.
4818 
   The middle-end can't deal with vector types larger than 16 bytes.  In
   that case, we return the original mode and warn about the ABI change if
   CUM isn't NULL.  */
4822 
4823 static enum machine_mode
4824 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4825 {
4826   enum machine_mode mode = TYPE_MODE (type);
4827 
4828   if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4829     {
4830       HOST_WIDE_INT size = int_size_in_bytes (type);
4831       if ((size == 8 || size == 16 || size == 32)
4832           /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
4833           && TYPE_VECTOR_SUBPARTS (type) > 1)
4834         {
4835           enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4836 
4837           if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4838             mode = MIN_MODE_VECTOR_FLOAT;
4839           else
4840             mode = MIN_MODE_VECTOR_INT;
4841 
4842           /* Get the mode which has this inner mode and number of units.  */
4843           for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4844             if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4845                 && GET_MODE_INNER (mode) == innermode)
4846               {
4847                 if (size == 32 && !TARGET_AVX)
4848                   {
4849                     static bool warnedavx;
4850 
4851                     if (cum
4852                         && !warnedavx 
4853                         && cum->warn_avx)
4854                       {
4855                         warnedavx = true;
4856                         warning (0, "AVX vector argument without AVX "
4857                                  "enabled changes the ABI");
4858                       }
4859                     return TYPE_MODE (type);
4860                   }
4861                 else
4862                   return mode;
4863               }
4864 
4865           gcc_unreachable ();
4866         }
4867     }
4868 
4869   return mode;
4870 }
4871 
4872 /* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
4873    this may not agree with the mode that the type system has chosen for the
4874    register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
4875    go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
4876 
4877 static rtx
4878 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4879                      unsigned int regno)
4880 {
4881   rtx tmp;
4882 
4883   if (orig_mode != BLKmode)
4884     tmp = gen_rtx_REG (orig_mode, regno);
4885   else
4886     {
4887       tmp = gen_rtx_REG (mode, regno);
4888       tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4889       tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4890     }
4891 
4892   return tmp;
4893 }
4894 
/* x86-64 register passing implementation.  See the x86-64 ABI for details.
   The goal of this code is to classify each eightbyte of the incoming
   argument by register class and assign registers accordingly.  */
4898 
4899 /* Return the union class of CLASS1 and CLASS2.
4900    See the x86-64 PS ABI for details.  */
4901 
4902 static enum x86_64_reg_class
4903 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4904 {
4905   /* Rule #1: If both classes are equal, this is the resulting class.  */
4906   if (class1 == class2)
4907     return class1;
4908 
4909   /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4910      the other class.  */
4911   if (class1 == X86_64_NO_CLASS)
4912     return class2;
4913   if (class2 == X86_64_NO_CLASS)
4914     return class1;
4915 
4916   /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
4917   if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4918     return X86_64_MEMORY_CLASS;
4919 
4920   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
4921   if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4922       || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4923     return X86_64_INTEGERSI_CLASS;
4924   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4925       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4926     return X86_64_INTEGER_CLASS;
4927 
4928   /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4929      MEMORY is used.  */
4930   if (class1 == X86_64_X87_CLASS
4931       || class1 == X86_64_X87UP_CLASS
4932       || class1 == X86_64_COMPLEX_X87_CLASS
4933       || class2 == X86_64_X87_CLASS
4934       || class2 == X86_64_X87UP_CLASS
4935       || class2 == X86_64_COMPLEX_X87_CLASS)
4936     return X86_64_MEMORY_CLASS;
4937 
4938   /* Rule #6: Otherwise class SSE is used.  */
4939   return X86_64_SSE_CLASS;
4940 }
4941 
4942 /* Classify the argument of type TYPE and mode MODE.
4943    CLASSES will be filled by the register class used to pass each word
4944    of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for
   zero-sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records; it specifies the
   offset in bits, modulo 256, to avoid overflow cases.
4950 
4951    See the x86-64 PS ABI for details.
4952 */
4953 
4954 static int
4955 classify_argument (enum machine_mode mode, const_tree type,
4956                    enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4957 {
4958   HOST_WIDE_INT bytes =
4959     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4960   int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4961 
4962   /* Variable sized entities are always passed/returned in memory.  */
4963   if (bytes < 0)
4964     return 0;
4965 
4966   if (mode != VOIDmode
4967       && targetm.calls.must_pass_in_stack (mode, type))
4968     return 0;
4969 
4970   if (type && AGGREGATE_TYPE_P (type))
4971     {
4972       int i;
4973       tree field;
4974       enum x86_64_reg_class subclasses[MAX_CLASSES];
4975 
4976       /* On x86-64 we pass structures larger than 32 bytes on the stack.  */
4977       if (bytes > 32)
4978         return 0;
4979 
4980       for (i = 0; i < words; i++)
4981         classes[i] = X86_64_NO_CLASS;
4982 
      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
         signal the memory class, so handle this as a special case.  */
4985       if (!words)
4986         {
4987           classes[0] = X86_64_NO_CLASS;
4988           return 1;
4989         }
4990 
      /* Classify each field of the record and merge the classes.  */
4992       switch (TREE_CODE (type))
4993         {
4994         case RECORD_TYPE:
          /* And now merge the fields of the structure.  */
4996           for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4997             {
4998               if (TREE_CODE (field) == FIELD_DECL)
4999                 {
5000                   int num;
5001 
5002                   if (TREE_TYPE (field) == error_mark_node)
5003                     continue;
5004 
5005                   /* Bitfields are always classified as integer.  Handle them
5006                      early, since later code would consider them to be
5007                      misaligned integers.  */
5008                   if (DECL_BIT_FIELD (field))
5009                     {
5010                       for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5011                            i < ((int_bit_position (field) + (bit_offset % 64))
5012                                 + tree_low_cst (DECL_SIZE (field), 0)
5013                                 + 63) / 8 / 8; i++)
5014                         classes[i] =
5015                           merge_classes (X86_64_INTEGER_CLASS,
5016                                          classes[i]);
5017                     }
5018                   else
5019                     {
5020                       type = TREE_TYPE (field);
5021 
5022                       /* Flexible array member is ignored.  */
5023                       if (TYPE_MODE (type) == BLKmode
5024                           && TREE_CODE (type) == ARRAY_TYPE
5025                           && TYPE_SIZE (type) == NULL_TREE
5026                           && TYPE_DOMAIN (type) != NULL_TREE
5027                           && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5028                               == NULL_TREE))
5029                         {
5030                           static bool warned;
5031                           
5032                           if (!warned && warn_psabi)
5033                             {
5034                               warned = true;
5035                               inform (input_location,
5036                                       "The ABI of passing struct with"
5037                                       " a flexible array member has"
5038                                       " changed in GCC 4.4");
5039                             }
5040                           continue;
5041                         }
5042                       num = classify_argument (TYPE_MODE (type), type,
5043                                                subclasses,
5044                                                (int_bit_position (field)
5045                                                 + bit_offset) % 256);
5046                       if (!num)
5047                         return 0;
5048                       for (i = 0; i < num; i++)
5049                         {
5050                           int pos =
5051                             (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5052                           classes[i + pos] =
5053                             merge_classes (subclasses[i], classes[i + pos]);
5054                         }
5055                     }
5056                 }
5057             }
5058           break;
5059 
5060         case ARRAY_TYPE:
5061           /* Arrays are handled as small records.  */
5062           {
5063             int num;
5064             num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5065                                      TREE_TYPE (type), subclasses, bit_offset);
5066             if (!num)
5067               return 0;
5068 
5069             /* The partial classes are now full classes.  */
5070             if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5071               subclasses[0] = X86_64_SSE_CLASS;
5072             if (subclasses[0] == X86_64_INTEGERSI_CLASS
5073                 && !((bit_offset % 64) == 0 && bytes == 4))
5074               subclasses[0] = X86_64_INTEGER_CLASS;
5075 
5076             for (i = 0; i < words; i++)
5077               classes[i] = subclasses[i % num];
5078 
5079             break;
5080           }
5081         case UNION_TYPE:
5082         case QUAL_UNION_TYPE:
          /* Unions are similar to RECORD_TYPE but the offset is always
             zero.  */
5085           for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5086             {
5087               if (TREE_CODE (field) == FIELD_DECL)
5088                 {
5089                   int num;
5090 
5091                   if (TREE_TYPE (field) == error_mark_node)
5092                     continue;
5093 
5094                   num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5095                                            TREE_TYPE (field), subclasses,
5096                                            bit_offset);
5097                   if (!num)
5098                     return 0;
5099                   for (i = 0; i < num; i++)
5100                     classes[i] = merge_classes (subclasses[i], classes[i]);
5101                 }
5102             }
5103           break;
5104 
5105         default:
5106           gcc_unreachable ();
5107         }
5108 
5109       if (words > 2)
5110         {
          /* When the size exceeds 16 bytes, the argument must be
             passed in memory unless the first eightbyte is
             X86_64_SSE_CLASS and all remaining eightbytes are
             X86_64_SSEUP_CLASS.  */
          if (classes[0] != X86_64_SSE_CLASS)
            return 0;
5117 
5118           for (i = 1; i < words; i++)
5119             if (classes[i] != X86_64_SSEUP_CLASS)
5120               return 0;
5121         }
5122 
5123       /* Final merger cleanup.  */
5124       for (i = 0; i < words; i++)
5125         {
5126           /* If one class is MEMORY, everything should be passed in
5127              memory.  */
5128           if (classes[i] == X86_64_MEMORY_CLASS)
5129             return 0;
5130 
          /* X86_64_SSEUP_CLASS should always be preceded by
             X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
5133           if (classes[i] == X86_64_SSEUP_CLASS
5134               && classes[i - 1] != X86_64_SSE_CLASS
5135               && classes[i - 1] != X86_64_SSEUP_CLASS)
5136             {
5137               /* The first one should never be X86_64_SSEUP_CLASS.  */
5138               gcc_assert (i != 0);
5139               classes[i] = X86_64_SSE_CLASS;
5140             }
5141 
          /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
             everything should be passed in memory.  */
5144           if (classes[i] == X86_64_X87UP_CLASS
5145               && (classes[i - 1] != X86_64_X87_CLASS))
5146             {
5147               static bool warned;
5148 
5149               /* The first one should never be X86_64_X87UP_CLASS.  */
5150               gcc_assert (i != 0);
5151               if (!warned && warn_psabi)
5152                 {
5153                   warned = true;
5154                   inform (input_location,
5155                           "The ABI of passing union with long double"
5156                           " has changed in GCC 4.4");
5157                 }
5158               return 0;
5159             }
5160         }
5161       return words;
5162     }
5163 
  /* Compute the alignment needed.  We align all types to their natural
     boundaries, with the exception of XFmode, which is aligned to 128
     bits.  */
5166   if (mode != VOIDmode && mode != BLKmode)
5167     {
5168       int mode_alignment = GET_MODE_BITSIZE (mode);
5169 
5170       if (mode == XFmode)
5171         mode_alignment = 128;
5172       else if (mode == XCmode)
5173         mode_alignment = 256;
5174       if (COMPLEX_MODE_P (mode))
5175         mode_alignment /= 2;
5176       /* Misaligned fields are always returned in memory.  */
5177       if (bit_offset % mode_alignment)
5178         return 0;
5179     }
5180 
  /* For V1xx modes, just use the base mode.  */
5182   if (VECTOR_MODE_P (mode) && mode != V1DImode
5183       && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5184     mode = GET_MODE_INNER (mode);
5185 
5186   /* Classification of atomic types.  */
5187   switch (mode)
5188     {
5189     case SDmode:
5190     case DDmode:
5191       classes[0] = X86_64_SSE_CLASS;
5192       return 1;
5193     case TDmode:
5194       classes[0] = X86_64_SSE_CLASS;
5195       classes[1] = X86_64_SSEUP_CLASS;
5196       return 2;
5197     case DImode:
5198     case SImode:
5199     case HImode:
5200     case QImode:
5201     case CSImode:
5202     case CHImode:
5203     case CQImode:
5204       {
        int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5206 
5207         if (size <= 32)
5208           {
5209             classes[0] = X86_64_INTEGERSI_CLASS;
5210             return 1;
5211           }
5212         else if (size <= 64)
5213           {
5214             classes[0] = X86_64_INTEGER_CLASS;
5215             return 1;
5216           }
5217         else if (size <= 64+32)
5218           {
5219             classes[0] = X86_64_INTEGER_CLASS;
5220             classes[1] = X86_64_INTEGERSI_CLASS;
5221             return 2;
5222           }
5223         else if (size <= 64+64)
5224           {
5225             classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5226             return 2;
5227           }
5228         else
5229           gcc_unreachable ();
5230       }
5231     case CDImode:
5232     case TImode:
5233       classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5234       return 2;
5235     case COImode:
5236     case OImode:
5237       /* OImode shouldn't be used directly.  */
5238       gcc_unreachable ();
5239     case CTImode:
5240       return 0;
5241     case SFmode:
5242       if (!(bit_offset % 64))
5243         classes[0] = X86_64_SSESF_CLASS;
5244       else
5245         classes[0] = X86_64_SSE_CLASS;
5246       return 1;
5247     case DFmode:
5248       classes[0] = X86_64_SSEDF_CLASS;
5249       return 1;
5250     case XFmode:
5251       classes[0] = X86_64_X87_CLASS;
5252       classes[1] = X86_64_X87UP_CLASS;
5253       return 2;
5254     case TFmode:
5255       classes[0] = X86_64_SSE_CLASS;
5256       classes[1] = X86_64_SSEUP_CLASS;
5257       return 2;
5258     case SCmode:
5259       classes[0] = X86_64_SSE_CLASS;
5260       if (!(bit_offset % 64))
5261         return 1;
5262       else
5263         {
5264           static bool warned;
5265 
5266           if (!warned && warn_psabi)
5267             {
5268               warned = true;
5269               inform (input_location,
5270                       "The ABI of passing structure with complex float"
5271                       " member has changed in GCC 4.4");
5272             }
5273           classes[1] = X86_64_SSESF_CLASS;
5274           return 2;
5275         }
5276     case DCmode:
5277       classes[0] = X86_64_SSEDF_CLASS;
5278       classes[1] = X86_64_SSEDF_CLASS;
5279       return 2;
5280     case XCmode:
5281       classes[0] = X86_64_COMPLEX_X87_CLASS;
5282       return 1;
5283     case TCmode:
      /* This mode is larger than 16 bytes.  */
5285       return 0;
5286     case V8SFmode:
5287     case V8SImode:
5288     case V32QImode:
5289     case V16HImode:
5290     case V4DFmode:
5291     case V4DImode:
5292       classes[0] = X86_64_SSE_CLASS;
5293       classes[1] = X86_64_SSEUP_CLASS;
5294       classes[2] = X86_64_SSEUP_CLASS;
5295       classes[3] = X86_64_SSEUP_CLASS;
5296       return 4;
5297     case V4SFmode:
5298     case V4SImode:
5299     case V16QImode:
5300     case V8HImode:
5301     case V2DFmode:
5302     case V2DImode:
5303       classes[0] = X86_64_SSE_CLASS;
5304       classes[1] = X86_64_SSEUP_CLASS;
5305       return 2;
5306     case V1DImode:
5307     case V2SFmode:
5308     case V2SImode:
5309     case V4HImode:
5310     case V8QImode:
5311       classes[0] = X86_64_SSE_CLASS;
5312       return 1;
5313     case BLKmode:
5314     case VOIDmode:
5315       return 0;
5316     default:
5317       gcc_assert (VECTOR_MODE_P (mode));
5318 
5319       if (bytes > 16)
5320         return 0;
5321 
5322       gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5323 
5324       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5325         classes[0] = X86_64_INTEGERSI_CLASS;
5326       else
5327         classes[0] = X86_64_INTEGER_CLASS;
5328       classes[1] = X86_64_INTEGER_CLASS;
5329       return 1 + (bytes > 8);
5330     }
5331 }
5332 
/* Examine the argument and set the number of registers required in
   each class.  Return 0 iff the parameter should be passed in
   memory.  */
5335 static int
5336 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5337                   int *int_nregs, int *sse_nregs)
5338 {
5339   enum x86_64_reg_class regclass[MAX_CLASSES];
5340   int n = classify_argument (mode, type, regclass, 0);
5341 
5342   *int_nregs = 0;
5343   *sse_nregs = 0;
5344   if (!n)
5345     return 0;
5346   for (n--; n >= 0; n--)
5347     switch (regclass[n])
5348       {
5349       case X86_64_INTEGER_CLASS:
5350       case X86_64_INTEGERSI_CLASS:
5351         (*int_nregs)++;
5352         break;
5353       case X86_64_SSE_CLASS:
5354       case X86_64_SSESF_CLASS:
5355       case X86_64_SSEDF_CLASS:
5356         (*sse_nregs)++;
5357         break;
5358       case X86_64_NO_CLASS:
5359       case X86_64_SSEUP_CLASS:
5360         break;
5361       case X86_64_X87_CLASS:
5362       case X86_64_X87UP_CLASS:
5363         if (!in_return)
5364           return 0;
5365         break;
5366       case X86_64_COMPLEX_X87_CLASS:
5367         return in_return ? 2 : 0;
5368       case X86_64_MEMORY_CLASS:
5369         gcc_unreachable ();
5370       }
5371   return 1;
5372 }
5373 
5374 /* Construct container for the argument used by GCC interface.  See
5375    FUNCTION_ARG for the detailed description.  */
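
/* For the struct { double d; int i; } example above, the container
   built here is (modulo register numbering) the two-element parallel

       (parallel:BLK [(expr_list (reg:DF xmm0) (const_int 0))
                      (expr_list (reg:SI di) (const_int 8))])

   i.e. the first eightbyte in an SSE register and the second in an
   integer register at byte offset 8.  */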
5376 
5377 static rtx
5378 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5379                      const_tree type, int in_return, int nintregs, int nsseregs,
5380                      const int *intreg, int sse_regno)
5381 {
  /* These static flags record which of the errors below have already
     been issued.  */
5383   static bool issued_sse_arg_error;
5384   static bool issued_sse_ret_error;
5385   static bool issued_x87_ret_error;
5386 
5387   enum machine_mode tmpmode;
5388   int bytes =
5389     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5390   enum x86_64_reg_class regclass[MAX_CLASSES];
5391   int n;
5392   int i;
5393   int nexps = 0;
5394   int needed_sseregs, needed_intregs;
5395   rtx exp[MAX_CLASSES];
5396   rtx ret;
5397 
5398   n = classify_argument (mode, type, regclass, 0);
5399   if (!n)
5400     return NULL;
5401   if (!examine_argument (mode, type, in_return, &needed_intregs,
5402                          &needed_sseregs))
5403     return NULL;
5404   if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5405     return NULL;
5406 
5407   /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
5408      some less clueful developer tries to use floating-point anyway.  */
5409   if (needed_sseregs && !TARGET_SSE)
5410     {
5411       if (in_return)
5412         {
5413           if (!issued_sse_ret_error)
5414             {
5415               error ("SSE register return with SSE disabled");
5416               issued_sse_ret_error = true;
5417             }
5418         }
5419       else if (!issued_sse_arg_error)
5420         {
5421           error ("SSE register argument with SSE disabled");
5422           issued_sse_arg_error = true;
5423         }
5424       return NULL;
5425     }
5426 
5427   /* Likewise, error if the ABI requires us to return values in the
5428      x87 registers and the user specified -mno-80387.  */
5429   if (!TARGET_80387 && in_return)
5430     for (i = 0; i < n; i++)
5431       if (regclass[i] == X86_64_X87_CLASS
5432           || regclass[i] == X86_64_X87UP_CLASS
5433           || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5434         {
5435           if (!issued_x87_ret_error)
5436             {
5437               error ("x87 register return with x87 disabled");
5438               issued_x87_ret_error = true;
5439             }
5440           return NULL;
5441         }
5442 
  /* First construct simple cases.  Avoid SCmode, since we want to use
     a single register to pass this type.  */
5445   if (n == 1 && mode != SCmode)
5446     switch (regclass[0])
5447       {
5448       case X86_64_INTEGER_CLASS:
5449       case X86_64_INTEGERSI_CLASS:
5450         return gen_rtx_REG (mode, intreg[0]);
5451       case X86_64_SSE_CLASS:
5452       case X86_64_SSESF_CLASS:
5453       case X86_64_SSEDF_CLASS:
5454         if (mode != BLKmode)
5455           return gen_reg_or_parallel (mode, orig_mode, 
5456                                       SSE_REGNO (sse_regno));
5457         break;
5458       case X86_64_X87_CLASS:
5459       case X86_64_COMPLEX_X87_CLASS:
5460         return gen_rtx_REG (mode, FIRST_STACK_REG);
5461       case X86_64_NO_CLASS:
5462         /* Zero sized array, struct or class.  */
5463         return NULL;
5464       default:
5465         gcc_unreachable ();
5466       }
5467   if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5468       && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5469     return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5470   if (n == 4
5471       && regclass[0] == X86_64_SSE_CLASS
5472       && regclass[1] == X86_64_SSEUP_CLASS
5473       && regclass[2] == X86_64_SSEUP_CLASS
5474       && regclass[3] == X86_64_SSEUP_CLASS
5475       && mode != BLKmode)
5476     return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5477 
5478   if (n == 2
5479       && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5480     return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5481   if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5482       && regclass[1] == X86_64_INTEGER_CLASS
5483       && (mode == CDImode || mode == TImode || mode == TFmode)
5484       && intreg[0] + 1 == intreg[1])
5485     return gen_rtx_REG (mode, intreg[0]);
5486 
5487   /* Otherwise figure out the entries of the PARALLEL.  */
5488   for (i = 0; i < n; i++)
5489     {
5490       int pos;
5491 
5492       switch (regclass[i])
5493         {
5494           case X86_64_NO_CLASS:
5495             break;
5496           case X86_64_INTEGER_CLASS:
5497           case X86_64_INTEGERSI_CLASS:
5498             /* Merge TImodes on aligned occasions here too.  */
5499             if (i * 8 + 8 > bytes)
5500               tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5501             else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5502               tmpmode = SImode;
5503             else
5504               tmpmode = DImode;
            /* We've requested 24 bytes we don't have a mode for.
               Use DImode.  */
5506             if (tmpmode == BLKmode)
5507               tmpmode = DImode;
5508             exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5509                                                gen_rtx_REG (tmpmode, *intreg),
5510                                                GEN_INT (i*8));
5511             intreg++;
5512             break;
5513           case X86_64_SSESF_CLASS:
5514             exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5515                                                gen_rtx_REG (SFmode,
5516                                                             SSE_REGNO (sse_regno)),
5517                                                GEN_INT (i*8));
5518             sse_regno++;
5519             break;
5520           case X86_64_SSEDF_CLASS:
5521             exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5522                                                gen_rtx_REG (DFmode,
5523                                                             SSE_REGNO (sse_regno)),
5524                                                GEN_INT (i*8));
5525             sse_regno++;
5526             break;
5527           case X86_64_SSE_CLASS:
5528             pos = i;
5529             switch (n)
5530               {
5531               case 1:
5532                 tmpmode = DImode;
5533                 break;
5534               case 2:
5535                 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5536                   {
5537                     tmpmode = TImode;
5538                     i++;
5539                   }
5540                 else
5541                   tmpmode = DImode;
5542                 break;
5543               case 4:
5544                 gcc_assert (i == 0
5545                             && regclass[1] == X86_64_SSEUP_CLASS
5546                             && regclass[2] == X86_64_SSEUP_CLASS
5547                             && regclass[3] == X86_64_SSEUP_CLASS);
5548                 tmpmode = OImode;
5549                 i += 3;
5550                 break;
5551               default:
5552                 gcc_unreachable ();
5553               }
5554             exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5555                                                gen_rtx_REG (tmpmode,
5556                                                             SSE_REGNO (sse_regno)),
5557                                                GEN_INT (pos*8));
5558             sse_regno++;
5559             break;
5560           default:
5561             gcc_unreachable ();
5562         }
5563     }
5564 
5565   /* Empty aligned struct, union or class.  */
5566   if (nexps == 0)
5567     return NULL;
5568 
  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5570   for (i = 0; i < nexps; i++)
5571     XVECEXP (ret, 0, i) = exp [i];
5572   return ret;
5573 }
5574 
5575 /* Update the data in CUM to advance over an argument of mode MODE
5576    and data type TYPE.  (TYPE is null for libcalls where that information
5577    may not be available.)  */
5578 
5579 static void
5580 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5581                          tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5582 {
5583   switch (mode)
5584     {
5585     default:
5586       break;
5587 
5588     case BLKmode:
5589       if (bytes < 0)
5590         break;
5591       /* FALLTHRU */
5592 
5593     case DImode:
5594     case SImode:
5595     case HImode:
5596     case QImode:
5597       cum->words += words;
5598       cum->nregs -= words;
5599       cum->regno += words;
5600 
5601       if (cum->nregs <= 0)
5602         {
5603           cum->nregs = 0;
5604           cum->regno = 0;
5605         }
5606       break;
5607 
5608     case OImode:
5609       /* OImode shouldn't be used directly.  */
5610       gcc_unreachable ();
5611 
    case DFmode:
      if (cum->float_in_sse < 2)
        break;
      /* FALLTHRU */
5615     case SFmode:
5616       if (cum->float_in_sse < 1)
5617         break;
5618       /* FALLTHRU */
5619 
5620     case V8SFmode:
5621     case V8SImode:
5622     case V32QImode:
5623     case V16HImode:
5624     case V4DFmode:
5625     case V4DImode:
5626     case TImode:
5627     case V16QImode:
5628     case V8HImode:
5629     case V4SImode:
5630     case V2DImode:
5631     case V4SFmode:
5632     case V2DFmode:
5633       if (!type || !AGGREGATE_TYPE_P (type))
5634         {
5635           cum->sse_words += words;
5636           cum->sse_nregs -= 1;
5637           cum->sse_regno += 1;
5638           if (cum->sse_nregs <= 0)
5639             {
5640               cum->sse_nregs = 0;
5641               cum->sse_regno = 0;
5642             }
5643         }
5644       break;
5645 
5646     case V8QImode:
5647     case V4HImode:
5648     case V2SImode:
5649     case V2SFmode:
5650     case V1DImode:
5651       if (!type || !AGGREGATE_TYPE_P (type))
5652         {
5653           cum->mmx_words += words;
5654           cum->mmx_nregs -= 1;
5655           cum->mmx_regno += 1;
5656           if (cum->mmx_nregs <= 0)
5657             {
5658               cum->mmx_nregs = 0;
5659               cum->mmx_regno = 0;
5660             }
5661         }
5662       break;
5663     }
5664 }
5665 
5666 static void
5667 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5668                          tree type, HOST_WIDE_INT words, int named)
5669 {
5670   int int_nregs, sse_nregs;
5671 
  /* Unnamed 256bit vector mode parameters are passed on the stack.  */
5673   if (!named && VALID_AVX256_REG_MODE (mode))
5674     return;
5675 
5676   if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5677     cum->words += words;
5678   else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5679     {
5680       cum->nregs -= int_nregs;
5681       cum->sse_nregs -= sse_nregs;
5682       cum->regno += int_nregs;
5683       cum->sse_regno += sse_nregs;
5684     }
5685   else
5686     cum->words += words;
5687 }
5688 
5689 static void
5690 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5691                             HOST_WIDE_INT words)
5692 {
  /* Anything of a size other than 1, 2, 4 or 8 bytes must already have
     been passed indirectly (by reference).  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5695 
5696   cum->words += words;
5697   if (cum->nregs > 0)
5698     {
5699       cum->nregs -= 1;
5700       cum->regno += 1;
5701     }
5702 }
5703 
5704 void
5705 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5706                       tree type, int named)
5707 {
5708   HOST_WIDE_INT bytes, words;
5709 
5710   if (mode == BLKmode)
5711     bytes = int_size_in_bytes (type);
5712   else
5713     bytes = GET_MODE_SIZE (mode);
5714   words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5715 
5716   if (type)
5717     mode = type_natural_mode (type, NULL);
5718 
5719   if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5720     function_arg_advance_ms_64 (cum, bytes, words);
5721   else if (TARGET_64BIT)
5722     function_arg_advance_64 (cum, mode, type, words, named);
5723   else
5724     function_arg_advance_32 (cum, mode, type, bytes, words);
5725 }
5726 
5727 /* Define where to put the arguments to a function.
5728    Value is zero to push the argument on the stack,
5729    or a hard register in which to store the argument.
5730 
5731    MODE is the argument's machine mode.
5732    TYPE is the data type of the argument (as a tree).
5733     This is null for libcalls where that information may
5734     not be available.
5735    CUM is a variable of type CUMULATIVE_ARGS which gives info about
5736     the preceding args and about the function being called.
5737    NAMED is nonzero if this argument is a named parameter
5738     (otherwise it is an extra parameter matching an ellipsis).  */
5739 
5740 static rtx
5741 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5742                  enum machine_mode orig_mode, tree type,
5743                  HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5744 {
5745   static bool warnedsse, warnedmmx;
5746 
5747   /* Avoid the AL settings for the Unix64 ABI.  */
5748   if (mode == VOIDmode)
5749     return constm1_rtx;
5750 
5751   switch (mode)
5752     {
5753     default:
5754       break;
5755 
5756     case BLKmode:
5757       if (bytes < 0)
5758         break;
5759       /* FALLTHRU */
5760     case DImode:
5761     case SImode:
5762     case HImode:
5763     case QImode:
5764       if (words <= cum->nregs)
5765         {
5766           int regno = cum->regno;
5767 
          /* Fastcall allocates the first two DWORD-sized (SImode) or
             smaller arguments to ECX and EDX, provided the argument
             is not an aggregate type.  */
5771           if (cum->fastcall)
5772             {
5773               if (mode == BLKmode
5774                   || mode == DImode
5775                   || (type && AGGREGATE_TYPE_P (type)))
5776                 break;
5777 
              /* ECX, not EAX, is the first allocated register.  */
5779               if (regno == AX_REG)
5780                 regno = CX_REG;
5781             }
5782           return gen_rtx_REG (mode, regno);
5783         }
5784       break;
5785 
    case DFmode:
      if (cum->float_in_sse < 2)
        break;
      /* FALLTHRU */
5789     case SFmode:
5790       if (cum->float_in_sse < 1)
5791         break;
5792       /* FALLTHRU */
5793     case TImode:
5794       /* In 32bit, we pass TImode in xmm registers.  */
5795     case V16QImode:
5796     case V8HImode:
5797     case V4SImode:
5798     case V2DImode:
5799     case V4SFmode:
5800     case V2DFmode:
5801       if (!type || !AGGREGATE_TYPE_P (type))
5802         {
5803           if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5804             {
5805               warnedsse = true;
5806               warning (0, "SSE vector argument without SSE enabled "
5807                        "changes the ABI");
5808             }
5809           if (cum->sse_nregs)
5810             return gen_reg_or_parallel (mode, orig_mode,
5811                                         cum->sse_regno + FIRST_SSE_REG);
5812         }
5813       break;
5814 
5815     case OImode:
5816       /* OImode shouldn't be used directly.  */
5817       gcc_unreachable ();
5818 
5819     case V8SFmode:
5820     case V8SImode:
5821     case V32QImode:
5822     case V16HImode:
5823     case V4DFmode:
5824     case V4DImode:
5825       if (!type || !AGGREGATE_TYPE_P (type))
5826         {
5827           if (cum->sse_nregs)
5828             return gen_reg_or_parallel (mode, orig_mode,
5829                                         cum->sse_regno + FIRST_SSE_REG);
5830         }
5831       break;
5832 
5833     case V8QImode:
5834     case V4HImode:
5835     case V2SImode:
5836     case V2SFmode:
5837     case V1DImode:
5838       if (!type || !AGGREGATE_TYPE_P (type))
5839         {
5840           if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5841             {
5842               warnedmmx = true;
5843               warning (0, "MMX vector argument without MMX enabled "
5844                        "changes the ABI");
5845             }
5846           if (cum->mmx_nregs)
5847             return gen_reg_or_parallel (mode, orig_mode,
5848                                         cum->mmx_regno + FIRST_MMX_REG);
5849         }
5850       break;
5851     }
5852 
5853   return NULL_RTX;
5854 }
5855 
5856 static rtx
5857 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5858                  enum machine_mode orig_mode, tree type, int named)
5859 {
  /* Handle a hidden AL argument containing the number of SSE registers
     used for varargs x86-64 functions.  */
5862   if (mode == VOIDmode)
5863     return GEN_INT (cum->maybe_vaarg
5864                     ? (cum->sse_nregs < 0
5865                        ? (cum->call_abi == DEFAULT_ABI
5866                           ? SSE_REGPARM_MAX
5867                           : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5868                                                      : X64_SSE_REGPARM_MAX))
                       : cum->sse_regno)
5870                     : -1);
5871 
5872   switch (mode)
5873     {
5874     default:
5875       break;
5876 
5877     case V8SFmode:
5878     case V8SImode:
5879     case V32QImode:
5880     case V16HImode:
5881     case V4DFmode:
5882     case V4DImode:
5883       /* Unnamed 256bit vector mode parameters are passed on stack.  */
5884       if (!named)
5885         return NULL;
5886       break;
5887     }
5888 
5889   return construct_container (mode, orig_mode, type, 0, cum->nregs,
5890                               cum->sse_nregs,
5891                               &x86_64_int_parameter_registers [cum->regno],
5892                               cum->sse_regno);
5893 }
5894 
5895 static rtx
5896 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5897                     enum machine_mode orig_mode, int named,
5898                     HOST_WIDE_INT bytes)
5899 {
5900   unsigned int regno;
5901 
  /* We need to add a clobber for MS_ABI -> SYSV ABI calls in
     expand_call.  We use the value -2 to specify that the current
     function call is MSABI.  */
5904   if (mode == VOIDmode)
5905     return GEN_INT (-2);
5906 
5907   /* If we've run out of registers, it goes on the stack.  */
5908   if (cum->nregs == 0)
5909     return NULL_RTX;
5910 
5911   regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5912 
5913   /* Only floating point modes are passed in anything but integer regs.  */
5914   if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5915     {
5916       if (named)
5917         regno = cum->regno + FIRST_SSE_REG;
5918       else
5919         {
5920           rtx t1, t2;
5921 
5922           /* Unnamed floating parameters are passed in both the
5923              SSE and integer registers.  */
5924           t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5925           t2 = gen_rtx_REG (mode, regno);
5926           t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5927           t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5928           return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5929         }
5930     }
  /* Handle aggregate types passed in registers.  */
5932   if (orig_mode == BLKmode)
5933     {
5934       if (bytes > 0 && bytes <= 8)
5935         mode = (bytes > 4 ? DImode : SImode);
5936       if (mode == BLKmode)
5937         mode = DImode;
5938     }
5939 
5940   return gen_reg_or_parallel (mode, orig_mode, regno);
5941 }
5942 
5943 rtx
5944 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5945               tree type, int named)
5946 {
5947   enum machine_mode mode = omode;
5948   HOST_WIDE_INT bytes, words;
5949 
5950   if (mode == BLKmode)
5951     bytes = int_size_in_bytes (type);
5952   else
5953     bytes = GET_MODE_SIZE (mode);
5954   words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5955 
5956   /* To simplify the code below, represent vector types with a vector mode
5957      even if MMX/SSE are not active.  */
5958   if (type && TREE_CODE (type) == VECTOR_TYPE)
5959     mode = type_natural_mode (type, cum);
5960 
5961   if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5962     return function_arg_ms_64 (cum, mode, omode, named, bytes);
5963   else if (TARGET_64BIT)
5964     return function_arg_64 (cum, mode, omode, type, named);
5965   else
5966     return function_arg_32 (cum, mode, omode, type, bytes, words);
5967 }
5968 
5969 /* A C expression that indicates when an argument must be passed by
5970    reference.  If nonzero for an argument, a copy of that argument is
5971    made in memory and a pointer to the argument is passed instead of
5972    the argument itself.  The pointer is passed in whatever way is
5973    appropriate for passing a pointer to that type.  */
5974 
5975 static bool
5976 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5977                         enum machine_mode mode ATTRIBUTE_UNUSED,
5978                         const_tree type, bool named ATTRIBUTE_UNUSED)
5979 {
5980   /* See Windows x64 Software Convention.  */
5981   if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5982     {
5983       int msize = (int) GET_MODE_SIZE (mode);
5984       if (type)
5985         {
5986           /* Arrays are passed by reference.  */
5987           if (TREE_CODE (type) == ARRAY_TYPE)
5988             return true;
5989 
5990           if (AGGREGATE_TYPE_P (type))
5991             {
5992               /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
5993                  are passed by reference.  */
5994               msize = int_size_in_bytes (type);
5995             }
5996         }
5997 
5998       /* __m128 is passed by reference.  */
      switch (msize)
        {
        case 1: case 2: case 4: case 8:
          break;
        default:
          return true;
        }
6005     }
  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
    return true;

  return false;
6010 }
6011 
6012 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6013    ABI.  */
6014 static bool
6015 contains_aligned_value_p (tree type)
6016 {
6017   enum machine_mode mode = TYPE_MODE (type);
6018   if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6019        || mode == TDmode
6020        || mode == TFmode
6021        || mode == TCmode)
6022       && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6023     return true;
6024   if (TYPE_ALIGN (type) < 128)
6025     return false;
6026 
6027   if (AGGREGATE_TYPE_P (type))
6028     {
6029       /* Walk the aggregates recursively.  */
6030       switch (TREE_CODE (type))
6031         {
6032         case RECORD_TYPE:
6033         case UNION_TYPE:
6034         case QUAL_UNION_TYPE:
6035           {
6036             tree field;
6037 
6038             /* Walk all the structure fields.  */
6039             for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6040               {
6041                 if (TREE_CODE (field) == FIELD_DECL
6042                     && contains_aligned_value_p (TREE_TYPE (field)))
6043                   return true;
6044               }
6045             break;
6046           }
6047 
6048         case ARRAY_TYPE:
          /* Just in case some language passes arrays by value.  */
6050           if (contains_aligned_value_p (TREE_TYPE (type)))
6051             return true;
6052           break;
6053 
6054         default:
6055           gcc_unreachable ();
6056         }
6057     }
6058   return false;
6059 }
6060 
6061 /* Gives the alignment boundary, in bits, of an argument with the
6062    specified mode and type.  */
6063 
6064 int
6065 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6066 {
6067   int align;
6068   if (type)
6069     {
      /* Since the canonical type is used for the call, convert TYPE to
         its canonical type if needed.  */
6072       if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6073         type = TYPE_CANONICAL (type);
6074       align = TYPE_ALIGN (type);
6075     }
6076   else
6077     align = GET_MODE_ALIGNMENT (mode);
6078   if (align < PARM_BOUNDARY)
6079     align = PARM_BOUNDARY;
6080   /* In 32bit, only _Decimal128 and __float128 are aligned to their
6081      natural boundaries.  */
6082   if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6083     {
6084       /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
6085          make an exception for SSE modes since these require 128bit
6086          alignment.
6087 
6088          The handling here differs from field_alignment.  ICC aligns MMX
6089          arguments to 4 byte boundaries, while structure fields are aligned
6090          to 8 byte boundaries.  */
6091       if (!type)
6092         {
6093           if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6094             align = PARM_BOUNDARY;
6095         }
6096       else
6097         {
6098           if (!contains_aligned_value_p (type))
6099             align = PARM_BOUNDARY;
6100         }
6101     }
6102   if (align > BIGGEST_ALIGNMENT)
6103     align = BIGGEST_ALIGNMENT;
6104   return align;
6105 }
6106 
/* Return true if REGNO is a possible register number of a function
   value.  */
6108 
6109 bool
6110 ix86_function_value_regno_p (int regno)
6111 {
6112   switch (regno)
6113     {
6114     case 0:
6115       return true;
6116 
6117     case FIRST_FLOAT_REG:
      /* TODO: The function should depend on the current function's
         ABI, but builtins.c would need updating then.  Therefore we
         use the default ABI.  */
6121       if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
6122         return false;
6123       return TARGET_FLOAT_RETURNS_IN_80387;
6124 
6125     case FIRST_SSE_REG:
6126       return TARGET_SSE;
6127 
6128     case FIRST_MMX_REG:
6129       if (TARGET_MACHO || TARGET_64BIT)
6130         return false;
6131       return TARGET_MMX;
6132     }
6133 
6134   return false;
6135 }
6136 
6137 /* Define how to find the value returned by a function.
6138    VALTYPE is the data type of the value (as a tree).
6139    If the precise function being called is known, FUNC is its FUNCTION_DECL;
6140    otherwise, FUNC is 0.  */
6141 
6142 static rtx
6143 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6144                    const_tree fntype, const_tree fn)
6145 {
6146   unsigned int regno;
6147 
6148   /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6149      we normally prevent this case when mmx is not available.  However
6150      some ABIs may require the result to be returned like DImode.  */
6151   if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6152     regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6153 
6154   /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
6155      we prevent this case when sse is not available.  However some ABIs
6156      may require the result to be returned like integer TImode.  */
6157   else if (mode == TImode
6158            || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6159     regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6160 
6161   /* 32-byte vector modes in %ymm0.   */
6162   else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6163     regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6164 
6165   /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
6166   else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6167     regno = FIRST_FLOAT_REG;
6168   else
6169     /* Most things go in %eax.  */
6170     regno = AX_REG;
6171 
6172   /* Override FP return register with %xmm0 for local functions when
6173      SSE math is enabled or for functions with sseregparm attribute.  */
6174   if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6175     {
6176       int sse_level = ix86_function_sseregparm (fntype, fn, false);
6177       if ((sse_level >= 1 && mode == SFmode)
6178           || (sse_level == 2 && mode == DFmode))
6179         regno = FIRST_SSE_REG;
6180     }
6181 
6182   /* OImode shouldn't be used directly.  */
6183   gcc_assert (mode != OImode);
6184 
6185   return gen_rtx_REG (orig_mode, regno);
6186 }
6187 
6188 static rtx
6189 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6190                    const_tree valtype)
6191 {
6192   rtx ret;
6193 
6194   /* Handle libcalls, which don't provide a type node.  */
6195   if (valtype == NULL)
6196     {
6197       switch (mode)
6198         {
6199         case SFmode:
6200         case SCmode:
6201         case DFmode:
6202         case DCmode:
6203         case TFmode:
6204         case SDmode:
6205         case DDmode:
6206         case TDmode:
6207           return gen_rtx_REG (mode, FIRST_SSE_REG);
6208         case XFmode:
6209         case XCmode:
6210           return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6211         case TCmode:
6212           return NULL;
6213         default:
6214           return gen_rtx_REG (mode, AX_REG);
6215         }
6216     }
6217 
6218   ret = construct_container (mode, orig_mode, valtype, 1,
6219                              X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6220                              x86_64_int_return_registers, 0);
6221 
  /* For zero sized structures, construct_container returns NULL, but
     we need to keep the rest of the compiler happy by returning a
     meaningful value.  */
6224   if (!ret)
6225     ret = gen_rtx_REG (orig_mode, AX_REG);
6226 
6227   return ret;
6228 }
6229 
6230 static rtx
6231 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6232 {
6233   unsigned int regno = AX_REG;
6234 
6235   if (TARGET_SSE)
6236     {
6237       switch (GET_MODE_SIZE (mode))
6238         {
6239         case 16:
          if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
              && !COMPLEX_MODE_P (mode))
6242             regno = FIRST_SSE_REG;
6243           break;
6244         case 8:
6245         case 4:
6246           if (mode == SFmode || mode == DFmode)
6247             regno = FIRST_SSE_REG;
6248           break;
6249         default:
6250           break;
6251         }
6252     }
6253   return gen_rtx_REG (orig_mode, regno);
6254 }
6255 
6256 static rtx
6257 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6258                        enum machine_mode orig_mode, enum machine_mode mode)
6259 {
6260   const_tree fn, fntype;
6261 
6262   fn = NULL_TREE;
6263   if (fntype_or_decl && DECL_P (fntype_or_decl))
6264     fn = fntype_or_decl;
6265   fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6266 
6267   if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6268     return function_value_ms_64 (orig_mode, mode);
6269   else if (TARGET_64BIT)
6270     return function_value_64 (orig_mode, mode, valtype);
6271   else
6272     return function_value_32 (orig_mode, mode, fntype, fn);
6273 }
6274 
6275 static rtx
6276 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6277                      bool outgoing ATTRIBUTE_UNUSED)
6278 {
6279   enum machine_mode mode, orig_mode;
6280 
6281   orig_mode = TYPE_MODE (valtype);
6282   mode = type_natural_mode (valtype, NULL);
6283   return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6284 }
6285 
6286 rtx
6287 ix86_libcall_value (enum machine_mode mode)
6288 {
6289   return ix86_function_value_1 (NULL, NULL, mode, mode);
6290 }
6291 
6292 /* Return true iff type is returned in memory.  */
6293 
6294 static int ATTRIBUTE_UNUSED
6295 return_in_memory_32 (const_tree type, enum machine_mode mode)
6296 {
6297   HOST_WIDE_INT size;
6298 
6299   if (mode == BLKmode)
6300     return 1;
6301 
6302   size = int_size_in_bytes (type);
6303 
6304   if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6305     return 0;
6306 
6307   if (VECTOR_MODE_P (mode) || mode == TImode)
6308     {
6309       /* User-created vectors small enough to fit in EAX.  */
6310       if (size < 8)
6311         return 0;
6312 
      /* MMX/3dNow values are returned in MM0,
         except when it doesn't exist.  */
6315       if (size == 8)
6316         return (TARGET_MMX ? 0 : 1);
6317 
6318       /* SSE values are returned in XMM0, except when it doesn't exist.  */
6319       if (size == 16)
6320         return (TARGET_SSE ? 0 : 1);
6321 
6322       /* AVX values are returned in YMM0, except when it doesn't exist.  */
6323       if (size == 32)
6324         return TARGET_AVX ? 0 : 1;
6325     }
6326 
6327   if (mode == XFmode)
6328     return 0;
6329 
6330   if (size > 12)
6331     return 1;
6332 
6333   /* OImode shouldn't be used directly.  */
6334   gcc_assert (mode != OImode);
6335 
6336   return 0;
6337 }
6338 
6339 static int ATTRIBUTE_UNUSED
6340 return_in_memory_64 (const_tree type, enum machine_mode mode)
6341 {
6342   int needed_intregs, needed_sseregs;
6343   return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6344 }
6345 
6346 static int ATTRIBUTE_UNUSED
6347 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6348 {
6349   HOST_WIDE_INT size = int_size_in_bytes (type);
6350 
6351   /* __m128 is returned in xmm0.  */
6352   if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6353       && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6354     return 0;
6355 
  /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes.  */
6357   return (size != 1 && size != 2 && size != 4 && size != 8);
6358 }
6359 
6360 static bool
6361 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6362 {
6363 #ifdef SUBTARGET_RETURN_IN_MEMORY
6364   return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6365 #else
6366   const enum machine_mode mode = type_natural_mode (type, NULL);
6367  
6368   if (TARGET_64BIT)
6369     {
6370       if (ix86_function_type_abi (fntype) == MS_ABI)
6371         return return_in_memory_ms_64 (type, mode);
6372       else
6373         return return_in_memory_64 (type, mode);
6374     }
6375   else
6376     return return_in_memory_32 (type, mode);
6377 #endif
6378 }
6379 
/* Return true iff TYPE is returned in memory.  This version is used
6381    on Solaris 10.  It is similar to the generic ix86_return_in_memory,
6382    but differs notably in that when MMX is available, 8-byte vectors
6383    are returned in memory, rather than in MMX registers.  */
6384 
6385 bool
6386 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6387 {
6388   int size;
6389   enum machine_mode mode = type_natural_mode (type, NULL);
6390 
6391   if (TARGET_64BIT)
6392     return return_in_memory_64 (type, mode);
6393 
  if (mode == BLKmode)
    return true;
6396 
6397   size = int_size_in_bytes (type);
6398 
6399   if (VECTOR_MODE_P (mode))
6400     {
6401       /* Return in memory only if MMX registers *are* available.  This
6402          seems backwards, but it is consistent with the existing
6403          Solaris x86 ABI.  */
6404       if (size == 8)
6405         return TARGET_MMX;
6406       if (size == 16)
6407         return !TARGET_SSE;
6408     }
6409   else if (mode == TImode)
6410     return !TARGET_SSE;
  else if (mode == XFmode)
    return false;
6413 
6414   return size > 12;
6415 }
6416 
6417 /* When returning SSE vector types, we have a choice of either
     (1) being ABI incompatible with a -march switch, or
6419      (2) generating an error.
6420    Given no good solution, I think the safest thing is one warning.
6421    The user won't be able to use -Werror, but....
6422 
6423    Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6424    called in response to actually generating a caller or callee that
6425    uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
6426    via aggregate_value_p for general type probing from tree-ssa.  */
6427 
6428 static rtx
6429 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6430 {
6431   static bool warnedsse, warnedmmx;
6432 
6433   if (!TARGET_64BIT && type)
6434     {
6435       /* Look at the return type of the function, not the function type.  */
6436       enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6437 
6438       if (!TARGET_SSE && !warnedsse)
6439         {
6440           if (mode == TImode
6441               || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6442             {
6443               warnedsse = true;
6444               warning (0, "SSE vector return without SSE enabled "
6445                        "changes the ABI");
6446             }
6447         }
6448 
6449       if (!TARGET_MMX && !warnedmmx)
6450         {
6451           if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6452             {
6453               warnedmmx = true;
6454               warning (0, "MMX vector return without MMX enabled "
6455                        "changes the ABI");
6456             }
6457         }
6458     }
6459 
6460   return NULL;
6461 }
6462 
6463 
6464 /* Create the va_list data type.  */
6465 
/* Returns the calling convention specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */
6468 
6469 static tree
6470 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6471 {
6472   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6473 
6474   /* For i386 we use plain pointer to argument area.  */
6475   if (!TARGET_64BIT || abi == MS_ABI)
6476     return build_pointer_type (char_type_node);
6477 
6478   record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6479   type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6480 
6481   f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6482                       unsigned_type_node);
6483   f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6484                       unsigned_type_node);
6485   f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6486                       ptr_type_node);
6487   f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
6488                       ptr_type_node);
6489 
6490   va_list_gpr_counter_field = f_gpr;
6491   va_list_fpr_counter_field = f_fpr;
6492 
6493   DECL_FIELD_CONTEXT (f_gpr) = record;
6494   DECL_FIELD_CONTEXT (f_fpr) = record;
6495   DECL_FIELD_CONTEXT (f_ovf) = record;
6496   DECL_FIELD_CONTEXT (f_sav) = record;
6497 
6498   TREE_CHAIN (record) = type_decl;
6499   TYPE_NAME (record) = type_decl;
6500   TYPE_FIELDS (record) = f_gpr;
6501   TREE_CHAIN (f_gpr) = f_fpr;
6502   TREE_CHAIN (f_fpr) = f_ovf;
6503   TREE_CHAIN (f_ovf) = f_sav;
6504 
6505   layout_type (record);
6506 
6507   /* The correct type is an array type of one element.  */
6508   return build_array_type (record, build_index_type (size_zero_node));
6509 }
6510 
/* Set up the builtin va_list data type and, for 64-bit, the additional
   calling convention specific va_list data types.  */
6513 
6514 static tree
6515 ix86_build_builtin_va_list (void)
6516 {
6517   tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI);
6518 
6519   /* Initialize abi specific va_list builtin types.  */
6520   if (TARGET_64BIT)
6521     {
6522       tree t;
6523       if (DEFAULT_ABI == MS_ABI)
6524         {
6525           t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6526           if (TREE_CODE (t) != RECORD_TYPE)
6527             t = build_variant_type_copy (t);
6528           sysv_va_list_type_node = t;
6529         }
6530       else
6531         {
6532           t = ret;
6533           if (TREE_CODE (t) != RECORD_TYPE)
6534             t = build_variant_type_copy (t);
6535           sysv_va_list_type_node = t;
6536         }
6537       if (DEFAULT_ABI != MS_ABI)
6538         {
6539           t = ix86_build_builtin_va_list_abi (MS_ABI);
6540           if (TREE_CODE (t) != RECORD_TYPE)
6541             t = build_variant_type_copy (t);
6542           ms_va_list_type_node = t;
6543         }
6544       else
6545         {
6546           t = ret;
6547           if (TREE_CODE (t) != RECORD_TYPE)
6548             t = build_variant_type_copy (t);
6549           ms_va_list_type_node = t;
6550         }
6551     }
6552 
6553   return ret;
6554 }
6555 
6556 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
6557 
6558 static void
6559 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6560 {
6561   rtx save_area, mem;
6562   rtx label;
6563   rtx label_ref;
6564   rtx tmp_reg;
6565   rtx nsse_reg;
6566   alias_set_type set;
6567   int i;
6568   int regparm = ix86_regparm;
6569 
6570   if (cum->call_abi != DEFAULT_ABI)
6571     regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6572 
6573   /* GPR size of varargs save area.  */
6574   if (cfun->va_list_gpr_size)
6575     ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6576   else
6577     ix86_varargs_gpr_size = 0;
6578 
6579   /* FPR size of varargs save area.  We don't need it if we don't pass
6580      anything in SSE registers.  */
6581   if (cum->sse_nregs && cfun->va_list_fpr_size)
6582     ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6583   else
6584     ix86_varargs_fpr_size = 0;
6585 
6586   if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6587     return;
6588 
6589   save_area = frame_pointer_rtx;
6590   set = get_varargs_alias_set ();
6591 
6592   for (i = cum->regno;
6593        i < regparm
6594        && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6595        i++)
6596     {
6597       mem = gen_rtx_MEM (Pmode,
6598                          plus_constant (save_area, i * UNITS_PER_WORD));
6599       MEM_NOTRAP_P (mem) = 1;
6600       set_mem_alias_set (mem, set);
6601       emit_move_insn (mem, gen_rtx_REG (Pmode,
6602                                         x86_64_int_parameter_registers[i]));
6603     }
6604 
6605   if (ix86_varargs_fpr_size)
6606     {
      /* The stack must be aligned to 16 bytes for the FP register save
         area.  */
6608       if (crtl->stack_alignment_needed < 128)
6609         crtl->stack_alignment_needed = 128;
6610 
      /* Now emit code to save SSE registers.  The AX parameter contains the
         number of SSE parameter registers used to call this function.  We
         use the sse_prologue_save insn template, which produces a computed
         jump across the SSE saves.  Some preparation work is needed to get
         this working.  */
6615 
6616       label = gen_label_rtx ();
6617       label_ref = gen_rtx_LABEL_REF (Pmode, label);
6618 
      /* Compute the address to jump to:
         label - eax*4 + nnamed_sse_arguments*4, or
         label - eax*5 + nnamed_sse_arguments*5 for AVX.  */
6622       tmp_reg = gen_reg_rtx (Pmode);
6623       nsse_reg = gen_reg_rtx (Pmode);
6624       emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6625       emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6626                               gen_rtx_MULT (Pmode, nsse_reg,
6627                                             GEN_INT (4))));
6628 
      /* vmovaps is one byte longer than movaps, so with AVX each save
         insn is 5 bytes; add nsse_reg once more to scale by 5.  */
6630       if (TARGET_AVX)
6631         emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6632                                 gen_rtx_PLUS (Pmode, tmp_reg,
6633                                               nsse_reg)));
6634 
6635       if (cum->sse_regno)
6636         emit_move_insn
6637           (nsse_reg,
6638            gen_rtx_CONST (DImode,
6639                           gen_rtx_PLUS (DImode,
6640                                         label_ref,
6641                                         GEN_INT (cum->sse_regno
6642                                                  * (TARGET_AVX ? 5 : 4)))));
6643       else
6644         emit_move_insn (nsse_reg, label_ref);
6645       emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
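
      /* At this point nsse_reg holds label - (eax - sse_regno) * SIZE,
         where SIZE is the length of one save insn (4 bytes, 5 with AVX)
         and sse_regno is the number of named SSE arguments.  Since the
         sse_prologue_save template places the label after the saves, the
         computed jump executes exactly the saves for the registers that
         may hold unnamed arguments.  */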
6646 
      /* Compute the address of the memory block we save into.  We always
         use a pointer pointing 127 bytes past the first byte to store;
         this keeps each instruction within 4 bytes (5 bytes for AVX) by
         letting every displacement fit in a single signed byte.  */
6651       tmp_reg = gen_reg_rtx (Pmode);
6652       emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6653                               plus_constant (save_area,
6654                                              ix86_varargs_gpr_size + 127)));
6655       mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6656       MEM_NOTRAP_P (mem) = 1;
6657       set_mem_alias_set (mem, set);
6658       set_mem_align (mem, BITS_PER_WORD);
6659 
6660       /* And finally do the dirty job!  */
6661       emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6662                                         GEN_INT (cum->sse_regno), label));
6663     }
6664 }
6665 
6666 static void
6667 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6668 {
6669   alias_set_type set = get_varargs_alias_set ();
6670   int i;
6671 
6672   for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6673     {
6674       rtx reg, mem;
6675 
6676       mem = gen_rtx_MEM (Pmode,
6677                          plus_constant (virtual_incoming_args_rtx,
6678                                         i * UNITS_PER_WORD));
6679       MEM_NOTRAP_P (mem) = 1;
6680       set_mem_alias_set (mem, set);
6681 
6682       reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6683       emit_move_insn (mem, reg);
6684     }
6685 }
6686 
6687 static void
6688 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6689                              tree type, int *pretend_size ATTRIBUTE_UNUSED,
6690                              int no_rtl)
6691 {
6692   CUMULATIVE_ARGS next_cum;
6693   tree fntype;
  /* This argument doesn't appear to be used anymore, which is good,
     because the old code here didn't suppress rtl generation.  */
6697   gcc_assert (!no_rtl);
6698 
6699   if (!TARGET_64BIT)
6700     return;
6701 
6702   fntype = TREE_TYPE (current_function_decl);
6703 
6704   /* For varargs, we do not want to skip the dummy va_dcl argument.
6705      For stdargs, we do want to skip the last named argument.  */
6706   next_cum = *cum;
6707   if (stdarg_p (fntype))
6708     function_arg_advance (&next_cum, mode, type, 1);
6709 
6710   if (cum->call_abi == MS_ABI)
6711     setup_incoming_varargs_ms_64 (&next_cum);
6712   else
6713     setup_incoming_varargs_64 (&next_cum);
6714 }
6715 
/* Return true if TYPE is a va_list of kind char *.  */
6717 
6718 static bool
6719 is_va_list_char_pointer (tree type)
6720 {
6721   tree canonic;
6722 
6723   /* For 32-bit it is always true.  */
6724   if (!TARGET_64BIT)
6725     return true;
6726   canonic = ix86_canonical_va_list_type (type);
6727   return (canonic == ms_va_list_type_node
6728           || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node));
6729 }
6730 
6731 /* Implement va_start.  */
6732 
6733 static void
6734 ix86_va_start (tree valist, rtx nextarg)
6735 {
6736   HOST_WIDE_INT words, n_gpr, n_fpr;
6737   tree f_gpr, f_fpr, f_ovf, f_sav;
6738   tree gpr, fpr, ovf, sav, t;
6739   tree type;
6740 
  /* Only the 64-bit target needs something special.  */
6742   if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6743     {
6744       std_expand_builtin_va_start (valist, nextarg);
6745       return;
6746     }
6747 
6748   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6749   f_fpr = TREE_CHAIN (f_gpr);
6750   f_ovf = TREE_CHAIN (f_fpr);
6751   f_sav = TREE_CHAIN (f_ovf);
6752 
6753   valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6754   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6755   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6756   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6757   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6758 
6759   /* Count number of gp and fp argument registers used.  */
6760   words = crtl->args.info.words;
6761   n_gpr = crtl->args.info.regno;
6762   n_fpr = crtl->args.info.sse_regno;
6763 
6764   if (cfun->va_list_gpr_size)
6765     {
6766       type = TREE_TYPE (gpr);
6767       t = build2 (MODIFY_EXPR, type,
6768                   gpr, build_int_cst (type, n_gpr * 8));
6769       TREE_SIDE_EFFECTS (t) = 1;
6770       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6771     }
6772 
6773   if (TARGET_SSE && cfun->va_list_fpr_size)
6774     {
6775       type = TREE_TYPE (fpr);
6776       t = build2 (MODIFY_EXPR, type, fpr,
6777                   build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6778       TREE_SIDE_EFFECTS (t) = 1;
6779       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6780     }
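
  /* Worked example: for a prototype such as f (int x, ...), one GPR is
     consumed by the named argument, so gp_offset starts at 1*8 = 8,
     and with no named FP arguments fp_offset starts at
     0*16 + 8*X86_64_REGPARM_MAX (= 48 with the SysV register count of 6),
     just past the GPR slots of the register save area.  */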
6781 
6782   /* Find the overflow area.  */
6783   type = TREE_TYPE (ovf);
6784   t = make_tree (type, crtl->args.internal_arg_pointer);
6785   if (words != 0)
6786     t = build2 (POINTER_PLUS_EXPR, type, t,
6787                 size_int (words * UNITS_PER_WORD));
6788   t = build2 (MODIFY_EXPR, type, ovf, t);
6789   TREE_SIDE_EFFECTS (t) = 1;
6790   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6791 
6792   if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6793     {
      /* Find the register save area.
         The function prologue saves it right above the stack frame.  */
6796       type = TREE_TYPE (sav);
6797       t = make_tree (type, frame_pointer_rtx);
6798       if (!ix86_varargs_gpr_size)
6799         t = build2 (POINTER_PLUS_EXPR, type, t,
6800                     size_int (-8 * X86_64_REGPARM_MAX));
6801       t = build2 (MODIFY_EXPR, type, sav, t);
6802       TREE_SIDE_EFFECTS (t) = 1;
6803       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6804     }
6805 }
6806 
6807 /* Implement va_arg.  */
6808 
6809 static tree
6810 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6811                       gimple_seq *post_p)
6812 {
6813   static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6814   tree f_gpr, f_fpr, f_ovf, f_sav;
6815   tree gpr, fpr, ovf, sav, t;
6816   int size, rsize;
6817   tree lab_false, lab_over = NULL_TREE;
6818   tree addr, t2;
6819   rtx container;
6820   int indirect_p = 0;
6821   tree ptrtype;
6822   enum machine_mode nat_mode;
6823   int arg_boundary;
6824 
  /* Only the 64-bit target needs something special.  */
6826   if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6827     return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6828 
6829   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6830   f_fpr = TREE_CHAIN (f_gpr);
6831   f_ovf = TREE_CHAIN (f_fpr);
6832   f_sav = TREE_CHAIN (f_ovf);
6833 
6834   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6835                 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6836   valist = build_va_arg_indirect_ref (valist);
6837   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6838   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6839   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6840 
6841   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6842   if (indirect_p)
6843     type = build_pointer_type (type);
6844   size = int_size_in_bytes (type);
6845   rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6846 
6847   nat_mode = type_natural_mode (type, NULL);
6848   switch (nat_mode)
6849     {
6850     case V8SFmode:
6851     case V8SImode:
6852     case V32QImode:
6853     case V16HImode:
6854     case V4DFmode:
6855     case V4DImode:
      /* Unnamed 256-bit vector mode parameters are passed on the stack.  */
6857       if (ix86_cfun_abi () == SYSV_ABI)
6858         {
6859           container = NULL;
6860           break;
6861         }
6862 
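      /* FALLTHRU: otherwise handle these modes like any other.  */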
6863     default:
6864       container = construct_container (nat_mode, TYPE_MODE (type),
6865                                        type, 0, X86_64_REGPARM_MAX,
6866                                        X86_64_SSE_REGPARM_MAX, intreg,
6867                                        0);
6868       break;
6869     }
6870 
6871   /* Pull the value out of the saved registers.  */
6872 
6873   addr = create_tmp_var (ptr_type_node, "addr");
6874   DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6875 
6876   if (container)
6877     {
6878       int needed_intregs, needed_sseregs;
6879       bool need_temp;
6880       tree int_addr, sse_addr;
6881 
6882       lab_false = create_artificial_label ();
6883       lab_over = create_artificial_label ();
6884 
6885       examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
6886 
6887       need_temp = (!REG_P (container)
6888                    && ((needed_intregs && TYPE_ALIGN (type) > 64)
6889                        || TYPE_ALIGN (type) > 128));
6890 
      /* In case we are passing a structure, verify that it occupies a
         consecutive block of the register save area.  If not, we need
         to do moves.  */
6893       if (!need_temp && !REG_P (container))
6894         {
          /* Verify that all registers are strictly consecutive.  */
6896           if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6897             {
6898               int i;
6899 
6900               for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6901                 {
6902                   rtx slot = XVECEXP (container, 0, i);
6903                   if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6904                       || INTVAL (XEXP (slot, 1)) != i * 16)
6905                     need_temp = 1;
6906                 }
6907             }
6908           else
6909             {
6910               int i;
6911 
6912               for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6913                 {
6914                   rtx slot = XVECEXP (container, 0, i);
6915                   if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6916                       || INTVAL (XEXP (slot, 1)) != i * 8)
6917                     need_temp = 1;
6918                 }
6919             }
6920         }
6921       if (!need_temp)
6922         {
6923           int_addr = addr;
6924           sse_addr = addr;
6925         }
6926       else
6927         {
6928           int_addr = create_tmp_var (ptr_type_node, "int_addr");
6929           DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6930           sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6931           DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6932         }
6933 
6934       /* First ensure that we fit completely in registers.  */
6935       if (needed_intregs)
6936         {
6937           t = build_int_cst (TREE_TYPE (gpr),
6938                              (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6939           t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6940           t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6941           t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6942           gimplify_and_add (t, pre_p);
6943         }
6944       if (needed_sseregs)
6945         {
6946           t = build_int_cst (TREE_TYPE (fpr),
6947                              (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6948                              + X86_64_REGPARM_MAX * 8);
6949           t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6950           t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6951           t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6952           gimplify_and_add (t, pre_p);
6953         }
6954 
6955       /* Compute index to start of area used for integer regs.  */
6956       if (needed_intregs)
6957         {
6958           /* int_addr = gpr + sav; */
6959           t = fold_convert (sizetype, gpr);
6960           t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6961           gimplify_assign (int_addr, t, pre_p);
6962         }
6963       if (needed_sseregs)
6964         {
6965           /* sse_addr = fpr + sav; */
6966           t = fold_convert (sizetype, fpr);
6967           t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6968           gimplify_assign (sse_addr, t, pre_p);
6969         }
6970       if (need_temp)
6971         {
6972           int i;
6973           tree temp = create_tmp_var (type, "va_arg_tmp");
6974 
6975           /* addr = &temp; */
6976           t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6977           gimplify_assign (addr, t, pre_p);
6978 
6979           for (i = 0; i < XVECLEN (container, 0); i++)
6980             {
6981               rtx slot = XVECEXP (container, 0, i);
6982               rtx reg = XEXP (slot, 0);
6983               enum machine_mode mode = GET_MODE (reg);
6984               tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6985               tree addr_type = build_pointer_type (piece_type);
6986               tree daddr_type = build_pointer_type_for_mode (piece_type,
6987                                                              ptr_mode, true);
6988               tree src_addr, src;
6989               int src_offset;
6990               tree dest_addr, dest;
6991 
6992               if (SSE_REGNO_P (REGNO (reg)))
6993                 {
6994                   src_addr = sse_addr;
6995                   src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
6996                 }
6997               else
6998                 {
6999                   src_addr = int_addr;
7000                   src_offset = REGNO (reg) * 8;
7001                 }
7002               src_addr = fold_convert (addr_type, src_addr);
7003               src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7004                                       size_int (src_offset));
7005               src = build_va_arg_indirect_ref (src_addr);
7006 
7007               dest_addr = fold_convert (daddr_type, addr);
7008               dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7009                                        size_int (INTVAL (XEXP (slot, 1))));
7010               dest = build_va_arg_indirect_ref (dest_addr);
7011 
7012               gimplify_assign (dest, src, pre_p);
7013             }
7014         }
7015 
7016       if (needed_intregs)
7017         {
7018           t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7019                       build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7020           gimplify_assign (gpr, t, pre_p);
7021         }
7022 
7023       if (needed_sseregs)
7024         {
7025           t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7026                       build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7027           gimplify_assign (fpr, t, pre_p);
7028         }
7029 
7030       gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7031 
7032       gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7033     }
7034 
7035   /* ... otherwise out of the overflow area.  */
7036 
  /* When we align a parameter on the stack for the caller, if the
     parameter alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it
     will be aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  Match the
     callee here with the caller.  */
7041   arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7042   if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7043     arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7044 
7045   /* Care for on-stack alignment if needed.  */
7046   if (arg_boundary <= 64
7047       || integer_zerop (TYPE_SIZE (type)))
7048     t = ovf;
  else
7050     {
7051       HOST_WIDE_INT align = arg_boundary / 8;
7052       t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7053                   size_int (align - 1));
7054       t = fold_convert (sizetype, t);
7055       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7056                   size_int (-align));
7057       t = fold_convert (TREE_TYPE (ovf), t);
7058     }
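  /* The else branch above is the usual round-up-to-alignment idiom,
     addr = (addr + align - 1) & -align; e.g. with a 16-byte alignment
     an overflow pointer of 0x1004 advances to 0x1010, while 0x1010 is
     left unchanged.  */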
7059   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7060   gimplify_assign (addr, t, pre_p);
7061 
7062   t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7063               size_int (rsize * UNITS_PER_WORD));
7064   gimplify_assign (unshare_expr (ovf), t, pre_p);
7065 
7066   if (container)
7067     gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7068 
7069   ptrtype = build_pointer_type (type);
7070   addr = fold_convert (ptrtype, addr);
7071 
7072   if (indirect_p)
7073     addr = build_va_arg_indirect_ref (addr);
7074   return build_va_arg_indirect_ref (addr);
7075 }
7076 
7077 /* Return nonzero if OPNUM's MEM should be matched
7078    in movabs* patterns.  */
7079 
7080 int
7081 ix86_check_movabs (rtx insn, int opnum)
7082 {
7083   rtx set, mem;
7084 
7085   set = PATTERN (insn);
7086   if (GET_CODE (set) == PARALLEL)
7087     set = XVECEXP (set, 0, 0);
7088   gcc_assert (GET_CODE (set) == SET);
7089   mem = XEXP (set, opnum);
7090   while (GET_CODE (mem) == SUBREG)
7091     mem = SUBREG_REG (mem);
7092   gcc_assert (MEM_P (mem));
7093   return (volatile_ok || !MEM_VOLATILE_P (mem));
7094 }
7095 
7096 /* Initialize the table of extra 80387 mathematical constants.  */
7097 
7098 static void
7099 init_ext_80387_constants (void)
7100 {
7101   static const char * cst[5] =
7102   {
7103     "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
7104     "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
7105     "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
7106     "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
7107     "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
7108   };
7109   int i;
7110 
7111   for (i = 0; i < 5; i++)
7112     {
7113       real_from_string (&ext_80387_constants_table[i], cst[i]);
7114       /* Ensure each constant is rounded to XFmode precision.  */
7115       real_convert (&ext_80387_constants_table[i],
7116                     XFmode, &ext_80387_constants_table[i]);
7117     }
7118 
7119   ext_80387_constants_init = 1;
7120 }
7121 
/* Return a nonzero code identifying the special instruction that can
   load the constant X, 0 if there is none, or -1 if X is not an 80387
   constant at all.  */
7124 
7125 int
7126 standard_80387_constant_p (rtx x)
7127 {
7128   enum machine_mode mode = GET_MODE (x);
7129 
7130   REAL_VALUE_TYPE r;
7131 
7132   if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7133     return -1;
7134 
7135   if (x == CONST0_RTX (mode))
7136     return 1;
7137   if (x == CONST1_RTX (mode))
7138     return 2;
7139 
7140   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7141 
7142   /* For XFmode constants, try to find a special 80387 instruction when
7143      optimizing for size or on those CPUs that benefit from them.  */
7144   if (mode == XFmode
7145       && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7146     {
7147       int i;
7148 
7149       if (! ext_80387_constants_init)
7150         init_ext_80387_constants ();
7151 
7152       for (i = 0; i < 5; i++)
7153         if (real_identical (&r, &ext_80387_constants_table[i]))
7154           return i + 3;
7155     }
7156 
  /* A load of the constant -0.0 or -1.0 will be split into an
     fldz;fchs or fld1;fchs sequence.  */
7159   if (real_isnegzero (&r))
7160     return 8;
7161   if (real_identical (&r, &dconstm1))
7162     return 9;
7163 
7164   return 0;
7165 }
7166 
7167 /* Return the opcode of the special instruction to be used to load
7168    the constant X.  */
7169 
7170 const char *
7171 standard_80387_constant_opcode (rtx x)
7172 {
7173   switch (standard_80387_constant_p (x))
7174     {
7175     case 1:
7176       return "fldz";
7177     case 2:
7178       return "fld1";
7179     case 3:
7180       return "fldlg2";
7181     case 4:
7182       return "fldln2";
7183     case 5:
7184       return "fldl2e";
7185     case 6:
7186       return "fldl2t";
7187     case 7:
7188       return "fldpi";
7189     case 8:
7190     case 9:
7191       return "#";
7192     default:
7193       gcc_unreachable ();
7194     }
7195 }
7196 
7197 /* Return the CONST_DOUBLE representing the 80387 constant that is
7198    loaded by the specified special instruction.  The argument IDX
7199    matches the return value from standard_80387_constant_p.  */
7200 
7201 rtx
7202 standard_80387_constant_rtx (int idx)
7203 {
7204   int i;
7205 
7206   if (! ext_80387_constants_init)
7207     init_ext_80387_constants ();
7208 
7209   switch (idx)
7210     {
7211     case 3:
7212     case 4:
7213     case 5:
7214     case 6:
7215     case 7:
7216       i = idx - 3;
7217       break;
7218 
7219     default:
7220       gcc_unreachable ();
7221     }
7222 
7223   return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7224                                        XFmode);
7225 }
7226 
/* Return 1 if MODE is a valid 128-bit SSE mode.  */
7228 static int
7229 standard_sse_mode_p (enum machine_mode mode)
7230 {
7231   switch (mode)
7232     {
7233     case V16QImode:
7234     case V8HImode:
7235     case V4SImode:
7236     case V2DImode:
7237     case V4SFmode:
7238     case V2DFmode:
7239       return 1;
7240 
7241     default:
7242       return 0;
7243     }
7244 }
7245 
/* Return 1 if X is all 0s.  For all 1s, return 2 (or -2 if SSE2 is
   disabled) when X is in a 128-bit SSE mode, and 3 (or -3 if AVX is
   disabled) when X is in a 256-bit AVX mode.  */
7249 
7250 int
7251 standard_sse_constant_p (rtx x)
7252 {
7253   enum machine_mode mode = GET_MODE (x);
7254 
7255   if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7256     return 1;
7257   if (vector_all_ones_operand (x, mode))
7258     {
7259       if (standard_sse_mode_p (mode))
7260         return TARGET_SSE2 ? 2 : -2;
7261       else if (VALID_AVX256_REG_MODE (mode))
7262         return TARGET_AVX ? 3 : -3;
7263     }
7264 
7265   return 0;
7266 }
7267 
7268 /* Return the opcode of the special instruction to be used to load
7269    the constant X.  */
7270 
7271 const char *
7272 standard_sse_constant_opcode (rtx insn, rtx x)
7273 {
7274   switch (standard_sse_constant_p (x))
7275     {
7276     case 1:
7277       switch (get_attr_mode (insn))
7278         {
7279         case MODE_V4SF:
7280           return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7281         case MODE_V2DF:
7282           return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7283         case MODE_TI:
7284           return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7285         case MODE_V8SF:
7286           return "vxorps\t%x0, %x0, %x0";
7287         case MODE_V4DF:
7288           return "vxorpd\t%x0, %x0, %x0";
7289         case MODE_OI:
7290           return "vpxor\t%x0, %x0, %x0";
7291         default:
7292           gcc_unreachable ();
7293         }
7294     case 2:
7295       if (TARGET_AVX)
7296         switch (get_attr_mode (insn))
7297           {
7298           case MODE_V4SF:
7299           case MODE_V2DF:
7300           case MODE_TI:
            return "vpcmpeqd\t%0, %0, %0";
          default:
            gcc_unreachable ();
          }
7306       else
7307         return "pcmpeqd\t%0, %0";
7308     }
7309   gcc_unreachable ();
7310 }
7311 
/* Return 1 if OP contains a symbol reference.  */
7313 
7314 int
7315 symbolic_reference_mentioned_p (rtx op)
7316 {
7317   const char *fmt;
7318   int i;
7319 
7320   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7321     return 1;
7322 
7323   fmt = GET_RTX_FORMAT (GET_CODE (op));
7324   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7325     {
7326       if (fmt[i] == 'E')
7327         {
7328           int j;
7329 
7330           for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7331             if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7332               return 1;
7333         }
7334 
7335       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7336         return 1;
7337     }
7338 
7339   return 0;
7340 }
7341 
7342 /* Return 1 if it is appropriate to emit `ret' instructions in the
7343    body of a function.  Do this only if the epilogue is simple, needing a
7344    couple of insns.  Prior to reloading, we can't tell how many registers
7345    must be saved, so return 0 then.  Return 0 if there is no frame
7346    marker to de-allocate.  */
7347 
7348 int
7349 ix86_can_use_return_insn_p (void)
7350 {
7351   struct ix86_frame frame;
7352 
7353   if (! reload_completed || frame_pointer_needed)
7354     return 0;
7355 
  /* Don't allow more than 32k bytes of arguments to be popped, since
     that's all we can do with one instruction.  */
7358   if (crtl->args.pops_args
7359       && crtl->args.size >= 32768)
7360     return 0;
7361 
7362   ix86_compute_frame_layout (&frame);
  return (frame.to_allocate == 0
          && frame.padding05 == 0
          && frame.nmsave_args == 0
          && (frame.nregs + frame.nsseregs) == 0);
7365 }
7366 
7367 /* Value should be nonzero if functions must have frame pointers.
7368    Zero means the frame pointer need not be set up (and parms may
7369    be accessed via the stack pointer) in functions that seem suitable.  */
7370 
7371 int
7372 ix86_frame_pointer_required (void)
7373 {
7374   /* If we accessed previous frames, then the generated code expects
7375      to be able to access the saved ebp value in our frame.  */
7376   if (cfun->machine->accesses_prev_frame)
7377     return 1;
7378 
  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
7381   if (SUBTARGET_FRAME_POINTER_REQUIRED)
7382     return 1;
7383 
7384   if (TARGET_SAVE_ARGS)
7385     return 1;  
7386 
7387   /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7388      the frame pointer by default.  Turn it back on now if we've not
7389      got a leaf function.  */
7390   if (TARGET_OMIT_LEAF_FRAME_POINTER
7391       && (!current_function_is_leaf
7392           || ix86_current_function_calls_tls_descriptor))
7393     return 1;
7394 
7395   if (crtl->profile)
7396     return 1;
7397 
7398   return 0;
7399 }
7400 
7401 /* Record that the current function accesses previous call frames.  */
7402 
7403 void
7404 ix86_setup_frame_addresses (void)
7405 {
7406   cfun->machine->accesses_prev_frame = 1;
7407 }
7408 
7409 #ifndef USE_HIDDEN_LINKONCE
7410 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7411 #  define USE_HIDDEN_LINKONCE 1
7412 # else
7413 #  define USE_HIDDEN_LINKONCE 0
7414 # endif
7415 #endif
7416 
7417 static int pic_labels_used;
7418 
/* Fill in the label name that should be used for a pc thunk for
   the given register.  */
7421 
7422 static void
7423 get_pc_thunk_name (char name[32], unsigned int regno)
7424 {
7425   gcc_assert (!TARGET_64BIT);
7426 
7427   if (USE_HIDDEN_LINKONCE)
7428     sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7429   else
7430     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7431 }
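
/* With USE_HIDDEN_LINKONCE this yields names such as
   "__i686.get_pc_thunk.bx" for %ebx.  */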
7432 
7433 
/* Emit the pc thunks used by -fpic code; each one loads its register
   with the return address of the caller and then returns.  */
7436 
7437 void
7438 ix86_file_end (void)
7439 {
7440   rtx xops[2];
7441   int regno;
7442 
7443   for (regno = 0; regno < 8; ++regno)
7444     {
7445       char name[32];
7446 
7447       if (! ((pic_labels_used >> regno) & 1))
7448         continue;
7449 
7450       get_pc_thunk_name (name, regno);
7451 
7452 #if TARGET_MACHO
7453       if (TARGET_MACHO)
7454         {
7455           switch_to_section (darwin_sections[text_coal_section]);
7456           fputs ("\t.weak_definition\t", asm_out_file);
7457           assemble_name (asm_out_file, name);
7458           fputs ("\n\t.private_extern\t", asm_out_file);
7459           assemble_name (asm_out_file, name);
7460           fputs ("\n", asm_out_file);
7461           ASM_OUTPUT_LABEL (asm_out_file, name);
7462         }
7463       else
7464 #endif
7465       if (USE_HIDDEN_LINKONCE)
7466         {
7467           tree decl;
7468 
7469           decl = build_decl (FUNCTION_DECL, get_identifier (name),
7470                              error_mark_node);
7471           TREE_PUBLIC (decl) = 1;
7472           TREE_STATIC (decl) = 1;
7473           DECL_ONE_ONLY (decl) = 1;
7474 
7475           (*targetm.asm_out.unique_section) (decl, 0);
7476           switch_to_section (get_named_section (decl, NULL, 0));
7477 
7478           (*targetm.asm_out.globalize_label) (asm_out_file, name);
7479           fputs ("\t.hidden\t", asm_out_file);
7480           assemble_name (asm_out_file, name);
7481           fputc ('\n', asm_out_file);
7482           ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7483         }
7484       else
7485         {
7486           switch_to_section (text_section);
7487           ASM_OUTPUT_LABEL (asm_out_file, name);
7488         }
7489 
7490       xops[0] = gen_rtx_REG (Pmode, regno);
7491       xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7492       output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7493       output_asm_insn ("ret", xops);
7494     }
7495 
7496   if (NEED_INDICATE_EXEC_STACK)
7497     file_end_indicate_exec_stack ();
7498 }
7499 
7500 /* Emit code for the SET_GOT patterns.  */
7501 
7502 const char *
7503 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7504 {
7505   rtx xops[3];
7506 
7507   xops[0] = dest;
7508 
7509   if (TARGET_VXWORKS_RTP && flag_pic)
7510     {
7511       /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
7512       xops[2] = gen_rtx_MEM (Pmode,
7513                              gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7514       output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7515 
7516       /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7517          Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7518          an unadorned address.  */
7519       xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7520       SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7521       output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7522       return "";
7523     }
7524 
7525   xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7526 
7527   if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7528     {
7529       xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7530 
7531       if (!flag_pic)
7532         output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7533       else
7534         output_asm_insn ("call\t%a2", xops);
7535 
7536 #if TARGET_MACHO
7537       /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7538          is what will be referenced by the Mach-O PIC subsystem.  */
7539       if (!label)
7540         ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7541 #endif
7542 
7543       (*targetm.asm_out.internal_label) (asm_out_file, "L",
7544                                  CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7545 
7546       if (flag_pic)
7547         output_asm_insn ("pop%z0\t%0", xops);
7548     }
7549   else
7550     {
7551       char name[32];
7552       get_pc_thunk_name (name, REGNO (dest));
7553       pic_labels_used |= 1 << REGNO (dest);
7554 
7555       xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7556       xops[2] = gen_rtx_MEM (QImode, xops[2]);
7557       output_asm_insn ("call\t%X2", xops);
7558       /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7559          is what will be referenced by the Mach-O PIC subsystem.  */
7560 #if TARGET_MACHO
7561       if (!label)
7562         ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7563       else
7564         targetm.asm_out.internal_label (asm_out_file, "L",
7565                                            CODE_LABEL_NUMBER (label));
7566 #endif
7567     }
7568 
7569   if (TARGET_MACHO)
7570     return "";
7571 
7572   if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7573     output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7574   else
7575     output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7576 
7577   return "";
7578 }
7579 
/* Generate a "push" pattern for input ARG.  */
7581 
7582 static rtx
7583 gen_push (rtx arg)
7584 {
7585   return gen_rtx_SET (VOIDmode,
7586                       gen_rtx_MEM (Pmode,
7587                                    gen_rtx_PRE_DEC (Pmode,
7588                                                     stack_pointer_rtx)),
7589                       arg);
7590 }
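
/* The RTL generated above has the form

     (set (mem:Pmode (pre_dec:Pmode (reg sp))) arg)

   which is the shape the push patterns in i386.md are written to
   match.  */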
7591 
/* Return the number of an unused call-clobbered register that is
   available for the entire function, or INVALID_REGNUM if there is
   none.  */
7594 
7595 static unsigned int
7596 ix86_select_alt_pic_regnum (void)
7597 {
7598   if (current_function_is_leaf && !crtl->profile
7599       && !ix86_current_function_calls_tls_descriptor)
7600     {
7601       int i, drap;
7602       /* Can't use the same register for both PIC and DRAP.  */
7603       if (crtl->drap_reg)
7604         drap = REGNO (crtl->drap_reg);
7605       else
7606         drap = -1;
7607       for (i = 2; i >= 0; --i)
7608         if (i != drap && !df_regs_ever_live_p (i))
7609           return i;
7610     }
7611 
7612   return INVALID_REGNUM;
7613 }
7614 
7615 /* Return 1 if we need to save REGNO.  */
7616 static int
7617 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7618 {
7619   if (pic_offset_table_rtx
7620       && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7621       && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7622           || crtl->profile
7623           || crtl->calls_eh_return
7624           || crtl->uses_const_pool))
7625     {
7626       if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7627         return 0;
7628       return 1;
7629     }
7630 
7631   if (crtl->calls_eh_return && maybe_eh_return)
7632     {
7633       unsigned i;
7634       for (i = 0; ; i++)
7635         {
7636           unsigned test = EH_RETURN_DATA_REGNO (i);
7637           if (test == INVALID_REGNUM)
7638             break;
7639           if (test == regno)
7640             return 1;
7641         }
7642     }
7643 
7644   if (crtl->drap_reg
7645       && regno == REGNO (crtl->drap_reg))
7646     return 1;
7647 
7648   return (df_regs_ever_live_p (regno)
7649           && !call_used_regs[regno]
7650           && !fixed_regs[regno]
7651           && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7652 }
7653 
/* Return the number of saved general purpose registers.  */
7655 
7656 static int
7657 ix86_nsaved_regs (void)
7658 {
7659   int nregs = 0;
7660   int regno;
7661 
7662   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7663     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7664       nregs ++;
7665   return nregs;
7666 }
7667 
/* Return the number of saved SSE registers.  */
7669 
7670 static int
7671 ix86_nsaved_sseregs (void)
7672 {
7673   int nregs = 0;
7674   int regno;
7675 
7676   if (ix86_cfun_abi () != MS_ABI)
7677     return 0;
7678   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7679     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7680       nregs ++;
7681   return nregs;
7682 }
7683 
7684 /* Given FROM and TO register numbers, say whether this elimination is
7685    allowed.  If stack alignment is needed, we can only replace argument
7686    pointer with hard frame pointer, or replace frame pointer with stack
7687    pointer.  Otherwise, frame pointer elimination is automatically
7688    handled and all other eliminations are valid.  */
7689 
7690 int
7691 ix86_can_eliminate (int from, int to)
7692 {
7693   if (stack_realign_fp)
7694     return ((from == ARG_POINTER_REGNUM
7695              && to == HARD_FRAME_POINTER_REGNUM)
7696             || (from == FRAME_POINTER_REGNUM
7697                 && to == STACK_POINTER_REGNUM));
7698   else
7699     return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7700 }
7701 
7702 /* Return the offset between two registers, one to be eliminated, and the other
7703    its replacement, at the start of a routine.  */
7704 
7705 HOST_WIDE_INT
7706 ix86_initial_elimination_offset (int from, int to)
7707 {
7708   struct ix86_frame frame;
7709   ix86_compute_frame_layout (&frame);
7710 
7711   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7712     return frame.hard_frame_pointer_offset;
7713   else if (from == FRAME_POINTER_REGNUM
7714            && to == HARD_FRAME_POINTER_REGNUM)
7715     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7716   else
7717     {
7718       gcc_assert (to == STACK_POINTER_REGNUM);
7719 
7720       if (from == ARG_POINTER_REGNUM)
7721         return frame.stack_pointer_offset;
7722 
7723       gcc_assert (from == FRAME_POINTER_REGNUM);
7724       return frame.stack_pointer_offset - frame.frame_pointer_offset;
7725     }
7726 }
7727 
7728 /* In a dynamically-aligned function, we can't know the offset from
7729    stack pointer to frame pointer, so we must ensure that setjmp
7730    eliminates fp against the hard fp (%ebp) rather than trying to
7731    index from %esp up to the top of the frame across a gap that is
7732    of unknown (at compile-time) size.  */
7733 static rtx
7734 ix86_builtin_setjmp_frame_value (void)
7735 {
7736   return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7737 }
7738 
/* Fill the ix86_frame structure describing the frame of the currently
   compiled function.  */
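/* A sketch of the layout computed below, from higher to lower addresses
   (illustrative only; the exact offsets depend on the paddings computed
   in this function):

       [return address]
       [saved ebp, if frame_pointer_needed]    <- hard_frame_pointer_offset
       [argument save area + padding0, when TARGET_SAVE_ARGS]
       [GPR save area]
       [padding05, SSE save area]
       [va_arg register save area]
       [padding1]                              <- frame_pointer_offset
       [local variables]
       [outgoing arguments area, padding2]     <- stack_pointer_offset

   The red zone, when usable, is subtracted from to_allocate and
   stack_pointer_offset at the end.  */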
7740 
7741 static void
7742 ix86_compute_frame_layout (struct ix86_frame *frame)
7743 {
7744   HOST_WIDE_INT total_size;
7745   unsigned int stack_alignment_needed;
7746   HOST_WIDE_INT offset;
7747   unsigned int preferred_alignment;
7748   HOST_WIDE_INT size = get_frame_size ();
7749 
7750   frame->nregs = ix86_nsaved_regs ();
7751   frame->nsseregs = ix86_nsaved_sseregs ();
7752   frame->nmsave_args = ix86_nsaved_args ();
7753   total_size = size;
7754 
7755   stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7756   preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7757 
  /* The MS ABI seems to require the stack alignment to always be 16,
     except in function prologues.  */
7760   if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7761     {
7762       preferred_alignment = 16;
7763       stack_alignment_needed = 16;
7764       crtl->preferred_stack_boundary = 128;
7765       crtl->stack_alignment_needed = 128;
7766     }
7767 
7768   gcc_assert (!size || stack_alignment_needed);
7769   gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7770   gcc_assert (preferred_alignment <= stack_alignment_needed);
7771 
  /* During reload iterations the number of registers saved can change.
     Recompute the value as needed.  Do not recompute when the number of
     registers didn't change, as reload makes multiple calls to this
     function and does not expect the decision to change within a single
     iteration.  */
7776   if (!optimize_function_for_size_p (cfun)
7777       && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7778     {
7779       int count = frame->nregs;
7780 
7781       cfun->machine->use_fast_prologue_epilogue_nregs = count;
      /* The fast prologue uses moves instead of pushes to save registers.
         This is significantly longer, but also executes faster, as modern
         hardware can execute the moves in parallel but can't do that for
         push/pop.

         Be careful about choosing which prologue to emit:  When the
         function takes many instructions to execute, we may as well use
         the slow version, likewise when the function is known to be
         outside a hot spot (this is known with feedback only).  Weight
         the size of the function by the number of registers to save, as
         it is cheap to use one or two push instructions but very slow to
         use many of them.  */
7792       if (count)
7793         count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7794       if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7795           || (flag_branch_probabilities
7796               && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7797         cfun->machine->use_fast_prologue_epilogue = false;
7798       else
7799         cfun->machine->use_fast_prologue_epilogue
7800            = !expensive_function_p (count);
7801     }
7802   if (TARGET_PROLOGUE_USING_MOVE
7803       && cfun->machine->use_fast_prologue_epilogue)
7804     frame->save_regs_using_mov = true;
7805   else
7806     frame->save_regs_using_mov = false;
7807 
7808   if (TARGET_SAVE_ARGS)
7809     {
7810       cfun->machine->use_fast_prologue_epilogue = true;
7811       frame->save_regs_using_mov = true;
7812     }
7813 
7814   /* Skip return address and saved base pointer.  */
7815   offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7816 
7817   frame->hard_frame_pointer_offset = offset;
7818 
  /* Align the offset, because the realigned frame starts from here.  */
7821   if (stack_realign_fp)
7822     offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7823 
7824   /* Argument save area */
7825   if (TARGET_SAVE_ARGS)
7826     {
7827       offset += frame->nmsave_args * UNITS_PER_WORD;
7828       frame->padding0 = (frame->nmsave_args % 2) * UNITS_PER_WORD;
7829       offset += frame->padding0;
7830     }
7831   else
7832     frame->padding0 = 0;
7833 
7834   /* Register save area */
7835   offset += frame->nregs * UNITS_PER_WORD;
7836 
7837   /* Align SSE reg save area.  */
7838   if (frame->nsseregs)
7839     frame->padding05 = ((offset + 16 - 1) & -16) - offset;
7840   else
7841     frame->padding05 = 0;
7842   
7843   /* SSE register save area.  */
7844   offset += frame->padding05 + frame->nsseregs * 16;
7845 
7846   /* Va-arg area */
7847   frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7848   offset += frame->va_arg_size;
7849 
7850   /* Align start of frame for local function.  */
7851   frame->padding1 = ((offset + stack_alignment_needed - 1)
7852                      & -stack_alignment_needed) - offset;
7853 
7854   offset += frame->padding1;
7855 
7856   /* Frame pointer points here.  */
7857   frame->frame_pointer_offset = offset;
7858 
7859   offset += size;
7860 
  /* Add the outgoing arguments area.  It can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when the function calls alloca, as the
     alloca expander assumes that the last crtl->outgoing_args_size bytes
     of the stack frame are unused.  */
7866   if (ACCUMULATE_OUTGOING_ARGS
7867       && (!current_function_is_leaf || cfun->calls_alloca
7868           || ix86_current_function_calls_tls_descriptor))
7869     {
7870       offset += crtl->outgoing_args_size;
7871       frame->outgoing_arguments_size = crtl->outgoing_args_size;
7872     }
7873   else
7874     frame->outgoing_arguments_size = 0;
7875 
7876   /* Align stack boundary.  Only needed if we're calling another function
7877      or using alloca.  */
7878   if (!current_function_is_leaf || cfun->calls_alloca
7879       || ix86_current_function_calls_tls_descriptor)
7880     frame->padding2 = ((offset + preferred_alignment - 1)
7881                        & -preferred_alignment) - offset;
7882   else
7883     frame->padding2 = 0;
7884 
7885   offset += frame->padding2;
7886 
7887   /* We've reached end of stack frame.  */
7888   frame->stack_pointer_offset = offset;
7889 
  /* The size the prologue needs to allocate.  */
7891   frame->to_allocate =
7892     (size + frame->padding1 + frame->padding2
7893      + frame->outgoing_arguments_size + frame->va_arg_size);
7894 
7895   if (!TARGET_SAVE_ARGS
7896       && ((!frame->to_allocate && frame->nregs <= 1)
7897           || (TARGET_64BIT
7898               && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000)))
7899     frame->save_regs_using_mov = false;
7900 
7901   if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
7902       && current_function_sp_is_unchanging
7903       && current_function_is_leaf
7904       && !ix86_current_function_calls_tls_descriptor)
7905     {
7906       frame->red_zone_size = frame->to_allocate;
7907       if (frame->save_regs_using_mov)
7908         {
7909           frame->red_zone_size
7910             += (frame->nregs + frame->nmsave_args) * UNITS_PER_WORD;
7911           frame->red_zone_size += frame->padding0;
7912         }
7913       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7914         frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7915     }
7916   else
7917     frame->red_zone_size = 0;
7918   frame->to_allocate -= frame->red_zone_size;
7919   frame->stack_pointer_offset -= frame->red_zone_size;
7920 #if 0
7921   fprintf (stderr, "\n");
7922   fprintf (stderr, "size: %ld\n", (long)size);
7923   fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7924   fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7925   fprintf (stderr, "nmsave_args: %ld\n", (long)frame->nmsave_args);
7926   fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
  fprintf (stderr, "padding05: %ld\n", (long)frame->padding05);
7928   fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7929   fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7930   fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7931   fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7932   fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7933   fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7934   fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7935   fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7936            (long)frame->hard_frame_pointer_offset);
7937   fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7938   fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7939   fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7940   fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7941 #endif
7942 }
7943 
7944 
7945 /* Emit code to save registers in the prologue.  */
7946 
7947 static void
7948 ix86_emit_save_regs (void)
7949 {
7950   unsigned int regno;
7951   rtx insn;
7952 
7953   if (TARGET_SAVE_ARGS)
7954     {
7955       int i;
7956       int nsaved = ix86_nsaved_args ();
7957       int start = cfun->returns_struct;
7958       for (i = start; i < start + nsaved; i++)
7959         {
7960           regno = x86_64_int_parameter_registers[i];
7961           insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7962           RTX_FRAME_RELATED_P (insn) = 1;
7963         }
7964       if (nsaved % 2 != 0)
7965         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7966                                    GEN_INT (-UNITS_PER_WORD), -1);
7967     }
7968 
7969   for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7970     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7971       {
7972         insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7973         RTX_FRAME_RELATED_P (insn) = 1;
7974       }
7975 }
7976 
/* Emit code to save registers using MOV insns.  The first register
   is stored at POINTER + OFFSET.  */
7979 static void
7980 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7981 {
7982   unsigned int regno;
7983   rtx insn;
7984 
7985   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7986     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7987       {
7988         insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7989                                                Pmode, offset),
7990                                gen_rtx_REG (Pmode, regno));
7991         RTX_FRAME_RELATED_P (insn) = 1;
7992         offset += UNITS_PER_WORD;
7993       }
7994 
7995   if (TARGET_SAVE_ARGS)
7996     {
7997       int i;
7998       int nsaved = ix86_nsaved_args ();
7999       int start = cfun->returns_struct;
8000       if (nsaved % 2 != 0)
8001         offset += UNITS_PER_WORD;
8002       for (i = start + nsaved - 1; i >= start; i--)
8003         {
8004           regno = x86_64_int_parameter_registers[i];
8005           insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
8006                                                  Pmode, offset),
8007                                  gen_rtx_REG (Pmode, regno));
8008           RTX_FRAME_RELATED_P (insn) = 1;
8009           offset += UNITS_PER_WORD;
8010         }
8011     }
8012 }
8013 
/* Emit code to save SSE registers using MOV insns.  The first register
   is stored at POINTER + OFFSET.  */
8016 static void
8017 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8018 {
8019   unsigned int regno;
8020   rtx insn;
8021   rtx mem;
8022 
8023   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8024     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8025       {
8026         mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
8027         set_mem_align (mem, 128);
8028         insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
8029         RTX_FRAME_RELATED_P (insn) = 1;
8030         offset += 16;
8031       }
8032 }
8033 
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame
   related, zero if the %r11 register is live and cannot be freely used,
   and positive otherwise.  */
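
/* For example, on x86-64 an adjustment that fits in a signed 32-bit
   immediate can be applied with a single instruction, while a larger
   offset must first be loaded into %r11; the gcc_assert below enforces
   that STYLE is nonzero (i.e. %r11 is free) in that case.  */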
8039 
8040 static void
8041 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
8042 {
8043   rtx insn;
8044 
8045   if (! TARGET_64BIT)
8046     insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
8047   else if (x86_64_immediate_operand (offset, DImode))
8048     insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
8049   else
8050     {
8051       rtx r11;
8052       /* r11 is used by indirect sibcall return as well, set before the
8053          epilogue and used after the epilogue.  ATM indirect sibcall
8054          shouldn't be used together with huge frame sizes in one
8055          function because of the frame_size check in sibcall.c.  */
8056       gcc_assert (style);
8057       r11 = gen_rtx_REG (DImode, R11_REG);
8058       insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8059       if (style < 0)
8060         RTX_FRAME_RELATED_P (insn) = 1;
8061       insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8062                                                                offset));
8063     }
8064   if (style < 0)
8065     RTX_FRAME_RELATED_P (insn) = 1;
8066 }
8067 
/* Find an available register to be used as a dynamic realign argument
   pointer register.  Such a register will be written in the prologue and
   used at the beginning of the body, so it must not be
        1. a parameter passing register.
        2. the GOT pointer.
   We reuse the static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has a
   shorter encoding.

   Return: the regno of the chosen register.  */
8078 
8079 static unsigned int 
8080 find_drap_reg (void)
8081 {
8082   tree decl = cfun->decl;
8083 
8084   if (TARGET_64BIT)
8085     {
      /* Use R13 for a nested function or a function that needs a static
         chain.  Since a function with a tail call may use any caller-saved
         register in the epilogue, DRAP must not use a caller-saved
         register in such a case.  */
8090       if ((decl_function_context (decl)
8091            && !DECL_NO_STATIC_CHAIN (decl))
8092           || crtl->tail_call_emit)
8093         return R13_REG;
8094 
8095       return R10_REG;
8096     }
8097   else
8098     {
      /* Use DI for a nested function or a function that needs a static
         chain.  Since a function with a tail call may use any caller-saved
         register in the epilogue, DRAP must not use a caller-saved
         register in such a case.  */
8103       if ((decl_function_context (decl)
8104            && !DECL_NO_STATIC_CHAIN (decl))
8105           || crtl->tail_call_emit)
8106         return DI_REG;
8107     
8108       /* Reuse static chain register if it isn't used for parameter
8109          passing.  */
8110       if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8111           && !lookup_attribute ("fastcall",
8112                                 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8113         return CX_REG;
8114       else
8115         return DI_REG;
8116     }
8117 }
8118 
8119 /* Update incoming stack boundary and estimated stack alignment.  */
8120 
8121 static void
8122 ix86_update_stack_boundary (void)
8123 {
  /* Prefer the boundary specified on the command line, if any.  */
8125   ix86_incoming_stack_boundary 
8126     = (ix86_user_incoming_stack_boundary
8127        ? ix86_user_incoming_stack_boundary
8128        : ix86_default_incoming_stack_boundary);
8129 
8130   /* Incoming stack alignment can be changed on individual functions
8131      via force_align_arg_pointer attribute.  We use the smallest
8132      incoming stack boundary.  */
8133   if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8134       && lookup_attribute (ix86_force_align_arg_pointer_string,
8135                            TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8136     ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8137 
8138   /* The incoming stack frame has to be aligned at least at
8139      parm_stack_boundary.  */
8140   if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8141     ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8142 
  /* The stack at the entrance of main is aligned by the runtime.  We use
     the smallest incoming stack boundary.  */
8145   if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8146       && DECL_NAME (current_function_decl)
8147       && MAIN_NAME_P (DECL_NAME (current_function_decl))
8148       && DECL_FILE_SCOPE_P (current_function_decl))
8149     ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8150 
  /* x86-64 varargs functions need 16-byte stack alignment for the
     register save area.  */
8153   if (TARGET_64BIT
8154       && cfun->stdarg
8155       && crtl->stack_alignment_estimated < 128)
8156     crtl->stack_alignment_estimated = 128;
8157 }
8158 
/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed, or an rtx for DRAP otherwise.  */
8161 
8162 static rtx
8163 ix86_get_drap_rtx (void)
8164 {
8165   if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8166     crtl->need_drap = true;
8167 
8168   if (stack_realign_drap)
8169     {
      /* Assign DRAP to vDRAP and return vDRAP.  */
8171       unsigned int regno = find_drap_reg ();
8172       rtx drap_vreg;
8173       rtx arg_ptr;
8174       rtx seq, insn;
8175 
8176       arg_ptr = gen_rtx_REG (Pmode, regno);
8177       crtl->drap_reg = arg_ptr;
8178 
8179       start_sequence ();
8180       drap_vreg = copy_to_reg (arg_ptr);
8181       seq = get_insns ();
8182       end_sequence ();
8183       
8184       insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8185       RTX_FRAME_RELATED_P (insn) = 1;
8186       return drap_vreg;
8187     }
8188   else
8189     return NULL;
8190 }
8191 
8192 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
8193 
8194 static rtx
8195 ix86_internal_arg_pointer (void)
8196 {
8197   return virtual_incoming_args_rtx;
8198 }
8199 
8200 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
8201    This is called from dwarf2out.c to emit call frame instructions
8202    for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
8203 static void
8204 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
8205 {
8206   rtx unspec = SET_SRC (pattern);
8207   gcc_assert (GET_CODE (unspec) == UNSPEC);
8208 
8209   switch (index)
8210     {
8211     case UNSPEC_REG_SAVE:
8212       dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
8213                               SET_DEST (pattern));
8214       break;
8215     case UNSPEC_DEF_CFA:
8216       dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
8217                          INTVAL (XVECEXP (unspec, 0, 0)));
8218       break;
8219     default:
8220       gcc_unreachable ();
8221     }
8222 }
8223 
/* Finalize the stack_realign_needed flag, which guides the prologue and
   epilogue so that they are generated in the correct form.  */
8226 static void 
8227 ix86_finalize_stack_realign_flags (void)
8228 {
  /* Check whether stack realignment is really needed after reload, and
     store the result in cfun.  */
8231   unsigned int incoming_stack_boundary
8232     = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8233        ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8234   unsigned int stack_realign = (incoming_stack_boundary
8235                                 < (current_function_is_leaf
8236                                    ? crtl->max_used_stack_slot_alignment
8237                                    : crtl->stack_alignment_needed));
8238 
8239   if (crtl->stack_realign_finalized)
8240     {
      /* After stack_realign_needed is finalized, we can no longer
         change it.  */
8243       gcc_assert (crtl->stack_realign_needed == stack_realign);
8244     }
8245   else
8246     {
8247       crtl->stack_realign_needed = stack_realign;
8248       crtl->stack_realign_finalized = true;
8249     }
8250 }
8251 
8252 /* Expand the prologue into a bunch of separate insns.  */
8253 
8254 void
8255 ix86_expand_prologue (void)
8256 {
8257   rtx insn;
8258   bool pic_reg_used;
8259   struct ix86_frame frame;
8260   HOST_WIDE_INT allocate;
8261 
8262   ix86_finalize_stack_realign_flags ();
8263 
8264   /* DRAP should not coexist with stack_realign_fp */
8265   gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8266 
8267   ix86_compute_frame_layout (&frame);
8268 
  /* Emit prologue code to adjust stack alignment and set up DRAP, in
     case DRAP is needed and stack realignment is really needed after
     reload.  */
8271   if (crtl->drap_reg && crtl->stack_realign_needed)
8272     {
8273       rtx x, y;
8274       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8275       int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8276                               ? 0 : UNITS_PER_WORD);
8277 
8278       gcc_assert (stack_realign_drap);
8279 
8280       /* Grab the argument pointer.  */
8281       x = plus_constant (stack_pointer_rtx, 
8282                          (UNITS_PER_WORD + param_ptr_offset));
8283       y = crtl->drap_reg;
8284 
      /* We only need to push the parameter pointer reg if it is a
         callee-saved (not call-used) reg.  */
8287       if (!call_used_regs[REGNO (crtl->drap_reg)])
8288         {
8289           /* Push arg pointer reg */
8290           insn = emit_insn (gen_push (y));
8291           RTX_FRAME_RELATED_P (insn) = 1;
8292         }
8293 
8294       insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8295       RTX_FRAME_RELATED_P (insn) = 1; 
8296 
8297       /* Align the stack.  */
8298       insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8299                                            stack_pointer_rtx,
8300                                            GEN_INT (-align_bytes)));
8301       RTX_FRAME_RELATED_P (insn) = 1;
8302 
      /* Replicate the return address on the stack so that the return
         address can be reached via the (argp - 1) slot.  This is needed
         to implement the RETURN_ADDR_RTX macro and the builtins expanded
         through expand_builtin_return_addr etc.  */
8307       x = crtl->drap_reg;
8308       x = gen_frame_mem (Pmode,
8309                          plus_constant (x, -UNITS_PER_WORD));
8310       insn = emit_insn (gen_push (x));
8311       RTX_FRAME_RELATED_P (insn) = 1;
8312     }
8313 
8314   /* Note: AT&T enter does NOT have reversed args.  Enter is probably
8315      slower on all targets.  Also sdb doesn't like it.  */
8316 
8317   if (frame_pointer_needed)
8318     {
8319       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8320       RTX_FRAME_RELATED_P (insn) = 1;
8321 
8322       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8323       RTX_FRAME_RELATED_P (insn) = 1;
8324     }
8325 
8326   if (stack_realign_fp)
8327     {
8328       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8329       gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8330 
8331       /* Align the stack.  */
8332       insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8333                                            stack_pointer_rtx,
8334                                            GEN_INT (-align_bytes)));
8335       RTX_FRAME_RELATED_P (insn) = 1;
8336     }
8337 
8338   allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding05;
8339 
8340   if (!frame.save_regs_using_mov)
8341     ix86_emit_save_regs ();
8342   else
8343     allocate += (frame.nregs + frame.nmsave_args) * UNITS_PER_WORD
8344       + frame.padding0;
8345 
  /* When using the red zone we may start register saving before allocating
     the stack frame, saving one cycle of the prologue.  However, we avoid
     doing this if we are going to have to probe the stack, since at least
     on x86-64 the stack probe can turn into a call that clobbers a red
     zone location.  */
8351   if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8352       && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8353     ix86_emit_save_regs_using_mov ((frame_pointer_needed
8354                                      && !crtl->stack_realign_needed) 
8355                                    ? hard_frame_pointer_rtx
8356                                    : stack_pointer_rtx,
8357                                    -(frame.nregs + frame.nmsave_args)
8358                                    * UNITS_PER_WORD - frame.padding0);
8359 
8360   if (allocate == 0)
8361     ;
8362   else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8363     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8364                                GEN_INT (-allocate), -1);
8365   else
8366     {
8367       rtx eax = gen_rtx_REG (Pmode, AX_REG);
8368       bool eax_live;
8369       rtx t;
8370 
8371       if (cfun->machine->call_abi == MS_ABI)
8372         eax_live = false;
8373       else
8374         eax_live = ix86_eax_live_at_start_p ();
8375 
8376       if (eax_live)
8377         {
8378           emit_insn (gen_push (eax));
8379           allocate -= UNITS_PER_WORD;
8380         }
8381 
8382       emit_move_insn (eax, GEN_INT (allocate));
8383 
8384       if (TARGET_64BIT)
8385         insn = gen_allocate_stack_worker_64 (eax, eax);
8386       else
8387         insn = gen_allocate_stack_worker_32 (eax, eax);
8388       insn = emit_insn (insn);
8389       RTX_FRAME_RELATED_P (insn) = 1;
8390       t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8391       t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8392       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
8393                                             t, REG_NOTES (insn));
8394 
8395       if (eax_live)
8396         {
8397           if (frame_pointer_needed)
8398             t = plus_constant (hard_frame_pointer_rtx,
8399                                allocate
8400                                - frame.to_allocate
8401                                - frame.nregs * UNITS_PER_WORD);
8402           else
8403             t = plus_constant (stack_pointer_rtx, allocate);
8404           emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
8405         }
8406     }
8407 
8408   if (frame.save_regs_using_mov
8409       && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8410          && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8411     {
      if (!TARGET_SAVE_ARGS
          && (!frame_pointer_needed
              || !(frame.to_allocate + frame.padding05)
              || crtl->stack_realign_needed))
8416         ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8417                                        frame.to_allocate
8418                                        + frame.nsseregs * 16 + frame.padding05);
8419       else
8420         /* XXX: Does this need help for SSE? */
8421         ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8422                                        -(frame.nregs + frame.nmsave_args)
8423                                        * UNITS_PER_WORD - frame.padding0);
8424     }
  /* XXX: Do these need help for save-args? */
8426   if (!frame_pointer_needed
8427       || !(frame.to_allocate + frame.padding0)
8428       || crtl->stack_realign_needed)
8429     ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8430                                        frame.to_allocate);
8431   else
8432     ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8433                                        - frame.nregs * UNITS_PER_WORD
8434                                        - frame.nsseregs * 16
8435                                        - frame.padding05);
8436 
8437   pic_reg_used = false;
8438   if (pic_offset_table_rtx
8439       && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8440           || crtl->profile))
8441     {
8442       unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8443 
8444       if (alt_pic_reg_used != INVALID_REGNUM)
8445         SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8446 
8447       pic_reg_used = true;
8448     }
8449 
8450   if (pic_reg_used)
8451     {
8452       if (TARGET_64BIT)
8453         {
8454           if (ix86_cmodel == CM_LARGE_PIC)
8455             {
8456               rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8457               rtx label = gen_label_rtx ();
8458               emit_label (label);
8459               LABEL_PRESERVE_P (label) = 1;
8460               gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8461               insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8462               insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8463               insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8464                                             pic_offset_table_rtx, tmp_reg));
8465             }
8466           else
8467             insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8468         }
8469       else
8470         insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8471     }
8472 
8473   /* In the pic_reg_used case, make sure that the got load isn't deleted
8474      when mcount needs it.  Blockage to avoid call movement across mcount
8475      call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8476      note.  */
8477   if (crtl->profile && pic_reg_used)
8478     emit_insn (gen_prologue_use (pic_offset_table_rtx));
8479 
8480   if (crtl->drap_reg && !crtl->stack_realign_needed)
8481     {
      /* vDRAP was set up, but after reload it turns out that stack
         realignment isn't necessary.  Here we emit prologue code to set
         up DRAP without the stack realignment adjustment.  */
8485       int drap_bp_offset = UNITS_PER_WORD * 2;
8486       rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8487       insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8488     }
8489 
  /* Prevent instructions from being scheduled into the register save push
     sequence when access to the redzone area is done through the frame
     pointer.  The offset between the frame pointer and the stack pointer
     is calculated relative to the value of the stack pointer at the end
     of the function prologue, and moving instructions that access the
     redzone area via the frame pointer inside the push sequence violates
     this assumption.  */
8496   if (frame_pointer_needed && frame.red_zone_size)
8497     emit_insn (gen_memory_blockage ());
8498 
8499   /* Emit cld instruction if stringops are used in the function.  */
8500   if (TARGET_CLD && ix86_current_function_needs_cld)
8501     emit_insn (gen_cld ());
8502 }
8503 
/* Emit code to restore saved general registers using MOV insns.  First
   register is restored from POINTER + OFFSET.  */
8506 static void
8507 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8508                                   int maybe_eh_return)
8509 {
8510   int regno;
8511   rtx base_address = gen_rtx_MEM (Pmode, pointer);
8512 
8513   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8514     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8515       {
8516         /* Ensure that adjust_address won't be forced to produce pointer
8517            out of range allowed by x86-64 instruction set.  */
8518         if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8519           {
8520             rtx r11;
8521 
8522             r11 = gen_rtx_REG (DImode, R11_REG);
8523             emit_move_insn (r11, GEN_INT (offset));
8524             emit_insn (gen_adddi3 (r11, r11, pointer));
8525             base_address = gen_rtx_MEM (Pmode, r11);
8526             offset = 0;
8527           }
8528         emit_move_insn (gen_rtx_REG (Pmode, regno),
8529                         adjust_address (base_address, Pmode, offset));
8530         offset += UNITS_PER_WORD;
8531       }
8532 }
8533 
/* Emit code to restore saved SSE registers using MOV insns.  First
   register is restored from POINTER + OFFSET.  */
8536 static void
8537 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8538                                       int maybe_eh_return)
8539 {
8540   int regno;
8541   rtx base_address = gen_rtx_MEM (TImode, pointer);
8542   rtx mem;
8543 
8544   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8545     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8546       {
8547         /* Ensure that adjust_address won't be forced to produce pointer
8548            out of range allowed by x86-64 instruction set.  */
8549         if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8550           {
8551             rtx r11;
8552 
8553             r11 = gen_rtx_REG (DImode, R11_REG);
8554             emit_move_insn (r11, GEN_INT (offset));
8555             emit_insn (gen_adddi3 (r11, r11, pointer));
8556             base_address = gen_rtx_MEM (TImode, r11);
8557             offset = 0;
8558           }
8559         mem = adjust_address (base_address, TImode, offset);
8560         set_mem_align (mem, 128);
8561         emit_move_insn (gen_rtx_REG (TImode, regno), mem);
8562         offset += 16;
8563       }
8564 }
8565 
8566 /* Restore function stack, frame, and registers.  */
8567 
8568 void
8569 ix86_expand_epilogue (int style)
8570 {
8571   int regno;
8572   int sp_valid;
8573   struct ix86_frame frame;
8574   HOST_WIDE_INT offset;
8575 
8576   ix86_finalize_stack_realign_flags ();
8577 
  /* When the stack is realigned, SP must be valid.  */
8579   sp_valid = (!frame_pointer_needed
8580               || current_function_sp_is_unchanging
8581               || stack_realign_fp);
8582 
8583   ix86_compute_frame_layout (&frame);
8584 
8585   /* See the comment about red zone and frame
8586      pointer usage in ix86_expand_prologue.  */
8587   if (frame_pointer_needed && frame.red_zone_size)
8588     emit_insn (gen_memory_blockage ()); 
8589 
8590   /* Calculate start of saved registers relative to ebp.  Special care
8591      must be taken for the normal return case of a function using
8592      eh_return: the eax and edx registers are marked as saved, but not
8593      restored along this path.  */
8594   offset = frame.nregs + frame.nmsave_args;
8595   if (crtl->calls_eh_return && style != 2)
8596     offset -= 2;
8597   offset *= -UNITS_PER_WORD;
8598   offset -= frame.nsseregs * 16 + frame.padding05 + frame.padding0;
8599 
  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in a stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well.  Especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in the
     future.  */
8610   if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8611       || (TARGET_EPILOGUE_USING_MOVE
8612           && cfun->machine->use_fast_prologue_epilogue
8613           && ((frame.nregs + frame.nsseregs) > 1
8614               || (frame.to_allocate + frame.padding0) != 0))
8615       || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
8616           && (frame.to_allocate + frame.padding0) != 0)
8617       || (frame_pointer_needed && TARGET_USE_LEAVE
8618           && cfun->machine->use_fast_prologue_epilogue
8619           && (frame.nregs + frame.nsseregs) == 1)
8620       || crtl->calls_eh_return)
8621     {
      /* Restore registers.  We can use ebp or esp to address the memory
         locations.  If both are available, default to ebp, since offsets
         are known to be small.  The only exception is esp pointing
         directly to the end of the block of saved registers, where we
         may simplify the addressing mode.

         If we are realigning the stack with bp and sp, the register
         restores can't be addressed by bp; sp must be used instead.  */
8630 
8631       if (!frame_pointer_needed
8632           || (sp_valid && !(frame.to_allocate + frame.padding0))
8633           || stack_realign_fp)
8634         {
8635           ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8636                                                 frame.to_allocate, style == 2);
8637           ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8638                                             frame.to_allocate
8639                                             + frame.nsseregs * 16
8640                                             + frame.padding05, style == 2);
8641         }
8642       else
8643         {
8644           ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8645                                                 offset, style == 2);
8646           ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8647                                             offset
8648                                             + frame.nsseregs * 16
8649                                             + frame.padding05, style == 2);
8650         }
8651 
8652       /* eh_return epilogues need %ecx added to the stack pointer.  */
8653       if (style == 2)
8654         {
8655           rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8656 
8657           /* Stack align doesn't work with eh_return.  */
8658           gcc_assert (!crtl->stack_realign_needed);
8659 
8660           if (frame_pointer_needed)
8661             {
8662               tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8663               tmp = plus_constant (tmp, UNITS_PER_WORD);
8664               emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8665 
8666               tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8667               emit_move_insn (hard_frame_pointer_rtx, tmp);
8668 
8669               pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8670                                          const0_rtx, style);
8671             }
8672           else
8673             {
8674               tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8675               tmp = plus_constant (tmp, (frame.to_allocate
8676                                          + (frame.nregs + frame.nmsave_args)
8677                                            * UNITS_PER_WORD
8678                                          + frame.nsseregs * 16
8679                                          + frame.padding05 + frame.padding0));
8680               emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8681             }
8682         }
8683       else if (!frame_pointer_needed)
8684         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8685                                    GEN_INT (frame.to_allocate
8686                                             + (frame.nregs + frame.nmsave_args)
8687                                               * UNITS_PER_WORD
8688                                             + frame.nsseregs * 16
8689                                             + frame.padding05 + frame.padding0),
8690                                    style);
8691       /* If not an i386, mov & pop is faster than "leave".  */
8692       else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8693                || !cfun->machine->use_fast_prologue_epilogue)
8694         emit_insn ((*ix86_gen_leave) ());
8695       else
8696         {
8697           pro_epilogue_adjust_stack (stack_pointer_rtx,
8698                                      hard_frame_pointer_rtx,
8699                                      const0_rtx, style);
8700 
8701           emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8702         }
8703     }
8704   else
8705     {
      /* The first step is to deallocate the stack frame so that we can
         pop the registers.

         If we realign the stack with the frame pointer, then the stack
         pointer can't be recovered via lea $offset(%bp), %sp, because
         there is a padding area between bp and sp for the realignment.
         "add $to_allocate, %sp" must be used instead.  */
8713       if (!sp_valid)
8714         {
8715           gcc_assert (frame_pointer_needed);
8716           gcc_assert (!stack_realign_fp);
8717           pro_epilogue_adjust_stack (stack_pointer_rtx,
8718                                      hard_frame_pointer_rtx,
8719                                      GEN_INT (offset), style);
8720           ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8721                                                 0, style == 2);
8722           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8723                                      GEN_INT (frame.nsseregs * 16 +
8724                                        frame.padding0), style);
8725         }
8726       else if (frame.to_allocate || frame.padding0 || frame.nsseregs)
8727         {
8728           ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8729                                                 frame.to_allocate,
8730                                                 style == 2);
8731           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8732                                      GEN_INT (frame.to_allocate
8733                                               + frame.nsseregs * 16
8734                                               + frame.padding05), style);
8735         }
8736 
8737       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8738         if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8739           emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8740 
8741       /* XXX: Needs adjustment for SSE regs? */
8742       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8743                                  GEN_INT (frame.nmsave_args * UNITS_PER_WORD
8744                                           + frame.padding0), style);
8745       if (frame_pointer_needed)
8746         {
8747           /* Leave results in shorter dependency chains on CPUs that are
8748              able to grok it fast.  */
8749           if (TARGET_USE_LEAVE)
8750             emit_insn ((*ix86_gen_leave) ());
8751           else
8752             {
              /* If stack realignment really happened, recovering the
                 stack pointer via the hard frame pointer is a must
                 when not using leave.  */
8756               if (stack_realign_fp)
8757                 pro_epilogue_adjust_stack (stack_pointer_rtx,
8758                                            hard_frame_pointer_rtx,
8759                                            const0_rtx, style);
8760               emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8761             }
8762         }
8763     }
8764 
8765   if (crtl->drap_reg && crtl->stack_realign_needed)
8766     {
8767       int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8768                               ? 0 : UNITS_PER_WORD);
8769       gcc_assert (stack_realign_drap);
8770       emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8771                                    crtl->drap_reg,
8772                                    GEN_INT (-(UNITS_PER_WORD
8773                                               + param_ptr_offset))));
8774       if (!call_used_regs[REGNO (crtl->drap_reg)])
8775         emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8776       
8777     }
8778 
8779   /* Sibcall epilogues don't want a return instruction.  */
8780   if (style == 0)
8781     return;
8782 
8783   if (crtl->args.pops_args && crtl->args.size)
8784     {
8785       rtx popc = GEN_INT (crtl->args.pops_args);
8786 
      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
         return address, do an explicit add, and jump indirectly to the
         caller.  */
8790 
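      /* Illustratively, the >= 64K path below emits roughly:
         popl %ecx; addl $N, %esp; jmp *%ecx.  */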
8791       if (crtl->args.pops_args >= 65536)
8792         {
8793           rtx ecx = gen_rtx_REG (SImode, CX_REG);
8794 
8795           /* There is no "pascal" calling convention in any 64bit ABI.  */
8796           gcc_assert (!TARGET_64BIT);
8797 
8798           emit_insn (gen_popsi1 (ecx));
8799           emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8800           emit_jump_insn (gen_return_indirect_internal (ecx));
8801         }
8802       else
8803         emit_jump_insn (gen_return_pop_internal (popc));
8804     }
8805   else
8806     emit_jump_insn (gen_return_internal ());
8807 }
8808 
/* Reset state that emitting the function's body may have changed, such
   as the register number of the PIC register.  */
8810 
8811 static void
8812 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8813                                HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8814 {
8815   if (pic_offset_table_rtx)
8816     SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8817 #if TARGET_MACHO
8818   /* Mach-O doesn't support labels at the end of objects, so if
8819      it looks like we might want one, insert a NOP.  */
8820   {
8821     rtx insn = get_last_insn ();
8822     while (insn
8823            && NOTE_P (insn)
8824            && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8825       insn = PREV_INSN (insn);
8826     if (insn
8827         && (LABEL_P (insn)
8828             || (NOTE_P (insn)
8829                 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8830       fputs ("\tnop\n", file);
8831   }
8832 #endif
8833 
8834 }
8835 
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but is still used for computing the length of the lea
   instruction.  */
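
/* For example (illustrative), the address

     (plus:SI (plus:SI (mult:SI (reg:SI %esi) (const_int 4))
                       (reg:SI %ebx))
              (const_int 12))

   decomposes into base = %ebx, index = %esi, scale = 4 and disp = 12,
   i.e. the operand of "12(%ebx,%esi,4)".  */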
8840 
8841 int
8842 ix86_decompose_address (rtx addr, struct ix86_address *out)
8843 {
8844   rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8845   rtx base_reg, index_reg;
8846   HOST_WIDE_INT scale = 1;
8847   rtx scale_rtx = NULL_RTX;
8848   int retval = 1;
8849   enum ix86_address_seg seg = SEG_DEFAULT;
8850 
8851   if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8852     base = addr;
8853   else if (GET_CODE (addr) == PLUS)
8854     {
8855       rtx addends[4], op;
8856       int n = 0, i;
8857 
8858       op = addr;
8859       do
8860         {
8861           if (n >= 4)
8862             return 0;
8863           addends[n++] = XEXP (op, 1);
8864           op = XEXP (op, 0);
8865         }
8866       while (GET_CODE (op) == PLUS);
8867       if (n >= 4)
8868         return 0;
8869       addends[n] = op;
8870 
8871       for (i = n; i >= 0; --i)
8872         {
8873           op = addends[i];
8874           switch (GET_CODE (op))
8875             {
8876             case MULT:
8877               if (index)
8878                 return 0;
8879               index = XEXP (op, 0);
8880               scale_rtx = XEXP (op, 1);
8881               break;
8882 
8883             case UNSPEC:
8884               if (XINT (op, 1) == UNSPEC_TP
8885                   && TARGET_TLS_DIRECT_SEG_REFS
8886                   && seg == SEG_DEFAULT)
8887                 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8888               else
8889                 return 0;
8890               break;
8891 
8892             case REG:
8893             case SUBREG:
8894               if (!base)
8895                 base = op;
8896               else if (!index)
8897                 index = op;
8898               else
8899                 return 0;
8900               break;
8901 
8902             case CONST:
8903             case CONST_INT:
8904             case SYMBOL_REF:
8905             case LABEL_REF:
8906               if (disp)
8907                 return 0;
8908               disp = op;
8909               break;
8910 
8911             default:
8912               return 0;
8913             }
8914         }
8915     }
8916   else if (GET_CODE (addr) == MULT)
8917     {
8918       index = XEXP (addr, 0);           /* index*scale */
8919       scale_rtx = XEXP (addr, 1);
8920     }
8921   else if (GET_CODE (addr) == ASHIFT)
8922     {
8923       rtx tmp;
8924 
8925       /* We're called for lea too, which implements ashift on occasion.  */
8926       index = XEXP (addr, 0);
8927       tmp = XEXP (addr, 1);
8928       if (!CONST_INT_P (tmp))
8929         return 0;
8930       scale = INTVAL (tmp);
8931       if ((unsigned HOST_WIDE_INT) scale > 3)
8932         return 0;
8933       scale = 1 << scale;
8934       retval = -1;
8935     }
8936   else
8937     disp = addr;                        /* displacement */
8938 
8939   /* Extract the integral value of scale.  */
8940   if (scale_rtx)
8941     {
8942       if (!CONST_INT_P (scale_rtx))
8943         return 0;
8944       scale = INTVAL (scale_rtx);
8945     }
8946 
8947   base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8948   index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8949 
  /* Allow the arg pointer, the frame pointer and the stack pointer as an
     index if there is no scaling.  */
8951   if (base_reg && index_reg && scale == 1
8952       && (index_reg == arg_pointer_rtx
8953           || index_reg == frame_pointer_rtx
8954           || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
8955     {
8956       rtx tmp;
8957       tmp = base, base = index, index = tmp;
8958       tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8959     }
8960 
8961   /* Special case: %ebp cannot be encoded as a base without a displacement.  */
8962   if ((base_reg == hard_frame_pointer_rtx
8963        || base_reg == frame_pointer_rtx
8964        || base_reg == arg_pointer_rtx) && !disp)
8965     disp = const0_rtx;
8966 
  /* Special case: on K6, [%esi] forces the instruction to be vector
     decoded.  Avoid this by transforming it to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
8971   if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8972       && base_reg && !index_reg && !disp
8973       && REG_P (base_reg)
8974       && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8975     disp = const0_rtx;
8976 
8977   /* Special case: encode reg+reg instead of reg*2.  */
8978   if (!base && index && scale && scale == 2)
8979     base = index, base_reg = index_reg, scale = 1;
8980 
8981   /* Special case: scaling cannot be encoded without base or displacement.  */
8982   if (!base && !disp && index && scale != 1)
8983     disp = const0_rtx;
8984 
8985   out->base = base;
8986   out->index = index;
8987   out->disp = disp;
8988   out->scale = scale;
8989   out->seg = seg;
8990 
8991   return retval;
8992 }
8993 
/* Return the cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a register and make a new pseudo.  But not if the
   address requires two registers - that would mean more pseudos with
   longer lifetimes.  */
8999 static int
9000 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
9001 {
9002   struct ix86_address parts;
9003   int cost = 1;
9004   int ok = ix86_decompose_address (x, &parts);
9005 
9006   gcc_assert (ok);
9007 
9008   if (parts.base && GET_CODE (parts.base) == SUBREG)
9009     parts.base = SUBREG_REG (parts.base);
9010   if (parts.index && GET_CODE (parts.index) == SUBREG)
9011     parts.index = SUBREG_REG (parts.index);
9012 
9013   /* Attempt to minimize number of registers in the address.  */
9014   if ((parts.base
9015        && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
9016       || (parts.index
9017           && (!REG_P (parts.index)
9018               || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
9019     cost++;
9020 
9021   if (parts.base
9022       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
9023       && parts.index
9024       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
9025       && parts.base != parts.index)
9026     cost++;
9027 
  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and decoding degenerates to vector decoding.  Increase the cost of
     such addresses here.  The penalty is minimally 2 cycles.  It may be
     worthwhile to split such addresses or even refuse them at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last cases may be avoidable by explicitly coding the
     zero into the memory address, but I don't have an AMD-K6 machine
     handy to check this theory.  */
9042 
9043   if (TARGET_K6
9044       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
9045           || (parts.disp && !parts.base && parts.index && parts.scale != 1)
9046           || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
9047     cost += 10;
9048 
9049   return cost;
9050 }
9051 
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   use.  */
9055 
9056 static bool
9057 darwin_local_data_pic (rtx disp)
9058 {
9059   return (GET_CODE (disp) == UNSPEC
9060           && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
9061 }
9062 
9063 /* Determine if a given RTX is a valid constant.  We already know this
9064    satisfies CONSTANT_P.  */
9065 
9066 bool
9067 legitimate_constant_p (rtx x)
9068 {
9069   switch (GET_CODE (x))
9070     {
9071     case CONST:
9072       x = XEXP (x, 0);
9073 
9074       if (GET_CODE (x) == PLUS)
9075         {
9076           if (!CONST_INT_P (XEXP (x, 1)))
9077             return false;
9078           x = XEXP (x, 0);
9079         }
9080 
9081       if (TARGET_MACHO && darwin_local_data_pic (x))
9082         return true;
9083 
9084       /* Only some unspecs are valid as "constants".  */
9085       if (GET_CODE (x) == UNSPEC)
9086         switch (XINT (x, 1))
9087           {
9088           case UNSPEC_GOT:
9089           case UNSPEC_GOTOFF:
9090           case UNSPEC_PLTOFF:
9091             return TARGET_64BIT;
9092           case UNSPEC_TPOFF:
9093           case UNSPEC_NTPOFF:
9094             x = XVECEXP (x, 0, 0);
9095             return (GET_CODE (x) == SYMBOL_REF
9096                     && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9097           case UNSPEC_DTPOFF:
9098             x = XVECEXP (x, 0, 0);
9099             return (GET_CODE (x) == SYMBOL_REF
9100                     && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9101           default:
9102             return false;
9103           }
9104 
9105       /* We must have drilled down to a symbol.  */
9106       if (GET_CODE (x) == LABEL_REF)
9107         return true;
9108       if (GET_CODE (x) != SYMBOL_REF)
9109         return false;
9110       /* FALLTHRU */
9111 
9112     case SYMBOL_REF:
9113       /* TLS symbols are never valid.  */
9114       if (SYMBOL_REF_TLS_MODEL (x))
9115         return false;
9116 
9117       /* DLLIMPORT symbols are never valid.  */
9118       if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9119           && SYMBOL_REF_DLLIMPORT_P (x))
9120         return false;
9121       break;
9122 
9123     case CONST_DOUBLE:
9124       if (GET_MODE (x) == TImode
9125           && x != CONST0_RTX (TImode)
9126           && !TARGET_64BIT)
9127         return false;
9128       break;
9129 
9130     case CONST_VECTOR:
9131       if (x == CONST0_RTX (GET_MODE (x)))
9132         return true;
9133       return false;
9134 
9135     default:
9136       break;
9137     }
9138 
9139   /* Otherwise we handle everything else in the move patterns.  */
9140   return true;
9141 }
9142 
9143 /* Determine if it's legal to put X into the constant pool.  This
9144    is not possible for the address of thread-local symbols, which
9145    is checked above.  */
9146 
9147 static bool
9148 ix86_cannot_force_const_mem (rtx x)
9149 {
9150   /* We can always put integral constants and vectors in memory.  */
9151   switch (GET_CODE (x))
9152     {
9153     case CONST_INT:
9154     case CONST_DOUBLE:
9155     case CONST_VECTOR:
9156       return false;
9157 
9158     default:
9159       break;
9160     }
9161   return !legitimate_constant_p (x);
9162 }
9163 
9164 /* Determine if a given RTX is a valid constant address.  */
9165 
9166 bool
9167 constant_address_p (rtx x)
9168 {
9169   return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
9170 }
9171 
9172 /* Return number of arguments to be saved on the stack with
9173    -msave-args.  */
9174 
9175 static int
9176 ix86_nsaved_args (void)
9177 {
9178   if (TARGET_SAVE_ARGS)
9179     return crtl->args.info.regno - cfun->returns_struct;
9180   else
9181     return 0;
9182 }
9183 
9184 /* Nonzero if the constant value X is a legitimate general operand
9185    when generating PIC code.  It is given that flag_pic is on and
9186    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
9187 bool
9188 legitimate_pic_operand_p (rtx x)
9189 {
9190   rtx inner;
9191 
9192   switch (GET_CODE (x))
9193     {
9194     case CONST:
9195       inner = XEXP (x, 0);
9196       if (GET_CODE (inner) == PLUS
9197           && CONST_INT_P (XEXP (inner, 1)))
9198         inner = XEXP (inner, 0);
9199 
9200       /* Only some unspecs are valid as "constants".  */
9201       if (GET_CODE (inner) == UNSPEC)
9202         switch (XINT (inner, 1))
9203           {
9204           case UNSPEC_GOT:
9205           case UNSPEC_GOTOFF:
9206           case UNSPEC_PLTOFF:
9207             return TARGET_64BIT;
9208           case UNSPEC_TPOFF:
9209             x = XVECEXP (inner, 0, 0);
9210             return (GET_CODE (x) == SYMBOL_REF
9211                     && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9212           case UNSPEC_MACHOPIC_OFFSET:
9213             return legitimate_pic_address_disp_p (x);
9214           default:
9215             return false;
9216           }
9217       /* FALLTHRU */
9218 
9219     case SYMBOL_REF:
9220     case LABEL_REF:
9221       return legitimate_pic_address_disp_p (x);
9222 
9223     default:
9224       return true;
9225     }
9226 }
9227 
9228 /* Determine if a given CONST RTX is a valid memory displacement
9229    in PIC mode.  */
9230 
9231 int
9232 legitimate_pic_address_disp_p (rtx disp)
9233 {
9234   bool saw_plus;
9235 
9236   /* In 64bit mode we can allow direct addresses of symbols and labels
9237      when they are not dynamic symbols.  */
9238   if (TARGET_64BIT)
9239     {
9240       rtx op0 = disp, op1;
9241 
9242       switch (GET_CODE (disp))
9243         {
9244         case LABEL_REF:
9245           return true;
9246 
9247         case CONST:
9248           if (GET_CODE (XEXP (disp, 0)) != PLUS)
9249             break;
9250           op0 = XEXP (XEXP (disp, 0), 0);
9251           op1 = XEXP (XEXP (disp, 0), 1);
9252           if (!CONST_INT_P (op1)
9253               || INTVAL (op1) >= 16*1024*1024
9254               || INTVAL (op1) < -16*1024*1024)
9255             break;
9256           if (GET_CODE (op0) == LABEL_REF)
9257             return true;
9258           if (GET_CODE (op0) != SYMBOL_REF)
9259             break;
9260           /* FALLTHRU */
9261 
9262         case SYMBOL_REF:
9263           /* TLS references should always be enclosed in UNSPEC.  */
9264           if (SYMBOL_REF_TLS_MODEL (op0))
9265             return false;
9266           if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9267               && ix86_cmodel != CM_LARGE_PIC)
9268             return true;
9269           break;
9270 
9271         default:
9272           break;
9273         }
9274     }
9275   if (GET_CODE (disp) != CONST)
9276     return 0;
9277   disp = XEXP (disp, 0);
9278 
9279   if (TARGET_64BIT)
9280     {
      /* It is unsafe to allow PLUS expressions; this restriction limits
         the allowed distance of GOT references.  We should not need
         these anyway.  */
9283       if (GET_CODE (disp) != UNSPEC
9284           || (XINT (disp, 1) != UNSPEC_GOTPCREL
9285               && XINT (disp, 1) != UNSPEC_GOTOFF
9286               && XINT (disp, 1) != UNSPEC_PLTOFF))
9287         return 0;
9288 
9289       if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9290           && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
9291         return 0;
9292       return 1;
9293     }
9294 
9295   saw_plus = false;
9296   if (GET_CODE (disp) == PLUS)
9297     {
9298       if (!CONST_INT_P (XEXP (disp, 1)))
9299         return 0;
9300       disp = XEXP (disp, 0);
9301       saw_plus = true;
9302     }
9303 
9304   if (TARGET_MACHO && darwin_local_data_pic (disp))
9305     return 1;
9306 
9307   if (GET_CODE (disp) != UNSPEC)
9308     return 0;
9309 
9310   switch (XINT (disp, 1))
9311     {
9312     case UNSPEC_GOT:
9313       if (saw_plus)
9314         return false;
9315       /* We need to check for both symbols and labels because VxWorks loads
9316          text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
9317          details.  */
9318       return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9319               || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9320     case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64 bits when
         used.  While the ABI also specifies a 32bit relocation, we
         don't produce it in the small PIC model at all.  */
9324       if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9325            || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9326           && !TARGET_64BIT)
9327         return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9328       return false;
9329     case UNSPEC_GOTTPOFF:
9330     case UNSPEC_GOTNTPOFF:
9331     case UNSPEC_INDNTPOFF:
9332       if (saw_plus)
9333         return false;
9334       disp = XVECEXP (disp, 0, 0);
9335       return (GET_CODE (disp) == SYMBOL_REF
9336               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9337     case UNSPEC_NTPOFF:
9338       disp = XVECEXP (disp, 0, 0);
9339       return (GET_CODE (disp) == SYMBOL_REF
9340               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9341     case UNSPEC_DTPOFF:
9342       disp = XVECEXP (disp, 0, 0);
9343       return (GET_CODE (disp) == SYMBOL_REF
9344               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9345     }
9346 
9347   return 0;
9348 }
9349 
9350 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
9351    memory address for an instruction.  The MODE argument is the machine mode
9352    for the MEM expression that wants to use this address.
9353 
9354    It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
9355    convert common non-canonical forms to canonical form so that they will
9356    be recognized.  */
9357 
9358 int
9359 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9360                       rtx addr, int strict)
9361 {
9362   struct ix86_address parts;
9363   rtx base, index, disp;
9364   HOST_WIDE_INT scale;
9365   const char *reason = NULL;
9366   rtx reason_rtx = NULL_RTX;
9367 
9368   if (ix86_decompose_address (addr, &parts) <= 0)
9369     {
9370       reason = "decomposition failed";
9371       goto report_error;
9372     }
9373 
9374   base = parts.base;
9375   index = parts.index;
9376   disp = parts.disp;
9377   scale = parts.scale;
9378 
9379   /* Validate base register.
9380 
9381      Don't allow SUBREG's that span more than a word here.  It can lead to spill
9382      failures when the base is one word out of a two word structure, which is
9383      represented internally as a DImode int.  */
9384 
9385   if (base)
9386     {
9387       rtx reg;
9388       reason_rtx = base;
9389 
9390       if (REG_P (base))
9391         reg = base;
9392       else if (GET_CODE (base) == SUBREG
9393                && REG_P (SUBREG_REG (base))
9394                && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9395                   <= UNITS_PER_WORD)
9396         reg = SUBREG_REG (base);
9397       else
9398         {
9399           reason = "base is not a register";
9400           goto report_error;
9401         }
9402 
9403       if (GET_MODE (base) != Pmode)
9404         {
9405           reason = "base is not in Pmode";
9406           goto report_error;
9407         }
9408 
9409       if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9410           || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9411         {
9412           reason = "base is not valid";
9413           goto report_error;
9414         }
9415     }
9416 
9417   /* Validate index register.
9418 
9419      Don't allow SUBREG's that span more than a word here -- same as above.  */
9420 
9421   if (index)
9422     {
9423       rtx reg;
9424       reason_rtx = index;
9425 
9426       if (REG_P (index))
9427         reg = index;
9428       else if (GET_CODE (index) == SUBREG
9429                && REG_P (SUBREG_REG (index))
9430                && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9431                   <= UNITS_PER_WORD)
9432         reg = SUBREG_REG (index);
9433       else
9434         {
9435           reason = "index is not a register";
9436           goto report_error;
9437         }
9438 
9439       if (GET_MODE (index) != Pmode)
9440         {
9441           reason = "index is not in Pmode";
9442           goto report_error;
9443         }
9444 
9445       if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9446           || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9447         {
9448           reason = "index is not valid";
9449           goto report_error;
9450         }
9451     }
9452 
9453   /* Validate scale factor.  */
9454   if (scale != 1)
9455     {
9456       reason_rtx = GEN_INT (scale);
9457       if (!index)
9458         {
9459           reason = "scale without index";
9460           goto report_error;
9461         }
9462 
9463       if (scale != 2 && scale != 4 && scale != 8)
9464         {
9465           reason = "scale is not a valid multiplier";
9466           goto report_error;
9467         }
9468     }
9469 
9470   /* Validate displacement.  */
9471   if (disp)
9472     {
9473       reason_rtx = disp;
9474 
9475       if (GET_CODE (disp) == CONST
9476           && GET_CODE (XEXP (disp, 0)) == UNSPEC
9477           && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9478         switch (XINT (XEXP (disp, 0), 1))
9479           {
          /* Refuse GOTOFF and GOT in 64bit mode since they are always
             64 bits when used.  While the ABI also specifies 32bit
             relocations, we don't produce them at all and use IP
             relative addressing instead.  */
9483           case UNSPEC_GOT:
9484           case UNSPEC_GOTOFF:
9485             gcc_assert (flag_pic);
9486             if (!TARGET_64BIT)
9487               goto is_legitimate_pic;
9488             reason = "64bit address unspec";
9489             goto report_error;
9490 
9491           case UNSPEC_GOTPCREL:
9492             gcc_assert (flag_pic);
9493             goto is_legitimate_pic;
9494 
9495           case UNSPEC_GOTTPOFF:
9496           case UNSPEC_GOTNTPOFF:
9497           case UNSPEC_INDNTPOFF:
9498           case UNSPEC_NTPOFF:
9499           case UNSPEC_DTPOFF:
9500             break;
9501 
9502           default:
9503             reason = "invalid address unspec";
9504             goto report_error;
9505           }
9506 
9507       else if (SYMBOLIC_CONST (disp)
9508                && (flag_pic
9509                    || (TARGET_MACHO
9510 #if TARGET_MACHO
9511                        && MACHOPIC_INDIRECT
9512                        && !machopic_operand_p (disp)
9513 #endif
9514                )))
9515         {
9516 
9517         is_legitimate_pic:
9518           if (TARGET_64BIT && (index || base))
9519             {
9520               /* foo@dtpoff(%rX) is ok.  */
9521               if (GET_CODE (disp) != CONST
9522                   || GET_CODE (XEXP (disp, 0)) != PLUS
9523                   || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9524                   || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9525                   || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9526                       && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9527                 {
9528                   reason = "non-constant pic memory reference";
9529                   goto report_error;
9530                 }
9531             }
9532           else if (! legitimate_pic_address_disp_p (disp))
9533             {
9534               reason = "displacement is an invalid pic construct";
9535               goto report_error;
9536             }
9537 
          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is a good idea, unfortunately these constructs may
             be created by the "adds using lea" optimization for incorrect
             code like:

             int a;
             int foo(int i)
               {
                 return *(&a+i);
               }

             This code is nonsensical, but results in addressing the
             GOT table with a pic_offset_table_rtx base.  We can't
             just refuse it easily, since it gets matched by the
             "addsi3" pattern, which later gets split to lea in the
             case where the output register differs from the input.
             While this could be handled by a separate addsi pattern
             for this case that never results in lea, disabling this
             test seems to be the easier and correct fix for the
             crash.  */
9559         }
9560       else if (GET_CODE (disp) != LABEL_REF
9561                && !CONST_INT_P (disp)
9562                && (GET_CODE (disp) != CONST
9563                    || !legitimate_constant_p (disp))
9564                && (GET_CODE (disp) != SYMBOL_REF
9565                    || !legitimate_constant_p (disp)))
9566         {
9567           reason = "displacement is not constant";
9568           goto report_error;
9569         }
9570       else if (TARGET_64BIT
9571                && !x86_64_immediate_operand (disp, VOIDmode))
9572         {
9573           reason = "displacement is out of range";
9574           goto report_error;
9575         }
9576     }
9577 
9578   /* Everything looks valid.  */
9579   return TRUE;
9580 
9581  report_error:
9582   return FALSE;
9583 }
9584 
9585 /* Return a unique alias set for the GOT.  */
9586 
9587 static alias_set_type
9588 ix86_GOT_alias_set (void)
9589 {
9590   static alias_set_type set = -1;
9591   if (set == -1)
9592     set = new_alias_set ();
9593   return set;
9594 }
9595 
9596 /* Return a legitimate reference for ORIG (an address) using the
9597    register REG.  If REG is 0, a new pseudo is generated.
9598 
9599    There are two types of references that must be handled:
9600 
9601    1. Global data references must load the address from the GOT, via
9602       the PIC reg.  An insn is emitted to do this load, and the reg is
9603       returned.
9604 
9605    2. Static data references, constant pool addresses, and code labels
9606       compute the address as an offset from the GOT, whose base is in
9607       the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
9608       differentiate them from global data objects.  The returned
9609       address is the PIC reg + an unspec constant.
9610 
9611    GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
9612    reg also appears in the address.  */
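
/* For example (illustrative), on 32-bit ELF a reference to a global
   symbol "x" is legitimized by this function into

     (mem (plus pic_reg (const (unspec [(symbol_ref "x")] UNSPEC_GOT))))

   i.e. a load from x@GOT(%ebx), while a file-local object is addressed
   as pic_reg + x@GOTOFF with no memory load.  */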
9613 
9614 static rtx
9615 legitimize_pic_address (rtx orig, rtx reg)
9616 {
9617   rtx addr = orig;
9618   rtx new_rtx = orig;
9619   rtx base;
9620 
9621 #if TARGET_MACHO
9622   if (TARGET_MACHO && !TARGET_64BIT)
9623     {
9624       if (reg == 0)
9625         reg = gen_reg_rtx (Pmode);
9626       /* Use the generic Mach-O PIC machinery.  */
9627       return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9628     }
9629 #endif
9630 
9631   if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9632     new_rtx = addr;
9633   else if (TARGET_64BIT
9634            && ix86_cmodel != CM_SMALL_PIC
9635            && gotoff_operand (addr, Pmode))
9636     {
9637       rtx tmpreg;
9638       /* This symbol may be referenced via a displacement from the PIC
9639          base address (@GOTOFF).  */
9640 
9641       if (reload_in_progress)
9642         df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9643       if (GET_CODE (addr) == CONST)
9644         addr = XEXP (addr, 0);
9645       if (GET_CODE (addr) == PLUS)
9646           {
9647             new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9648                                       UNSPEC_GOTOFF);
9649             new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9650           }
9651         else
9652           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9653       new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9654       if (!reg)
9655         tmpreg = gen_reg_rtx (Pmode);
9656       else
9657         tmpreg = reg;
9658       emit_move_insn (tmpreg, new_rtx);
9659 
9660       if (reg != 0)
9661         {
9662           new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9663                                          tmpreg, 1, OPTAB_DIRECT);
9664           new_rtx = reg;
9665         }
      else
        new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9667     }
9668   else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9669     {
9670       /* This symbol may be referenced via a displacement from the PIC
9671          base address (@GOTOFF).  */
9672 
9673       if (reload_in_progress)
9674         df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9675       if (GET_CODE (addr) == CONST)
9676         addr = XEXP (addr, 0);
9677       if (GET_CODE (addr) == PLUS)
9678         {
9679           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9680                                     UNSPEC_GOTOFF);
9681           new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9682         }
9683       else
9684         new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9685       new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9686       new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9687 
9688       if (reg != 0)
9689         {
9690           emit_move_insn (reg, new_rtx);
9691           new_rtx = reg;
9692         }
9693     }
9694   else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9695            /* We can't use @GOTOFF for text labels on VxWorks;
9696               see gotoff_operand.  */
9697            || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9698     {
9699       if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9700         {
9701           if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9702             return legitimize_dllimport_symbol (addr, true);
9703           if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9704               && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9705               && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9706             {
9707               rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9708               return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
9709             }
9710         }
9711 
9712       if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9713         {
9714           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9715           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9716           new_rtx = gen_const_mem (Pmode, new_rtx);
9717           set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9718 
9719           if (reg == 0)
9720             reg = gen_reg_rtx (Pmode);
9721           /* Use gen_movsi directly; otherwise the address would be loaded
9722              into a register and CSEd.  We don't want to CSE this address
9723              itself; we CSE the addresses loaded from the GOT instead.  */
9724           emit_insn (gen_movsi (reg, new_rtx));
9725           new_rtx = reg;
9726         }
9727       else
9728         {
9729           /* This symbol must be referenced via a load from the
9730              Global Offset Table (@GOT).  */
9731 
9732           if (reload_in_progress)
9733             df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9734           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9735           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9736           if (TARGET_64BIT)
9737             new_rtx = force_reg (Pmode, new_rtx);
9738           new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9739           new_rtx = gen_const_mem (Pmode, new_rtx);
9740           set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9741 
9742           if (reg == 0)
9743             reg = gen_reg_rtx (Pmode);
9744           emit_move_insn (reg, new_rtx);
9745           new_rtx = reg;
9746         }
9747     }
9748   else
9749     {
9750       if (CONST_INT_P (addr)
9751           && !x86_64_immediate_operand (addr, VOIDmode))
9752         {
9753           if (reg)
9754             {
9755               emit_move_insn (reg, addr);
9756               new_rtx = reg;
9757             }
9758           else
9759             new_rtx = force_reg (Pmode, addr);
9760         }
9761       else if (GET_CODE (addr) == CONST)
9762         {
9763           addr = XEXP (addr, 0);
9764 
9765           /* We must match the forms we generated earlier.  Assume the only
9766              unspecs that can get here are ours.  Not that we could do
9767              anything with them anyway....  */
9768           if (GET_CODE (addr) == UNSPEC
9769               || (GET_CODE (addr) == PLUS
9770                   && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9771             return orig;
9772           gcc_assert (GET_CODE (addr) == PLUS);
9773         }
9774       if (GET_CODE (addr) == PLUS)
9775         {
9776           rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9777 
9778           /* Check first to see if this is a constant offset from a @GOTOFF
9779              symbol reference.  */
9780           if (gotoff_operand (op0, Pmode)
9781               && CONST_INT_P (op1))
9782             {
9783               if (!TARGET_64BIT)
9784                 {
9785                   if (reload_in_progress)
9786                     df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9787                   new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9788                                             UNSPEC_GOTOFF);
9789                   new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9790                   new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9791                   new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9792 
9793                   if (reg != 0)
9794                     {
9795                       emit_move_insn (reg, new_rtx);
9796                       new_rtx = reg;
9797                     }
9798                 }
9799               else
9800                 {
9801                   if (INTVAL (op1) < -16*1024*1024
9802                       || INTVAL (op1) >= 16*1024*1024)
9803                     {
9804                       if (!x86_64_immediate_operand (op1, Pmode))
9805                         op1 = force_reg (Pmode, op1);
9806                       new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
9807                     }
9808                 }
9809             }
9810           else
9811             {
9812               base = legitimize_pic_address (XEXP (addr, 0), reg);
9813               new_rtx  = legitimize_pic_address (XEXP (addr, 1),
9814                                                  base == reg ? NULL_RTX : reg);
9815 
9816               if (CONST_INT_P (new_rtx))
9817                 new_rtx = plus_constant (base, INTVAL (new_rtx));
9818               else
9819                 {
9820                   if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9821                     {
9822                       base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9823                       new_rtx = XEXP (new_rtx, 1);
9824                     }
9825                   new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9826                 }
9827             }
9828         }
9829     }
9830   return new_rtx;
9831 }
9832 
9833 /* Load the thread pointer.  If TO_REG is true, force it into a register.  */
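
     /* (Illustrative: on GNU/Linux targets the UNSPEC_TP pattern typically
        expands to an access based at %gs on ia32 and at %fs on x86-64.)  */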
9834 
9835 static rtx
9836 get_thread_pointer (int to_reg)
9837 {
9838   rtx tp, reg, insn;
9839 
9840   tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9841   if (!to_reg)
9842     return tp;
9843 
9844   reg = gen_reg_rtx (Pmode);
9845   insn = gen_rtx_SET (VOIDmode, reg, tp);
9846   insn = emit_insn (insn);
9847 
9848   return reg;
9849 }
9850 
9851 /* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
9852    false if we expect this to be used for a memory address and true if
9853    we expect to load the address into a register.  */
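
     /* As a rough illustration only (GNU/Linux ia32; the exact sequences
        depend on TARGET_64BIT and TARGET_GNU2_TLS), the four models emit
        approximately:

          global dynamic:  leal x@tlsgd(,%ebx,1), %eax
                           call ___tls_get_addr@plt
          local dynamic:   leal x@tlsldm(%ebx), %eax
                           call ___tls_get_addr@plt
                           leal x@dtpoff(%eax), %edx
          initial exec:    movl %gs:0, %eax
                           addl x@gotntpoff(%ebx), %eax
          local exec:      movl %gs:0, %eax
                           addl $x@ntpoff, %eax  */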
9854 
9855 static rtx
9856 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9857 {
9858   rtx dest, base, off, pic, tp;
9859   int type;
9860 
9861   switch (model)
9862     {
9863     case TLS_MODEL_GLOBAL_DYNAMIC:
9864       dest = gen_reg_rtx (Pmode);
9865       tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9866 
9867       if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9868         {
9869           rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9870 
9871           start_sequence ();
9872           emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9873           insns = get_insns ();
9874           end_sequence ();
9875 
9876           RTL_CONST_CALL_P (insns) = 1;
9877           emit_libcall_block (insns, dest, rax, x);
9878         }
9879       else if (TARGET_64BIT && TARGET_GNU2_TLS)
9880         emit_insn (gen_tls_global_dynamic_64 (dest, x));
9881       else
9882         emit_insn (gen_tls_global_dynamic_32 (dest, x));
9883 
9884       if (TARGET_GNU2_TLS)
9885         {
9886           dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9887 
9888           set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9889         }
9890       break;
9891 
9892     case TLS_MODEL_LOCAL_DYNAMIC:
9893       base = gen_reg_rtx (Pmode);
9894       tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9895 
9896       if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9897         {
9898           rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9899 
9900           start_sequence ();
9901           emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9902           insns = get_insns ();
9903           end_sequence ();
9904 
9905           note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9906           note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9907           RTL_CONST_CALL_P (insns) = 1;
9908           emit_libcall_block (insns, base, rax, note);
9909         }
9910       else if (TARGET_64BIT && TARGET_GNU2_TLS)
9911         emit_insn (gen_tls_local_dynamic_base_64 (base));
9912       else
9913         emit_insn (gen_tls_local_dynamic_base_32 (base));
9914 
9915       if (TARGET_GNU2_TLS)
9916         {
9917           rtx x = ix86_tls_module_base ();
9918 
9919           set_unique_reg_note (get_last_insn (), REG_EQUIV,
9920                                gen_rtx_MINUS (Pmode, x, tp));
9921         }
9922 
9923       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9924       off = gen_rtx_CONST (Pmode, off);
9925 
9926       dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9927 
9928       if (TARGET_GNU2_TLS)
9929         {
9930           dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9931 
9932           set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9933         }
9934 
9935       break;
9936 
9937     case TLS_MODEL_INITIAL_EXEC:
9938       if (TARGET_64BIT)
9939         {
9940           pic = NULL;
9941           type = UNSPEC_GOTNTPOFF;
9942         }
9943       else if (flag_pic)
9944         {
9945           if (reload_in_progress)
9946             df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9947           pic = pic_offset_table_rtx;
9948           type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9949         }
9950       else if (!TARGET_ANY_GNU_TLS)
9951         {
9952           pic = gen_reg_rtx (Pmode);
9953           emit_insn (gen_set_got (pic));
9954           type = UNSPEC_GOTTPOFF;
9955         }
9956       else
9957         {
9958           pic = NULL;
9959           type = UNSPEC_INDNTPOFF;
9960         }
9961 
9962       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9963       off = gen_rtx_CONST (Pmode, off);
9964       if (pic)
9965         off = gen_rtx_PLUS (Pmode, pic, off);
9966       off = gen_const_mem (Pmode, off);
9967       set_mem_alias_set (off, ix86_GOT_alias_set ());
9968 
9969       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9970         {
9971           base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9972           off = force_reg (Pmode, off);
9973           return gen_rtx_PLUS (Pmode, base, off);
9974         }
9975       else
9976         {
9977           base = get_thread_pointer (true);
9978           dest = gen_reg_rtx (Pmode);
9979           emit_insn (gen_subsi3 (dest, base, off));
9980         }
9981       break;
9982 
9983     case TLS_MODEL_LOCAL_EXEC:
9984       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9985                             (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9986                             ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9987       off = gen_rtx_CONST (Pmode, off);
9988 
9989       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9990         {
9991           base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9992           return gen_rtx_PLUS (Pmode, base, off);
9993         }
9994       else
9995         {
9996           base = get_thread_pointer (true);
9997           dest = gen_reg_rtx (Pmode);
9998           emit_insn (gen_subsi3 (dest, base, off));
9999         }
10000       break;
10001 
10002     default:
10003       gcc_unreachable ();
10004     }
10005 
10006   return dest;
10007 }
10008 
10009 /* Create or return the unique __imp_DECL dllimport symbol corresponding
10010    to symbol DECL.  */
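
      /* For example (illustrative): for a dllimport variable FOO, the decl
         built below gets a DECL_RTL of the form
         (mem (symbol_ref "*__imp__foo")); the shorter "*__imp_" prefix is
         used when FASTCALL_PREFIX applies or there is no user label prefix.  */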
10011 
10012 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
10013   htab_t dllimport_map;
10014 
10015 static tree
10016 get_dllimport_decl (tree decl)
10017 {
10018   struct tree_map *h, in;
10019   void **loc;
10020   const char *name;
10021   const char *prefix;
10022   size_t namelen, prefixlen;
10023   char *imp_name;
10024   tree to;
10025   rtx rtl;
10026 
10027   if (!dllimport_map)
10028     dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
10029 
10030   in.hash = htab_hash_pointer (decl);
10031   in.base.from = decl;
10032   loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
10033   h = (struct tree_map *) *loc;
10034   if (h)
10035     return h->to;
10036 
10037   *loc = h = GGC_NEW (struct tree_map);
10038   h->hash = in.hash;
10039   h->base.from = decl;
10040   h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
10041   DECL_ARTIFICIAL (to) = 1;
10042   DECL_IGNORED_P (to) = 1;
10043   DECL_EXTERNAL (to) = 1;
10044   TREE_READONLY (to) = 1;
10045 
10046   name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
10047   name = targetm.strip_name_encoding (name);
10048   prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
10049     ? "*__imp_" : "*__imp__";
10050   namelen = strlen (name);
10051   prefixlen = strlen (prefix);
10052   imp_name = (char *) alloca (namelen + prefixlen + 1);
10053   memcpy (imp_name, prefix, prefixlen);
10054   memcpy (imp_name + prefixlen, name, namelen + 1);
10055 
10056   name = ggc_alloc_string (imp_name, namelen + prefixlen);
10057   rtl = gen_rtx_SYMBOL_REF (Pmode, name);
10058   SET_SYMBOL_REF_DECL (rtl, to);
10059   SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
10060 
10061   rtl = gen_const_mem (Pmode, rtl);
10062   set_mem_alias_set (rtl, ix86_GOT_alias_set ());
10063 
10064   SET_DECL_RTL (to, rtl);
10065   SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
10066 
10067   return to;
10068 }
10069 
10070 /* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
10071    true if we require the result be a register.  */
10072 
10073 static rtx
10074 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
10075 {
10076   tree imp_decl;
10077   rtx x;
10078 
10079   gcc_assert (SYMBOL_REF_DECL (symbol));
10080   imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10081 
10082   x = DECL_RTL (imp_decl);
10083   if (want_reg)
10084     x = force_reg (Pmode, x);
10085   return x;
10086 }
10087 
10088 /* Try machine-dependent ways of modifying an illegitimate address
10089    to be legitimate.  If we find one, return the new, valid address.
10090    This function is used in only one place: `memory_address' in explow.c.
10091 
10092    OLDX is the address as it was before break_out_memory_refs was called.
10093    In some cases it is useful to look at this to decide what needs to be done.
10094 
10095    MODE is passed so that this function can use
10096    GO_IF_LEGITIMATE_ADDRESS.
10097 
10098    It is always safe for this function to do nothing.  It exists to recognize
10099    opportunities to optimize the output.
10100 
10101    For the 80386, we handle X+REG by loading X into a register R and
10102    using R+REG.  R will go in a general reg and indexing will be used.
10103    However, if REG is a broken-out memory address or multiplication,
10104    nothing needs to be done because REG can certainly go in a general reg.
10105 
10106    When -fpic is used, special handling is needed for symbolic references.
10107    See comments by legitimize_pic_address in i386.c for details.  */
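
      /* For example (a sketch), the canonicalizations below rewrite

           (plus (mult (reg) (const_int 4)) (plus (reg) (const_int 8)))

         into

           (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 8)),

         i.e. the base + index*scale + displacement form that the hardware
         addressing modes accept.  */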
10108 
10109 rtx
10110 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
10111 {
10112   int changed = 0;
10113   unsigned log;
10114 
10115   log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10116   if (log)
10117     return legitimize_tls_address (x, (enum tls_model) log, false);
10118   if (GET_CODE (x) == CONST
10119       && GET_CODE (XEXP (x, 0)) == PLUS
10120       && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10121       && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10122     {
10123       rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10124                                       (enum tls_model) log, false);
10125       return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10126     }
10127 
10128   if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10129     {
10130       if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10131         return legitimize_dllimport_symbol (x, true);
10132       if (GET_CODE (x) == CONST
10133           && GET_CODE (XEXP (x, 0)) == PLUS
10134           && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10135           && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10136         {
10137           rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10138           return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10139         }
10140     }
10141 
10142   if (flag_pic && SYMBOLIC_CONST (x))
10143     return legitimize_pic_address (x, 0);
10144 
10145   /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
10146   if (GET_CODE (x) == ASHIFT
10147       && CONST_INT_P (XEXP (x, 1))
10148       && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10149     {
10150       changed = 1;
10151       log = INTVAL (XEXP (x, 1));
10152       x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10153                         GEN_INT (1 << log));
10154     }
10155 
10156   if (GET_CODE (x) == PLUS)
10157     {
10158       /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
10159 
10160       if (GET_CODE (XEXP (x, 0)) == ASHIFT
10161           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10162           && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10163         {
10164           changed = 1;
10165           log = INTVAL (XEXP (XEXP (x, 0), 1));
10166           XEXP (x, 0) = gen_rtx_MULT (Pmode,
10167                                       force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10168                                       GEN_INT (1 << log));
10169         }
10170 
10171       if (GET_CODE (XEXP (x, 1)) == ASHIFT
10172           && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10173           && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10174         {
10175           changed = 1;
10176           log = INTVAL (XEXP (XEXP (x, 1), 1));
10177           XEXP (x, 1) = gen_rtx_MULT (Pmode,
10178                                       force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10179                                       GEN_INT (1 << log));
10180         }
10181 
10182       /* Put multiply first if it isn't already.  */
10183       if (GET_CODE (XEXP (x, 1)) == MULT)
10184         {
10185           rtx tmp = XEXP (x, 0);
10186           XEXP (x, 0) = XEXP (x, 1);
10187           XEXP (x, 1) = tmp;
10188           changed = 1;
10189         }
10190 
10191       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10192          into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
10193          created by virtual register instantiation, register elimination, and
10194          similar optimizations.  */
10195       if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10196         {
10197           changed = 1;
10198           x = gen_rtx_PLUS (Pmode,
10199                             gen_rtx_PLUS (Pmode, XEXP (x, 0),
10200                                           XEXP (XEXP (x, 1), 0)),
10201                             XEXP (XEXP (x, 1), 1));
10202         }
10203 
10204       /* Canonicalize
10205          (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10206          into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
10207       else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10208                && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10209                && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10210                && CONSTANT_P (XEXP (x, 1)))
10211         {
10212           rtx constant;
10213           rtx other = NULL_RTX;
10214 
10215           if (CONST_INT_P (XEXP (x, 1)))
10216             {
10217               constant = XEXP (x, 1);
10218               other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10219             }
10220           else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10221             {
10222               constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10223               other = XEXP (x, 1);
10224             }
10225           else
10226             constant = 0;
10227 
10228           if (constant)
10229             {
10230               changed = 1;
10231               x = gen_rtx_PLUS (Pmode,
10232                                 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10233                                               XEXP (XEXP (XEXP (x, 0), 1), 0)),
10234                                 plus_constant (other, INTVAL (constant)));
10235             }
10236         }
10237 
10238       if (changed && legitimate_address_p (mode, x, FALSE))
10239         return x;
10240 
10241       if (GET_CODE (XEXP (x, 0)) == MULT)
10242         {
10243           changed = 1;
10244           XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10245         }
10246 
10247       if (GET_CODE (XEXP (x, 1)) == MULT)
10248         {
10249           changed = 1;
10250           XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10251         }
10252 
10253       if (changed
10254           && REG_P (XEXP (x, 1))
10255           && REG_P (XEXP (x, 0)))
10256         return x;
10257 
10258       if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10259         {
10260           changed = 1;
10261           x = legitimize_pic_address (x, 0);
10262         }
10263 
10264       if (changed && legitimate_address_p (mode, x, FALSE))
10265         return x;
10266 
10267       if (REG_P (XEXP (x, 0)))
10268         {
10269           rtx temp = gen_reg_rtx (Pmode);
10270           rtx val  = force_operand (XEXP (x, 1), temp);
10271           if (val != temp)
10272             emit_move_insn (temp, val);
10273 
10274           XEXP (x, 1) = temp;
10275           return x;
10276         }
10277 
10278       else if (REG_P (XEXP (x, 1)))
10279         {
10280           rtx temp = gen_reg_rtx (Pmode);
10281           rtx val  = force_operand (XEXP (x, 0), temp);
10282           if (val != temp)
10283             emit_move_insn (temp, val);
10284 
10285           XEXP (x, 0) = temp;
10286           return x;
10287         }
10288     }
10289 
10290   return x;
10291 }
10292 
10293 /* Print an integer constant expression in assembler syntax.  Addition
10294    and subtraction are the only arithmetic that may appear in these
10295    expressions.  FILE is the stdio stream to write to, X is the rtx, and
10296    CODE is the operand print code from the output string.  */
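
      /* For instance (illustrative), (const (unspec [(symbol_ref "foo")]
         UNSPEC_GOTOFF)) is printed as "foo@GOTOFF", and a MINUS is wrapped
         in the grouping brackets of the current assembler dialect.  */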
10297 
10298 static void
10299 output_pic_addr_const (FILE *file, rtx x, int code)
10300 {
10301   char buf[256];
10302 
10303   switch (GET_CODE (x))
10304     {
10305     case PC:
10306       gcc_assert (flag_pic);
10307       putc ('.', file);
10308       break;
10309 
10310     case SYMBOL_REF:
10311       if (! TARGET_MACHO || TARGET_64BIT)
10312         output_addr_const (file, x);
10313       else
10314         {
10315           const char *name = XSTR (x, 0);
10316 
10317           /* Mark the decl as referenced so that cgraph will
10318              output the function.  */
10319           if (SYMBOL_REF_DECL (x))
10320             mark_decl_referenced (SYMBOL_REF_DECL (x));
10321 
10322 #if TARGET_MACHO
10323           if (MACHOPIC_INDIRECT
10324               && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10325             name = machopic_indirection_name (x, /*stub_p=*/true);
10326 #endif
10327           assemble_name (file, name);
10328         }
10329       if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10330           && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10331         fputs ("@PLT", file);
10332       break;
10333 
10334     case LABEL_REF:
10335       x = XEXP (x, 0);
10336       /* FALLTHRU */
10337     case CODE_LABEL:
10338       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10339       assemble_name (asm_out_file, buf);
10340       break;
10341 
10342     case CONST_INT:
10343       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10344       break;
10345 
10346     case CONST:
10347       /* This used to output parentheses around the expression,
10348          but that does not work on the 386 (either ATT or BSD assembler).  */
10349       output_pic_addr_const (file, XEXP (x, 0), code);
10350       break;
10351 
10352     case CONST_DOUBLE:
10353       if (GET_MODE (x) == VOIDmode)
10354         {
10355           /* We can use %d if the number is <32 bits and positive.  */
10356           if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10357             fprintf (file, "0x%lx%08lx",
10358                      (unsigned long) CONST_DOUBLE_HIGH (x),
10359                      (unsigned long) CONST_DOUBLE_LOW (x));
10360           else
10361             fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10362         }
10363       else
10364         /* We can't handle floating point constants;
10365            PRINT_OPERAND must handle them.  */
10366         output_operand_lossage ("floating constant misused");
10367       break;
10368 
10369     case PLUS:
10370       /* Some assemblers need integer constants to appear first.  */
10371       if (CONST_INT_P (XEXP (x, 0)))
10372         {
10373           output_pic_addr_const (file, XEXP (x, 0), code);
10374           putc ('+', file);
10375           output_pic_addr_const (file, XEXP (x, 1), code);
10376         }
10377       else
10378         {
10379           gcc_assert (CONST_INT_P (XEXP (x, 1)));
10380           output_pic_addr_const (file, XEXP (x, 1), code);
10381           putc ('+', file);
10382           output_pic_addr_const (file, XEXP (x, 0), code);
10383         }
10384       break;
10385 
10386     case MINUS:
10387       if (!TARGET_MACHO)
10388         putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10389       output_pic_addr_const (file, XEXP (x, 0), code);
10390       putc ('-', file);
10391       output_pic_addr_const (file, XEXP (x, 1), code);
10392       if (!TARGET_MACHO)
10393         putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
10394       break;
10395 
10396     case UNSPEC:
10397       gcc_assert (XVECLEN (x, 0) == 1);
10398       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10399       switch (XINT (x, 1))
10400         {
10401         case UNSPEC_GOT:
10402           fputs ("@GOT", file);
10403           break;
10404         case UNSPEC_GOTOFF:
10405           fputs ("@GOTOFF", file);
10406           break;
10407         case UNSPEC_PLTOFF:
10408           fputs ("@PLTOFF", file);
10409           break;
10410         case UNSPEC_GOTPCREL:
10411           fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10412                  "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10413           break;
10414         case UNSPEC_GOTTPOFF:
10415           /* FIXME: This might be @TPOFF in Sun ld too.  */
10416           fputs ("@GOTTPOFF", file);
10417           break;
10418         case UNSPEC_TPOFF:
10419           fputs ("@TPOFF", file);
10420           break;
10421         case UNSPEC_NTPOFF:
10422           if (TARGET_64BIT)
10423             fputs ("@TPOFF", file);
10424           else
10425             fputs ("@NTPOFF", file);
10426           break;
10427         case UNSPEC_DTPOFF:
10428           fputs ("@DTPOFF", file);
10429           break;
10430         case UNSPEC_GOTNTPOFF:
10431           if (TARGET_64BIT)
10432             fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10433                    "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10434           else
10435             fputs ("@GOTNTPOFF", file);
10436           break;
10437         case UNSPEC_INDNTPOFF:
10438           fputs ("@INDNTPOFF", file);
10439           break;
10440 #if TARGET_MACHO
10441         case UNSPEC_MACHOPIC_OFFSET:
10442           putc ('-', file);
10443           machopic_output_function_base_name (file);
10444           break;
10445 #endif
10446         default:
10447           output_operand_lossage ("invalid UNSPEC as operand");
10448           break;
10449         }
10450       break;
10451 
10452     default:
10453       output_operand_lossage ("invalid expression as operand");
10454     }
10455 }
10456 
10457 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10458    We need to emit DTP-relative relocations.  */
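
      /* E.g. (illustrative; the exact ASM_LONG spelling is target-dependent)
         for SIZE == 4 this emits ".long x@DTPOFF", and for SIZE == 8 it
         emits ".long x@DTPOFF, 0".  */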
10459 
10460 static void ATTRIBUTE_UNUSED
10461 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10462 {
10463   fputs (ASM_LONG, file);
10464   output_addr_const (file, x);
10465   fputs ("@DTPOFF", file);
10466   switch (size)
10467     {
10468     case 4:
10469       break;
10470     case 8:
10471       fputs (", 0", file);
10472       break;
10473     default:
10474       gcc_unreachable ();
10475    }
10476 }
10477 
10478 /* Return true if X is a representation of the PIC register.  This copes
10479    with calls from ix86_find_base_term, where the register might have
10480    been replaced by a cselib value.  */
10481 
10482 static bool
10483 ix86_pic_register_p (rtx x)
10484 {
10485   if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
10486     return (pic_offset_table_rtx
10487             && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10488   else
10489     return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10490 }
10491 
10492 /* In the name of slightly smaller debug output, and to cater to
10493    general assembler lossage, recognize PIC+GOTOFF and turn it back
10494    into a direct symbol reference.
10495 
10496    On Darwin, this is necessary to avoid a crash, because Darwin
10497    has a different PIC label for each routine but the DWARF debugging
10498    information is not associated with any particular routine, so it's
10499    necessary to remove references to the PIC label from RTL stored by
10500    the DWARF output code.  */
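
      /* For example (a sketch), (plus (reg:SI ebx) (const (unspec
         [(symbol_ref "x")] UNSPEC_GOTOFF))) is turned back into
         (symbol_ref "x"), with any register or constant addend reattached.  */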
10501 
10502 static rtx
10503 ix86_delegitimize_address (rtx orig_x)
10504 {
10505   rtx x = orig_x;
10506   /* reg_addend is NULL or a multiple of some register.  */
10507   rtx reg_addend = NULL_RTX;
10508   /* const_addend is NULL or a const_int.  */
10509   rtx const_addend = NULL_RTX;
10510   /* This is the result, or NULL.  */
10511   rtx result = NULL_RTX;
10512 
10513   if (MEM_P (x))
10514     x = XEXP (x, 0);
10515 
10516   if (TARGET_64BIT)
10517     {
10518       if (GET_CODE (x) != CONST
10519           || GET_CODE (XEXP (x, 0)) != UNSPEC
10520           || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10521           || !MEM_P (orig_x))
10522         return orig_x;
10523       return XVECEXP (XEXP (x, 0), 0, 0);
10524     }
10525 
10526   if (GET_CODE (x) != PLUS
10527       || GET_CODE (XEXP (x, 1)) != CONST)
10528     return orig_x;
10529 
10530   if (ix86_pic_register_p (XEXP (x, 0)))
10531     /* %ebx + GOT/GOTOFF */
10532     ;
10533   else if (GET_CODE (XEXP (x, 0)) == PLUS)
10534     {
10535       /* %ebx + %reg * scale + GOT/GOTOFF */
10536       reg_addend = XEXP (x, 0);
10537       if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10538         reg_addend = XEXP (reg_addend, 1);
10539       else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10540         reg_addend = XEXP (reg_addend, 0);
10541       else
10542         return orig_x;
10543       if (!REG_P (reg_addend)
10544           && GET_CODE (reg_addend) != MULT
10545           && GET_CODE (reg_addend) != ASHIFT)
10546         return orig_x;
10547     }
10548   else
10549     return orig_x;
10550 
10551   x = XEXP (XEXP (x, 1), 0);
10552   if (GET_CODE (x) == PLUS
10553       && CONST_INT_P (XEXP (x, 1)))
10554     {
10555       const_addend = XEXP (x, 1);
10556       x = XEXP (x, 0);
10557     }
10558 
10559   if (GET_CODE (x) == UNSPEC
10560       && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10561           || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10562     result = XVECEXP (x, 0, 0);
10563 
10564   if (TARGET_MACHO && darwin_local_data_pic (x)
10565       && !MEM_P (orig_x))
10566     result = XVECEXP (x, 0, 0);
10567 
10568   if (! result)
10569     return orig_x;
10570 
10571   if (const_addend)
10572     result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10573   if (reg_addend)
10574     result = gen_rtx_PLUS (Pmode, reg_addend, result);
10575   return result;
10576 }
10577 
10578 /* If X is a machine specific address (i.e. a symbol or label being
10579    referenced as a displacement from the GOT implemented using an
10580    UNSPEC), then return the base term.  Otherwise return X.  */
10581 
10582 rtx
10583 ix86_find_base_term (rtx x)
10584 {
10585   rtx term;
10586 
10587   if (TARGET_64BIT)
10588     {
10589       if (GET_CODE (x) != CONST)
10590         return x;
10591       term = XEXP (x, 0);
10592       if (GET_CODE (term) == PLUS
10593           && (CONST_INT_P (XEXP (term, 1))
10594               || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10595         term = XEXP (term, 0);
10596       if (GET_CODE (term) != UNSPEC
10597           || XINT (term, 1) != UNSPEC_GOTPCREL)
10598         return x;
10599 
10600       return XVECEXP (term, 0, 0);
10601     }
10602 
10603   return ix86_delegitimize_address (x);
10604 }
10605 
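      /* Print to FILE the assembler condition-code suffix (e.g. "e", "ne",
         "b") corresponding to comparison CODE in mode MODE.  If REVERSE is
         nonzero, print the suffix for the reversed condition.  FP nonzero
         selects the spellings that avoid fcmov lossage in some assemblers
         (e.g. "nbe" instead of "a").  */
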
10606 static void
10607 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10608                     int fp, FILE *file)
10609 {
10610   const char *suffix;
10611 
10612   if (mode == CCFPmode || mode == CCFPUmode)
10613     {
10614       enum rtx_code second_code, bypass_code;
10615       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10616       gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10617       code = ix86_fp_compare_code_to_integer (code);
10618       mode = CCmode;
10619     }
10620   if (reverse)
10621     code = reverse_condition (code);
10622 
10623   switch (code)
10624     {
10625     case EQ:
10626       switch (mode)
10627         {
10628         case CCAmode:
10629           suffix = "a";
10630           break;
10631 
10632         case CCCmode:
10633           suffix = "c";
10634           break;
10635 
10636         case CCOmode:
10637           suffix = "o";
10638           break;
10639 
10640         case CCSmode:
10641           suffix = "s";
10642           break;
10643 
10644         default:
10645           suffix = "e";
10646         }
10647       break;
10648     case NE:
10649       switch (mode)
10650         {
10651         case CCAmode:
10652           suffix = "na";
10653           break;
10654 
10655         case CCCmode:
10656           suffix = "nc";
10657           break;
10658 
10659         case CCOmode:
10660           suffix = "no";
10661           break;
10662 
10663         case CCSmode:
10664           suffix = "ns";
10665           break;
10666 
10667         default:
10668           suffix = "ne";
10669         }
10670       break;
10671     case GT:
10672       gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10673       suffix = "g";
10674       break;
10675     case GTU:
10676       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10677          Those same assemblers have the same but opposite lossage on cmov.  */
10678       if (mode == CCmode)
10679         suffix = fp ? "nbe" : "a";
10680       else if (mode == CCCmode)
10681         suffix = "b";
10682       else
10683         gcc_unreachable ();
10684       break;
10685     case LT:
10686       switch (mode)
10687         {
10688         case CCNOmode:
10689         case CCGOCmode:
10690           suffix = "s";
10691           break;
10692 
10693         case CCmode:
10694         case CCGCmode:
10695           suffix = "l";
10696           break;
10697 
10698         default:
10699           gcc_unreachable ();
10700         }
10701       break;
10702     case LTU:
10703       gcc_assert (mode == CCmode || mode == CCCmode);
10704       suffix = "b";
10705       break;
10706     case GE:
10707       switch (mode)
10708         {
10709         case CCNOmode:
10710         case CCGOCmode:
10711           suffix = "ns";
10712           break;
10713 
10714         case CCmode:
10715         case CCGCmode:
10716           suffix = "ge";
10717           break;
10718 
10719         default:
10720           gcc_unreachable ();
10721         }
10722       break;
10723     case GEU:
10724       /* ??? As above.  */
10725       gcc_assert (mode == CCmode || mode == CCCmode);
10726       suffix = fp ? "nb" : "ae";
10727       break;
10728     case LE:
10729       gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10730       suffix = "le";
10731       break;
10732     case LEU:
10733       /* ??? As above.  */
10734       if (mode == CCmode)
10735         suffix = "be";
10736       else if (mode == CCCmode)
10737         suffix = fp ? "nb" : "ae";
10738       else
10739         gcc_unreachable ();
10740       break;
10741     case UNORDERED:
10742       suffix = fp ? "u" : "p";
10743       break;
10744     case ORDERED:
10745       suffix = fp ? "nu" : "np";
10746       break;
10747     default:
10748       gcc_unreachable ();
10749     }
10750   fputs (suffix, file);
10751 }
10752 
10753 /* Print the name of register X to FILE based on its machine mode and number.
10754    If CODE is 'w', pretend the mode is HImode.
10755    If CODE is 'b', pretend the mode is QImode.
10756    If CODE is 'k', pretend the mode is SImode.
10757    If CODE is 'q', pretend the mode is DImode.
10758    If CODE is 'x', pretend the mode is V4SFmode.
10759    If CODE is 't', pretend the mode is V8SFmode.
10760    If CODE is 'h', pretend the reg is the 'high' byte register.
10761    If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10762    If CODE is 'd', duplicate the operand for AVX instruction.
10763  */
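
      /* For example (illustrative), in AT&T syntax the AX register is
         printed as "%al" for code 'b', "%ax" for 'w', "%eax" for 'k', and
         "%rax" for 'q' (the latter only on 64-bit targets).  */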
10764 
10765 void
10766 print_reg (rtx x, int code, FILE *file)
10767 {
10768   const char *reg;
10769   bool duplicated = code == 'd' && TARGET_AVX;
10770 
10771   gcc_assert (x == pc_rtx
10772               || (REGNO (x) != ARG_POINTER_REGNUM
10773                   && REGNO (x) != FRAME_POINTER_REGNUM
10774                   && REGNO (x) != FLAGS_REG
10775                   && REGNO (x) != FPSR_REG
10776                   && REGNO (x) != FPCR_REG));
10777 
10778   if (ASSEMBLER_DIALECT == ASM_ATT)
10779     putc ('%', file);
10780 
10781   if (x == pc_rtx)
10782     {
10783       gcc_assert (TARGET_64BIT);
10784       fputs ("rip", file);
10785       return;
10786     }
10787 
10788   if (code == 'w' || MMX_REG_P (x))
10789     code = 2;
10790   else if (code == 'b')
10791     code = 1;
10792   else if (code == 'k')
10793     code = 4;
10794   else if (code == 'q')
10795     code = 8;
10796   else if (code == 'y')
10797     code = 3;
10798   else if (code == 'h')
10799     code = 0;
10800   else if (code == 'x')
10801     code = 16;
10802   else if (code == 't')
10803     code = 32;
10804   else
10805     code = GET_MODE_SIZE (GET_MODE (x));
10806 
10807   /* Irritatingly, the AMD extended registers use a different naming
10808      convention from the normal registers.  */
10809   if (REX_INT_REG_P (x))
10810     {
10811       gcc_assert (TARGET_64BIT);
10812       switch (code)
10813         {
10814           case 0:
10815             error ("extended registers have no high halves");
10816             break;
10817           case 1:
10818             fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10819             break;
10820           case 2:
10821             fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10822             break;
10823           case 4:
10824             fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10825             break;
10826           case 8:
10827             fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10828             break;
10829           default:
10830             error ("unsupported operand size for extended register");
10831             break;
10832         }
10833       return;
10834     }
10835 
10836   reg = NULL;
10837   switch (code)
10838     {
10839     case 3:
10840       if (STACK_TOP_P (x))
10841         {
10842           reg = "st(0)";
10843           break;
10844         }
10845       /* FALLTHRU */
10846     case 8:
10847     case 4:
10848     case 12:
10849       if (! ANY_FP_REG_P (x))
10850         putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10851       /* FALLTHRU */
10852     case 16:
10853     case 2:
10854     normal:
10855       reg = hi_reg_name[REGNO (x)];
10856       break;
10857     case 1:
10858       if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10859         goto normal;
10860       reg = qi_reg_name[REGNO (x)];
10861       break;
10862     case 0:
10863       if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10864         goto normal;
10865       reg = qi_high_reg_name[REGNO (x)];
10866       break;
10867     case 32:
10868       if (SSE_REG_P (x))
10869         {
10870           gcc_assert (!duplicated);
10871           putc ('y', file);
10872           fputs (hi_reg_name[REGNO (x)] + 1, file);
10873           return;
10874         }
10875       break;
10876     default:
10877       gcc_unreachable ();
10878     }
10879 
10880   fputs (reg, file);
10881   if (duplicated)
10882     {
10883       if (ASSEMBLER_DIALECT == ASM_ATT)
10884         fprintf (file, ", %%%s", reg);
10885       else
10886         fprintf (file, ", %s", reg);
10887     }
10888 }
10889 
10890 /* Locate some local-dynamic symbol still in use by this function
10891    so that we can print its name in some tls_local_dynamic_base
10892    pattern.  */
10893 
10894 static int
10895 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10896 {
10897   rtx x = *px;
10898 
10899   if (GET_CODE (x) == SYMBOL_REF
10900       && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10901     {
10902       cfun->machine->some_ld_name = XSTR (x, 0);
10903       return 1;
10904     }
10905 
10906   return 0;
10907 }
10908 
10909 static const char *
10910 get_some_local_dynamic_name (void)
10911 {
10912   rtx insn;
10913 
10914   if (cfun->machine->some_ld_name)
10915     return cfun->machine->some_ld_name;
10916 
10917   for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10918     if (INSN_P (insn)
10919         && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10920       return cfun->machine->some_ld_name;
10921 
10922   gcc_unreachable ();
10923 }
10924 
10925 /* Meaning of CODE:
10926    L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10927    C -- print opcode suffix for set/cmov insn.
10928    c -- like C, but print reversed condition
10929    F,f -- likewise, but for floating-point.
10930    O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10931         otherwise nothing
10932    R -- print the prefix for register names.
10933    z -- print the opcode suffix for the size of the current operand.
10934    * -- print a star (in certain assembler syntax)
10935    A -- print an absolute memory reference.
10936    w -- print the operand as if it's a "word" (HImode) even if it isn't.
10937    s -- print a shift double count, followed by the assembler's argument
10938         delimiter.
10939    b -- print the QImode name of the register for the indicated operand.
10940         %b0 would print %al if operands[0] is reg 0.
10941    w --  likewise, print the HImode name of the register.
10942    k --  likewise, print the SImode name of the register.
10943    q --  likewise, print the DImode name of the register.
10944    x --  likewise, print the V4SFmode name of the register.
10945    t --  likewise, print the V8SFmode name of the register.
10946    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10947    y -- print "st(0)" instead of "st" as a register.
10948    d -- print duplicated register operand for AVX instruction.
10949    D -- print condition for SSE cmp instruction.
10950    P -- if PIC, print an @PLT suffix.
10951    X -- don't print any sort of PIC '@' suffix for a symbol.
10952    & -- print some in-use local-dynamic symbol name.
10953    H -- print a memory address offset by 8; used for sse high-parts
10954    Y -- print condition for SSE5 com* instruction.
10955    + -- print a branch hint as 'cs' or 'ds' prefix
10956    ; -- print a semicolon (after prefixes due to bug in older gas).
10957  */
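
      /* For example (a sketch): with an SImode register operand, "%z0"
         prints "l", so a template like "mov%z0" assembles as "movl".  */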
10958 
10959 void
10960 print_operand (FILE *file, rtx x, int code)
10961 {
10962   if (code)
10963     {
10964       switch (code)
10965         {
10966         case '*':
10967           if (ASSEMBLER_DIALECT == ASM_ATT)
10968             putc ('*', file);
10969           return;
10970 
10971         case '&':
10972           assemble_name (file, get_some_local_dynamic_name ());
10973           return;
10974 
10975         case 'A':
10976           switch (ASSEMBLER_DIALECT)
10977             {
10978             case ASM_ATT:
10979               putc ('*', file);
10980               break;
10981 
10982             case ASM_INTEL:
10983               /* Intel syntax.  For absolute addresses, registers should not
10984                  be surrounded by brackets.  */
10985               if (!REG_P (x))
10986                 {
10987                   putc ('[', file);
10988                   PRINT_OPERAND (file, x, 0);
10989                   putc (']', file);
10990                   return;
10991                 }
10992               break;
10993 
10994             default:
10995               gcc_unreachable ();
10996             }
10997 
10998           PRINT_OPERAND (file, x, 0);
10999           return;
11000 
11002         case 'L':
11003           if (ASSEMBLER_DIALECT == ASM_ATT)
11004             putc ('l', file);
11005           return;
11006 
11007         case 'W':
11008           if (ASSEMBLER_DIALECT == ASM_ATT)
11009             putc ('w', file);
11010           return;
11011 
11012         case 'B':
11013           if (ASSEMBLER_DIALECT == ASM_ATT)
11014             putc ('b', file);
11015           return;
11016 
11017         case 'Q':
11018           if (ASSEMBLER_DIALECT == ASM_ATT)
11019             putc ('l', file);
11020           return;
11021 
11022         case 'S':
11023           if (ASSEMBLER_DIALECT == ASM_ATT)
11024             putc ('s', file);
11025           return;
11026 
11027         case 'T':
11028           if (ASSEMBLER_DIALECT == ASM_ATT)
11029             putc ('t', file);
11030           return;
11031 
11032         case 'z':
11033           /* 387 opcodes don't get size suffixes if the operands are
11034              registers.  */
11035           if (STACK_REG_P (x))
11036             return;
11037 
11038           /* Likewise if using Intel opcodes.  */
11039           if (ASSEMBLER_DIALECT == ASM_INTEL)
11040             return;
11041 
11042           /* Derive the opcode suffix from the size of the operand.  */
11043           switch (GET_MODE_SIZE (GET_MODE (x)))
11044             {
11045             case 1:
11046               putc ('b', file);
11047               return;
11048 
11049             case 2:
11050               if (MEM_P (x))
11051                 {
11052 #ifdef HAVE_GAS_FILDS_FISTS
11053                   putc ('s', file);
11054 #endif
11055                   return;
11056                 }
11057               else
11058                 putc ('w', file);
11059               return;
11060 
11061             case 4:
11062               if (GET_MODE (x) == SFmode)
11063                 {
11064                   putc ('s', file);
11065                   return;
11066                 }
11067               else
11068                 putc ('l', file);
11069               return;
11070 
11071             case 12:
11072             case 16:
11073               putc ('t', file);
11074               return;
11075 
11076             case 8:
11077               if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11078                 {
11079                   if (MEM_P (x))
11080                     {
11081 #ifdef GAS_MNEMONICS
11082                       putc ('q', file);
11083 #else
11084                       putc ('l', file);
11085                       putc ('l', file);
11086 #endif
11087                     }
11088                   else
11089                     putc ('q', file);
11090                 }
11091               else
11092                 putc ('l', file);
11093               return;
11094 
11095             default:
11096               gcc_unreachable ();
11097             }
11098 
11099         case 'd':
11100         case 'b':
11101         case 'w':
11102         case 'k':
11103         case 'q':
11104         case 'h':
11105         case 't':
11106         case 'y':
11107         case 'x':
11108         case 'X':
11109         case 'P':
11110           break;
11111 
11112         case 's':
11113           if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11114             {
11115               PRINT_OPERAND (file, x, 0);
11116               fputs (", ", file);
11117             }
11118           return;
11119 
11120         case 'D':
11121           /* Little bit of braindamage here.  The SSE compare instructions
11122              use completely different names for the comparisons than the
11123              fp conditional moves do.  */
11124           if (TARGET_AVX)
11125             {
11126               switch (GET_CODE (x))
11127                 {
11128                 case EQ:
11129                   fputs ("eq", file);
11130                   break;
11131                 case UNEQ:
11132                   fputs ("eq_us", file);
11133                   break;
11134                 case LT:
11135                   fputs ("lt", file);
11136                   break;
11137                 case UNLT:
11138                   fputs ("nge", file);
11139                   break;
11140                 case LE:
11141                   fputs ("le", file);
11142                   break;
11143                 case UNLE:
11144                   fputs ("ngt", file);
11145                   break;
11146                 case UNORDERED:
11147                   fputs ("unord", file);
11148                   break;
11149                 case NE:
11150                   fputs ("neq", file);
11151                   break;
11152                 case LTGT:
11153                   fputs ("neq_oq", file);
11154                   break;
11155                 case GE:
11156                   fputs ("ge", file);
11157                   break;
11158                 case UNGE:
11159                   fputs ("nlt", file);
11160                   break;
11161                 case GT:
11162                   fputs ("gt", file);
11163                   break;
11164                 case UNGT:
11165                   fputs ("nle", file);
11166                   break;
11167                 case ORDERED:
11168                   fputs ("ord", file);
11169                   break;
11170                 default:
11171                   output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11172                   return;
11173                 }
11174             }
11175           else
11176             {
11177               switch (GET_CODE (x))
11178                 {
11179                 case EQ:
11180                 case UNEQ:
11181                   fputs ("eq", file);
11182                   break;
11183                 case LT:
11184                 case UNLT:
11185                   fputs ("lt", file);
11186                   break;
11187                 case LE:
11188                 case UNLE:
11189                   fputs ("le", file);
11190                   break;
11191                 case UNORDERED:
11192                   fputs ("unord", file);
11193                   break;
11194                 case NE:
11195                 case LTGT:
11196                   fputs ("neq", file);
11197                   break;
11198                 case UNGE:
11199                 case GE:
11200                   fputs ("nlt", file);
11201                   break;
11202                 case UNGT:
11203                 case GT:
11204                   fputs ("nle", file);
11205                   break;
11206                 case ORDERED:
11207                   fputs ("ord", file);
11208                   break;
11209                 default:
11210                   output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11211                   return;
11212                 }
11213             }
11214           return;
11215         case 'O':
11216 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11217           if (ASSEMBLER_DIALECT == ASM_ATT)
11218             {
11219               switch (GET_MODE (x))
11220                 {
11221                 case HImode: putc ('w', file); break;
11222                 case SImode:
11223                 case SFmode: putc ('l', file); break;
11224                 case DImode:
11225                 case DFmode: putc ('q', file); break;
11226                 default: gcc_unreachable ();
11227                 }
11228               putc ('.', file);
11229             }
11230 #endif
11231           return;
11232         case 'C':
11233           if (!COMPARISON_P (x))
11234             {
11235               output_operand_lossage ("operand is neither a constant nor a "
11236                                       "condition code, invalid operand code "
11237                                       "'C'");
11238               return;
11239             }
11240           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11241           return;
11242         case 'F':
11243           if (!COMPARISON_P (x))
11244             {
11245               output_operand_lossage ("operand is neither a constant nor a "
11246                                       "condition code, invalid operand code "
11247                                       "'F'");
11248               return;
11249             }
11250 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11251           if (ASSEMBLER_DIALECT == ASM_ATT)
11252             putc ('.', file);
11253 #endif
11254           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11255           return;
11256 
11257           /* Like above, but reverse condition */
11258         case 'c':
11259           /* Check to see if argument to %c is really a constant
11260              and not a condition code which needs to be reversed.  */
11261           if (!COMPARISON_P (x))
11262             {
11263               output_operand_lossage ("operand is neither a constant nor a "
11264                                       "condition code, invalid operand "
11265                                       "code 'c'");
11266               return;
11267             }
11268           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11269           return;
11270         case 'f':
11271           if (!COMPARISON_P (x))
11272             {
11273               output_operand_lossage ("operand is neither a constant nor a "
11274                                       "condition code, invalid operand "
11275                                       "code 'f'");
11276               return;
11277             }
11278 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11279           if (ASSEMBLER_DIALECT == ASM_ATT)
11280             putc ('.', file);
11281 #endif
11282           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
11283           return;
11284 
11285         case 'H':
11286           /* It doesn't actually matter what mode we use here, as we're
11287              only going to use this for printing.  */
11288           x = adjust_address_nv (x, DImode, 8);
11289           break;
11290 
11291         case '+':
11292           {
11293             rtx x;
11294 
            if (!optimize
                || optimize_function_for_size_p (cfun)
                || !TARGET_BRANCH_PREDICTION_HINTS)
11297               return;
11298 
11299             x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11300             if (x)
11301               {
11302                 int pred_val = INTVAL (XEXP (x, 0));
11303 
11304                 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11305                     || pred_val > REG_BR_PROB_BASE * 55 / 100)
11306                   {
11307                     int taken = pred_val > REG_BR_PROB_BASE / 2;
11308                     int cputaken = final_forward_branch_p (current_output_insn) == 0;
11309 
11310                     /* Emit hints only in the case default branch prediction
11311                        heuristics would fail.  */
11312                     if (taken != cputaken)
11313                       {
11314                         /* We use 3e (DS) prefix for taken branches and
11315                            2e (CS) prefix for not taken branches.  */
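                        /* E.g. a predicted-taken conditional jump is
                           emitted as "ds ; jne .L2" rather than plain
                           "jne .L2" (label illustrative).  */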
11316                         if (taken)
11317                           fputs ("ds ; ", file);
11318                         else
11319                           fputs ("cs ; ", file);
11320                       }
11321                   }
11322               }
11323             return;
11324           }
11325 
11326         case 'Y':
11327           switch (GET_CODE (x))
11328             {
11329             case NE:
11330               fputs ("neq", file);
11331               break;
11332             case EQ:
11333               fputs ("eq", file);
11334               break;
11335             case GE:
11336             case GEU:
11337               fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11338               break;
11339             case GT:
11340             case GTU:
11341               fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11342               break;
11343             case LE:
11344             case LEU:
11345               fputs ("le", file);
11346               break;
11347             case LT:
11348             case LTU:
11349               fputs ("lt", file);
11350               break;
11351             case UNORDERED:
11352               fputs ("unord", file);
11353               break;
11354             case ORDERED:
11355               fputs ("ord", file);
11356               break;
11357             case UNEQ:
11358               fputs ("ueq", file);
11359               break;
11360             case UNGE:
11361               fputs ("nlt", file);
11362               break;
11363             case UNGT:
11364               fputs ("nle", file);
11365               break;
11366             case UNLE:
11367               fputs ("ule", file);
11368               break;
11369             case UNLT:
11370               fputs ("ult", file);
11371               break;
11372             case LTGT:
11373               fputs ("une", file);
11374               break;
11375             default:
              output_operand_lossage ("operand is not a condition code, invalid operand code 'Y'");
11377               return;
11378             }
11379           return;
11380 
11381         case ';':
11382 #if TARGET_MACHO
11383           fputs (" ; ", file);
11384 #else
11385           fputc (' ', file);
11386 #endif
11387           return;
11388 
11389         default:
11390             output_operand_lossage ("invalid operand code '%c'", code);
11391         }
11392     }
11393 
11394   if (REG_P (x))
11395     print_reg (x, code, file);
11396 
11397   else if (MEM_P (x))
11398     {
11399       /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
11400       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11401           && GET_MODE (x) != BLKmode)
11402         {
11403           const char * size;
11404           switch (GET_MODE_SIZE (GET_MODE (x)))
11405             {
11406             case 1: size = "BYTE"; break;
11407             case 2: size = "WORD"; break;
11408             case 4: size = "DWORD"; break;
11409             case 8: size = "QWORD"; break;
11410             case 12: size = "TBYTE"; break;
11411             case 16:
11412               if (GET_MODE (x) == XFmode)
11413                 size = "TBYTE";
11414               else
11415                 size = "XMMWORD";
11416               break;
11417             case 32: size = "YMMWORD"; break;
11418             default:
11419               gcc_unreachable ();
11420             }
11421 
          /* Check for explicit size override (codes 'b', 'w' and 'k').  */
11423           if (code == 'b')
11424             size = "BYTE";
11425           else if (code == 'w')
11426             size = "WORD";
11427           else if (code == 'k')
11428             size = "DWORD";
11429 
11430           fputs (size, file);
11431           fputs (" PTR ", file);
11432         }
11433 
11434       x = XEXP (x, 0);
11435       /* Avoid (%rip) for call operands.  */
11436       if (CONSTANT_ADDRESS_P (x) && code == 'P'
11437           && !CONST_INT_P (x))
11438         output_addr_const (file, x);
11439       else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11440         output_operand_lossage ("invalid constraints for operand");
11441       else
11442         output_address (x);
11443     }
11444 
11445   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11446     {
11447       REAL_VALUE_TYPE r;
11448       long l;
11449 
11450       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11451       REAL_VALUE_TO_TARGET_SINGLE (r, l);
11452 
11453       if (ASSEMBLER_DIALECT == ASM_ATT)
11454         putc ('$', file);
11455       fprintf (file, "0x%08lx", (long unsigned int) l);
11456     }
11457 
11458   /* These float cases don't actually occur as immediate operands.  */
11459   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11460     {
11461       char dstr[30];
11462 
11463       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11464       fprintf (file, "%s", dstr);
11465     }
11466 
11467   else if (GET_CODE (x) == CONST_DOUBLE
11468            && GET_MODE (x) == XFmode)
11469     {
11470       char dstr[30];
11471 
11472       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11473       fprintf (file, "%s", dstr);
11474     }
11475 
11476   else
11477     {
11478       /* We have patterns that allow zero sets of memory, for instance.
11479          In 64-bit mode, we should probably support all 8-byte vectors,
11480          since we can in fact encode that into an immediate.  */
11481       if (GET_CODE (x) == CONST_VECTOR)
11482         {
11483           gcc_assert (x == CONST0_RTX (GET_MODE (x)));
11484           x = const0_rtx;
11485         }
11486 
11487       if (code != 'P')
11488         {
11489           if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11490             {
11491               if (ASSEMBLER_DIALECT == ASM_ATT)
11492                 putc ('$', file);
11493             }
11494           else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11495                    || GET_CODE (x) == LABEL_REF)
11496             {
11497               if (ASSEMBLER_DIALECT == ASM_ATT)
11498                 putc ('$', file);
11499               else
11500                 fputs ("OFFSET FLAT:", file);
11501             }
11502         }
11503       if (CONST_INT_P (x))
11504         fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11505       else if (flag_pic)
11506         output_pic_addr_const (file, x, code);
11507       else
11508         output_addr_const (file, x);
11509     }
11510 }
11511 
11512 /* Print a memory operand whose address is ADDR.  */
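/* For instance, an address with base %ebp, index %eax, scale 4 and
   displacement 8 is printed as "8(%ebp,%eax,4)" in AT&T syntax and
   as "[ebp+8+eax*4]" in Intel syntax by the code below.  */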
11513 
11514 void
11515 print_operand_address (FILE *file, rtx addr)
11516 {
11517   struct ix86_address parts;
11518   rtx base, index, disp;
11519   int scale;
11520   int ok = ix86_decompose_address (addr, &parts);
11521 
11522   gcc_assert (ok);
11523 
11524   base = parts.base;
11525   index = parts.index;
11526   disp = parts.disp;
11527   scale = parts.scale;
11528 
11529   switch (parts.seg)
11530     {
11531     case SEG_DEFAULT:
11532       break;
11533     case SEG_FS:
11534     case SEG_GS:
11535       if (ASSEMBLER_DIALECT == ASM_ATT)
11536         putc ('%', file);
11537       fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11538       break;
11539     default:
11540       gcc_unreachable ();
11541     }
11542 
  /* In 64-bit mode, use RIP-relative addressing where possible, since
     it is one byte shorter than absolute addressing.  */
11544   if (TARGET_64BIT && !base && !index)
11545     {
11546       rtx symbol = disp;
11547 
11548       if (GET_CODE (disp) == CONST
11549           && GET_CODE (XEXP (disp, 0)) == PLUS
11550           && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11551         symbol = XEXP (XEXP (disp, 0), 0);
11552 
11553       if (GET_CODE (symbol) == LABEL_REF
11554           || (GET_CODE (symbol) == SYMBOL_REF
11555               && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11556         base = pc_rtx;
11557     }
11558   if (!base && !index)
11559     {
      /* A displacement-only address requires special attention.  */
11561 
11562       if (CONST_INT_P (disp))
11563         {
11564           if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11565             fputs ("ds:", file);
11566           fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11567         }
11568       else if (flag_pic)
11569         output_pic_addr_const (file, disp, 0);
11570       else
11571         output_addr_const (file, disp);
11572     }
11573   else
11574     {
11575       if (ASSEMBLER_DIALECT == ASM_ATT)
11576         {
11577           if (disp)
11578             {
11579               if (flag_pic)
11580                 output_pic_addr_const (file, disp, 0);
11581               else if (GET_CODE (disp) == LABEL_REF)
11582                 output_asm_label (disp);
11583               else
11584                 output_addr_const (file, disp);
11585             }
11586 
11587           putc ('(', file);
11588           if (base)
11589             print_reg (base, 0, file);
11590           if (index)
11591             {
11592               putc (',', file);
11593               print_reg (index, 0, file);
11594               if (scale != 1)
11595                 fprintf (file, ",%d", scale);
11596             }
11597           putc (')', file);
11598         }
11599       else
11600         {
11601           rtx offset = NULL_RTX;
11602 
11603           if (disp)
11604             {
11605               /* Pull out the offset of a symbol; print any symbol itself.  */
11606               if (GET_CODE (disp) == CONST
11607                   && GET_CODE (XEXP (disp, 0)) == PLUS
11608                   && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11609                 {
11610                   offset = XEXP (XEXP (disp, 0), 1);
11611                   disp = gen_rtx_CONST (VOIDmode,
11612                                         XEXP (XEXP (disp, 0), 0));
11613                 }
11614 
11615               if (flag_pic)
11616                 output_pic_addr_const (file, disp, 0);
11617               else if (GET_CODE (disp) == LABEL_REF)
11618                 output_asm_label (disp);
11619               else if (CONST_INT_P (disp))
11620                 offset = disp;
11621               else
11622                 output_addr_const (file, disp);
11623             }
11624 
11625           putc ('[', file);
11626           if (base)
11627             {
11628               print_reg (base, 0, file);
11629               if (offset)
11630                 {
11631                   if (INTVAL (offset) >= 0)
11632                     putc ('+', file);
11633                   fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11634                 }
11635             }
11636           else if (offset)
11637             fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11638           else
11639             putc ('0', file);
11640 
11641           if (index)
11642             {
11643               putc ('+', file);
11644               print_reg (index, 0, file);
11645               if (scale != 1)
11646                 fprintf (file, "*%d", scale);
11647             }
11648           putc (']', file);
11649         }
11650     }
11651 }
11652 
11653 bool
11654 output_addr_const_extra (FILE *file, rtx x)
11655 {
11656   rtx op;
11657 
11658   if (GET_CODE (x) != UNSPEC)
11659     return false;
11660 
11661   op = XVECEXP (x, 0, 0);
11662   switch (XINT (x, 1))
11663     {
11664     case UNSPEC_GOTTPOFF:
11665       output_addr_const (file, op);
11666       /* FIXME: This might be @TPOFF in Sun ld.  */
11667       fputs ("@GOTTPOFF", file);
11668       break;
11669     case UNSPEC_TPOFF:
11670       output_addr_const (file, op);
11671       fputs ("@TPOFF", file);
11672       break;
11673     case UNSPEC_NTPOFF:
11674       output_addr_const (file, op);
11675       if (TARGET_64BIT)
11676         fputs ("@TPOFF", file);
11677       else
11678         fputs ("@NTPOFF", file);
11679       break;
11680     case UNSPEC_DTPOFF:
11681       output_addr_const (file, op);
11682       fputs ("@DTPOFF", file);
11683       break;
11684     case UNSPEC_GOTNTPOFF:
11685       output_addr_const (file, op);
11686       if (TARGET_64BIT)
11687         fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11688                "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11689       else
11690         fputs ("@GOTNTPOFF", file);
11691       break;
11692     case UNSPEC_INDNTPOFF:
11693       output_addr_const (file, op);
11694       fputs ("@INDNTPOFF", file);
11695       break;
11696 #if TARGET_MACHO
11697     case UNSPEC_MACHOPIC_OFFSET:
11698       output_addr_const (file, op);
11699       putc ('-', file);
11700       machopic_output_function_base_name (file);
11701       break;
11702 #endif
11703 
11704     default:
11705       return false;
11706     }
11707 
11708   return true;
11709 }
11710 
11711 /* Split one or more DImode RTL references into pairs of SImode
11712    references.  The RTL can be REG, offsettable MEM, integer constant, or
11713    CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
11714    split and "num" is its length.  lo_half and hi_half are output arrays
11715    that parallel "operands".  */
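/* For example, a DImode register operand yields two SImode subregs at
   byte offsets 0 and 4, while an offsettable MEM yields two adjacent
   SImode memory references.  */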
11716 
11717 void
11718 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11719 {
11720   while (num--)
11721     {
11722       rtx op = operands[num];
11723 
      /* simplify_subreg refuses to split volatile memory references,
         but we still have to handle them.  */
11726       if (MEM_P (op))
11727         {
11728           lo_half[num] = adjust_address (op, SImode, 0);
11729           hi_half[num] = adjust_address (op, SImode, 4);
11730         }
11731       else
11732         {
11733           lo_half[num] = simplify_gen_subreg (SImode, op,
11734                                               GET_MODE (op) == VOIDmode
11735                                               ? DImode : GET_MODE (op), 0);
11736           hi_half[num] = simplify_gen_subreg (SImode, op,
11737                                               GET_MODE (op) == VOIDmode
11738                                               ? DImode : GET_MODE (op), 4);
11739         }
11740     }
11741 }

/* Split one or more TImode RTL references into pairs of DImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */
11747 
11748 void
11749 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11750 {
11751   while (num--)
11752     {
11753       rtx op = operands[num];
11754 
      /* simplify_subreg refuses to split volatile memory references,
         but we still have to handle them.  */
11757       if (MEM_P (op))
11758         {
11759           lo_half[num] = adjust_address (op, DImode, 0);
11760           hi_half[num] = adjust_address (op, DImode, 8);
11761         }
11762       else
11763         {
11764           lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11765           hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11766         }
11767     }
11768 }
11769 
11770 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11771    MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
11772    is the expression of the binary operation.  The output may either be
11773    emitted here, or returned to the caller, like all output_* functions.
11774 
11775    There is no guarantee that the operands are the same mode, as they
11776    might be within FLOAT or FLOAT_EXTEND expressions.  */
11777 
11778 #ifndef SYSV386_COMPAT
11779 /* Set to 1 for compatibility with brain-damaged assemblers.  No-one
11780    wants to fix the assemblers because that causes incompatibility
11781    with gcc.  No-one wants to fix gcc because that causes
11782    incompatibility with assemblers...  You can use the option of
11783    -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
11784 #define SYSV386_COMPAT 1
11785 #endif
11786 
11787 const char *
11788 output_387_binary_op (rtx insn, rtx *operands)
11789 {
11790   static char buf[40];
11791   const char *p;
11792   const char *ssep;
11793   int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11794 
11795 #ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents the input
     constraints, which helps in understanding the following code.  */
11798   if (STACK_REG_P (operands[0])
11799       && ((REG_P (operands[1])
11800            && REGNO (operands[0]) == REGNO (operands[1])
11801            && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11802           || (REG_P (operands[2])
11803               && REGNO (operands[0]) == REGNO (operands[2])
11804               && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11805       && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11806     ; /* ok */
11807   else
11808     gcc_assert (is_sse);
11809 #endif
11810 
11811   switch (GET_CODE (operands[3]))
11812     {
11813     case PLUS:
11814       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11815           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11816         p = "fiadd";
11817       else
11818         p = "fadd";
11819       ssep = "vadd";
11820       break;
11821 
11822     case MINUS:
11823       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11824           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11825         p = "fisub";
11826       else
11827         p = "fsub";
11828       ssep = "vsub";
11829       break;
11830 
11831     case MULT:
11832       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11833           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11834         p = "fimul";
11835       else
11836         p = "fmul";
11837       ssep = "vmul";
11838       break;
11839 
11840     case DIV:
11841       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11842           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11843         p = "fidiv";
11844       else
11845         p = "fdiv";
11846       ssep = "vdiv";
11847       break;
11848 
11849     default:
11850       gcc_unreachable ();
11851     }
11852 
11853   if (is_sse)
11854    {
11855      if (TARGET_AVX)
11856        {
11857          strcpy (buf, ssep);
11858          if (GET_MODE (operands[0]) == SFmode)
11859            strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11860          else
11861            strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11862        }
11863      else
11864        {
11865          strcpy (buf, ssep + 1);
11866          if (GET_MODE (operands[0]) == SFmode)
11867            strcat (buf, "ss\t{%2, %0|%0, %2}");
11868          else
11869            strcat (buf, "sd\t{%2, %0|%0, %2}");
11870        }
11871       return buf;
11872    }
11873   strcpy (buf, p);
11874 
11875   switch (GET_CODE (operands[3]))
11876     {
11877     case MULT:
11878     case PLUS:
11879       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11880         {
11881           rtx temp = operands[2];
11882           operands[2] = operands[1];
11883           operands[1] = temp;
11884         }
11885 
      /* We now know that operands[0] == operands[1].  */
11887 
11888       if (MEM_P (operands[2]))
11889         {
11890           p = "%z2\t%2";
11891           break;
11892         }
11893 
11894       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11895         {
11896           if (STACK_TOP_P (operands[0]))
11897             /* How is it that we are storing to a dead operand[2]?
11898                Well, presumably operands[1] is dead too.  We can't
11899                store the result to st(0) as st(0) gets popped on this
11900                instruction.  Instead store to operands[2] (which I
11901                think has to be st(1)).  st(1) will be popped later.
11902                gcc <= 2.8.1 didn't have this check and generated
11903                assembly code that the Unixware assembler rejected.  */
11904             p = "p\t{%0, %2|%2, %0}";   /* st(1) = st(0) op st(1); pop */
11905           else
11906             p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
11907           break;
11908         }
11909 
11910       if (STACK_TOP_P (operands[0]))
11911         p = "\t{%y2, %0|%0, %y2}";      /* st(0) = st(0) op st(r2) */
11912       else
11913         p = "\t{%2, %0|%0, %2}";        /* st(r1) = st(r1) op st(0) */
11914       break;
11915 
11916     case MINUS:
11917     case DIV:
11918       if (MEM_P (operands[1]))
11919         {
11920           p = "r%z1\t%1";
11921           break;
11922         }
11923 
11924       if (MEM_P (operands[2]))
11925         {
11926           p = "%z2\t%2";
11927           break;
11928         }
11929 
11930       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11931         {
11932 #if SYSV386_COMPAT
11933           /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11934              derived assemblers, confusingly reverse the direction of
11935              the operation for fsub{r} and fdiv{r} when the
11936              destination register is not st(0).  The Intel assembler
11937              doesn't have this brain damage.  Read !SYSV386_COMPAT to
11938              figure out what the hardware really does.  */
11939           if (STACK_TOP_P (operands[0]))
11940             p = "{p\t%0, %2|rp\t%2, %0}";
11941           else
11942             p = "{rp\t%2, %0|p\t%0, %2}";
11943 #else
11944           if (STACK_TOP_P (operands[0]))
11945             /* As above for fmul/fadd, we can't store to st(0).  */
11946             p = "rp\t{%0, %2|%2, %0}";  /* st(1) = st(0) op st(1); pop */
11947           else
11948             p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
11949 #endif
11950           break;
11951         }
11952 
11953       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11954         {
11955 #if SYSV386_COMPAT
11956           if (STACK_TOP_P (operands[0]))
11957             p = "{rp\t%0, %1|p\t%1, %0}";
11958           else
11959             p = "{p\t%1, %0|rp\t%0, %1}";
11960 #else
11961           if (STACK_TOP_P (operands[0]))
11962             p = "p\t{%0, %1|%1, %0}";   /* st(1) = st(1) op st(0); pop */
11963           else
11964             p = "rp\t{%1, %0|%0, %1}";  /* st(r2) = st(0) op st(r2); pop */
11965 #endif
11966           break;
11967         }
11968 
11969       if (STACK_TOP_P (operands[0]))
11970         {
11971           if (STACK_TOP_P (operands[1]))
11972             p = "\t{%y2, %0|%0, %y2}";  /* st(0) = st(0) op st(r2) */
11973           else
11974             p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11975           break;
11976         }
11977       else if (STACK_TOP_P (operands[1]))
11978         {
11979 #if SYSV386_COMPAT
11980           p = "{\t%1, %0|r\t%0, %1}";
11981 #else
11982           p = "r\t{%1, %0|%0, %1}";     /* st(r2) = st(0) op st(r2) */
11983 #endif
11984         }
11985       else
11986         {
11987 #if SYSV386_COMPAT
11988           p = "{r\t%2, %0|\t%0, %2}";
11989 #else
11990           p = "\t{%2, %0|%0, %2}";      /* st(r1) = st(r1) op st(0) */
11991 #endif
11992         }
11993       break;
11994 
11995     default:
11996       gcc_unreachable ();
11997     }
11998 
11999   strcat (buf, p);
12000   return buf;
12001 }
12002 
/* Return the mode needed for ENTITY in the optimize_mode_switching pass.  */
12004 
12005 int
12006 ix86_mode_needed (int entity, rtx insn)
12007 {
12008   enum attr_i387_cw mode;
12009 
  /* The mode UNINITIALIZED is used to store the control word after a
     function call or ASM pattern.  The mode ANY specifies that the
     function has no requirements on the control word and makes no
     changes in the bits we are interested in.  */
12014 
12015   if (CALL_P (insn)
12016       || (NONJUMP_INSN_P (insn)
12017           && (asm_noperands (PATTERN (insn)) >= 0
12018               || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
12019     return I387_CW_UNINITIALIZED;
12020 
12021   if (recog_memoized (insn) < 0)
12022     return I387_CW_ANY;
12023 
12024   mode = get_attr_i387_cw (insn);
12025 
12026   switch (entity)
12027     {
12028     case I387_TRUNC:
12029       if (mode == I387_CW_TRUNC)
12030         return mode;
12031       break;
12032 
12033     case I387_FLOOR:
12034       if (mode == I387_CW_FLOOR)
12035         return mode;
12036       break;
12037 
12038     case I387_CEIL:
12039       if (mode == I387_CW_CEIL)
12040         return mode;
12041       break;
12042 
12043     case I387_MASK_PM:
12044       if (mode == I387_CW_MASK_PM)
12045         return mode;
12046       break;
12047 
12048     default:
12049       gcc_unreachable ();
12050     }
12051 
12052   return I387_CW_ANY;
12053 }
12054 
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  MODE selects the required rounding mode; the
   current control word is saved, and a modified copy is stored in the
   stack slot corresponding to MODE.  */
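/* Bits 10-11 of the 387 control word select the rounding mode:
   00 = to nearest, 01 = down (toward -inf), 10 = up (toward +inf),
   11 = toward zero (truncate); hence the 0x0400, 0x0800 and 0x0c00
   values used below.  Bit 5 (0x0020) masks the precision exception.  */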
12058 
12059 void
12060 emit_i387_cw_initialization (int mode)
12061 {
12062   rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
12063   rtx new_mode;
12064 
12065   enum ix86_stack_slot slot;
12066 
12067   rtx reg = gen_reg_rtx (HImode);
12068 
12069   emit_insn (gen_x86_fnstcw_1 (stored_mode));
12070   emit_move_insn (reg, copy_rtx (stored_mode));
12071 
12072   if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12073       || optimize_function_for_size_p (cfun))
12074     {
12075       switch (mode)
12076         {
12077         case I387_CW_TRUNC:
12078           /* round toward zero (truncate) */
12079           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12080           slot = SLOT_CW_TRUNC;
12081           break;
12082 
12083         case I387_CW_FLOOR:
12084           /* round down toward -oo */
12085           emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12086           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12087           slot = SLOT_CW_FLOOR;
12088           break;
12089 
12090         case I387_CW_CEIL:
12091           /* round up toward +oo */
12092           emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12093           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12094           slot = SLOT_CW_CEIL;
12095           break;
12096 
12097         case I387_CW_MASK_PM:
12098           /* mask precision exception for nearbyint() */
12099           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12100           slot = SLOT_CW_MASK_PM;
12101           break;
12102 
12103         default:
12104           gcc_unreachable ();
12105         }
12106     }
12107   else
12108     {
12109       switch (mode)
12110         {
12111         case I387_CW_TRUNC:
12112           /* round toward zero (truncate) */
12113           emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12114           slot = SLOT_CW_TRUNC;
12115           break;
12116 
12117         case I387_CW_FLOOR:
12118           /* round down toward -oo */
12119           emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12120           slot = SLOT_CW_FLOOR;
12121           break;
12122 
12123         case I387_CW_CEIL:
12124           /* round up toward +oo */
12125           emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12126           slot = SLOT_CW_CEIL;
12127           break;
12128 
12129         case I387_CW_MASK_PM:
12130           /* mask precision exception for nearbyint() */
12131           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12132           slot = SLOT_CW_MASK_PM;
12133           break;
12134 
12135         default:
12136           gcc_unreachable ();
12137         }
12138     }
12139 
12140   gcc_assert (slot < MAX_386_STACK_LOCALS);
12141 
12142   new_mode = assign_386_stack_local (HImode, slot);
12143   emit_move_insn (new_mode, reg);
12144 }
12145 
12146 /* Output code for INSN to convert a float to a signed int.  OPERANDS
12147    are the insn operands.  The output may be [HSD]Imode and the input
12148    operand may be [SDX]Fmode.  */
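/* For the common non-fisttp DImode case, the output is e.g.
   "fldcw %3" (load the truncating control word), "fistpll %0"
   (popping 64-bit store), then "fldcw %2" (restore the previous
   control word).  */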
12149 
12150 const char *
12151 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12152 {
12153   int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12154   int dimode_p = GET_MODE (operands[0]) == DImode;
12155   int round_mode = get_attr_i387_cw (insn);
12156 
12157   /* Jump through a hoop or two for DImode, since the hardware has no
12158      non-popping instruction.  We used to do this a different way, but
12159      that was somewhat fragile and broke with post-reload splitters.  */
12160   if ((dimode_p || fisttp) && !stack_top_dies)
12161     output_asm_insn ("fld\t%y1", operands);
12162 
12163   gcc_assert (STACK_TOP_P (operands[1]));
12164   gcc_assert (MEM_P (operands[0]));
12165   gcc_assert (GET_MODE (operands[1]) != TFmode);
12166 
12167   if (fisttp)
12168       output_asm_insn ("fisttp%z0\t%0", operands);
12169   else
12170     {
12171       if (round_mode != I387_CW_ANY)
12172         output_asm_insn ("fldcw\t%3", operands);
12173       if (stack_top_dies || dimode_p)
12174         output_asm_insn ("fistp%z0\t%0", operands);
12175       else
12176         output_asm_insn ("fist%z0\t%0", operands);
12177       if (round_mode != I387_CW_ANY)
12178         output_asm_insn ("fldcw\t%2", operands);
12179     }
12180 
12181   return "";
12182 }
12183 
12184 /* Output code for x87 ffreep insn.  The OPNO argument, which may only
12185    have the values zero or one, indicates the ffreep insn's operand
12186    from the OPERANDS array.  */
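/* Without assembler support for the mnemonic, the raw encoding is
   emitted instead: "ffreep %st(i)" encodes as 0xdf 0xc0+i, so e.g.
   ASM_SHORT "0xc3df" stores the bytes 0xdf 0xc3 on this
   little-endian target, i.e. "ffreep %st(3)".  */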
12187 
12188 static const char *
12189 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12190 {
12191   if (TARGET_USE_FFREEP)
12192 #ifdef HAVE_AS_IX86_FFREEP
12193     return opno ? "ffreep\t%y1" : "ffreep\t%y0";
12194 #else
12195     {
12196       static char retval[32];
12197       int regno = REGNO (operands[opno]);
12198 
12199       gcc_assert (FP_REGNO_P (regno));
12200 
12201       regno -= FIRST_STACK_REG;
12202 
12203       snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
12204       return retval;
12205     }
12206 #endif
12207 
12208   return opno ? "fstp\t%y1" : "fstp\t%y0";
12209 }
12210 
12211 
12212 /* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
12213    should be used.  UNORDERED_P is true when fucom should be used.  */
12214 
12215 const char *
12216 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12217 {
12218   int stack_top_dies;
12219   rtx cmp_op0, cmp_op1;
12220   int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
12221 
12222   if (eflags_p)
12223     {
12224       cmp_op0 = operands[0];
12225       cmp_op1 = operands[1];
12226     }
12227   else
12228     {
12229       cmp_op0 = operands[1];
12230       cmp_op1 = operands[2];
12231     }
12232 
12233   if (is_sse)
12234     {
12235       static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12236       static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12237       static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12238       static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12239 
12240       if (GET_MODE (operands[0]) == SFmode)
12241         if (unordered_p)
12242           return &ucomiss[TARGET_AVX ? 0 : 1];
12243         else
12244           return &comiss[TARGET_AVX ? 0 : 1];
12245       else
12246         if (unordered_p)
12247           return &ucomisd[TARGET_AVX ? 0 : 1];
12248         else
12249           return &comisd[TARGET_AVX ? 0 : 1];
12250     }
12251 
12252   gcc_assert (STACK_TOP_P (cmp_op0));
12253 
12254   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12255 
12256   if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12257     {
12258       if (stack_top_dies)
12259         {
12260           output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12261           return output_387_ffreep (operands, 1);
12262         }
12263       else
12264         return "ftst\n\tfnstsw\t%0";
12265     }
12266 
12267   if (STACK_REG_P (cmp_op1)
12268       && stack_top_dies
12269       && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12270       && REGNO (cmp_op1) != FIRST_STACK_REG)
12271     {
      /* If the top of the 387 stack dies, and the other operand is
         also a stack register that dies, then this must be a
         `fcompp' float compare.  */
12275 
12276       if (eflags_p)
12277         {
12278           /* There is no double popping fcomi variant.  Fortunately,
12279              eflags is immune from the fstp's cc clobbering.  */
12280           if (unordered_p)
12281             output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12282           else
12283             output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12284           return output_387_ffreep (operands, 0);
12285         }
12286       else
12287         {
12288           if (unordered_p)
12289             return "fucompp\n\tfnstsw\t%0";
12290           else
12291             return "fcompp\n\tfnstsw\t%0";
12292         }
12293     }
12294   else
12295     {
12296       /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
12297 
12298       static const char * const alt[16] =
12299       {
12300         "fcom%z2\t%y2\n\tfnstsw\t%0",
12301         "fcomp%z2\t%y2\n\tfnstsw\t%0",
12302         "fucom%z2\t%y2\n\tfnstsw\t%0",
12303         "fucomp%z2\t%y2\n\tfnstsw\t%0",
12304 
12305         "ficom%z2\t%y2\n\tfnstsw\t%0",
12306         "ficomp%z2\t%y2\n\tfnstsw\t%0",
12307         NULL,
12308         NULL,
12309 
12310         "fcomi\t{%y1, %0|%0, %y1}",
12311         "fcomip\t{%y1, %0|%0, %y1}",
12312         "fucomi\t{%y1, %0|%0, %y1}",
12313         "fucomip\t{%y1, %0|%0, %y1}",
12314 
12315         NULL,
12316         NULL,
12317         NULL,
12318         NULL
12319       };
12320 
12321       int mask;
12322       const char *ret;
12323 
12324       mask  = eflags_p << 3;
12325       mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12326       mask |= unordered_p << 1;
12327       mask |= stack_top_dies;
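
      /* For example, an unordered fcomi-style compare with a dying
         top of stack gives mask 8 | 2 | 1 == 11, selecting
         "fucomip" above.  */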
12328 
12329       gcc_assert (mask < 16);
12330       ret = alt[mask];
12331       gcc_assert (ret);
12332 
12333       return ret;
12334     }
12335 }
12336 
12337 void
12338 ix86_output_addr_vec_elt (FILE *file, int value)
12339 {
12340   const char *directive = ASM_LONG;
12341 
12342 #ifdef ASM_QUAD
12343   if (TARGET_64BIT)
12344     directive = ASM_QUAD;
12345 #else
12346   gcc_assert (!TARGET_64BIT);
12347 #endif
12348 
12349   fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
12350 }
12351 
12352 void
12353 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12354 {
12355   const char *directive = ASM_LONG;
12356 
12357 #ifdef ASM_QUAD
12358   if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12359     directive = ASM_QUAD;
12360 #else
12361   gcc_assert (!TARGET_64BIT);
12362 #endif
12363   /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
12364   if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12365     fprintf (file, "%s%s%d-%s%d\n",
12366              directive, LPREFIX, value, LPREFIX, rel);
12367   else if (HAVE_AS_GOTOFF_IN_DATA)
12368     fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12369 #if TARGET_MACHO
12370   else if (TARGET_MACHO)
12371     {
12372       fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12373       machopic_output_function_base_name (file);
12374       fprintf(file, "\n");
12375     }
12376 #endif
12377   else
12378     asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12379                  ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12380 }
12381 
12382 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
12383    for the target.  */
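/* The xor form is shorter and usually faster, but it clobbers the
   flags, so it is emitted inside a PARALLEL with a FLAGS_REG clobber;
   the plain "mov $0, reg" form is used only when TARGET_USE_MOV0 is
   set and the insn is not being optimized for speed.  */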
12384 
12385 void
12386 ix86_expand_clear (rtx dest)
12387 {
12388   rtx tmp;
12389 
12390   /* We play register width games, which are only valid after reload.  */
12391   gcc_assert (reload_completed);
12392 
12393   /* Avoid HImode and its attendant prefix byte.  */
12394   if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12395     dest = gen_rtx_REG (SImode, REGNO (dest));
12396   tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12397 
12398   /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
12399   if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
12400     {
12401       rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12402       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12403     }
12404 
12405   emit_insn (tmp);
12406 }
12407 
12408 /* X is an unchanging MEM.  If it is a constant pool reference, return
12409    the constant pool rtx, else NULL.  */
12410 
12411 rtx
12412 maybe_get_pool_constant (rtx x)
12413 {
12414   x = ix86_delegitimize_address (XEXP (x, 0));
12415 
12416   if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12417     return get_pool_constant (x);
12418 
12419   return NULL_RTX;
12420 }
12421 
12422 void
12423 ix86_expand_move (enum machine_mode mode, rtx operands[])
12424 {
12425   rtx op0, op1;
12426   enum tls_model model;
12427 
12428   op0 = operands[0];
12429   op1 = operands[1];
12430 
12431   if (GET_CODE (op1) == SYMBOL_REF)
12432     {
12433       model = SYMBOL_REF_TLS_MODEL (op1);
12434       if (model)
12435         {
12436           op1 = legitimize_tls_address (op1, model, true);
12437           op1 = force_operand (op1, op0);
12438           if (op1 == op0)
12439             return;
12440         }
12441       else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12442                && SYMBOL_REF_DLLIMPORT_P (op1))
12443         op1 = legitimize_dllimport_symbol (op1, false);
12444     }
12445   else if (GET_CODE (op1) == CONST
12446            && GET_CODE (XEXP (op1, 0)) == PLUS
12447            && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12448     {
12449       rtx addend = XEXP (XEXP (op1, 0), 1);
12450       rtx symbol = XEXP (XEXP (op1, 0), 0);
12451       rtx tmp = NULL;
12452 
12453       model = SYMBOL_REF_TLS_MODEL (symbol);
12454       if (model)
12455         tmp = legitimize_tls_address (symbol, model, true);
12456       else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12457                && SYMBOL_REF_DLLIMPORT_P (symbol))
12458         tmp = legitimize_dllimport_symbol (symbol, true);
12459 
12460       if (tmp)
12461         {
12462           tmp = force_operand (tmp, NULL);
12463           tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12464                                      op0, 1, OPTAB_DIRECT);
12465           if (tmp == op0)
12466             return;
12467         }
12468     }
12469 
12470   if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12471     {
12472       if (TARGET_MACHO && !TARGET_64BIT)
12473         {
12474 #if TARGET_MACHO
12475           if (MACHOPIC_PURE)
12476             {
12477               rtx temp = ((reload_in_progress
12478                            || ((op0 && REG_P (op0))
12479                                && mode == Pmode))
12480                           ? op0 : gen_reg_rtx (Pmode));
12481               op1 = machopic_indirect_data_reference (op1, temp);
12482               op1 = machopic_legitimize_pic_address (op1, mode,
12483                                                      temp == op1 ? 0 : temp);
12484             }
12485           else if (MACHOPIC_INDIRECT)
12486             op1 = machopic_indirect_data_reference (op1, 0);
12487           if (op0 == op1)
12488             return;
12489 #endif
12490         }
12491       else
12492         {
12493           if (MEM_P (op0))
12494             op1 = force_reg (Pmode, op1);
12495           else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12496             {
12497               rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12498               op1 = legitimize_pic_address (op1, reg);
12499               if (op0 == op1)
12500                 return;
12501             }
12502         }
12503     }
12504   else
12505     {
12506       if (MEM_P (op0)
12507           && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12508               || !push_operand (op0, mode))
12509           && MEM_P (op1))
12510         op1 = force_reg (mode, op1);
12511 
12512       if (push_operand (op0, mode)
12513           && ! general_no_elim_operand (op1, mode))
12514         op1 = copy_to_mode_reg (mode, op1);
12515 
      /* Force large constants in 64-bit compilation into a register
         so that they get CSEd.  */
12518       if (can_create_pseudo_p ()
12519           && (mode == DImode) && TARGET_64BIT
12520           && immediate_operand (op1, mode)
12521           && !x86_64_zext_immediate_operand (op1, VOIDmode)
12522           && !register_operand (op0, mode)
12523           && optimize)
12524         op1 = copy_to_mode_reg (mode, op1);
12525 
12526       if (can_create_pseudo_p ()
12527           && FLOAT_MODE_P (mode)
12528           && GET_CODE (op1) == CONST_DOUBLE)
12529         {
12530           /* If we are loading a floating point constant to a register,
12531              force the value to memory now, since we'll get better code
12532              out the back end.  */
12533 
12534           op1 = validize_mem (force_const_mem (mode, op1));
12535           if (!register_operand (op0, mode))
12536             {
12537               rtx temp = gen_reg_rtx (mode);
12538               emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12539               emit_move_insn (op0, temp);
12540               return;
12541             }
12542         }
12543     }
12544 
12545   emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12546 }
12547 
12548 void
12549 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12550 {
12551   rtx op0 = operands[0], op1 = operands[1];
12552   unsigned int align = GET_MODE_ALIGNMENT (mode);
12553 
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register; once we have that information, we may be able
     to handle some of them more efficiently.  */
12558   if (can_create_pseudo_p ()
12559       && register_operand (op0, mode)
12560       && (CONSTANT_P (op1)
12561           || (GET_CODE (op1) == SUBREG
12562               && CONSTANT_P (SUBREG_REG (op1))))
12563       && standard_sse_constant_p (op1) <= 0)
12564     op1 = validize_mem (force_const_mem (mode, op1));
12565 
  /* We need to check memory alignment for SSE modes since attributes
     can make operands unaligned.  */
12568   if (can_create_pseudo_p ()
12569       && SSE_REG_MODE_P (mode)
12570       && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12571           || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12572     {
12573       rtx tmp[2];
12574 
12575       /* ix86_expand_vector_move_misalign() does not like constants ... */
12576       if (CONSTANT_P (op1)
12577           || (GET_CODE (op1) == SUBREG
12578               && CONSTANT_P (SUBREG_REG (op1))))
12579         op1 = validize_mem (force_const_mem (mode, op1));
12580 
12581       /* ... nor both arguments in memory.  */
12582       if (!register_operand (op0, mode)
12583           && !register_operand (op1, mode))
12584         op1 = force_reg (mode, op1);
12585 
12586       tmp[0] = op0; tmp[1] = op1;
12587       ix86_expand_vector_move_misalign (mode, tmp);
12588       return;
12589     }
12590 
  /* If neither operand is a register, force operand 1 into one so the
     move does not need two memory references.  */
12592   if (can_create_pseudo_p ()
12593       && !register_operand (op0, mode)
12594       && !register_operand (op1, mode))
12595     {
12596       emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12597       return;
12598     }
12599 
12600   emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12601 }
12602 
12603 /* Implement the movmisalign patterns for SSE.  Non-SSE modes go
12604    straight to ix86_expand_vector_move.  */
12605 /* Code generation for scalar reg-reg moves of single and double precision data:
12606      if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12607        movaps reg, reg
12608      else
12609        movss reg, reg
12610      if (x86_sse_partial_reg_dependency == true)
12611        movapd reg, reg
12612      else
12613        movsd reg, reg
12614 
12615    Code generation for scalar loads of double precision data:
12616      if (x86_sse_split_regs == true)
12617        movlpd mem, reg      (gas syntax)
12618      else
12619        movsd mem, reg
12620 
12621    Code generation for unaligned packed loads of single precision data
12622    (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12623      if (x86_sse_unaligned_move_optimal)
12624        movups mem, reg
12625 
12626      if (x86_sse_partial_reg_dependency == true)
12627        {
12628          xorps  reg, reg
12629          movlps mem, reg
12630          movhps mem+8, reg
12631        }
12632      else
12633        {
12634          movlps mem, reg
12635          movhps mem+8, reg
12636        }
12637 
12638    Code generation for unaligned packed loads of double precision data
12639    (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12640      if (x86_sse_unaligned_move_optimal)
12641        movupd mem, reg
12642 
12643      if (x86_sse_split_regs == true)
12644        {
12645          movlpd mem, reg
12646          movhpd mem+8, reg
12647        }
12648      else
12649        {
12650          movsd  mem, reg
12651          movhpd mem+8, reg
12652        }
12653  */
12654 
12655 void
12656 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
12657 {
12658   rtx op0, op1, m;
12659 
12660   op0 = operands[0];
12661   op1 = operands[1];
12662 
12663   if (TARGET_AVX)
12664     {
12665       switch (GET_MODE_CLASS (mode))
12666         {
12667         case MODE_VECTOR_INT:
12668         case MODE_INT:
12669           switch (GET_MODE_SIZE (mode))
12670             {
12671             case 16:
12672               op0 = gen_lowpart (V16QImode, op0);
12673               op1 = gen_lowpart (V16QImode, op1);
12674               emit_insn (gen_avx_movdqu (op0, op1));
12675               break;
12676             case 32:
12677               op0 = gen_lowpart (V32QImode, op0);
12678               op1 = gen_lowpart (V32QImode, op1);
12679               emit_insn (gen_avx_movdqu256 (op0, op1));
12680               break;
12681             default:
12682               gcc_unreachable ();
12683             }
12684           break;
12685         case MODE_VECTOR_FLOAT:
12686           op0 = gen_lowpart (mode, op0);
12687           op1 = gen_lowpart (mode, op1);
12688 
12689           switch (mode)
12690             { 
12691             case V4SFmode:
12692               emit_insn (gen_avx_movups (op0, op1));
12693               break;
12694             case V8SFmode:
12695               emit_insn (gen_avx_movups256 (op0, op1));
12696               break;
12697             case V2DFmode:
12698               emit_insn (gen_avx_movupd (op0, op1));
12699               break;
12700             case V4DFmode:
12701               emit_insn (gen_avx_movupd256 (op0, op1));
12702               break;
12703             default:
12704               gcc_unreachable ();
12705             }
12706           break;
12707 
12708         default:
12709           gcc_unreachable ();
12710         }
12711 
12712       return;
12713     }
12714 
12715   if (MEM_P (op1))
12716     {
12717       /* If we're optimizing for size, movups is the smallest.  */
12718       if (optimize_insn_for_size_p ())
12719         {
12720           op0 = gen_lowpart (V4SFmode, op0);
12721           op1 = gen_lowpart (V4SFmode, op1);
12722           emit_insn (gen_sse_movups (op0, op1));
12723           return;
12724         }
12725 
12726       /* ??? If we have typed data, then it would appear that using
12727          movdqu is the only way to get unaligned data loaded with
12728          integer type.  */
12729       if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12730         {
12731           op0 = gen_lowpart (V16QImode, op0);
12732           op1 = gen_lowpart (V16QImode, op1);
12733           emit_insn (gen_sse2_movdqu (op0, op1));
12734           return;
12735         }
12736 
12737       if (TARGET_SSE2 && mode == V2DFmode)
12738         {
12739           rtx zero;
12740 
12741           if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12742             {
12743               op0 = gen_lowpart (V2DFmode, op0);
12744               op1 = gen_lowpart (V2DFmode, op1);
12745               emit_insn (gen_sse2_movupd (op0, op1));
12746               return;
12747             }
12748 
12749           /* When SSE registers are split into halves, we can avoid
12750              writing to the top half twice.  */
12751           if (TARGET_SSE_SPLIT_REGS)
12752             {
12753               emit_clobber (op0);
12754               zero = op0;
12755             }
12756           else
12757             {
12758               /* ??? Not sure about the best option for the Intel chips.
12759                  The following would seem to satisfy; the register is
12760                  entirely cleared, breaking the dependency chain.  We
12761                  then store to the upper half, with a dependency depth
12762                  of one.  A rumor has it that Intel recommends two movsd
12763                  followed by an unpacklpd, but this is unconfirmed.  And
12764                  given that the dependency depth of the unpacklpd would
12765                  still be one, I'm not sure why this would be better.  */
12766               zero = CONST0_RTX (V2DFmode);
12767             }
12768 
12769           m = adjust_address (op1, DFmode, 0);
12770           emit_insn (gen_sse2_loadlpd (op0, zero, m));
12771           m = adjust_address (op1, DFmode, 8);
12772           emit_insn (gen_sse2_loadhpd (op0, op0, m));
12773         }
12774       else
12775         {
12776           if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12777             {
12778               op0 = gen_lowpart (V4SFmode, op0);
12779               op1 = gen_lowpart (V4SFmode, op1);
12780               emit_insn (gen_sse_movups (op0, op1));
12781               return;
12782             }
12783 
12784           if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12785             emit_move_insn (op0, CONST0_RTX (mode));
12786           else
12787             emit_clobber (op0);
12788 
12789           if (mode != V4SFmode)
12790             op0 = gen_lowpart (V4SFmode, op0);
12791           m = adjust_address (op1, V2SFmode, 0);
12792           emit_insn (gen_sse_loadlps (op0, op0, m));
12793           m = adjust_address (op1, V2SFmode, 8);
12794           emit_insn (gen_sse_loadhps (op0, op0, m));
12795         }
12796     }
12797   else if (MEM_P (op0))
12798     {
12799       /* If we're optimizing for size, movups is the smallest.  */
12800       if (optimize_insn_for_size_p ())
12801         {
12802           op0 = gen_lowpart (V4SFmode, op0);
12803           op1 = gen_lowpart (V4SFmode, op1);
12804           emit_insn (gen_sse_movups (op0, op1));
12805           return;
12806         }
12807 
      /* ??? Similar to the load case above, only less clear because
         of "typeless" stores.  */
12810       if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12811           && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12812         {
12813           op0 = gen_lowpart (V16QImode, op0);
12814           op1 = gen_lowpart (V16QImode, op1);
12815           emit_insn (gen_sse2_movdqu (op0, op1));
12816           return;
12817         }
12818 
12819       if (TARGET_SSE2 && mode == V2DFmode)
12820         {
12821           m = adjust_address (op0, DFmode, 0);
12822           emit_insn (gen_sse2_storelpd (m, op1));
12823           m = adjust_address (op0, DFmode, 8);
12824           emit_insn (gen_sse2_storehpd (m, op1));
12825         }
12826       else
12827         {
12828           if (mode != V4SFmode)
12829             op1 = gen_lowpart (V4SFmode, op1);
12830           m = adjust_address (op0, V2SFmode, 0);
12831           emit_insn (gen_sse_storelps (m, op1));
12832           m = adjust_address (op0, V2SFmode, 8);
12833           emit_insn (gen_sse_storehps (m, op1));
12834         }
12835     }
12836   else
12837     gcc_unreachable ();
12838 }
12839 
12840 /* Expand a push in MODE.  This is some mode for which we do not support
12841    proper push instructions, at least from the registers that we expect
12842    the value to live in.  */
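/* The expansion is a manual stack pointer adjustment followed by an
   ordinary store, roughly "sub $16, %esp" and a move to "(%esp)" for
   a 16-byte mode (sizes here are illustrative).  */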
12843 
12844 void
12845 ix86_expand_push (enum machine_mode mode, rtx x)
12846 {
12847   rtx tmp;
12848 
12849   tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12850                              GEN_INT (-GET_MODE_SIZE (mode)),
12851                              stack_pointer_rtx, 1, OPTAB_DIRECT);
12852   if (tmp != stack_pointer_rtx)
12853     emit_move_insn (stack_pointer_rtx, tmp);
12854 
12855   tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12856 
  /* When we push an operand onto the stack, it has to be aligned at
     least at the function argument boundary.  However, since we don't
     have the argument type, we can't determine the actual argument
     boundary.  */
12861   emit_move_insn (tmp, x);
12862 }
12863 
12864 /* Helper function of ix86_fixup_binary_operands to canonicalize
12865    operand order.  Returns true if the operands should be swapped.  */
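/* For instance, when dst and src2 are the same register in a
   commutative operation, swapping the sources lets the expander use
   the two-address form "dst OP= src1" without an extra copy.  */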
12866 
12867 static bool
12868 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12869                              rtx operands[])
12870 {
12871   rtx dst = operands[0];
12872   rtx src1 = operands[1];
12873   rtx src2 = operands[2];
12874 
12875   /* If the operation is not commutative, we can't do anything.  */
12876   if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12877     return false;
12878 
12879   /* Highest priority is that src1 should match dst.  */
12880   if (rtx_equal_p (dst, src1))
12881     return false;
12882   if (rtx_equal_p (dst, src2))
12883     return true;
12884 
12885   /* Next highest priority is that immediate constants come second.  */
12886   if (immediate_operand (src2, mode))
12887     return false;
12888   if (immediate_operand (src1, mode))
12889     return true;
12890 
12891   /* Lowest priority is that memory references should come second.  */
12892   if (MEM_P (src2))
12893     return false;
12894   if (MEM_P (src1))
12895     return true;
12896 
12897   return false;
12898 }
12899 
12900 
12901 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
12902    destination to use for the operation.  If different from the true
12903    destination in operands[0], a copy operation will be required.  */
12904 
12905 rtx
12906 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12907                             rtx operands[])
12908 {
12909   rtx dst = operands[0];
12910   rtx src1 = operands[1];
12911   rtx src2 = operands[2];
12912 
12913   /* Canonicalize operand order.  */
12914   if (ix86_swap_binary_operands_p (code, mode, operands))
12915     {
12916       rtx temp;
12917 
12918       /* It is invalid to swap operands of different modes.  */
12919       gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12920 
12921       temp = src1;
12922       src1 = src2;
12923       src2 = temp;
12924     }
12925 
12926   /* Both source operands cannot be in memory.  */
12927   if (MEM_P (src1) && MEM_P (src2))
12928     {
12929       /* Optimization: Only read from memory once.  */
12930       if (rtx_equal_p (src1, src2))
12931         {
12932           src2 = force_reg (mode, src2);
12933           src1 = src2;
12934         }
12935       else
12936         src2 = force_reg (mode, src2);
12937     }
12938 
12939   /* If the destination is memory, and we do not have matching source
12940      operands, do things in registers.  */
12941   if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12942     dst = gen_reg_rtx (mode);
12943 
12944   /* Source 1 cannot be a constant.  */
12945   if (CONSTANT_P (src1))
12946     src1 = force_reg (mode, src1);
12947 
12948   /* Source 1 cannot be a non-matching memory.  */
12949   if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12950     src1 = force_reg (mode, src1);
12951 
12952   operands[1] = src1;
12953   operands[2] = src2;
12954   return dst;
12955 }
12956 
12957 /* Similarly, but assume that the destination has already been
12958    set up properly.  */
12959 
12960 void
12961 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12962                                     enum machine_mode mode, rtx operands[])
12963 {
12964   rtx dst = ix86_fixup_binary_operands (code, mode, operands);
12965   gcc_assert (dst == operands[0]);
12966 }
12967 
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow 3 separate
   memory references (one output, two input) in a single insn.  */
12971 
12972 void
12973 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12974                              rtx operands[])
12975 {
12976   rtx src1, src2, dst, op, clob;
12977 
12978   dst = ix86_fixup_binary_operands (code, mode, operands);
12979   src1 = operands[1];
12980   src2 = operands[2];
12981 
  /* Emit the instruction.  */
12984   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12985   if (reload_in_progress)
12986     {
12987       /* Reload doesn't know about the flags register, and doesn't know that
12988          it doesn't want to clobber it.  We can only do this with PLUS.  */
12989       gcc_assert (code == PLUS);
12990       emit_insn (op);
12991     }
12992   else
12993     {
12994       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12995       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12996     }
12997 
12998   /* Fix up the destination if needed.  */
12999   if (dst != operands[0])
13000     emit_move_insn (operands[0], dst);
13001 }
13002 
13003 /* Return TRUE or FALSE depending on whether the binary operator meets the
13004    appropriate constraints.  */
13005 
13006 int
13007 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
13008                          rtx operands[3])
13009 {
13010   rtx dst = operands[0];
13011   rtx src1 = operands[1];
13012   rtx src2 = operands[2];
13013 
13014   /* Both source operands cannot be in memory.  */
13015   if (MEM_P (src1) && MEM_P (src2))
13016     return 0;
13017 
13018   /* Canonicalize operand order for commutative operators.  */
13019   if (ix86_swap_binary_operands_p (code, mode, operands))
13020     {
13021       rtx temp = src1;
13022       src1 = src2;
13023       src2 = temp;
13024     }
13025 
13026   /* If the destination is memory, we must have a matching source operand.  */
13027   if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    return 0;
13029 
13030   /* Source 1 cannot be a constant.  */
13031   if (CONSTANT_P (src1))
13032     return 0;
13033 
13034   /* Source 1 cannot be a non-matching memory.  */
13035   if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13036     return 0;
13037 
13038   return 1;
13039 }
13040 
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow 2 separate
   memory references (one output, one input) in a single insn.  */
13044 
13045 void
13046 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
13047                             rtx operands[])
13048 {
13049   int matching_memory;
13050   rtx src, dst, op, clob;
13051 
13052   dst = operands[0];
13053   src = operands[1];
13054 
13055   /* If the destination is memory, and we do not have matching source
13056      operands, do things in registers.  */
13057   matching_memory = 0;
13058   if (MEM_P (dst))
13059     {
13060       if (rtx_equal_p (dst, src))
13061         matching_memory = 1;
13062       else
13063         dst = gen_reg_rtx (mode);
13064     }
13065 
13066   /* When source operand is memory, destination must match.  */
13067   if (MEM_P (src) && !matching_memory)
13068     src = force_reg (mode, src);
13069 
  /* Emit the instruction.  */
13072   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
13073   if (reload_in_progress || code == NOT)
13074     {
13075       /* Reload doesn't know about the flags register, and doesn't know that
13076          it doesn't want to clobber it.  */
13077       gcc_assert (code == NOT);
13078       emit_insn (op);
13079     }
13080   else
13081     {
13082       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13083       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13084     }
13085 
13086   /* Fix up the destination if needed.  */
13087   if (dst != operands[0])
13088     emit_move_insn (operands[0], dst);
13089 }
13090 
13091 /* Return TRUE or FALSE depending on whether the unary operator meets the
13092    appropriate constraints.  */
13093 
13094 int
13095 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13096                         enum machine_mode mode ATTRIBUTE_UNUSED,
13097                         rtx operands[2] ATTRIBUTE_UNUSED)
13098 {
  /* If one of the operands is memory, source and destination must match.  */
13100   if ((MEM_P (operands[0])
13101        || MEM_P (operands[1]))
13102       && ! rtx_equal_p (operands[0], operands[1]))
13103     return FALSE;
13104   return TRUE;
13105 }
13106 
13107 /* Post-reload splitter for converting an SF or DFmode value in an
13108    SSE register into an unsigned SImode.  */
13109 
13110 void
13111 ix86_split_convert_uns_si_sse (rtx operands[])
13112 {
13113   enum machine_mode vecmode;
13114   rtx value, large, zero_or_two31, input, two31, x;
13115 
13116   large = operands[1];
13117   zero_or_two31 = operands[2];
13118   input = operands[3];
13119   two31 = operands[4];
13120   vecmode = GET_MODE (large);
13121   value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13122 
13123   /* Load up the value into the low element.  We must ensure that the other
13124      elements are valid floats -- zero is the easiest such value.  */
13125   if (MEM_P (input))
13126     {
13127       if (vecmode == V4SFmode)
13128         emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13129       else
13130         emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
13131     }
13132   else
13133     {
13134       input = gen_rtx_REG (vecmode, REGNO (input));
13135       emit_move_insn (value, CONST0_RTX (vecmode));
13136       if (vecmode == V4SFmode)
13137         emit_insn (gen_sse_movss (value, value, input));
13138       else
13139         emit_insn (gen_sse2_movsd (value, value, input));
13140     }
13141 
13142   emit_move_insn (large, two31);
13143   emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
13144 
13145   x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13146   emit_insn (gen_rtx_SET (VOIDmode, large, x));
13147 
13148   x = gen_rtx_AND (vecmode, zero_or_two31, large);
13149   emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
13150 
13151   x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13152   emit_insn (gen_rtx_SET (VOIDmode, value, x));
13153 
13154   large = gen_rtx_REG (V4SImode, REGNO (large));
13155   emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
13156 
13157   x = gen_rtx_REG (V4SImode, REGNO (value));
13158   if (vecmode == V4SFmode)
13159     emit_insn (gen_sse2_cvttps2dq (x, value));
13160   else
13161     emit_insn (gen_sse2_cvttpd2dq (x, value));
13162   value = x;
13163 
13164   emit_insn (gen_xorv4si3 (value, value, large));
13165 }
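
/* A worked example of the sequence above (illustrative): for an input of
   3e9, LARGE becomes all-ones (3e9 >= 2^31), so 2^31 is subtracted,
   giving 852516352.0, which is within the signed range of cvttps2dq /
   cvttpd2dq; LARGE shifted left by 31 is 0x80000000, and the final xor
   sets the sign bit again: 852516352 | 2^31 == 3000000000.  For inputs
   below 2^31 the mask is zero and the value converts unchanged.  */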
13166 
/* Convert an unsigned DImode value into DFmode, using only SSE.
13168    Expects the 64-bit DImode to be supplied in a pair of integral
13169    registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
13170    -mfpmath=sse, !optimize_size only.  */
13171 
13172 void
13173 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13174 {
13175   REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13176   rtx int_xmm, fp_xmm;
13177   rtx biases, exponents;
13178   rtx x;
13179 
13180   int_xmm = gen_reg_rtx (V4SImode);
13181   if (TARGET_INTER_UNIT_MOVES)
13182     emit_insn (gen_movdi_to_sse (int_xmm, input));
13183   else if (TARGET_SSE_SPLIT_REGS)
13184     {
13185       emit_clobber (int_xmm);
13186       emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13187     }
13188   else
13189     {
13190       x = gen_reg_rtx (V2DImode);
13191       ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13192       emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
13193     }
13194 
13195   x = gen_rtx_CONST_VECTOR (V4SImode,
13196                             gen_rtvec (4, GEN_INT (0x43300000UL),
13197                                        GEN_INT (0x45300000UL),
13198                                        const0_rtx, const0_rtx));
13199   exponents = validize_mem (force_const_mem (V4SImode, x));
13200 
  /* int_xmm = { 0x45300000UL, input/hi, 0x43300000UL, input/lo }  */
13202   emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13203 
  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */
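  /* For instance (purely illustrative): for input 2^32 + 5 the lo word 5
     becomes the double 0x1.0p52 + 5 and the hi word 1 becomes
     0x1.0p84 + 1*2^32; after the bias subtraction below the two lanes
     hold 5.0 and 4294967296.0, whose sum is the exact result.  */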
13209 
13210   fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13211 
13212   /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13213      in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
13214   real_ldexp (&bias_lo_rvt, &dconst1, 52);
13215   real_ldexp (&bias_hi_rvt, &dconst1, 84);
13216   biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13217   x = const_double_from_real_value (bias_hi_rvt, DFmode);
13218   biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13219   biases = validize_mem (force_const_mem (V2DFmode, biases));
13220   emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13221 
13222   /* Add the upper and lower DFmode values together.  */
13223   if (TARGET_SSE3)
13224     emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13225   else
13226     {
13227       x = copy_to_mode_reg (V2DFmode, fp_xmm);
13228       emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13229       emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13230     }
13231 
13232   ix86_expand_vector_extract (false, target, fp_xmm, 0);
13233 }
13234 
13235 /* Not used, but eases macroization of patterns.  */
13236 void
13237 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13238                                   rtx input ATTRIBUTE_UNUSED)
13239 {
13240   gcc_unreachable ();
13241 }
13242 
/* Convert an unsigned SImode value into DFmode.  Currently only used
   for SSE, but applicable anywhere.  */
13245 
13246 void
13247 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13248 {
13249   REAL_VALUE_TYPE TWO31r;
13250   rtx x, fp;
13251 
13252   x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13253                            NULL, 1, OPTAB_DIRECT);
13254 
13255   fp = gen_reg_rtx (DFmode);
13256   emit_insn (gen_floatsidf2 (fp, x));
13257 
13258   real_ldexp (&TWO31r, &dconst1, 31);
13259   x = const_double_from_real_value (TWO31r, DFmode);
13260 
13261   x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13262   if (x != target)
13263     emit_move_insn (target, x);
13264 }
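
/* Illustrative example: for input 0xffffffff the PLUS above wraps to the
   signed value 0x7fffffff, which converts to 2147483647.0; adding 2^31
   back yields 4294967295.0, the original unsigned value.  */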
13265 
/* Convert a signed DImode value into DFmode.  Only used for SSE in
13267    32-bit mode; otherwise we have a direct convert instruction.  */
13268 
13269 void
13270 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13271 {
13272   REAL_VALUE_TYPE TWO32r;
13273   rtx fp_lo, fp_hi, x;
13274 
13275   fp_lo = gen_reg_rtx (DFmode);
13276   fp_hi = gen_reg_rtx (DFmode);
13277 
13278   emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13279 
13280   real_ldexp (&TWO32r, &dconst1, 32);
13281   x = const_double_from_real_value (TWO32r, DFmode);
13282   fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
13283 
13284   ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13285 
13286   x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13287                            0, OPTAB_DIRECT);
13288   if (x != target)
13289     emit_move_insn (target, x);
13290 }
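
/* Illustrative example: for input -1 (0xffffffffffffffff) the high word
   converts to -1.0 and is scaled to -4294967296.0, the low word converts
   (as unsigned) to 4294967295.0, and their sum is the exact result -1.0.  */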
13291 
/* Convert an unsigned SImode value into SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */

13294 void
13295 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13296 {
13297   REAL_VALUE_TYPE ONE16r;
13298   rtx fp_hi, fp_lo, int_hi, int_lo, x;
13299 
13300   real_ldexp (&ONE16r, &dconst1, 16);
13301   x = const_double_from_real_value (ONE16r, SFmode);
13302   int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13303                                       NULL, 0, OPTAB_DIRECT);
13304   int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13305                                       NULL, 0, OPTAB_DIRECT);
13306   fp_hi = gen_reg_rtx (SFmode);
13307   fp_lo = gen_reg_rtx (SFmode);
13308   emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13309   emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13310   fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13311                                0, OPTAB_DIRECT);
13312   fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
13313                                0, OPTAB_DIRECT);
13314   if (!rtx_equal_p (target, fp_hi))
13315     emit_move_insn (target, fp_hi);
13316 }
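
/* Illustrative example: for input 0x12345678 the pieces are
   int_hi = 0x1234 and int_lo = 0x5678; both are exact in SFmode, the
   multiplication by 2^16 is an exact scaling, and the final addition is
   the only rounding step, so the result is correctly rounded.  */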
13317 
/* A subroutine of ix86_build_signbit_mask.  Build a constant vector
   holding VALUE; if VECT is true, replicate VALUE for all elements of
   the vector register, otherwise zero the remaining elements.  */
13321 
13322 rtx
13323 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
13324 {
13325   rtvec v;
13326   switch (mode)
13327     {
13328     case SImode:
13329       gcc_assert (vect);
13330       v = gen_rtvec (4, value, value, value, value);
13331       return gen_rtx_CONST_VECTOR (V4SImode, v);
13332 
13333     case DImode:
13334       gcc_assert (vect);
13335       v = gen_rtvec (2, value, value);
13336       return gen_rtx_CONST_VECTOR (V2DImode, v);
13337 
13338     case SFmode:
13339       if (vect)
13340         v = gen_rtvec (4, value, value, value, value);
13341       else
13342         v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13343                        CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13344       return gen_rtx_CONST_VECTOR (V4SFmode, v);
13345 
13346     case DFmode:
13347       if (vect)
13348         v = gen_rtvec (2, value, value);
13349       else
13350         v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13351       return gen_rtx_CONST_VECTOR (V2DFmode, v);
13352 
13353     default:
13354       gcc_unreachable ();
13355     }
13356 }
13357 
13358 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13359    and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
13360    for an SSE register.  If VECT is true, then replicate the mask for
13361    all elements of the vector register.  If INVERT is true, then create
13362    a mask excluding the sign bit.  */
13363 
13364 rtx
13365 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13366 {
13367   enum machine_mode vec_mode, imode;
13368   HOST_WIDE_INT hi, lo;
13369   int shift = 63;
13370   rtx v;
13371   rtx mask;
13372 
13373   /* Find the sign bit, sign extended to 2*HWI.  */
13374   switch (mode)
13375     {
13376     case SImode:
13377     case SFmode:
13378       imode = SImode;
13379       vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13380       lo = 0x80000000, hi = lo < 0;
13381       break;
13382 
13383     case DImode:
13384     case DFmode:
13385       imode = DImode;
13386       vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13387       if (HOST_BITS_PER_WIDE_INT >= 64)
13388         lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13389       else
13390         lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13391       break;
13392 
13393     case TImode:
13394     case TFmode:
13395       vec_mode = VOIDmode;
13396       if (HOST_BITS_PER_WIDE_INT >= 64)
13397         {
13398           imode = TImode;
13399           lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13400         }
13401       else
13402         {
13403           rtvec vec;
13404 
13405           imode = DImode;
13406           lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13407 
13408           if (invert)
13409             {
13410               lo = ~lo, hi = ~hi;
13411               v = constm1_rtx;
13412             }
13413           else
13414             v = const0_rtx;
13415 
13416           mask = immed_double_const (lo, hi, imode);
13417 
13418           vec = gen_rtvec (2, v, mask);
13419           v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13420           v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
13421 
13422           return v;
13423         }
      break;
13425 
13426     default:
13427       gcc_unreachable ();
13428     }
13429 
13430   if (invert)
13431     lo = ~lo, hi = ~hi;
13432 
13433   /* Force this value into the low part of a fp vector constant.  */
13434   mask = immed_double_const (lo, hi, imode);
13435   mask = gen_lowpart (mode, mask);
13436 
13437   if (vec_mode == VOIDmode)
13438     return force_reg (mode, mask);
13439 
13440   v = ix86_build_const_vector (mode, vect, mask);
13441   return force_reg (vec_mode, v);
13442 }
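
/* E.g. for SFmode the mask elements carry the bit pattern 0x80000000
   (just the sign bit), or 0x7fffffff (all bits but the sign) when
   INVERT is true; VECT decides whether the pattern is replicated
   across the whole vector or placed in the low element only.  */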
13443 
13444 /* Generate code for floating point ABS or NEG.  */
13445 
13446 void
13447 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
13448                                 rtx operands[])
13449 {
13450   rtx mask, set, use, clob, dst, src;
13451   bool use_sse = false;
13452   bool vector_mode = VECTOR_MODE_P (mode);
13453   enum machine_mode elt_mode = mode;
13454 
13455   if (vector_mode)
13456     {
13457       elt_mode = GET_MODE_INNER (mode);
13458       use_sse = true;
13459     }
13460   else if (mode == TFmode)
13461     use_sse = true;
13462   else if (TARGET_SSE_MATH)
13463     use_sse = SSE_FLOAT_MODE_P (mode);
13464 
13465   /* NEG and ABS performed with SSE use bitwise mask operations.
13466      Create the appropriate mask now.  */
13467   if (use_sse)
13468     mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
13469   else
13470     mask = NULL_RTX;
13471 
13472   dst = operands[0];
13473   src = operands[1];
13474 
13475   if (vector_mode)
13476     {
13477       set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
13478       set = gen_rtx_SET (VOIDmode, dst, set);
13479       emit_insn (set);
13480     }
13481   else
13482     {
13483       set = gen_rtx_fmt_e (code, mode, src);
13484       set = gen_rtx_SET (VOIDmode, dst, set);
13485       if (mask)
13486         {
13487           use = gen_rtx_USE (VOIDmode, mask);
13488           clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13489           emit_insn (gen_rtx_PARALLEL (VOIDmode,
13490                                        gen_rtvec (3, set, use, clob)));
13491         }
13492       else
13493         emit_insn (set);
13494     }
13495 }
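
/* This works because the IEEE sign is a single bit: XORing with the
   sign-bit mask flips the sign (NEG) and ANDing with the inverted mask
   clears it (ABS) without touching the other bits; e.g. 0xbf800000
   (-1.0f) AND 0x7fffffff == 0x3f800000 (1.0f).  */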
13496 
13497 /* Expand a copysign operation.  Special case operand 0 being a constant.  */
13498 
13499 void
13500 ix86_expand_copysign (rtx operands[])
13501 {
13502   enum machine_mode mode;
13503   rtx dest, op0, op1, mask, nmask;
13504 
13505   dest = operands[0];
13506   op0 = operands[1];
13507   op1 = operands[2];
13508 
13509   mode = GET_MODE (dest);
13510 
13511   if (GET_CODE (op0) == CONST_DOUBLE)
13512     {
13513       rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
13514 
13515       if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13516         op0 = simplify_unary_operation (ABS, mode, op0, mode);
13517 
13518       if (mode == SFmode || mode == DFmode)
13519         {
13520           enum machine_mode vmode;
13521 
13522           vmode = mode == SFmode ? V4SFmode : V2DFmode;
13523 
13524           if (op0 == CONST0_RTX (mode))
13525             op0 = CONST0_RTX (vmode);
13526           else
13527             {
13528               rtvec v;
13529 
13530               if (mode == SFmode)
13531                 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13532                                CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13533               else
13534                 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
13535 
13536               op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
13537             }
13538         }
13539       else if (op0 != CONST0_RTX (mode))
13540         op0 = force_reg (mode, op0);
13541 
13542       mask = ix86_build_signbit_mask (mode, 0, 0);
13543 
13544       if (mode == SFmode)
13545         copysign_insn = gen_copysignsf3_const;
13546       else if (mode == DFmode)
13547         copysign_insn = gen_copysigndf3_const;
13548       else
13549         copysign_insn = gen_copysigntf3_const;
13550 
      emit_insn (copysign_insn (dest, op0, op1, mask));
13552     }
13553   else
13554     {
13555       rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13556 
13557       nmask = ix86_build_signbit_mask (mode, 0, 1);
13558       mask = ix86_build_signbit_mask (mode, 0, 0);
13559 
13560       if (mode == SFmode)
13561         copysign_insn = gen_copysignsf3_var;
13562       else if (mode == DFmode)
13563         copysign_insn = gen_copysigndf3_var;
13564       else
13565         copysign_insn = gen_copysigntf3_var;
13566 
13567       emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
13568     }
13569 }
13570 
13571 /* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
13572    be a constant, and so has already been expanded into a vector constant.  */
13573 
13574 void
13575 ix86_split_copysign_const (rtx operands[])
13576 {
13577   enum machine_mode mode, vmode;
13578   rtx dest, op0, op1, mask, x;
13579 
13580   dest = operands[0];
13581   op0 = operands[1];
13582   op1 = operands[2];
13583   mask = operands[3];
13584 
13585   mode = GET_MODE (dest);
13586   vmode = GET_MODE (mask);
13587 
13588   dest = simplify_gen_subreg (vmode, dest, mode, 0);
13589   x = gen_rtx_AND (vmode, dest, mask);
13590   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13591 
13592   if (op0 != CONST0_RTX (vmode))
13593     {
13594       x = gen_rtx_IOR (vmode, dest, op0);
13595       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13596     }
13597 }
13598 
13599 /* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
13600    so we have to do two masks.  */
13601 
13602 void
13603 ix86_split_copysign_var (rtx operands[])
13604 {
13605   enum machine_mode mode, vmode;
13606   rtx dest, scratch, op0, op1, mask, nmask, x;
13607 
13608   dest = operands[0];
13609   scratch = operands[1];
13610   op0 = operands[2];
13611   op1 = operands[3];
13612   nmask = operands[4];
13613   mask = operands[5];
13614 
13615   mode = GET_MODE (dest);
13616   vmode = GET_MODE (mask);
13617 
13618   if (rtx_equal_p (op0, op1))
13619     {
13620       /* Shouldn't happen often (it's useless, obviously), but when it does
13621          we'd generate incorrect code if we continue below.  */
13622       emit_move_insn (dest, op0);
13623       return;
13624     }
13625 
13626   if (REG_P (mask) && REGNO (dest) == REGNO (mask))     /* alternative 0 */
13627     {
13628       gcc_assert (REGNO (op1) == REGNO (scratch));
13629 
13630       x = gen_rtx_AND (vmode, scratch, mask);
13631       emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13632 
13633       dest = mask;
13634       op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13635       x = gen_rtx_NOT (vmode, dest);
13636       x = gen_rtx_AND (vmode, x, op0);
13637       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13638     }
13639   else
13640     {
13641       if (REGNO (op1) == REGNO (scratch))               /* alternative 1,3 */
13642         {
13643           x = gen_rtx_AND (vmode, scratch, mask);
13644         }
13645       else                                              /* alternative 2,4 */
13646         {
13647           gcc_assert (REGNO (mask) == REGNO (scratch));
13648           op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13649           x = gen_rtx_AND (vmode, scratch, op1);
13650         }
13651       emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13652 
13653       if (REGNO (op0) == REGNO (dest))                  /* alternative 1,2 */
13654         {
13655           dest = simplify_gen_subreg (vmode, op0, mode, 0);
13656           x = gen_rtx_AND (vmode, dest, nmask);
13657         }
13658       else                                              /* alternative 3,4 */
13659         {
13660           gcc_assert (REGNO (nmask) == REGNO (dest));
13661           dest = nmask;
13662           op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13663           x = gen_rtx_AND (vmode, dest, op0);
13664         }
13665       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13666     }
13667 
13668   x = gen_rtx_IOR (vmode, dest, scratch);
13669   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13670 }
13671 
13672 /* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes and whether the
   CC mode is at least as constrained as REQ_MODE.  */
13675 
13676 int
13677 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13678 {
13679   rtx set;
13680   enum machine_mode set_mode;
13681 
13682   set = PATTERN (insn);
13683   if (GET_CODE (set) == PARALLEL)
13684     set = XVECEXP (set, 0, 0);
13685   gcc_assert (GET_CODE (set) == SET);
13686   gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13687 
13688   set_mode = GET_MODE (SET_DEST (set));
13689   switch (set_mode)
13690     {
13691     case CCNOmode:
13692       if (req_mode != CCNOmode
13693           && (req_mode != CCmode
13694               || XEXP (SET_SRC (set), 1) != const0_rtx))
13695         return 0;
13696       break;
13697     case CCmode:
13698       if (req_mode == CCGCmode)
13699         return 0;
13700       /* FALLTHRU */
13701     case CCGCmode:
13702       if (req_mode == CCGOCmode || req_mode == CCNOmode)
13703         return 0;
13704       /* FALLTHRU */
13705     case CCGOCmode:
13706       if (req_mode == CCZmode)
13707         return 0;
13708       /* FALLTHRU */
13709     case CCAmode:
13710     case CCCmode:
13711     case CCOmode:
13712     case CCSmode:
13713     case CCZmode:
13714       break;
13715 
13716     default:
13717       gcc_unreachable ();
13718     }
13719 
13720   return (GET_MODE (SET_SRC (set)) == set_mode);
13721 }
13722 
13723 /* Generate insn patterns to do an integer compare of OPERANDS.  */
13724 
13725 static rtx
13726 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
13727 {
13728   enum machine_mode cmpmode;
13729   rtx tmp, flags;
13730 
13731   cmpmode = SELECT_CC_MODE (code, op0, op1);
13732   flags = gen_rtx_REG (cmpmode, FLAGS_REG);
13733 
13734   /* This is very simple, but making the interface the same as in the
13735      FP case makes the rest of the code easier.  */
13736   tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
13737   emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
13738 
13739   /* Return the test that should be put into the flags user, i.e.
13740      the bcc, scc, or cmov instruction.  */
13741   return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
13742 }
13743 
13744 /* Figure out whether to use ordered or unordered fp comparisons.
13745    Return the appropriate mode to use.  */
13746 
13747 enum machine_mode
13748 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
13749 {
13750   /* ??? In order to make all comparisons reversible, we do all comparisons
13751      non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
13753      comparisons trapping again, since it results in better code when using
13754      FCOM based compares.  */
13755   return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
13756 }
13757 
13758 enum machine_mode
13759 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
13760 {
13761   enum machine_mode mode = GET_MODE (op0);
13762 
13763   if (SCALAR_FLOAT_MODE_P (mode))
13764     {
13765       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
13766       return ix86_fp_compare_mode (code);
13767     }
13768 
13769   switch (code)
13770     {
13771       /* Only zero flag is needed.  */
13772     case EQ:                    /* ZF=0 */
13773     case NE:                    /* ZF!=0 */
13774       return CCZmode;
13775       /* Codes needing carry flag.  */
13776     case GEU:                   /* CF=0 */
13777     case LTU:                   /* CF=1 */
13778       /* Detect overflow checks.  They need just the carry flag.  */
13779       if (GET_CODE (op0) == PLUS
13780           && rtx_equal_p (op1, XEXP (op0, 0)))
13781         return CCCmode;
13782       else
13783         return CCmode;
13784     case GTU:                   /* CF=0 & ZF=0 */
13785     case LEU:                   /* CF=1 | ZF=1 */
13786       /* Detect overflow checks.  They need just the carry flag.  */
13787       if (GET_CODE (op0) == MINUS
13788           && rtx_equal_p (op1, XEXP (op0, 0)))
13789         return CCCmode;
13790       else
13791         return CCmode;
13792       /* Codes possibly doable only with sign flag when
13793          comparing against zero.  */
13794     case GE:                    /* SF=OF   or   SF=0 */
13795     case LT:                    /* SF<>OF  or   SF=1 */
13796       if (op1 == const0_rtx)
13797         return CCGOCmode;
13798       else
13799         /* For other cases Carry flag is not required.  */
13800         return CCGCmode;
      /* Codes doable only with the sign flag when comparing
         against zero, but we lack a jump instruction for them,
         so we use relational tests against the overflow flag,
         which thus needs to be zero.  */
13805     case GT:                    /* ZF=0 & SF=OF */
13806     case LE:                    /* ZF=1 | SF<>OF */
13807       if (op1 == const0_rtx)
13808         return CCNOmode;
13809       else
13810         return CCGCmode;
      /* The strcmp pattern does (use flags), and combine may ask us for
         the proper mode.  */
13813     case USE:
13814       return CCmode;
13815     default:
13816       gcc_unreachable ();
13817     }
13818 }
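
/* E.g. the overflow-check idiom "if (a + b < a)" on unsigned operands
   reaches this function as an LTU comparison of (plus a b) against a,
   which the CCCmode cases above recognize as needing only the carry
   flag.  */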
13819 
13820 /* Return the fixed registers used for condition codes.  */
13821 
13822 static bool
13823 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13824 {
13825   *p1 = FLAGS_REG;
13826   *p2 = FPSR_REG;
13827   return true;
13828 }
13829 
13830 /* If two condition code modes are compatible, return a condition code
13831    mode which is compatible with both.  Otherwise, return
13832    VOIDmode.  */
13833 
13834 static enum machine_mode
13835 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
13836 {
13837   if (m1 == m2)
13838     return m1;
13839 
13840   if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
13841     return VOIDmode;
13842 
13843   if ((m1 == CCGCmode && m2 == CCGOCmode)
13844       || (m1 == CCGOCmode && m2 == CCGCmode))
13845     return CCGCmode;
13846 
13847   switch (m1)
13848     {
13849     default:
13850       gcc_unreachable ();
13851 
13852     case CCmode:
13853     case CCGCmode:
13854     case CCGOCmode:
13855     case CCNOmode:
13856     case CCAmode:
13857     case CCCmode:
13858     case CCOmode:
13859     case CCSmode:
13860     case CCZmode:
13861       switch (m2)
13862         {
13863         default:
13864           return VOIDmode;
13865 
13866         case CCmode:
13867         case CCGCmode:
13868         case CCGOCmode:
13869         case CCNOmode:
13870         case CCAmode:
13871         case CCCmode:
13872         case CCOmode:
13873         case CCSmode:
13874         case CCZmode:
13875           return CCmode;
13876         }
13877 
13878     case CCFPmode:
13879     case CCFPUmode:
13880       /* These are only compatible with themselves, which we already
13881          checked above.  */
13882       return VOIDmode;
13883     }
13884 }
13885 
13886 /* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for the branch that
   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
   is not required, its value is set to UNKNOWN.
13890    We never require more than two branches.  */
13891 
13892 void
13893 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
13894                           enum rtx_code *first_code,
13895                           enum rtx_code *second_code)
13896 {
13897   *first_code = code;
13898   *bypass_code = UNKNOWN;
13899   *second_code = UNKNOWN;
13900 
13901   /* The fcomi comparison sets flags as follows:
13902 
13903      cmp    ZF PF CF
13904      >      0  0  0
13905      <      0  0  1
13906      =      1  0  0
13907      un     1  1  1 */
13908 
13909   switch (code)
13910     {
13911     case GT:                    /* GTU - CF=0 & ZF=0 */
13912     case GE:                    /* GEU - CF=0 */
13913     case ORDERED:               /* PF=0 */
13914     case UNORDERED:             /* PF=1 */
13915     case UNEQ:                  /* EQ - ZF=1 */
13916     case UNLT:                  /* LTU - CF=1 */
13917     case UNLE:                  /* LEU - CF=1 | ZF=1 */
13918     case LTGT:                  /* EQ - ZF=0 */
13919       break;
13920     case LT:                    /* LTU - CF=1 - fails on unordered */
13921       *first_code = UNLT;
13922       *bypass_code = UNORDERED;
13923       break;
13924     case LE:                    /* LEU - CF=1 | ZF=1 - fails on unordered */
13925       *first_code = UNLE;
13926       *bypass_code = UNORDERED;
13927       break;
13928     case EQ:                    /* EQ - ZF=1 - fails on unordered */
13929       *first_code = UNEQ;
13930       *bypass_code = UNORDERED;
13931       break;
13932     case NE:                    /* NE - ZF=0 - fails on unordered */
13933       *first_code = LTGT;
13934       *second_code = UNORDERED;
13935       break;
13936     case UNGE:                  /* GEU - CF=0 - fails on unordered */
13937       *first_code = GE;
13938       *second_code = UNORDERED;
13939       break;
13940     case UNGT:                  /* GTU - CF=0 & ZF=0 - fails on unordered */
13941       *first_code = GT;
13942       *second_code = UNORDERED;
13943       break;
13944     default:
13945       gcc_unreachable ();
13946     }
13947   if (!TARGET_IEEE_FP)
13948     {
13949       *second_code = UNKNOWN;
13950       *bypass_code = UNKNOWN;
13951     }
13952 }
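
/* As an example of the splitting above: for IEEE "a < b" we get
   first_code = UNLT with bypass_code = UNORDERED, corresponding to a
   sequence of roughly

       fucomip
       jp   .Lskip     # bypass: do not branch on unordered
       jb   .Ltarget   # CF=1: a < b, or unordered (excluded above)
   .Lskip:

   since a plain jb alone would also be taken for unordered operands.  */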
13953 
/* Return the cost of a comparison done with fcom + arithmetic operations
   on AX.  All the following functions use the number of instructions as
   their cost metric.  In the future this should be tweaked to compute
   bytes for optimize_size and take into account the performance of
   various instructions on various CPUs.  */
13958 static int
13959 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
13960 {
13961   if (!TARGET_IEEE_FP)
13962     return 4;
13963   /* The cost of code output by ix86_expand_fp_compare.  */
13964   switch (code)
13965     {
13966     case UNLE:
13967     case UNLT:
13968     case LTGT:
13969     case GT:
13970     case GE:
13971     case UNORDERED:
13972     case ORDERED:
13973     case UNEQ:
13974       return 4;
13976     case LT:
13977     case NE:
13978     case EQ:
13979     case UNGE:
13980       return 5;
13982     case LE:
13983     case UNGT:
13984       return 6;
13986     default:
13987       gcc_unreachable ();
13988     }
13989 }
13990 
13991 /* Return cost of comparison done using fcomi operation.
13992    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
13993 static int
13994 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
13995 {
13996   enum rtx_code bypass_code, first_code, second_code;
13997   /* Return arbitrarily high cost when instruction is not supported - this
13998      prevents gcc from using it.  */
13999   if (!TARGET_CMOVE)
14000     return 1024;
14001   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14002   return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
14003 }
14004 
14005 /* Return cost of comparison done using sahf operation.
14006    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
14007 static int
14008 ix86_fp_comparison_sahf_cost (enum rtx_code code)
14009 {
14010   enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when instruction is not preferred - this
     prevents gcc from using it.  */
14013   if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
14014     return 1024;
14015   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14016   return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
14017 }
14018 
/* Compute the cost of the comparison, using the cheapest of the available
   methods.  See ix86_fp_comparison_arithmetics_cost for the metrics.  */
14021 static int
14022 ix86_fp_comparison_cost (enum rtx_code code)
14023 {
  int fcomi_cost, sahf_cost, arithmetics_cost;
14025   int min;
14026 
14027   fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
14028   sahf_cost = ix86_fp_comparison_sahf_cost (code);
14029 
14030   min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
14031   if (min > sahf_cost)
14032     min = sahf_cost;
14033   if (min > fcomi_cost)
14034     min = fcomi_cost;
14035   return min;
14036 }
14037 
14038 /* Return true if we should use an FCOMI instruction for this
14039    fp comparison.  */
14040 
14041 int
14042 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
14043 {
14044   enum rtx_code swapped_code = swap_condition (code);
14045 
14046   return ((ix86_fp_comparison_cost (code)
14047            == ix86_fp_comparison_fcomi_cost (code))
14048           || (ix86_fp_comparison_cost (swapped_code)
14049               == ix86_fp_comparison_fcomi_cost (swapped_code)));
14050 }
14051 
14052 /* Swap, force into registers, or otherwise massage the two operands
14053    to a fp comparison.  The operands are updated in place; the new
14054    comparison code is returned.  */
14055 
14056 static enum rtx_code
14057 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14058 {
14059   enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14060   rtx op0 = *pop0, op1 = *pop1;
14061   enum machine_mode op_mode = GET_MODE (op0);
14062   int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14063 
14064   /* All of the unordered compare instructions only work on registers.
14065      The same is true of the fcomi compare instructions.  The XFmode
14066      compare instructions require registers except when comparing
14067      against zero or when converting operand 1 from fixed point to
14068      floating point.  */
14069 
14070   if (!is_sse
14071       && (fpcmp_mode == CCFPUmode
14072           || (op_mode == XFmode
14073               && ! (standard_80387_constant_p (op0) == 1
14074                     || standard_80387_constant_p (op1) == 1)
14075               && GET_CODE (op1) != FLOAT)
14076           || ix86_use_fcomi_compare (code)))
14077     {
14078       op0 = force_reg (op_mode, op0);
14079       op1 = force_reg (op_mode, op1);
14080     }
14081   else
14082     {
14083       /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
14084          things around if they appear profitable, otherwise force op0
14085          into a register.  */
14086 
14087       if (standard_80387_constant_p (op0) == 0
14088           || (MEM_P (op0)
14089               && ! (standard_80387_constant_p (op1) == 0
14090                     || MEM_P (op1))))
14091         {
14092           rtx tmp;
14093           tmp = op0, op0 = op1, op1 = tmp;
14094           code = swap_condition (code);
14095         }
14096 
14097       if (!REG_P (op0))
14098         op0 = force_reg (op_mode, op0);
14099 
14100       if (CONSTANT_P (op1))
14101         {
14102           int tmp = standard_80387_constant_p (op1);
14103           if (tmp == 0)
14104             op1 = validize_mem (force_const_mem (op_mode, op1));
14105           else if (tmp == 1)
14106             {
14107               if (TARGET_CMOVE)
14108                 op1 = force_reg (op_mode, op1);
14109             }
14110           else
14111             op1 = force_reg (op_mode, op1);
14112         }
14113     }
14114 
14115   /* Try to rearrange the comparison to make it cheaper.  */
14116   if (ix86_fp_comparison_cost (code)
14117       > ix86_fp_comparison_cost (swap_condition (code))
14118       && (REG_P (op1) || can_create_pseudo_p ()))
14119     {
14120       rtx tmp;
14121       tmp = op0, op0 = op1, op1 = tmp;
14122       code = swap_condition (code);
14123       if (!REG_P (op0))
14124         op0 = force_reg (op_mode, op0);
14125     }
14126 
14127   *pop0 = op0;
14128   *pop1 = op1;
14129   return code;
14130 }
14131 
/* Convert the comparison codes we use to represent an FP comparison to the
   integer code that will result in a proper branch.  Return UNKNOWN if no
   such code is available.  */
14135 
14136 enum rtx_code
14137 ix86_fp_compare_code_to_integer (enum rtx_code code)
14138 {
14139   switch (code)
14140     {
14141     case GT:
14142       return GTU;
14143     case GE:
14144       return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
14161     default:
14162       return UNKNOWN;
14163     }
14164 }
14165 
14166 /* Generate insn patterns to do a floating point compare of OPERANDS.  */
14167 
14168 static rtx
14169 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
14170                         rtx *second_test, rtx *bypass_test)
14171 {
14172   enum machine_mode fpcmp_mode, intcmp_mode;
14173   rtx tmp, tmp2;
14174   int cost = ix86_fp_comparison_cost (code);
14175   enum rtx_code bypass_code, first_code, second_code;
14176 
14177   fpcmp_mode = ix86_fp_compare_mode (code);
14178   code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14179 
14180   if (second_test)
14181     *second_test = NULL_RTX;
14182   if (bypass_test)
14183     *bypass_test = NULL_RTX;
14184 
14185   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14186 
14187   /* Do fcomi/sahf based test when profitable.  */
14188   if (ix86_fp_comparison_arithmetics_cost (code) > cost
14189       && (bypass_code == UNKNOWN || bypass_test)
14190       && (second_code == UNKNOWN || second_test))
14191     {
14192       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14193       tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14194                          tmp);
14195       if (TARGET_CMOVE)
14196         emit_insn (tmp);
14197       else
14198         {
14199           gcc_assert (TARGET_SAHF);
14200 
14201           if (!scratch)
14202             scratch = gen_reg_rtx (HImode);
14203           tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14204 
14205           emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14206         }
14207 
14208       /* The FP codes work out to act like unsigned.  */
14209       intcmp_mode = fpcmp_mode;
14210       code = first_code;
14211       if (bypass_code != UNKNOWN)
14212         *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
14213                                        gen_rtx_REG (intcmp_mode, FLAGS_REG),
14214                                        const0_rtx);
14215       if (second_code != UNKNOWN)
14216         *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
14217                                        gen_rtx_REG (intcmp_mode, FLAGS_REG),
14218                                        const0_rtx);
14219     }
14220   else
14221     {
14222       /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
14223       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14224       tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14225       if (!scratch)
14226         scratch = gen_reg_rtx (HImode);
14227       emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14228 
14229       /* In the unordered case, we have to check C2 for NaN's, which
14230          doesn't happen to work out to anything nice combination-wise.
14231          So do some bit twiddling on the value we've got in AH to come
14232          up with an appropriate set of condition codes.  */
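
      /* After fnstsw, AH holds bits 8-15 of the FPU status word, so
         within AH the condition bits appear as C0 -> 0x01, C2 -> 0x04
         and C3 -> 0x40; the constants below (0x01, 0x04, 0x05, 0x40,
         0x44, 0x45) test combinations of those bits.  */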
14233 
14234       intcmp_mode = CCNOmode;
14235       switch (code)
14236         {
14237         case GT:
14238         case UNGT:
14239           if (code == GT || !TARGET_IEEE_FP)
14240             {
14241               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14242               code = EQ;
14243             }
14244           else
14245             {
14246               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14247               emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14248               emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14249               intcmp_mode = CCmode;
14250               code = GEU;
14251             }
14252           break;
14253         case LT:
14254         case UNLT:
14255           if (code == LT && TARGET_IEEE_FP)
14256             {
14257               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14258               emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14259               intcmp_mode = CCmode;
14260               code = EQ;
14261             }
14262           else
14263             {
14264               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14265               code = NE;
14266             }
14267           break;
14268         case GE:
14269         case UNGE:
14270           if (code == GE || !TARGET_IEEE_FP)
14271             {
14272               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14273               code = EQ;
14274             }
14275           else
14276             {
14277               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14278               emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14279                                              GEN_INT (0x01)));
14280               code = NE;
14281             }
14282           break;
14283         case LE:
14284         case UNLE:
14285           if (code == LE && TARGET_IEEE_FP)
14286             {
14287               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14288               emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14289               emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14290               intcmp_mode = CCmode;
14291               code = LTU;
14292             }
14293           else
14294             {
14295               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14296               code = NE;
14297             }
14298           break;
14299         case EQ:
14300         case UNEQ:
14301           if (code == EQ && TARGET_IEEE_FP)
14302             {
14303               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14304               emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14305               intcmp_mode = CCmode;
14306               code = EQ;
14307             }
14308           else
14309             {
14310               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14311               code = NE;
14312               break;
14313             }
14314           break;
14315         case NE:
14316         case LTGT:
14317           if (code == NE && TARGET_IEEE_FP)
14318             {
14319               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14320               emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14321                                              GEN_INT (0x40)));
14322               code = NE;
14323             }
14324           else
14325             {
14326               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14327               code = EQ;
14328             }
14329           break;
14330 
14331         case UNORDERED:
14332           emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14333           code = NE;
14334           break;
14335         case ORDERED:
14336           emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14337           code = EQ;
14338           break;
14339 
14340         default:
14341           gcc_unreachable ();
14342         }
14343     }
14344 
14345   /* Return the test that should be put into the flags user, i.e.
14346      the bcc, scc, or cmov instruction.  */
14347   return gen_rtx_fmt_ee (code, VOIDmode,
14348                          gen_rtx_REG (intcmp_mode, FLAGS_REG),
14349                          const0_rtx);
14350 }
14351 
14352 rtx
14353 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14354 {
14355   rtx op0, op1, ret;
14356   op0 = ix86_compare_op0;
14357   op1 = ix86_compare_op1;
14358 
14359   if (second_test)
14360     *second_test = NULL_RTX;
14361   if (bypass_test)
14362     *bypass_test = NULL_RTX;
14363 
14364   if (ix86_compare_emitted)
14365     {
14366       ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
14367       ix86_compare_emitted = NULL_RTX;
14368     }
14369   else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14370     {
14371       gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14372       ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14373                                     second_test, bypass_test);
14374     }
14375   else
14376     ret = ix86_expand_int_compare (code, op0, op1);
14377 
14378   return ret;
14379 }
14380 
/* Return true if the CODE will result in a nontrivial jump sequence.  */
14382 bool
14383 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14384 {
14385   enum rtx_code bypass_code, first_code, second_code;
14386   if (!TARGET_CMOVE)
14387     return true;
14388   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14389   return bypass_code != UNKNOWN || second_code != UNKNOWN;
14390 }
14391 
14392 void
14393 ix86_expand_branch (enum rtx_code code, rtx label)
14394 {
14395   rtx tmp;
14396 
14397   /* If we have emitted a compare insn, go straight to simple.
14398      ix86_expand_compare won't emit anything if ix86_compare_emitted
     is non-NULL.  */
14400   if (ix86_compare_emitted)
14401     goto simple;
14402 
14403   switch (GET_MODE (ix86_compare_op0))
14404     {
14405     case QImode:
14406     case HImode:
14407     case SImode:
14408       simple:
14409       tmp = ix86_expand_compare (code, NULL, NULL);
14410       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14411                                   gen_rtx_LABEL_REF (VOIDmode, label),
14412                                   pc_rtx);
14413       emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14414       return;
14415 
14416     case SFmode:
14417     case DFmode:
14418     case XFmode:
14419       {
14420         rtvec vec;
14421         int use_fcomi;
14422         enum rtx_code bypass_code, first_code, second_code;
14423 
14424         code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14425                                              &ix86_compare_op1);
14426 
14427         ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14428 
        /* Check whether we will use the natural sequence with one jump.  If
           so, we can expand the jump early.  Otherwise delay expansion by
           creating a compound insn so as not to confuse the optimizers.  */
14432         if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14433           {
14434             ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
14435                                   gen_rtx_LABEL_REF (VOIDmode, label),
14436                                   pc_rtx, NULL_RTX, NULL_RTX);
14437           }
14438         else
14439           {
14440             tmp = gen_rtx_fmt_ee (code, VOIDmode,
14441                                   ix86_compare_op0, ix86_compare_op1);
14442             tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14443                                         gen_rtx_LABEL_REF (VOIDmode, label),
14444                                         pc_rtx);
14445             tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
14446 
14447             use_fcomi = ix86_use_fcomi_compare (code);
14448             vec = rtvec_alloc (3 + !use_fcomi);
14449             RTVEC_ELT (vec, 0) = tmp;
14450             RTVEC_ELT (vec, 1)
14451               = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
14452             RTVEC_ELT (vec, 2)
14453               = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
14454             if (! use_fcomi)
14455               RTVEC_ELT (vec, 3)
14456                 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
14457 
14458             emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
14459           }
14460         return;
14461       }
14462 
14463     case DImode:
14464       if (TARGET_64BIT)
14465         goto simple;
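      /* FALLTHRU */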
14466     case TImode:
      /* Expand DImode/TImode branch into multiple compare+branch.  */
14468       {
14469         rtx lo[2], hi[2], label2;
14470         enum rtx_code code1, code2, code3;
14471         enum machine_mode submode;
14472 
14473         if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14474           {
14475             tmp = ix86_compare_op0;
14476             ix86_compare_op0 = ix86_compare_op1;
14477             ix86_compare_op1 = tmp;
14478             code = swap_condition (code);
14479           }
14480         if (GET_MODE (ix86_compare_op0) == DImode)
14481           {
14482             split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14483             split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14484             submode = SImode;
14485           }
14486         else
14487           {
14488             split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14489             split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14490             submode = DImode;
14491           }
14492 
14493         /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14494            avoid two branches.  This costs one extra insn, so disable when
14495            optimizing for size.  */
14496 
14497         if ((code == EQ || code == NE)
14498             && (!optimize_insn_for_size_p ()
14499                 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14500           {
14501             rtx xor0, xor1;
14502 
14503             xor1 = hi[0];
14504             if (hi[1] != const0_rtx)
14505               xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14506                                    NULL_RTX, 0, OPTAB_WIDEN);
14507 
14508             xor0 = lo[0];
14509             if (lo[1] != const0_rtx)
14510               xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14511                                    NULL_RTX, 0, OPTAB_WIDEN);
14512 
14513             tmp = expand_binop (submode, ior_optab, xor1, xor0,
14514                                 NULL_RTX, 0, OPTAB_WIDEN);
14515 
14516             ix86_compare_op0 = tmp;
14517             ix86_compare_op1 = const0_rtx;
14518             ix86_expand_branch (code, label);
14519             return;
14520           }
14521 
        /* Otherwise, if we are doing less-than or greater-or-equal and
           op1 is a constant whose low word is zero, we can just examine
           the high word.  Similarly for a low word of -1 with
           less-or-equal or greater-than.  */
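        /* E.g. with 32-bit words, "x < 0x500000000LL" has a zero low
           word, so it reduces to the single signed compare hi(x) < 5.  */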
14526 
14527         if (CONST_INT_P (hi[1]))
14528           switch (code)
14529             {
14530             case LT: case LTU: case GE: case GEU:
14531               if (lo[1] == const0_rtx)
14532                 {
14533                   ix86_compare_op0 = hi[0];
14534                   ix86_compare_op1 = hi[1];
14535                   ix86_expand_branch (code, label);
14536                   return;
14537                 }
14538               break;
14539             case LE: case LEU: case GT: case GTU:
14540               if (lo[1] == constm1_rtx)
14541                 {
14542                   ix86_compare_op0 = hi[0];
14543                   ix86_compare_op1 = hi[1];
14544                   ix86_expand_branch (code, label);
14545                   return;
14546                 }
14547               break;
14548             default:
14549               break;
14550             }
14551 
14552         /* Otherwise, we need two or three jumps.  */
14553 
14554         label2 = gen_label_rtx ();
14555 
14556         code1 = code;
14557         code2 = swap_condition (code);
14558         code3 = unsigned_condition (code);
14559 
14560         switch (code)
14561           {
14562           case LT: case GT: case LTU: case GTU:
14563             break;
14564 
14565           case LE:   code1 = LT;  code2 = GT;  break;
14566           case GE:   code1 = GT;  code2 = LT;  break;
14567           case LEU:  code1 = LTU; code2 = GTU; break;
14568           case GEU:  code1 = GTU; code2 = LTU; break;
14569 
14570           case EQ:   code1 = UNKNOWN; code2 = NE;  break;
14571           case NE:   code2 = UNKNOWN; break;
14572 
14573           default:
14574             gcc_unreachable ();
14575           }
14576 
14577         /*
14578          * a < b =>
14579          *    if (hi(a) < hi(b)) goto true;
14580          *    if (hi(a) > hi(b)) goto false;
14581          *    if (lo(a) < lo(b)) goto true;
14582          *  false:
14583          */
14584 
14585         ix86_compare_op0 = hi[0];
14586         ix86_compare_op1 = hi[1];
14587 
14588         if (code1 != UNKNOWN)
14589           ix86_expand_branch (code1, label);
14590         if (code2 != UNKNOWN)
14591           ix86_expand_branch (code2, label2);
14592 
14593         ix86_compare_op0 = lo[0];
14594         ix86_compare_op1 = lo[1];
14595         ix86_expand_branch (code3, label);
14596 
14597         if (code2 != UNKNOWN)
14598           emit_label (label2);
14599         return;
14600       }
14601 
14602     default:
14603       gcc_unreachable ();
14604     }
14605 }
14606 
14607 /* Split branch based on floating point condition.  */
14608 void
14609 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14610                       rtx target1, rtx target2, rtx tmp, rtx pushed)
14611 {
14612   rtx second, bypass;
14613   rtx label = NULL_RTX;
14614   rtx condition;
14615   int bypass_probability = -1, second_probability = -1, probability = -1;
14616   rtx i;
14617 
14618   if (target2 != pc_rtx)
14619     {
14620       rtx tmp = target2;
14621       code = reverse_condition_maybe_unordered (code);
14622       target2 = target1;
14623       target1 = tmp;
14624     }
14625 
14626   condition = ix86_expand_fp_compare (code, op1, op2,
14627                                       tmp, &second, &bypass);
14628 
14629   /* Remove pushed operand from stack.  */
14630   if (pushed)
14631     ix86_free_from_memory (GET_MODE (pushed));
14632 
14633   if (split_branch_probability >= 0)
14634     {
      /* Distribute the probabilities across the jumps.
         Assume that BYPASS and SECOND always test
         for UNORDERED.  */
14638       probability = split_branch_probability;
14639 
      /* A value of 1 is low enough that the probability need not be
         updated.  Later we may run some experiments and see whether
         unordered values are more frequent in practice.  */
14643       if (bypass)
14644         bypass_probability = 1;
14645       if (second)
14646         second_probability = 1;
14647     }
14648   if (bypass != NULL_RTX)
14649     {
14650       label = gen_label_rtx ();
14651       i = emit_jump_insn (gen_rtx_SET
14652                           (VOIDmode, pc_rtx,
14653                            gen_rtx_IF_THEN_ELSE (VOIDmode,
14654                                                  bypass,
14655                                                  gen_rtx_LABEL_REF (VOIDmode,
14656                                                                     label),
14657                                                  pc_rtx)));
14658       if (bypass_probability >= 0)
14659         REG_NOTES (i)
14660           = gen_rtx_EXPR_LIST (REG_BR_PROB,
14661                                GEN_INT (bypass_probability),
14662                                REG_NOTES (i));
14663     }
14664   i = emit_jump_insn (gen_rtx_SET
14665                       (VOIDmode, pc_rtx,
14666                        gen_rtx_IF_THEN_ELSE (VOIDmode,
14667                                              condition, target1, target2)));
14668   if (probability >= 0)
14669     REG_NOTES (i)
14670       = gen_rtx_EXPR_LIST (REG_BR_PROB,
14671                            GEN_INT (probability),
14672                            REG_NOTES (i));
14673   if (second != NULL_RTX)
14674     {
14675       i = emit_jump_insn (gen_rtx_SET
14676                           (VOIDmode, pc_rtx,
14677                            gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14678                                                  target2)));
14679       if (second_probability >= 0)
14680         REG_NOTES (i)
14681           = gen_rtx_EXPR_LIST (REG_BR_PROB,
14682                                GEN_INT (second_probability),
14683                                REG_NOTES (i));
14684     }
14685   if (label != NULL_RTX)
14686     emit_label (label);
14687 }
14688 
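/* Expand a setcc operation: store into the QImode register DEST the
   result of comparing ix86_compare_op0 with ix86_compare_op1 under CODE.
   Return 1 on success, 0 to fail the pattern (double-word integer
   compares take a different path).  */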
14689 int
14690 ix86_expand_setcc (enum rtx_code code, rtx dest)
14691 {
14692   rtx ret, tmp, tmpreg, equiv;
14693   rtx second_test, bypass_test;
14694 
14695   if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
14696     return 0; /* FAIL */
14697 
14698   gcc_assert (GET_MODE (dest) == QImode);
14699 
14700   ret = ix86_expand_compare (code, &second_test, &bypass_test);
14701   PUT_MODE (ret, QImode);
14702 
14703   tmp = dest;
14704   tmpreg = dest;
14705 
14706   emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
14707   if (bypass_test || second_test)
14708     {
14709       rtx test = second_test;
14710       int bypass = 0;
14711       rtx tmp2 = gen_reg_rtx (QImode);
14712       if (bypass_test)
14713         {
14714           gcc_assert (!second_test);
14715           test = bypass_test;
14716           bypass = 1;
14717           PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14718         }
14719       PUT_MODE (test, QImode);
14720       emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
14721 
14722       if (bypass)
14723         emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14724       else
14725         emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14726     }
14727 
14728   /* Attach a REG_EQUAL note describing the comparison result.  */
14729   if (ix86_compare_op0 && ix86_compare_op1)
14730     {
14731       equiv = simplify_gen_relational (code, QImode,
14732                                        GET_MODE (ix86_compare_op0),
14733                                        ix86_compare_op0, ix86_compare_op1);
14734       set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14735     }
14736 
14737   return 1; /* DONE */
14738 }
14739 
/* Expand a comparison that sets or clears the carry flag.  Return true
   when successful, and set *POP to the comparison operation.  */
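/* For example, an unsigned "a < b" maps directly onto the carry flag set
   by cmp (the LTU condition), so its result can be consumed by sbb or
   adc without a setcc.  */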
14742 static bool
14743 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
14744 {
14745   enum machine_mode mode =
14746     GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
14747 
  /* Do not handle double-word compares; they go through a special path.  */
14749   if (mode == (TARGET_64BIT ? TImode : DImode))
14750     return false;
14751 
14752   if (SCALAR_FLOAT_MODE_P (mode))
14753     {
14754       rtx second_test = NULL, bypass_test = NULL;
14755       rtx compare_op, compare_seq;
14756 
14757       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14758 
      /* Shortcut: the following common codes never translate
         into carry flag compares.  */
14761       if (code == EQ || code == NE || code == UNEQ || code == LTGT
14762           || code == ORDERED || code == UNORDERED)
14763         return false;
14764 
      /* These comparisons require the zero flag; swap the operands so
         they no longer do.  */
14766       if ((code == GT || code == UNLE || code == LE || code == UNGT)
14767           && !TARGET_IEEE_FP)
14768         {
14769           rtx tmp = op0;
14770           op0 = op1;
14771           op1 = tmp;
14772           code = swap_condition (code);
14773         }
14774 
      /* Try to expand the comparison and verify that we end up with a
         carry-flag-based comparison.  This fails only when we decide to
         expand the comparison using arithmetic, which is not a common
         scenario.  */
14779       start_sequence ();
14780       compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14781                                            &second_test, &bypass_test);
14782       compare_seq = get_insns ();
14783       end_sequence ();
14784 
14785       if (second_test || bypass_test)
14786         return false;
14787 
14788       if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14789           || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14790         code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
14791       else
14792         code = GET_CODE (compare_op);
14793 
14794       if (code != LTU && code != GEU)
14795         return false;
14796 
14797       emit_insn (compare_seq);
14798       *pop = compare_op;
14799       return true;
14800     }
14801 
14802   if (!INTEGRAL_MODE_P (mode))
14803     return false;
14804 
14805   switch (code)
14806     {
14807     case LTU:
14808     case GEU:
14809       break;
14810 
    /* Convert a==0 into (unsigned)a<1, and a!=0 into (unsigned)a>=1.  */
14812     case EQ:
14813     case NE:
14814       if (op1 != const0_rtx)
14815         return false;
14816       op1 = const1_rtx;
14817       code = (code == EQ ? LTU : GEU);
14818       break;
14819 
    /* Convert a>b into b<a or a>=b+1.  */
14821     case GTU:
14822     case LEU:
14823       if (CONST_INT_P (op1))
14824         {
14825           op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
          /* Bail out on overflow.  We could still swap the operands, but
             that would force loading the constant into a register.  */
14828           if (op1 == const0_rtx
14829               || !x86_64_immediate_operand (op1, GET_MODE (op1)))
14830             return false;
14831           code = (code == GTU ? GEU : LTU);
14832         }
14833       else
14834         {
14835           rtx tmp = op1;
14836           op1 = op0;
14837           op0 = tmp;
14838           code = (code == GTU ? LTU : GEU);
14839         }
14840       break;
14841 
14842     /* Convert a>=0 into (unsigned)a<0x80000000.  */
14843     case LT:
14844     case GE:
14845       if (mode == DImode || op1 != const0_rtx)
14846         return false;
      op1 = gen_int_mode ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1),
                          mode);
14848       code = (code == LT ? GEU : LTU);
14849       break;
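    /* Similarly, convert a<=-1 into (unsigned)a>=0x80000000.  */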
14850     case LE:
14851     case GT:
14852       if (mode == DImode || op1 != constm1_rtx)
14853         return false;
      op1 = gen_int_mode ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1),
                          mode);
14855       code = (code == LE ? GEU : LTU);
14856       break;
14857 
14858     default:
14859       return false;
14860     }
  /* Swapping the operands may cause a constant to appear as the first
     operand.  */
14862   if (!nonimmediate_operand (op0, VOIDmode))
14863     {
14864       if (!can_create_pseudo_p ())
14865         return false;
14866       op0 = force_reg (mode, op0);
14867     }
14868   ix86_compare_op0 = op0;
14869   ix86_compare_op1 = op1;
14870   *pop = ix86_expand_compare (code, NULL, NULL);
14871   gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
14872   return true;
14873 }
14874 
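/* Expand an integer conditional move, operands[0] = operands[1]
   ? operands[2] : operands[3], where operands[1] holds the comparison
   code applied to ix86_compare_op0 and ix86_compare_op1.  Return 1
   (DONE) on success, 0 (FAIL) to fall back to the generic expander.  */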
14875 int
14876 ix86_expand_int_movcc (rtx operands[])
14877 {
14878   enum rtx_code code = GET_CODE (operands[1]), compare_code;
14879   rtx compare_seq, compare_op;
14880   rtx second_test, bypass_test;
14881   enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
14883 
14884   start_sequence ();
14885   compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
14886   compare_seq = get_insns ();
14887   end_sequence ();
14888 
14889   compare_code = GET_CODE (compare_op);
14890 
14891   if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
14892       || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
14893     sign_bit_compare_p = true;
14894 
14895   /* Don't attempt mode expansion here -- if we had to expand 5 or 6
14896      HImode insns, we'd be swallowed in word prefix ops.  */
14897 
14898   if ((mode != HImode || TARGET_FAST_PREFIX)
14899       && (mode != (TARGET_64BIT ? TImode : DImode))
14900       && CONST_INT_P (operands[2])
14901       && CONST_INT_P (operands[3]))
14902     {
14903       rtx out = operands[0];
14904       HOST_WIDE_INT ct = INTVAL (operands[2]);
14905       HOST_WIDE_INT cf = INTVAL (operands[3]);
14906       HOST_WIDE_INT diff;
14907 
14908       diff = ct - cf;
      /* Sign-bit compares are better done using shifts than by using
         sbb.  */
14911       if (sign_bit_compare_p
14912           || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
14913                                              ix86_compare_op1, &compare_op))
14914         {
14915           /* Detect overlap between destination and compare sources.  */
14916           rtx tmp = out;
14917 
14918           if (!sign_bit_compare_p)
14919             {
14920               bool fpcmp = false;
14921 
14922               compare_code = GET_CODE (compare_op);
14923 
14924               if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14925                   || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14926                 {
14927                   fpcmp = true;
14928                   compare_code = ix86_fp_compare_code_to_integer (compare_code);
14929                 }
14930 
              /* To simplify the rest of the code, restrict to the GEU case.  */
14932               if (compare_code == LTU)
14933                 {
14934                   HOST_WIDE_INT tmp = ct;
14935                   ct = cf;
14936                   cf = tmp;
14937                   compare_code = reverse_condition (compare_code);
14938                   code = reverse_condition (code);
14939                 }
14940               else
14941                 {
14942                   if (fpcmp)
14943                     PUT_CODE (compare_op,
14944                               reverse_condition_maybe_unordered
14945                                 (GET_CODE (compare_op)));
14946                   else
                    PUT_CODE (compare_op,
                              reverse_condition (GET_CODE (compare_op)));
14948                 }
14949               diff = ct - cf;
14950 
14951               if (reg_overlap_mentioned_p (out, ix86_compare_op0)
14952                   || reg_overlap_mentioned_p (out, ix86_compare_op1))
14953                 tmp = gen_reg_rtx (mode);
14954 
14955               if (mode == DImode)
14956                 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
14957               else
                emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
                                                 compare_op));
14959             }
14960           else
14961             {
14962               if (code == GT || code == GE)
14963                 code = reverse_condition (code);
14964               else
14965                 {
14966                   HOST_WIDE_INT tmp = ct;
14967                   ct = cf;
14968                   cf = tmp;
14969                   diff = ct - cf;
14970                 }
14971               tmp = emit_store_flag (tmp, code, ix86_compare_op0,
14972                                      ix86_compare_op1, VOIDmode, 0, -1);
14973             }
14974 
14975           if (diff == 1)
14976             {
14977               /*
14978                * cmpl op0,op1
14979                * sbbl dest,dest
14980                * [addl dest, ct]
14981                *
14982                * Size 5 - 8.
14983                */
14984               if (ct)
14985                 tmp = expand_simple_binop (mode, PLUS,
14986                                            tmp, GEN_INT (ct),
14987                                            copy_rtx (tmp), 1, OPTAB_DIRECT);
14988             }
14989           else if (cf == -1)
14990             {
14991               /*
14992                * cmpl op0,op1
14993                * sbbl dest,dest
14994                * orl $ct, dest
14995                *
14996                * Size 8.
14997                */
14998               tmp = expand_simple_binop (mode, IOR,
14999                                          tmp, GEN_INT (ct),
15000                                          copy_rtx (tmp), 1, OPTAB_DIRECT);
15001             }
15002           else if (diff == -1 && ct)
15003             {
15004               /*
15005                * cmpl op0,op1
15006                * sbbl dest,dest
15007                * notl dest
15008                * [addl dest, cf]
15009                *
15010                * Size 8 - 11.
15011                */
15012               tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15013               if (cf)
15014                 tmp = expand_simple_binop (mode, PLUS,
15015                                            copy_rtx (tmp), GEN_INT (cf),
15016                                            copy_rtx (tmp), 1, OPTAB_DIRECT);
15017             }
15018           else
15019             {
15020               /*
15021                * cmpl op0,op1
15022                * sbbl dest,dest
15023                * [notl dest]
15024                * andl cf - ct, dest
15025                * [addl dest, ct]
15026                *
15027                * Size 8 - 11.
15028                */
15029 
15030               if (cf == 0)
15031                 {
15032                   cf = ct;
15033                   ct = 0;
15034                   tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15035                 }
15036 
15037               tmp = expand_simple_binop (mode, AND,
15038                                          copy_rtx (tmp),
15039                                          gen_int_mode (cf - ct, mode),
15040                                          copy_rtx (tmp), 1, OPTAB_DIRECT);
15041               if (ct)
15042                 tmp = expand_simple_binop (mode, PLUS,
15043                                            copy_rtx (tmp), GEN_INT (ct),
15044                                            copy_rtx (tmp), 1, OPTAB_DIRECT);
15045             }
15046 
15047           if (!rtx_equal_p (tmp, out))
15048             emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15049 
15050           return 1; /* DONE */
15051         }
15052 
15053       if (diff < 0)
15054         {
15055           enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15056 
15057           HOST_WIDE_INT tmp;
15058           tmp = ct, ct = cf, cf = tmp;
15059           diff = -diff;
15060 
15061           if (SCALAR_FLOAT_MODE_P (cmp_mode))
15062             {
15063               gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15064 
              /* We may be reversing an unordered compare to a normal
                 compare, which is not valid in general (we might convert a
                 non-trapping condition into a trapping one).  However, on
                 i386 we currently emit all comparisons unordered.  */
15069               compare_code = reverse_condition_maybe_unordered (compare_code);
15070               code = reverse_condition_maybe_unordered (code);
15071             }
15072           else
15073             {
15074               compare_code = reverse_condition (compare_code);
15075               code = reverse_condition (code);
15076             }
15077         }
15078 
15079       compare_code = UNKNOWN;
15080       if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15081           && CONST_INT_P (ix86_compare_op1))
15082         {
15083           if (ix86_compare_op1 == const0_rtx
15084               && (code == LT || code == GE))
15085             compare_code = code;
15086           else if (ix86_compare_op1 == constm1_rtx)
15087             {
15088               if (code == LE)
15089                 compare_code = LT;
15090               else if (code == GT)
15091                 compare_code = GE;
15092             }
15093         }
15094 
15095       /* Optimize dest = (op0 < 0) ? -1 : cf.  */
15096       if (compare_code != UNKNOWN
15097           && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15098           && (cf == -1 || ct == -1))
15099         {
          /* If the LEA code below could be used, only optimize if it
             results in a 2-insn sequence.  */
15102 
15103           if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15104                  || diff == 3 || diff == 5 || diff == 9)
15105               || (compare_code == LT && ct == -1)
15106               || (compare_code == GE && cf == -1))
15107             {
15108               /*
15109                * notl op1       (if necessary)
15110                * sarl $31, op1
15111                * orl cf, op1
15112                */
15113               if (ct != -1)
15114                 {
15115                   cf = ct;
15116                   ct = -1;
15117                   code = reverse_condition (code);
15118                 }
15119 
15120               out = emit_store_flag (out, code, ix86_compare_op0,
15121                                      ix86_compare_op1, VOIDmode, 0, -1);
15122 
15123               out = expand_simple_binop (mode, IOR,
15124                                          out, GEN_INT (cf),
15125                                          out, 1, OPTAB_DIRECT);
15126               if (out != operands[0])
15127                 emit_move_insn (operands[0], out);
15128 
15129               return 1; /* DONE */
15130             }
15131         }
15132 
15134       if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15135            || diff == 3 || diff == 5 || diff == 9)
15136           && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15137           && (mode != DImode
15138               || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15139         {
15140           /*
15141            * xorl dest,dest
15142            * cmpl op1,op2
15143            * setcc dest
15144            * lea cf(dest*(ct-cf)),dest
15145            *
15146            * Size 14.
15147            *
15148            * This also catches the degenerate setcc-only case.
15149            */
15150 
15151           rtx tmp;
15152           int nops;
15153 
15154           out = emit_store_flag (out, code, ix86_compare_op0,
15155                                  ix86_compare_op1, VOIDmode, 0, 1);
15156 
15157           nops = 0;
          /* On x86_64 the lea instruction operates on Pmode, so we need
             the arithmetic done in the proper mode to match.  */
15160           if (diff == 1)
15161             tmp = copy_rtx (out);
15162           else
15163             {
15164               rtx out1;
15165               out1 = copy_rtx (out);
15166               tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15167               nops++;
15168               if (diff & 1)
15169                 {
15170                   tmp = gen_rtx_PLUS (mode, tmp, out1);
15171                   nops++;
15172                 }
15173             }
15174           if (cf != 0)
15175             {
15176               tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15177               nops++;
15178             }
15179           if (!rtx_equal_p (tmp, out))
15180             {
15181               if (nops == 1)
15182                 out = force_operand (tmp, copy_rtx (out));
15183               else
15184                 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15185             }
15186           if (!rtx_equal_p (out, operands[0]))
15187             emit_move_insn (operands[0], copy_rtx (out));
15188 
15189           return 1; /* DONE */
15190         }
15191 
15192       /*
15193        * General case:                  Jumpful:
15194        *   xorl dest,dest               cmpl op1, op2
15195        *   cmpl op1, op2                movl ct, dest
15196        *   setcc dest                   jcc 1f
15197        *   decl dest                    movl cf, dest
15198        *   andl (cf-ct),dest            1:
15199        *   addl ct,dest
15200        *
15201        * Size 20.                       Size 14.
15202        *
15203        * This is reasonably steep, but branch mispredict costs are
       * high on modern CPUs, so consider failing only if optimizing
15205        * for space.
15206        */
15207 
15208       if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15209           && BRANCH_COST (optimize_insn_for_speed_p (),
15210                           false) >= 2)
15211         {
15212           if (cf == 0)
15213             {
15214               enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15215 
15216               cf = ct;
15217               ct = 0;
15218 
15219               if (SCALAR_FLOAT_MODE_P (cmp_mode))
15220                 {
15221                   gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15222 
                  /* We may be reversing an unordered compare to a normal
                     compare, which is not valid in general (we might
                     convert a non-trapping condition into a trapping one).
                     However, on i386 we currently emit all comparisons
                     unordered.  */
15227                   code = reverse_condition_maybe_unordered (code);
15228                 }
15229               else
15230                 {
15231                   code = reverse_condition (code);
15232                   if (compare_code != UNKNOWN)
15233                     compare_code = reverse_condition (compare_code);
15234                 }
15235             }
15236 
15237           if (compare_code != UNKNOWN)
15238             {
15239               /* notl op1       (if needed)
15240                  sarl $31, op1
15241                  andl (cf-ct), op1
15242                  addl ct, op1
15243 
15244                  For x < 0 (resp. x <= -1) there will be no notl,
15245                  so if possible swap the constants to get rid of the
15246                  complement.
15247                  True/false will be -1/0 while code below (store flag
15248                  followed by decrement) is 0/-1, so the constants need
15249                  to be exchanged once more.  */
15250 
15251               if (compare_code == GE || !cf)
15252                 {
15253                   code = reverse_condition (code);
15254                   compare_code = LT;
15255                 }
15256               else
15257                 {
15258                   HOST_WIDE_INT tmp = cf;
15259                   cf = ct;
15260                   ct = tmp;
15261                 }
15262 
15263               out = emit_store_flag (out, code, ix86_compare_op0,
15264                                      ix86_compare_op1, VOIDmode, 0, -1);
15265             }
15266           else
15267             {
15268               out = emit_store_flag (out, code, ix86_compare_op0,
15269                                      ix86_compare_op1, VOIDmode, 0, 1);
15270 
15271               out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15272                                          copy_rtx (out), 1, OPTAB_DIRECT);
15273             }
15274 
15275           out = expand_simple_binop (mode, AND, copy_rtx (out),
15276                                      gen_int_mode (cf - ct, mode),
15277                                      copy_rtx (out), 1, OPTAB_DIRECT);
15278           if (ct)
15279             out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15280                                        copy_rtx (out), 1, OPTAB_DIRECT);
15281           if (!rtx_equal_p (out, operands[0]))
15282             emit_move_insn (operands[0], copy_rtx (out));
15283 
15284           return 1; /* DONE */
15285         }
15286     }
15287 
15288   if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15289     {
15290       /* Try a few things more with specific constants and a variable.  */
15291 
15292       optab op;
15293       rtx var, orig_out, out, tmp;
15294 
15295       if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15296         return 0; /* FAIL */
15297 
      /* If one of the two operands is an interesting constant, recurse
         to load a 0/-1 constant via the code above, then mask in the
         variable with a logical operation.  */
15300 
15301       if (CONST_INT_P (operands[2]))
15302         {
15303           var = operands[3];
15304           if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15305             operands[3] = constm1_rtx, op = and_optab;
15306           else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15307             operands[3] = const0_rtx, op = ior_optab;
15308           else
15309             return 0; /* FAIL */
15310         }
15311       else if (CONST_INT_P (operands[3]))
15312         {
15313           var = operands[2];
15314           if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15315             operands[2] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
15317             operands[2] = const0_rtx, op = ior_optab;
15318           else
15319             return 0; /* FAIL */
15320         }
15321       else
15322         return 0; /* FAIL */
15323 
15324       orig_out = operands[0];
15325       tmp = gen_reg_rtx (mode);
15326       operands[0] = tmp;
15327 
15328       /* Recurse to get the constant loaded.  */
15329       if (ix86_expand_int_movcc (operands) == 0)
15330         return 0; /* FAIL */
15331 
15332       /* Mask in the interesting variable.  */
15333       out = expand_binop (mode, op, var, tmp, orig_out, 0,
15334                           OPTAB_WIDEN);
15335       if (!rtx_equal_p (out, orig_out))
15336         emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15337 
15338       return 1; /* DONE */
15339     }
15340 
15341   /*
15342    * For comparison with above,
15343    *
15344    * movl cf,dest
15345    * movl ct,tmp
15346    * cmpl op1,op2
15347    * cmovcc tmp,dest
15348    *
15349    * Size 15.
15350    */
15351 
15352   if (! nonimmediate_operand (operands[2], mode))
15353     operands[2] = force_reg (mode, operands[2]);
15354   if (! nonimmediate_operand (operands[3], mode))
15355     operands[3] = force_reg (mode, operands[3]);
15356 
15357   if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15358     {
15359       rtx tmp = gen_reg_rtx (mode);
15360       emit_move_insn (tmp, operands[3]);
15361       operands[3] = tmp;
15362     }
15363   if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15364     {
15365       rtx tmp = gen_reg_rtx (mode);
15366       emit_move_insn (tmp, operands[2]);
15367       operands[2] = tmp;
15368     }
15369 
15370   if (! register_operand (operands[2], VOIDmode)
15371       && (mode == QImode
15372           || ! register_operand (operands[3], VOIDmode)))
15373     operands[2] = force_reg (mode, operands[2]);
15374 
15375   if (mode == QImode
15376       && ! register_operand (operands[3], VOIDmode))
15377     operands[3] = force_reg (mode, operands[3]);
15378 
15379   emit_insn (compare_seq);
15380   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15381                           gen_rtx_IF_THEN_ELSE (mode,
15382                                                 compare_op, operands[2],
15383                                                 operands[3])));
15384   if (bypass_test)
15385     emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15386                             gen_rtx_IF_THEN_ELSE (mode,
15387                                   bypass_test,
15388                                   copy_rtx (operands[3]),
15389                                   copy_rtx (operands[0]))));
15390   if (second_test)
15391     emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15392                             gen_rtx_IF_THEN_ELSE (mode,
15393                                   second_test,
15394                                   copy_rtx (operands[2]),
15395                                   copy_rtx (operands[0]))));
15396 
15397   return 1; /* DONE */
15398 }
15399 
/* Swap, force into registers, or otherwise massage the two operands
   to an SSE comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.
15403 
15404    The DEST operand exists to help determine whether to commute commutative
15405    operators.  The POP0/POP1 operands are updated in place.  The new
15406    comparison code is returned, or UNKNOWN if not implementable.  */
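/* For example, GT is rewritten as LT with swapped operands, since SSE
   provides cmplt/cmple style predicates but no direct greater-than
   compare.  */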
15407 
15408 static enum rtx_code
15409 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15410                                   rtx *pop0, rtx *pop1)
15411 {
15412   rtx tmp;
15413 
15414   switch (code)
15415     {
15416     case LTGT:
15417     case UNEQ:
15418       /* We have no LTGT as an operator.  We could implement it with
15419          NE & ORDERED, but this requires an extra temporary.  It's
15420          not clear that it's worth it.  */
15421       return UNKNOWN;
15422 
15423     case LT:
15424     case LE:
15425     case UNGT:
15426     case UNGE:
15427       /* These are supported directly.  */
15428       break;
15429 
15430     case EQ:
15431     case NE:
15432     case UNORDERED:
15433     case ORDERED:
15434       /* For commutative operators, try to canonicalize the destination
15435          operand to be first in the comparison - this helps reload to
15436          avoid extra moves.  */
15437       if (!dest || !rtx_equal_p (dest, *pop1))
15438         break;
15439       /* FALLTHRU */
15440 
15441     case GE:
15442     case GT:
15443     case UNLE:
15444     case UNLT:
15445       /* These are not supported directly.  Swap the comparison operands
15446          to transform into something that is supported.  */
15447       tmp = *pop0;
15448       *pop0 = *pop1;
15449       *pop1 = tmp;
15450       code = swap_condition (code);
15451       break;
15452 
15453     default:
15454       gcc_unreachable ();
15455     }
15456 
15457   return code;
15458 }
15459 
15460 /* Detect conditional moves that exactly match min/max operational
15461    semantics.  Note that this is IEEE safe, as long as we don't
15462    interchange the operands.
15463 
15464    Returns FALSE if this conditional move doesn't match a MIN/MAX,
15465    and TRUE if the operation is successful and instructions are emitted.  */
15466 
15467 static bool
15468 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15469                            rtx cmp_op1, rtx if_true, rtx if_false)
15470 {
15471   enum machine_mode mode;
15472   bool is_min;
15473   rtx tmp;
15474 
15475   if (code == LT)
15476     ;
15477   else if (code == UNGE)
15478     {
15479       tmp = if_true;
15480       if_true = if_false;
15481       if_false = tmp;
15482     }
15483   else
15484     return false;
15485 
15486   if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15487     is_min = true;
15488   else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15489     is_min = false;
15490   else
15491     return false;
15492 
15493   mode = GET_MODE (dest);
15494 
15495   /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15496      but MODE may be a vector mode and thus not appropriate.  */
15497   if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
15498     {
15499       int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15500       rtvec v;
15501 
15502       if_true = force_reg (mode, if_true);
15503       v = gen_rtvec (2, if_true, if_false);
15504       tmp = gen_rtx_UNSPEC (mode, v, u);
15505     }
15506   else
15507     {
15508       code = is_min ? SMIN : SMAX;
15509       tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15510     }
15511 
15512   emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15513   return true;
15514 }
15515 
/* Expand an SSE vector comparison.  Return the register with the result.  */
15517 
15518 static rtx
15519 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15520                      rtx op_true, rtx op_false)
15521 {
15522   enum machine_mode mode = GET_MODE (dest);
15523   rtx x;
15524 
15525   cmp_op0 = force_reg (mode, cmp_op0);
15526   if (!nonimmediate_operand (cmp_op1, mode))
15527     cmp_op1 = force_reg (mode, cmp_op1);
15528 
15529   if (optimize
15530       || reg_overlap_mentioned_p (dest, op_true)
15531       || reg_overlap_mentioned_p (dest, op_false))
15532     dest = gen_reg_rtx (mode);
15533 
15534   x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15535   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15536 
15537   return dest;
15538 }
15539 
15540 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15541    operations.  This is used for both scalar and vector conditional moves.  */
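/* The comparison result CMP is an element-wise all-ones/all-zeros mask,
   so in the general case the move computes
   dest = (cmp & op_true) | (~cmp & op_false).  */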
15542 
15543 static void
15544 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15545 {
15546   enum machine_mode mode = GET_MODE (dest);
15547   rtx t2, t3, x;
15548 
15549   if (op_false == CONST0_RTX (mode))
15550     {
15551       op_true = force_reg (mode, op_true);
15552       x = gen_rtx_AND (mode, cmp, op_true);
15553       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15554     }
15555   else if (op_true == CONST0_RTX (mode))
15556     {
15557       op_false = force_reg (mode, op_false);
15558       x = gen_rtx_NOT (mode, cmp);
15559       x = gen_rtx_AND (mode, x, op_false);
15560       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15561     }
15562   else if (TARGET_SSE5)
15563     {
15564       rtx pcmov = gen_rtx_SET (mode, dest,
15565                                gen_rtx_IF_THEN_ELSE (mode, cmp,
15566                                                      op_true,
15567                                                      op_false));
15568       emit_insn (pcmov);
15569     }
15570   else
15571     {
15572       op_true = force_reg (mode, op_true);
15573       op_false = force_reg (mode, op_false);
15574 
15575       t2 = gen_reg_rtx (mode);
15576       if (optimize)
15577         t3 = gen_reg_rtx (mode);
15578       else
15579         t3 = dest;
15580 
15581       x = gen_rtx_AND (mode, op_true, cmp);
15582       emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15583 
15584       x = gen_rtx_NOT (mode, cmp);
15585       x = gen_rtx_AND (mode, x, op_false);
15586       emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15587 
15588       x = gen_rtx_IOR (mode, t3, t2);
15589       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15590     }
15591 }
15592 
15593 /* Expand a floating-point conditional move.  Return true if successful.  */
15594 
15595 int
15596 ix86_expand_fp_movcc (rtx operands[])
15597 {
15598   enum machine_mode mode = GET_MODE (operands[0]);
15599   enum rtx_code code = GET_CODE (operands[1]);
15600   rtx tmp, compare_op, second_test, bypass_test;
15601 
15602   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15603     {
15604       enum machine_mode cmode;
15605 
      /* Since we have no cmove for SSE registers, don't force bad
         register allocation just to gain access to it.  Deny movcc when
         the comparison mode doesn't match the move mode.  */
15609       cmode = GET_MODE (ix86_compare_op0);
15610       if (cmode == VOIDmode)
15611         cmode = GET_MODE (ix86_compare_op1);
15612       if (cmode != mode)
15613         return 0;
15614 
15615       code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15616                                                &ix86_compare_op0,
15617                                                &ix86_compare_op1);
15618       if (code == UNKNOWN)
15619         return 0;
15620 
15621       if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15622                                      ix86_compare_op1, operands[2],
15623                                      operands[3]))
15624         return 1;
15625 
15626       tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15627                                  ix86_compare_op1, operands[2], operands[3]);
15628       ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15629       return 1;
15630     }
15631 
15632   /* The floating point conditional move instructions don't directly
15633      support conditions resulting from a signed integer comparison.  */
15634 
15635   compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15636 
15640   if (!fcmov_comparison_operator (compare_op, VOIDmode))
15641     {
15642       gcc_assert (!second_test && !bypass_test);
15643       tmp = gen_reg_rtx (QImode);
15644       ix86_expand_setcc (code, tmp);
15645       code = NE;
15646       ix86_compare_op0 = tmp;
15647       ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15649     }
15650   if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15651     {
15652       tmp = gen_reg_rtx (mode);
15653       emit_move_insn (tmp, operands[3]);
15654       operands[3] = tmp;
15655     }
15656   if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15657     {
15658       tmp = gen_reg_rtx (mode);
15659       emit_move_insn (tmp, operands[2]);
15660       operands[2] = tmp;
15661     }
15662 
15663   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15664                           gen_rtx_IF_THEN_ELSE (mode, compare_op,
15665                                                 operands[2], operands[3])));
15666   if (bypass_test)
15667     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15668                             gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15669                                                   operands[3], operands[0])));
15670   if (second_test)
15671     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15672                             gen_rtx_IF_THEN_ELSE (mode, second_test,
15673                                                   operands[2], operands[0])));
15674 
15675   return 1;
15676 }
15677 
15678 /* Expand a floating-point vector conditional move; a vcond operation
15679    rather than a movcc operation.  */
15680 
15681 bool
15682 ix86_expand_fp_vcond (rtx operands[])
15683 {
15684   enum rtx_code code = GET_CODE (operands[3]);
15685   rtx cmp;
15686 
15687   code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15688                                            &operands[4], &operands[5]);
15689   if (code == UNKNOWN)
15690     return false;
15691 
15692   if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15693                                  operands[5], operands[1], operands[2]))
15694     return true;
15695 
15696   cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15697                              operands[1], operands[2]);
15698   ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15699   return true;
15700 }
15701 
15702 /* Expand a signed/unsigned integral vector conditional move.  */
15703 
15704 bool
15705 ix86_expand_int_vcond (rtx operands[])
15706 {
15707   enum machine_mode mode = GET_MODE (operands[0]);
15708   enum rtx_code code = GET_CODE (operands[3]);
15709   bool negate = false;
15710   rtx x, cop0, cop1;
15711 
15712   cop0 = operands[4];
15713   cop1 = operands[5];
15714 
15715   /* SSE5 supports all of the comparisons on all vector int types.  */
15716   if (!TARGET_SSE5)
15717     {
15718       /* Canonicalize the comparison to EQ, GT, GTU.  */
15719       switch (code)
15720         {
15721         case EQ:
15722         case GT:
15723         case GTU:
15724           break;
15725 
15726         case NE:
15727         case LE:
15728         case LEU:
15729           code = reverse_condition (code);
15730           negate = true;
15731           break;
15732 
15733         case GE:
15734         case GEU:
15735           code = reverse_condition (code);
15736           negate = true;
15737           /* FALLTHRU */
15738 
15739         case LT:
15740         case LTU:
15741           code = swap_condition (code);
15742           x = cop0, cop0 = cop1, cop1 = x;
15743           break;
15744 
15745         default:
15746           gcc_unreachable ();
15747         }
15748 
      /* Only SSE4.1/SSE4.2 support V2DImode comparisons.  */
15750       if (mode == V2DImode)
15751         {
15752           switch (code)
15753             {
15754             case EQ:
15755               /* SSE4.1 supports EQ.  */
15756               if (!TARGET_SSE4_1)
15757                 return false;
15758               break;
15759 
15760             case GT:
15761             case GTU:
15762               /* SSE4.2 supports GT/GTU.  */
15763               if (!TARGET_SSE4_2)
15764                 return false;
15765               break;
15766 
15767             default:
15768               gcc_unreachable ();
15769             }
15770         }
15771 
      /* Unsigned parallel compare is not supported by the hardware.
         Play some tricks to turn this into a signed comparison, or an
         equality test against zero.  */
15775       if (code == GTU)
15776         {
15777           cop0 = force_reg (mode, cop0);
15778 
15779           switch (mode)
15780             {
15781             case V4SImode:
15782             case V2DImode:
15783                 {
15784                   rtx t1, t2, mask;
15785                   rtx (*gen_sub3) (rtx, rtx, rtx);
15786 
                  /* Subtract (-(INT MAX) - 1) from both operands to flip
                     their sign bits, so the unsigned comparison becomes
                     a signed one.  */
15789                   mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
15790                                                   true, false);
15791                   gen_sub3 = (mode == V4SImode
15792                               ? gen_subv4si3 : gen_subv2di3);
15793                   t1 = gen_reg_rtx (mode);
15794                   emit_insn (gen_sub3 (t1, cop0, mask));
15795 
15796                   t2 = gen_reg_rtx (mode);
15797                   emit_insn (gen_sub3 (t2, cop1, mask));
15798 
15799                   cop0 = t1;
15800                   cop1 = t2;
15801                   code = GT;
15802                 }
15803               break;
15804 
15805             case V16QImode:
15806             case V8HImode:
15807               /* Perform a parallel unsigned saturating subtraction.  */
15808               x = gen_reg_rtx (mode);
15809               emit_insn (gen_rtx_SET (VOIDmode, x,
15810                                       gen_rtx_US_MINUS (mode, cop0, cop1)));
15811 
15812               cop0 = x;
15813               cop1 = CONST0_RTX (mode);
15814               code = EQ;
15815               negate = !negate;
15816               break;
15817 
15818             default:
15819               gcc_unreachable ();
15820             }
15821         }
15822     }
15823 
15824   x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
15825                            operands[1+negate], operands[2-negate]);
15826 
15827   ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
15828                          operands[2-negate]);
15829   return true;
15830 }
15831 
15832 /* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
15833    true if we should do zero extension, else sign extension.  HIGH_P is
15834    true if we want the N/2 high elements, else the low elements.  */
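/* E.g. to sign-extend the low half of a V8HImode vector, we first build
   an all-ones/all-zeros sign mask with a GT compare against zero and
   then interleave it with the source, so each 16-bit element becomes a
   sign-extended 32-bit one.  */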
15835 
15836 void
15837 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15838 {
15839   enum machine_mode imode = GET_MODE (operands[1]);
15840   rtx (*unpack)(rtx, rtx, rtx);
15841   rtx se, dest;
15842 
15843   switch (imode)
15844     {
15845     case V16QImode:
15846       if (high_p)
15847         unpack = gen_vec_interleave_highv16qi;
15848       else
15849         unpack = gen_vec_interleave_lowv16qi;
15850       break;
15851     case V8HImode:
15852       if (high_p)
15853         unpack = gen_vec_interleave_highv8hi;
15854       else
15855         unpack = gen_vec_interleave_lowv8hi;
15856       break;
15857     case V4SImode:
15858       if (high_p)
15859         unpack = gen_vec_interleave_highv4si;
15860       else
15861         unpack = gen_vec_interleave_lowv4si;
15862       break;
15863     default:
15864       gcc_unreachable ();
15865     }
15866 
15867   dest = gen_lowpart (imode, operands[0]);
15868 
15869   if (unsigned_p)
15870     se = force_reg (imode, CONST0_RTX (imode));
15871   else
15872     se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
15873                               operands[1], pc_rtx, pc_rtx);
15874 
15875   emit_insn (unpack (dest, operands[1], se));
15876 }
15877 
15878 /* This function performs the same task as ix86_expand_sse_unpack,
15879    but with SSE4.1 instructions.  */
15880 
15881 void
15882 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15883 {
15884   enum machine_mode imode = GET_MODE (operands[1]);
15885   rtx (*unpack)(rtx, rtx);
15886   rtx src, dest;
15887 
15888   switch (imode)
15889     {
15890     case V16QImode:
15891       if (unsigned_p)
15892         unpack = gen_sse4_1_zero_extendv8qiv8hi2;
15893       else
15894         unpack = gen_sse4_1_extendv8qiv8hi2;
15895       break;
15896     case V8HImode:
15897       if (unsigned_p)
15898         unpack = gen_sse4_1_zero_extendv4hiv4si2;
15899       else
15900         unpack = gen_sse4_1_extendv4hiv4si2;
15901       break;
15902     case V4SImode:
15903       if (unsigned_p)
15904         unpack = gen_sse4_1_zero_extendv2siv2di2;
15905       else
15906         unpack = gen_sse4_1_extendv2siv2di2;
15907       break;
15908     default:
15909       gcc_unreachable ();
15910     }
15911 
15912   dest = operands[0];
15913   if (high_p)
15914     {
      /* Shift the high 8 bytes down into the low 8 bytes.  */
15916       src = gen_reg_rtx (imode);
15917       emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
15918                                    gen_lowpart (TImode, operands[1]),
15919                                    GEN_INT (64)));
15920     }
15921   else
15922     src = operands[1];
15923 
15924   emit_insn (unpack (dest, src));
15925 }
15926 
15927 /* This function performs the same task as ix86_expand_sse_unpack,
   but with SSE5 instructions.  */
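/* As used here, each byte of the PPERM control vector selects one byte
   of the result: PPERM_SRC | PPERM_SRC2 | i copies byte i of the second
   source operand, while PPERM_ZERO and PPERM_SIGN supply the zero- or
   sign-extension bytes.  */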
15929 
15930 void
15931 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15932 {
15933   enum machine_mode imode = GET_MODE (operands[1]);
15934   int pperm_bytes[16];
15935   int i;
15936   int h = (high_p) ? 8 : 0;
15937   int h2;
15938   int sign_extend;
15939   rtvec v = rtvec_alloc (16);
15940   rtvec vs;
15941   rtx x, p;
15942   rtx op0 = operands[0], op1 = operands[1];
15943 
15944   switch (imode)
15945     {
15946     case V16QImode:
15947       vs = rtvec_alloc (8);
15948       h2 = (high_p) ? 8 : 0;
15949       for (i = 0; i < 8; i++)
15950         {
15951           pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
15952           pperm_bytes[2*i+1] = ((unsigned_p)
15953                                 ? PPERM_ZERO
15954                                 : PPERM_SIGN | PPERM_SRC2 | i | h);
15955         }
15956 
15957       for (i = 0; i < 16; i++)
15958         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15959 
15960       for (i = 0; i < 8; i++)
15961         RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15962 
15963       p = gen_rtx_PARALLEL (VOIDmode, vs);
15964       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15965       if (unsigned_p)
15966         emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
15967       else
15968         emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
15969       break;
15970 
15971     case V8HImode:
15972       vs = rtvec_alloc (4);
15973       h2 = (high_p) ? 4 : 0;
15974       for (i = 0; i < 4; i++)
15975         {
15976           sign_extend = ((unsigned_p)
15977                          ? PPERM_ZERO
15978                          : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
15979           pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
15980           pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
15981           pperm_bytes[4*i+2] = sign_extend;
15982           pperm_bytes[4*i+3] = sign_extend;
15983         }
15984 
15985       for (i = 0; i < 16; i++)
15986         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15987 
15988       for (i = 0; i < 4; i++)
15989         RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15990 
15991       p = gen_rtx_PARALLEL (VOIDmode, vs);
15992       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15993       if (unsigned_p)
15994         emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
15995       else
15996         emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
15997       break;
15998 
15999     case V4SImode:
16000       vs = rtvec_alloc (2);
16001       h2 = (high_p) ? 2 : 0;
16002       for (i = 0; i < 2; i++)
16003         {
16004           sign_extend = ((unsigned_p)
16005                          ? PPERM_ZERO
16006                          : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16007           pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16008           pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16009           pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16010           pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16011           pperm_bytes[8*i+4] = sign_extend;
16012           pperm_bytes[8*i+5] = sign_extend;
16013           pperm_bytes[8*i+6] = sign_extend;
16014           pperm_bytes[8*i+7] = sign_extend;
16015         }
16016 
16017       for (i = 0; i < 16; i++)
16018         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16019 
16020       for (i = 0; i < 2; i++)
16021         RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16022 
16023       p = gen_rtx_PARALLEL (VOIDmode, vs);
16024       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16025       if (unsigned_p)
16026         emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16027       else
16028         emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16029       break;
16030 
16031     default:
16032       gcc_unreachable ();
16033     }
16034 
16035   return;
16036 }
16037 
/* Pack the elements of OPERANDS[1] and OPERANDS[2] into the next
   narrower integer vector type, keeping the low half of each element;
   OPERANDS[1] supplies the low half of the result and OPERANDS[2] the
   high half.  */
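/* E.g. for a V16QImode result the control vector built below selects
   bytes 0, 2, ..., 14 of each V8HImode source, i.e. the low byte of
   every 16-bit element.  */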
16040 void
16041 ix86_expand_sse5_pack (rtx operands[3])
16042 {
16043   enum machine_mode imode = GET_MODE (operands[0]);
16044   int pperm_bytes[16];
16045   int i;
16046   rtvec v = rtvec_alloc (16);
16047   rtx x;
16048   rtx op0 = operands[0];
16049   rtx op1 = operands[1];
16050   rtx op2 = operands[2];
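
  /* Each selector byte below names one source byte; the loops select the
     low (truncated) bytes of the elements of operand 1 for the low half
     of the result and of operand 2 for the high half.  E.g. for V8HImode
     inputs the selector is, schematically,
        { s1[0], s1[2], ..., s1[14], s2[0], s2[2], ..., s2[14] }.  */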
16051 
16052   switch (imode)
16053     {
16054     case V16QImode:
16055       for (i = 0; i < 8; i++)
16056         {
16057           pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16058           pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16059         }
16060 
16061       for (i = 0; i < 16; i++)
16062         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16063 
16064       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16065       emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
16066       break;
16067 
16068     case V8HImode:
16069       for (i = 0; i < 4; i++)
16070         {
16071           pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16072           pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16073           pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16074           pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16075         }
16076 
16077       for (i = 0; i < 16; i++)
16078         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16079 
16080       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16081       emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
16082       break;
16083 
16084     case V4SImode:
16085       for (i = 0; i < 2; i++)
16086         {
16087           pperm_bytes[(4*i)+0]  = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16088           pperm_bytes[(4*i)+1]  = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16089           pperm_bytes[(4*i)+2]  = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16090           pperm_bytes[(4*i)+3]  = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16091           pperm_bytes[(4*i)+8]  = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16092           pperm_bytes[(4*i)+9]  = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16093           pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16094           pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16095         }
16096 
16097       for (i = 0; i < 16; i++)
16098         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16099 
16100       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16101       emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
16102       break;
16103 
16104     default:
16105       gcc_unreachable ();
16106     }
16107 
16108   return;
16109 }
16110 
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by a conditional move can be
   done by generic code.  */
16114 int
16115 ix86_expand_int_addcc (rtx operands[])
16116 {
16117   enum rtx_code code = GET_CODE (operands[1]);
16118   rtx compare_op;
16119   rtx val = const0_rtx;
16120   bool fpcmp = false;
16121   enum machine_mode mode = GET_MODE (operands[0]);
16122 
16123   if (operands[3] != const1_rtx
16124       && operands[3] != constm1_rtx)
16125     return 0;
16126   if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16127                                        ix86_compare_op1, &compare_op))
16128      return 0;
16129   code = GET_CODE (compare_op);
16130 
16131   if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16132       || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16133     {
16134       fpcmp = true;
16135       code = ix86_fp_compare_code_to_integer (code);
16136     }
16137 
16138   if (code != LTU)
16139     {
16140       val = constm1_rtx;
16141       if (fpcmp)
16142         PUT_CODE (compare_op,
16143                   reverse_condition_maybe_unordered
16144                     (GET_CODE (compare_op)));
16145       else
16146         PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16147     }
16148   PUT_MODE (compare_op, mode);
16149 
16150   /* Construct either adc or sbb insn.  */
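  /* For example, with unsigned operands "x = (a < b) ? x + 1 : x"
     becomes, schematically:
        cmp  a, b     ; CF = (a < b)
        adc  x, 0     ; x += CF
     while the decrement flavor uses sbb with 0 instead.  */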
16151   if ((code == LTU) == (operands[3] == constm1_rtx))
16152     {
      switch (GET_MODE (operands[0]))
        {
        case QImode:
          emit_insn (gen_subqi3_carry (operands[0], operands[2],
                                       val, compare_op));
          break;
        case HImode:
          emit_insn (gen_subhi3_carry (operands[0], operands[2],
                                       val, compare_op));
          break;
        case SImode:
          emit_insn (gen_subsi3_carry (operands[0], operands[2],
                                       val, compare_op));
          break;
        case DImode:
          emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2],
                                             val, compare_op));
          break;
        default:
          gcc_unreachable ();
        }
16170     }
16171   else
16172     {
      switch (GET_MODE (operands[0]))
        {
        case QImode:
          emit_insn (gen_addqi3_carry (operands[0], operands[2],
                                       val, compare_op));
          break;
        case HImode:
          emit_insn (gen_addhi3_carry (operands[0], operands[2],
                                       val, compare_op));
          break;
        case SImode:
          emit_insn (gen_addsi3_carry (operands[0], operands[2],
                                       val, compare_op));
          break;
        case DImode:
          emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2],
                                             val, compare_op));
          break;
        default:
          gcc_unreachable ();
        }
16190     }
16191   return 1; /* DONE */
16192 }
16193 
16194 
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  At most four parts are generated.  */
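
/* For example, DImode on a 32-bit target splits into two SImode parts,
   XFmode into three and TFmode into four; on a 64-bit target XFmode and
   TFmode each split into two parts.  */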
16199 
16200 static int
16201 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
16202 {
16203   int size;
16204 
16205   if (!TARGET_64BIT)
    size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16207   else
16208     size = (GET_MODE_SIZE (mode) + 4) / 8;
16209 
16210   gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16211   gcc_assert (size >= 2 && size <= 4);
16212 
  /* Optimize constant pool references to immediates.  This is used by
     fp moves, which force all constants to memory to allow combining.  */
16215   if (MEM_P (operand) && MEM_READONLY_P (operand))
16216     {
16217       rtx tmp = maybe_get_pool_constant (operand);
16218       if (tmp)
16219         operand = tmp;
16220     }
16221 
16222   if (MEM_P (operand) && !offsettable_memref_p (operand))
16223     {
      /* The only non-offsettable memories we handle are pushes.  */
16225       int ok = push_operand (operand, VOIDmode);
16226 
16227       gcc_assert (ok);
16228 
16229       operand = copy_rtx (operand);
16230       PUT_MODE (operand, Pmode);
16231       parts[0] = parts[1] = parts[2] = parts[3] = operand;
16232       return size;
16233     }
16234 
16235   if (GET_CODE (operand) == CONST_VECTOR)
16236     {
16237       enum machine_mode imode = int_mode_for_mode (mode);
16238       /* Caution: if we looked through a constant pool memory above,
16239          the operand may actually have a different mode now.  That's
16240          ok, since we want to pun this all the way back to an integer.  */
16241       operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16242       gcc_assert (operand != NULL);
16243       mode = imode;
16244     }
16245 
16246   if (!TARGET_64BIT)
16247     {
16248       if (mode == DImode)
16249         split_di (&operand, 1, &parts[0], &parts[1]);
16250       else
16251         {
16252           int i;
16253 
16254           if (REG_P (operand))
16255             {
16256               gcc_assert (reload_completed);
16257               for (i = 0; i < size; i++)
16258                 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16259             }
16260           else if (offsettable_memref_p (operand))
16261             {
16262               operand = adjust_address (operand, SImode, 0);
16263               parts[0] = operand;
16264               for (i = 1; i < size; i++)
16265                 parts[i] = adjust_address (operand, SImode, 4 * i);
16266             }
16267           else if (GET_CODE (operand) == CONST_DOUBLE)
16268             {
16269               REAL_VALUE_TYPE r;
16270               long l[4];
16271 
16272               REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16273               switch (mode)
16274                 {
16275                 case TFmode:
16276                   real_to_target (l, &r, mode);
16277                   parts[3] = gen_int_mode (l[3], SImode);
16278                   parts[2] = gen_int_mode (l[2], SImode);
16279                   break;
16280                 case XFmode:
16281                   REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16282                   parts[2] = gen_int_mode (l[2], SImode);
16283                   break;
16284                 case DFmode:
16285                   REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16286                   break;
16287                 default:
16288                   gcc_unreachable ();
16289                 }
16290               parts[1] = gen_int_mode (l[1], SImode);
16291               parts[0] = gen_int_mode (l[0], SImode);
16292             }
16293           else
16294             gcc_unreachable ();
16295         }
16296     }
16297   else
16298     {
16299       if (mode == TImode)
16300         split_ti (&operand, 1, &parts[0], &parts[1]);
16301       if (mode == XFmode || mode == TFmode)
16302         {
          enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
16304           if (REG_P (operand))
16305             {
16306               gcc_assert (reload_completed);
16307               parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16308               parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16309             }
16310           else if (offsettable_memref_p (operand))
16311             {
16312               operand = adjust_address (operand, DImode, 0);
16313               parts[0] = operand;
16314               parts[1] = adjust_address (operand, upper_mode, 8);
16315             }
16316           else if (GET_CODE (operand) == CONST_DOUBLE)
16317             {
16318               REAL_VALUE_TYPE r;
16319               long l[4];
16320 
16321               REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16322               real_to_target (l, &r, mode);
16323 
              /* Do not use a shift by 32 to avoid a warning on 32-bit hosts.  */
16325               if (HOST_BITS_PER_WIDE_INT >= 64)
16326                 parts[0]
16327                   = gen_int_mode
16328                       ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16329                        + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16330                        DImode);
16331               else
16332                 parts[0] = immed_double_const (l[0], l[1], DImode);
16333 
16334               if (upper_mode == SImode)
16335                 parts[1] = gen_int_mode (l[2], SImode);
16336               else if (HOST_BITS_PER_WIDE_INT >= 64)
16337                 parts[1]
16338                   = gen_int_mode
16339                       ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16340                        + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16341                        DImode);
16342               else
16343                 parts[1] = immed_double_const (l[2], l[3], DImode);
16344             }
16345           else
16346             gcc_unreachable ();
16347         }
16348     }
16349 
16350   return size;
16351 }
16352 
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   The parts of the destination (operand 0) are placed in operands 2-5
   and the parts of the source (operand 1) in operands 6-9, in the order
   in which the moves are emitted.  */
16357 
16358 void
16359 ix86_split_long_move (rtx operands[])
16360 {
16361   rtx part[2][4];
16362   int nparts, i, j;
16363   int push = 0;
16364   int collisions = 0;
16365   enum machine_mode mode = GET_MODE (operands[0]);
16366   bool collisionparts[4];
16367 
  /* The DFmode expanders may ask us to move a double.
     For a 64-bit target this is a single move.  By handling it here
     we simplify the i386.md splitters.  */
16371   if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16372     {
      /* Optimize constant pool references to immediates.  This is used by
         fp moves, which force all constants to memory to allow combining.  */
16375 
16376       if (MEM_P (operands[1])
16377           && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16378           && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16379         operands[1] = get_pool_constant (XEXP (operands[1], 0));
16380       if (push_operand (operands[0], VOIDmode))
16381         {
16382           operands[0] = copy_rtx (operands[0]);
16383           PUT_MODE (operands[0], Pmode);
16384         }
16385       else
16386         operands[0] = gen_lowpart (DImode, operands[0]);
16387       operands[1] = gen_lowpart (DImode, operands[1]);
16388       emit_move_insn (operands[0], operands[1]);
16389       return;
16390     }
16391 
  /* The only non-offsettable memory we handle is a push.  */
16393   if (push_operand (operands[0], VOIDmode))
16394     push = 1;
16395   else
16396     gcc_assert (!MEM_P (operands[0])
16397                 || offsettable_memref_p (operands[0]));
16398 
16399   nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16400   ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16401 
  /* When emitting a push, take care of source operands that live
     on the stack.  */
16403   if (push && MEM_P (operands[1])
16404       && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16405     {
16406       rtx src_base = XEXP (part[1][nparts - 1], 0);
16407 
16408       /* Compensate for the stack decrement by 4.  */
16409       if (!TARGET_64BIT && nparts == 3
16410           && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
16411         src_base = plus_constant (src_base, 4);
16412 
      /* src_base refers to the stack pointer and is
         automatically decremented by each emitted push.  */
16415       for (i = 0; i < nparts; i++)
16416         part[1][i] = change_address (part[1][i],
16417                                      GET_MODE (part[1][i]), src_base);
16418     }
16419 
  /* We need to do the copying in the right order in case an address
     register of the source overlaps the destination.  */
16422   if (REG_P (part[0][0]) && MEM_P (part[1][0]))
16423     {
16424       rtx tmp;
16425 
16426       for (i = 0; i < nparts; i++)
16427         {
16428           collisionparts[i]
16429             = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16430           if (collisionparts[i])
16431             collisions++;
16432         }
16433 
      /* A collision in the middle part can be handled by reordering:
         swapping parts makes the colliding destination the last one
         written, after its final use in the source address.  */
16435       if (collisions == 1 && nparts == 3 && collisionparts [1])
16436         {
16437           tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16438           tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16439         }
16440       else if (collisions == 1
16441                && nparts == 4
16442                && (collisionparts [1] || collisionparts [2]))
16443         {
16444           if (collisionparts [1])
16445             {
16446               tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16447               tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16448             }
16449           else
16450             {
16451               tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16452               tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16453             }
16454         }
16455 
      /* If there are more collisions, we can't handle them by reordering.
         Do an lea to the last part and use only one colliding move.  */
16458       else if (collisions > 1)
16459         {
16460           rtx base;
16461 
16462           collisions = 1;
16463 
16464           base = part[0][nparts - 1];
16465 
          /* Handle the case when the last part isn't valid for lea.
             This happens in 64-bit mode storing the 12-byte XFmode.  */
16468           if (GET_MODE (base) != Pmode)
16469             base = gen_rtx_REG (Pmode, REGNO (base));
16470 
16471           emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16472           part[1][0] = replace_equiv_address (part[1][0], base);
16473           for (i = 1; i < nparts; i++)
16474             {
16475               tmp = plus_constant (base, UNITS_PER_WORD * i);
16476               part[1][i] = replace_equiv_address (part[1][i], tmp);
16477             }
16478         }
16479     }
16480 
16481   if (push)
16482     {
16483       if (!TARGET_64BIT)
16484         {
16485           if (nparts == 3)
16486             {
16487               if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16488                 emit_insn (gen_addsi3 (stack_pointer_rtx,
16489                                        stack_pointer_rtx, GEN_INT (-4)));
16490               emit_move_insn (part[0][2], part[1][2]);
16491             }
16492           else if (nparts == 4)
16493             {
16494               emit_move_insn (part[0][3], part[1][3]);
16495               emit_move_insn (part[0][2], part[1][2]);
16496             }
16497         }
16498       else
16499         {
          /* In 64-bit mode we don't have a 32-bit push available.  If the
             operand is a register, that is OK: we just use the larger
             counterpart.  We also retype memory operands; these come from
             an attempt to avoid a REX prefix when moving the second half
             of a TFmode value.  */
16504           if (GET_MODE (part[1][1]) == SImode)
16505             {
16506               switch (GET_CODE (part[1][1]))
16507                 {
16508                 case MEM:
16509                   part[1][1] = adjust_address (part[1][1], DImode, 0);
16510                   break;
16511 
16512                 case REG:
16513                   part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16514                   break;
16515 
16516                 default:
16517                   gcc_unreachable ();
16518                 }
16519 
16520               if (GET_MODE (part[1][0]) == SImode)
16521                 part[1][0] = part[1][1];
16522             }
16523         }
16524       emit_move_insn (part[0][1], part[1][1]);
16525       emit_move_insn (part[0][0], part[1][0]);
16526       return;
16527     }
16528 
  /* Choose the correct order so we do not overwrite the source
     before it is copied.  */
16530   if ((REG_P (part[0][0])
16531        && REG_P (part[1][1])
16532        && (REGNO (part[0][0]) == REGNO (part[1][1])
16533            || (nparts == 3
16534                && REGNO (part[0][0]) == REGNO (part[1][2]))
16535            || (nparts == 4
16536                && REGNO (part[0][0]) == REGNO (part[1][3]))))
16537       || (collisions > 0
16538           && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
16539     {
16540       for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16541         {
16542           operands[2 + i] = part[0][j];
16543           operands[6 + i] = part[1][j];
16544         }
16545     }
16546   else
16547     {
16548       for (i = 0; i < nparts; i++)
16549         {
16550           operands[2 + i] = part[0][i];
16551           operands[6 + i] = part[1][i];
16552         }
16553     }
16554 
16555   /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
16556   if (optimize_insn_for_size_p ())
16557     {
16558       for (j = 0; j < nparts - 1; j++)
16559         if (CONST_INT_P (operands[6 + j])
16560             && operands[6 + j] != const0_rtx
16561             && REG_P (operands[2 + j]))
16562           for (i = j; i < nparts - 1; i++)
16563             if (CONST_INT_P (operands[7 + i])
16564                 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16565               operands[7 + i] = operands[2 + j];
16566     }
16567 
16568   for (i = 0; i < nparts; i++)
16569     emit_move_insn (operands[2 + i], operands[6 + i]);
16570 
16571   return;
16572 }
16573 
16574 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16575    left shift by a constant, either using a single shift or
16576    a sequence of add instructions.  */
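
/* For example, "x <<= 1" is always emitted as "add x, x", and "x <<= 2"
   becomes two adds whenever 2 * add <= shift_const in the active cost
   table and we are not optimizing for size.  */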
16577 
16578 static void
16579 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
16580 {
16581   if (count == 1)
16582     {
16583       emit_insn ((mode == DImode
16584                   ? gen_addsi3
16585                   : gen_adddi3) (operand, operand, operand));
16586     }
16587   else if (!optimize_insn_for_size_p ()
16588            && count * ix86_cost->add <= ix86_cost->shift_const)
16589     {
16590       int i;
      for (i = 0; i < count; i++)
16592         {
16593           emit_insn ((mode == DImode
16594                       ? gen_addsi3
16595                       : gen_adddi3) (operand, operand, operand));
16596         }
16597     }
16598   else
16599     emit_insn ((mode == DImode
16600                 ? gen_ashlsi3
16601                 : gen_ashldi3) (operand, operand, GEN_INT (count)));
16602 }
16603 
16604 void
16605 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16606 {
16607   rtx low[2], high[2];
16608   int count;
16609   const int single_width = mode == DImode ? 32 : 64;
16610 
16611   if (CONST_INT_P (operands[2]))
16612     {
16613       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16614       count = INTVAL (operands[2]) & (single_width * 2 - 1);
16615 
16616       if (count >= single_width)
16617         {
16618           emit_move_insn (high[0], low[1]);
16619           emit_move_insn (low[0], const0_rtx);
16620 
16621           if (count > single_width)
16622             ix86_expand_ashl_const (high[0], count - single_width, mode);
16623         }
16624       else
16625         {
16626           if (!rtx_equal_p (operands[0], operands[1]))
16627             emit_move_insn (operands[0], operands[1]);
16628           emit_insn ((mode == DImode
16629                      ? gen_x86_shld
16630                      : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16631           ix86_expand_ashl_const (low[0], count, mode);
16632         }
16633       return;
16634     }
16635 
16636   (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16637 
16638   if (operands[1] == const1_rtx)
16639     {
      /* Assuming we've chosen QImode-capable registers, 1 << N
         can be done with two 32/64-bit shifts, no branches, no cmoves.  */
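      /* Schematically, on a 32-bit target this emits:
            xor  low, low
            xor  high, high
            test n, 32
            sete low_byte      ; low  = ((n & 32) == 0)
            setne high_byte    ; high = ((n & 32) != 0)
            shl  low, n        ; hardware masks the count to 5 bits
            shl  high, n
         so exactly one half ends up holding 1 << (n & 63).  */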
16642       if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16643         {
16644           rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16645 
16646           ix86_expand_clear (low[0]);
16647           ix86_expand_clear (high[0]);
16648           emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16649 
16650           d = gen_lowpart (QImode, low[0]);
16651           d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16652           s = gen_rtx_EQ (QImode, flags, const0_rtx);
16653           emit_insn (gen_rtx_SET (VOIDmode, d, s));
16654 
16655           d = gen_lowpart (QImode, high[0]);
16656           d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16657           s = gen_rtx_NE (QImode, flags, const0_rtx);
16658           emit_insn (gen_rtx_SET (VOIDmode, d, s));
16659         }
16660 
16661       /* Otherwise, we can get the same results by manually performing
16662          a bit extract operation on bit 5/6, and then performing the two
16663          shifts.  The two methods of getting 0/1 into low/high are exactly
16664          the same size.  Avoiding the shift in the bit extract case helps
16665          pentium4 a bit; no one else seems to care much either way.  */
16666       else
16667         {
16668           rtx x;
16669 
16670           if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16671             x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16672           else
16673             x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16674           emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
16675 
          emit_insn ((mode == DImode
                      ? gen_lshrsi3
                      : gen_lshrdi3) (high[0], high[0],
                                      GEN_INT (mode == DImode ? 5 : 6)));
16679           emit_insn ((mode == DImode
16680                       ? gen_andsi3
16681                       : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16682           emit_move_insn (low[0], high[0]);
16683           emit_insn ((mode == DImode
16684                       ? gen_xorsi3
16685                       : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16686         }
16687 
16688       emit_insn ((mode == DImode
16689                     ? gen_ashlsi3
16690                     : gen_ashldi3) (low[0], low[0], operands[2]));
16691       emit_insn ((mode == DImode
16692                     ? gen_ashlsi3
16693                     : gen_ashldi3) (high[0], high[0], operands[2]));
16694       return;
16695     }
16696 
16697   if (operands[1] == constm1_rtx)
16698     {
16699       /* For -1 << N, we can avoid the shld instruction, because we
16700          know that we're shifting 0...31/63 ones into a -1.  */
16701       emit_move_insn (low[0], constm1_rtx);
16702       if (optimize_insn_for_size_p ())
16703         emit_move_insn (high[0], low[0]);
16704       else
16705         emit_move_insn (high[0], constm1_rtx);
16706     }
16707   else
16708     {
16709       if (!rtx_equal_p (operands[0], operands[1]))
16710         emit_move_insn (operands[0], operands[1]);
16711 
16712       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16713       emit_insn ((mode == DImode
16714                   ? gen_x86_shld
16715                   : gen_x86_64_shld) (high[0], low[0], operands[2]));
16716     }
16717 
  emit_insn ((mode == DImode
              ? gen_ashlsi3
              : gen_ashldi3) (low[0], low[0], operands[2]));
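
  /* The shld/shl pair above only uses the low 5 (or 6) bits of the shift
     count, so emit a fixup: when bit 5 (or 6) of the count is set, move
     LOW into HIGH and clear LOW, using a conditional move when available
     and a branch otherwise.  */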
16719 
16720   if (TARGET_CMOVE && scratch)
16721     {
16722       ix86_expand_clear (scratch);
16723       emit_insn ((mode == DImode
16724                   ? gen_x86_shift_adj_1
16725                   : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
16726                                              scratch));
16727     }
16728   else
16729     emit_insn ((mode == DImode
16730                 ? gen_x86_shift_adj_2
16731                 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
16732 }
16733 
16734 void
16735 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16736 {
16737   rtx low[2], high[2];
16738   int count;
16739   const int single_width = mode == DImode ? 32 : 64;
16740 
16741   if (CONST_INT_P (operands[2]))
16742     {
16743       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16744       count = INTVAL (operands[2]) & (single_width * 2 - 1);
16745 
16746       if (count == single_width * 2 - 1)
16747         {
16748           emit_move_insn (high[0], high[1]);
16749           emit_insn ((mode == DImode
16750                       ? gen_ashrsi3
16751                       : gen_ashrdi3) (high[0], high[0],
16752                                       GEN_INT (single_width - 1)));
16753           emit_move_insn (low[0], high[0]);
16754 
16755         }
16756       else if (count >= single_width)
16757         {
16758           emit_move_insn (low[0], high[1]);
16759           emit_move_insn (high[0], low[0]);
16760           emit_insn ((mode == DImode
16761                       ? gen_ashrsi3
16762                       : gen_ashrdi3) (high[0], high[0],
16763                                       GEN_INT (single_width - 1)));
16764           if (count > single_width)
16765             emit_insn ((mode == DImode
16766                         ? gen_ashrsi3
16767                         : gen_ashrdi3) (low[0], low[0],
16768                                         GEN_INT (count - single_width)));
16769         }
16770       else
16771         {
16772           if (!rtx_equal_p (operands[0], operands[1]))
16773             emit_move_insn (operands[0], operands[1]);
16774           emit_insn ((mode == DImode
16775                       ? gen_x86_shrd
16776                       : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16777           emit_insn ((mode == DImode
16778                       ? gen_ashrsi3
16779                       : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
16780         }
16781     }
16782   else
16783     {
16784       if (!rtx_equal_p (operands[0], operands[1]))
16785         emit_move_insn (operands[0], operands[1]);
16786 
16787       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16788 
16789       emit_insn ((mode == DImode
16790                   ? gen_x86_shrd
16791                   : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16792       emit_insn ((mode == DImode
16793                   ? gen_ashrsi3
16794                   : gen_ashrdi3)  (high[0], high[0], operands[2]));
16795 
16796       if (TARGET_CMOVE && scratch)
16797         {
16798           emit_move_insn (scratch, high[0]);
16799           emit_insn ((mode == DImode
16800                       ? gen_ashrsi3
16801                       : gen_ashrdi3) (scratch, scratch,
16802                                       GEN_INT (single_width - 1)));
16803           emit_insn ((mode == DImode
16804                       ? gen_x86_shift_adj_1
16805                       : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16806                                                  scratch));
16807         }
16808       else
16809         emit_insn ((mode == DImode
16810                     ? gen_x86_shift_adj_3
16811                     : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
16812     }
16813 }
16814 
16815 void
16816 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
16817 {
16818   rtx low[2], high[2];
16819   int count;
16820   const int single_width = mode == DImode ? 32 : 64;
16821 
16822   if (CONST_INT_P (operands[2]))
16823     {
16824       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16825       count = INTVAL (operands[2]) & (single_width * 2 - 1);
16826 
16827       if (count >= single_width)
16828         {
16829           emit_move_insn (low[0], high[1]);
16830           ix86_expand_clear (high[0]);
16831 
16832           if (count > single_width)
16833             emit_insn ((mode == DImode
16834                         ? gen_lshrsi3
16835                         : gen_lshrdi3) (low[0], low[0],
16836                                         GEN_INT (count - single_width)));
16837         }
16838       else
16839         {
16840           if (!rtx_equal_p (operands[0], operands[1]))
16841             emit_move_insn (operands[0], operands[1]);
16842           emit_insn ((mode == DImode
16843                       ? gen_x86_shrd
16844                       : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16845           emit_insn ((mode == DImode
16846                       ? gen_lshrsi3
16847                       : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
16848         }
16849     }
16850   else
16851     {
16852       if (!rtx_equal_p (operands[0], operands[1]))
16853         emit_move_insn (operands[0], operands[1]);
16854 
16855       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16856 
16857       emit_insn ((mode == DImode
16858                   ? gen_x86_shrd
16859                   : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16860       emit_insn ((mode == DImode
16861                   ? gen_lshrsi3
16862                   : gen_lshrdi3) (high[0], high[0], operands[2]));
16863 
16864       /* Heh.  By reversing the arguments, we can reuse this pattern.  */
16865       if (TARGET_CMOVE && scratch)
16866         {
16867           ix86_expand_clear (scratch);
16868           emit_insn ((mode == DImode
16869                       ? gen_x86_shift_adj_1
16870                       : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16871                                                  scratch));
16872         }
16873       else
16874         emit_insn ((mode == DImode
16875                     ? gen_x86_shift_adj_2
16876                     : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
16877     }
16878 }
16879 
/* Predict the just-emitted jump instruction to be taken with
   probability PROB.  */
16881 static void
16882 predict_jump (int prob)
16883 {
16884   rtx insn = get_last_insn ();
16885   gcc_assert (JUMP_P (insn));
16886   REG_NOTES (insn)
16887     = gen_rtx_EXPR_LIST (REG_BR_PROB,
16888                          GEN_INT (prob),
16889                          REG_NOTES (insn));
16890 }
16891 
/* Helper function for the string operations below.  Test whether VARIABLE
   has the bits of the mask VALUE clear; if so, jump to the returned label,
   skipping whatever the caller emits before it.  EPILOGUE selects the
   branch-prediction weight attached to the jump.  */
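/* Typical use:
        rtx label = ix86_expand_aligntest (count, 4, true);
        ...emit a 4-byte operation...
        emit_label (label);
   so the guarded operation runs only when the tested bit is set.  */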
16894 static rtx
16895 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
16896 {
16897   rtx label = gen_label_rtx ();
16898   rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
16899   if (GET_MODE (variable) == DImode)
16900     emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
16901   else
16902     emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
16903   emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
16904                            1, label);
16905   if (epilogue)
16906     predict_jump (REG_BR_PROB_BASE * 50 / 100);
16907   else
16908     predict_jump (REG_BR_PROB_BASE * 90 / 100);
16909   return label;
16910 }
16911 
/* Decrement COUNTREG by VALUE.  */
16913 static void
16914 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
16915 {
16916   if (GET_MODE (countreg) == DImode)
16917     emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
16918   else
16919     emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
16920 }
16921 
/* Zero-extend the possibly SImode EXP to a Pmode register.  */
16923 rtx
16924 ix86_zero_extend_to_Pmode (rtx exp)
16925 {
16926   rtx r;
16927   if (GET_MODE (exp) == VOIDmode)
16928     return force_reg (Pmode, exp);
16929   if (GET_MODE (exp) == Pmode)
16930     return copy_to_mode_reg (Pmode, exp);
16931   r = gen_reg_rtx (Pmode);
16932   emit_insn (gen_zero_extendsidi2 (r, exp));
16933   return r;
16934 }
16935 
16936 /* Divide COUNTREG by SCALE.  */
16937 static rtx
16938 scale_counter (rtx countreg, int scale)
16939 {
  rtx sc;
16942 
16943   if (scale == 1)
16944     return countreg;
16945   if (CONST_INT_P (countreg))
16946     return GEN_INT (INTVAL (countreg) / scale);
16947   gcc_assert (REG_P (countreg));
16948 
16950   sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
16951                             GEN_INT (exact_log2 (scale)),
16952                             NULL, 1, OPTAB_DIRECT);
16953   return sc;
16954 }
16955 
16956 /* Return mode for the memcpy/memset loop counter.  Prefer SImode over
16957    DImode for constant loop counts.  */
16958 
16959 static enum machine_mode
16960 counter_mode (rtx count_exp)
16961 {
16962   if (GET_MODE (count_exp) != VOIDmode)
16963     return GET_MODE (count_exp);
16964   if (GET_CODE (count_exp) != CONST_INT)
16965     return Pmode;
16966   if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
16967     return DImode;
16968   return SImode;
16969 }
16970 
/* When SRCPTR is non-NULL, output a simple loop that moves memory from
   SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall
   size is COUNT, specified in bytes.  When SRCPTR is NULL, output the
   equivalent loop that sets memory to VALUE (expected to be in MODE).

   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide MEM rtxes to supply proper aliasing info.  */
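
/* Roughly, the emitted code is:

        size = count & ~(UNROLL * chunk - 1);
        if (UNROLL * chunk == 1 && size == 0) goto out;
        iter = 0;
     top:
        move (or set) UNROLL chunks at dest + iter [and src + iter];
        iter += UNROLL * chunk;
        if (iter < size) goto top;
        dest += iter;  [src += iter;]
     out:

   where "chunk" is GET_MODE_SIZE (MODE).  */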
16978 
16979 
16980 static void
16981 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
16982                                rtx destptr, rtx srcptr, rtx value,
16983                                rtx count, enum machine_mode mode, int unroll,
16984                                int expected_size)
16985 {
16986   rtx out_label, top_label, iter, tmp;
16987   enum machine_mode iter_mode = counter_mode (count);
16988   rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
16989   rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
16990   rtx size;
16991   rtx x_addr;
16992   rtx y_addr;
16993   int i;
16994 
16995   top_label = gen_label_rtx ();
16996   out_label = gen_label_rtx ();
16997   iter = gen_reg_rtx (iter_mode);
16998 
16999   size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17000                               NULL, 1, OPTAB_DIRECT);
17001   /* Those two should combine.  */
17002   if (piece_size == const1_rtx)
17003     {
17004       emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17005                                true, out_label);
17006       predict_jump (REG_BR_PROB_BASE * 10 / 100);
17007     }
17008   emit_move_insn (iter, const0_rtx);
17009 
17010   emit_label (top_label);
17011 
17012   tmp = convert_modes (Pmode, iter_mode, iter, true);
17013   x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17014   destmem = change_address (destmem, mode, x_addr);
17015 
17016   if (srcmem)
17017     {
17018       y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17019       srcmem = change_address (srcmem, mode, y_addr);
17020 
      /* When unrolling for chips that reorder memory reads and writes,
         we can save registers by using a single temporary.
         Also, using four temporaries is overkill in 32-bit mode.
         (The first, direct-copy variant below is currently disabled.)  */
17024       if (!TARGET_64BIT && 0)
17025         {
17026           for (i = 0; i < unroll; i++)
17027             {
17028               if (i)
17029                 {
17030                   destmem =
17031                     adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17032                   srcmem =
17033                     adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17034                 }
17035               emit_move_insn (destmem, srcmem);
17036             }
17037         }
17038       else
17039         {
17040           rtx tmpreg[4];
17041           gcc_assert (unroll <= 4);
17042           for (i = 0; i < unroll; i++)
17043             {
17044               tmpreg[i] = gen_reg_rtx (mode);
17045               if (i)
17046                 {
17047                   srcmem =
17048                     adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17049                 }
17050               emit_move_insn (tmpreg[i], srcmem);
17051             }
17052           for (i = 0; i < unroll; i++)
17053             {
17054               if (i)
17055                 {
17056                   destmem =
17057                     adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17058                 }
17059               emit_move_insn (destmem, tmpreg[i]);
17060             }
17061         }
17062     }
17063   else
17064     for (i = 0; i < unroll; i++)
17065       {
17066         if (i)
17067           destmem =
17068             adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17069         emit_move_insn (destmem, value);
17070       }
17071 
17072   tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17073                              true, OPTAB_LIB_WIDEN);
17074   if (tmp != iter)
17075     emit_move_insn (iter, tmp);
17076 
17077   emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
17078                            true, top_label);
17079   if (expected_size != -1)
17080     {
17081       expected_size /= GET_MODE_SIZE (mode) * unroll;
17082       if (expected_size == 0)
17083         predict_jump (0);
17084       else if (expected_size > REG_BR_PROB_BASE)
17085         predict_jump (REG_BR_PROB_BASE - 1);
17086       else
17087         predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17088     }
17089   else
17090     predict_jump (REG_BR_PROB_BASE * 80 / 100);
17091   iter = ix86_zero_extend_to_Pmode (iter);
17092   tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17093                              true, OPTAB_LIB_WIDEN);
17094   if (tmp != destptr)
17095     emit_move_insn (destptr, tmp);
17096   if (srcptr)
17097     {
17098       tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17099                                  true, OPTAB_LIB_WIDEN);
17100       if (tmp != srcptr)
17101         emit_move_insn (srcptr, tmp);
17102     }
17103   emit_label (out_label);
17104 }
17105 
/* Output a "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
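/* For SImode, for instance, this emits roughly the equivalent of:
        ecx = count >> 2;
        rep movsd
   using the 64-bit registers instead when Pmode is DImode.  */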
17108 static void
17109 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17110                            rtx destptr, rtx srcptr,
17111                            rtx count,
17112                            enum machine_mode mode)
17113 {
17114   rtx destexp;
17115   rtx srcexp;
17116   rtx countreg;
17117 
  /* If the size is known to be a multiple of 4, it is shorter to use
     a dword-wide rep movs.  */
17119   if (mode == QImode && CONST_INT_P (count)
17120       && !(INTVAL (count) & 3))
17121     mode = SImode;
17122 
17123   if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17124     destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17125   if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17126     srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17127   countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17128   if (mode != QImode)
17129     {
17130       destexp = gen_rtx_ASHIFT (Pmode, countreg,
17131                                 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17132       destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17133       srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17134                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17135       srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17136     }
17137   else
17138     {
17139       destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17140       srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
17141     }
17142   if (CONST_INT_P (count))
17143     {
17144       count = GEN_INT (INTVAL (count)
17145                        & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17146       destmem = shallow_copy_rtx (destmem);
17147       srcmem = shallow_copy_rtx (srcmem);
17148       set_mem_size (destmem, count);
17149       set_mem_size (srcmem, count);
17150     }
17151   else
17152     {
17153       if (MEM_SIZE (destmem))
17154         set_mem_size (destmem, NULL_RTX);
17155       if (MEM_SIZE (srcmem))
17156         set_mem_size (srcmem, NULL_RTX);
17157     }
17158   emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17159                           destexp, srcexp));
17160 }
17161 
/* Output a "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
17164 static void
17165 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17166                             rtx count, enum machine_mode mode,
17167                             rtx orig_value)
17168 {
17169   rtx destexp;
17170   rtx countreg;
17171 
17172   if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17173     destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17174   value = force_reg (mode, gen_lowpart (mode, value));
17175   countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17176   if (mode != QImode)
17177     {
17178       destexp = gen_rtx_ASHIFT (Pmode, countreg,
17179                                 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17180       destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17181     }
17182   else
17183     destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17184   if (orig_value == const0_rtx && CONST_INT_P (count))
17185     {
17186       count = GEN_INT (INTVAL (count)
17187                        & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17188       destmem = shallow_copy_rtx (destmem);
17189       set_mem_size (destmem, count);
17190     }
17191   else if (MEM_SIZE (destmem))
17192     set_mem_size (destmem, NULL_RTX);
17193   emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
17194 }
17195 
17196 static void
17197 emit_strmov (rtx destmem, rtx srcmem,
17198              rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17199 {
17200   rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17201   rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17202   emit_insn (gen_strmov (destptr, dest, srcptr, src));
17203 }
17204 
17205 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
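/* For example, with max_size == 16 a known count of 11 (binary 1011)
   emits one 8-byte move (two 4-byte moves on 32-bit targets), then a
   2-byte and a 1-byte move.  */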
17206 static void
17207 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17208                         rtx destptr, rtx srcptr, rtx count, int max_size)
17209 {
17210   rtx src, dest;
17211   if (CONST_INT_P (count))
17212     {
17213       HOST_WIDE_INT countval = INTVAL (count);
17214       int offset = 0;
17215 
17216       if ((countval & 0x10) && max_size > 16)
17217         {
17218           if (TARGET_64BIT)
17219             {
17220               emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17221               emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17222             }
17223           else
17224             gcc_unreachable ();
17225           offset += 16;
17226         }
17227       if ((countval & 0x08) && max_size > 8)
17228         {
17229           if (TARGET_64BIT)
17230             emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17231           else
17232             {
17233               emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17234               emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17235             }
17236           offset += 8;
17237         }
17238       if ((countval & 0x04) && max_size > 4)
17239         {
17240           emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17241           offset += 4;
17242         }
17243       if ((countval & 0x02) && max_size > 2)
17244         {
17245           emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17246           offset += 2;
17247         }
17248       if ((countval & 0x01) && max_size > 1)
17249         {
17250           emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
17251           offset += 1;
17252         }
17253       return;
17254     }
17255   if (max_size > 8)
17256     {
17257       count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17258                                     count, 1, OPTAB_DIRECT);
17259       expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17260                                      count, QImode, 1, 4);
17261       return;
17262     }
17263 
  /* When single-instruction stringops are available, the pointer updates
     come for free.  Otherwise we save code size by maintaining an offset
     register (zero is readily available from the preceding rep operation)
     and using x86 addressing modes.  */
17268   if (TARGET_SINGLE_STRINGOP)
17269     {
17270       if (max_size > 4)
17271         {
17272           rtx label = ix86_expand_aligntest (count, 4, true);
17273           src = change_address (srcmem, SImode, srcptr);
17274           dest = change_address (destmem, SImode, destptr);
17275           emit_insn (gen_strmov (destptr, dest, srcptr, src));
17276           emit_label (label);
17277           LABEL_NUSES (label) = 1;
17278         }
17279       if (max_size > 2)
17280         {
17281           rtx label = ix86_expand_aligntest (count, 2, true);
17282           src = change_address (srcmem, HImode, srcptr);
17283           dest = change_address (destmem, HImode, destptr);
17284           emit_insn (gen_strmov (destptr, dest, srcptr, src));
17285           emit_label (label);
17286           LABEL_NUSES (label) = 1;
17287         }
17288       if (max_size > 1)
17289         {
17290           rtx label = ix86_expand_aligntest (count, 1, true);
17291           src = change_address (srcmem, QImode, srcptr);
17292           dest = change_address (destmem, QImode, destptr);
17293           emit_insn (gen_strmov (destptr, dest, srcptr, src));
17294           emit_label (label);
17295           LABEL_NUSES (label) = 1;
17296         }
17297     }
17298   else
17299     {
17300       rtx offset = force_reg (Pmode, const0_rtx);
17301       rtx tmp;
17302 
17303       if (max_size > 4)
17304         {
17305           rtx label = ix86_expand_aligntest (count, 4, true);
17306           src = change_address (srcmem, SImode, srcptr);
17307           dest = change_address (destmem, SImode, destptr);
17308           emit_move_insn (dest, src);
17309           tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17310                                      true, OPTAB_LIB_WIDEN);
17311           if (tmp != offset)
17312             emit_move_insn (offset, tmp);
17313           emit_label (label);
17314           LABEL_NUSES (label) = 1;
17315         }
17316       if (max_size > 2)
17317         {
17318           rtx label = ix86_expand_aligntest (count, 2, true);
17319           tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17320           src = change_address (srcmem, HImode, tmp);
17321           tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17322           dest = change_address (destmem, HImode, tmp);
17323           emit_move_insn (dest, src);
17324           tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17325                                      true, OPTAB_LIB_WIDEN);
17326           if (tmp != offset)
17327             emit_move_insn (offset, tmp);
17328           emit_label (label);
17329           LABEL_NUSES (label) = 1;
17330         }
17331       if (max_size > 1)
17332         {
17333           rtx label = ix86_expand_aligntest (count, 1, true);
17334           tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17335           src = change_address (srcmem, QImode, tmp);
17336           tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17337           dest = change_address (destmem, QImode, tmp);
17338           emit_move_insn (dest, src);
17339           emit_label (label);
17340           LABEL_NUSES (label) = 1;
17341         }
17342     }
17343 }
17344 
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
17346 static void
17347 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17348                                  rtx count, int max_size)
17349 {
17350   count =
17351     expand_simple_binop (counter_mode (count), AND, count,
17352                          GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17353   expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17354                                  gen_lowpart (QImode, value), count, QImode,
17355                                  1, max_size / 2);
17356 }
17357 
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
17359 static void
17360 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17361 {
17362   rtx dest;
17363 
17364   if (CONST_INT_P (count))
17365     {
17366       HOST_WIDE_INT countval = INTVAL (count);
17367       int offset = 0;
17368 
17369       if ((countval & 0x10) && max_size > 16)
17370         {
17371           if (TARGET_64BIT)
17372             {
17373               dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17374               emit_insn (gen_strset (destptr, dest, value));
17375               dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17376               emit_insn (gen_strset (destptr, dest, value));
17377             }
17378           else
17379             gcc_unreachable ();
17380           offset += 16;
17381         }
17382       if ((countval & 0x08) && max_size > 8)
17383         {
17384           if (TARGET_64BIT)
17385             {
17386               dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17387               emit_insn (gen_strset (destptr, dest, value));
17388             }
17389           else
17390             {
17391               dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17392               emit_insn (gen_strset (destptr, dest, value));
17393               dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17394               emit_insn (gen_strset (destptr, dest, value));
17395             }
17396           offset += 8;
17397         }
17398       if ((countval & 0x04) && max_size > 4)
17399         {
17400           dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17401           emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17402           offset += 4;
17403         }
17404       if ((countval & 0x02) && max_size > 2)
17405         {
17406           dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17407           emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17408           offset += 2;
17409         }
17410       if ((countval & 0x01) && max_size > 1)
17411         {
17412           dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17413           emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17414           offset += 1;
17415         }
17416       return;
17417     }
17418   if (max_size > 32)
17419     {
17420       expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
17421       return;
17422     }
17423   if (max_size > 16)
17424     {
17425       rtx label = ix86_expand_aligntest (count, 16, true);
17426       if (TARGET_64BIT)
17427         {
17428           dest = change_address (destmem, DImode, destptr);
17429           emit_insn (gen_strset (destptr, dest, value));
17430           emit_insn (gen_strset (destptr, dest, value));
17431         }
17432       else
17433         {
17434           dest = change_address (destmem, SImode, destptr);
17435           emit_insn (gen_strset (destptr, dest, value));
17436           emit_insn (gen_strset (destptr, dest, value));
17437           emit_insn (gen_strset (destptr, dest, value));
17438           emit_insn (gen_strset (destptr, dest, value));
17439         }
17440       emit_label (label);
17441       LABEL_NUSES (label) = 1;
17442     }
17443   if (max_size > 8)
17444     {
17445       rtx label = ix86_expand_aligntest (count, 8, true);
17446       if (TARGET_64BIT)
17447         {
17448           dest = change_address (destmem, DImode, destptr);
17449           emit_insn (gen_strset (destptr, dest, value));
17450         }
17451       else
17452         {
17453           dest = change_address (destmem, SImode, destptr);
17454           emit_insn (gen_strset (destptr, dest, value));
17455           emit_insn (gen_strset (destptr, dest, value));
17456         }
17457       emit_label (label);
17458       LABEL_NUSES (label) = 1;
17459     }
17460   if (max_size > 4)
17461     {
17462       rtx label = ix86_expand_aligntest (count, 4, true);
17463       dest = change_address (destmem, SImode, destptr);
17464       emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17465       emit_label (label);
17466       LABEL_NUSES (label) = 1;
17467     }
17468   if (max_size > 2)
17469     {
17470       rtx label = ix86_expand_aligntest (count, 2, true);
17471       dest = change_address (destmem, HImode, destptr);
17472       emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17473       emit_label (label);
17474       LABEL_NUSES (label) = 1;
17475     }
17476   if (max_size > 1)
17477     {
17478       rtx label = ix86_expand_aligntest (count, 1, true);
17479       dest = change_address (destmem, QImode, destptr);
17480       emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17481       emit_label (label);
17482       LABEL_NUSES (label) = 1;
17483     }
17484 }
17485 
/* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
   by ALIGN, to DESIRED_ALIGNMENT.  */
17488 static void
17489 expand_movmem_prologue (rtx destmem, rtx srcmem,
17490                         rtx destptr, rtx srcptr, rtx count,
17491                         int align, int desired_alignment)
17492 {
17493   if (align <= 1 && desired_alignment > 1)
17494     {
17495       rtx label = ix86_expand_aligntest (destptr, 1, false);
17496       srcmem = change_address (srcmem, QImode, srcptr);
17497       destmem = change_address (destmem, QImode, destptr);
17498       emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17499       ix86_adjust_counter (count, 1);
17500       emit_label (label);
17501       LABEL_NUSES (label) = 1;
17502     }
17503   if (align <= 2 && desired_alignment > 2)
17504     {
17505       rtx label = ix86_expand_aligntest (destptr, 2, false);
17506       srcmem = change_address (srcmem, HImode, srcptr);
17507       destmem = change_address (destmem, HImode, destptr);
17508       emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17509       ix86_adjust_counter (count, 2);
17510       emit_label (label);
17511       LABEL_NUSES (label) = 1;
17512     }
17513   if (align <= 4 && desired_alignment > 4)
17514     {
17515       rtx label = ix86_expand_aligntest (destptr, 4, false);
17516       srcmem = change_address (srcmem, SImode, srcptr);
17517       destmem = change_address (destmem, SImode, destptr);
17518       emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17519       ix86_adjust_counter (count, 4);
17520       emit_label (label);
17521       LABEL_NUSES (label) = 1;
17522     }
17523   gcc_assert (desired_alignment <= 8);
17524 }
17525 
/* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
17528 static rtx
17529 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17530                                  int desired_align, int align_bytes)
17531 {
17532   rtx src = *srcp;
17533   rtx src_size, dst_size;
17534   int off = 0;
17535   int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17536   if (src_align_bytes >= 0)
17537     src_align_bytes = desired_align - src_align_bytes;
17538   src_size = MEM_SIZE (src);
17539   dst_size = MEM_SIZE (dst);
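
  /* A hedged illustration: with DESIRED_ALIGN == 8 and ALIGN_BYTES == 7,
     the three bit tests below emit one QImode, one HImode and one SImode
     move (1 + 2 + 4 bytes), after which DST is 8-byte aligned.  */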
17540   if (align_bytes & 1)
17541     {
17542       dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17543       src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17544       off = 1;
17545       emit_insn (gen_strmov (destreg, dst, srcreg, src));
17546     }
17547   if (align_bytes & 2)
17548     {
17549       dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17550       src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17551       if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17552         set_mem_align (dst, 2 * BITS_PER_UNIT);
17553       if (src_align_bytes >= 0
17554           && (src_align_bytes & 1) == (align_bytes & 1)
17555           && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17556         set_mem_align (src, 2 * BITS_PER_UNIT);
17557       off = 2;
17558       emit_insn (gen_strmov (destreg, dst, srcreg, src));
17559     }
17560   if (align_bytes & 4)
17561     {
17562       dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17563       src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17564       if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17565         set_mem_align (dst, 4 * BITS_PER_UNIT);
17566       if (src_align_bytes >= 0)
17567         {
17568           unsigned int src_align = 0;
17569           if ((src_align_bytes & 3) == (align_bytes & 3))
17570             src_align = 4;
17571           else if ((src_align_bytes & 1) == (align_bytes & 1))
17572             src_align = 2;
17573           if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17574             set_mem_align (src, src_align * BITS_PER_UNIT);
17575         }
17576       off = 4;
17577       emit_insn (gen_strmov (destreg, dst, srcreg, src));
17578     }
17579   dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17580   src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17581   if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17582     set_mem_align (dst, desired_align * BITS_PER_UNIT);
17583   if (src_align_bytes >= 0)
17584     {
17585       unsigned int src_align = 0;
17586       if ((src_align_bytes & 7) == (align_bytes & 7))
17587         src_align = 8;
17588       else if ((src_align_bytes & 3) == (align_bytes & 3))
17589         src_align = 4;
17590       else if ((src_align_bytes & 1) == (align_bytes & 1))
17591         src_align = 2;
17592       if (src_align > (unsigned int) desired_align)
17593         src_align = desired_align;
17594       if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17595         set_mem_align (src, src_align * BITS_PER_UNIT);
17596     }
17597   if (dst_size)
17598     set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17599   if (src_size)
    set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
17601   *srcp = src;
17602   return dst;
17603 }
17604 
/* Store enough bytes to DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
17607 static void
17608 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17609                         int align, int desired_alignment)
17610 {
17611   if (align <= 1 && desired_alignment > 1)
17612     {
17613       rtx label = ix86_expand_aligntest (destptr, 1, false);
17614       destmem = change_address (destmem, QImode, destptr);
17615       emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17616       ix86_adjust_counter (count, 1);
17617       emit_label (label);
17618       LABEL_NUSES (label) = 1;
17619     }
17620   if (align <= 2 && desired_alignment > 2)
17621     {
17622       rtx label = ix86_expand_aligntest (destptr, 2, false);
17623       destmem = change_address (destmem, HImode, destptr);
17624       emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17625       ix86_adjust_counter (count, 2);
17626       emit_label (label);
17627       LABEL_NUSES (label) = 1;
17628     }
17629   if (align <= 4 && desired_alignment > 4)
17630     {
17631       rtx label = ix86_expand_aligntest (destptr, 4, false);
17632       destmem = change_address (destmem, SImode, destptr);
17633       emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17634       ix86_adjust_counter (count, 4);
17635       emit_label (label);
17636       LABEL_NUSES (label) = 1;
17637     }
17638   gcc_assert (desired_alignment <= 8);
17639 }
17640 
/* Store enough bytes to DST to align DST, known to be aligned by ALIGN, to
   DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
17643 static rtx
17644 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17645                                  int desired_align, int align_bytes)
17646 {
17647   int off = 0;
17648   rtx dst_size = MEM_SIZE (dst);
17649   if (align_bytes & 1)
17650     {
17651       dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17652       off = 1;
17653       emit_insn (gen_strset (destreg, dst,
17654                              gen_lowpart (QImode, value)));
17655     }
17656   if (align_bytes & 2)
17657     {
17658       dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17659       if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17660         set_mem_align (dst, 2 * BITS_PER_UNIT);
17661       off = 2;
17662       emit_insn (gen_strset (destreg, dst,
17663                              gen_lowpart (HImode, value)));
17664     }
17665   if (align_bytes & 4)
17666     {
17667       dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17668       if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17669         set_mem_align (dst, 4 * BITS_PER_UNIT);
17670       off = 4;
17671       emit_insn (gen_strset (destreg, dst,
17672                              gen_lowpart (SImode, value)));
17673     }
17674   dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17675   if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17676     set_mem_align (dst, desired_align * BITS_PER_UNIT);
17677   if (dst_size)
17678     set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17679   return dst;
17680 }
17681 
17682 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
17683 static enum stringop_alg
17684 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17685             int *dynamic_check)
17686 {
17687   const struct stringop_algs * algs;
17688   bool optimize_for_speed;
17689   /* Algorithms using the rep prefix want at least edi and ecx;
17690      additionally, memset wants eax and memcpy wants esi.  Don't
17691      consider such algorithms if the user has appropriated those
17692      registers for their own purposes.  */
17693   bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17694                              || (memset
17695                                  ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17696 
17697 #define ALG_USABLE_P(alg) (rep_prefix_usable                    \
17698                            || (alg != rep_prefix_1_byte         \
17699                                && alg != rep_prefix_4_byte      \
17700                                && alg != rep_prefix_8_byte))
17701   const struct processor_costs *cost;
17702   
17703   /* Even if the string operation call is cold, we still might spend a lot
17704      of time processing large blocks.  */
17705   if (optimize_function_for_size_p (cfun)
17706       || (optimize_insn_for_size_p ()
17707           && expected_size != -1 && expected_size < 256))
17708     optimize_for_speed = false;
17709   else
17710     optimize_for_speed = true;
17711 
17712   cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17713 
17714   *dynamic_check = -1;
17715   if (memset)
17716     algs = &cost->memset[TARGET_64BIT != 0];
17717   else
17718     algs = &cost->memcpy[TARGET_64BIT != 0];
17719   if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17720     return stringop_alg;
17721   /* rep; movq or rep; movl is the smallest variant.  */
17722   else if (!optimize_for_speed)
17723     {
17724       if (!count || (count & 3))
17725         return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
17726       else
17727         return rep_prefix_usable ? rep_prefix_4_byte : loop;
17728     }
  /* Very tiny blocks are best handled via the loop; REP is expensive
     to set up.  */
17731   else if (expected_size != -1 && expected_size < 4)
17732     return loop_1_byte;
17733   else if (expected_size != -1)
17734     {
17735       unsigned int i;
17736       enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17738         {
17739           /* We get here if the algorithms that were not libcall-based
17740              were rep-prefix based and we are unable to use rep prefixes
17741              based on global register usage.  Break out of the loop and
17742              use the heuristic below.  */
17743           if (algs->size[i].max == 0)
17744             break;
17745           if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
17746             {
17747               enum stringop_alg candidate = algs->size[i].alg;
17748 
17749               if (candidate != libcall && ALG_USABLE_P (candidate))
17750                 alg = candidate;
17751               /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
17752                  last non-libcall inline algorithm.  */
17753               if (TARGET_INLINE_ALL_STRINGOPS)
17754                 {
17755                   /* When the current size is best to be copied by a libcall,
17756                      but we are still forced to inline, run the heuristic below
17757                      that will pick code for medium sized blocks.  */
17758                   if (alg != libcall)
17759                     return alg;
17760                   break;
17761                 }
17762               else if (ALG_USABLE_P (candidate))
17763                 return candidate;
17764             }
17765         }
17766       gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
17767     }
  /* When asked to inline the call anyway, try to pick a meaningful choice.
     We look for the maximal size of a block that is faster to copy by hand
     and take blocks of at most that size, guessing that the average size
     will be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
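  /* Hedged example: if the largest usable table entry says a non-libcall
     algorithm wins for blocks of up to MAX == 4096 bytes, we recurse with
     an expected size of 2048 to pick the algorithm, and with
     -minline-stringops-dynamically we emit a runtime check that calls the
     library for blocks of MAX bytes or more.  */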
17775   if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17776       && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
17777     {
17778       int max = -1;
17779       enum stringop_alg alg;
17780       int i;
17781       bool any_alg_usable_p = true;
17782 
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17784         {
17785           enum stringop_alg candidate = algs->size[i].alg;
17786           any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
17787 
17788           if (candidate != libcall && candidate
17789               && ALG_USABLE_P (candidate))
17790               max = algs->size[i].max;
17791         }
17792       /* If there aren't any usable algorithms, then recursing on
17793          smaller sizes isn't going to find anything.  Just return the
17794          simple byte-at-a-time copy loop.  */
17795       if (!any_alg_usable_p)
17796         {
17797           /* Pick something reasonable.  */
17798           if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17799             *dynamic_check = 128;
17800           return loop_1_byte;
17801         }
17802       if (max == -1)
17803         max = 4096;
17804       alg = decide_alg (count, max / 2, memset, dynamic_check);
17805       gcc_assert (*dynamic_check == -1);
17806       gcc_assert (alg != libcall);
17807       if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17808         *dynamic_check = max;
17809       return alg;
17810     }
17811   return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
17812 #undef ALG_USABLE_P
17813 }
17814 
17815 /* Decide on alignment.  We know that the operand is already aligned to ALIGN
17816    (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
17817 static int
17818 decide_alignment (int align,
17819                   enum stringop_alg alg,
17820                   int expected_size)
17821 {
17822   int desired_align = 0;
17823   switch (alg)
17824     {
17825       case no_stringop:
17826         gcc_unreachable ();
17827       case loop:
17828       case unrolled_loop:
17829         desired_align = GET_MODE_SIZE (Pmode);
17830         break;
17831       case rep_prefix_8_byte:
17832         desired_align = 8;
17833         break;
17834       case rep_prefix_4_byte:
        /* PentiumPro has special logic that triggers for 8-byte-aligned
           blocks, copying a whole cache line at once.  */
17837         if (TARGET_PENTIUMPRO)
17838           desired_align = 8;
17839         else
17840           desired_align = 4;
17841         break;
17842       case rep_prefix_1_byte:
        /* PentiumPro has special logic that triggers for 8-byte-aligned
           blocks, copying a whole cache line at once.  */
17845         if (TARGET_PENTIUMPRO)
17846           desired_align = 8;
17847         else
17848           desired_align = 1;
17849         break;
17850       case loop_1_byte:
17851         desired_align = 1;
17852         break;
17853       case libcall:
17854         return 0;
17855     }
17856 
17857   if (optimize_size)
17858     desired_align = 1;
17859   if (desired_align < align)
17860     desired_align = align;
17861   if (expected_size != -1 && expected_size < 4)
17862     desired_align = align;
17863   return desired_align;
17864 }
17865 
17866 /* Return the smallest power of 2 greater than VAL.  */
17867 static int
17868 smallest_pow2_greater_than (int val)
17869 {
17870   int ret = 1;
17871   while (ret <= val)
17872     ret <<= 1;
17873   return ret;
17874 }
17875 
17876 /* Expand string move (memcpy) operation.  Use i386 string operations when
17877    profitable.  expand_setmem contains similar code.  The code depends upon
17878    architecture, block size and alignment, but always has the same
17879    overall structure:
17880 
17881    1) Prologue guard: Conditional that jumps up to epilogues for small
      blocks that can be handled by the epilogue alone.  This is faster but
      also needed for correctness, since the prologue assumes the block is
      larger than the desired alignment.
17885 
17886       Optional dynamic check for size and libcall for large
17887       blocks is emitted here too, with -minline-stringops-dynamically.
17888 
17889    2) Prologue: copy first few bytes in order to get destination aligned
17890       to DESIRED_ALIGN.  It is emitted only when ALIGN is less than
      DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
17892       We emit either a jump tree on power of two sized blocks, or a byte loop.
17893 
17894    3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
17895       with specified algorithm.
17896 
17897    4) Epilogue: code copying tail of the block that is too small to be
17898       handled by main body (or up to size guarded by prologue guard).  */
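
/* A structural sketch (sizes illustrative): for a copy of unknown size
   using rep_prefix_4_byte with DESIRED_ALIGN == 4, step 1 jumps to the
   epilogue when fewer than 4 bytes remain, step 2 moves up to 3 bytes
   through a jump tree to align the destination, step 3 emits rep movsl
   for the aligned 4-byte chunks, and step 4 moves the remaining
   COUNT & 3 bytes.  */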
17899 
17900 int
17901 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
17902                     rtx expected_align_exp, rtx expected_size_exp)
17903 {
17904   rtx destreg;
17905   rtx srcreg;
17906   rtx label = NULL;
17907   rtx tmp;
17908   rtx jump_around_label = NULL;
17909   HOST_WIDE_INT align = 1;
17910   unsigned HOST_WIDE_INT count = 0;
17911   HOST_WIDE_INT expected_size = -1;
17912   int size_needed = 0, epilogue_size_needed;
17913   int desired_align = 0, align_bytes = 0;
17914   enum stringop_alg alg;
17915   int dynamic_check;
17916   bool need_zero_guard = false;
17917 
17918   if (CONST_INT_P (align_exp))
17919     align = INTVAL (align_exp);
  /* i386 can do misaligned accesses at a reasonably increased cost.  */
17921   if (CONST_INT_P (expected_align_exp)
17922       && INTVAL (expected_align_exp) > align)
17923     align = INTVAL (expected_align_exp);
17924   /* ALIGN is the minimum of destination and source alignment, but we care here
17925      just about destination alignment.  */
17926   else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
17927     align = MEM_ALIGN (dst) / BITS_PER_UNIT;
17928 
17929   if (CONST_INT_P (count_exp))
17930     count = expected_size = INTVAL (count_exp);
17931   if (CONST_INT_P (expected_size_exp) && count == 0)
17932     expected_size = INTVAL (expected_size_exp);
17933 
17934   /* Make sure we don't need to care about overflow later on.  */
17935   if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17936     return 0;
17937 
17938   /* Step 0: Decide on preferred algorithm, desired alignment and
17939      size of chunks to be copied by main loop.  */
17940 
17941   alg = decide_alg (count, expected_size, false, &dynamic_check);
17942   desired_align = decide_alignment (align, alg, expected_size);
17943 
17944   if (!TARGET_ALIGN_STRINGOPS)
17945     align = desired_align;
17946 
17947   if (alg == libcall)
17948     return 0;
17949   gcc_assert (alg != no_stringop);
17950   if (!count)
17951     count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
17952   destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
17953   srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
17954   switch (alg)
17955     {
17956     case libcall:
17957     case no_stringop:
17958       gcc_unreachable ();
17959     case loop:
17960       need_zero_guard = true;
17961       size_needed = GET_MODE_SIZE (Pmode);
17962       break;
17963     case unrolled_loop:
17964       need_zero_guard = true;
17965       size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
17966       break;
17967     case rep_prefix_8_byte:
17968       size_needed = 8;
17969       break;
17970     case rep_prefix_4_byte:
17971       size_needed = 4;
17972       break;
17973     case rep_prefix_1_byte:
17974       size_needed = 1;
17975       break;
17976     case loop_1_byte:
17977       need_zero_guard = true;
17978       size_needed = 1;
17979       break;
17980     }
17981 
17982   epilogue_size_needed = size_needed;
17983 
17984   /* Step 1: Prologue guard.  */
17985 
17986   /* Alignment code needs count to be in register.  */
17987   if (CONST_INT_P (count_exp) && desired_align > align)
17988     {
17989       if (INTVAL (count_exp) > desired_align
17990           && INTVAL (count_exp) > size_needed)
17991         {
17992           align_bytes
17993             = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
17994           if (align_bytes <= 0)
17995             align_bytes = 0;
17996           else
17997             align_bytes = desired_align - align_bytes;
17998         }
17999       if (align_bytes == 0)
18000         count_exp = force_reg (counter_mode (count_exp), count_exp);
18001     }
18002   gcc_assert (desired_align >= 1 && align >= 1);
18003 
18004   /* Ensure that alignment prologue won't copy past end of block.  */
18005   if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18006     {
18007       epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
         Make sure it is power of 2.  */
18010       epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
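      /* For example, SIZE_NEEDED == 8 with DESIRED_ALIGN - ALIGN == 7
         yields MAX (7, 7) == 7, rounded up to the power of two 8.  */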
18011 
18012       if (count)
18013         {
18014           if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18015             {
              /* If the main algorithm works on QImode, no epilogue is needed.
                 For small sizes just don't align anything.  */
18018               if (size_needed == 1)
18019                 desired_align = align;
18020               else
18021                 goto epilogue;
18022             }
18023         }
18024       else
18025         {
18026           label = gen_label_rtx ();
18027           emit_cmp_and_jump_insns (count_exp,
18028                                    GEN_INT (epilogue_size_needed),
18029                                    LTU, 0, counter_mode (count_exp), 1, label);
18030           if (expected_size == -1 || expected_size < epilogue_size_needed)
18031             predict_jump (REG_BR_PROB_BASE * 60 / 100);
18032           else
18033             predict_jump (REG_BR_PROB_BASE * 20 / 100);
18034         }
18035     }
18036 
  /* Emit code to decide at runtime whether a library call or inline code
     should be used.  */
18039   if (dynamic_check != -1)
18040     {
18041       if (CONST_INT_P (count_exp))
18042         {
18043           if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18044             {
18045               emit_block_move_via_libcall (dst, src, count_exp, false);
18046               count_exp = const0_rtx;
18047               goto epilogue;
18048             }
18049         }
18050       else
18051         {
18052           rtx hot_label = gen_label_rtx ();
18053           jump_around_label = gen_label_rtx ();
18054           emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18055                                    LEU, 0, GET_MODE (count_exp), 1, hot_label);
18056           predict_jump (REG_BR_PROB_BASE * 90 / 100);
18057           emit_block_move_via_libcall (dst, src, count_exp, false);
18058           emit_jump (jump_around_label);
18059           emit_label (hot_label);
18060         }
18061     }
18062 
18063   /* Step 2: Alignment prologue.  */
18064 
18065   if (desired_align > align)
18066     {
18067       if (align_bytes == 0)
18068         {
          /* Except for the first move in epilogue, we no longer know
             constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw away
             the info early.  */
18073           src = change_address (src, BLKmode, srcreg);
18074           dst = change_address (dst, BLKmode, destreg);
18075           expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18076                                   desired_align);
18077         }
18078       else
18079         {
18080           /* If we know how many bytes need to be stored before dst is
18081              sufficiently aligned, maintain aliasing info accurately.  */
18082           dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18083                                                  desired_align, align_bytes);
18084           count_exp = plus_constant (count_exp, -align_bytes);
18085           count -= align_bytes;
18086         }
18087       if (need_zero_guard
18088           && (count < (unsigned HOST_WIDE_INT) size_needed
18089               || (align_bytes == 0
18090                   && count < ((unsigned HOST_WIDE_INT) size_needed
18091                               + desired_align - align))))
18092         {
18093           /* It is possible that we copied enough so the main loop will not
18094              execute.  */
18095           gcc_assert (size_needed > 1);
18096           if (label == NULL_RTX)
18097             label = gen_label_rtx ();
18098           emit_cmp_and_jump_insns (count_exp,
18099                                    GEN_INT (size_needed),
18100                                    LTU, 0, counter_mode (count_exp), 1, label);
18101           if (expected_size == -1
18102               || expected_size < (desired_align - align) / 2 + size_needed)
18103             predict_jump (REG_BR_PROB_BASE * 20 / 100);
18104           else
18105             predict_jump (REG_BR_PROB_BASE * 60 / 100);
18106         }
18107     }
18108   if (label && size_needed == 1)
18109     {
18110       emit_label (label);
18111       LABEL_NUSES (label) = 1;
18112       label = NULL;
18113       epilogue_size_needed = 1;
18114     }
18115   else if (label == NULL_RTX)
18116     epilogue_size_needed = size_needed;
18117 
18118   /* Step 3: Main loop.  */
18119 
18120   switch (alg)
18121     {
18122     case libcall:
18123     case no_stringop:
18124       gcc_unreachable ();
18125     case loop_1_byte:
18126       expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18127                                      count_exp, QImode, 1, expected_size);
18128       break;
18129     case loop:
18130       expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18131                                      count_exp, Pmode, 1, expected_size);
18132       break;
18133     case unrolled_loop:
      /* Unroll only by a factor of 2 in 32-bit mode, since we don't have
         enough registers for 4 temporaries anyway.  */
18136       expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18137                                      count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18138                                      expected_size);
18139       break;
18140     case rep_prefix_8_byte:
18141       expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18142                                  DImode);
18143       break;
18144     case rep_prefix_4_byte:
18145       expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18146                                  SImode);
18147       break;
18148     case rep_prefix_1_byte:
18149       expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18150                                  QImode);
18151       break;
18152     }
  /* Properly adjust the offsets of the src and dest memory for aliasing.  */
18154   if (CONST_INT_P (count_exp))
18155     {
18156       src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18157                                           (count / size_needed) * size_needed);
18158       dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18159                                           (count / size_needed) * size_needed);
18160     }
18161   else
18162     {
18163       src = change_address (src, BLKmode, srcreg);
18164       dst = change_address (dst, BLKmode, destreg);
18165     }
18166 
18167   /* Step 4: Epilogue to copy the remaining bytes.  */
18168  epilogue:
18169   if (label)
18170     {
      /* When the main loop is done, COUNT_EXP might hold the original
         count, while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1)
         bytes.  Epilogue code will actually copy
         COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.  Compensate if
         needed.  */
18175 
18176       if (size_needed < epilogue_size_needed)
18177         {
18178           tmp =
18179             expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18180                                  GEN_INT (size_needed - 1), count_exp, 1,
18181                                  OPTAB_DIRECT);
18182           if (tmp != count_exp)
18183             emit_move_insn (count_exp, tmp);
18184         }
18185       emit_label (label);
18186       LABEL_NUSES (label) = 1;
18187     }
18188 
18189   if (count_exp != const0_rtx && epilogue_size_needed > 1)
18190     expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18191                             epilogue_size_needed);
18192   if (jump_around_label)
18193     emit_label (jump_around_label);
18194   return 1;
18195 }
18196 
/* Helper function for memset.  For QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   VAL * 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
18202 static rtx
18203 promote_duplicated_reg (enum machine_mode mode, rtx val)
18204 {
18205   enum machine_mode valmode = GET_MODE (val);
18206   rtx tmp;
18207   int nops = mode == DImode ? 3 : 2;
18208 
18209   gcc_assert (mode == SImode || mode == DImode);
18210   if (val == const0_rtx)
18211     return copy_to_mode_reg (mode, const0_rtx);
18212   if (CONST_INT_P (val))
18213     {
18214       HOST_WIDE_INT v = INTVAL (val) & 255;
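
      /* E.g. (illustrative), v == 0xAB becomes 0xABAB after the first
         shift/or and 0xABABABAB after the second, i.e. 0xAB * 0x01010101.  */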
18215 
18216       v |= v << 8;
18217       v |= v << 16;
18218       if (mode == DImode)
18219         v |= (v << 16) << 16;
18220       return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18221     }
18222 
18223   if (valmode == VOIDmode)
18224     valmode = QImode;
18225   if (valmode != QImode)
18226     val = gen_lowpart (QImode, val);
18227   if (mode == QImode)
18228     return val;
18229   if (!TARGET_PARTIAL_REG_STALL)
18230     nops--;
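  /* The test below roughly weighs a multiply by 0x01010101 (mult_init plus
     one mult_bit per set multiplier bit: 4 for SImode, 8 for DImode)
     against NOPS shift-and-or steps, plus the single insv instruction used
     in place of the first step when partial register stalls are cheap.  */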
18231   if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18232       + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18233       <= (ix86_cost->shift_const + ix86_cost->add) * nops
18234           + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18235     {
18236       rtx reg = convert_modes (mode, QImode, val, true);
18237       tmp = promote_duplicated_reg (mode, const1_rtx);
18238       return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
18239                                   OPTAB_DIRECT);
18240     }
18241   else
18242     {
18243       rtx reg = convert_modes (mode, QImode, val, true);
18244 
18245       if (!TARGET_PARTIAL_REG_STALL)
18246         if (mode == SImode)
18247           emit_insn (gen_movsi_insv_1 (reg, reg));
18248         else
18249           emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18250       else
18251         {
18252           tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18253                                      NULL, 1, OPTAB_DIRECT);
18254           reg =
18255             expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18256         }
18257       tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18258                                  NULL, 1, OPTAB_DIRECT);
18259       reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18260       if (mode == SImode)
18261         return reg;
18262       tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18263                                  NULL, 1, OPTAB_DIRECT);
18264       reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18265       return reg;
18266     }
18267 }
18268 
18269 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18270    be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18271    alignment from ALIGN to DESIRED_ALIGN.  */
18272 static rtx
18273 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18274 {
18275   rtx promoted_val;
18276 
18277   if (TARGET_64BIT
18278       && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18279     promoted_val = promote_duplicated_reg (DImode, val);
18280   else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18281     promoted_val = promote_duplicated_reg (SImode, val);
18282   else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18283     promoted_val = promote_duplicated_reg (HImode, val);
18284   else
18285     promoted_val = val;
18286 
18287   return promoted_val;
18288 }
18289 
18290 /* Expand string clear operation (bzero).  Use i386 string operations when
18291    profitable.  See expand_movmem comment for explanation of individual
18292    steps performed.  */
18293 int
18294 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18295                     rtx expected_align_exp, rtx expected_size_exp)
18296 {
18297   rtx destreg;
18298   rtx label = NULL;
18299   rtx tmp;
18300   rtx jump_around_label = NULL;
18301   HOST_WIDE_INT align = 1;
18302   unsigned HOST_WIDE_INT count = 0;
18303   HOST_WIDE_INT expected_size = -1;
18304   int size_needed = 0, epilogue_size_needed;
18305   int desired_align = 0, align_bytes = 0;
18306   enum stringop_alg alg;
18307   rtx promoted_val = NULL;
18308   bool force_loopy_epilogue = false;
18309   int dynamic_check;
18310   bool need_zero_guard = false;
18311 
18312   if (CONST_INT_P (align_exp))
18313     align = INTVAL (align_exp);
  /* i386 can do misaligned accesses at a reasonably increased cost.  */
18315   if (CONST_INT_P (expected_align_exp)
18316       && INTVAL (expected_align_exp) > align)
18317     align = INTVAL (expected_align_exp);
18318   if (CONST_INT_P (count_exp))
18319     count = expected_size = INTVAL (count_exp);
18320   if (CONST_INT_P (expected_size_exp) && count == 0)
18321     expected_size = INTVAL (expected_size_exp);
18322 
18323   /* Make sure we don't need to care about overflow later on.  */
18324   if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18325     return 0;
18326 
18327   /* Step 0: Decide on preferred algorithm, desired alignment and
18328      size of chunks to be copied by main loop.  */
18329 
18330   alg = decide_alg (count, expected_size, true, &dynamic_check);
18331   desired_align = decide_alignment (align, alg, expected_size);
18332 
18333   if (!TARGET_ALIGN_STRINGOPS)
18334     align = desired_align;
18335 
18336   if (alg == libcall)
18337     return 0;
18338   gcc_assert (alg != no_stringop);
18339   if (!count)
18340     count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18341   destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18342   switch (alg)
18343     {
18344     case libcall:
18345     case no_stringop:
18346       gcc_unreachable ();
18347     case loop:
18348       need_zero_guard = true;
18349       size_needed = GET_MODE_SIZE (Pmode);
18350       break;
18351     case unrolled_loop:
18352       need_zero_guard = true;
18353       size_needed = GET_MODE_SIZE (Pmode) * 4;
18354       break;
18355     case rep_prefix_8_byte:
18356       size_needed = 8;
18357       break;
18358     case rep_prefix_4_byte:
18359       size_needed = 4;
18360       break;
18361     case rep_prefix_1_byte:
18362       size_needed = 1;
18363       break;
18364     case loop_1_byte:
18365       need_zero_guard = true;
18366       size_needed = 1;
18367       break;
18368     }
18369   epilogue_size_needed = size_needed;
18370 
18371   /* Step 1: Prologue guard.  */
18372 
18373   /* Alignment code needs count to be in register.  */
18374   if (CONST_INT_P (count_exp) && desired_align > align)
18375     {
18376       if (INTVAL (count_exp) > desired_align
18377           && INTVAL (count_exp) > size_needed)
18378         {
18379           align_bytes
18380             = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18381           if (align_bytes <= 0)
18382             align_bytes = 0;
18383           else
18384             align_bytes = desired_align - align_bytes;
18385         }
18386       if (align_bytes == 0)
18387         {
18388           enum machine_mode mode = SImode;
18389           if (TARGET_64BIT && (count & ~0xffffffff))
18390             mode = DImode;
18391           count_exp = force_reg (mode, count_exp);
18392         }
18393     }
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in the
     front of all code).  */
18397   if (CONST_INT_P (val_exp))
18398     promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18399                                                    desired_align, align);
18400   /* Ensure that alignment prologue won't copy past end of block.  */
18401   if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18402     {
18403       epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18404       /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18405          Make sure it is power of 2.  */
18406       epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18407 
      /* To improve performance of small blocks, we jump around the VAL
         promoting code.  This means that if the promoted VAL is not
         constant, we might not use it in the epilogue and have to use
         the byte loop variant.  */
18412       if (epilogue_size_needed > 2 && !promoted_val)
18413         force_loopy_epilogue = true;
18414       if (count)
18415         {
18416           if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18417             {
              /* If the main algorithm works on QImode, no epilogue is needed.
                 For small sizes just don't align anything.  */
18420               if (size_needed == 1)
18421                 desired_align = align;
18422               else
18423                 goto epilogue;
18424             }
18425         }
18426       else
18427         {
18428           label = gen_label_rtx ();
18429           emit_cmp_and_jump_insns (count_exp,
18430                                    GEN_INT (epilogue_size_needed),
18431                                    LTU, 0, counter_mode (count_exp), 1, label);
18432           if (expected_size == -1 || expected_size <= epilogue_size_needed)
18433             predict_jump (REG_BR_PROB_BASE * 60 / 100);
18434           else
18435             predict_jump (REG_BR_PROB_BASE * 20 / 100);
18436         }
18437     }
18438   if (dynamic_check != -1)
18439     {
18440       rtx hot_label = gen_label_rtx ();
18441       jump_around_label = gen_label_rtx ();
18442       emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18443                                LEU, 0, counter_mode (count_exp), 1, hot_label);
18444       predict_jump (REG_BR_PROB_BASE * 90 / 100);
18445       set_storage_via_libcall (dst, count_exp, val_exp, false);
18446       emit_jump (jump_around_label);
18447       emit_label (hot_label);
18448     }
18449 
18450   /* Step 2: Alignment prologue.  */
18451 
  /* Do the expensive promotion once we have branched off the small blocks.  */
18453   if (!promoted_val)
18454     promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18455                                                    desired_align, align);
18456   gcc_assert (desired_align >= 1 && align >= 1);
18457 
18458   if (desired_align > align)
18459     {
18460       if (align_bytes == 0)
18461         {
          /* Except for the first move in epilogue, we no longer know
             constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw away
             the info early.  */
18466           dst = change_address (dst, BLKmode, destreg);
18467           expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18468                                   desired_align);
18469         }
18470       else
18471         {
18472           /* If we know how many bytes need to be stored before dst is
18473              sufficiently aligned, maintain aliasing info accurately.  */
18474           dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18475                                                  desired_align, align_bytes);
18476           count_exp = plus_constant (count_exp, -align_bytes);
18477           count -= align_bytes;
18478         }
18479       if (need_zero_guard
18480           && (count < (unsigned HOST_WIDE_INT) size_needed
18481               || (align_bytes == 0
18482                   && count < ((unsigned HOST_WIDE_INT) size_needed
18483                               + desired_align - align))))
18484         {
18485           /* It is possible that we copied enough so the main loop will not
18486              execute.  */
18487           gcc_assert (size_needed > 1);
18488           if (label == NULL_RTX)
18489             label = gen_label_rtx ();
18490           emit_cmp_and_jump_insns (count_exp,
18491                                    GEN_INT (size_needed),
18492                                    LTU, 0, counter_mode (count_exp), 1, label);
18493           if (expected_size == -1
18494               || expected_size < (desired_align - align) / 2 + size_needed)
18495             predict_jump (REG_BR_PROB_BASE * 20 / 100);
18496           else
18497             predict_jump (REG_BR_PROB_BASE * 60 / 100);
18498         }
18499     }
18500   if (label && size_needed == 1)
18501     {
18502       emit_label (label);
18503       LABEL_NUSES (label) = 1;
18504       label = NULL;
18505       promoted_val = val_exp;
18506       epilogue_size_needed = 1;
18507     }
18508   else if (label == NULL_RTX)
18509     epilogue_size_needed = size_needed;
18510 
18511   /* Step 3: Main loop.  */
18512 
18513   switch (alg)
18514     {
18515     case libcall:
18516     case no_stringop:
18517       gcc_unreachable ();
18518     case loop_1_byte:
18519       expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18520                                      count_exp, QImode, 1, expected_size);
18521       break;
18522     case loop:
18523       expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18524                                      count_exp, Pmode, 1, expected_size);
18525       break;
18526     case unrolled_loop:
18527       expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18528                                      count_exp, Pmode, 4, expected_size);
18529       break;
18530     case rep_prefix_8_byte:
18531       expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18532                                   DImode, val_exp);
18533       break;
18534     case rep_prefix_4_byte:
18535       expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18536                                   SImode, val_exp);
18537       break;
18538     case rep_prefix_1_byte:
18539       expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18540                                   QImode, val_exp);
18541       break;
18542     }
  /* Properly adjust the offset of the dest memory for aliasing.  */
18544   if (CONST_INT_P (count_exp))
18545     dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18546                                         (count / size_needed) * size_needed);
18547   else
18548     dst = change_address (dst, BLKmode, destreg);
18549 
18550   /* Step 4: Epilogue to copy the remaining bytes.  */
18551 
18552   if (label)
18553     {
      /* When the main loop is done, COUNT_EXP might hold the original
         count, while we want to store only COUNT_EXP & (SIZE_NEEDED - 1)
         bytes.  Epilogue code will actually store
         COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.  Compensate if
         needed.  */
18558 
18559       if (size_needed < epilogue_size_needed)
18560         {
18561           tmp =
18562             expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18563                                  GEN_INT (size_needed - 1), count_exp, 1,
18564                                  OPTAB_DIRECT);
18565           if (tmp != count_exp)
18566             emit_move_insn (count_exp, tmp);
18567         }
18568       emit_label (label);
18569       LABEL_NUSES (label) = 1;
18570     }
18571  epilogue:
18572   if (count_exp != const0_rtx && epilogue_size_needed > 1)
18573     {
18574       if (force_loopy_epilogue)
18575         expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18576                                          epilogue_size_needed);
18577       else
18578         expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18579                                 epilogue_size_needed);
18580     }
18581   if (jump_around_label)
18582     emit_label (jump_around_label);
18583   return 1;
18584 }
18585 
18586 /* Expand the appropriate insns for doing strlen if not just doing
18587    repnz; scasb
18588 
18589    out = result, initialized with the start address
18590    align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
18592         not aligned, otherwise undefined
18593 
18594    This is just the body. It needs the initializations mentioned above and
18595    some address computing at the end.  These things are done in i386.md.  */
18596 
18597 static void
18598 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18599 {
18600   int align;
18601   rtx tmp;
18602   rtx align_2_label = NULL_RTX;
18603   rtx align_3_label = NULL_RTX;
18604   rtx align_4_label = gen_label_rtx ();
18605   rtx end_0_label = gen_label_rtx ();
18606   rtx mem;
18607   rtx tmpreg = gen_reg_rtx (SImode);
18608   rtx scratch = gen_reg_rtx (SImode);
18609   rtx cmp;
18610 
18611   align = 0;
18612   if (CONST_INT_P (align_rtx))
18613     align = INTVAL (align_rtx);
18614 
18615   /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
18616 
18617   /* Is there a known alignment and is it less than 4?  */
18618   if (align < 4)
18619     {
18620       rtx scratch1 = gen_reg_rtx (Pmode);
18621       emit_move_insn (scratch1, out);
18622       /* Is there a known alignment and is it not 2? */
18623       if (align != 2)
18624         {
18625           align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18626           align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18627 
18628           /* Leave just the 3 lower bits.  */
18629           align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18630                                     NULL_RTX, 0, OPTAB_WIDEN);
18631 
18632           emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18633                                    Pmode, 1, align_4_label);
18634           emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18635                                    Pmode, 1, align_2_label);
18636           emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18637                                    Pmode, 1, align_3_label);
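
          /* Only a remainder of 1 falls through here, so up to three
             byte-by-byte checks follow before the pointer is aligned.  */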
18638         }
18639       else
18640         {
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check whether it is aligned to a 4-byte boundary.  */
18643 
18644           align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18645                                     NULL_RTX, 0, OPTAB_WIDEN);
18646 
18647           emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18648                                    Pmode, 1, align_4_label);
18649         }
18650 
18651       mem = change_address (src, QImode, out);
18652 
18653       /* Now compare the bytes.  */
18654 
      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
18656       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18657                                QImode, 1, end_0_label);
18658 
18659       /* Increment the address.  */
18660       emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18661 
      /* Not needed with an alignment of 2.  */
18663       if (align != 2)
18664         {
18665           emit_label (align_2_label);
18666 
18667           emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18668                                    end_0_label);
18669 
18670           emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18671 
18672           emit_label (align_3_label);
18673         }
18674 
18675       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18676                                end_0_label);
18677 
18678       emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18679     }
18680 
  /* Generate loop to check 4 bytes at a time.  It is not a good idea
     to align this loop: it only enlarges the code without providing
     any speedup.  */
18684   emit_label (align_4_label);
18685 
18686   mem = change_address (src, SImode, out);
18687   emit_move_insn (scratch, mem);
18688   emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18689 
  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */
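  /* A worked check (illustrative value): for SCRATCH == 0x41420043,
     SCRATCH - 0x01010101 == 0x4040FF42 and ~SCRATCH == 0xBEBDFFBC;
     ANDing those gives 0x0000FF00, and masking with 0x80808080 leaves
     0x00008000, nonzero precisely because byte 1 of SCRATCH is zero.  */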
18692 
18693   emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18694   emit_insn (gen_one_cmplsi2 (scratch, scratch));
18695   emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18696   emit_insn (gen_andsi3 (tmpreg, tmpreg,
18697                          gen_int_mode (0x80808080, SImode)));
18698   emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
18699                            align_4_label);
18700 
18701   if (TARGET_CMOVE)
18702     {
18703        rtx reg = gen_reg_rtx (SImode);
18704        rtx reg2 = gen_reg_rtx (Pmode);
18705        emit_move_insn (reg, tmpreg);
18706        emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18707 
18708        /* If zero is not in the first two bytes, move two bytes forward.  */
18709        emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18710        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18711        tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18712        emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18713                                gen_rtx_IF_THEN_ELSE (SImode, tmp,
18714                                                      reg,
18715                                                      tmpreg)));
       /* Emit lea manually to avoid clobbering the flags.  */
18717        emit_insn (gen_rtx_SET (SImode, reg2,
18718                                gen_rtx_PLUS (Pmode, out, const2_rtx)));
18719 
18720        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18721        tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18722        emit_insn (gen_rtx_SET (VOIDmode, out,
18723                                gen_rtx_IF_THEN_ELSE (Pmode, tmp,
18724                                                      reg2,
18725                                                      out)));
18726 
18727     }
18728   else
18729     {
18730        rtx end_2_label = gen_label_rtx ();
18731        /* Is zero in the first two bytes? */
18732 
18733        emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18734        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18735        tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
18736        tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18737                             gen_rtx_LABEL_REF (VOIDmode, end_2_label),
18738                             pc_rtx);
18739        tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18740        JUMP_LABEL (tmp) = end_2_label;
18741 
18742        /* Not in the first two.  Move two bytes forward.  */
18743        emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
18744        emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
18745 
18746        emit_label (end_2_label);
18747 
18748     }
18749 
  /* Avoid a branch when fixing up the final byte.  */
18751   tmpreg = gen_lowpart (QImode, tmpreg);
18752   emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
18753   cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18754   emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
18755 
18756   emit_label (end_0_label);
18757 }
18758 
18759 /* Expand strlen.  */
18760 
18761 int
18762 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
18763 {
18764   rtx addr, scratch1, scratch2, scratch3, scratch4;
18765 
  /* The generic case of the strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */
18768 
18769   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18770       && !TARGET_INLINE_ALL_STRINGOPS
18771       && !optimize_insn_for_size_p ()
18772       && (!CONST_INT_P (align) || INTVAL (align) < 4))
18773     return 0;
18774 
18775   addr = force_reg (Pmode, XEXP (src, 0));
18776   scratch1 = gen_reg_rtx (Pmode);
18777 
18778   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18779       && !optimize_insn_for_size_p ())
18780     {
18781       /* Well it seems that some optimizer does not combine a call like
18782          foo(strlen(bar), strlen(bar));
         when the move and the subtraction are done here.  It does calculate
18784          the length just once when these instructions are done inside of
18785          output_strlen_unroll().  But I think since &bar[strlen(bar)] is
18786          often used and I use one fewer register for the lifetime of
18787          output_strlen_unroll() this is better.  */
18788 
18789       emit_move_insn (out, addr);
18790 
18791       ix86_expand_strlensi_unroll_1 (out, src, align);
18792 
18793       /* strlensi_unroll_1 returns the address of the zero at the end of
18794          the string, like memchr(), so compute the length by subtracting
18795          the start address.  */
18796       emit_insn ((*ix86_gen_sub3) (out, out, addr));
18797     }
18798   else
18799     {
18800       rtx unspec;
18801 
18802       /* Can't use this if the user has appropriated eax, ecx, or edi.  */
18803       if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
18804         return false;
18805 
18806       scratch2 = gen_reg_rtx (Pmode);
18807       scratch3 = gen_reg_rtx (Pmode);
18808       scratch4 = force_reg (Pmode, constm1_rtx);
18809 
18810       emit_move_insn (scratch3, addr);
18811       eoschar = force_reg (QImode, eoschar);
18812 
18813       src = replace_equiv_address_nv (src, scratch3);
18814 
18815       /* If .md starts supporting :P, this can be done in .md.  */
18816       unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
18817                                                  scratch4), UNSPEC_SCAS);
18818       emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
18819       emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
18820       emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
18821     }
18822   return 1;
18823 }
18824 
/* For a given symbol (function), construct code to compute the address of
   its PLT entry in the large x86-64 PIC model.  */
18827 rtx
18828 construct_plt_address (rtx symbol)
18829 {
18830   rtx tmp = gen_reg_rtx (Pmode);
18831   rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
18832 
18833   gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
18834   gcc_assert (ix86_cmodel == CM_LARGE_PIC);
18835 
18836   emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
18837   emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
18838   return tmp;
18839 }
18840 
18841 void
18842 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
18843                   rtx callarg2,
18844                   rtx pop, int sibcall)
18845 {
18846   rtx use = NULL, call;
18847 
18848   if (pop == const0_rtx)
18849     pop = NULL;
18850   gcc_assert (!TARGET_64BIT || !pop);
18851 
18852   if (TARGET_MACHO && !TARGET_64BIT)
18853     {
18854 #if TARGET_MACHO
18855       if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
18856         fnaddr = machopic_indirect_call_target (fnaddr);
18857 #endif
18858     }
18859   else
18860     {
18861       /* Static functions and indirect calls don't need the pic register.  */
18862       if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
18863           && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18864           && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
18865         use_reg (&use, pic_offset_table_rtx);
18866     }
18867 
18868   if (TARGET_64BIT && INTVAL (callarg2) >= 0)
18869     {
18870       rtx al = gen_rtx_REG (QImode, AX_REG);
18871       emit_move_insn (al, callarg2);
18872       use_reg (&use, al);
18873     }
18874 
18875   if (ix86_cmodel == CM_LARGE_PIC
18876       && GET_CODE (fnaddr) == MEM
18877       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18878       && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
18879     fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
18880   else if (sibcall
18881            ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
18882            : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
18883     {
18884       fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18885       fnaddr = gen_rtx_MEM (QImode, fnaddr);
18886     }
18887 
18888   call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
18889   if (retval)
18890     call = gen_rtx_SET (VOIDmode, retval, call);
18891   if (pop)
18892     {
18893       pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
18894       pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
18895       call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
18896     }
18897   if (TARGET_64BIT
18898       && ix86_cfun_abi () == MS_ABI
18899       && (!callarg2 || INTVAL (callarg2) != -2))
18900     {
      /* We need to represent that SI, DI and the XMM registers
         XMM6-XMM15, which are preserved under the MS ABI, are
         clobbered by SYSV calls.  */
18903       static int clobbered_registers[] = {
18904         XMM6_REG, XMM7_REG, XMM8_REG,
18905         XMM9_REG, XMM10_REG, XMM11_REG,
18906         XMM12_REG, XMM13_REG, XMM14_REG,
18907         XMM15_REG, SI_REG, DI_REG
18908       };
18909       unsigned int i;
18910       rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
18911       rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
18912                                    UNSPEC_MS_TO_SYSV_CALL);
18913 
18914       vec[0] = call;
18915       vec[1] = unspec;
18916       for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
18917         vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
18918                                       ? TImode : DImode,
18919                                       gen_rtx_REG
18920                                         (SSE_REGNO_P (clobbered_registers[i])
18921                                                       ? TImode : DImode,
18922                                          clobbered_registers[i]));
18923 
18924       call = gen_rtx_PARALLEL (VOIDmode,
18925                                gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
18926                                + 2, vec));
18927     }
18928 
18929   call = emit_call_insn (call);
18930   if (use)
18931     CALL_INSN_FUNCTION_USAGE (call) = use;
18932 }
18933 
18934 
18935 /* Clear stack slot assignments remembered from previous functions.
18936    This is called from INIT_EXPANDERS once before RTL is emitted for each
18937    function.  */
18938 
18939 static struct machine_function *
18940 ix86_init_machine_status (void)
18941 {
18942   struct machine_function *f;
18943 
18944   f = GGC_CNEW (struct machine_function);
18945   f->use_fast_prologue_epilogue_nregs = -1;
18946   f->tls_descriptor_call_expanded_p = 0;
18947   f->call_abi = DEFAULT_ABI;
18948 
18949   return f;
18950 }
18951 
18952 /* Return a MEM corresponding to a stack slot with mode MODE.
18953    Allocate a new slot if necessary.
18954 
18955    The RTL for a function can have several slots available: N is
18956    which slot to use.  */
18957 
18958 rtx
18959 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
18960 {
18961   struct stack_local_entry *s;
18962 
18963   gcc_assert (n < MAX_386_STACK_LOCALS);
18964 
18965   /* Virtual slot is valid only before vregs are instantiated.  */
18966   gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
18967 
18968   for (s = ix86_stack_locals; s; s = s->next)
18969     if (s->mode == mode && s->n == n)
18970       return copy_rtx (s->rtl);
18971 
18972   s = (struct stack_local_entry *)
18973     ggc_alloc (sizeof (struct stack_local_entry));
18974   s->n = n;
18975   s->mode = mode;
18976   s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
18977 
18978   s->next = ix86_stack_locals;
18979   ix86_stack_locals = s;
18980   return s->rtl;
18981 }
18982 
18983 /* Construct the SYMBOL_REF for the tls_get_addr function.  */
18984 
18985 static GTY(()) rtx ix86_tls_symbol;
18986 rtx
18987 ix86_tls_get_addr (void)
18988 {
18989 
18990   if (!ix86_tls_symbol)
18991     {
18992       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
18993                                             (TARGET_ANY_GNU_TLS
18994                                              && !TARGET_64BIT)
18995                                             ? "___tls_get_addr"
18996                                             : "__tls_get_addr");
18997     }
18998 
18999   return ix86_tls_symbol;
19000 }
19001 
19002 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
19003 
19004 static GTY(()) rtx ix86_tls_module_base_symbol;
19005 rtx
19006 ix86_tls_module_base (void)
19007 {
19008 
19009   if (!ix86_tls_module_base_symbol)
19010     {
19011       ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19012                                                         "_TLS_MODULE_BASE_");
19013       SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19014         |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19015     }
19016 
19017   return ix86_tls_module_base_symbol;
19018 }
19019 
19020 /* Calculate the length of the memory address in the instruction
19021    encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
19022 
19023 int
19024 memory_address_length (rtx addr)
19025 {
19026   struct ix86_address parts;
19027   rtx base, index, disp;
19028   int len;
19029   int ok;
19030 
19031   if (GET_CODE (addr) == PRE_DEC
19032       || GET_CODE (addr) == POST_INC
19033       || GET_CODE (addr) == PRE_MODIFY
19034       || GET_CODE (addr) == POST_MODIFY)
19035     return 0;
19036 
19037   ok = ix86_decompose_address (addr, &parts);
19038   gcc_assert (ok);
19039 
19040   if (parts.base && GET_CODE (parts.base) == SUBREG)
19041     parts.base = SUBREG_REG (parts.base);
19042   if (parts.index && GET_CODE (parts.index) == SUBREG)
19043     parts.index = SUBREG_REG (parts.index);
19044 
19045   base = parts.base;
19046   index = parts.index;
19047   disp = parts.disp;
19048   len = 0;
19049 
19050   /* Rule of thumb:
19051        - esp as the base always wants an index,
19052        - ebp as the base always wants a displacement.  */
19053 
19054   /* Register Indirect.  */
19055   if (base && !index && !disp)
19056     {
19057       /* esp (for its index) and ebp (for its displacement) need
19058          the two-byte modrm form.  */
19059       if (addr == stack_pointer_rtx
19060           || addr == arg_pointer_rtx
19061           || addr == frame_pointer_rtx
19062           || addr == hard_frame_pointer_rtx)
19063         len = 1;
19064     }
19065 
19066   /* Direct Addressing.  */
19067   else if (disp && !base && !index)
19068     len = 4;
19069 
19070   else
19071     {
19072       /* Find the length of the displacement constant.  */
19073       if (disp)
19074         {
19075           if (base && satisfies_constraint_K (disp))
19076             len = 1;
19077           else
19078             len = 4;
19079         }
19080       /* ebp always wants a displacement.  */
19081       else if (base == hard_frame_pointer_rtx)
19082         len = 1;
19083 
19084       /* An index requires the two-byte modrm form....  */
19085       if (index
19086           /* ...like esp, which always wants an index.  */
19087           || base == stack_pointer_rtx
19088           || base == arg_pointer_rtx
19089           || base == frame_pointer_rtx)
19090         len += 1;
19091     }
19092 
19093   return len;
19094 }
19095 
19096 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
19097    is set, expect that insn have 8bit immediate alternative.  */
19098 int
19099 ix86_attr_length_immediate_default (rtx insn, int shortform)
19100 {
19101   int len = 0;
19102   int i;
19103   extract_insn_cached (insn);
19104   for (i = recog_data.n_operands - 1; i >= 0; --i)
19105     if (CONSTANT_P (recog_data.operand[i]))
19106       {
19107         gcc_assert (!len);
19108         if (shortform && satisfies_constraint_K (recog_data.operand[i]))
19109           len = 1;
19110         else
19111           {
            switch (get_attr_mode (insn))
              {
                case MODE_QI:
                  len += 1;
                  break;
                case MODE_HI:
                  len += 2;
                  break;
                case MODE_SI:
                  len += 4;
                  break;
                /* Immediates for DImode instructions are encoded
                   as 32-bit sign-extended values.  */
                case MODE_DI:
                  len += 4;
                  break;
                default:
                  fatal_insn ("unknown insn mode", insn);
              }
19130           }
19131       }
19132   return len;
19133 }
19134 /* Compute default value for "length_address" attribute.  */
19135 int
19136 ix86_attr_length_address_default (rtx insn)
19137 {
19138   int i;
19139 
19140   if (get_attr_type (insn) == TYPE_LEA)
19141     {
19142       rtx set = PATTERN (insn);
19143 
19144       if (GET_CODE (set) == PARALLEL)
19145         set = XVECEXP (set, 0, 0);
19146 
19147       gcc_assert (GET_CODE (set) == SET);
19148 
19149       return memory_address_length (SET_SRC (set));
19150     }
19151 
19152   extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      return memory_address_length (XEXP (recog_data.operand[i], 0));
  return 0;
19160 }
19161 
19162 /* Compute default value for "length_vex" attribute. It includes
19163    2 or 3 byte VEX prefix and 1 opcode byte.  */
19164 
19165 int
19166 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19167                               int has_vex_w)
19168 {
19169   int i;
19170 
  /* Only insns in the 0f opcode map can use the 2-byte VEX prefix;
     setting the VEX W bit requires the 3-byte VEX prefix.  */
19173   if (!has_0f_opcode || has_vex_w)
19174     return 3 + 1;
19175 
  /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
19177   if (!TARGET_64BIT)
19178     return 2 + 1;
19179 
19180   extract_insn_cached (insn);
19181 
19182   for (i = recog_data.n_operands - 1; i >= 0; --i)
19183     if (REG_P (recog_data.operand[i]))
19184       {
        /* The REX.W bit requires the 3-byte VEX prefix.  */
19186         if (GET_MODE (recog_data.operand[i]) == DImode)
19187           return 3 + 1;
19188       }
19189     else
19190       {
        /* The REX.X and REX.B bits require the 3-byte VEX prefix.  */
19192         if (MEM_P (recog_data.operand[i])
19193             && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19194           return 3 + 1;
19195       }
19196 
19197   return 2 + 1;
19198 }
19199 
19200 /* Return the maximum number of instructions a cpu can issue.  */
19201 
19202 static int
19203 ix86_issue_rate (void)
19204 {
19205   switch (ix86_tune)
19206     {
19207     case PROCESSOR_PENTIUM:
19208     case PROCESSOR_K6:
19209       return 2;
19210 
19211     case PROCESSOR_PENTIUMPRO:
19212     case PROCESSOR_PENTIUM4:
19213     case PROCESSOR_ATHLON:
19214     case PROCESSOR_K8:
19215     case PROCESSOR_AMDFAM10:
19216     case PROCESSOR_NOCONA:
19217     case PROCESSOR_GENERIC32:
19218     case PROCESSOR_GENERIC64:
19219       return 3;
19220 
19221     case PROCESSOR_CORE2:
19222       return 4;
19223 
19224     default:
19225       return 1;
19226     }
19227 }
19228 
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the
   flags set by DEP_INSN and nothing else that DEP_INSN sets.  */
19231 
19232 static int
19233 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19234 {
19235   rtx set, set2;
19236 
19237   /* Simplify the test for uninteresting insns.  */
19238   if (insn_type != TYPE_SETCC
19239       && insn_type != TYPE_ICMOV
19240       && insn_type != TYPE_FCMOV
19241       && insn_type != TYPE_IBR)
19242     return 0;
19243 
19244   if ((set = single_set (dep_insn)) != 0)
19245     {
19246       set = SET_DEST (set);
19247       set2 = NULL_RTX;
19248     }
19249   else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19250            && XVECLEN (PATTERN (dep_insn), 0) == 2
19251            && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19252            && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19253     {
19254       set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
19256     }
19257   else
19258     return 0;
19259 
19260   if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19261     return 0;
19262 
19263   /* This test is true if the dependent insn reads the flags but
19264      not any other potentially set register.  */
19265   if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19266     return 0;
19267 
19268   if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19269     return 0;
19270 
19271   return 1;
19272 }
19273 
19274 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
19275    address with operands set by DEP_INSN.  */
19276 
19277 static int
19278 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19279 {
19280   rtx addr;
19281 
19282   if (insn_type == TYPE_LEA
19283       && TARGET_PENTIUM)
19284     {
19285       addr = PATTERN (insn);
19286 
19287       if (GET_CODE (addr) == PARALLEL)
19288         addr = XVECEXP (addr, 0, 0);
19289 
19290       gcc_assert (GET_CODE (addr) == SET);
19291 
19292       addr = SET_SRC (addr);
19293     }
19294   else
19295     {
19296       int i;
19297       extract_insn_cached (insn);
19298       for (i = recog_data.n_operands - 1; i >= 0; --i)
19299         if (MEM_P (recog_data.operand[i]))
19300           {
19301             addr = XEXP (recog_data.operand[i], 0);
19302             goto found;
19303           }
19304       return 0;
19305     found:;
19306     }
19307 
19308   return modified_in_p (addr, dep_insn);
19309 }
19310 
19311 static int
19312 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19313 {
19314   enum attr_type insn_type, dep_insn_type;
19315   enum attr_memory memory;
19316   rtx set, set2;
19317   int dep_insn_code_number;
19318 
19319   /* Anti and output dependencies have zero cost on all CPUs.  */
19320   if (REG_NOTE_KIND (link) != 0)
19321     return 0;
19322 
19323   dep_insn_code_number = recog_memoized (dep_insn);
19324 
19325   /* If we can't recognize the insns, we can't really do anything.  */
19326   if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19327     return cost;
19328 
19329   insn_type = get_attr_type (insn);
19330   dep_insn_type = get_attr_type (dep_insn);
19331 
19332   switch (ix86_tune)
19333     {
19334     case PROCESSOR_PENTIUM:
19335       /* Address Generation Interlock adds a cycle of latency.  */
19336       if (ix86_agi_dependent (insn, dep_insn, insn_type))
19337         cost += 1;
19338 
19339       /* ??? Compares pair with jump/setcc.  */
19340       if (ix86_flags_dependent (insn, dep_insn, insn_type))
19341         cost = 0;
19342 
      /* Floating point stores require the value to be ready one cycle
         earlier.  */
19344       if (insn_type == TYPE_FMOV
19345           && get_attr_memory (insn) == MEMORY_STORE
19346           && !ix86_agi_dependent (insn, dep_insn, insn_type))
19347         cost += 1;
19348       break;
19349 
19350     case PROCESSOR_PENTIUMPRO:
19351       memory = get_attr_memory (insn);
19352 
19353       /* INT->FP conversion is expensive.  */
19354       if (get_attr_fp_int_src (dep_insn))
19355         cost += 5;
19356 
19357       /* There is one cycle extra latency between an FP op and a store.  */
19358       if (insn_type == TYPE_FMOV
19359           && (set = single_set (dep_insn)) != NULL_RTX
19360           && (set2 = single_set (insn)) != NULL_RTX
19361           && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19362           && MEM_P (SET_DEST (set2)))
19363         cost += 1;
19364 
      /* Show the ability of the reorder buffer to hide the latency of a
         load by executing it in parallel with the previous instruction,
         provided the previous instruction is not needed to compute the
         address.  */
19368       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19369           && !ix86_agi_dependent (insn, dep_insn, insn_type))
19370         {
          /* Claim that moves take one cycle, as the core can issue one
             load at a time and the next load can start a cycle later.  */
19373           if (dep_insn_type == TYPE_IMOV
19374               || dep_insn_type == TYPE_FMOV)
19375             cost = 1;
19376           else if (cost > 1)
19377             cost--;
19378         }
19379       break;
19380 
19381     case PROCESSOR_K6:
19382       memory = get_attr_memory (insn);
19383 
19384       /* The esp dependency is resolved before the instruction is really
19385          finished.  */
19386       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19387           && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19388         return 1;
19389 
19390       /* INT->FP conversion is expensive.  */
19391       if (get_attr_fp_int_src (dep_insn))
19392         cost += 5;
19393 
      /* Show the ability of the reorder buffer to hide the latency of a
         load by executing it in parallel with the previous instruction,
         provided the previous instruction is not needed to compute the
         address.  */
19397       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19398           && !ix86_agi_dependent (insn, dep_insn, insn_type))
19399         {
          /* Claim that moves take one cycle, as the core can issue one
             load at a time and the next load can start a cycle later.  */
19402           if (dep_insn_type == TYPE_IMOV
19403               || dep_insn_type == TYPE_FMOV)
19404             cost = 1;
19405           else if (cost > 2)
19406             cost -= 2;
19407           else
19408             cost = 1;
19409         }
19410       break;
19411 
19412     case PROCESSOR_ATHLON:
19413     case PROCESSOR_K8:
19414     case PROCESSOR_AMDFAM10:
19415     case PROCESSOR_GENERIC32:
19416     case PROCESSOR_GENERIC64:
19417       memory = get_attr_memory (insn);
19418 
      /* Show the ability of the reorder buffer to hide the latency of a
         load by executing it in parallel with the previous instruction,
         provided the previous instruction is not needed to compute the
         address.  */
19422       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19423           && !ix86_agi_dependent (insn, dep_insn, insn_type))
19424         {
19425           enum attr_unit unit = get_attr_unit (insn);
19426           int loadcost = 3;
19427 
          /* Because of the difference in length between the integer and
             floating point unit pipeline preparation stages, the memory
             operands for floating point are cheaper.

             ??? For Athlon the difference is most probably 2.  */
19433           if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19434             loadcost = 3;
19435           else
19436             loadcost = TARGET_ATHLON ? 2 : 0;
19437 
19438           if (cost >= loadcost)
19439             cost -= loadcost;
19440           else
19441             cost = 0;
19442         }
19443 
19444     default:
19445       break;
19446     }
19447 
19448   return cost;
19449 }
19450 
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */
19454 
19455 static int
19456 ia32_multipass_dfa_lookahead (void)
19457 {
19458   switch (ix86_tune)
19459     {
19460     case PROCESSOR_PENTIUM:
19461       return 2;
19462 
19463     case PROCESSOR_PENTIUMPRO:
19464     case PROCESSOR_K6:
19465       return 1;
19466 
19467     default:
19468       return 0;
19469     }
19470 }
19471 
19472 
19473 /* Compute the alignment given to a constant that is being placed in memory.
19474    EXP is the constant and ALIGN is the alignment that the object would
19475    ordinarily have.
19476    The value of this function is used instead of that alignment to align
19477    the object.  */
19478 
19479 int
19480 ix86_constant_alignment (tree exp, int align)
19481 {
19482   if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19483       || TREE_CODE (exp) == INTEGER_CST)
19484     {
19485       if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19486         return 64;
19487       else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
19488         return 128;
19489     }
19490   else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19491            && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19492     return BITS_PER_WORD;
19493 
19494   return align;
19495 }
19496 
19497 /* Compute the alignment for a static variable.
19498    TYPE is the data type, and ALIGN is the alignment that
19499    the object would ordinarily have.  The value of this function is used
19500    instead of that alignment to align the object.  */
19501 
19502 int
19503 ix86_data_alignment (tree type, int align)
19504 {
19505   int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
19506 
19507   if (AGGREGATE_TYPE_P (type)
19508       && TYPE_SIZE (type)
19509       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19510       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19511           || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19512       && align < max_align)
19513     align = max_align;
19514 
  /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
     to a 16-byte boundary.  */
19517   if (TARGET_64BIT)
19518     {
19519       if (AGGREGATE_TYPE_P (type)
19520            && TYPE_SIZE (type)
19521            && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19522            && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19523                || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19524         return 128;
19525     }
19526 
19527   if (TREE_CODE (type) == ARRAY_TYPE)
19528     {
19529       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19530         return 64;
19531       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19532         return 128;
19533     }
19534   else if (TREE_CODE (type) == COMPLEX_TYPE)
19535     {
19536 
19537       if (TYPE_MODE (type) == DCmode && align < 64)
19538         return 64;
19539       if ((TYPE_MODE (type) == XCmode
19540            || TYPE_MODE (type) == TCmode) && align < 128)
19541         return 128;
19542     }
19543   else if ((TREE_CODE (type) == RECORD_TYPE
19544             || TREE_CODE (type) == UNION_TYPE
19545             || TREE_CODE (type) == QUAL_UNION_TYPE)
19546            && TYPE_FIELDS (type))
19547     {
19548       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19549         return 64;
19550       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19551         return 128;
19552     }
19553   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19554            || TREE_CODE (type) == INTEGER_TYPE)
19555     {
19556       if (TYPE_MODE (type) == DFmode && align < 64)
19557         return 64;
19558       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19559         return 128;
19560     }
19561 
19562   return align;
19563 }
19564 
19565 /* Compute the alignment for a local variable or a stack slot.  EXP is
19566    the data type or decl itself, MODE is the widest mode available and
19567    ALIGN is the alignment that the object would ordinarily have.  The
19568    value of this macro is used instead of that alignment to align the
19569    object.  */
19570 
19571 unsigned int
19572 ix86_local_alignment (tree exp, enum machine_mode mode,
19573                       unsigned int align)
19574 {
19575   tree type, decl;
19576 
19577   if (exp && DECL_P (exp))
19578     {
19579       type = TREE_TYPE (exp);
19580       decl = exp;
19581     }
19582   else
19583     {
19584       type = exp;
19585       decl = NULL;
19586     }
19587 
19588   /* Don't do dynamic stack realignment for long long objects with
19589      -mpreferred-stack-boundary=2.  */
19590   if (!TARGET_64BIT
19591       && align == 64
19592       && ix86_preferred_stack_boundary < 64
19593       && (mode == DImode || (type && TYPE_MODE (type) == DImode))
19594       && (!type || !TYPE_USER_ALIGN (type))
19595       && (!decl || !DECL_USER_ALIGN (decl)))
19596     align = 32;
19597 
  /* If TYPE is NULL, we are allocating a stack slot for a caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
19601   if (!type)
19602     {
19603       if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19604         align = GET_MODE_ALIGNMENT (DFmode);
19605       return align;
19606     }
19607 
  /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
     to a 16-byte boundary.  */
19610   if (TARGET_64BIT)
19611     {
19612       if (AGGREGATE_TYPE_P (type)
19613            && TYPE_SIZE (type)
19614            && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19615            && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
19616                || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19617         return 128;
19618     }
19619   if (TREE_CODE (type) == ARRAY_TYPE)
19620     {
19621       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19622         return 64;
19623       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19624         return 128;
19625     }
19626   else if (TREE_CODE (type) == COMPLEX_TYPE)
19627     {
19628       if (TYPE_MODE (type) == DCmode && align < 64)
19629         return 64;
19630       if ((TYPE_MODE (type) == XCmode
19631            || TYPE_MODE (type) == TCmode) && align < 128)
19632         return 128;
19633     }
19634   else if ((TREE_CODE (type) == RECORD_TYPE
19635             || TREE_CODE (type) == UNION_TYPE
19636             || TREE_CODE (type) == QUAL_UNION_TYPE)
19637            && TYPE_FIELDS (type))
19638     {
19639       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19640         return 64;
19641       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19642         return 128;
19643     }
19644   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19645            || TREE_CODE (type) == INTEGER_TYPE)
19646     {
19647 
19648       if (TYPE_MODE (type) == DFmode && align < 64)
19649         return 64;
19650       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19651         return 128;
19652     }
19653   return align;
19654 }
19655 
19656 /* Compute the minimum required alignment for dynamic stack realignment
19657    purposes for a local variable, parameter or a stack slot.  EXP is
19658    the data type or decl itself, MODE is its mode and ALIGN is the
19659    alignment that the object would ordinarily have.  */
19660 
19661 unsigned int
19662 ix86_minimum_alignment (tree exp, enum machine_mode mode,
19663                         unsigned int align)
19664 {
19665   tree type, decl;
19666 
19667   if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
19668     return align;
19669 
19670   if (exp && DECL_P (exp))
19671     {
19672       type = TREE_TYPE (exp);
19673       decl = exp;
19674     }
19675   else
19676     {
19677       type = exp;
19678       decl = NULL;
19679     }
19680 
19681   /* Don't do dynamic stack realignment for long long objects with
19682      -mpreferred-stack-boundary=2.  */
19683   if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
19684       && (!type || !TYPE_USER_ALIGN (type))
19685       && (!decl || !DECL_USER_ALIGN (decl)))
19686     return 32;
19687 
19688   return align;
19689 }
19690 
19691 /* Emit RTL insns to initialize the variable parts of a trampoline.
19692    FNADDR is an RTX for the address of the function's pure code.
19693    CXT is an RTX for the static chain value for the function.  */
19694 void
19695 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
19696 {
19697   if (!TARGET_64BIT)
19698     {
19699       /* Compute offset from the end of the jmp to the target function.  */
19700       rtx disp = expand_binop (SImode, sub_optab, fnaddr,
19701                                plus_constant (tramp, 10),
19702                                NULL_RTX, 1, OPTAB_DIRECT);
19703       emit_move_insn (gen_rtx_MEM (QImode, tramp),
19704                       gen_int_mode (0xb9, QImode));
19705       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
19706       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
19707                       gen_int_mode (0xe9, QImode));
19708       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
19709     }
19710   else
19711     {
19712       int offset = 0;
      /* Try to load the address using the shorter movl instead of movabs.
         We may want to support movq for kernel mode, but the kernel does
         not use trampolines at the moment.  */
19716       if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
19717         {
19718           fnaddr = copy_to_mode_reg (DImode, fnaddr);
19719           emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19720                           gen_int_mode (0xbb41, HImode));
19721           emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
19722                           gen_lowpart (SImode, fnaddr));
19723           offset += 6;
19724         }
19725       else
19726         {
19727           emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19728                           gen_int_mode (0xbb49, HImode));
19729           emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19730                           fnaddr);
19731           offset += 10;
19732         }
19733       /* Load static chain using movabs to r10.  */
19734       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19735                       gen_int_mode (0xba49, HImode));
19736       emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19737                       cxt);
19738       offset += 10;
      /* Jump to r11.  */
19740       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19741                       gen_int_mode (0xff49, HImode));
19742       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
19743                       gen_int_mode (0xe3, QImode));
19744       offset += 3;
19745       gcc_assert (offset <= TRAMPOLINE_SIZE);
19746     }
19747 
19748 #ifdef ENABLE_EXECUTE_STACK
19749   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
19750                      LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
19751 #endif
19752 }
19753 
19754 /* Codes for all the SSE/MMX builtins.  */
19755 enum ix86_builtins
19756 {
19757   IX86_BUILTIN_ADDPS,
19758   IX86_BUILTIN_ADDSS,
19759   IX86_BUILTIN_DIVPS,
19760   IX86_BUILTIN_DIVSS,
19761   IX86_BUILTIN_MULPS,
19762   IX86_BUILTIN_MULSS,
19763   IX86_BUILTIN_SUBPS,
19764   IX86_BUILTIN_SUBSS,
19765 
19766   IX86_BUILTIN_CMPEQPS,
19767   IX86_BUILTIN_CMPLTPS,
19768   IX86_BUILTIN_CMPLEPS,
19769   IX86_BUILTIN_CMPGTPS,
19770   IX86_BUILTIN_CMPGEPS,
19771   IX86_BUILTIN_CMPNEQPS,
19772   IX86_BUILTIN_CMPNLTPS,
19773   IX86_BUILTIN_CMPNLEPS,
19774   IX86_BUILTIN_CMPNGTPS,
19775   IX86_BUILTIN_CMPNGEPS,
19776   IX86_BUILTIN_CMPORDPS,
19777   IX86_BUILTIN_CMPUNORDPS,
19778   IX86_BUILTIN_CMPEQSS,
19779   IX86_BUILTIN_CMPLTSS,
19780   IX86_BUILTIN_CMPLESS,
19781   IX86_BUILTIN_CMPNEQSS,
19782   IX86_BUILTIN_CMPNLTSS,
19783   IX86_BUILTIN_CMPNLESS,
19784   IX86_BUILTIN_CMPNGTSS,
19785   IX86_BUILTIN_CMPNGESS,
19786   IX86_BUILTIN_CMPORDSS,
19787   IX86_BUILTIN_CMPUNORDSS,
19788 
19789   IX86_BUILTIN_COMIEQSS,
19790   IX86_BUILTIN_COMILTSS,
19791   IX86_BUILTIN_COMILESS,
19792   IX86_BUILTIN_COMIGTSS,
19793   IX86_BUILTIN_COMIGESS,
19794   IX86_BUILTIN_COMINEQSS,
19795   IX86_BUILTIN_UCOMIEQSS,
19796   IX86_BUILTIN_UCOMILTSS,
19797   IX86_BUILTIN_UCOMILESS,
19798   IX86_BUILTIN_UCOMIGTSS,
19799   IX86_BUILTIN_UCOMIGESS,
19800   IX86_BUILTIN_UCOMINEQSS,
19801 
19802   IX86_BUILTIN_CVTPI2PS,
19803   IX86_BUILTIN_CVTPS2PI,
19804   IX86_BUILTIN_CVTSI2SS,
19805   IX86_BUILTIN_CVTSI642SS,
19806   IX86_BUILTIN_CVTSS2SI,
19807   IX86_BUILTIN_CVTSS2SI64,
19808   IX86_BUILTIN_CVTTPS2PI,
19809   IX86_BUILTIN_CVTTSS2SI,
19810   IX86_BUILTIN_CVTTSS2SI64,
19811 
19812   IX86_BUILTIN_MAXPS,
19813   IX86_BUILTIN_MAXSS,
19814   IX86_BUILTIN_MINPS,
19815   IX86_BUILTIN_MINSS,
19816 
19817   IX86_BUILTIN_LOADUPS,
19818   IX86_BUILTIN_STOREUPS,
19819   IX86_BUILTIN_MOVSS,
19820 
19821   IX86_BUILTIN_MOVHLPS,
19822   IX86_BUILTIN_MOVLHPS,
19823   IX86_BUILTIN_LOADHPS,
19824   IX86_BUILTIN_LOADLPS,
19825   IX86_BUILTIN_STOREHPS,
19826   IX86_BUILTIN_STORELPS,
19827 
19828   IX86_BUILTIN_MASKMOVQ,
19829   IX86_BUILTIN_MOVMSKPS,
19830   IX86_BUILTIN_PMOVMSKB,
19831 
19832   IX86_BUILTIN_MOVNTPS,
19833   IX86_BUILTIN_MOVNTQ,
19834 
19835   IX86_BUILTIN_LOADDQU,
19836   IX86_BUILTIN_STOREDQU,
19837 
19838   IX86_BUILTIN_PACKSSWB,
19839   IX86_BUILTIN_PACKSSDW,
19840   IX86_BUILTIN_PACKUSWB,
19841 
19842   IX86_BUILTIN_PADDB,
19843   IX86_BUILTIN_PADDW,
19844   IX86_BUILTIN_PADDD,
19845   IX86_BUILTIN_PADDQ,
19846   IX86_BUILTIN_PADDSB,
19847   IX86_BUILTIN_PADDSW,
19848   IX86_BUILTIN_PADDUSB,
19849   IX86_BUILTIN_PADDUSW,
19850   IX86_BUILTIN_PSUBB,
19851   IX86_BUILTIN_PSUBW,
19852   IX86_BUILTIN_PSUBD,
19853   IX86_BUILTIN_PSUBQ,
19854   IX86_BUILTIN_PSUBSB,
19855   IX86_BUILTIN_PSUBSW,
19856   IX86_BUILTIN_PSUBUSB,
19857   IX86_BUILTIN_PSUBUSW,
19858 
19859   IX86_BUILTIN_PAND,
19860   IX86_BUILTIN_PANDN,
19861   IX86_BUILTIN_POR,
19862   IX86_BUILTIN_PXOR,
19863 
19864   IX86_BUILTIN_PAVGB,
19865   IX86_BUILTIN_PAVGW,
19866 
19867   IX86_BUILTIN_PCMPEQB,
19868   IX86_BUILTIN_PCMPEQW,
19869   IX86_BUILTIN_PCMPEQD,
19870   IX86_BUILTIN_PCMPGTB,
19871   IX86_BUILTIN_PCMPGTW,
19872   IX86_BUILTIN_PCMPGTD,
19873 
19874   IX86_BUILTIN_PMADDWD,
19875 
19876   IX86_BUILTIN_PMAXSW,
19877   IX86_BUILTIN_PMAXUB,
19878   IX86_BUILTIN_PMINSW,
19879   IX86_BUILTIN_PMINUB,
19880 
19881   IX86_BUILTIN_PMULHUW,
19882   IX86_BUILTIN_PMULHW,
19883   IX86_BUILTIN_PMULLW,
19884 
19885   IX86_BUILTIN_PSADBW,
19886   IX86_BUILTIN_PSHUFW,
19887 
19888   IX86_BUILTIN_PSLLW,
19889   IX86_BUILTIN_PSLLD,
19890   IX86_BUILTIN_PSLLQ,
19891   IX86_BUILTIN_PSRAW,
19892   IX86_BUILTIN_PSRAD,
19893   IX86_BUILTIN_PSRLW,
19894   IX86_BUILTIN_PSRLD,
19895   IX86_BUILTIN_PSRLQ,
19896   IX86_BUILTIN_PSLLWI,
19897   IX86_BUILTIN_PSLLDI,
19898   IX86_BUILTIN_PSLLQI,
19899   IX86_BUILTIN_PSRAWI,
19900   IX86_BUILTIN_PSRADI,
19901   IX86_BUILTIN_PSRLWI,
19902   IX86_BUILTIN_PSRLDI,
19903   IX86_BUILTIN_PSRLQI,
19904 
19905   IX86_BUILTIN_PUNPCKHBW,
19906   IX86_BUILTIN_PUNPCKHWD,
19907   IX86_BUILTIN_PUNPCKHDQ,
19908   IX86_BUILTIN_PUNPCKLBW,
19909   IX86_BUILTIN_PUNPCKLWD,
19910   IX86_BUILTIN_PUNPCKLDQ,
19911 
19912   IX86_BUILTIN_SHUFPS,
19913 
19914   IX86_BUILTIN_RCPPS,
19915   IX86_BUILTIN_RCPSS,
19916   IX86_BUILTIN_RSQRTPS,
19917   IX86_BUILTIN_RSQRTPS_NR,
19918   IX86_BUILTIN_RSQRTSS,
19919   IX86_BUILTIN_RSQRTF,
19920   IX86_BUILTIN_SQRTPS,
19921   IX86_BUILTIN_SQRTPS_NR,
19922   IX86_BUILTIN_SQRTSS,
19923 
19924   IX86_BUILTIN_UNPCKHPS,
19925   IX86_BUILTIN_UNPCKLPS,
19926 
19927   IX86_BUILTIN_ANDPS,
19928   IX86_BUILTIN_ANDNPS,
19929   IX86_BUILTIN_ORPS,
19930   IX86_BUILTIN_XORPS,
19931 
19932   IX86_BUILTIN_EMMS,
19933   IX86_BUILTIN_LDMXCSR,
19934   IX86_BUILTIN_STMXCSR,
19935   IX86_BUILTIN_SFENCE,
19936 
19937   /* 3DNow! Original */
19938   IX86_BUILTIN_FEMMS,
19939   IX86_BUILTIN_PAVGUSB,
19940   IX86_BUILTIN_PF2ID,
19941   IX86_BUILTIN_PFACC,
19942   IX86_BUILTIN_PFADD,
19943   IX86_BUILTIN_PFCMPEQ,
19944   IX86_BUILTIN_PFCMPGE,
19945   IX86_BUILTIN_PFCMPGT,
19946   IX86_BUILTIN_PFMAX,
19947   IX86_BUILTIN_PFMIN,
19948   IX86_BUILTIN_PFMUL,
19949   IX86_BUILTIN_PFRCP,
19950   IX86_BUILTIN_PFRCPIT1,
19951   IX86_BUILTIN_PFRCPIT2,
19952   IX86_BUILTIN_PFRSQIT1,
19953   IX86_BUILTIN_PFRSQRT,
19954   IX86_BUILTIN_PFSUB,
19955   IX86_BUILTIN_PFSUBR,
19956   IX86_BUILTIN_PI2FD,
19957   IX86_BUILTIN_PMULHRW,
19958 
19959   /* 3DNow! Athlon Extensions */
19960   IX86_BUILTIN_PF2IW,
19961   IX86_BUILTIN_PFNACC,
19962   IX86_BUILTIN_PFPNACC,
19963   IX86_BUILTIN_PI2FW,
19964   IX86_BUILTIN_PSWAPDSI,
19965   IX86_BUILTIN_PSWAPDSF,
19966 
19967   /* SSE2 */
19968   IX86_BUILTIN_ADDPD,
19969   IX86_BUILTIN_ADDSD,
19970   IX86_BUILTIN_DIVPD,
19971   IX86_BUILTIN_DIVSD,
19972   IX86_BUILTIN_MULPD,
19973   IX86_BUILTIN_MULSD,
19974   IX86_BUILTIN_SUBPD,
19975   IX86_BUILTIN_SUBSD,
19976 
19977   IX86_BUILTIN_CMPEQPD,
19978   IX86_BUILTIN_CMPLTPD,
19979   IX86_BUILTIN_CMPLEPD,
19980   IX86_BUILTIN_CMPGTPD,
19981   IX86_BUILTIN_CMPGEPD,
19982   IX86_BUILTIN_CMPNEQPD,
19983   IX86_BUILTIN_CMPNLTPD,
19984   IX86_BUILTIN_CMPNLEPD,
19985   IX86_BUILTIN_CMPNGTPD,
19986   IX86_BUILTIN_CMPNGEPD,
19987   IX86_BUILTIN_CMPORDPD,
19988   IX86_BUILTIN_CMPUNORDPD,
19989   IX86_BUILTIN_CMPEQSD,
19990   IX86_BUILTIN_CMPLTSD,
19991   IX86_BUILTIN_CMPLESD,
19992   IX86_BUILTIN_CMPNEQSD,
19993   IX86_BUILTIN_CMPNLTSD,
19994   IX86_BUILTIN_CMPNLESD,
19995   IX86_BUILTIN_CMPORDSD,
19996   IX86_BUILTIN_CMPUNORDSD,
19997 
19998   IX86_BUILTIN_COMIEQSD,
19999   IX86_BUILTIN_COMILTSD,
20000   IX86_BUILTIN_COMILESD,
20001   IX86_BUILTIN_COMIGTSD,
20002   IX86_BUILTIN_COMIGESD,
20003   IX86_BUILTIN_COMINEQSD,
20004   IX86_BUILTIN_UCOMIEQSD,
20005   IX86_BUILTIN_UCOMILTSD,
20006   IX86_BUILTIN_UCOMILESD,
20007   IX86_BUILTIN_UCOMIGTSD,
20008   IX86_BUILTIN_UCOMIGESD,
20009   IX86_BUILTIN_UCOMINEQSD,
20010 
20011   IX86_BUILTIN_MAXPD,
20012   IX86_BUILTIN_MAXSD,
20013   IX86_BUILTIN_MINPD,
20014   IX86_BUILTIN_MINSD,
20015 
20016   IX86_BUILTIN_ANDPD,
20017   IX86_BUILTIN_ANDNPD,
20018   IX86_BUILTIN_ORPD,
20019   IX86_BUILTIN_XORPD,
20020 
20021   IX86_BUILTIN_SQRTPD,
20022   IX86_BUILTIN_SQRTSD,
20023 
20024   IX86_BUILTIN_UNPCKHPD,
20025   IX86_BUILTIN_UNPCKLPD,
20026 
20027   IX86_BUILTIN_SHUFPD,
20028 
20029   IX86_BUILTIN_LOADUPD,
20030   IX86_BUILTIN_STOREUPD,
20031   IX86_BUILTIN_MOVSD,
20032 
20033   IX86_BUILTIN_LOADHPD,
20034   IX86_BUILTIN_LOADLPD,
20035 
20036   IX86_BUILTIN_CVTDQ2PD,
20037   IX86_BUILTIN_CVTDQ2PS,
20038 
20039   IX86_BUILTIN_CVTPD2DQ,
20040   IX86_BUILTIN_CVTPD2PI,
20041   IX86_BUILTIN_CVTPD2PS,
20042   IX86_BUILTIN_CVTTPD2DQ,
20043   IX86_BUILTIN_CVTTPD2PI,
20044 
20045   IX86_BUILTIN_CVTPI2PD,
20046   IX86_BUILTIN_CVTSI2SD,
20047   IX86_BUILTIN_CVTSI642SD,
20048 
20049   IX86_BUILTIN_CVTSD2SI,
20050   IX86_BUILTIN_CVTSD2SI64,
20051   IX86_BUILTIN_CVTSD2SS,
20052   IX86_BUILTIN_CVTSS2SD,
20053   IX86_BUILTIN_CVTTSD2SI,
20054   IX86_BUILTIN_CVTTSD2SI64,
20055 
20056   IX86_BUILTIN_CVTPS2DQ,
20057   IX86_BUILTIN_CVTPS2PD,
20058   IX86_BUILTIN_CVTTPS2DQ,
20059 
20060   IX86_BUILTIN_MOVNTI,
20061   IX86_BUILTIN_MOVNTPD,
20062   IX86_BUILTIN_MOVNTDQ,
20063 
20064   IX86_BUILTIN_MOVQ128,
20065 
20066   /* SSE2 MMX */
20067   IX86_BUILTIN_MASKMOVDQU,
20068   IX86_BUILTIN_MOVMSKPD,
20069   IX86_BUILTIN_PMOVMSKB128,
20070 
20071   IX86_BUILTIN_PACKSSWB128,
20072   IX86_BUILTIN_PACKSSDW128,
20073   IX86_BUILTIN_PACKUSWB128,
20074 
20075   IX86_BUILTIN_PADDB128,
20076   IX86_BUILTIN_PADDW128,
20077   IX86_BUILTIN_PADDD128,
20078   IX86_BUILTIN_PADDQ128,
20079   IX86_BUILTIN_PADDSB128,
20080   IX86_BUILTIN_PADDSW128,
20081   IX86_BUILTIN_PADDUSB128,
20082   IX86_BUILTIN_PADDUSW128,
20083   IX86_BUILTIN_PSUBB128,
20084   IX86_BUILTIN_PSUBW128,
20085   IX86_BUILTIN_PSUBD128,
20086   IX86_BUILTIN_PSUBQ128,
20087   IX86_BUILTIN_PSUBSB128,
20088   IX86_BUILTIN_PSUBSW128,
20089   IX86_BUILTIN_PSUBUSB128,
20090   IX86_BUILTIN_PSUBUSW128,
20091 
20092   IX86_BUILTIN_PAND128,
20093   IX86_BUILTIN_PANDN128,
20094   IX86_BUILTIN_POR128,
20095   IX86_BUILTIN_PXOR128,
20096 
20097   IX86_BUILTIN_PAVGB128,
20098   IX86_BUILTIN_PAVGW128,
20099 
20100   IX86_BUILTIN_PCMPEQB128,
20101   IX86_BUILTIN_PCMPEQW128,
20102   IX86_BUILTIN_PCMPEQD128,
20103   IX86_BUILTIN_PCMPGTB128,
20104   IX86_BUILTIN_PCMPGTW128,
20105   IX86_BUILTIN_PCMPGTD128,
20106 
20107   IX86_BUILTIN_PMADDWD128,
20108 
20109   IX86_BUILTIN_PMAXSW128,
20110   IX86_BUILTIN_PMAXUB128,
20111   IX86_BUILTIN_PMINSW128,
20112   IX86_BUILTIN_PMINUB128,
20113 
20114   IX86_BUILTIN_PMULUDQ,
20115   IX86_BUILTIN_PMULUDQ128,
20116   IX86_BUILTIN_PMULHUW128,
20117   IX86_BUILTIN_PMULHW128,
20118   IX86_BUILTIN_PMULLW128,
20119 
20120   IX86_BUILTIN_PSADBW128,
20121   IX86_BUILTIN_PSHUFHW,
20122   IX86_BUILTIN_PSHUFLW,
20123   IX86_BUILTIN_PSHUFD,
20124 
20125   IX86_BUILTIN_PSLLDQI128,
20126   IX86_BUILTIN_PSLLWI128,
20127   IX86_BUILTIN_PSLLDI128,
20128   IX86_BUILTIN_PSLLQI128,
20129   IX86_BUILTIN_PSRAWI128,
20130   IX86_BUILTIN_PSRADI128,
20131   IX86_BUILTIN_PSRLDQI128,
20132   IX86_BUILTIN_PSRLWI128,
20133   IX86_BUILTIN_PSRLDI128,
20134   IX86_BUILTIN_PSRLQI128,
20135 
20136   IX86_BUILTIN_PSLLDQ128,
20137   IX86_BUILTIN_PSLLW128,
20138   IX86_BUILTIN_PSLLD128,
20139   IX86_BUILTIN_PSLLQ128,
20140   IX86_BUILTIN_PSRAW128,
20141   IX86_BUILTIN_PSRAD128,
20142   IX86_BUILTIN_PSRLW128,
20143   IX86_BUILTIN_PSRLD128,
20144   IX86_BUILTIN_PSRLQ128,
20145 
20146   IX86_BUILTIN_PUNPCKHBW128,
20147   IX86_BUILTIN_PUNPCKHWD128,
20148   IX86_BUILTIN_PUNPCKHDQ128,
20149   IX86_BUILTIN_PUNPCKHQDQ128,
20150   IX86_BUILTIN_PUNPCKLBW128,
20151   IX86_BUILTIN_PUNPCKLWD128,
20152   IX86_BUILTIN_PUNPCKLDQ128,
20153   IX86_BUILTIN_PUNPCKLQDQ128,
20154 
20155   IX86_BUILTIN_CLFLUSH,
20156   IX86_BUILTIN_MFENCE,
20157   IX86_BUILTIN_LFENCE,
20158 
20159   /* SSE3.  */
20160   IX86_BUILTIN_ADDSUBPS,
20161   IX86_BUILTIN_HADDPS,
20162   IX86_BUILTIN_HSUBPS,
20163   IX86_BUILTIN_MOVSHDUP,
20164   IX86_BUILTIN_MOVSLDUP,
20165   IX86_BUILTIN_ADDSUBPD,
20166   IX86_BUILTIN_HADDPD,
20167   IX86_BUILTIN_HSUBPD,
20168   IX86_BUILTIN_LDDQU,
20169 
20170   IX86_BUILTIN_MONITOR,
20171   IX86_BUILTIN_MWAIT,
20172 
20173   /* SSSE3.  */
20174   IX86_BUILTIN_PHADDW,
20175   IX86_BUILTIN_PHADDD,
20176   IX86_BUILTIN_PHADDSW,
20177   IX86_BUILTIN_PHSUBW,
20178   IX86_BUILTIN_PHSUBD,
20179   IX86_BUILTIN_PHSUBSW,
20180   IX86_BUILTIN_PMADDUBSW,
20181   IX86_BUILTIN_PMULHRSW,
20182   IX86_BUILTIN_PSHUFB,
20183   IX86_BUILTIN_PSIGNB,
20184   IX86_BUILTIN_PSIGNW,
20185   IX86_BUILTIN_PSIGND,
20186   IX86_BUILTIN_PALIGNR,
20187   IX86_BUILTIN_PABSB,
20188   IX86_BUILTIN_PABSW,
20189   IX86_BUILTIN_PABSD,
20190 
20191   IX86_BUILTIN_PHADDW128,
20192   IX86_BUILTIN_PHADDD128,
20193   IX86_BUILTIN_PHADDSW128,
20194   IX86_BUILTIN_PHSUBW128,
20195   IX86_BUILTIN_PHSUBD128,
20196   IX86_BUILTIN_PHSUBSW128,
20197   IX86_BUILTIN_PMADDUBSW128,
20198   IX86_BUILTIN_PMULHRSW128,
20199   IX86_BUILTIN_PSHUFB128,
20200   IX86_BUILTIN_PSIGNB128,
20201   IX86_BUILTIN_PSIGNW128,
20202   IX86_BUILTIN_PSIGND128,
20203   IX86_BUILTIN_PALIGNR128,
20204   IX86_BUILTIN_PABSB128,
20205   IX86_BUILTIN_PABSW128,
20206   IX86_BUILTIN_PABSD128,
20207 
20208   /* AMDFAM10 - SSE4A New Instructions.  */
20209   IX86_BUILTIN_MOVNTSD,
20210   IX86_BUILTIN_MOVNTSS,
20211   IX86_BUILTIN_EXTRQI,
20212   IX86_BUILTIN_EXTRQ,
20213   IX86_BUILTIN_INSERTQI,
20214   IX86_BUILTIN_INSERTQ,
20215 
20216   /* SSE4.1.  */
20217   IX86_BUILTIN_BLENDPD,
20218   IX86_BUILTIN_BLENDPS,
20219   IX86_BUILTIN_BLENDVPD,
20220   IX86_BUILTIN_BLENDVPS,
20221   IX86_BUILTIN_PBLENDVB128,
20222   IX86_BUILTIN_PBLENDW128,
20223 
20224   IX86_BUILTIN_DPPD,
20225   IX86_BUILTIN_DPPS,
20226 
20227   IX86_BUILTIN_INSERTPS128,
20228 
20229   IX86_BUILTIN_MOVNTDQA,
20230   IX86_BUILTIN_MPSADBW128,
20231   IX86_BUILTIN_PACKUSDW128,
20232   IX86_BUILTIN_PCMPEQQ,
20233   IX86_BUILTIN_PHMINPOSUW128,
20234 
20235   IX86_BUILTIN_PMAXSB128,
20236   IX86_BUILTIN_PMAXSD128,
20237   IX86_BUILTIN_PMAXUD128,
20238   IX86_BUILTIN_PMAXUW128,
20239 
20240   IX86_BUILTIN_PMINSB128,
20241   IX86_BUILTIN_PMINSD128,
20242   IX86_BUILTIN_PMINUD128,
20243   IX86_BUILTIN_PMINUW128,
20244 
20245   IX86_BUILTIN_PMOVSXBW128,
20246   IX86_BUILTIN_PMOVSXBD128,
20247   IX86_BUILTIN_PMOVSXBQ128,
20248   IX86_BUILTIN_PMOVSXWD128,
20249   IX86_BUILTIN_PMOVSXWQ128,
20250   IX86_BUILTIN_PMOVSXDQ128,
20251 
20252   IX86_BUILTIN_PMOVZXBW128,
20253   IX86_BUILTIN_PMOVZXBD128,
20254   IX86_BUILTIN_PMOVZXBQ128,
20255   IX86_BUILTIN_PMOVZXWD128,
20256   IX86_BUILTIN_PMOVZXWQ128,
20257   IX86_BUILTIN_PMOVZXDQ128,
20258 
20259   IX86_BUILTIN_PMULDQ128,
20260   IX86_BUILTIN_PMULLD128,
20261 
20262   IX86_BUILTIN_ROUNDPD,
20263   IX86_BUILTIN_ROUNDPS,
20264   IX86_BUILTIN_ROUNDSD,
20265   IX86_BUILTIN_ROUNDSS,
20266 
20267   IX86_BUILTIN_PTESTZ,
20268   IX86_BUILTIN_PTESTC,
20269   IX86_BUILTIN_PTESTNZC,
20270 
20271   IX86_BUILTIN_VEC_INIT_V2SI,
20272   IX86_BUILTIN_VEC_INIT_V4HI,
20273   IX86_BUILTIN_VEC_INIT_V8QI,
20274   IX86_BUILTIN_VEC_EXT_V2DF,
20275   IX86_BUILTIN_VEC_EXT_V2DI,
20276   IX86_BUILTIN_VEC_EXT_V4SF,
20277   IX86_BUILTIN_VEC_EXT_V4SI,
20278   IX86_BUILTIN_VEC_EXT_V8HI,
20279   IX86_BUILTIN_VEC_EXT_V2SI,
20280   IX86_BUILTIN_VEC_EXT_V4HI,
20281   IX86_BUILTIN_VEC_EXT_V16QI,
20282   IX86_BUILTIN_VEC_SET_V2DI,
20283   IX86_BUILTIN_VEC_SET_V4SF,
20284   IX86_BUILTIN_VEC_SET_V4SI,
20285   IX86_BUILTIN_VEC_SET_V8HI,
20286   IX86_BUILTIN_VEC_SET_V4HI,
20287   IX86_BUILTIN_VEC_SET_V16QI,
20288 
20289   IX86_BUILTIN_VEC_PACK_SFIX,
20290 
20291   /* SSE4.2.  */
20292   IX86_BUILTIN_CRC32QI,
20293   IX86_BUILTIN_CRC32HI,
20294   IX86_BUILTIN_CRC32SI,
20295   IX86_BUILTIN_CRC32DI,
20296 
20297   IX86_BUILTIN_PCMPESTRI128,
20298   IX86_BUILTIN_PCMPESTRM128,
20299   IX86_BUILTIN_PCMPESTRA128,
20300   IX86_BUILTIN_PCMPESTRC128,
20301   IX86_BUILTIN_PCMPESTRO128,
20302   IX86_BUILTIN_PCMPESTRS128,
20303   IX86_BUILTIN_PCMPESTRZ128,
20304   IX86_BUILTIN_PCMPISTRI128,
20305   IX86_BUILTIN_PCMPISTRM128,
20306   IX86_BUILTIN_PCMPISTRA128,
20307   IX86_BUILTIN_PCMPISTRC128,
20308   IX86_BUILTIN_PCMPISTRO128,
20309   IX86_BUILTIN_PCMPISTRS128,
20310   IX86_BUILTIN_PCMPISTRZ128,
20311 
20312   IX86_BUILTIN_PCMPGTQ,
20313 
20314   /* AES instructions */
20315   IX86_BUILTIN_AESENC128,
20316   IX86_BUILTIN_AESENCLAST128,
20317   IX86_BUILTIN_AESDEC128,
20318   IX86_BUILTIN_AESDECLAST128,
20319   IX86_BUILTIN_AESIMC128,
20320   IX86_BUILTIN_AESKEYGENASSIST128,
20321 
20322   /* PCLMUL instruction */
20323   IX86_BUILTIN_PCLMULQDQ128,
20324 
20325   /* AVX */
20326   IX86_BUILTIN_ADDPD256,
20327   IX86_BUILTIN_ADDPS256,
20328   IX86_BUILTIN_ADDSUBPD256,
20329   IX86_BUILTIN_ADDSUBPS256,
20330   IX86_BUILTIN_ANDPD256,
20331   IX86_BUILTIN_ANDPS256,
20332   IX86_BUILTIN_ANDNPD256,
20333   IX86_BUILTIN_ANDNPS256,
20334   IX86_BUILTIN_BLENDPD256,
20335   IX86_BUILTIN_BLENDPS256,
20336   IX86_BUILTIN_BLENDVPD256,
20337   IX86_BUILTIN_BLENDVPS256,
20338   IX86_BUILTIN_DIVPD256,
20339   IX86_BUILTIN_DIVPS256,
20340   IX86_BUILTIN_DPPS256,
20341   IX86_BUILTIN_HADDPD256,
20342   IX86_BUILTIN_HADDPS256,
20343   IX86_BUILTIN_HSUBPD256,
20344   IX86_BUILTIN_HSUBPS256,
20345   IX86_BUILTIN_MAXPD256,
20346   IX86_BUILTIN_MAXPS256,
20347   IX86_BUILTIN_MINPD256,
20348   IX86_BUILTIN_MINPS256,
20349   IX86_BUILTIN_MULPD256,
20350   IX86_BUILTIN_MULPS256,
20351   IX86_BUILTIN_ORPD256,
20352   IX86_BUILTIN_ORPS256,
20353   IX86_BUILTIN_SHUFPD256,
20354   IX86_BUILTIN_SHUFPS256,
20355   IX86_BUILTIN_SUBPD256,
20356   IX86_BUILTIN_SUBPS256,
20357   IX86_BUILTIN_XORPD256,
20358   IX86_BUILTIN_XORPS256,
20359   IX86_BUILTIN_CMPSD,
20360   IX86_BUILTIN_CMPSS,
20361   IX86_BUILTIN_CMPPD,
20362   IX86_BUILTIN_CMPPS,
20363   IX86_BUILTIN_CMPPD256,
20364   IX86_BUILTIN_CMPPS256,
20365   IX86_BUILTIN_CVTDQ2PD256,
20366   IX86_BUILTIN_CVTDQ2PS256,
20367   IX86_BUILTIN_CVTPD2PS256,
20368   IX86_BUILTIN_CVTPS2DQ256,
20369   IX86_BUILTIN_CVTPS2PD256,
20370   IX86_BUILTIN_CVTTPD2DQ256,
20371   IX86_BUILTIN_CVTPD2DQ256,
20372   IX86_BUILTIN_CVTTPS2DQ256,
20373   IX86_BUILTIN_EXTRACTF128PD256,
20374   IX86_BUILTIN_EXTRACTF128PS256,
20375   IX86_BUILTIN_EXTRACTF128SI256,
20376   IX86_BUILTIN_VZEROALL,
20377   IX86_BUILTIN_VZEROUPPER,
20378   IX86_BUILTIN_VZEROUPPER_REX64,
20379   IX86_BUILTIN_VPERMILVARPD,
20380   IX86_BUILTIN_VPERMILVARPS,
20381   IX86_BUILTIN_VPERMILVARPD256,
20382   IX86_BUILTIN_VPERMILVARPS256,
20383   IX86_BUILTIN_VPERMILPD,
20384   IX86_BUILTIN_VPERMILPS,
20385   IX86_BUILTIN_VPERMILPD256,
20386   IX86_BUILTIN_VPERMILPS256,
20387   IX86_BUILTIN_VPERM2F128PD256,
20388   IX86_BUILTIN_VPERM2F128PS256,
20389   IX86_BUILTIN_VPERM2F128SI256,
20390   IX86_BUILTIN_VBROADCASTSS,
20391   IX86_BUILTIN_VBROADCASTSD256,
20392   IX86_BUILTIN_VBROADCASTSS256,
20393   IX86_BUILTIN_VBROADCASTPD256,
20394   IX86_BUILTIN_VBROADCASTPS256,
20395   IX86_BUILTIN_VINSERTF128PD256,
20396   IX86_BUILTIN_VINSERTF128PS256,
20397   IX86_BUILTIN_VINSERTF128SI256,
20398   IX86_BUILTIN_LOADUPD256,
20399   IX86_BUILTIN_LOADUPS256,
20400   IX86_BUILTIN_STOREUPD256,
20401   IX86_BUILTIN_STOREUPS256,
20402   IX86_BUILTIN_LDDQU256,
20403   IX86_BUILTIN_MOVNTDQ256,
20404   IX86_BUILTIN_MOVNTPD256,
20405   IX86_BUILTIN_MOVNTPS256,
20406   IX86_BUILTIN_LOADDQU256,
20407   IX86_BUILTIN_STOREDQU256,
20408   IX86_BUILTIN_MASKLOADPD,
20409   IX86_BUILTIN_MASKLOADPS,
20410   IX86_BUILTIN_MASKSTOREPD,
20411   IX86_BUILTIN_MASKSTOREPS,
20412   IX86_BUILTIN_MASKLOADPD256,
20413   IX86_BUILTIN_MASKLOADPS256,
20414   IX86_BUILTIN_MASKSTOREPD256,
20415   IX86_BUILTIN_MASKSTOREPS256,
20416   IX86_BUILTIN_MOVSHDUP256,
20417   IX86_BUILTIN_MOVSLDUP256,
20418   IX86_BUILTIN_MOVDDUP256,
20419 
20420   IX86_BUILTIN_SQRTPD256,
20421   IX86_BUILTIN_SQRTPS256,
20422   IX86_BUILTIN_SQRTPS_NR256,
20423   IX86_BUILTIN_RSQRTPS256,
20424   IX86_BUILTIN_RSQRTPS_NR256,
20425 
20426   IX86_BUILTIN_RCPPS256,
20427 
20428   IX86_BUILTIN_ROUNDPD256,
20429   IX86_BUILTIN_ROUNDPS256,
20430 
20431   IX86_BUILTIN_UNPCKHPD256,
20432   IX86_BUILTIN_UNPCKLPD256,
20433   IX86_BUILTIN_UNPCKHPS256,
20434   IX86_BUILTIN_UNPCKLPS256,
20435 
20436   IX86_BUILTIN_SI256_SI,
20437   IX86_BUILTIN_PS256_PS,
20438   IX86_BUILTIN_PD256_PD,
20439   IX86_BUILTIN_SI_SI256,
20440   IX86_BUILTIN_PS_PS256,
20441   IX86_BUILTIN_PD_PD256,
20442 
20443   IX86_BUILTIN_VTESTZPD,
20444   IX86_BUILTIN_VTESTCPD,
20445   IX86_BUILTIN_VTESTNZCPD,
20446   IX86_BUILTIN_VTESTZPS,
20447   IX86_BUILTIN_VTESTCPS,
20448   IX86_BUILTIN_VTESTNZCPS,
20449   IX86_BUILTIN_VTESTZPD256,
20450   IX86_BUILTIN_VTESTCPD256,
20451   IX86_BUILTIN_VTESTNZCPD256,
20452   IX86_BUILTIN_VTESTZPS256,
20453   IX86_BUILTIN_VTESTCPS256,
20454   IX86_BUILTIN_VTESTNZCPS256,
20455   IX86_BUILTIN_PTESTZ256,
20456   IX86_BUILTIN_PTESTC256,
20457   IX86_BUILTIN_PTESTNZC256,
20458 
20459   IX86_BUILTIN_MOVMSKPD256,
20460   IX86_BUILTIN_MOVMSKPS256,
20461 
20462   /* TFmode support builtins.  */
20463   IX86_BUILTIN_INFQ,
20464   IX86_BUILTIN_FABSQ,
20465   IX86_BUILTIN_COPYSIGNQ,
20466 
20467   /* SSE5 instructions */
20468   IX86_BUILTIN_FMADDSS,
20469   IX86_BUILTIN_FMADDSD,
20470   IX86_BUILTIN_FMADDPS,
20471   IX86_BUILTIN_FMADDPD,
  IX86_BUILTIN_FMSUBSS,
  IX86_BUILTIN_FMSUBSD,
  IX86_BUILTIN_FMSUBPS,
  IX86_BUILTIN_FMSUBPD,
  IX86_BUILTIN_FNMADDSS,
  IX86_BUILTIN_FNMADDSD,
  IX86_BUILTIN_FNMADDPS,
  IX86_BUILTIN_FNMADDPD,
  IX86_BUILTIN_FNMSUBSS,
  IX86_BUILTIN_FNMSUBSD,
  IX86_BUILTIN_FNMSUBPS,
  IX86_BUILTIN_FNMSUBPD,
  IX86_BUILTIN_PCMOV,
  IX86_BUILTIN_PCMOV_V2DI,
  IX86_BUILTIN_PCMOV_V4SI,
  IX86_BUILTIN_PCMOV_V8HI,
  IX86_BUILTIN_PCMOV_V16QI,
  IX86_BUILTIN_PCMOV_V4SF,
  IX86_BUILTIN_PCMOV_V2DF,
  IX86_BUILTIN_PPERM,
  IX86_BUILTIN_PERMPS,
  IX86_BUILTIN_PERMPD,
  IX86_BUILTIN_PMACSSWW,
  IX86_BUILTIN_PMACSWW,
  IX86_BUILTIN_PMACSSWD,
  IX86_BUILTIN_PMACSWD,
  IX86_BUILTIN_PMACSSDD,
  IX86_BUILTIN_PMACSDD,
  IX86_BUILTIN_PMACSSDQL,
  IX86_BUILTIN_PMACSSDQH,
  IX86_BUILTIN_PMACSDQL,
  IX86_BUILTIN_PMACSDQH,
  IX86_BUILTIN_PMADCSSWD,
  IX86_BUILTIN_PMADCSWD,
  IX86_BUILTIN_PHADDBW,
  IX86_BUILTIN_PHADDBD,
  IX86_BUILTIN_PHADDBQ,
  IX86_BUILTIN_PHADDWD,
  IX86_BUILTIN_PHADDWQ,
  IX86_BUILTIN_PHADDDQ,
  IX86_BUILTIN_PHADDUBW,
  IX86_BUILTIN_PHADDUBD,
  IX86_BUILTIN_PHADDUBQ,
  IX86_BUILTIN_PHADDUWD,
  IX86_BUILTIN_PHADDUWQ,
  IX86_BUILTIN_PHADDUDQ,
  IX86_BUILTIN_PHSUBBW,
  IX86_BUILTIN_PHSUBWD,
  IX86_BUILTIN_PHSUBDQ,
  IX86_BUILTIN_PROTB,
  IX86_BUILTIN_PROTW,
  IX86_BUILTIN_PROTD,
  IX86_BUILTIN_PROTQ,
  IX86_BUILTIN_PROTB_IMM,
  IX86_BUILTIN_PROTW_IMM,
  IX86_BUILTIN_PROTD_IMM,
  IX86_BUILTIN_PROTQ_IMM,
  IX86_BUILTIN_PSHLB,
  IX86_BUILTIN_PSHLW,
  IX86_BUILTIN_PSHLD,
  IX86_BUILTIN_PSHLQ,
  IX86_BUILTIN_PSHAB,
  IX86_BUILTIN_PSHAW,
  IX86_BUILTIN_PSHAD,
  IX86_BUILTIN_PSHAQ,
  IX86_BUILTIN_FRCZSS,
  IX86_BUILTIN_FRCZSD,
  IX86_BUILTIN_FRCZPS,
  IX86_BUILTIN_FRCZPD,
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPS2PH,

  IX86_BUILTIN_COMEQSS,
  IX86_BUILTIN_COMNESS,
  IX86_BUILTIN_COMLTSS,
  IX86_BUILTIN_COMLESS,
  IX86_BUILTIN_COMGTSS,
  IX86_BUILTIN_COMGESS,
  IX86_BUILTIN_COMUEQSS,
  IX86_BUILTIN_COMUNESS,
  IX86_BUILTIN_COMULTSS,
  IX86_BUILTIN_COMULESS,
  IX86_BUILTIN_COMUGTSS,
  IX86_BUILTIN_COMUGESS,
  IX86_BUILTIN_COMORDSS,
  IX86_BUILTIN_COMUNORDSS,
  IX86_BUILTIN_COMFALSESS,
  IX86_BUILTIN_COMTRUESS,

  IX86_BUILTIN_COMEQSD,
  IX86_BUILTIN_COMNESD,
  IX86_BUILTIN_COMLTSD,
  IX86_BUILTIN_COMLESD,
  IX86_BUILTIN_COMGTSD,
  IX86_BUILTIN_COMGESD,
  IX86_BUILTIN_COMUEQSD,
  IX86_BUILTIN_COMUNESD,
  IX86_BUILTIN_COMULTSD,
  IX86_BUILTIN_COMULESD,
  IX86_BUILTIN_COMUGTSD,
  IX86_BUILTIN_COMUGESD,
  IX86_BUILTIN_COMORDSD,
  IX86_BUILTIN_COMUNORDSD,
  IX86_BUILTIN_COMFALSESD,
  IX86_BUILTIN_COMTRUESD,

  IX86_BUILTIN_COMEQPS,
  IX86_BUILTIN_COMNEPS,
  IX86_BUILTIN_COMLTPS,
  IX86_BUILTIN_COMLEPS,
  IX86_BUILTIN_COMGTPS,
  IX86_BUILTIN_COMGEPS,
  IX86_BUILTIN_COMUEQPS,
  IX86_BUILTIN_COMUNEPS,
  IX86_BUILTIN_COMULTPS,
  IX86_BUILTIN_COMULEPS,
  IX86_BUILTIN_COMUGTPS,
  IX86_BUILTIN_COMUGEPS,
  IX86_BUILTIN_COMORDPS,
  IX86_BUILTIN_COMUNORDPS,
  IX86_BUILTIN_COMFALSEPS,
  IX86_BUILTIN_COMTRUEPS,

  IX86_BUILTIN_COMEQPD,
  IX86_BUILTIN_COMNEPD,
  IX86_BUILTIN_COMLTPD,
  IX86_BUILTIN_COMLEPD,
  IX86_BUILTIN_COMGTPD,
  IX86_BUILTIN_COMGEPD,
  IX86_BUILTIN_COMUEQPD,
  IX86_BUILTIN_COMUNEPD,
  IX86_BUILTIN_COMULTPD,
  IX86_BUILTIN_COMULEPD,
  IX86_BUILTIN_COMUGTPD,
  IX86_BUILTIN_COMUGEPD,
  IX86_BUILTIN_COMORDPD,
  IX86_BUILTIN_COMUNORDPD,
  IX86_BUILTIN_COMFALSEPD,
  IX86_BUILTIN_COMTRUEPD,

  IX86_BUILTIN_PCOMEQUB,
  IX86_BUILTIN_PCOMNEUB,
  IX86_BUILTIN_PCOMLTUB,
  IX86_BUILTIN_PCOMLEUB,
  IX86_BUILTIN_PCOMGTUB,
  IX86_BUILTIN_PCOMGEUB,
  IX86_BUILTIN_PCOMFALSEUB,
  IX86_BUILTIN_PCOMTRUEUB,
  IX86_BUILTIN_PCOMEQUW,
  IX86_BUILTIN_PCOMNEUW,
  IX86_BUILTIN_PCOMLTUW,
  IX86_BUILTIN_PCOMLEUW,
  IX86_BUILTIN_PCOMGTUW,
  IX86_BUILTIN_PCOMGEUW,
  IX86_BUILTIN_PCOMFALSEUW,
  IX86_BUILTIN_PCOMTRUEUW,
  IX86_BUILTIN_PCOMEQUD,
  IX86_BUILTIN_PCOMNEUD,
  IX86_BUILTIN_PCOMLTUD,
  IX86_BUILTIN_PCOMLEUD,
  IX86_BUILTIN_PCOMGTUD,
  IX86_BUILTIN_PCOMGEUD,
  IX86_BUILTIN_PCOMFALSEUD,
  IX86_BUILTIN_PCOMTRUEUD,
  IX86_BUILTIN_PCOMEQUQ,
  IX86_BUILTIN_PCOMNEUQ,
  IX86_BUILTIN_PCOMLTUQ,
  IX86_BUILTIN_PCOMLEUQ,
  IX86_BUILTIN_PCOMGTUQ,
  IX86_BUILTIN_PCOMGEUQ,
  IX86_BUILTIN_PCOMFALSEUQ,
  IX86_BUILTIN_PCOMTRUEUQ,

  IX86_BUILTIN_PCOMEQB,
  IX86_BUILTIN_PCOMNEB,
  IX86_BUILTIN_PCOMLTB,
  IX86_BUILTIN_PCOMLEB,
  IX86_BUILTIN_PCOMGTB,
  IX86_BUILTIN_PCOMGEB,
  IX86_BUILTIN_PCOMFALSEB,
  IX86_BUILTIN_PCOMTRUEB,
  IX86_BUILTIN_PCOMEQW,
  IX86_BUILTIN_PCOMNEW,
  IX86_BUILTIN_PCOMLTW,
  IX86_BUILTIN_PCOMLEW,
  IX86_BUILTIN_PCOMGTW,
  IX86_BUILTIN_PCOMGEW,
  IX86_BUILTIN_PCOMFALSEW,
  IX86_BUILTIN_PCOMTRUEW,
  IX86_BUILTIN_PCOMEQD,
  IX86_BUILTIN_PCOMNED,
  IX86_BUILTIN_PCOMLTD,
  IX86_BUILTIN_PCOMLED,
  IX86_BUILTIN_PCOMGTD,
  IX86_BUILTIN_PCOMGED,
  IX86_BUILTIN_PCOMFALSED,
  IX86_BUILTIN_PCOMTRUED,
  IX86_BUILTIN_PCOMEQQ,
  IX86_BUILTIN_PCOMNEQ,
  IX86_BUILTIN_PCOMLTQ,
  IX86_BUILTIN_PCOMLEQ,
  IX86_BUILTIN_PCOMGTQ,
  IX86_BUILTIN_PCOMGEQ,
  IX86_BUILTIN_PCOMFALSEQ,
  IX86_BUILTIN_PCOMTRUEQ,

  IX86_BUILTIN_MAX
};

/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISAs
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa GTY(())
{
  tree type;			/* builtin type to use in the declaration */
  const char *name;		/* function name */
  int isa;			/* isa_flags this builtin is defined for */
  bool const_p;			/* true if the declaration is constant */
};

static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];


/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
 * of which isa_flags to use in the ix86_builtins_isa array.  Stores the
 * function decl in the ix86_builtins array.  Returns the function decl or
 * NULL_TREE, if the builtin was not added.
 *
 * If the front end has a special hook for builtin functions, delay adding
 * builtin functions that aren't in the current ISA until the ISA is changed
 * with function specific optimization.  Doing so can save about 300K for the
 * default compiler.  When the builtin is expanded, check at that time whether
 * it is valid.
 *
 * If the front end doesn't have a special hook, record all builtins, even
 * those that aren't in the current ISA, in case the user uses function
 * specific options for a different ISA, so that we don't get scope
 * errors if a builtin is added in the middle of a function scope.  */

static inline tree
def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      if ((mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
				       NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].type = NULL_TREE;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].type = type;
	  ix86_builtins_isa[(int) code].name = name;
	}
    }

  return decl;
}
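
/* Usage sketch (illustrative only -- the real registrations are made
   in the builtin-init code further below, and the type node here
   stands in for one built there):

     tree ftype = build_function_type_list (V2DF_type_node,
					    V2DF_type_node,
					    V2DF_type_node, NULL_TREE);
     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_addpd",
		  ftype, IX86_BUILTIN_ADDPD);

   If SSE2 is enabled, or the front end supports extension-scope
   builtins, the decl is created immediately; otherwise the name and
   type are parked in ix86_builtins_isa until ix86_add_new_builtins
   runs for a function compiled with SSE2 enabled.  */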

/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (int mask, const char *name, tree type,
		   enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, type, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
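
/* Setting TREE_READONLY on the decl corresponds to the "const"
   function attribute: the optimizers may assume the call has no side
   effects and depends only on its arguments, so identical calls can
   be combined.  Sketch, reusing a hypothetical ftype as above:

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps",
			ftype, IX86_BUILTIN_SQRTPS);  */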

/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree up front, most of which would never be used.  */

static void
ix86_add_new_builtins (int isa)
{
  int i;
  tree decl;

  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].type != NULL_TREE)
	{
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 ix86_builtins_isa[i].type,
						 i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  ix86_builtins_isa[i].type = NULL_TREE;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
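
/* Caller sketch (illustrative): when function-specific target options
   enable extra ISAs mid-compilation, the newly enabled bits are passed
   here so that deferred builtins become visible, along the lines of:

     int new_isa = ix86_isa_flags & ~old_isa_flags;
     if (new_isa)
       ix86_add_new_builtins (new_isa);

   The real call site is in the target attribute/pragma handling
   elsewhere in this file.  */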

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap the comparison operands in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS	1

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
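
/* Reading a row: for instance, the bdesc_comi entry below

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt",
       IX86_BUILTIN_COMILTSS, UNLT, 0 }

   says: when SSE is enabled, register __builtin_ia32_comilt under the
   enum code IX86_BUILTIN_COMILTSS and expand it through the sse_comi
   insn pattern with the UNLT rtx comparison.  Note that the later
   bdesc_special_args and bdesc_args tables reuse the flag field to
   carry the function-type enum, cast to int.  */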

static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};

/* Special builtin types */
enum ix86_special_builtin_type
{
  SPECIAL_FTYPE_UNKNOWN,
  VOID_FTYPE_VOID,
  V32QI_FTYPE_PCCHAR,
  V16QI_FTYPE_PCCHAR,
  V8SF_FTYPE_PCV4SF,
  V8SF_FTYPE_PCFLOAT,
  V4DF_FTYPE_PCV2DF,
  V4DF_FTYPE_PCDOUBLE,
  V4SF_FTYPE_PCFLOAT,
  V2DF_FTYPE_PCDOUBLE,
  V8SF_FTYPE_PCV8SF_V8SF,
  V4DF_FTYPE_PCV4DF_V4DF,
  V4SF_FTYPE_V4SF_PCV2SF,
  V4SF_FTYPE_PCV4SF_V4SF,
  V2DF_FTYPE_V2DF_PCDOUBLE,
  V2DF_FTYPE_PCV2DF_V2DF,
  V2DI_FTYPE_PV2DI,
  VOID_FTYPE_PV2SF_V4SF,
  VOID_FTYPE_PV4DI_V4DI,
  VOID_FTYPE_PV2DI_V2DI,
  VOID_FTYPE_PCHAR_V32QI,
  VOID_FTYPE_PCHAR_V16QI,
  VOID_FTYPE_PFLOAT_V8SF,
  VOID_FTYPE_PFLOAT_V4SF,
  VOID_FTYPE_PDOUBLE_V4DF,
  VOID_FTYPE_PDOUBLE_V2DF,
  VOID_FTYPE_PDI_DI,
  VOID_FTYPE_PINT_INT,
  VOID_FTYPE_PV8SF_V8SF_V8SF,
  VOID_FTYPE_PV4DF_V4DF_V4DF,
  VOID_FTYPE_PV4SF_V4SF_V4SF,
  VOID_FTYPE_PV2DF_V2DF_V2DF
};
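
/* Decoding the type names above: the identifier before FTYPE is the
   return type and the identifiers after it are the argument types,
   with a P prefix meaning "pointer to" and PC "pointer to const".
   For example, V4SF_FTYPE_V4SF_PCV2SF describes

     v4sf f (v4sf, const v2sf *);

   which is the shape of loadhps/loadlps further below.  */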

/* Builtin types */
enum ix86_builtin_type
{
  FTYPE_UNKNOWN,
  FLOAT128_FTYPE_FLOAT128,
  FLOAT_FTYPE_FLOAT,
  FLOAT128_FTYPE_FLOAT128_FLOAT128,
  INT_FTYPE_V8SF_V8SF_PTEST,
  INT_FTYPE_V4DI_V4DI_PTEST,
  INT_FTYPE_V4DF_V4DF_PTEST,
  INT_FTYPE_V4SF_V4SF_PTEST,
  INT_FTYPE_V2DI_V2DI_PTEST,
  INT_FTYPE_V2DF_V2DF_PTEST,
  INT64_FTYPE_V4SF,
  INT64_FTYPE_V2DF,
  INT_FTYPE_V16QI,
  INT_FTYPE_V8QI,
  INT_FTYPE_V8SF,
  INT_FTYPE_V4DF,
  INT_FTYPE_V4SF,
  INT_FTYPE_V2DF,
  V16QI_FTYPE_V16QI,
  V8SI_FTYPE_V8SF,
  V8SI_FTYPE_V4SI,
  V8HI_FTYPE_V8HI,
  V8HI_FTYPE_V16QI,
  V8QI_FTYPE_V8QI,
  V8SF_FTYPE_V8SF,
  V8SF_FTYPE_V8SI,
  V8SF_FTYPE_V4SF,
  V4SI_FTYPE_V4SI,
  V4SI_FTYPE_V16QI,
  V4SI_FTYPE_V8SI,
  V4SI_FTYPE_V8HI,
  V4SI_FTYPE_V4DF,
  V4SI_FTYPE_V4SF,
  V4SI_FTYPE_V2DF,
  V4HI_FTYPE_V4HI,
  V4DF_FTYPE_V4DF,
  V4DF_FTYPE_V4SI,
  V4DF_FTYPE_V4SF,
  V4DF_FTYPE_V2DF,
  V4SF_FTYPE_V4DF,
  V4SF_FTYPE_V4SF,
  V4SF_FTYPE_V4SF_VEC_MERGE,
  V4SF_FTYPE_V8SF,
  V4SF_FTYPE_V4SI,
  V4SF_FTYPE_V2DF,
  V2DI_FTYPE_V2DI,
  V2DI_FTYPE_V16QI,
  V2DI_FTYPE_V8HI,
  V2DI_FTYPE_V4SI,
  V2DF_FTYPE_V2DF,
  V2DF_FTYPE_V2DF_VEC_MERGE,
  V2DF_FTYPE_V4SI,
  V2DF_FTYPE_V4DF,
  V2DF_FTYPE_V4SF,
  V2DF_FTYPE_V2SI,
  V2SI_FTYPE_V2SI,
  V2SI_FTYPE_V4SF,
  V2SI_FTYPE_V2SF,
  V2SI_FTYPE_V2DF,
  V2SF_FTYPE_V2SF,
  V2SF_FTYPE_V2SI,
  V16QI_FTYPE_V16QI_V16QI,
  V16QI_FTYPE_V8HI_V8HI,
  V8QI_FTYPE_V8QI_V8QI,
  V8QI_FTYPE_V4HI_V4HI,
  V8HI_FTYPE_V8HI_V8HI,
  V8HI_FTYPE_V8HI_V8HI_COUNT,
  V8HI_FTYPE_V16QI_V16QI,
  V8HI_FTYPE_V4SI_V4SI,
  V8HI_FTYPE_V8HI_SI_COUNT,
  V8SF_FTYPE_V8SF_V8SF,
  V8SF_FTYPE_V8SF_V8SI,
  V4SI_FTYPE_V4SI_V4SI,
  V4SI_FTYPE_V4SI_V4SI_COUNT,
  V4SI_FTYPE_V8HI_V8HI,
  V4SI_FTYPE_V4SF_V4SF,
  V4SI_FTYPE_V2DF_V2DF,
  V4SI_FTYPE_V4SI_SI_COUNT,
  V4HI_FTYPE_V4HI_V4HI,
  V4HI_FTYPE_V4HI_V4HI_COUNT,
  V4HI_FTYPE_V8QI_V8QI,
  V4HI_FTYPE_V2SI_V2SI,
  V4HI_FTYPE_V4HI_SI_COUNT,
  V4DF_FTYPE_V4DF_V4DF,
  V4DF_FTYPE_V4DF_V4DI,
  V4SF_FTYPE_V4SF_V4SF,
  V4SF_FTYPE_V4SF_V4SF_SWAP,
  V4SF_FTYPE_V4SF_V4SI,
  V4SF_FTYPE_V4SF_V2SI,
  V4SF_FTYPE_V4SF_V2DF,
  V4SF_FTYPE_V4SF_DI,
  V4SF_FTYPE_V4SF_SI,
  V2DI_FTYPE_V2DI_V2DI,
  V2DI_FTYPE_V2DI_V2DI_COUNT,
  V2DI_FTYPE_V16QI_V16QI,
  V2DI_FTYPE_V4SI_V4SI,
  V2DI_FTYPE_V2DI_V16QI,
  V2DI_FTYPE_V2DF_V2DF,
  V2DI_FTYPE_V2DI_SI_COUNT,
  V2SI_FTYPE_V2SI_V2SI,
  V2SI_FTYPE_V2SI_V2SI_COUNT,
  V2SI_FTYPE_V4HI_V4HI,
  V2SI_FTYPE_V2SF_V2SF,
  V2SI_FTYPE_V2SI_SI_COUNT,
  V2DF_FTYPE_V2DF_V2DF,
  V2DF_FTYPE_V2DF_V2DF_SWAP,
  V2DF_FTYPE_V2DF_V4SF,
  V2DF_FTYPE_V2DF_V2DI,
  V2DF_FTYPE_V2DF_DI,
  V2DF_FTYPE_V2DF_SI,
  V2SF_FTYPE_V2SF_V2SF,
  V1DI_FTYPE_V1DI_V1DI,
  V1DI_FTYPE_V1DI_V1DI_COUNT,
  V1DI_FTYPE_V8QI_V8QI,
  V1DI_FTYPE_V2SI_V2SI,
  V1DI_FTYPE_V1DI_SI_COUNT,
  UINT64_FTYPE_UINT64_UINT64,
  UINT_FTYPE_UINT_UINT,
  UINT_FTYPE_UINT_USHORT,
  UINT_FTYPE_UINT_UCHAR,
  V8HI_FTYPE_V8HI_INT,
  V4SI_FTYPE_V4SI_INT,
  V4HI_FTYPE_V4HI_INT,
  V8SF_FTYPE_V8SF_INT,
  V4SI_FTYPE_V8SI_INT,
  V4SF_FTYPE_V8SF_INT,
  V2DF_FTYPE_V4DF_INT,
  V4DF_FTYPE_V4DF_INT,
  V4SF_FTYPE_V4SF_INT,
  V2DI_FTYPE_V2DI_INT,
  V2DI2TI_FTYPE_V2DI_INT,
  V2DF_FTYPE_V2DF_INT,
  V16QI_FTYPE_V16QI_V16QI_V16QI,
  V8SF_FTYPE_V8SF_V8SF_V8SF,
  V4DF_FTYPE_V4DF_V4DF_V4DF,
  V4SF_FTYPE_V4SF_V4SF_V4SF,
  V2DF_FTYPE_V2DF_V2DF_V2DF,
  V16QI_FTYPE_V16QI_V16QI_INT,
  V8SI_FTYPE_V8SI_V8SI_INT,
  V8SI_FTYPE_V8SI_V4SI_INT,
  V8HI_FTYPE_V8HI_V8HI_INT,
  V8SF_FTYPE_V8SF_V8SF_INT,
  V8SF_FTYPE_V8SF_V4SF_INT,
  V4SI_FTYPE_V4SI_V4SI_INT,
  V4DF_FTYPE_V4DF_V4DF_INT,
  V4DF_FTYPE_V4DF_V2DF_INT,
  V4SF_FTYPE_V4SF_V4SF_INT,
  V2DI_FTYPE_V2DI_V2DI_INT,
  V2DI2TI_FTYPE_V2DI_V2DI_INT,
  V1DI2DI_FTYPE_V1DI_V1DI_INT,
  V2DF_FTYPE_V2DF_V2DF_INT,
  V2DI_FTYPE_V2DI_UINT_UINT,
  V2DI_FTYPE_V2DI_V2DI_UINT_UINT
};
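
/* Some names above carry trailing tags that affect how the builtin is
   expanded rather than its C-level signature: _COUNT marks a shift
   count operand, _SWAP means the two vector operands are exchanged
   before expansion (used to synthesize GT/GE from LT/LE), and
   _VEC_MERGE marks scalar insns whose result is merged back into the
   destination vector.  For example, __builtin_ia32_cmpgtps below is
   entered with comparison LT and type V4SF_FTYPE_V4SF_V4SF_SWAP.  */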

/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
};

/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE or 3DNow!A MMX extensions */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21419   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21420   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI  },
21421   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21422   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21423   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI  },
21424 
21425   { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21426   { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21427   { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21428   { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21429 
21430   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21431   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI  },
21432   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN,  (int) V4SI_FTYPE_V4SI_V4SI },
21433   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21434   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21435   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21436   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21437   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21438 
21439   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21440   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21441   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21442 
21443   { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21444   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21445 
21446   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21447   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21448 
21449   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21450 
21451   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21452   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21453   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21454   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21455 
21456   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21457   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21458   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21459   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21460   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21461   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21462   { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21463 
21464   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21465   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21466   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21467   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21468   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21469   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21470   { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
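
  /* Note that the whole-register shifts above (pslldqi128 and psrldqi128,
     expanded through sse2_ashlti3/sse2_lshrti3 in TImode) take their
     count operand in bits rather than bytes, so the intrinsic wrappers
     are expected to scale the byte count.  A minimal sketch of the
     emmintrin.h definition (illustrative, not part of this file):

       #define _mm_srli_si128(A, N) \
         ((__m128i) __builtin_ia32_psrldqi128 ((__m128i)(A), (N) * 8))  */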

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  /* SSSE3 PALIGNR.  */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
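
  /* The V2DI2TI/V1DI2DI signatures above mean the palignr operation is
     carried out on a TImode/DImode view of the V2DI/V1DI operands, and
     the shift count is measured in bits.  A sketch of the tmmintrin.h
     wrapper, which scales the byte count (illustrative only):

       #define _mm_alignr_epi8(X, Y, N) \
         ((__m128i) __builtin_ia32_palignr128 ((__m128i)(X), \
                                               (__m128i)(Y), (N) * 8))  */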

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  /* SSE4.1 and SSE5 */
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
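
  /* For the ptest entries the comparison code selects which EFLAGS bit
     the expander tests: EQ yields ptestz (ZF set), LTU yields ptestc
     (CF set), and GTU yields ptestnzc (both ZF and CF clear).  The
     vtest and ptest256 entries in the AVX block below follow the same
     convention.  */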

  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
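
  /* extrq/insertq operate on a bit field within the low quadword.  The
     *qi forms take the field length and start position as immediates
     (hence the trailing _UINT_UINT in their signatures), while the
     register forms read them from the second vector operand.  */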

  /* AES */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* PCLMUL */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
};
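
/* Each bdesc_args entry above ties a user-visible builtin name to the insn
   pattern that implements it; ix86_init_mmx_sse_builtins walks the table
   and registers each entry, conditioned on its ISA mask (entries with a
   null name, such as the AES ones, are registered separately), and the
   signature code in the last field drives operand handling at expansion
   time.  As an illustrative sketch of how user code reaches an entry,
   compiled with -msse2:

     typedef char __v16qi __attribute__ ((__vector_size__ (16)));

     __v16qi
     add_bytes (__v16qi a, __v16qi b)
     {
       return __builtin_ia32_paddb128 (a, b);
     }

   resolves through IX86_BUILTIN_PADDB128 to CODE_FOR_addv16qi3.  */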

/* SSE5 */
enum multi_arg_type {
  MULTI_ARG_UNKNOWN,
  MULTI_ARG_3_SF,
  MULTI_ARG_3_DF,
  MULTI_ARG_3_DI,
  MULTI_ARG_3_SI,
  MULTI_ARG_3_SI_DI,
  MULTI_ARG_3_HI,
  MULTI_ARG_3_HI_SI,
  MULTI_ARG_3_QI,
  MULTI_ARG_3_PERMPS,
  MULTI_ARG_3_PERMPD,
  MULTI_ARG_2_SF,
  MULTI_ARG_2_DF,
  MULTI_ARG_2_DI,
  MULTI_ARG_2_SI,
  MULTI_ARG_2_HI,
  MULTI_ARG_2_QI,
  MULTI_ARG_2_DI_IMM,
  MULTI_ARG_2_SI_IMM,
  MULTI_ARG_2_HI_IMM,
  MULTI_ARG_2_QI_IMM,
  MULTI_ARG_2_SF_CMP,
  MULTI_ARG_2_DF_CMP,
  MULTI_ARG_2_DI_CMP,
  MULTI_ARG_2_SI_CMP,
  MULTI_ARG_2_HI_CMP,
  MULTI_ARG_2_QI_CMP,
  MULTI_ARG_2_DI_TF,
  MULTI_ARG_2_SI_TF,
  MULTI_ARG_2_HI_TF,
  MULTI_ARG_2_QI_TF,
  MULTI_ARG_2_SF_TF,
  MULTI_ARG_2_DF_TF,
  MULTI_ARG_1_SF,
  MULTI_ARG_1_DF,
  MULTI_ARG_1_DI,
  MULTI_ARG_1_SI,
  MULTI_ARG_1_HI,
  MULTI_ARG_1_QI,
  MULTI_ARG_1_SI_DI,
  MULTI_ARG_1_HI_DI,
  MULTI_ARG_1_HI_SI,
  MULTI_ARG_1_QI_DI,
  MULTI_ARG_1_QI_SI,
  MULTI_ARG_1_QI_HI,
  MULTI_ARG_1_PH2PS,
  MULTI_ARG_1_PS2PH
};
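
/* The MULTI_ARG_* codes describe the operand shape of each bdesc_multi_arg
   entry: the digit gives the argument count and the mode suffixes name
   the element modes, with a second mode giving the result mode where it
   differs from the arguments (e.g. MULTI_ARG_3_HI_SI, used by pmacsswd
   below, takes V8HI inputs and produces a V4SI result).  _IMM marks an
   immediate last operand, _CMP a comparison whose rtx code comes from
   the table, and _TF the constant true/false comparison forms.  */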

static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4,     "__builtin_ia32_fmaddss",    IX86_BUILTIN_FMADDSS,    0,            (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4,     "__builtin_ia32_fmaddsd",    IX86_BUILTIN_FMADDSD,    0,            (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4,       "__builtin_ia32_fmaddps",    IX86_BUILTIN_FMADDPS,    0,            (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4,       "__builtin_ia32_fmaddpd",    IX86_BUILTIN_FMADDPD,    0,            (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4,     "__builtin_ia32_fmsubss",    IX86_BUILTIN_FMSUBSS,    0,            (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4,     "__builtin_ia32_fmsubsd",    IX86_BUILTIN_FMSUBSD,    0,            (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4,       "__builtin_ia32_fmsubps",    IX86_BUILTIN_FMSUBPS,    0,            (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4,       "__builtin_ia32_fmsubpd",    IX86_BUILTIN_FMSUBPD,    0,            (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4,    "__builtin_ia32_fnmaddss",   IX86_BUILTIN_FNMADDSS,   0,            (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4,    "__builtin_ia32_fnmaddsd",   IX86_BUILTIN_FNMADDSD,   0,            (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4,      "__builtin_ia32_fnmaddps",   IX86_BUILTIN_FNMADDPS,   0,            (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4,      "__builtin_ia32_fnmaddpd",   IX86_BUILTIN_FNMADDPD,   0,            (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4,    "__builtin_ia32_fnmsubss",   IX86_BUILTIN_FNMSUBSS,   0,            (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4,    "__builtin_ia32_fnmsubsd",   IX86_BUILTIN_FNMSUBSD,   0,            (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4,      "__builtin_ia32_fnmsubps",   IX86_BUILTIN_FNMSUBPS,   0,            (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4,      "__builtin_ia32_fnmsubpd",   IX86_BUILTIN_FNMSUBPD,   0,            (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di,        "__builtin_ia32_pcmov",      IX86_BUILTIN_PCMOV,      0,            (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di,        "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0,            (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si,        "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0,            (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi,        "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0,            (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi,       "__builtin_ia32_pcmov_v16qi", IX86_BUILTIN_PCMOV_V16QI, 0,          (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df,        "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0,            (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf,        "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0,            (int)MULTI_ARG_3_SF },
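  /* The pcmov entries are bitwise selects: each result bit is taken from
     the first operand where the third (selector) operand has a 1 bit and
     from the second operand where it has a 0 bit, roughly
     (a & sel) | (b & ~sel); the typed variants differ only in the vector
     type they are declared with.  */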
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm,             "__builtin_ia32_pperm",      IX86_BUILTIN_PPERM,      0,            (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf,          "__builtin_ia32_permps",     IX86_BUILTIN_PERMPS,     0,            (int)MULTI_ARG_3_PERMPS },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df,          "__builtin_ia32_permpd",     IX86_BUILTIN_PERMPD,     0,            (int)MULTI_ARG_3_PERMPD },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww,          "__builtin_ia32_pmacssww",   IX86_BUILTIN_PMACSSWW,   0,            (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww,           "__builtin_ia32_pmacsww",    IX86_BUILTIN_PMACSWW,    0,            (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd,          "__builtin_ia32_pmacsswd",   IX86_BUILTIN_PMACSSWD,   0,            (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd,           "__builtin_ia32_pmacswd",    IX86_BUILTIN_PMACSWD,    0,            (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd,          "__builtin_ia32_pmacssdd",   IX86_BUILTIN_PMACSSDD,   0,            (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd,           "__builtin_ia32_pmacsdd",    IX86_BUILTIN_PMACSDD,    0,            (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql,         "__builtin_ia32_pmacssdql",  IX86_BUILTIN_PMACSSDQL,  0,            (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh,         "__builtin_ia32_pmacssdqh",  IX86_BUILTIN_PMACSSDQH,  0,            (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql,          "__builtin_ia32_pmacsdql",   IX86_BUILTIN_PMACSDQL,   0,            (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh,          "__builtin_ia32_pmacsdqh",   IX86_BUILTIN_PMACSDQH,   0,            (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd,         "__builtin_ia32_pmadcsswd",  IX86_BUILTIN_PMADCSSWD,  0,            (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd,          "__builtin_ia32_pmadcswd",   IX86_BUILTIN_PMADCSWD,   0,            (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3,        "__builtin_ia32_protq",      IX86_BUILTIN_PROTQ,      0,            (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3,        "__builtin_ia32_protd",      IX86_BUILTIN_PROTD,      0,            (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3,        "__builtin_ia32_protw",      IX86_BUILTIN_PROTW,      0,            (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3,       "__builtin_ia32_protb",      IX86_BUILTIN_PROTB,      0,            (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3,         "__builtin_ia32_protqi",     IX86_BUILTIN_PROTQ_IMM,  0,            (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3,         "__builtin_ia32_protdi",     IX86_BUILTIN_PROTD_IMM,  0,            (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3,         "__builtin_ia32_protwi",     IX86_BUILTIN_PROTW_IMM,  0,            (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3,        "__builtin_ia32_protbi",     IX86_BUILTIN_PROTB_IMM,  0,            (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3,         "__builtin_ia32_pshaq",      IX86_BUILTIN_PSHAQ,      0,            (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3,         "__builtin_ia32_pshad",      IX86_BUILTIN_PSHAD,      0,            (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3,         "__builtin_ia32_pshaw",      IX86_BUILTIN_PSHAW,      0,            (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3,        "__builtin_ia32_pshab",      IX86_BUILTIN_PSHAB,      0,            (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3,         "__builtin_ia32_pshlq",      IX86_BUILTIN_PSHLQ,      0,            (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3,         "__builtin_ia32_pshld",      IX86_BUILTIN_PSHLD,      0,            (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3,         "__builtin_ia32_pshlw",      IX86_BUILTIN_PSHLW,      0,            (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3,        "__builtin_ia32_pshlb",      IX86_BUILTIN_PSHLB,      0,            (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2,       "__builtin_ia32_frczss",     IX86_BUILTIN_FRCZSS,     0,            (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2,       "__builtin_ia32_frczsd",     IX86_BUILTIN_FRCZSD,     0,            (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2,         "__builtin_ia32_frczps",     IX86_BUILTIN_FRCZPS,     0,            (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2,         "__builtin_ia32_frczpd",     IX86_BUILTIN_FRCZPD,     0,            (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps,          "__builtin_ia32_cvtph2ps",   IX86_BUILTIN_CVTPH2PS,   0,            (int)MULTI_ARG_1_PH2PS },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph,          "__builtin_ia32_cvtps2ph",   IX86_BUILTIN_CVTPS2PH,   0,            (int)MULTI_ARG_1_PS2PH },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw,           "__builtin_ia32_phaddbw",    IX86_BUILTIN_PHADDBW,    0,            (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd,           "__builtin_ia32_phaddbd",    IX86_BUILTIN_PHADDBD,    0,            (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq,           "__builtin_ia32_phaddbq",    IX86_BUILTIN_PHADDBQ,    0,            (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd,           "__builtin_ia32_phaddwd",    IX86_BUILTIN_PHADDWD,    0,            (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq,           "__builtin_ia32_phaddwq",    IX86_BUILTIN_PHADDWQ,    0,            (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq,           "__builtin_ia32_phadddq",    IX86_BUILTIN_PHADDDQ,    0,            (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw,          "__builtin_ia32_phaddubw",   IX86_BUILTIN_PHADDUBW,   0,            (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd,          "__builtin_ia32_phaddubd",   IX86_BUILTIN_PHADDUBD,   0,            (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq,          "__builtin_ia32_phaddubq",   IX86_BUILTIN_PHADDUBQ,   0,            (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd,          "__builtin_ia32_phadduwd",   IX86_BUILTIN_PHADDUWD,   0,            (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq,          "__builtin_ia32_phadduwq",   IX86_BUILTIN_PHADDUWQ,   0,            (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq,          "__builtin_ia32_phaddudq",   IX86_BUILTIN_PHADDUDQ,   0,            (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw,           "__builtin_ia32_phsubbw",    IX86_BUILTIN_PHSUBBW,    0,            (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd,           "__builtin_ia32_phsubwd",    IX86_BUILTIN_PHSUBWD,    0,            (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq,           "__builtin_ia32_phsubdq",    IX86_BUILTIN_PHSUBDQ,    0,            (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comeqss",    IX86_BUILTIN_COMEQSS,    EQ,           (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comness",    IX86_BUILTIN_COMNESS,    NE,           (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comneqss",   IX86_BUILTIN_COMNESS,    NE,           (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comltss",    IX86_BUILTIN_COMLTSS,    LT,           (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comless",    IX86_BUILTIN_COMLESS,    LE,           (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comgtss",    IX86_BUILTIN_COMGTSS,    GT,           (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comgess",    IX86_BUILTIN_COMGESS,    GE,           (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comueqss",   IX86_BUILTIN_COMUEQSS,   UNEQ,         (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comuness",   IX86_BUILTIN_COMUNESS,   LTGT,         (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comuneqss",  IX86_BUILTIN_COMUNESS,   LTGT,         (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comunltss",  IX86_BUILTIN_COMULTSS,   UNLT,         (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comunless",  IX86_BUILTIN_COMULESS,   UNLE,         (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comungtss",  IX86_BUILTIN_COMUGTSS,   UNGT,         (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comungess",  IX86_BUILTIN_COMUGESS,   UNGE,         (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comordss",   IX86_BUILTIN_COMORDSS,   ORDERED,      (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED,    (int)MULTI_ARG_2_SF_CMP },
21874 
21875   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comeqsd",    IX86_BUILTIN_COMEQSD,    EQ,           (int)MULTI_ARG_2_DF_CMP },
21876   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comnesd",    IX86_BUILTIN_COMNESD,    NE,           (int)MULTI_ARG_2_DF_CMP },
21877   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comneqsd",   IX86_BUILTIN_COMNESD,    NE,           (int)MULTI_ARG_2_DF_CMP },
21878   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comltsd",    IX86_BUILTIN_COMLTSD,    LT,           (int)MULTI_ARG_2_DF_CMP },
21879   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comlesd",    IX86_BUILTIN_COMLESD,    LE,           (int)MULTI_ARG_2_DF_CMP },
21880   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comgtsd",    IX86_BUILTIN_COMGTSD,    GT,           (int)MULTI_ARG_2_DF_CMP },
21881   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comgesd",    IX86_BUILTIN_COMGESD,    GE,           (int)MULTI_ARG_2_DF_CMP },
21882   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comueqsd",   IX86_BUILTIN_COMUEQSD,   UNEQ,         (int)MULTI_ARG_2_DF_CMP },
21883   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comunesd",   IX86_BUILTIN_COMUNESD,   LTGT,         (int)MULTI_ARG_2_DF_CMP },
21884   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comuneqsd",  IX86_BUILTIN_COMUNESD,   LTGT,         (int)MULTI_ARG_2_DF_CMP },
21885   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comunltsd",  IX86_BUILTIN_COMULTSD,   UNLT,         (int)MULTI_ARG_2_DF_CMP },
21886   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comunlesd",  IX86_BUILTIN_COMULESD,   UNLE,         (int)MULTI_ARG_2_DF_CMP },
21887   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comungtsd",  IX86_BUILTIN_COMUGTSD,   UNGT,         (int)MULTI_ARG_2_DF_CMP },
21888   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comungesd",  IX86_BUILTIN_COMUGESD,   UNGE,         (int)MULTI_ARG_2_DF_CMP },
21889   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comordsd",   IX86_BUILTIN_COMORDSD,   ORDERED,      (int)MULTI_ARG_2_DF_CMP },
21890   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED,    (int)MULTI_ARG_2_DF_CMP },
21891 
21892   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comeqps",    IX86_BUILTIN_COMEQPS,    EQ,           (int)MULTI_ARG_2_SF_CMP },
21893   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comneps",    IX86_BUILTIN_COMNEPS,    NE,           (int)MULTI_ARG_2_SF_CMP },
21894   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comneqps",   IX86_BUILTIN_COMNEPS,    NE,           (int)MULTI_ARG_2_SF_CMP },
21895   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comltps",    IX86_BUILTIN_COMLTPS,    LT,           (int)MULTI_ARG_2_SF_CMP },
21896   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comleps",    IX86_BUILTIN_COMLEPS,    LE,           (int)MULTI_ARG_2_SF_CMP },
21897   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comgtps",    IX86_BUILTIN_COMGTPS,    GT,           (int)MULTI_ARG_2_SF_CMP },
21898   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comgeps",    IX86_BUILTIN_COMGEPS,    GE,           (int)MULTI_ARG_2_SF_CMP },
21899   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comueqps",   IX86_BUILTIN_COMUEQPS,   UNEQ,         (int)MULTI_ARG_2_SF_CMP },
21900   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comuneps",   IX86_BUILTIN_COMUNEPS,   LTGT,         (int)MULTI_ARG_2_SF_CMP },
21901   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comuneqps",  IX86_BUILTIN_COMUNEPS,   LTGT,         (int)MULTI_ARG_2_SF_CMP },
21902   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comunltps",  IX86_BUILTIN_COMULTPS,   UNLT,         (int)MULTI_ARG_2_SF_CMP },
21903   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comunleps",  IX86_BUILTIN_COMULEPS,   UNLE,         (int)MULTI_ARG_2_SF_CMP },
21904   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comungtps",  IX86_BUILTIN_COMUGTPS,   UNGT,         (int)MULTI_ARG_2_SF_CMP },
21905   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comungeps",  IX86_BUILTIN_COMUGEPS,   UNGE,         (int)MULTI_ARG_2_SF_CMP },
21906   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comordps",   IX86_BUILTIN_COMORDPS,   ORDERED,      (int)MULTI_ARG_2_SF_CMP },
21907   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED,    (int)MULTI_ARG_2_SF_CMP },
21908 
21909   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comeqpd",    IX86_BUILTIN_COMEQPD,    EQ,           (int)MULTI_ARG_2_DF_CMP },
21910   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comnepd",    IX86_BUILTIN_COMNEPD,    NE,           (int)MULTI_ARG_2_DF_CMP },
21911   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comneqpd",   IX86_BUILTIN_COMNEPD,    NE,           (int)MULTI_ARG_2_DF_CMP },
21912   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comltpd",    IX86_BUILTIN_COMLTPD,    LT,           (int)MULTI_ARG_2_DF_CMP },
21913   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comlepd",    IX86_BUILTIN_COMLEPD,    LE,           (int)MULTI_ARG_2_DF_CMP },
21914   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comgtpd",    IX86_BUILTIN_COMGTPD,    GT,           (int)MULTI_ARG_2_DF_CMP },
21915   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comgepd",    IX86_BUILTIN_COMGEPD,    GE,           (int)MULTI_ARG_2_DF_CMP },
21916   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comueqpd",   IX86_BUILTIN_COMUEQPD,   UNEQ,         (int)MULTI_ARG_2_DF_CMP },
21917   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comunepd",   IX86_BUILTIN_COMUNEPD,   LTGT,         (int)MULTI_ARG_2_DF_CMP },
21918   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comuneqpd",  IX86_BUILTIN_COMUNEPD,   LTGT,         (int)MULTI_ARG_2_DF_CMP },
21919   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comunltpd",  IX86_BUILTIN_COMULTPD,   UNLT,         (int)MULTI_ARG_2_DF_CMP },
21920   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comunlepd",  IX86_BUILTIN_COMULEPD,   UNLE,         (int)MULTI_ARG_2_DF_CMP },
21921   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comungtpd",  IX86_BUILTIN_COMUGTPD,   UNGT,         (int)MULTI_ARG_2_DF_CMP },
21922   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comungepd",  IX86_BUILTIN_COMUGEPD,   UNGE,         (int)MULTI_ARG_2_DF_CMP },
21923   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comordpd",   IX86_BUILTIN_COMORDPD,   ORDERED,      (int)MULTI_ARG_2_DF_CMP },
21924   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED,    (int)MULTI_ARG_2_DF_CMP },
21925 
21926   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomeqb",    IX86_BUILTIN_PCOMEQB,    EQ,           (int)MULTI_ARG_2_QI_CMP },
21927   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomneb",    IX86_BUILTIN_PCOMNEB,    NE,           (int)MULTI_ARG_2_QI_CMP },
21928   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomneqb",   IX86_BUILTIN_PCOMNEB,    NE,           (int)MULTI_ARG_2_QI_CMP },
21929   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomltb",    IX86_BUILTIN_PCOMLTB,    LT,           (int)MULTI_ARG_2_QI_CMP },
21930   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomleb",    IX86_BUILTIN_PCOMLEB,    LE,           (int)MULTI_ARG_2_QI_CMP },
21931   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomgtb",    IX86_BUILTIN_PCOMGTB,    GT,           (int)MULTI_ARG_2_QI_CMP },
21932   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomgeb",    IX86_BUILTIN_PCOMGEB,    GE,           (int)MULTI_ARG_2_QI_CMP },
21933 
21934   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomeqw",    IX86_BUILTIN_PCOMEQW,    EQ,           (int)MULTI_ARG_2_HI_CMP },
21935   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomnew",    IX86_BUILTIN_PCOMNEW,    NE,           (int)MULTI_ARG_2_HI_CMP },
21936   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomneqw",   IX86_BUILTIN_PCOMNEW,    NE,           (int)MULTI_ARG_2_HI_CMP },
21937   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomltw",    IX86_BUILTIN_PCOMLTW,    LT,           (int)MULTI_ARG_2_HI_CMP },
21938   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomlew",    IX86_BUILTIN_PCOMLEW,    LE,           (int)MULTI_ARG_2_HI_CMP },
21939   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomgtw",    IX86_BUILTIN_PCOMGTW,    GT,           (int)MULTI_ARG_2_HI_CMP },
21940   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomgew",    IX86_BUILTIN_PCOMGEW,    GE,           (int)MULTI_ARG_2_HI_CMP },
21941 
21942   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomeqd",    IX86_BUILTIN_PCOMEQD,    EQ,           (int)MULTI_ARG_2_SI_CMP },
21943   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomned",    IX86_BUILTIN_PCOMNED,    NE,           (int)MULTI_ARG_2_SI_CMP },
21944   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomneqd",   IX86_BUILTIN_PCOMNED,    NE,           (int)MULTI_ARG_2_SI_CMP },
21945   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomltd",    IX86_BUILTIN_PCOMLTD,    LT,           (int)MULTI_ARG_2_SI_CMP },
21946   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomled",    IX86_BUILTIN_PCOMLED,    LE,           (int)MULTI_ARG_2_SI_CMP },
21947   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomgtd",    IX86_BUILTIN_PCOMGTD,    GT,           (int)MULTI_ARG_2_SI_CMP },
21948   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomged",    IX86_BUILTIN_PCOMGED,    GE,           (int)MULTI_ARG_2_SI_CMP },
21949 
21950   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomeqq",    IX86_BUILTIN_PCOMEQQ,    EQ,           (int)MULTI_ARG_2_DI_CMP },
21951   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomneq",    IX86_BUILTIN_PCOMNEQ,    NE,           (int)MULTI_ARG_2_DI_CMP },
21952   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomneqq",   IX86_BUILTIN_PCOMNEQ,    NE,           (int)MULTI_ARG_2_DI_CMP },
21953   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomltq",    IX86_BUILTIN_PCOMLTQ,    LT,           (int)MULTI_ARG_2_DI_CMP },
21954   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomleq",    IX86_BUILTIN_PCOMLEQ,    LE,           (int)MULTI_ARG_2_DI_CMP },
21955   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomgtq",    IX86_BUILTIN_PCOMGTQ,    GT,           (int)MULTI_ARG_2_DI_CMP },
21956   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomgeq",    IX86_BUILTIN_PCOMGEQ,    GE,           (int)MULTI_ARG_2_DI_CMP },
21957 
21958   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb",   IX86_BUILTIN_PCOMEQUB,   EQ,           (int)MULTI_ARG_2_QI_CMP },
21959   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub",   IX86_BUILTIN_PCOMNEUB,   NE,           (int)MULTI_ARG_2_QI_CMP },
21960   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb",  IX86_BUILTIN_PCOMNEUB,   NE,           (int)MULTI_ARG_2_QI_CMP },
21961   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub",   IX86_BUILTIN_PCOMLTUB,   LTU,          (int)MULTI_ARG_2_QI_CMP },
21962   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub",   IX86_BUILTIN_PCOMLEUB,   LEU,          (int)MULTI_ARG_2_QI_CMP },
21963   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub",   IX86_BUILTIN_PCOMGTUB,   GTU,          (int)MULTI_ARG_2_QI_CMP },
21964   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub",   IX86_BUILTIN_PCOMGEUB,   GEU,          (int)MULTI_ARG_2_QI_CMP },
21965 
21966   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw",   IX86_BUILTIN_PCOMEQUW,   EQ,           (int)MULTI_ARG_2_HI_CMP },
21967   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw",   IX86_BUILTIN_PCOMNEUW,   NE,           (int)MULTI_ARG_2_HI_CMP },
21968   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw",  IX86_BUILTIN_PCOMNEUW,   NE,           (int)MULTI_ARG_2_HI_CMP },
21969   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3,  "__builtin_ia32_pcomltuw",   IX86_BUILTIN_PCOMLTUW,   LTU,          (int)MULTI_ARG_2_HI_CMP },
21970   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3,  "__builtin_ia32_pcomleuw",   IX86_BUILTIN_PCOMLEUW,   LEU,          (int)MULTI_ARG_2_HI_CMP },
21971   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3,  "__builtin_ia32_pcomgtuw",   IX86_BUILTIN_PCOMGTUW,   GTU,          (int)MULTI_ARG_2_HI_CMP },
21972   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3,  "__builtin_ia32_pcomgeuw",   IX86_BUILTIN_PCOMGEUW,   GEU,          (int)MULTI_ARG_2_HI_CMP },
21973 
21974   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd",   IX86_BUILTIN_PCOMEQUD,   EQ,           (int)MULTI_ARG_2_SI_CMP },
21975   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud",   IX86_BUILTIN_PCOMNEUD,   NE,           (int)MULTI_ARG_2_SI_CMP },
21976   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd",  IX86_BUILTIN_PCOMNEUD,   NE,           (int)MULTI_ARG_2_SI_CMP },
21977   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3,  "__builtin_ia32_pcomltud",   IX86_BUILTIN_PCOMLTUD,   LTU,          (int)MULTI_ARG_2_SI_CMP },
21978   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3,  "__builtin_ia32_pcomleud",   IX86_BUILTIN_PCOMLEUD,   LEU,          (int)MULTI_ARG_2_SI_CMP },
21979   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3,  "__builtin_ia32_pcomgtud",   IX86_BUILTIN_PCOMGTUD,   GTU,          (int)MULTI_ARG_2_SI_CMP },
21980   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3,  "__builtin_ia32_pcomgeud",   IX86_BUILTIN_PCOMGEUD,   GEU,          (int)MULTI_ARG_2_SI_CMP },
21981 
21982   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq",   IX86_BUILTIN_PCOMEQUQ,   EQ,           (int)MULTI_ARG_2_DI_CMP },
21983   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq",   IX86_BUILTIN_PCOMNEUQ,   NE,           (int)MULTI_ARG_2_DI_CMP },
21984   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq",  IX86_BUILTIN_PCOMNEUQ,   NE,           (int)MULTI_ARG_2_DI_CMP },
21985   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3,  "__builtin_ia32_pcomltuq",   IX86_BUILTIN_PCOMLTUQ,   LTU,          (int)MULTI_ARG_2_DI_CMP },
21986   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3,  "__builtin_ia32_pcomleuq",   IX86_BUILTIN_PCOMLEUQ,   LEU,          (int)MULTI_ARG_2_DI_CMP },
21987   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3,  "__builtin_ia32_pcomgtuq",   IX86_BUILTIN_PCOMGTUQ,   GTU,          (int)MULTI_ARG_2_DI_CMP },
21988   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3,  "__builtin_ia32_pcomgeuq",   IX86_BUILTIN_PCOMGEUQ,   GEU,          (int)MULTI_ARG_2_DI_CMP },
21989 
21990   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3,       "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S,  (int)MULTI_ARG_2_SF_TF },
21991   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3,       "__builtin_ia32_comtruess",  IX86_BUILTIN_COMTRUESS,  COM_TRUE_S,   (int)MULTI_ARG_2_SF_TF },
21992   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3,       "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P,  (int)MULTI_ARG_2_SF_TF },
21993   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3,       "__builtin_ia32_comtrueps",  IX86_BUILTIN_COMTRUEPS,  COM_TRUE_P,   (int)MULTI_ARG_2_SF_TF },
21994   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3,       "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S,  (int)MULTI_ARG_2_DF_TF },
21995   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3,       "__builtin_ia32_comtruesd",  IX86_BUILTIN_COMTRUESD,  COM_TRUE_S,   (int)MULTI_ARG_2_DF_TF },
21996   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3,       "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P,  (int)MULTI_ARG_2_DF_TF },
21997   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3,       "__builtin_ia32_comtruepd",  IX86_BUILTIN_COMTRUEPD,  COM_TRUE_P,   (int)MULTI_ARG_2_DF_TF },
21998 
21999   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3,     "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE,   (int)MULTI_ARG_2_QI_TF },
22000   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3,      "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE,   (int)MULTI_ARG_2_HI_TF },
22001   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3,      "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE,   (int)MULTI_ARG_2_SI_TF },
22002   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3,      "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE,   (int)MULTI_ARG_2_DI_TF },
22003   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3,     "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE,   (int)MULTI_ARG_2_QI_TF },
22004   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3,      "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE,   (int)MULTI_ARG_2_HI_TF },
22005   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3,      "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE,   (int)MULTI_ARG_2_SI_TF },
22006   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3,      "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE,   (int)MULTI_ARG_2_DI_TF },
22007 
22008   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3,     "__builtin_ia32_pcomtrueb",  IX86_BUILTIN_PCOMTRUEB,  PCOM_TRUE,    (int)MULTI_ARG_2_QI_TF },
22009   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3,      "__builtin_ia32_pcomtruew",  IX86_BUILTIN_PCOMTRUEW,  PCOM_TRUE,    (int)MULTI_ARG_2_HI_TF },
22010   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3,      "__builtin_ia32_pcomtrued",  IX86_BUILTIN_PCOMTRUED,  PCOM_TRUE,    (int)MULTI_ARG_2_SI_TF },
22011   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3,      "__builtin_ia32_pcomtrueq",  IX86_BUILTIN_PCOMTRUEQ,  PCOM_TRUE,    (int)MULTI_ARG_2_DI_TF },
22012   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3,     "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE,    (int)MULTI_ARG_2_QI_TF },
22013   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3,      "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE,    (int)MULTI_ARG_2_HI_TF },
22014   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3,      "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE,    (int)MULTI_ARG_2_SI_TF },
22015   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3,      "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE,    (int)MULTI_ARG_2_DI_TF },
22016 };
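
/* Each entry above pairs a builtin name with the insn pattern that expands
   it, an optional comparison sub-code, and a MULTI_ARG_* code selecting the
   function prototype.  A hedged sketch of how such a table is consumed
   during registration (the real loop, inside ix86_init_mmx_sse_builtins
   below, switches over d->flag to pick the prototype):

     for (i = 0, d = bdesc_multi_arg;
          i < ARRAY_SIZE (bdesc_multi_arg);
          i++, d++)
       {
         tree mtype = ...;	/* Chosen by a switch on d->flag.  */
         if (mtype)
           def_builtin_const (d->mask, d->name, mtype, d->code);
       }
 */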

/* Set up all the MMX/SSE builtins, even builtins for instructions that are
   not in the current target ISA, so that the user can compile particular
   modules with target-specific options that differ from the command-line
   options.  */
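/* For instance, a translation unit compiled without -msse5 on the command
   line can still reach the SSE5 builtins through a per-function override.
   A hedged illustration, not code from this file (__v4si is assumed to
   come from the intrinsics headers):

     __attribute__((target ("sse5")))
     static __v4si
     compare_eq (__v4si a, __v4si b)
     {
       return __builtin_ia32_pcomeqd (a, b);
     }
 */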
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  size_t i;

  tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V1DI_type_node
    = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
  tree V2DI_type_node
    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node
    = build_pointer_type (build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node
    = build_pointer_type (build_type_variant (float_type_node, 1, 0));
  tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
  tree pcv2sf_type_node
    = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
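  /* The pointer types above back the load/store prototypes built further
     down, e.g. v4sf_ftype_pcfloat for the unaligned SSE loads.  */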

  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2SI_type_node, NULL_TREE);

  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node,
                                integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, integer_type_node, NULL_TREE);
  tree v2si_ftype_v2si_int
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, integer_type_node, NULL_TREE);
  tree v1di_ftype_v1di_int
    = build_function_type_list (V1DI_type_node,
                                V1DI_type_node, integer_type_node, NULL_TREE);
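  /* Prototypes with scalar or no operands, used mainly by the
     state-management and cache-control builtins (emms, ldmxcsr/stmxcsr,
     monitor/mwait, and friends).  */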
  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
                                unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
                                unsigned_type_node, unsigned_type_node,
                                NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
                                V8QI_type_node, V8QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_pcv2sf
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, pcv2sf_type_node, NULL_TREE);
  tree void_ftype_pv2sf_v4sf
    = build_function_type_list (void_type_node,
                                pv2sf_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
                                pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
                                pdi_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
                                pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi
    = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi
    = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si
    = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi
    = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi
    = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v1di_ftype_v1di_v1di
    = build_function_type_list (V1DI_type_node,
                                V1DI_type_node, V1DI_type_node, NULL_TREE);
  tree v1di_ftype_v1di_v1di_int
    = build_function_type_list (V1DI_type_node,
                                V1DI_type_node, V1DI_type_node,
                                integer_type_node, NULL_TREE);
  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node    = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node = build_pointer_type (
                                build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4si_ftype_v2df_v2df
    = build_function_type_list (V4SI_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
                                V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree v2df_ftype_v2df_pcdouble
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
                                pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
                                pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
                                V16QI_type_node, V16QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, integer_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di_int
    = build_function_type_list (V2DI_type_node, V2DI_type_node,
                                V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v1di_ftype_v8qi_v8qi
    = build_function_type_list (V1DI_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v1di_ftype_v2si_v2si
    = build_function_type_list (V1DI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v2di_ftype_v4si_v4si
    = build_function_type_list (V2DI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
                                pchar_type_node, V16QI_type_node, NULL_TREE);

  tree v2di_ftype_v2di_unsigned_unsigned
    = build_function_type_list (V2DI_type_node, V2DI_type_node,
                                unsigned_type_node, unsigned_type_node,
                                NULL_TREE);
  tree v2di_ftype_v2di_v2di_unsigned_unsigned
    = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
                                unsigned_type_node, unsigned_type_node,
                                NULL_TREE);
  tree v2di_ftype_v2di_v16qi
    = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
                                NULL_TREE);
  tree v2df_ftype_v2df_v2df_v2df
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node,
                                V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node,
                                V4SF_type_node, NULL_TREE);
  tree v8hi_ftype_v16qi
    = build_function_type_list (V8HI_type_node, V16QI_type_node,
                                NULL_TREE);
  tree v4si_ftype_v16qi
    = build_function_type_list (V4SI_type_node, V16QI_type_node,
                                NULL_TREE);
  tree v2di_ftype_v16qi
    = build_function_type_list (V2DI_type_node, V16QI_type_node,
                                NULL_TREE);
  tree v4si_ftype_v8hi
    = build_function_type_list (V4SI_type_node, V8HI_type_node,
                                NULL_TREE);
  tree v2di_ftype_v8hi
    = build_function_type_list (V2DI_type_node, V8HI_type_node,
                                NULL_TREE);
  tree v2di_ftype_v4si
    = build_function_type_list (V2DI_type_node, V4SI_type_node,
                                NULL_TREE);
  tree v2di_ftype_pv2di
    = build_function_type_list (V2DI_type_node, pv2di_type_node,
                                NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi_int
    = build_function_type_list (V16QI_type_node, V16QI_type_node,
                                V16QI_type_node, integer_type_node,
                                NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi_v16qi
    = build_function_type_list (V16QI_type_node, V16QI_type_node,
                                V16QI_type_node, V16QI_type_node,
                                NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi_int
    = build_function_type_list (V8HI_type_node, V8HI_type_node,
                                V8HI_type_node, integer_type_node,
                                NULL_TREE);
  tree v4si_ftype_v4si_v4si_int
    = build_function_type_list (V4SI_type_node, V4SI_type_node,
                                V4SI_type_node, integer_type_node,
                                NULL_TREE);
  tree int_ftype_v2di_v2di
    = build_function_type_list (integer_type_node,
                                V2DI_type_node, V2DI_type_node,
                                NULL_TREE);
  tree int_ftype_v16qi_int_v16qi_int_int
    = build_function_type_list (integer_type_node,
                                V16QI_type_node,
                                integer_type_node,
                                V16QI_type_node,
                                integer_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree v16qi_ftype_v16qi_int_v16qi_int_int
    = build_function_type_list (V16QI_type_node,
                                V16QI_type_node,
                                integer_type_node,
                                V16QI_type_node,
                                integer_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree int_ftype_v16qi_v16qi_int
    = build_function_type_list (integer_type_node,
                                V16QI_type_node,
                                V16QI_type_node,
                                integer_type_node,
                                NULL_TREE);
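  /* The three preceding prototypes back the SSE4.2 string-compare
     builtins (__builtin_ia32_pcmpestri128, __builtin_ia32_pcmpestrm128,
     __builtin_ia32_pcmpistri128).  */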

  /* SSE5 instructions */
  tree v2di_ftype_v2di_v2di_v2di
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node,
                                V2DI_type_node,
                                V2DI_type_node,
                                NULL_TREE);

  tree v4si_ftype_v4si_v4si_v4si
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node,
                                V4SI_type_node,
                                V4SI_type_node,
                                NULL_TREE);

  tree v4si_ftype_v4si_v4si_v2di
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node,
                                V4SI_type_node,
                                V2DI_type_node,
                                NULL_TREE);

  tree v8hi_ftype_v8hi_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node,
                                V8HI_type_node,
                                V8HI_type_node,
                                NULL_TREE);

  tree v8hi_ftype_v8hi_v8hi_v4si
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node,
                                V8HI_type_node,
                                V4SI_type_node,
                                NULL_TREE);

  tree v2df_ftype_v2df_v2df_v16qi
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node,
                                V2DF_type_node,
                                V16QI_type_node,
                                NULL_TREE);

  tree v4sf_ftype_v4sf_v4sf_v16qi
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node,
                                V4SF_type_node,
                                V16QI_type_node,
                                NULL_TREE);

  tree v2di_ftype_v2di_si
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node,
                                integer_type_node,
                                NULL_TREE);

  tree v4si_ftype_v4si_si
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node,
                                integer_type_node,
                                NULL_TREE);

  tree v8hi_ftype_v8hi_si
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node,
                                integer_type_node,
                                NULL_TREE);

  tree v16qi_ftype_v16qi_si
    = build_function_type_list (V16QI_type_node,
                                V16QI_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4hi
    = build_function_type_list (V4SF_type_node,
                                V4HI_type_node,
                                NULL_TREE);

  tree v4hi_ftype_v4sf
    = build_function_type_list (V4HI_type_node,
                                V4SF_type_node,
                                NULL_TREE);

  tree v2di_ftype_v2di
    = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);

  tree v16qi_ftype_v8hi_v8hi
    = build_function_type_list (V16QI_type_node,
                                V8HI_type_node, V8HI_type_node,
                                NULL_TREE);
  tree v8hi_ftype_v4si_v4si
    = build_function_type_list (V8HI_type_node,
                                V4SI_type_node, V4SI_type_node,
                                NULL_TREE);
  tree v8hi_ftype_v16qi_v16qi
    = build_function_type_list (V8HI_type_node,
                                V16QI_type_node, V16QI_type_node,
                                NULL_TREE);
  tree v4hi_ftype_v8qi_v8qi
    = build_function_type_list (V4HI_type_node,
                                V8QI_type_node, V8QI_type_node,
                                NULL_TREE);
  tree unsigned_ftype_unsigned_uchar
    = build_function_type_list (unsigned_type_node,
                                unsigned_type_node,
                                unsigned_char_type_node,
                                NULL_TREE);
  tree unsigned_ftype_unsigned_ushort
    = build_function_type_list (unsigned_type_node,
                                unsigned_type_node,
                                short_unsigned_type_node,
                                NULL_TREE);
  tree unsigned_ftype_unsigned_unsigned
    = build_function_type_list (unsigned_type_node,
                                unsigned_type_node,
                                unsigned_type_node,
                                NULL_TREE);
  tree uint64_ftype_uint64_uint64
    = build_function_type_list (long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                NULL_TREE);
  tree float_ftype_float
    = build_function_type_list (float_type_node,
                                float_type_node,
                                NULL_TREE);
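  /* The three-operand prototypes above are matched to the MULTI_ARG_*
     codes from bdesc_multi_arg during registration; e.g. MULTI_ARG_3_DI
     selects v2di_ftype_v2di_v2di_v2di and MULTI_ARG_2_DI_CMP selects
     v2di_ftype_v2di_v2di.  */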

  /* AVX builtins  */
  tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
                                                     V32QImode);
  tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
                                                    V8SImode);
  tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
                                                    V8SFmode);
  tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
                                                    V4DImode);
  tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
                                                    V4DFmode);
  tree v8sf_ftype_v8sf
    = build_function_type_list (V8SF_type_node,
                                V8SF_type_node,
                                NULL_TREE);
  tree v8si_ftype_v8sf
    = build_function_type_list (V8SI_type_node,
                                V8SF_type_node,
                                NULL_TREE);
  tree v8sf_ftype_v8si
    = build_function_type_list (V8SF_type_node,
                                V8SI_type_node,
                                NULL_TREE);
  tree v4si_ftype_v4df
    = build_function_type_list (V4SI_type_node,
                                V4DF_type_node,
                                NULL_TREE);
  tree v4df_ftype_v4df
    = build_function_type_list (V4DF_type_node,
                                V4DF_type_node,
                                NULL_TREE);
  tree v4df_ftype_v4si
    = build_function_type_list (V4DF_type_node,
                                V4SI_type_node,
                                NULL_TREE);
  tree v4df_ftype_v4sf
    = build_function_type_list (V4DF_type_node,
                                V4SF_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4df
    = build_function_type_list (V4SF_type_node,
                                V4DF_type_node,
                                NULL_TREE);
  tree v8sf_ftype_v8sf_v8sf
    = build_function_type_list (V8SF_type_node,
                                V8SF_type_node, V8SF_type_node,
                                NULL_TREE);
  tree v4df_ftype_v4df_v4df
    = build_function_type_list (V4DF_type_node,
                                V4DF_type_node, V4DF_type_node,
                                NULL_TREE);
  tree v8sf_ftype_v8sf_int
    = build_function_type_list (V8SF_type_node,
                                V8SF_type_node, integer_type_node,
                                NULL_TREE);
  tree v4si_ftype_v8si_int
    = build_function_type_list (V4SI_type_node,
                                V8SI_type_node, integer_type_node,
                                NULL_TREE);
  tree v4df_ftype_v4df_int
    = build_function_type_list (V4DF_type_node,
                                V4DF_type_node, integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v8sf_int
    = build_function_type_list (V4SF_type_node,
                                V8SF_type_node, integer_type_node,
                                NULL_TREE);
  tree v2df_ftype_v4df_int
    = build_function_type_list (V2DF_type_node,
                                V4DF_type_node, integer_type_node,
                                NULL_TREE);
  tree v8sf_ftype_v8sf_v8sf_int
    = build_function_type_list (V8SF_type_node,
                                V8SF_type_node, V8SF_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree v8sf_ftype_v8sf_v8sf_v8sf
    = build_function_type_list (V8SF_type_node,
                                V8SF_type_node, V8SF_type_node,
                                V8SF_type_node,
                                NULL_TREE);
  tree v4df_ftype_v4df_v4df_v4df
    = build_function_type_list (V4DF_type_node,
                                V4DF_type_node, V4DF_type_node,
                                V4DF_type_node,
                                NULL_TREE);
  tree v8si_ftype_v8si_v8si_int
    = build_function_type_list (V8SI_type_node,
                                V8SI_type_node, V8SI_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree v4df_ftype_v4df_v4df_int
    = build_function_type_list (V4DF_type_node,
                                V4DF_type_node, V4DF_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree v8sf_ftype_pcfloat
    = build_function_type_list (V8SF_type_node,
                                pcfloat_type_node,
                                NULL_TREE);
  tree v4df_ftype_pcdouble
    = build_function_type_list (V4DF_type_node,
                                pcdouble_type_node,
                                NULL_TREE);
  tree pcv4sf_type_node
    = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
  tree pcv2df_type_node
    = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
  tree v8sf_ftype_pcv4sf
    = build_function_type_list (V8SF_type_node,
                                pcv4sf_type_node,
                                NULL_TREE);
  tree v4df_ftype_pcv2df
    = build_function_type_list (V4DF_type_node,
                                pcv2df_type_node,
                                NULL_TREE);
  tree v32qi_ftype_pcchar
    = build_function_type_list (V32QI_type_node,
                                pcchar_type_node,
                                NULL_TREE);
  tree void_ftype_pchar_v32qi
    = build_function_type_list (void_type_node,
                                pchar_type_node, V32QI_type_node,
                                NULL_TREE);
  tree v8si_ftype_v8si_v4si_int
    = build_function_type_list (V8SI_type_node,
                                V8SI_type_node, V4SI_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree pv4di_type_node = build_pointer_type (V4DI_type_node);
  tree void_ftype_pv4di_v4di
    = build_function_type_list (void_type_node,
                                pv4di_type_node, V4DI_type_node,
                                NULL_TREE);
  tree v8sf_ftype_v8sf_v4sf_int
    = build_function_type_list (V8SF_type_node,
                                V8SF_type_node, V4SF_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree v4df_ftype_v4df_v2df_int
    = build_function_type_list (V4DF_type_node,
                                V4DF_type_node, V2DF_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree void_ftype_pfloat_v8sf
    = build_function_type_list (void_type_node,
                                pfloat_type_node, V8SF_type_node,
                                NULL_TREE);
  tree void_ftype_pdouble_v4df
    = build_function_type_list (void_type_node,
                                pdouble_type_node, V4DF_type_node,
                                NULL_TREE);
  tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
  tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
  tree pv4df_type_node = build_pointer_type (V4DF_type_node);
  tree pv2df_type_node = build_pointer_type (V2DF_type_node);
  tree pcv8sf_type_node
    = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
  tree pcv4df_type_node
    = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
  tree v8sf_ftype_pcv8sf_v8sf
    = build_function_type_list (V8SF_type_node,
                                pcv8sf_type_node, V8SF_type_node,
                                NULL_TREE);
  tree v4df_ftype_pcv4df_v4df
    = build_function_type_list (V4DF_type_node,
                                pcv4df_type_node, V4DF_type_node,
                                NULL_TREE);
  tree v4sf_ftype_pcv4sf_v4sf
    = build_function_type_list (V4SF_type_node,
                                pcv4sf_type_node, V4SF_type_node,
                                NULL_TREE);
  tree v2df_ftype_pcv2df_v2df
    = build_function_type_list (V2DF_type_node,
                                pcv2df_type_node, V2DF_type_node,
                                NULL_TREE);
  tree void_ftype_pv8sf_v8sf_v8sf
    = build_function_type_list (void_type_node,
                                pv8sf_type_node, V8SF_type_node,
                                V8SF_type_node,
                                NULL_TREE);
  tree void_ftype_pv4df_v4df_v4df
    = build_function_type_list (void_type_node,
                                pv4df_type_node, V4DF_type_node,
                                V4DF_type_node,
                                NULL_TREE);
  tree void_ftype_pv4sf_v4sf_v4sf
    = build_function_type_list (void_type_node,
                                pv4sf_type_node, V4SF_type_node,
                                V4SF_type_node,
                                NULL_TREE);
  tree void_ftype_pv2df_v2df_v2df
    = build_function_type_list (void_type_node,
                                pv2df_type_node, V2DF_type_node,
                                V2DF_type_node,
                                NULL_TREE);
  tree v4df_ftype_v2df
    = build_function_type_list (V4DF_type_node,
                                V2DF_type_node,
                                NULL_TREE);
  tree v8sf_ftype_v4sf
    = build_function_type_list (V8SF_type_node,
                                V4SF_type_node,
                                NULL_TREE);
  tree v8si_ftype_v4si
    = build_function_type_list (V8SI_type_node,
                                V4SI_type_node,
                                NULL_TREE);
  tree v2df_ftype_v4df
    = build_function_type_list (V2DF_type_node,
                                V4DF_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v8sf
    = build_function_type_list (V4SF_type_node,
                                V8SF_type_node,
                                NULL_TREE);
  tree v4si_ftype_v8si
    = build_function_type_list (V4SI_type_node,
                                V8SI_type_node,
                                NULL_TREE);
  tree int_ftype_v4df
    = build_function_type_list (integer_type_node,
                                V4DF_type_node,
                                NULL_TREE);
  tree int_ftype_v8sf
    = build_function_type_list (integer_type_node,
                                V8SF_type_node,
                                NULL_TREE);
  tree int_ftype_v8sf_v8sf
    = build_function_type_list (integer_type_node,
                                V8SF_type_node, V8SF_type_node,
                                NULL_TREE);
  tree int_ftype_v4di_v4di
    = build_function_type_list (integer_type_node,
                                V4DI_type_node, V4DI_type_node,
22758                                 NULL_TREE);
22759   tree int_ftype_v4df_v4df
22760     = build_function_type_list (integer_type_node,
22761                                 V4DF_type_node, V4DF_type_node,
22762                                 NULL_TREE);
22763   tree v8sf_ftype_v8sf_v8si
22764     = build_function_type_list (V8SF_type_node,
22765                                 V8SF_type_node, V8SI_type_node,
22766                                 NULL_TREE);
22767   tree v4df_ftype_v4df_v4di
22768     = build_function_type_list (V4DF_type_node,
22769                                 V4DF_type_node, V4DI_type_node,
22770                                 NULL_TREE);
22771   tree v4sf_ftype_v4sf_v4si
22772     = build_function_type_list (V4SF_type_node,
22773                                 V4SF_type_node, V4SI_type_node, NULL_TREE);
22774   tree v2df_ftype_v2df_v2di
22775     = build_function_type_list (V2DF_type_node,
22776                                 V2DF_type_node, V2DI_type_node, NULL_TREE);
22777 
22778   tree ftype;
22779 
22780   /* Add all special builtins with a variable number of operands.  */
22781   for (i = 0, d = bdesc_special_args;
22782        i < ARRAY_SIZE (bdesc_special_args);
22783        i++, d++)
22784     {
22785       tree type;
22786 
22787       if (d->name == 0)
22788         continue;
22789 
22790       switch ((enum ix86_special_builtin_type) d->flag)
22791         {
22792         case VOID_FTYPE_VOID:
22793           type = void_ftype_void;
22794           break;
22795         case V32QI_FTYPE_PCCHAR:
22796           type = v32qi_ftype_pcchar;
22797           break;
22798         case V16QI_FTYPE_PCCHAR:
22799           type = v16qi_ftype_pcchar;
22800           break;
22801         case V8SF_FTYPE_PCV4SF:
22802           type = v8sf_ftype_pcv4sf;
22803           break;
22804         case V8SF_FTYPE_PCFLOAT:
22805           type = v8sf_ftype_pcfloat;
22806           break;
22807         case V4DF_FTYPE_PCV2DF:
22808           type = v4df_ftype_pcv2df;
22809           break;
22810         case V4DF_FTYPE_PCDOUBLE:
22811           type = v4df_ftype_pcdouble;
22812           break;
22813         case V4SF_FTYPE_PCFLOAT:
22814           type = v4sf_ftype_pcfloat;
22815           break;
22816         case V2DI_FTYPE_PV2DI:
22817           type = v2di_ftype_pv2di;
22818           break;
22819         case V2DF_FTYPE_PCDOUBLE:
22820           type = v2df_ftype_pcdouble;
22821           break;
22822         case V8SF_FTYPE_PCV8SF_V8SF:
22823           type = v8sf_ftype_pcv8sf_v8sf;
22824           break;
22825         case V4DF_FTYPE_PCV4DF_V4DF:
22826           type = v4df_ftype_pcv4df_v4df;
22827           break;
22828         case V4SF_FTYPE_V4SF_PCV2SF:
22829           type = v4sf_ftype_v4sf_pcv2sf;
22830           break;
22831         case V4SF_FTYPE_PCV4SF_V4SF:
22832           type = v4sf_ftype_pcv4sf_v4sf;
22833           break;
22834         case V2DF_FTYPE_V2DF_PCDOUBLE:
22835           type = v2df_ftype_v2df_pcdouble;
22836           break;
22837         case V2DF_FTYPE_PCV2DF_V2DF:
22838           type = v2df_ftype_pcv2df_v2df;
22839           break;
22840         case VOID_FTYPE_PV2SF_V4SF:
22841           type = void_ftype_pv2sf_v4sf;
22842           break;
22843         case VOID_FTYPE_PV4DI_V4DI:
22844           type = void_ftype_pv4di_v4di;
22845           break;
22846         case VOID_FTYPE_PV2DI_V2DI:
22847           type = void_ftype_pv2di_v2di;
22848           break;
22849         case VOID_FTYPE_PCHAR_V32QI:
22850           type = void_ftype_pchar_v32qi;
22851           break;
22852         case VOID_FTYPE_PCHAR_V16QI:
22853           type = void_ftype_pchar_v16qi;
22854           break;
22855         case VOID_FTYPE_PFLOAT_V8SF:
22856           type = void_ftype_pfloat_v8sf;
22857           break;
22858         case VOID_FTYPE_PFLOAT_V4SF:
22859           type = void_ftype_pfloat_v4sf;
22860           break;
22861         case VOID_FTYPE_PDOUBLE_V4DF:
22862           type = void_ftype_pdouble_v4df;
22863           break;
22864         case VOID_FTYPE_PDOUBLE_V2DF:
22865           type = void_ftype_pdouble_v2df;
22866           break;
22867         case VOID_FTYPE_PDI_DI:
22868           type = void_ftype_pdi_di;
22869           break;
22870         case VOID_FTYPE_PINT_INT:
22871           type = void_ftype_pint_int;
22872           break;
22873         case VOID_FTYPE_PV8SF_V8SF_V8SF:
22874           type = void_ftype_pv8sf_v8sf_v8sf;
22875           break;
22876         case VOID_FTYPE_PV4DF_V4DF_V4DF:
22877           type = void_ftype_pv4df_v4df_v4df;
22878           break;
22879         case VOID_FTYPE_PV4SF_V4SF_V4SF:
22880           type = void_ftype_pv4sf_v4sf_v4sf;
22881           break;
22882         case VOID_FTYPE_PV2DF_V2DF_V2DF:
22883           type = void_ftype_pv2df_v2df_v2df;
22884           break;
22885         default:
22886           gcc_unreachable ();
22887         }
22888 
22889       def_builtin (d->mask, d->name, type, d->code);
22890     }
22891 
22892   /* Add all builtins with a variable number of operands.  */
22893   for (i = 0, d = bdesc_args;
22894        i < ARRAY_SIZE (bdesc_args);
22895        i++, d++)
22896     {
22897       tree type;
22898 
22899       if (d->name == 0)
22900         continue;
22901 
22902       switch ((enum ix86_builtin_type) d->flag)
22903         {
22904         case FLOAT_FTYPE_FLOAT:
22905           type = float_ftype_float;
22906           break;
22907         case INT_FTYPE_V8SF_V8SF_PTEST:
22908           type = int_ftype_v8sf_v8sf;
22909           break;
22910         case INT_FTYPE_V4DI_V4DI_PTEST:
22911           type = int_ftype_v4di_v4di;
22912           break;
22913         case INT_FTYPE_V4DF_V4DF_PTEST:
22914           type = int_ftype_v4df_v4df;
22915           break;
22916         case INT_FTYPE_V4SF_V4SF_PTEST:
22917           type = int_ftype_v4sf_v4sf;
22918           break;
22919         case INT_FTYPE_V2DI_V2DI_PTEST:
22920           type = int_ftype_v2di_v2di;
22921           break;
22922         case INT_FTYPE_V2DF_V2DF_PTEST:
22923           type = int_ftype_v2df_v2df;
22924           break;
22925         case INT64_FTYPE_V4SF:
22926           type = int64_ftype_v4sf;
22927           break;
22928         case INT64_FTYPE_V2DF:
22929           type = int64_ftype_v2df;
22930           break;
22931         case INT_FTYPE_V16QI:
22932           type = int_ftype_v16qi;
22933           break;
22934         case INT_FTYPE_V8QI:
22935           type = int_ftype_v8qi;
22936           break;
22937         case INT_FTYPE_V8SF:
22938           type = int_ftype_v8sf;
22939           break;
22940         case INT_FTYPE_V4DF:
22941           type = int_ftype_v4df;
22942           break;
22943         case INT_FTYPE_V4SF:
22944           type = int_ftype_v4sf;
22945           break;
22946         case INT_FTYPE_V2DF:
22947           type = int_ftype_v2df;
22948           break;
22949         case V16QI_FTYPE_V16QI:
22950           type = v16qi_ftype_v16qi;
22951           break;
22952         case V8SI_FTYPE_V8SF:
22953           type = v8si_ftype_v8sf;
22954           break;
22955         case V8SI_FTYPE_V4SI:
22956           type = v8si_ftype_v4si;
22957           break;
22958         case V8HI_FTYPE_V8HI:
22959           type = v8hi_ftype_v8hi;
22960           break;
22961         case V8HI_FTYPE_V16QI:
22962           type = v8hi_ftype_v16qi;
22963           break;
22964         case V8QI_FTYPE_V8QI:
22965           type = v8qi_ftype_v8qi;
22966           break;
22967         case V8SF_FTYPE_V8SF:
22968           type = v8sf_ftype_v8sf;
22969           break;
22970         case V8SF_FTYPE_V8SI:
22971           type = v8sf_ftype_v8si;
22972           break;
22973         case V8SF_FTYPE_V4SF:
22974           type = v8sf_ftype_v4sf;
22975           break;
22976         case V4SI_FTYPE_V4DF:
22977           type = v4si_ftype_v4df;
22978           break;
22979         case V4SI_FTYPE_V4SI:
22980           type = v4si_ftype_v4si;
22981           break;
22982         case V4SI_FTYPE_V16QI:
22983           type = v4si_ftype_v16qi;
22984           break;
22985         case V4SI_FTYPE_V8SI:
22986           type = v4si_ftype_v8si;
22987           break;
22988         case V4SI_FTYPE_V8HI:
22989           type = v4si_ftype_v8hi;
22990           break;
22991         case V4SI_FTYPE_V4SF:
22992           type = v4si_ftype_v4sf;
22993           break;
22994         case V4SI_FTYPE_V2DF:
22995           type = v4si_ftype_v2df;
22996           break;
22997         case V4HI_FTYPE_V4HI:
22998           type = v4hi_ftype_v4hi;
22999           break;
23000         case V4DF_FTYPE_V4DF:
23001           type = v4df_ftype_v4df;
23002           break;
23003         case V4DF_FTYPE_V4SI:
23004           type = v4df_ftype_v4si;
23005           break;
23006         case V4DF_FTYPE_V4SF:
23007           type = v4df_ftype_v4sf;
23008           break;
23009         case V4DF_FTYPE_V2DF:
23010           type = v4df_ftype_v2df;
23011           break;
23012         case V4SF_FTYPE_V4SF:
23013         case V4SF_FTYPE_V4SF_VEC_MERGE:
23014           type = v4sf_ftype_v4sf;
23015           break;
23016         case V4SF_FTYPE_V8SF:
23017           type = v4sf_ftype_v8sf;
23018           break;
23019         case V4SF_FTYPE_V4SI:
23020           type = v4sf_ftype_v4si;
23021           break;
23022         case V4SF_FTYPE_V4DF:
23023           type = v4sf_ftype_v4df;
23024           break;
23025         case V4SF_FTYPE_V2DF:
23026           type = v4sf_ftype_v2df;
23027           break;
23028         case V2DI_FTYPE_V2DI:
23029           type = v2di_ftype_v2di;
23030           break;
23031         case V2DI_FTYPE_V16QI:
23032           type = v2di_ftype_v16qi;
23033           break;
23034         case V2DI_FTYPE_V8HI:
23035           type = v2di_ftype_v8hi;
23036           break;
23037         case V2DI_FTYPE_V4SI:
23038           type = v2di_ftype_v4si;
23039           break;
23040         case V2SI_FTYPE_V2SI:
23041           type = v2si_ftype_v2si;
23042           break;
23043         case V2SI_FTYPE_V4SF:
23044           type = v2si_ftype_v4sf;
23045           break;
23046         case V2SI_FTYPE_V2DF:
23047           type = v2si_ftype_v2df;
23048           break;
23049         case V2SI_FTYPE_V2SF:
23050           type = v2si_ftype_v2sf;
23051           break;
23052         case V2DF_FTYPE_V4DF:
23053           type = v2df_ftype_v4df;
23054           break;
23055         case V2DF_FTYPE_V4SF:
23056           type = v2df_ftype_v4sf;
23057           break;
23058         case V2DF_FTYPE_V2DF:
23059         case V2DF_FTYPE_V2DF_VEC_MERGE:
23060           type = v2df_ftype_v2df;
23061           break;
23062         case V2DF_FTYPE_V2SI:
23063           type = v2df_ftype_v2si;
23064           break;
23065         case V2DF_FTYPE_V4SI:
23066           type = v2df_ftype_v4si;
23067           break;
23068         case V2SF_FTYPE_V2SF:
23069           type = v2sf_ftype_v2sf;
23070           break;
23071         case V2SF_FTYPE_V2SI:
23072           type = v2sf_ftype_v2si;
23073           break;
23074         case V16QI_FTYPE_V16QI_V16QI:
23075           type = v16qi_ftype_v16qi_v16qi;
23076           break;
23077         case V16QI_FTYPE_V8HI_V8HI:
23078           type = v16qi_ftype_v8hi_v8hi;
23079           break;
23080         case V8QI_FTYPE_V8QI_V8QI:
23081           type = v8qi_ftype_v8qi_v8qi;
23082           break;
23083         case V8QI_FTYPE_V4HI_V4HI:
23084           type = v8qi_ftype_v4hi_v4hi;
23085           break;
23086         case V8HI_FTYPE_V8HI_V8HI:
23087         case V8HI_FTYPE_V8HI_V8HI_COUNT:
23088           type = v8hi_ftype_v8hi_v8hi;
23089           break;
23090         case V8HI_FTYPE_V16QI_V16QI:
23091           type = v8hi_ftype_v16qi_v16qi;
23092           break;
23093         case V8HI_FTYPE_V4SI_V4SI:
23094           type = v8hi_ftype_v4si_v4si;
23095           break;
23096         case V8HI_FTYPE_V8HI_SI_COUNT:
23097           type = v8hi_ftype_v8hi_int;
23098           break;
23099         case V8SF_FTYPE_V8SF_V8SF:
23100           type = v8sf_ftype_v8sf_v8sf;
23101           break;
23102         case V8SF_FTYPE_V8SF_V8SI:
23103           type = v8sf_ftype_v8sf_v8si;
23104           break;
23105         case V4SI_FTYPE_V4SI_V4SI:
23106         case V4SI_FTYPE_V4SI_V4SI_COUNT:
23107           type = v4si_ftype_v4si_v4si;
23108           break;
23109         case V4SI_FTYPE_V8HI_V8HI:
23110           type = v4si_ftype_v8hi_v8hi;
23111           break;
23112         case V4SI_FTYPE_V4SF_V4SF:
23113           type = v4si_ftype_v4sf_v4sf;
23114           break;
23115         case V4SI_FTYPE_V2DF_V2DF:
23116           type = v4si_ftype_v2df_v2df;
23117           break;
23118         case V4SI_FTYPE_V4SI_SI_COUNT:
23119           type = v4si_ftype_v4si_int;
23120           break;
23121         case V4HI_FTYPE_V4HI_V4HI:
23122         case V4HI_FTYPE_V4HI_V4HI_COUNT:
23123           type = v4hi_ftype_v4hi_v4hi;
23124           break;
23125         case V4HI_FTYPE_V8QI_V8QI:
23126           type = v4hi_ftype_v8qi_v8qi;
23127           break;
23128         case V4HI_FTYPE_V2SI_V2SI:
23129           type = v4hi_ftype_v2si_v2si;
23130           break;
23131         case V4HI_FTYPE_V4HI_SI_COUNT:
23132           type = v4hi_ftype_v4hi_int;
23133           break;
23134         case V4DF_FTYPE_V4DF_V4DF:
23135           type = v4df_ftype_v4df_v4df;
23136           break;
23137         case V4DF_FTYPE_V4DF_V4DI:
23138           type = v4df_ftype_v4df_v4di;
23139           break;
23140         case V4SF_FTYPE_V4SF_V4SF:
23141         case V4SF_FTYPE_V4SF_V4SF_SWAP:
23142           type = v4sf_ftype_v4sf_v4sf;
23143           break;
23144         case V4SF_FTYPE_V4SF_V4SI:
23145           type = v4sf_ftype_v4sf_v4si;
23146           break;
23147         case V4SF_FTYPE_V4SF_V2SI:
23148           type = v4sf_ftype_v4sf_v2si;
23149           break;
23150         case V4SF_FTYPE_V4SF_V2DF:
23151           type = v4sf_ftype_v4sf_v2df;
23152           break;
23153         case V4SF_FTYPE_V4SF_DI:
23154           type = v4sf_ftype_v4sf_int64;
23155           break;
23156         case V4SF_FTYPE_V4SF_SI:
23157           type = v4sf_ftype_v4sf_int;
23158           break;
23159         case V2DI_FTYPE_V2DI_V2DI:
23160         case V2DI_FTYPE_V2DI_V2DI_COUNT:
23161           type = v2di_ftype_v2di_v2di;
23162           break;
23163         case V2DI_FTYPE_V16QI_V16QI:
23164           type = v2di_ftype_v16qi_v16qi;
23165           break;
23166         case V2DI_FTYPE_V4SI_V4SI:
23167           type = v2di_ftype_v4si_v4si;
23168           break;
23169         case V2DI_FTYPE_V2DI_V16QI:
23170           type = v2di_ftype_v2di_v16qi;
23171           break;
23172         case V2DI_FTYPE_V2DF_V2DF:
23173           type = v2di_ftype_v2df_v2df;
23174           break;
23175         case V2DI_FTYPE_V2DI_SI_COUNT:
23176           type = v2di_ftype_v2di_int;
23177           break;
23178         case V2SI_FTYPE_V2SI_V2SI:
23179         case V2SI_FTYPE_V2SI_V2SI_COUNT:
23180           type = v2si_ftype_v2si_v2si;
23181           break;
23182         case V2SI_FTYPE_V4HI_V4HI:
23183           type = v2si_ftype_v4hi_v4hi;
23184           break;
23185         case V2SI_FTYPE_V2SF_V2SF:
23186           type = v2si_ftype_v2sf_v2sf;
23187           break;
23188         case V2SI_FTYPE_V2SI_SI_COUNT:
23189           type = v2si_ftype_v2si_int;
23190           break;
23191         case V2DF_FTYPE_V2DF_V2DF:
23192         case V2DF_FTYPE_V2DF_V2DF_SWAP:
23193           type = v2df_ftype_v2df_v2df;
23194           break;
23195         case V2DF_FTYPE_V2DF_V4SF:
23196           type = v2df_ftype_v2df_v4sf;
23197           break;
23198         case V2DF_FTYPE_V2DF_V2DI:
23199           type = v2df_ftype_v2df_v2di;
23200           break;
23201         case V2DF_FTYPE_V2DF_DI:
23202           type = v2df_ftype_v2df_int64;
23203           break;
23204         case V2DF_FTYPE_V2DF_SI:
23205           type = v2df_ftype_v2df_int;
23206           break;
23207         case V2SF_FTYPE_V2SF_V2SF:
23208           type = v2sf_ftype_v2sf_v2sf;
23209           break;
23210         case V1DI_FTYPE_V1DI_V1DI:
23211         case V1DI_FTYPE_V1DI_V1DI_COUNT:
23212           type = v1di_ftype_v1di_v1di;
23213           break;
23214         case V1DI_FTYPE_V8QI_V8QI:
23215           type = v1di_ftype_v8qi_v8qi;
23216           break;
23217         case V1DI_FTYPE_V2SI_V2SI:
23218           type = v1di_ftype_v2si_v2si;
23219           break;
23220         case V1DI_FTYPE_V1DI_SI_COUNT:
23221           type = v1di_ftype_v1di_int;
23222           break;
23223         case UINT64_FTYPE_UINT64_UINT64:
23224           type = uint64_ftype_uint64_uint64;
23225           break;
23226         case UINT_FTYPE_UINT_UINT:
23227           type = unsigned_ftype_unsigned_unsigned;
23228           break;
23229         case UINT_FTYPE_UINT_USHORT:
23230           type = unsigned_ftype_unsigned_ushort;
23231           break;
23232         case UINT_FTYPE_UINT_UCHAR:
23233           type = unsigned_ftype_unsigned_uchar;
23234           break;
23235         case V8HI_FTYPE_V8HI_INT:
23236           type = v8hi_ftype_v8hi_int;
23237           break;
23238         case V8SF_FTYPE_V8SF_INT:
23239           type = v8sf_ftype_v8sf_int;
23240           break;
23241         case V4SI_FTYPE_V4SI_INT:
23242           type = v4si_ftype_v4si_int;
23243           break;
23244         case V4SI_FTYPE_V8SI_INT:
23245           type = v4si_ftype_v8si_int;
23246           break;
23247         case V4HI_FTYPE_V4HI_INT:
23248           type = v4hi_ftype_v4hi_int;
23249           break;
23250         case V4DF_FTYPE_V4DF_INT:
23251           type = v4df_ftype_v4df_int;
23252           break;
23253         case V4SF_FTYPE_V4SF_INT:
23254           type = v4sf_ftype_v4sf_int;
23255           break;
23256         case V4SF_FTYPE_V8SF_INT:
23257           type = v4sf_ftype_v8sf_int;
23258           break;
23259         case V2DI_FTYPE_V2DI_INT:
23260         case V2DI2TI_FTYPE_V2DI_INT:
23261           type = v2di_ftype_v2di_int;
23262           break;
23263         case V2DF_FTYPE_V2DF_INT:
23264           type = v2df_ftype_v2df_int;
23265           break;
23266         case V2DF_FTYPE_V4DF_INT:
23267           type = v2df_ftype_v4df_int;
23268           break;
23269         case V16QI_FTYPE_V16QI_V16QI_V16QI:
23270           type = v16qi_ftype_v16qi_v16qi_v16qi;
23271           break;
23272         case V8SF_FTYPE_V8SF_V8SF_V8SF:
23273           type = v8sf_ftype_v8sf_v8sf_v8sf;
23274           break;
23275         case V4DF_FTYPE_V4DF_V4DF_V4DF:
23276           type = v4df_ftype_v4df_v4df_v4df;
23277           break;
23278         case V4SF_FTYPE_V4SF_V4SF_V4SF:
23279           type = v4sf_ftype_v4sf_v4sf_v4sf;
23280           break;
23281         case V2DF_FTYPE_V2DF_V2DF_V2DF:
23282           type = v2df_ftype_v2df_v2df_v2df;
23283           break;
23284         case V16QI_FTYPE_V16QI_V16QI_INT:
23285           type = v16qi_ftype_v16qi_v16qi_int;
23286           break;
23287         case V8SI_FTYPE_V8SI_V8SI_INT:
23288           type = v8si_ftype_v8si_v8si_int;
23289           break;
23290         case V8SI_FTYPE_V8SI_V4SI_INT:
23291           type = v8si_ftype_v8si_v4si_int;
23292           break;
23293         case V8HI_FTYPE_V8HI_V8HI_INT:
23294           type = v8hi_ftype_v8hi_v8hi_int;
23295           break;
23296         case V8SF_FTYPE_V8SF_V8SF_INT:
23297           type = v8sf_ftype_v8sf_v8sf_int;
23298           break;
23299         case V8SF_FTYPE_V8SF_V4SF_INT:
23300           type = v8sf_ftype_v8sf_v4sf_int;
23301           break;
23302         case V4SI_FTYPE_V4SI_V4SI_INT:
23303           type = v4si_ftype_v4si_v4si_int;
23304           break;
23305         case V4DF_FTYPE_V4DF_V4DF_INT:
23306           type = v4df_ftype_v4df_v4df_int;
23307           break;
23308         case V4DF_FTYPE_V4DF_V2DF_INT:
23309           type = v4df_ftype_v4df_v2df_int;
23310           break;
23311         case V4SF_FTYPE_V4SF_V4SF_INT:
23312           type = v4sf_ftype_v4sf_v4sf_int;
23313           break;
23314         case V2DI_FTYPE_V2DI_V2DI_INT:
23315         case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23316           type = v2di_ftype_v2di_v2di_int;
23317           break;
23318         case V2DF_FTYPE_V2DF_V2DF_INT:
23319           type = v2df_ftype_v2df_v2df_int;
23320           break;
23321         case V2DI_FTYPE_V2DI_UINT_UINT:
23322           type = v2di_ftype_v2di_unsigned_unsigned;
23323           break;
23324         case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23325           type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23326           break;
23327         case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23328           type = v1di_ftype_v1di_v1di_int;
23329           break;
23330         default:
23331           gcc_unreachable ();
23332         }
23333 
23334       def_builtin_const (d->mask, d->name, type, d->code);
23335     }
23336 
23337   /* pcmpestr[im] insns.  */
23338   for (i = 0, d = bdesc_pcmpestr;
23339        i < ARRAY_SIZE (bdesc_pcmpestr);
23340        i++, d++)
23341     {
23342       if (d->code == IX86_BUILTIN_PCMPESTRM128)
23343         ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23344       else
23345         ftype = int_ftype_v16qi_int_v16qi_int_int;
23346       def_builtin_const (d->mask, d->name, ftype, d->code);
23347     }
23348 
23349   /* pcmpistr[im] insns.  */
23350   for (i = 0, d = bdesc_pcmpistr;
23351        i < ARRAY_SIZE (bdesc_pcmpistr);
23352        i++, d++)
23353     {
23354       if (d->code == IX86_BUILTIN_PCMPISTRM128)
23355         ftype = v16qi_ftype_v16qi_v16qi_int;
23356       else
23357         ftype = int_ftype_v16qi_v16qi_int;
23358       def_builtin_const (d->mask, d->name, ftype, d->code);
23359     }
23360 
23361   /* comi/ucomi insns.  */
23362   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23363     if (d->mask == OPTION_MASK_ISA_SSE2)
23364       def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23365     else
23366       def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23367 
23368   /* SSE */
23369   def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23370   def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23371 
23372   /* SSE or 3DNow!A */
23373   def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23374 
23375   /* SSE2 */
23376   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23377 
23378   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23379   x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23380 
23381   /* SSE3.  */
23382   def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23383   def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23384 
23385   /* AES */
23386   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23387   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23388   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23389   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23390   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23391   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23392 
23393   /* PCLMUL */
23394   def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23395 
23396   /* AVX */
23397   def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23398                TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23399 
23400   /* Access to the vec_init patterns.  */
23401   ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23402                                     integer_type_node, NULL_TREE);
23403   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
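
      /* Illustrative sketch only: the intrinsic headers wrap builtins such
         as the one just defined.  An mmintrin.h-style wrapper might look
         roughly like

           extern __inline __m64
           _mm_set_pi32 (int __i1, int __i0)
           {
             return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
           }

         (the exact attributes and the __m64 typedef live in the header,
         not here).  */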
23404 
23405   ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23406                                     short_integer_type_node,
23407                                     short_integer_type_node,
23408                                     short_integer_type_node, NULL_TREE);
23409   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23410 
23411   ftype = build_function_type_list (V8QI_type_node, char_type_node,
23412                                     char_type_node, char_type_node,
23413                                     char_type_node, char_type_node,
23414                                     char_type_node, char_type_node,
23415                                     char_type_node, NULL_TREE);
23416   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
23417 
23418   /* Access to the vec_extract patterns.  */
23419   ftype = build_function_type_list (double_type_node, V2DF_type_node,
23420                                     integer_type_node, NULL_TREE);
23421   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23422 
23423   ftype = build_function_type_list (long_long_integer_type_node,
23424                                     V2DI_type_node, integer_type_node,
23425                                     NULL_TREE);
23426   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23427 
23428   ftype = build_function_type_list (float_type_node, V4SF_type_node,
23429                                     integer_type_node, NULL_TREE);
23430   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23431 
23432   ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23433                                     integer_type_node, NULL_TREE);
23434   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23435 
23436   ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23437                                     integer_type_node, NULL_TREE);
23438   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23439 
23440   ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23441                                     integer_type_node, NULL_TREE);
23442   def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23443 
23444   ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23445                                     integer_type_node, NULL_TREE);
23446   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23447 
23448   ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23449                                     integer_type_node, NULL_TREE);
23450   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
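
      /* Illustrative sketch only: assuming SSE2, user code can call the
         vec_ext builtins directly, e.g.

           typedef double v2df __attribute__ ((vector_size (16)));
           double high (v2df x)
           { return __builtin_ia32_vec_ext_v2df (x, 1); }

         where the element selector is expected to be a constant.  */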
23451 
23452   /* Access to the vec_set patterns.  */
23453   ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23454                                     intDI_type_node,
23455                                     integer_type_node, NULL_TREE);
23456   def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23457 
23458   ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23459                                     float_type_node,
23460                                     integer_type_node, NULL_TREE);
23461   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23462 
23463   ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23464                                     intSI_type_node,
23465                                     integer_type_node, NULL_TREE);
23466   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23467 
23468   ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23469                                     intHI_type_node,
23470                                     integer_type_node, NULL_TREE);
23471   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23472 
23473   ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23474                                     intHI_type_node,
23475                                     integer_type_node, NULL_TREE);
23476   def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23477 
23478   ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23479                                     intQI_type_node,
23480                                     integer_type_node, NULL_TREE);
23481   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
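
      /* Similarly for the vec_set builtins (sketch, SSE2 assumed):

           typedef short v8hi __attribute__ ((vector_size (16)));
           v8hi set3 (v8hi x, short s)
           { return __builtin_ia32_vec_set_v8hi (x, s, 3); }

         again with a constant element selector.  */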
23482 
23483   /* Add the SSE5 multi-argument instructions.  */
23484   for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23485     {
23486       tree mtype = NULL_TREE;
23487 
23488       if (d->name == 0)
23489         continue;
23490 
23491       switch ((enum multi_arg_type) d->flag)
23492         {
23493         case MULTI_ARG_3_SF:     mtype = v4sf_ftype_v4sf_v4sf_v4sf;     break;
23494         case MULTI_ARG_3_DF:     mtype = v2df_ftype_v2df_v2df_v2df;     break;
23495         case MULTI_ARG_3_DI:     mtype = v2di_ftype_v2di_v2di_v2di;     break;
23496         case MULTI_ARG_3_SI:     mtype = v4si_ftype_v4si_v4si_v4si;     break;
23497         case MULTI_ARG_3_SI_DI:  mtype = v4si_ftype_v4si_v4si_v2di;     break;
23498         case MULTI_ARG_3_HI:     mtype = v8hi_ftype_v8hi_v8hi_v8hi;     break;
23499         case MULTI_ARG_3_HI_SI:  mtype = v8hi_ftype_v8hi_v8hi_v4si;     break;
23500         case MULTI_ARG_3_QI:     mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23501         case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi;    break;
23502         case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi;    break;
23503         case MULTI_ARG_2_SF:     mtype = v4sf_ftype_v4sf_v4sf;          break;
23504         case MULTI_ARG_2_DF:     mtype = v2df_ftype_v2df_v2df;          break;
23505         case MULTI_ARG_2_DI:     mtype = v2di_ftype_v2di_v2di;          break;
23506         case MULTI_ARG_2_SI:     mtype = v4si_ftype_v4si_v4si;          break;
23507         case MULTI_ARG_2_HI:     mtype = v8hi_ftype_v8hi_v8hi;          break;
23508         case MULTI_ARG_2_QI:     mtype = v16qi_ftype_v16qi_v16qi;       break;
23509         case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si;            break;
23510         case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si;            break;
23511         case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si;            break;
23512         case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si;          break;
23513         case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf;          break;
23514         case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df;          break;
23515         case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di;          break;
23516         case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si;          break;
23517         case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi;          break;
23518         case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi;       break;
23519         case MULTI_ARG_2_SF_TF:  mtype = v4sf_ftype_v4sf_v4sf;          break;
23520         case MULTI_ARG_2_DF_TF:  mtype = v2df_ftype_v2df_v2df;          break;
23521         case MULTI_ARG_2_DI_TF:  mtype = v2di_ftype_v2di_v2di;          break;
23522         case MULTI_ARG_2_SI_TF:  mtype = v4si_ftype_v4si_v4si;          break;
23523         case MULTI_ARG_2_HI_TF:  mtype = v8hi_ftype_v8hi_v8hi;          break;
23524         case MULTI_ARG_2_QI_TF:  mtype = v16qi_ftype_v16qi_v16qi;       break;
23525         case MULTI_ARG_1_SF:     mtype = v4sf_ftype_v4sf;               break;
23526         case MULTI_ARG_1_DF:     mtype = v2df_ftype_v2df;               break;
23527         case MULTI_ARG_1_DI:     mtype = v2di_ftype_v2di;               break;
23528         case MULTI_ARG_1_SI:     mtype = v4si_ftype_v4si;               break;
23529         case MULTI_ARG_1_HI:     mtype = v8hi_ftype_v8hi;               break;
23530         case MULTI_ARG_1_QI:     mtype = v16qi_ftype_v16qi;             break;
23531         case MULTI_ARG_1_SI_DI:  mtype = v2di_ftype_v4si;               break;
23532         case MULTI_ARG_1_HI_DI:  mtype = v2di_ftype_v8hi;               break;
23533         case MULTI_ARG_1_HI_SI:  mtype = v4si_ftype_v8hi;               break;
23534         case MULTI_ARG_1_QI_DI:  mtype = v2di_ftype_v16qi;              break;
23535         case MULTI_ARG_1_QI_SI:  mtype = v4si_ftype_v16qi;              break;
23536         case MULTI_ARG_1_QI_HI:  mtype = v8hi_ftype_v16qi;              break;
23537         case MULTI_ARG_1_PH2PS:  mtype = v4sf_ftype_v4hi;               break;
23538         case MULTI_ARG_1_PS2PH:  mtype = v4hi_ftype_v4sf;               break;
23539         case MULTI_ARG_UNKNOWN:
23540         default:
23541           gcc_unreachable ();
23542         }
23543 
23544       if (mtype)
23545         def_builtin_const (d->mask, d->name, mtype, d->code);
23546     }
23547 }
23548 
23549 /* Internal subroutine of ix86_init_builtins.  */
23550 
23551 static void
23552 ix86_init_builtins_va_builtins_abi (void)
23553 {
23554   tree ms_va_ref, sysv_va_ref;
23555   tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23556   tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23557   tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23558   tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
23559 
23560   if (!TARGET_64BIT)
23561     return;
23562   fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23563   fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
23564   ms_va_ref = build_reference_type (ms_va_list_type_node);
23565   sysv_va_ref =
23566     build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
23567 
23568   fnvoid_va_end_ms =
23569     build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23570   fnvoid_va_start_ms =
23571     build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23572   fnvoid_va_end_sysv =
23573     build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23574   fnvoid_va_start_sysv =
23575     build_varargs_function_type_list (void_type_node, sysv_va_ref,
23576                                        NULL_TREE);
23577   fnvoid_va_copy_ms =
23578     build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23579                               NULL_TREE);
23580   fnvoid_va_copy_sysv =
23581     build_function_type_list (void_type_node, sysv_va_ref,
23582                               sysv_va_ref, NULL_TREE);
23583 
23584   add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23585                         BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23586   add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23587                         BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23588   add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23589                         BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23590   add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23591                         BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23592   add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23593                         BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23594   add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23595                         BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23596 }
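
      /* Illustrative sketch only: with the declarations above, an explicitly
         ms_abi varargs function on x86-64 can use the MS va_list protocol
         directly (assuming the usual __builtin_ms_va_list type name):

           void __attribute__ ((ms_abi))
           ms_vararg_fn (const char *fmt, ...)
           {
             __builtin_ms_va_list ap;
             __builtin_ms_va_start (ap, fmt);
             ...
             __builtin_ms_va_end (ap);
           }

         and likewise for the sysv_abi variants.  */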
23597 
23598 static void
23599 ix86_init_builtins (void)
23600 {
23601   tree float128_type_node = make_node (REAL_TYPE);
23602   tree ftype, decl;
23603 
23604   /* The __float80 type.  */
23605   if (TYPE_MODE (long_double_type_node) == XFmode)
23606     (*lang_hooks.types.register_builtin_type) (long_double_type_node,
23607                                                "__float80");
23608   else
23609     {
23610       /* long double does not have XFmode here, so build a distinct
23611          80-bit type for __float80.  */
23611       tree float80_type_node = make_node (REAL_TYPE);
23612 
23613       TYPE_PRECISION (float80_type_node) = 80;
23614       layout_type (float80_type_node);
23615       (*lang_hooks.types.register_builtin_type) (float80_type_node,
23616                                                  "__float80");
23617     }
23618 
23619   /* The __float128 type.  */
23620   TYPE_PRECISION (float128_type_node) = 128;
23621   layout_type (float128_type_node);
23622   (*lang_hooks.types.register_builtin_type) (float128_type_node,
23623                                              "__float128");
23624 
23625   /* TFmode support builtins.  */
23626   ftype = build_function_type (float128_type_node, void_list_node);
23627   decl = add_builtin_function ("__builtin_infq", ftype,
23628                                IX86_BUILTIN_INFQ, BUILT_IN_MD,
23629                                NULL, NULL_TREE);
23630   ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
23631 
23632   /* We will expand them to normal calls if SSE2 isn't available, since
23633      they are used by libgcc.  */
23634   ftype = build_function_type_list (float128_type_node,
23635                                     float128_type_node,
23636                                     NULL_TREE);
23637   decl = add_builtin_function ("__builtin_fabsq", ftype,
23638                                IX86_BUILTIN_FABSQ, BUILT_IN_MD,
23639                                "__fabstf2", NULL_TREE);
23640   ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
23641   TREE_READONLY (decl) = 1;
23642 
23643   ftype = build_function_type_list (float128_type_node,
23644                                     float128_type_node,
23645                                     float128_type_node,
23646                                     NULL_TREE);
23647   decl = add_builtin_function ("__builtin_copysignq", ftype,
23648                                IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
23649                                "__copysigntf3", NULL_TREE);
23650   ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
23651   TREE_READONLY (decl) = 1;
23652 
23653   ix86_init_mmx_sse_builtins ();
23654   if (TARGET_64BIT)
23655     ix86_init_builtins_va_builtins_abi ();
23656 }
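
      /* Illustrative sketch only: the TFmode builtins registered above give
         source-level access to __float128 values, e.g.

           __float128 inf = __builtin_infq ();
           __float128 mag = __builtin_fabsq (inf);

         where, as noted above, __builtin_fabsq falls back to the libgcc
         routine __fabstf2 when it cannot be expanded inline.  */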
23657 
23658 /* Errors in the source file can cause expand_expr to return const0_rtx
23659    where we expect a vector.  To avoid crashing, use one of the vector
23660    clear instructions.  */
23661 static rtx
23662 safe_vector_operand (rtx x, enum machine_mode mode)
23663 {
23664   if (x == const0_rtx)
23665     x = CONST0_RTX (mode);
23666   return x;
23667 }
23668 
23669 /* Subroutine of ix86_expand_builtin to take care of binop insns.  */
23670 
23671 static rtx
23672 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23673 {
23674   rtx pat;
23675   tree arg0 = CALL_EXPR_ARG (exp, 0);
23676   tree arg1 = CALL_EXPR_ARG (exp, 1);
23677   rtx op0 = expand_normal (arg0);
23678   rtx op1 = expand_normal (arg1);
23679   enum machine_mode tmode = insn_data[icode].operand[0].mode;
23680   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23681   enum machine_mode mode1 = insn_data[icode].operand[2].mode;
23682 
23683   if (VECTOR_MODE_P (mode0))
23684     op0 = safe_vector_operand (op0, mode0);
23685   if (VECTOR_MODE_P (mode1))
23686     op1 = safe_vector_operand (op1, mode1);
23687 
23688   if (optimize || !target
23689       || GET_MODE (target) != tmode
23690       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23691     target = gen_reg_rtx (tmode);
23692 
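        /* The insn may want a TImode operand where the builtin argument was
           expanded as SImode; in that case, load the value into the low
           element of a V4SI register and view that register as TImode.  */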
23693   if (GET_MODE (op1) == SImode && mode1 == TImode)
23694     {
23695       rtx x = gen_reg_rtx (V4SImode);
23696       emit_insn (gen_sse2_loadd (x, op1));
23697       op1 = gen_lowpart (TImode, x);
23698     }
23699 
23700   if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
23701     op0 = copy_to_mode_reg (mode0, op0);
23702   if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
23703     op1 = copy_to_mode_reg (mode1, op1);
23704 
23705   pat = GEN_FCN (icode) (target, op0, op1);
23706   if (! pat)
23707     return 0;
23708 
23709   emit_insn (pat);
23710 
23711   return target;
23712 }
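
      /* For example (names illustrative): a descriptor that pairs a
         padd-style builtin with an addv8hi3-style insn code is expanded here
         by loading both vector arguments into registers as needed and
         emitting the single pattern built by GEN_FCN, i.e. roughly
         (set target (plus op0 op1)).  */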
23713 
23714 /* Subroutine of ix86_expand_builtin to take care of the SSE5 multi-arg
23715    insns, which take one to three arguments (plus an optional comparison
23716    sub-code).  */
23715 
23716 static rtx
23717 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23718                                enum multi_arg_type m_type,
23719                                enum insn_code sub_code)
23720 {
23721   rtx pat;
23722   int i;
23723   int nargs;
23724   bool comparison_p = false;
23725   bool tf_p = false;
23726   bool last_arg_constant = false;
23727   int num_memory = 0;
23728   struct {
23729     rtx op;
23730     enum machine_mode mode;
23731   } args[4];
23732 
23733   enum machine_mode tmode = insn_data[icode].operand[0].mode;
23734 
23735   switch (m_type)
23736     {
23737     case MULTI_ARG_3_SF:
23738     case MULTI_ARG_3_DF:
23739     case MULTI_ARG_3_DI:
23740     case MULTI_ARG_3_SI:
23741     case MULTI_ARG_3_SI_DI:
23742     case MULTI_ARG_3_HI:
23743     case MULTI_ARG_3_HI_SI:
23744     case MULTI_ARG_3_QI:
23745     case MULTI_ARG_3_PERMPS:
23746     case MULTI_ARG_3_PERMPD:
23747       nargs = 3;
23748       break;
23749 
23750     case MULTI_ARG_2_SF:
23751     case MULTI_ARG_2_DF:
23752     case MULTI_ARG_2_DI:
23753     case MULTI_ARG_2_SI:
23754     case MULTI_ARG_2_HI:
23755     case MULTI_ARG_2_QI:
23756       nargs = 2;
23757       break;
23758 
23759     case MULTI_ARG_2_DI_IMM:
23760     case MULTI_ARG_2_SI_IMM:
23761     case MULTI_ARG_2_HI_IMM:
23762     case MULTI_ARG_2_QI_IMM:
23763       nargs = 2;
23764       last_arg_constant = true;
23765       break;
23766 
23767     case MULTI_ARG_1_SF:
23768     case MULTI_ARG_1_DF:
23769     case MULTI_ARG_1_DI:
23770     case MULTI_ARG_1_SI:
23771     case MULTI_ARG_1_HI:
23772     case MULTI_ARG_1_QI:
23773     case MULTI_ARG_1_SI_DI:
23774     case MULTI_ARG_1_HI_DI:
23775     case MULTI_ARG_1_HI_SI:
23776     case MULTI_ARG_1_QI_DI:
23777     case MULTI_ARG_1_QI_SI:
23778     case MULTI_ARG_1_QI_HI:
23779     case MULTI_ARG_1_PH2PS:
23780     case MULTI_ARG_1_PS2PH:
23781       nargs = 1;
23782       break;
23783 
23784     case MULTI_ARG_2_SF_CMP:
23785     case MULTI_ARG_2_DF_CMP:
23786     case MULTI_ARG_2_DI_CMP:
23787     case MULTI_ARG_2_SI_CMP:
23788     case MULTI_ARG_2_HI_CMP:
23789     case MULTI_ARG_2_QI_CMP:
23790       nargs = 2;
23791       comparison_p = true;
23792       break;
23793 
23794     case MULTI_ARG_2_SF_TF:
23795     case MULTI_ARG_2_DF_TF:
23796     case MULTI_ARG_2_DI_TF:
23797     case MULTI_ARG_2_SI_TF:
23798     case MULTI_ARG_2_HI_TF:
23799     case MULTI_ARG_2_QI_TF:
23800       nargs = 2;
23801       tf_p = true;
23802       break;
23803 
23804     case MULTI_ARG_UNKNOWN:
23805     default:
23806       gcc_unreachable ();
23807     }
23808 
23809   if (optimize || !target
23810       || GET_MODE (target) != tmode
23811       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23812     target = gen_reg_rtx (tmode);
23813 
23814   gcc_assert (nargs <= 4);
23815 
23816   for (i = 0; i < nargs; i++)
23817     {
23818       tree arg = CALL_EXPR_ARG (exp, i);
23819       rtx op = expand_normal (arg);
23820       int adjust = (comparison_p) ? 1 : 0;
23821       enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
23822 
23823       if (last_arg_constant && i == nargs-1)
23824         {
23825           if (GET_CODE (op) != CONST_INT)
23826             {
23827               error ("last argument must be an immediate");
23828               return gen_reg_rtx (tmode);
23829             }
23830         }
23831       else
23832         {
23833           if (VECTOR_MODE_P (mode))
23834             op = safe_vector_operand (op, mode);
23835 
23836           /* If we aren't optimizing, only allow one memory operand to be
23837              generated.  */
23838           if (memory_operand (op, mode))
23839             num_memory++;
23840 
23841           gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
23842 
23843           if (optimize
23844               || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
23845               || num_memory > 1)
23846             op = force_reg (mode, op);
23847         }
23848 
23849       args[i].op = op;
23850       args[i].mode = mode;
23851     }
23852 
23853   switch (nargs)
23854     {
23855     case 1:
23856       pat = GEN_FCN (icode) (target, args[0].op);
23857       break;
23858 
23859     case 2:
23860       if (tf_p)
23861         pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
23862                                GEN_INT ((int)sub_code));
23863       else if (! comparison_p)
23864         pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
23865       else
23866         {
23867           rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
23868                                        args[0].op,
23869                                        args[1].op);
23870 
23871           pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
23872         }
23873       break;
23874 
23875     case 3:
23876       pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
23877       break;
23878 
23879     default:
23880       gcc_unreachable ();
23881     }
23882 
23883   if (! pat)
23884     return 0;
23885 
23886   emit_insn (pat);
23887   return target;
23888 }
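
      /* Sketch of the two-operand paths above: for the MULTI_ARG_2_*_CMP
         descriptors, sub_code carries an rtx comparison code and the
         comparison rtx rebuilt by gen_rtx_fmt_ee is passed to the insn as an
         extra leading operand; for the *_TF forms the sub-code is instead
         passed through as an immediate.  */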
23889 
23890 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
23891    insns with vec_merge.  */
23892 
23893 static rtx
23894 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
23895                                     rtx target)
23896 {
23897   rtx pat;
23898   tree arg0 = CALL_EXPR_ARG (exp, 0);
23899   rtx op1, op0 = expand_normal (arg0);
23900   enum machine_mode tmode = insn_data[icode].operand[0].mode;
23901   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23902 
23903   if (optimize || !target
23904       || GET_MODE (target) != tmode
23905       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23906     target = gen_reg_rtx (tmode);
23907 
23908   if (VECTOR_MODE_P (mode0))
23909     op0 = safe_vector_operand (op0, mode0);
23910 
23911   if ((optimize && !register_operand (op0, mode0))
23912       || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
23913     op0 = copy_to_mode_reg (mode0, op0);
23914 
23915   op1 = op0;
23916   if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
23917     op1 = copy_to_mode_reg (mode0, op1);
23918 
23919   pat = GEN_FCN (icode) (target, op0, op1);
23920   if (! pat)
23921     return 0;
23922   emit_insn (pat);
23923   return target;
23924 }
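
      /* This implements the *_VEC_MERGE cases in the builtin tables: scalar
         insns of this shape operate on element 0 and merge the remaining
         elements from a second vector input, so the single source operand is
         simply duplicated into both insn inputs.  */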
23925 
23926 /* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
23927 
23928 static rtx
23929 ix86_expand_sse_compare (const struct builtin_description *d,
23930                          tree exp, rtx target, bool swap)
23931 {
23932   rtx pat;
23933   tree arg0 = CALL_EXPR_ARG (exp, 0);
23934   tree arg1 = CALL_EXPR_ARG (exp, 1);
23935   rtx op0 = expand_normal (arg0);
23936   rtx op1 = expand_normal (arg1);
23937   rtx op2;
23938   enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
23939   enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
23940   enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
23941   enum rtx_code comparison = d->comparison;
23942 
23943   if (VECTOR_MODE_P (mode0))
23944     op0 = safe_vector_operand (op0, mode0);
23945   if (VECTOR_MODE_P (mode1))
23946     op1 = safe_vector_operand (op1, mode1);
23947 
23948   /* Swap operands if we have a comparison that isn't available in
23949      hardware.  */
23950   if (swap)
23951     {
23952       rtx tmp = gen_reg_rtx (mode1);
23953       emit_move_insn (tmp, op1);
23954       op1 = op0;
23955       op0 = tmp;
23956     }
23957 
23958   if (optimize || !target
23959       || GET_MODE (target) != tmode
23960       || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
23961     target = gen_reg_rtx (tmode);
23962 
23963   if ((optimize && !register_operand (op0, mode0))
23964       || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
23965     op0 = copy_to_mode_reg (mode0, op0);
23966   if ((optimize && !register_operand (op1, mode1))
23967       || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
23968     op1 = copy_to_mode_reg (mode1, op1);
23969 
23970   op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
23971   pat = GEN_FCN (d->icode) (target, op0, op1, op2);
23972   if (! pat)
23973     return 0;
23974   emit_insn (pat);
23975   return target;
23976 }
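
      /* The swap parameter is driven by the *_SWAP descriptors (e.g.
         V4SF_FTYPE_V4SF_V4SF_SWAP): a greater-than style builtin can then be
         emitted as its less-than counterpart with the operands exchanged,
         op1 being moved through a fresh register in the process.  */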
23977 
23978 /* Subroutine of ix86_expand_builtin to take care of comi insns.  */
23979 
23980 static rtx
23981 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
23982                       rtx target)
23983 {
23984   rtx pat;
23985   tree arg0 = CALL_EXPR_ARG (exp, 0);
23986   tree arg1 = CALL_EXPR_ARG (exp, 1);
23987   rtx op0 = expand_normal (arg0);
23988   rtx op1 = expand_normal (arg1);
23989   enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23990   enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23991   enum rtx_code comparison = d->comparison;
23992 
23993   if (VECTOR_MODE_P (mode0))
23994     op0 = safe_vector_operand (op0, mode0);
23995   if (VECTOR_MODE_P (mode1))
23996     op1 = safe_vector_operand (op1, mode1);
23997 
23998   /* Swap operands if we have a comparison that isn't available in
23999      hardware.  */
24000   if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
24001     {
24002       rtx tmp = op1;
24003       op1 = op0;
24004       op0 = tmp;
24005     }
24006 
24007   target = gen_reg_rtx (SImode);
24008   emit_move_insn (target, const0_rtx);
24009   target = gen_rtx_SUBREG (QImode, target, 0);
24010 
24011   if ((optimize && !register_operand (op0, mode0))
24012       || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24013     op0 = copy_to_mode_reg (mode0, op0);
24014   if ((optimize && !register_operand (op1, mode1))
24015       || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24016     op1 = copy_to_mode_reg (mode1, op1);
24017 
24018   pat = GEN_FCN (d->icode) (op0, op1);
24019   if (! pat)
24020     return 0;
24021   emit_insn (pat);
24022   emit_insn (gen_rtx_SET (VOIDmode,
24023                           gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24024                           gen_rtx_fmt_ee (comparison, QImode,
24025                                           SET_DEST (pat),
24026                                           const0_rtx)));
24027 
24028   return SUBREG_REG (target);
24029 }
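
      /* The comi family of insns only sets the flags register, so the result
         is materialized above as a QImode setcc of the comparison written
         into the low byte of a zeroed SImode pseudo; the enclosing SImode
         pseudo is what the builtin returns.  */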
24030 
24031 /* Subroutine of ix86_expand_builtin to take care of ptest insns.  */
24032 
24033 static rtx
24034 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24035                        rtx target)
24036 {
24037   rtx pat;
24038   tree arg0 = CALL_EXPR_ARG (exp, 0);
24039   tree arg1 = CALL_EXPR_ARG (exp, 1);
24040   rtx op0 = expand_normal (arg0);
24041   rtx op1 = expand_normal (arg1);
24042   enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24043   enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24044   enum rtx_code comparison = d->comparison;
24045 
24046   if (VECTOR_MODE_P (mode0))
24047     op0 = safe_vector_operand (op0, mode0);
24048   if (VECTOR_MODE_P (mode1))
24049     op1 = safe_vector_operand (op1, mode1);
24050 
24051   target = gen_reg_rtx (SImode);
24052   emit_move_insn (target, const0_rtx);
24053   target = gen_rtx_SUBREG (QImode, target, 0);
24054 
24055   if ((optimize && !register_operand (op0, mode0))
24056       || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24057     op0 = copy_to_mode_reg (mode0, op0);
24058   if ((optimize && !register_operand (op1, mode1))
24059       || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24060     op1 = copy_to_mode_reg (mode1, op1);
24061 
24062   pat = GEN_FCN (d->icode) (op0, op1);
24063   if (! pat)
24064     return 0;
24065   emit_insn (pat);
24066   emit_insn (gen_rtx_SET (VOIDmode,
24067                           gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24068                           gen_rtx_fmt_ee (comparison, QImode,
24069                                           SET_DEST (pat),
24070                                           const0_rtx)));
24071 
24072   return SUBREG_REG (target);
24073 }
24074 
24075 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */
24076 
24077 static rtx
24078 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24079                           tree exp, rtx target)
24080 {
24081   rtx pat;
24082   tree arg0 = CALL_EXPR_ARG (exp, 0);
24083   tree arg1 = CALL_EXPR_ARG (exp, 1);
24084   tree arg2 = CALL_EXPR_ARG (exp, 2);
24085   tree arg3 = CALL_EXPR_ARG (exp, 3);
24086   tree arg4 = CALL_EXPR_ARG (exp, 4);
24087   rtx scratch0, scratch1;
24088   rtx op0 = expand_normal (arg0);
24089   rtx op1 = expand_normal (arg1);
24090   rtx op2 = expand_normal (arg2);
24091   rtx op3 = expand_normal (arg3);
24092   rtx op4 = expand_normal (arg4);
24093   enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24094 
24095   tmode0 = insn_data[d->icode].operand[0].mode;
24096   tmode1 = insn_data[d->icode].operand[1].mode;
24097   modev2 = insn_data[d->icode].operand[2].mode;
24098   modei3 = insn_data[d->icode].operand[3].mode;
24099   modev4 = insn_data[d->icode].operand[4].mode;
24100   modei5 = insn_data[d->icode].operand[5].mode;
24101   modeimm = insn_data[d->icode].operand[6].mode;
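
  /* In the pcmpestr patterns operand 0 is the index result, operand 1
     the mask result, operands 2 and 4 the two input vectors, operands
     3 and 5 their explicit lengths, and operand 6 the control
     immediate.  */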
24102 
24103   if (VECTOR_MODE_P (modev2))
24104     op0 = safe_vector_operand (op0, modev2);
24105   if (VECTOR_MODE_P (modev4))
24106     op2 = safe_vector_operand (op2, modev4);
24107 
24108   if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24109     op0 = copy_to_mode_reg (modev2, op0);
24110   if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24111     op1 = copy_to_mode_reg (modei3, op1);
24112   if ((optimize && !register_operand (op2, modev4))
24113       || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24114     op2 = copy_to_mode_reg (modev4, op2);
24115   if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24116     op3 = copy_to_mode_reg (modei5, op3);
24117 
24118   if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
24119     {
24120       error ("the fifth argument must be a 8-bit immediate");
24121       return const0_rtx;
24122     }
24123 
24124   if (d->code == IX86_BUILTIN_PCMPESTRI128)
24125     {
24126       if (optimize || !target
24127           || GET_MODE (target) != tmode0
24128           || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24129         target = gen_reg_rtx (tmode0);
24130 
24131       scratch1 = gen_reg_rtx (tmode1);
24132 
24133       pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24134     }
24135   else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24136     {
24137       if (optimize || !target
24138           || GET_MODE (target) != tmode1
24139           || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24140         target = gen_reg_rtx (tmode1);
24141 
24142       scratch0 = gen_reg_rtx (tmode0);
24143 
24144       pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
24145     }
24146   else
24147     {
24148       gcc_assert (d->flag);
24149 
24150       scratch0 = gen_reg_rtx (tmode0);
24151       scratch1 = gen_reg_rtx (tmode1);
24152 
24153       pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24154     }
24155 
24156   if (! pat)
24157     return 0;
24158 
24159   emit_insn (pat);
24160 
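  /* For the flag-reading variants, d->flag holds the machine mode (a
     CC mode) of the flags-register bit to test; read FLAGS_REG in that
     mode, compare it against zero, and return the resulting bit.  */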
24161   if (d->flag)
24162     {
24163       target = gen_reg_rtx (SImode);
24164       emit_move_insn (target, const0_rtx);
24165       target = gen_rtx_SUBREG (QImode, target, 0);
24166 
24167       emit_insn
24168         (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24169                       gen_rtx_fmt_ee (EQ, QImode,
24170                                       gen_rtx_REG ((enum machine_mode) d->flag,
24171                                                    FLAGS_REG),
24172                                       const0_rtx)));
24173       return SUBREG_REG (target);
24174     }
24175   else
24176     return target;
24177 }
24178 
24179 
24180 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */
24181 
24182 static rtx
24183 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24184                           tree exp, rtx target)
24185 {
24186   rtx pat;
24187   tree arg0 = CALL_EXPR_ARG (exp, 0);
24188   tree arg1 = CALL_EXPR_ARG (exp, 1);
24189   tree arg2 = CALL_EXPR_ARG (exp, 2);
24190   rtx scratch0, scratch1;
24191   rtx op0 = expand_normal (arg0);
24192   rtx op1 = expand_normal (arg1);
24193   rtx op2 = expand_normal (arg2);
24194   enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24195 
24196   tmode0 = insn_data[d->icode].operand[0].mode;
24197   tmode1 = insn_data[d->icode].operand[1].mode;
24198   modev2 = insn_data[d->icode].operand[2].mode;
24199   modev3 = insn_data[d->icode].operand[3].mode;
24200   modeimm = insn_data[d->icode].operand[4].mode;
24201 
24202   if (VECTOR_MODE_P (modev2))
24203     op0 = safe_vector_operand (op0, modev2);
24204   if (VECTOR_MODE_P (modev3))
24205     op1 = safe_vector_operand (op1, modev3);
24206 
24207   if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24208     op0 = copy_to_mode_reg (modev2, op0);
24209   if ((optimize && !register_operand (op1, modev3))
24210       || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24211     op1 = copy_to_mode_reg (modev3, op1);
24212 
24213   if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
24214     {
24215       error ("the third argument must be a 8-bit immediate");
24216       return const0_rtx;
24217     }
24218 
24219   if (d->code == IX86_BUILTIN_PCMPISTRI128)
24220     {
24221       if (optimize || !target
24222           || GET_MODE (target) != tmode0
24223           || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24224         target = gen_reg_rtx (tmode0);
24225 
24226       scratch1 = gen_reg_rtx (tmode1);
24227 
24228       pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24229     }
24230   else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24231     {
24232       if (optimize || !target
24233           || GET_MODE (target) != tmode1
24234           || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24235         target = gen_reg_rtx (tmode1);
24236 
24237       scratch0 = gen_reg_rtx (tmode0);
24238 
24239       pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
24240     }
24241   else
24242     {
24243       gcc_assert (d->flag);
24244 
24245       scratch0 = gen_reg_rtx (tmode0);
24246       scratch1 = gen_reg_rtx (tmode1);
24247 
24248       pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24249     }
24250 
24251   if (! pat)
24252     return 0;
24253 
24254   emit_insn (pat);
24255 
24256   if (d->flag)
24257     {
24258       target = gen_reg_rtx (SImode);
24259       emit_move_insn (target, const0_rtx);
24260       target = gen_rtx_SUBREG (QImode, target, 0);
24261 
24262       emit_insn
24263         (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24264                       gen_rtx_fmt_ee (EQ, QImode,
24265                                       gen_rtx_REG ((enum machine_mode) d->flag,
24266                                                    FLAGS_REG),
24267                                       const0_rtx)));
24268       return SUBREG_REG (target);
24269     }
24270   else
24271     return target;
24272 }
24273 
24274 /* Subroutine of ix86_expand_builtin to take care of insns with
24275    variable number of operands.  */
24276 
24277 static rtx
24278 ix86_expand_args_builtin (const struct builtin_description *d,
24279                           tree exp, rtx target)
24280 {
24281   rtx pat, real_target;
24282   unsigned int i, nargs;
24283   unsigned int nargs_constant = 0;
24284   int num_memory = 0;
24285   struct
24286     {
24287       rtx op;
24288       enum machine_mode mode;
24289     } args[4];
24290   bool last_arg_count = false;
24291   enum insn_code icode = d->icode;
24292   const struct insn_data *insn_p = &insn_data[icode];
24293   enum machine_mode tmode = insn_p->operand[0].mode;
24294   enum machine_mode rmode = VOIDmode;
24295   bool swap = false;
24296   enum rtx_code comparison = d->comparison;
24297 
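  /* Dispatch on the builtin's function type.  Each case below records
     how many operands the insn takes (nargs), how many trailing
     arguments must be immediates (nargs_constant), and whether the
     result must be produced in a different mode (rmode) through a
     subreg.  */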
24298   switch ((enum ix86_builtin_type) d->flag)
24299     {
24300     case INT_FTYPE_V8SF_V8SF_PTEST:
24301     case INT_FTYPE_V4DI_V4DI_PTEST:
24302     case INT_FTYPE_V4DF_V4DF_PTEST:
24303     case INT_FTYPE_V4SF_V4SF_PTEST:
24304     case INT_FTYPE_V2DI_V2DI_PTEST:
24305     case INT_FTYPE_V2DF_V2DF_PTEST:
24306       return ix86_expand_sse_ptest (d, exp, target);
24307     case FLOAT128_FTYPE_FLOAT128:
24308     case FLOAT_FTYPE_FLOAT:
24309     case INT64_FTYPE_V4SF:
24310     case INT64_FTYPE_V2DF:
24311     case INT_FTYPE_V16QI:
24312     case INT_FTYPE_V8QI:
24313     case INT_FTYPE_V8SF:
24314     case INT_FTYPE_V4DF:
24315     case INT_FTYPE_V4SF:
24316     case INT_FTYPE_V2DF:
24317     case V16QI_FTYPE_V16QI:
24318     case V8SI_FTYPE_V8SF:
24319     case V8SI_FTYPE_V4SI:
24320     case V8HI_FTYPE_V8HI:
24321     case V8HI_FTYPE_V16QI:
24322     case V8QI_FTYPE_V8QI:
24323     case V8SF_FTYPE_V8SF:
24324     case V8SF_FTYPE_V8SI:
24325     case V8SF_FTYPE_V4SF:
24326     case V4SI_FTYPE_V4SI:
24327     case V4SI_FTYPE_V16QI:
24328     case V4SI_FTYPE_V4SF:
24329     case V4SI_FTYPE_V8SI:
24330     case V4SI_FTYPE_V8HI:
24331     case V4SI_FTYPE_V4DF:
24332     case V4SI_FTYPE_V2DF:
24333     case V4HI_FTYPE_V4HI:
24334     case V4DF_FTYPE_V4DF:
24335     case V4DF_FTYPE_V4SI:
24336     case V4DF_FTYPE_V4SF:
24337     case V4DF_FTYPE_V2DF:
24338     case V4SF_FTYPE_V4SF:
24339     case V4SF_FTYPE_V4SI:
24340     case V4SF_FTYPE_V8SF:
24341     case V4SF_FTYPE_V4DF:
24342     case V4SF_FTYPE_V2DF:
24343     case V2DI_FTYPE_V2DI:
24344     case V2DI_FTYPE_V16QI:
24345     case V2DI_FTYPE_V8HI:
24346     case V2DI_FTYPE_V4SI:
24347     case V2DF_FTYPE_V2DF:
24348     case V2DF_FTYPE_V4SI:
24349     case V2DF_FTYPE_V4DF:
24350     case V2DF_FTYPE_V4SF:
24351     case V2DF_FTYPE_V2SI:
24352     case V2SI_FTYPE_V2SI:
24353     case V2SI_FTYPE_V4SF:
24354     case V2SI_FTYPE_V2SF:
24355     case V2SI_FTYPE_V2DF:
24356     case V2SF_FTYPE_V2SF:
24357     case V2SF_FTYPE_V2SI:
24358       nargs = 1;
24359       break;
24360     case V4SF_FTYPE_V4SF_VEC_MERGE:
24361     case V2DF_FTYPE_V2DF_VEC_MERGE:
24362       return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24363     case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24364     case V16QI_FTYPE_V16QI_V16QI:
24365     case V16QI_FTYPE_V8HI_V8HI:
24366     case V8QI_FTYPE_V8QI_V8QI:
24367     case V8QI_FTYPE_V4HI_V4HI:
24368     case V8HI_FTYPE_V8HI_V8HI:
24369     case V8HI_FTYPE_V16QI_V16QI:
24370     case V8HI_FTYPE_V4SI_V4SI:
24371     case V8SF_FTYPE_V8SF_V8SF:
24372     case V8SF_FTYPE_V8SF_V8SI:
24373     case V4SI_FTYPE_V4SI_V4SI:
24374     case V4SI_FTYPE_V8HI_V8HI:
24375     case V4SI_FTYPE_V4SF_V4SF:
24376     case V4SI_FTYPE_V2DF_V2DF:
24377     case V4HI_FTYPE_V4HI_V4HI:
24378     case V4HI_FTYPE_V8QI_V8QI:
24379     case V4HI_FTYPE_V2SI_V2SI:
24380     case V4DF_FTYPE_V4DF_V4DF:
24381     case V4DF_FTYPE_V4DF_V4DI:
24382     case V4SF_FTYPE_V4SF_V4SF:
24383     case V4SF_FTYPE_V4SF_V4SI:
24384     case V4SF_FTYPE_V4SF_V2SI:
24385     case V4SF_FTYPE_V4SF_V2DF:
24386     case V4SF_FTYPE_V4SF_DI:
24387     case V4SF_FTYPE_V4SF_SI:
24388     case V2DI_FTYPE_V2DI_V2DI:
24389     case V2DI_FTYPE_V16QI_V16QI:
24390     case V2DI_FTYPE_V4SI_V4SI:
24391     case V2DI_FTYPE_V2DI_V16QI:
24392     case V2DI_FTYPE_V2DF_V2DF:
24393     case V2SI_FTYPE_V2SI_V2SI:
24394     case V2SI_FTYPE_V4HI_V4HI:
24395     case V2SI_FTYPE_V2SF_V2SF:
24396     case V2DF_FTYPE_V2DF_V2DF:
24397     case V2DF_FTYPE_V2DF_V4SF:
24398     case V2DF_FTYPE_V2DF_V2DI:
24399     case V2DF_FTYPE_V2DF_DI:
24400     case V2DF_FTYPE_V2DF_SI:
24401     case V2SF_FTYPE_V2SF_V2SF:
24402     case V1DI_FTYPE_V1DI_V1DI:
24403     case V1DI_FTYPE_V8QI_V8QI:
24404     case V1DI_FTYPE_V2SI_V2SI:
24405       if (comparison == UNKNOWN)
24406         return ix86_expand_binop_builtin (icode, exp, target);
24407       nargs = 2;
24408       break;
24409     case V4SF_FTYPE_V4SF_V4SF_SWAP:
24410     case V2DF_FTYPE_V2DF_V2DF_SWAP:
24411       gcc_assert (comparison != UNKNOWN);
24412       nargs = 2;
24413       swap = true;
24414       break;
24415     case V8HI_FTYPE_V8HI_V8HI_COUNT:
24416     case V8HI_FTYPE_V8HI_SI_COUNT:
24417     case V4SI_FTYPE_V4SI_V4SI_COUNT:
24418     case V4SI_FTYPE_V4SI_SI_COUNT:
24419     case V4HI_FTYPE_V4HI_V4HI_COUNT:
24420     case V4HI_FTYPE_V4HI_SI_COUNT:
24421     case V2DI_FTYPE_V2DI_V2DI_COUNT:
24422     case V2DI_FTYPE_V2DI_SI_COUNT:
24423     case V2SI_FTYPE_V2SI_V2SI_COUNT:
24424     case V2SI_FTYPE_V2SI_SI_COUNT:
24425     case V1DI_FTYPE_V1DI_V1DI_COUNT:
24426     case V1DI_FTYPE_V1DI_SI_COUNT:
24427       nargs = 2;
24428       last_arg_count = true;
24429       break;
24430     case UINT64_FTYPE_UINT64_UINT64:
24431     case UINT_FTYPE_UINT_UINT:
24432     case UINT_FTYPE_UINT_USHORT:
24433     case UINT_FTYPE_UINT_UCHAR:
24434       nargs = 2;
24435       break;
24436     case V2DI2TI_FTYPE_V2DI_INT:
24437       nargs = 2;
24438       rmode = V2DImode;
24439       nargs_constant = 1;
24440       break;
24441     case V8HI_FTYPE_V8HI_INT:
24442     case V8SF_FTYPE_V8SF_INT:
24443     case V4SI_FTYPE_V4SI_INT:
24444     case V4SI_FTYPE_V8SI_INT:
24445     case V4HI_FTYPE_V4HI_INT:
24446     case V4DF_FTYPE_V4DF_INT:
24447     case V4SF_FTYPE_V4SF_INT:
24448     case V4SF_FTYPE_V8SF_INT:
24449     case V2DI_FTYPE_V2DI_INT:
24450     case V2DF_FTYPE_V2DF_INT:
24451     case V2DF_FTYPE_V4DF_INT:
24452       nargs = 2;
24453       nargs_constant = 1;
24454       break;
24455     case V16QI_FTYPE_V16QI_V16QI_V16QI:
24456     case V8SF_FTYPE_V8SF_V8SF_V8SF:
24457     case V4DF_FTYPE_V4DF_V4DF_V4DF:
24458     case V4SF_FTYPE_V4SF_V4SF_V4SF:
24459     case V2DF_FTYPE_V2DF_V2DF_V2DF:
24460       nargs = 3;
24461       break;
24462     case V16QI_FTYPE_V16QI_V16QI_INT:
24463     case V8HI_FTYPE_V8HI_V8HI_INT:
24464     case V8SI_FTYPE_V8SI_V8SI_INT:
24465     case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
24468     case V4SI_FTYPE_V4SI_V4SI_INT:
24469     case V4DF_FTYPE_V4DF_V4DF_INT:
24470     case V4DF_FTYPE_V4DF_V2DF_INT:
24471     case V4SF_FTYPE_V4SF_V4SF_INT:
24472     case V2DI_FTYPE_V2DI_V2DI_INT:
24473     case V2DF_FTYPE_V2DF_V2DF_INT:
24474       nargs = 3;
24475       nargs_constant = 1;
24476       break;
24477     case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24478       nargs = 3;
24479       rmode = V2DImode;
24480       nargs_constant = 1;
24481       break;
24482     case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24483       nargs = 3;
24484       rmode = DImode;
24485       nargs_constant = 1;
24486       break;
24487     case V2DI_FTYPE_V2DI_UINT_UINT:
24488       nargs = 3;
24489       nargs_constant = 2;
24490       break;
24491     case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24492       nargs = 4;
24493       nargs_constant = 2;
24494       break;
24495     default:
24496       gcc_unreachable ();
24497     }
24498 
24499   gcc_assert (nargs <= ARRAY_SIZE (args));
24500 
24501   if (comparison != UNKNOWN)
24502     {
24503       gcc_assert (nargs == 2);
24504       return ix86_expand_sse_compare (d, exp, target, swap);
24505     }
24506 
24507   if (rmode == VOIDmode || rmode == tmode)
24508     {
24509       if (optimize
24510           || target == 0
24511           || GET_MODE (target) != tmode
24512           || ! (*insn_p->operand[0].predicate) (target, tmode))
24513         target = gen_reg_rtx (tmode);
24514       real_target = target;
24515     }
24516   else
24517     {
24518       target = gen_reg_rtx (rmode);
24519       real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24520     }
24521 
24522   for (i = 0; i < nargs; i++)
24523     {
24524       tree arg = CALL_EXPR_ARG (exp, i);
24525       rtx op = expand_normal (arg);
24526       enum machine_mode mode = insn_p->operand[i + 1].mode;
24527       bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
24528 
24529       if (last_arg_count && (i + 1) == nargs)
24530         {
          /* SIMD shift insns take either an 8-bit immediate or a
             register as the count.  But builtin functions take int as
             the count.  If the count doesn't match, we put it in a
             register.  */
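          /* E.g., a PSLLW count declared int in the builtin prototype
             is reduced here to its SImode lowpart, or copied into a
             register if the predicate still does not accept it.  */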
24534           if (!match)
24535             {
24536               op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24537               if (!(*insn_p->operand[i + 1].predicate) (op, mode))
24538                 op = copy_to_reg (op);
24539             }
24540         }
24541       else if ((nargs - i) <= nargs_constant)
24542         {
24543           if (!match)
24544             switch (icode)
24545               {
24546               case CODE_FOR_sse4_1_roundpd:
24547               case CODE_FOR_sse4_1_roundps:
24548               case CODE_FOR_sse4_1_roundsd:
24549               case CODE_FOR_sse4_1_roundss:
24550               case CODE_FOR_sse4_1_blendps:
24551               case CODE_FOR_avx_blendpd256:
24552               case CODE_FOR_avx_vpermilv4df:
24553               case CODE_FOR_avx_roundpd256:
24554               case CODE_FOR_avx_roundps256:
24555                 error ("the last argument must be a 4-bit immediate");
24556                 return const0_rtx;
24557 
24558               case CODE_FOR_sse4_1_blendpd:
24559               case CODE_FOR_avx_vpermilv2df:
24560                 error ("the last argument must be a 2-bit immediate");
24561                 return const0_rtx;
24562 
24563               case CODE_FOR_avx_vextractf128v4df:
24564               case CODE_FOR_avx_vextractf128v8sf:
24565               case CODE_FOR_avx_vextractf128v8si:
24566               case CODE_FOR_avx_vinsertf128v4df:
24567               case CODE_FOR_avx_vinsertf128v8sf:
24568               case CODE_FOR_avx_vinsertf128v8si:
24569                 error ("the last argument must be a 1-bit immediate");
24570                 return const0_rtx;
24571 
24572               case CODE_FOR_avx_cmpsdv2df3:
24573               case CODE_FOR_avx_cmpssv4sf3:
24574               case CODE_FOR_avx_cmppdv2df3:
24575               case CODE_FOR_avx_cmppsv4sf3:
24576               case CODE_FOR_avx_cmppdv4df3:
24577               case CODE_FOR_avx_cmppsv8sf3:
24578                 error ("the last argument must be a 5-bit immediate");
24579                 return const0_rtx;
24580 
              default:
24582                 switch (nargs_constant)
24583                   {
24584                   case 2:
24585                     if ((nargs - i) == nargs_constant)
24586                       {
24587                         error ("the next to last argument must be an 8-bit immediate");
24588                         break;
24589                       }
24590                   case 1:
24591                     error ("the last argument must be an 8-bit immediate");
24592                     break;
24593                   default:
24594                     gcc_unreachable ();
24595                   }
24596                 return const0_rtx;
24597               }
24598         }
24599       else
24600         {
24601           if (VECTOR_MODE_P (mode))
24602             op = safe_vector_operand (op, mode);
24603 
24604           /* If we aren't optimizing, only allow one memory operand to
24605              be generated.  */
24606           if (memory_operand (op, mode))
24607             num_memory++;
24608 
24609           if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24610             {
24611               if (optimize || !match || num_memory > 1)
24612                 op = copy_to_mode_reg (mode, op);
24613             }
24614           else
24615             {
24616               op = copy_to_reg (op);
24617               op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24618             }
24619         }
24620 
24621       args[i].op = op;
24622       args[i].mode = mode;
24623     }
24624 
24625   switch (nargs)
24626     {
24627     case 1:
24628       pat = GEN_FCN (icode) (real_target, args[0].op);
24629       break;
24630     case 2:
24631       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24632       break;
24633     case 3:
24634       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24635                              args[2].op);
24636       break;
24637     case 4:
24638       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24639                              args[2].op, args[3].op);
24640       break;
24641     default:
24642       gcc_unreachable ();
24643     }
24644 
24645   if (! pat)
24646     return 0;
24647 
24648   emit_insn (pat);
24649   return target;
24650 }
24651 
24652 /* Subroutine of ix86_expand_builtin to take care of special insns
24653    with variable number of operands.  */
24654 
24655 static rtx
24656 ix86_expand_special_args_builtin (const struct builtin_description *d,
24657                                     tree exp, rtx target)
24658 {
24659   tree arg;
24660   rtx pat, op;
24661   unsigned int i, nargs, arg_adjust, memory;
24662   struct
24663     {
24664       rtx op;
24665       enum machine_mode mode;
24666     } args[2];
24667   enum insn_code icode = d->icode;
24668   bool last_arg_constant = false;
24669   const struct insn_data *insn_p = &insn_data[icode];
24670   enum machine_mode tmode = insn_p->operand[0].mode;
24671   enum { load, store } klass;
24672 
24673   switch ((enum ix86_special_builtin_type) d->flag)
24674     {
24675     case VOID_FTYPE_VOID:
24676       emit_insn (GEN_FCN (icode) (target));
24677       return 0;
24678     case V2DI_FTYPE_PV2DI:
24679     case V32QI_FTYPE_PCCHAR:
24680     case V16QI_FTYPE_PCCHAR:
24681     case V8SF_FTYPE_PCV4SF:
24682     case V8SF_FTYPE_PCFLOAT:
24683     case V4SF_FTYPE_PCFLOAT:
24684     case V4DF_FTYPE_PCV2DF:
24685     case V4DF_FTYPE_PCDOUBLE:
24686     case V2DF_FTYPE_PCDOUBLE:
24687       nargs = 1;
24688       klass = load;
24689       memory = 0;
24690       break;
24691     case VOID_FTYPE_PV2SF_V4SF:
24692     case VOID_FTYPE_PV4DI_V4DI:
24693     case VOID_FTYPE_PV2DI_V2DI:
24694     case VOID_FTYPE_PCHAR_V32QI:
24695     case VOID_FTYPE_PCHAR_V16QI:
24696     case VOID_FTYPE_PFLOAT_V8SF:
24697     case VOID_FTYPE_PFLOAT_V4SF:
24698     case VOID_FTYPE_PDOUBLE_V4DF:
24699     case VOID_FTYPE_PDOUBLE_V2DF:
24700     case VOID_FTYPE_PDI_DI:
24701     case VOID_FTYPE_PINT_INT:
24702       nargs = 1;
24703       klass = store;
24704       /* Reserve memory operand for target.  */
24705       memory = ARRAY_SIZE (args);
24706       break;
24707     case V4SF_FTYPE_V4SF_PCV2SF:
24708     case V2DF_FTYPE_V2DF_PCDOUBLE:
24709       nargs = 2;
24710       klass = load;
24711       memory = 1;
24712       break;
24713     case V8SF_FTYPE_PCV8SF_V8SF:
24714     case V4DF_FTYPE_PCV4DF_V4DF:
24715     case V4SF_FTYPE_PCV4SF_V4SF:
24716     case V2DF_FTYPE_PCV2DF_V2DF:
24717       nargs = 2;
24718       klass = load;
24719       memory = 0;
24720       break;
24721     case VOID_FTYPE_PV8SF_V8SF_V8SF:
24722     case VOID_FTYPE_PV4DF_V4DF_V4DF:
24723     case VOID_FTYPE_PV4SF_V4SF_V4SF:
24724     case VOID_FTYPE_PV2DF_V2DF_V2DF:
24725       nargs = 2;
24726       klass = store;
24727       /* Reserve memory operand for target.  */
24728       memory = ARRAY_SIZE (args);
24729       break;
24730     default:
24731       gcc_unreachable ();
24732     }
24733 
24734   gcc_assert (nargs <= ARRAY_SIZE (args));
24735 
24736   if (klass == store)
24737     {
24738       arg = CALL_EXPR_ARG (exp, 0);
24739       op = expand_normal (arg);
24740       gcc_assert (target == 0);
24741       target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
24742       arg_adjust = 1;
24743     }
24744   else
24745     {
24746       arg_adjust = 0;
24747       if (optimize
24748           || target == 0
24749           || GET_MODE (target) != tmode
24750           || ! (*insn_p->operand[0].predicate) (target, tmode))
24751         target = gen_reg_rtx (tmode);
24752     }
24753 
24754   for (i = 0; i < nargs; i++)
24755     {
24756       enum machine_mode mode = insn_p->operand[i + 1].mode;
24757       bool match;
24758 
24759       arg = CALL_EXPR_ARG (exp, i + arg_adjust);
24760       op = expand_normal (arg);
24761       match = (*insn_p->operand[i + 1].predicate) (op, mode);
24762 
24763       if (last_arg_constant && (i + 1) == nargs)
24764         {
24765           if (!match)
24766             switch (icode)
24767               {
              default:
24769                 error ("the last argument must be an 8-bit immediate");
24770                 return const0_rtx;
24771               }
24772         }
24773       else
24774         {
24775           if (i == memory)
24776             {
24777               /* This must be the memory operand.  */
24778               op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
24779               gcc_assert (GET_MODE (op) == mode
24780                           || GET_MODE (op) == VOIDmode);
24781             }
24782           else
24783             {
              /* This must be a register.  */
24785               if (VECTOR_MODE_P (mode))
24786                 op = safe_vector_operand (op, mode);
24787 
24788               gcc_assert (GET_MODE (op) == mode
24789                           || GET_MODE (op) == VOIDmode);
24790               op = copy_to_mode_reg (mode, op);
24791             }
24792         }
24793 
24794       args[i].op = op;
24795       args[i].mode = mode;
24796     }
24797 
24798   switch (nargs)
24799     {
24800     case 1:
24801       pat = GEN_FCN (icode) (target, args[0].op);
24802       break;
24803     case 2:
24804       pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24805       break;
24806     default:
24807       gcc_unreachable ();
24808     }
24809 
24810   if (! pat)
24811     return 0;
24812   emit_insn (pat);
24813   return klass == store ? 0 : target;
24814 }
24815 
24816 /* Return the integer constant in ARG.  Constrain it to be in the range
24817    of the subparts of VEC_TYPE; issue an error if not.  */
24818 
24819 static int
24820 get_element_number (tree vec_type, tree arg)
24821 {
24822   unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
24823 
24824   if (!host_integerp (arg, 1)
24825       || (elt = tree_low_cst (arg, 1), elt > max))
24826     {
24827       error ("selector must be an integer constant in the range 0..%wi", max);
24828       return 0;
24829     }
24830 
24831   return elt;
24832 }
24833 
24834 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
24835    ix86_expand_vector_init.  We DO have language-level syntax for this, in
24836    the form of  (type){ init-list }.  Except that since we can't place emms
24837    instructions from inside the compiler, we can't allow the use of MMX
24838    registers unless the user explicitly asks for it.  So we do *not* define
24839    vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */
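/* E.g., the IX86_BUILTIN_VEC_INIT_V2SI builtin lands here: each scalar
   argument is taken as the lowpart of the vector's inner mode and the
   elements are handed to ix86_expand_vector_init as a PARALLEL.  */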
24842 
24843 static rtx
24844 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
24845 {
24846   enum machine_mode tmode = TYPE_MODE (type);
24847   enum machine_mode inner_mode = GET_MODE_INNER (tmode);
24848   int i, n_elt = GET_MODE_NUNITS (tmode);
24849   rtvec v = rtvec_alloc (n_elt);
24850 
24851   gcc_assert (VECTOR_MODE_P (tmode));
24852   gcc_assert (call_expr_nargs (exp) == n_elt);
24853 
24854   for (i = 0; i < n_elt; ++i)
24855     {
24856       rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
24857       RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
24858     }
24859 
24860   if (!target || !register_operand (target, tmode))
24861     target = gen_reg_rtx (tmode);
24862 
24863   ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
24864   return target;
24865 }
24866 
24867 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
24868    ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
24869    had a language-level syntax for referencing vector elements.  */
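/* E.g., the IX86_BUILTIN_VEC_EXT_V4SI builtin lands here: arg0 is the
   vector and arg1 the constant element selector validated by
   get_element_number.  */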
24870 
24871 static rtx
24872 ix86_expand_vec_ext_builtin (tree exp, rtx target)
24873 {
24874   enum machine_mode tmode, mode0;
24875   tree arg0, arg1;
24876   int elt;
24877   rtx op0;
24878 
24879   arg0 = CALL_EXPR_ARG (exp, 0);
24880   arg1 = CALL_EXPR_ARG (exp, 1);
24881 
24882   op0 = expand_normal (arg0);
24883   elt = get_element_number (TREE_TYPE (arg0), arg1);
24884 
24885   tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24886   mode0 = TYPE_MODE (TREE_TYPE (arg0));
24887   gcc_assert (VECTOR_MODE_P (mode0));
24888 
24889   op0 = force_reg (mode0, op0);
24890 
24891   if (optimize || !target || !register_operand (target, tmode))
24892     target = gen_reg_rtx (tmode);
24893 
24894   ix86_expand_vector_extract (true, target, op0, elt);
24895 
24896   return target;
24897 }
24898 
24899 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
24900    ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
24901    a language-level syntax for referencing vector elements.  */
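/* E.g., the IX86_BUILTIN_VEC_SET_V8HI builtin lands here with the
   source vector, the new scalar value and a constant element selector,
   in that order.  */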
24902 
24903 static rtx
24904 ix86_expand_vec_set_builtin (tree exp)
24905 {
24906   enum machine_mode tmode, mode1;
24907   tree arg0, arg1, arg2;
24908   int elt;
24909   rtx op0, op1, target;
24910 
24911   arg0 = CALL_EXPR_ARG (exp, 0);
24912   arg1 = CALL_EXPR_ARG (exp, 1);
24913   arg2 = CALL_EXPR_ARG (exp, 2);
24914 
24915   tmode = TYPE_MODE (TREE_TYPE (arg0));
24916   mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24917   gcc_assert (VECTOR_MODE_P (tmode));
24918 
24919   op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
24920   op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
24921   elt = get_element_number (TREE_TYPE (arg0), arg2);
24922 
24923   if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
24924     op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
24925 
24926   op0 = force_reg (tmode, op0);
24927   op1 = force_reg (mode1, op1);
24928 
  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it, and return it as the target.  */
24931   target = gen_reg_rtx (tmode);
24932   emit_move_insn (target, op0);
24933   ix86_expand_vector_set (true, target, op1, elt);
24934 
24935   return target;
24936 }
24937 
24938 /* Expand an expression EXP that calls a built-in function,
24939    with result going to TARGET if that's convenient
24940    (and in mode MODE if that's convenient).
24941    SUBTARGET may be used as the target for computing one of EXP's operands.
24942    IGNORE is nonzero if the value is to be ignored.  */
24943 
24944 static rtx
24945 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
24946                      enum machine_mode mode ATTRIBUTE_UNUSED,
24947                      int ignore ATTRIBUTE_UNUSED)
24948 {
24949   const struct builtin_description *d;
24950   size_t i;
24951   enum insn_code icode;
24952   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
24953   tree arg0, arg1, arg2;
24954   rtx op0, op1, op2, pat;
24955   enum machine_mode mode0, mode1, mode2;
24956   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
24957 
24958   /* Determine whether the builtin function is available under the current ISA.
24959      Originally the builtin was not created if it wasn't applicable to the
24960      current ISA based on the command line switches.  With function specific
24961      options, we need to check in the context of the function making the call
24962      whether it is supported.  */
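  /* E.g., a function compiled with a target ("sse4.2") attribute may
     use SSE4.2 builtins even when the translation unit's default ISA
     lacks them, so the test must use the ISA flags in effect for the
     current function.  */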
24963   if (ix86_builtins_isa[fcode].isa
24964       && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
24965     {
24966       char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
24967                                        NULL, NULL, false);
24968 
24969       if (!opts)
24970         error ("%qE needs unknown isa option", fndecl);
      else
        {
          error ("%qE needs isa option %s", fndecl, opts);
          free (opts);
        }
24977       return const0_rtx;
24978     }
24979 
24980   switch (fcode)
24981     {
24982     case IX86_BUILTIN_MASKMOVQ:
24983     case IX86_BUILTIN_MASKMOVDQU:
24984       icode = (fcode == IX86_BUILTIN_MASKMOVQ
24985                ? CODE_FOR_mmx_maskmovq
24986                : CODE_FOR_sse2_maskmovdqu);
24987       /* Note the arg order is different from the operand order.  */
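      /* The third call argument is the store address: it becomes
         operand 0 (wrapped in a MEM below), while the data and mask
         arguments become operands 1 and 2.  */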
24988       arg1 = CALL_EXPR_ARG (exp, 0);
24989       arg2 = CALL_EXPR_ARG (exp, 1);
24990       arg0 = CALL_EXPR_ARG (exp, 2);
24991       op0 = expand_normal (arg0);
24992       op1 = expand_normal (arg1);
24993       op2 = expand_normal (arg2);
24994       mode0 = insn_data[icode].operand[0].mode;
24995       mode1 = insn_data[icode].operand[1].mode;
24996       mode2 = insn_data[icode].operand[2].mode;
24997 
24998       op0 = force_reg (Pmode, op0);
24999       op0 = gen_rtx_MEM (mode1, op0);
25000 
25001       if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25002         op0 = copy_to_mode_reg (mode0, op0);
25003       if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25004         op1 = copy_to_mode_reg (mode1, op1);
25005       if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25006         op2 = copy_to_mode_reg (mode2, op2);
25007       pat = GEN_FCN (icode) (op0, op1, op2);
25008       if (! pat)
25009         return 0;
25010       emit_insn (pat);
25011       return 0;
25012 
25013     case IX86_BUILTIN_LDMXCSR:
25014       op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25015       target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25016       emit_move_insn (target, op0);
25017       emit_insn (gen_sse_ldmxcsr (target));
25018       return 0;
25019 
25020     case IX86_BUILTIN_STMXCSR:
25021       target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25022       emit_insn (gen_sse_stmxcsr (target));
25023       return copy_to_mode_reg (SImode, target);
25024 
25025     case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
        op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;
25034 
25035     case IX86_BUILTIN_MONITOR:
25036       arg0 = CALL_EXPR_ARG (exp, 0);
25037       arg1 = CALL_EXPR_ARG (exp, 1);
25038       arg2 = CALL_EXPR_ARG (exp, 2);
25039       op0 = expand_normal (arg0);
25040       op1 = expand_normal (arg1);
25041       op2 = expand_normal (arg2);
25042       if (!REG_P (op0))
25043         op0 = copy_to_mode_reg (Pmode, op0);
25044       if (!REG_P (op1))
25045         op1 = copy_to_mode_reg (SImode, op1);
25046       if (!REG_P (op2))
25047         op2 = copy_to_mode_reg (SImode, op2);
25048       emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25049       return 0;
25050 
25051     case IX86_BUILTIN_MWAIT:
25052       arg0 = CALL_EXPR_ARG (exp, 0);
25053       arg1 = CALL_EXPR_ARG (exp, 1);
25054       op0 = expand_normal (arg0);
25055       op1 = expand_normal (arg1);
25056       if (!REG_P (op0))
25057         op0 = copy_to_mode_reg (SImode, op0);
25058       if (!REG_P (op1))
25059         op1 = copy_to_mode_reg (SImode, op1);
25060       emit_insn (gen_sse3_mwait (op0, op1));
25061       return 0;
25062 
25063     case IX86_BUILTIN_VEC_INIT_V2SI:
25064     case IX86_BUILTIN_VEC_INIT_V4HI:
25065     case IX86_BUILTIN_VEC_INIT_V8QI:
25066       return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25067 
25068     case IX86_BUILTIN_VEC_EXT_V2DF:
25069     case IX86_BUILTIN_VEC_EXT_V2DI:
25070     case IX86_BUILTIN_VEC_EXT_V4SF:
25071     case IX86_BUILTIN_VEC_EXT_V4SI:
25072     case IX86_BUILTIN_VEC_EXT_V8HI:
25073     case IX86_BUILTIN_VEC_EXT_V2SI:
25074     case IX86_BUILTIN_VEC_EXT_V4HI:
25075     case IX86_BUILTIN_VEC_EXT_V16QI:
25076       return ix86_expand_vec_ext_builtin (exp, target);
25077 
25078     case IX86_BUILTIN_VEC_SET_V2DI:
25079     case IX86_BUILTIN_VEC_SET_V4SF:
25080     case IX86_BUILTIN_VEC_SET_V4SI:
25081     case IX86_BUILTIN_VEC_SET_V8HI:
25082     case IX86_BUILTIN_VEC_SET_V4HI:
25083     case IX86_BUILTIN_VEC_SET_V16QI:
25084       return ix86_expand_vec_set_builtin (exp);
25085 
25086     case IX86_BUILTIN_INFQ:
25087       {
25088         REAL_VALUE_TYPE inf;
25089         rtx tmp;
25090 
25091         real_inf (&inf);
25092         tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25093 
25094         tmp = validize_mem (force_const_mem (mode, tmp));
25095 
25096         if (target == 0)
25097           target = gen_reg_rtx (mode);
25098 
25099         emit_move_insn (target, tmp);
25100         return target;
25101       }
25102 
25103     default:
25104       break;
25105     }
25106 
25107   for (i = 0, d = bdesc_special_args;
25108        i < ARRAY_SIZE (bdesc_special_args);
25109        i++, d++)
25110     if (d->code == fcode)
25111       return ix86_expand_special_args_builtin (d, exp, target);
25112 
25113   for (i = 0, d = bdesc_args;
25114        i < ARRAY_SIZE (bdesc_args);
25115        i++, d++)
25116     if (d->code == fcode)
25117       switch (fcode)
25118         {
25119         case IX86_BUILTIN_FABSQ:
25120         case IX86_BUILTIN_COPYSIGNQ:
25121           if (!TARGET_SSE2)
25122             /* Emit a normal call if SSE2 isn't available.  */
25123             return expand_call (exp, target, ignore);
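          /* FALLTHRU */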
25124         default:
25125           return ix86_expand_args_builtin (d, exp, target);
25126         }
25127 
25128   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25129     if (d->code == fcode)
25130       return ix86_expand_sse_comi (d, exp, target);
25131 
25132   for (i = 0, d = bdesc_pcmpestr;
25133        i < ARRAY_SIZE (bdesc_pcmpestr);
25134        i++, d++)
25135     if (d->code == fcode)
25136       return ix86_expand_sse_pcmpestr (d, exp, target);
25137 
25138   for (i = 0, d = bdesc_pcmpistr;
25139        i < ARRAY_SIZE (bdesc_pcmpistr);
25140        i++, d++)
25141     if (d->code == fcode)
25142       return ix86_expand_sse_pcmpistr (d, exp, target);
25143 
25144   for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25145     if (d->code == fcode)
25146       return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25147                                             (enum multi_arg_type)d->flag,
25148                                             d->comparison);
25149 
25150   gcc_unreachable ();
25151 }
25152 
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN, result vector type TYPE_OUT and input
   vector type TYPE_IN, or NULL_TREE if it is not available.  */
25156 
25157 static tree
25158 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
25159                                   tree type_in)
25160 {
25161   enum machine_mode in_mode, out_mode;
25162   int in_n, out_n;
25163 
25164   if (TREE_CODE (type_out) != VECTOR_TYPE
25165       || TREE_CODE (type_in) != VECTOR_TYPE)
25166     return NULL_TREE;
25167 
25168   out_mode = TYPE_MODE (TREE_TYPE (type_out));
25169   out_n = TYPE_VECTOR_SUBPARTS (type_out);
25170   in_mode = TYPE_MODE (TREE_TYPE (type_in));
25171   in_n = TYPE_VECTOR_SUBPARTS (type_in);
25172 
25173   switch (fn)
25174     {
25175     case BUILT_IN_SQRT:
25176       if (out_mode == DFmode && out_n == 2
25177           && in_mode == DFmode && in_n == 2)
25178         return ix86_builtins[IX86_BUILTIN_SQRTPD];
25179       break;
25180 
25181     case BUILT_IN_SQRTF:
25182       if (out_mode == SFmode && out_n == 4
25183           && in_mode == SFmode && in_n == 4)
25184         return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25185       break;
25186 
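    /* For lrint on V2DF the element counts of the types differ (two
       doubles in, four ints out): the conversion uses a packing
       builtin that combines two V2DF inputs into one V4SI result.  */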
25187     case BUILT_IN_LRINT:
25188       if (out_mode == SImode && out_n == 4
25189           && in_mode == DFmode && in_n == 2)
25190         return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25191       break;
25192 
25193     case BUILT_IN_LRINTF:
25194       if (out_mode == SImode && out_n == 4
25195           && in_mode == SFmode && in_n == 4)
25196         return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25197       break;
25198 
25199     default:
25200       ;
25201     }
25202 
25203   /* Dispatch to a handler for a vectorization library.  */
25204   if (ix86_veclib_handler)
25205     return (*ix86_veclib_handler)(fn, type_out, type_in);
25206 
25207   return NULL_TREE;
25208 }
25209 
25210 /* Handler for an SVML-style interface to
25211    a library with vectorized intrinsics.  */
25212 
25213 static tree
25214 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25215 {
25216   char name[20];
25217   tree fntype, new_fndecl, args;
25218   unsigned arity;
25219   const char *bname;
25220   enum machine_mode el_mode, in_mode;
25221   int n, in_n;
25222 
  /* SVML is suitable for unsafe math only.  */
25224   if (!flag_unsafe_math_optimizations)
25225     return NULL_TREE;
25226 
25227   el_mode = TYPE_MODE (TREE_TYPE (type_out));
25228   n = TYPE_VECTOR_SUBPARTS (type_out);
25229   in_mode = TYPE_MODE (TREE_TYPE (type_in));
25230   in_n = TYPE_VECTOR_SUBPARTS (type_in);
25231   if (el_mode != in_mode
25232       || n != in_n)
25233     return NULL_TREE;
25234 
25235   switch (fn)
25236     {
25237     case BUILT_IN_EXP:
25238     case BUILT_IN_LOG:
25239     case BUILT_IN_LOG10:
25240     case BUILT_IN_POW:
25241     case BUILT_IN_TANH:
25242     case BUILT_IN_TAN:
25243     case BUILT_IN_ATAN:
25244     case BUILT_IN_ATAN2:
25245     case BUILT_IN_ATANH:
25246     case BUILT_IN_CBRT:
25247     case BUILT_IN_SINH:
25248     case BUILT_IN_SIN:
25249     case BUILT_IN_ASINH:
25250     case BUILT_IN_ASIN:
25251     case BUILT_IN_COSH:
25252     case BUILT_IN_COS:
25253     case BUILT_IN_ACOSH:
25254     case BUILT_IN_ACOS:
25255       if (el_mode != DFmode || n != 2)
25256         return NULL_TREE;
25257       break;
25258 
25259     case BUILT_IN_EXPF:
25260     case BUILT_IN_LOGF:
25261     case BUILT_IN_LOG10F:
25262     case BUILT_IN_POWF:
25263     case BUILT_IN_TANHF:
25264     case BUILT_IN_TANF:
25265     case BUILT_IN_ATANF:
25266     case BUILT_IN_ATAN2F:
25267     case BUILT_IN_ATANHF:
25268     case BUILT_IN_CBRTF:
25269     case BUILT_IN_SINHF:
25270     case BUILT_IN_SINF:
25271     case BUILT_IN_ASINHF:
25272     case BUILT_IN_ASINF:
25273     case BUILT_IN_COSHF:
25274     case BUILT_IN_COSF:
25275     case BUILT_IN_ACOSHF:
25276     case BUILT_IN_ACOSF:
25277       if (el_mode != SFmode || n != 4)
25278         return NULL_TREE;
25279       break;
25280 
25281     default:
25282       return NULL_TREE;
25283     }
25284 
25285   bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25286 
25287   if (fn == BUILT_IN_LOGF)
25288     strcpy (name, "vmlsLn4");
25289   else if (fn == BUILT_IN_LOG)
25290     strcpy (name, "vmldLn2");
25291   else if (n == 4)
25292     {
25293       sprintf (name, "vmls%s", bname+10);
25294       name[strlen (name)-1] = '4';
25295     }
25296   else
25297     sprintf (name, "vmld%s2", bname+10);
25298 
  /* Uppercase the first letter of the math-function part of the name
     (the character after the "vmls"/"vmld" prefix) by clearing its
     ASCII lowercase bit.  */
  name[4] &= ~0x20;
25301 
25302   arity = 0;
25303   for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25304        args = TREE_CHAIN (args))
25305     arity++;
25306 
25307   if (arity == 1)
25308     fntype = build_function_type_list (type_out, type_in, NULL);
25309   else
25310     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25311 
25312   /* Build a function declaration for the vectorized function.  */
25313   new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25314   TREE_PUBLIC (new_fndecl) = 1;
25315   DECL_EXTERNAL (new_fndecl) = 1;
25316   DECL_IS_NOVOPS (new_fndecl) = 1;
25317   TREE_READONLY (new_fndecl) = 1;
25318 
25319   return new_fndecl;
25320 }
25321 
25322 /* Handler for an ACML-style interface to
25323    a library with vectorized intrinsics.  */
25324 
25325 static tree
25326 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25327 {
25328   char name[20] = "__vr.._";
25329   tree fntype, new_fndecl, args;
25330   unsigned arity;
25331   const char *bname;
25332   enum machine_mode el_mode, in_mode;
25333   int n, in_n;
25334 
  /* ACML is 64-bit only and suitable for unsafe math only, as it
     does not correctly support parts of IEEE arithmetic with the
     required precision, such as denormals.  */
25338   if (!TARGET_64BIT
25339       || !flag_unsafe_math_optimizations)
25340     return NULL_TREE;
25341 
25342   el_mode = TYPE_MODE (TREE_TYPE (type_out));
25343   n = TYPE_VECTOR_SUBPARTS (type_out);
25344   in_mode = TYPE_MODE (TREE_TYPE (type_in));
25345   in_n = TYPE_VECTOR_SUBPARTS (type_in);
25346   if (el_mode != in_mode
25347       || n != in_n)
25348     return NULL_TREE;
25349 
25350   switch (fn)
25351     {
25352     case BUILT_IN_SIN:
25353     case BUILT_IN_COS:
25354     case BUILT_IN_EXP:
25355     case BUILT_IN_LOG:
25356     case BUILT_IN_LOG2:
25357     case BUILT_IN_LOG10:
25358       name[4] = 'd';
25359       name[5] = '2';
25360       if (el_mode != DFmode
25361           || n != 2)
25362         return NULL_TREE;
25363       break;
25364 
25365     case BUILT_IN_SINF:
25366     case BUILT_IN_COSF:
25367     case BUILT_IN_EXPF:
25368     case BUILT_IN_POWF:
25369     case BUILT_IN_LOGF:
25370     case BUILT_IN_LOG2F:
25371     case BUILT_IN_LOG10F:
25372       name[4] = 's';
25373       name[5] = '4';
25374       if (el_mode != SFmode
25375           || n != 4)
25376         return NULL_TREE;
25377       break;
25378 
25379     default:
25380       return NULL_TREE;
25381     }
25382 
25383   bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25384   sprintf (name + 7, "%s", bname+10);
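
  /* E.g., BUILT_IN_SIN yields "__vrd2_sin" and BUILT_IN_SINF yields
     "__vrs4_sinf".  */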
25385 
25386   arity = 0;
25387   for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25388        args = TREE_CHAIN (args))
25389     arity++;
25390 
25391   if (arity == 1)
25392     fntype = build_function_type_list (type_out, type_in, NULL);
25393   else
25394     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25395 
25396   /* Build a function declaration for the vectorized function.  */
25397   new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25398   TREE_PUBLIC (new_fndecl) = 1;
25399   DECL_EXTERNAL (new_fndecl) = 1;
25400   DECL_IS_NOVOPS (new_fndecl) = 1;
25401   TREE_READONLY (new_fndecl) = 1;
25402 
25403   return new_fndecl;
25404 }
25405 
25406 
25407 /* Returns a decl of a function that implements conversion of an integer vector
25408    into a floating-point vector, or vice-versa. TYPE is the type of the integer
25409    side of the conversion.
25410    Return NULL_TREE if it is not available.  */
25411 
25412 static tree
25413 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25414 {
25415   if (!TARGET_SSE2 || TREE_CODE (type) != VECTOR_TYPE
25416       /* There are only conversions from/to signed integers.  */
25417       || TYPE_UNSIGNED (TREE_TYPE (type)))
25418     return NULL_TREE;
25419 
25420   switch (code)
25421     {
25422     case FLOAT_EXPR:
25423       switch (TYPE_MODE (type))
25424         {
25425         case V4SImode:
25426           return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
25427         default:
25428           return NULL_TREE;
25429         }
25430 
25431     case FIX_TRUNC_EXPR:
25432       switch (TYPE_MODE (type))
25433         {
25434         case V4SImode:
25435           return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25436         default:
25437           return NULL_TREE;
25438         }
25439     default:
25440       return NULL_TREE;
25441 
25442     }
25443 }
25444 
/* Returns a decl of a target-specific builtin function that implements
   the reciprocal of the function FN, or NULL_TREE if not available.  */
25447 
25448 static tree
25449 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25450                          bool sqrt ATTRIBUTE_UNUSED)
25451 {
25452   if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25453          && flag_finite_math_only && !flag_trapping_math
25454          && flag_unsafe_math_optimizations))
25455     return NULL_TREE;
25456 
25457   if (md_fn)
25458     /* Machine dependent builtins.  */
25459     switch (fn)
25460       {
25461         /* Vectorized version of sqrt to rsqrt conversion.  */
25462       case IX86_BUILTIN_SQRTPS_NR:
25463         return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25464 
25465       default:
25466         return NULL_TREE;
25467       }
25468   else
25469     /* Normal builtins.  */
25470     switch (fn)
25471       {
25472         /* Sqrt to rsqrt conversion.  */
25473       case BUILT_IN_SQRTF:
25474         return ix86_builtins[IX86_BUILTIN_RSQRTF];
25475 
25476       default:
25477         return NULL_TREE;
25478       }
25479 }
25480 
/* Store OPERAND to memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
25483 rtx
25484 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25485 {
25486   rtx result;
25487 
25488   gcc_assert (reload_completed);
25489   if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25490     {
25491       result = gen_rtx_MEM (mode,
25492                             gen_rtx_PLUS (Pmode,
25493                                           stack_pointer_rtx,
25494                                           GEN_INT (-RED_ZONE_SIZE)));
25495       emit_move_insn (result, operand);
25496     }
25497   else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25498     {
25499       switch (mode)
25500         {
25501         case HImode:
25502         case SImode:
25503           operand = gen_lowpart (DImode, operand);
25504           /* FALLTHRU */
25505         case DImode:
25506           emit_insn (
25507                       gen_rtx_SET (VOIDmode,
25508                                    gen_rtx_MEM (DImode,
25509                                                 gen_rtx_PRE_DEC (DImode,
25510                                                         stack_pointer_rtx)),
25511                                    operand));
25512           break;
25513         default:
25514           gcc_unreachable ();
25515         }
25516       result = gen_rtx_MEM (mode, stack_pointer_rtx);
25517     }
25518   else
25519     {
25520       switch (mode)
25521         {
25522         case DImode:
25523           {
25524             rtx operands[2];
25525             split_di (&operand, 1, operands, operands + 1);
25526             emit_insn (
25527                         gen_rtx_SET (VOIDmode,
25528                                      gen_rtx_MEM (SImode,
25529                                                   gen_rtx_PRE_DEC (Pmode,
25530                                                         stack_pointer_rtx)),
25531                                      operands[1]));
25532             emit_insn (
25533                         gen_rtx_SET (VOIDmode,
25534                                      gen_rtx_MEM (SImode,
25535                                                   gen_rtx_PRE_DEC (Pmode,
25536                                                         stack_pointer_rtx)),
25537                                      operands[0]));
25538           }
25539           break;
25540         case HImode:
25541           /* Store HImodes as SImodes.  */
25542           operand = gen_lowpart (SImode, operand);
25543           /* FALLTHRU */
25544         case SImode:
25545           emit_insn (
25546                       gen_rtx_SET (VOIDmode,
25547                                    gen_rtx_MEM (GET_MODE (operand),
25548                                                 gen_rtx_PRE_DEC (SImode,
25549                                                         stack_pointer_rtx)),
25550                                    operand));
25551           break;
25552         default:
25553           gcc_unreachable ();
25554         }
25555       result = gen_rtx_MEM (mode, stack_pointer_rtx);
25556     }
25557   return result;
25558 }
25559 
/* Free the operand from memory.  */
25561 void
25562 ix86_free_from_memory (enum machine_mode mode)
25563 {
25564   if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25565     {
25566       int size;
25567 
25568       if (mode == DImode || TARGET_64BIT)
25569         size = 8;
25570       else
25571         size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be
         converted to a pop or an add instruction if registers are
         available.  */
25574       emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25575                               gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25576                                             GEN_INT (size))));
25577     }
25578 }
25579 
25580 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25581    QImode must go into class Q_REGS.
25582    Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
25583    movdf to do mem-to-mem moves through integer regs.  */
25584 enum reg_class
25585 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25586 {
25587   enum machine_mode mode = GET_MODE (x);
25588 
  /* We're only allowed to return a subclass of REGCLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
25591   if (regclass == NO_REGS)
25592     return NO_REGS;
25593 
25594   /* All classes can load zeros.  */
25595   if (x == CONST0_RTX (mode))
25596     return regclass;
25597 
25598   /* Force constants into memory if we are loading a (nonzero) constant into
25599      an MMX or SSE register.  This is because there are no MMX/SSE instructions
25600      to load from a constant.  */
25601   if (CONSTANT_P (x)
25602       && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25603     return NO_REGS;
25604 
25605   /* Prefer SSE regs only, if we can use them for math.  */
25606   if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25607     return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25608 
25609   /* Floating-point constants need more complex checks.  */
25610   if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25611     {
25612       /* General regs can load everything.  */
25613       if (reg_class_subset_p (regclass, GENERAL_REGS))
25614         return regclass;
25615 
25616       /* Floats can load 0 and 1 plus some others.  Note that we eliminated
25617          zero above.  We only want to wind up preferring 80387 registers if
25618          we plan on doing computation with them.  */
25619       if (TARGET_80387
25620           && standard_80387_constant_p (x))
25621         {
25622           /* Limit class to non-sse.  */
25623           if (regclass == FLOAT_SSE_REGS)
25624             return FLOAT_REGS;
25625           if (regclass == FP_TOP_SSE_REGS)
25626             return FP_TOP_REG;
25627           if (regclass == FP_SECOND_SSE_REGS)
25628             return FP_SECOND_REG;
25629           if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25630             return regclass;
25631         }
25632 
25633       return NO_REGS;
25634     }
25635 
25636   /* Generally when we see PLUS here, it's the function invariant
25637      (plus soft-fp const_int).  Which can only be computed into general
25638      regs.  */
25639   if (GET_CODE (x) == PLUS)
25640     return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25641 
25642   /* QImode constants are easy to load, but non-constant QImode data
25643      must go into Q_REGS.  */
25644   if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25645     {
25646       if (reg_class_subset_p (regclass, Q_REGS))
25647         return regclass;
25648       if (reg_class_subset_p (Q_REGS, regclass))
25649         return Q_REGS;
25650       return NO_REGS;
25651     }
25652 
25653   return regclass;
25654 }
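
/* Worked example (illustrative): with x87 math, reloading the SFmode
   constant 1.0 into FLOAT_REGS reaches the CONST_DOUBLE branch above;
   standard_80387_constant_p recognizes 1.0 (fld1), so FLOAT_REGS is
   returned unchanged.  An arbitrary constant such as 3.14 is not a
   standard 80387 constant, so NO_REGS is returned and the value is
   forced into the constant pool instead.  */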
25655 
25656 /* Discourage putting floating-point values in SSE registers unless
25657    SSE math is being used, and likewise for the 387 registers.  */
25658 enum reg_class
25659 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25660 {
25661   enum machine_mode mode = GET_MODE (x);
25662 
25663   /* Restrict the output reload class to the register bank that we are doing
25664      math on.  If we cannot return a subset of REGCLASS, reject this
25665      alternative: if reload cannot do this, it will still use its choice.  */
25667   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25668     return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
25669 
25670   if (X87_FLOAT_MODE_P (mode))
25671     {
25672       if (regclass == FP_TOP_SSE_REGS)
25673         return FP_TOP_REG;
25674       else if (regclass == FP_SECOND_SSE_REGS)
25675         return FP_SECOND_REG;
25676       else
25677         return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
25678     }
25679 
25680   return regclass;
25681 }
25682 
25683 static enum reg_class
25684 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
25685                        enum machine_mode mode,
25686                        secondary_reload_info *sri ATTRIBUTE_UNUSED)
25687 {
25688   /* QImode spills from non-QI registers require an
25689      intermediate register on 32-bit targets.  */
25690   if (!in_p && mode == QImode && !TARGET_64BIT
25691       && (rclass == GENERAL_REGS
25692           || rclass == LEGACY_REGS
25693           || rclass == INDEX_REGS))
25694     {
25695       int regno;
25696 
25697       if (REG_P (x))
25698         regno = REGNO (x);
25699       else
25700         regno = -1;
25701 
25702       if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
25703         regno = true_regnum (x);
25704 
25705       /* Return Q_REGS if the operand is in memory.  */
25706       if (regno == -1)
25707         return Q_REGS;
25708     }
25709 
25710   return NO_REGS;
25711 }
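
/* Example (illustrative): on ia32 only %al/%bl/%cl/%dl are byte
   addressable, so a QImode store from a value that reload has placed in,
   say, %esi cannot be done directly; when the destination is a MEM the
   hook above requests a Q_REGS scratch through which the byte is moved.  */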
25712 
25713 /* If we are copying between general and FP registers, we need a memory
25714    location.  The same is true for SSE and MMX registers.
25715 
25716    To optimize register_move_cost performance, define an inline variant.
25717 
25718    The macro can't work reliably when one of the CLASSES is a class containing
25719    registers from multiple units (SSE, MMX, integer).  We avoid this by never
25720    combining those units in a single alternative in the machine description.
25721    Ensure that this constraint holds to avoid unexpected surprises.
25722 
25723    When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
25724    enforce these sanity checks.  */
25725 
25726 static inline int
25727 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25728                               enum machine_mode mode, int strict)
25729 {
25730   if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
25731       || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
25732       || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
25733       || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
25734       || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
25735       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
25736     {
25737       gcc_assert (!strict);
25738       return true;
25739     }
25740 
25741   if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
25742     return true;
25743 
25744   /* ??? This is a lie.  We do have moves between mmx/general, and for
25745      mmx/sse2.  But by saying we need secondary memory we discourage the
25746      register allocator from using the mmx registers unless needed.  */
25747   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
25748     return true;
25749 
25750   if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25751     {
25752       /* SSE1 doesn't have any direct moves from other classes.  */
25753       if (!TARGET_SSE2)
25754         return true;
25755 
25756       /* If the target says that inter-unit moves are more expensive
25757          than moving through memory, then don't generate them.  */
25758       if (!TARGET_INTER_UNIT_MOVES)
25759         return true;
25760 
25761       /* Between SSE and general, we have moves no larger than word size.  */
25762       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
25763         return true;
25764     }
25765 
25766   return false;
25767 }
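
/* Worked example (illustrative): on an ia32 target with SSE2 and
   inter-unit moves enabled, an SImode copy between GENERAL_REGS and
   SSE_REGS needs no secondary memory (a movd suffices and 4 bytes does
   not exceed UNITS_PER_WORD), while a DImode copy between the same
   classes does, since GET_MODE_SIZE (DImode) == 8 exceeds the 4-byte
   word size.  */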
25768 
25769 int
25770 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25771                               enum machine_mode mode, int strict)
25772 {
25773   return inline_secondary_memory_needed (class1, class2, mode, strict);
25774 }
25775 
25776 /* Return true if the registers in CLASS cannot represent the change from
25777    modes FROM to TO.  */
25778 
25779 bool
25780 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
25781                                enum reg_class regclass)
25782 {
25783   if (from == to)
25784     return false;
25785 
25786   /* x87 registers can't do subreg at all, as all values are reformatted
25787      to extended precision.  */
25788   if (MAYBE_FLOAT_CLASS_P (regclass))
25789     return true;
25790 
25791   if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
25792     {
25793       /* Vector registers do not support QI or HImode loads.  If we don't
25794          disallow a change to these modes, reload will assume it's ok to
25795          drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
25796          the vec_dupv4hi pattern.  */
25797       if (GET_MODE_SIZE (from) < 4)
25798         return true;
25799 
25800       /* Vector registers do not support subreg with nonzero offsets, which
25801          are otherwise valid for integer registers.  Since we can't see
25802          whether we have a nonzero offset from here, prohibit all
25803          nonparadoxical subregs changing size.  */
25804       if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
25805         return true;
25806     }
25807 
25808   return false;
25809 }
25810 
25811 /* Return the cost of moving data of mode M between a
25812    register and memory.  A value of 2 is the default; this cost is
25813    relative to those in `REGISTER_MOVE_COST'.
25814 
25815    This function is used extensively by register_move_cost that is used to
25816    build tables at startup.  Make it inline in this case.
25817    When IN is 2, return maximum of in and out move cost.
25818 
25819    If moving between registers and memory is more expensive than
25820    between two registers, you should define this macro to express the
25821    relative cost.
25822 
25823    Also model the increased cost of moving QImode values in
25824    non-Q_REGS classes.
25825  */
25826 static inline int
25827 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
25828                          int in)
25829 {
25830   int cost;
25831   if (FLOAT_CLASS_P (regclass))
25832     {
25833       int index;
25834       switch (mode)
25835         {
25836           case SFmode:
25837             index = 0;
25838             break;
25839           case DFmode:
25840             index = 1;
25841             break;
25842           case XFmode:
25843             index = 2;
25844             break;
25845           default:
25846             return 100;
25847         }
25848       if (in == 2)
25849         return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
25850       return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
25851     }
25852   if (SSE_CLASS_P (regclass))
25853     {
25854       int index;
25855       switch (GET_MODE_SIZE (mode))
25856         {
25857           case 4:
25858             index = 0;
25859             break;
25860           case 8:
25861             index = 1;
25862             break;
25863           case 16:
25864             index = 2;
25865             break;
25866           default:
25867             return 100;
25868         }
25869       if (in == 2)
25870         return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
25871       return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
25872     }
25873   if (MMX_CLASS_P (regclass))
25874     {
25875       int index;
25876       switch (GET_MODE_SIZE (mode))
25877         {
25878           case 4:
25879             index = 0;
25880             break;
25881           case 8:
25882             index = 1;
25883             break;
25884           default:
25885             return 100;
25886         }
25887       if (in == 2)
25888         return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
25889       return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
25890     }
25891   switch (GET_MODE_SIZE (mode))
25892     {
25893       case 1:
25894         if (Q_CLASS_P (regclass) || TARGET_64BIT)
25895           {
25896             if (!in)
25897               return ix86_cost->int_store[0];
25898             if (TARGET_PARTIAL_REG_DEPENDENCY
25899                 && optimize_function_for_speed_p (cfun))
25900               cost = ix86_cost->movzbl_load;
25901             else
25902               cost = ix86_cost->int_load[0];
25903             if (in == 2)
25904               return MAX (cost, ix86_cost->int_store[0]);
25905             return cost;
25906           }
25907         else
25908           {
25909            if (in == 2)
25910              return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
25911            if (in)
25912              return ix86_cost->movzbl_load;
25913            else
25914              return ix86_cost->int_store[0] + 4;
25915           }
25916         break;
25917       case 2:
25918         if (in == 2)
25919           return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
25920         return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
25921       default:
25922         /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
25923         if (mode == TFmode)
25924           mode = XFmode;
25925         if (in == 2)
25926           cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
25927         else if (in)
25928           cost = ix86_cost->int_load[2];
25929         else
25930           cost = ix86_cost->int_store[2];
25931         return (cost * (((int) GET_MODE_SIZE (mode)
25932                         + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
25933     }
25934 }
25935 
25936 int
25937 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
25938 {
25939   return inline_memory_move_cost (mode, regclass, in);
25940 }
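
/* Worked example (illustrative, with a hypothetical table value): for a
   16-byte value in an integer class with ix86_cost->int_load[2] == 4 and
   UNITS_PER_WORD == 4, the default case above computes
   4 * ((16 + 4 - 1) / 4) == 16, i.e. one word-sized move cost for each
   of the four words.  */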
25941 
25942 
25943 /* Return the cost of moving data from a register in class CLASS1 to
25944    one in class CLASS2.
25945 
25946    It is not required that the cost always equal 2 when FROM is the same as TO;
25947    on some machines it is expensive to move between registers if they are not
25948    general registers.  */
25949 
25950 int
25951 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
25952                          enum reg_class class2)
25953 {
25954   /* In case we require secondary memory, compute cost of the store followed
25955      by load.  In order to avoid bad register allocation choices, we need
25956      this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
25957 
25958   if (inline_secondary_memory_needed (class1, class2, mode, 0))
25959     {
25960       int cost = 1;
25961 
25962       cost += inline_memory_move_cost (mode, class1, 2);
25963       cost += inline_memory_move_cost (mode, class2, 2);
25964 
25965       /* When copying from a general purpose register we may emit multiple
25966          stores followed by a single load, causing a memory size mismatch
25967          stall.  Count this as an arbitrarily high cost of 20.  */
25968       if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
25969         cost += 20;
25970 
25971       /* In the case of FP/MMX moves, the registers actually overlap, and we
25972          have to switch modes in order to treat them differently.  */
25973       if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
25974           || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
25975         cost += 20;
25976 
25977       return cost;
25978     }
25979 
25980   /* Moves between SSE/MMX and integer unit are expensive.  */
25981   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
25982       || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25983 
25984     /* ??? By keeping the returned value relatively high, we limit the number
25985        of moves between integer and MMX/SSE registers for all targets.
25986        Additionally, a high value prevents a problem with x86_modes_tieable_p(),
25987        where integer modes in MMX/SSE registers are not tieable
25988        because of missing QImode and HImode moves to, from or between
25989        MMX/SSE registers.  */
25990     return MAX (8, ix86_cost->mmxsse_to_integer);
25991 
25992   if (MAYBE_FLOAT_CLASS_P (class1))
25993     return ix86_cost->fp_move;
25994   if (MAYBE_SSE_CLASS_P (class1))
25995     return ix86_cost->sse_move;
25996   if (MAYBE_MMX_CLASS_P (class1))
25997     return ix86_cost->mmx_move;
25998   return 2;
25999 }
26000 
26001 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
26002 
26003 bool
26004 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26005 {
26006   /* Flags and only flags can hold CCmode values.  */
26007   if (CC_REGNO_P (regno))
26008     return GET_MODE_CLASS (mode) == MODE_CC;
26009   if (GET_MODE_CLASS (mode) == MODE_CC
26010       || GET_MODE_CLASS (mode) == MODE_RANDOM
26011       || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26012     return 0;
26013   if (FP_REGNO_P (regno))
26014     return VALID_FP_MODE_P (mode);
26015   if (SSE_REGNO_P (regno))
26016     {
26017       /* We implement the move patterns for all vector modes into and
26018          out of SSE registers, even when no operation instructions
26019          are available.  OImode move is available only when AVX is
26020          enabled.  */
26021       return ((TARGET_AVX && mode == OImode)
26022               || VALID_AVX256_REG_MODE (mode)
26023               || VALID_SSE_REG_MODE (mode)
26024               || VALID_SSE2_REG_MODE (mode)
26025               || VALID_MMX_REG_MODE (mode)
26026               || VALID_MMX_REG_MODE_3DNOW (mode));
26027     }
26028   if (MMX_REGNO_P (regno))
26029     {
26030       /* We implement the move patterns for 3DNOW modes even in MMX mode,
26031          so if the register is available at all, then we can move data of
26032          the given mode into or out of it.  */
26033       return (VALID_MMX_REG_MODE (mode)
26034               || VALID_MMX_REG_MODE_3DNOW (mode));
26035     }
26036 
26037   if (mode == QImode)
26038     {
26039       /* Take care with QImode values - they can live in non-QI regs,
26040          but then they can cause partial register stalls.  */
26041       if (regno <= BX_REG || TARGET_64BIT)
26042         return 1;
26043       if (!TARGET_PARTIAL_REG_STALL)
26044         return 1;
26045       return reload_in_progress || reload_completed;
26046     }
26047   /* We handle both integer and floats in the general purpose registers.  */
26048   else if (VALID_INT_MODE_P (mode))
26049     return 1;
26050   else if (VALID_FP_MODE_P (mode))
26051     return 1;
26052   else if (VALID_DFP_MODE_P (mode))
26053     return 1;
26054   /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
26055      on to use that value in smaller contexts, this can easily force a
26056      pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
26057      supporting DImode, allow it.  */
26058   else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26059     return 1;
26060 
26061   return 0;
26062 }
26063 
26064 /* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
26065    tieable integer mode.  */
26066 
26067 static bool
26068 ix86_tieable_integer_mode_p (enum machine_mode mode)
26069 {
26070   switch (mode)
26071     {
26072     case HImode:
26073     case SImode:
26074       return true;
26075 
26076     case QImode:
26077       return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26078 
26079     case DImode:
26080       return TARGET_64BIT;
26081 
26082     default:
26083       return false;
26084     }
26085 }
26086 
26087 /* Return true if MODE1 is accessible in a register that can hold MODE2
26088    without copying.  That is, all register classes that can hold MODE2
26089    can also hold MODE1.  */
26090 
26091 bool
26092 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26093 {
26094   if (mode1 == mode2)
26095     return true;
26096 
26097   if (ix86_tieable_integer_mode_p (mode1)
26098       && ix86_tieable_integer_mode_p (mode2))
26099     return true;
26100 
26101   /* MODE2 being XFmode implies fp stack or general regs, which means we
26102      can tie any smaller floating point modes to it.  Note that we do not
26103      tie this with TFmode.  */
26104   if (mode2 == XFmode)
26105     return mode1 == SFmode || mode1 == DFmode;
26106 
26107   /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26108      that we can tie it with SFmode.  */
26109   if (mode2 == DFmode)
26110     return mode1 == SFmode;
26111 
26112   /* If MODE2 is only appropriate for an SSE register, then tie with
26113      any other mode acceptable to SSE registers.  */
26114   if (GET_MODE_SIZE (mode2) == 16
26115       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26116     return (GET_MODE_SIZE (mode1) == 16
26117             && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26118 
26119   /* If MODE2 is appropriate for an MMX register, then tie
26120      with any other mode acceptable to MMX registers.  */
26121   if (GET_MODE_SIZE (mode2) == 8
26122       && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26123     return (GET_MODE_SIZE (mode1) == 8
26124             && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26125 
26126   return false;
26127 }
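
/* Examples (illustrative): SFmode ties with DFmode and both tie with
   XFmode, per the checks above; two 16-byte vector modes such as
   V4SFmode and V4SImode tie with each other because both are valid only
   in SSE registers.  */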
26128 
26129 /* Compute a (partial) cost for rtx X.  Return true if the complete
26130    cost has been computed, and false if subexpressions should be
26131    scanned.  In either case, *TOTAL contains the cost result.  */
26132 
26133 static bool
26134 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26135 {
26136   enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26137   enum machine_mode mode = GET_MODE (x);
26138   const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26139 
26140   switch (code)
26141     {
26142     case CONST_INT:
26143     case CONST:
26144     case LABEL_REF:
26145     case SYMBOL_REF:
26146       if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26147         *total = 3;
26148       else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26149         *total = 2;
26150       else if (flag_pic && SYMBOLIC_CONST (x)
26151                && (!TARGET_64BIT
26152                    || (GET_CODE (x) != LABEL_REF
26153                        && (GET_CODE (x) != SYMBOL_REF
26154                            || !SYMBOL_REF_LOCAL_P (x)))))
26155         *total = 1;
26156       else
26157         *total = 0;
26158       return true;
26159 
26160     case CONST_DOUBLE:
26161       if (mode == VOIDmode)
26162         *total = 0;
26163       else
26164         switch (standard_80387_constant_p (x))
26165           {
26166           case 1: /* 0.0 */
26167             *total = 1;
26168             break;
26169           default: /* Other constants */
26170             *total = 2;
26171             break;
26172           case 0:
26173           case -1:
26174             /* Start with (MEM (SYMBOL_REF)), since that's where
26175                it'll probably end up.  Add a penalty for size.  */
26176             *total = (COSTS_N_INSNS (1)
26177                       + (flag_pic != 0 && !TARGET_64BIT)
26178                       + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26179             break;
26180           }
26181       return true;
26182 
26183     case ZERO_EXTEND:
26184       /* Zero extension is often completely free on x86_64, so make
26185          it as cheap as possible.  */
26186       if (TARGET_64BIT && mode == DImode
26187           && GET_MODE (XEXP (x, 0)) == SImode)
26188         *total = 1;
26189       else if (TARGET_ZERO_EXTEND_WITH_AND)
26190         *total = cost->add;
26191       else
26192         *total = cost->movzx;
26193       return false;
26194 
26195     case SIGN_EXTEND:
26196       *total = cost->movsx;
26197       return false;
26198 
26199     case ASHIFT:
26200       if (CONST_INT_P (XEXP (x, 1))
26201           && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26202         {
26203           HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26204           if (value == 1)
26205             {
26206               *total = cost->add;
26207               return false;
26208             }
26209           if ((value == 2 || value == 3)
26210               && cost->lea <= cost->shift_const)
26211             {
26212               *total = cost->lea;
26213               return false;
26214             }
26215         }
26216       /* FALLTHRU */
26217 
26218     case ROTATE:
26219     case ASHIFTRT:
26220     case LSHIFTRT:
26221     case ROTATERT:
26222       if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26223         {
26224           if (CONST_INT_P (XEXP (x, 1)))
26225             {
26226               if (INTVAL (XEXP (x, 1)) > 32)
26227                 *total = cost->shift_const + COSTS_N_INSNS (2);
26228               else
26229                 *total = cost->shift_const * 2;
26230             }
26231           else
26232             {
26233               if (GET_CODE (XEXP (x, 1)) == AND)
26234                 *total = cost->shift_var * 2;
26235               else
26236                 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26237             }
26238         }
26239       else
26240         {
26241           if (CONST_INT_P (XEXP (x, 1)))
26242             *total = cost->shift_const;
26243           else
26244             *total = cost->shift_var;
26245         }
26246       return false;
26247 
26248     case MULT:
26249       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26250         {
26251           /* ??? SSE scalar cost should be used here.  */
26252           *total = cost->fmul;
26253           return false;
26254         }
26255       else if (X87_FLOAT_MODE_P (mode))
26256         {
26257           *total = cost->fmul;
26258           return false;
26259         }
26260       else if (FLOAT_MODE_P (mode))
26261         {
26262           /* ??? SSE vector cost should be used here.  */
26263           *total = cost->fmul;
26264           return false;
26265         }
26266       else
26267         {
26268           rtx op0 = XEXP (x, 0);
26269           rtx op1 = XEXP (x, 1);
26270           int nbits;
26271           if (CONST_INT_P (XEXP (x, 1)))
26272             {
26273               unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26274               for (nbits = 0; value != 0; value &= value - 1)
26275                 nbits++;
26276             }
26277           else
26278             /* This is arbitrary.  */
26279             nbits = 7;
26280 
26281           /* Compute costs correctly for widening multiplication.  */
26282           if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26283               && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26284                  == GET_MODE_SIZE (mode))
26285             {
26286               int is_mulwiden = 0;
26287               enum machine_mode inner_mode = GET_MODE (op0);
26288 
26289               if (GET_CODE (op0) == GET_CODE (op1))
26290                 is_mulwiden = 1, op1 = XEXP (op1, 0);
26291               else if (CONST_INT_P (op1))
26292                 {
26293                   if (GET_CODE (op0) == SIGN_EXTEND)
26294                     is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26295                                   == INTVAL (op1);
26296                   else
26297                     is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26298                 }
26299 
26300               if (is_mulwiden)
26301                 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26302             }
26303 
26304           *total = (cost->mult_init[MODE_INDEX (mode)]
26305                     + nbits * cost->mult_bit
26306                     + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26307 
26308           return true;
26309         }
26310 
26311     case DIV:
26312     case UDIV:
26313     case MOD:
26314     case UMOD:
26315       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26316         /* ??? SSE cost should be used here.  */
26317         *total = cost->fdiv;
26318       else if (X87_FLOAT_MODE_P (mode))
26319         *total = cost->fdiv;
26320       else if (FLOAT_MODE_P (mode))
26321         /* ??? SSE vector cost should be used here.  */
26322         *total = cost->fdiv;
26323       else
26324         *total = cost->divide[MODE_INDEX (mode)];
26325       return false;
26326 
26327     case PLUS:
26328       if (GET_MODE_CLASS (mode) == MODE_INT
26329           && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26330         {
26331           if (GET_CODE (XEXP (x, 0)) == PLUS
26332               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26333               && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26334               && CONSTANT_P (XEXP (x, 1)))
26335             {
26336               HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26337               if (val == 2 || val == 4 || val == 8)
26338                 {
26339                   *total = cost->lea;
26340                   *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26341                   *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26342                                       outer_code, speed);
26343                   *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26344                   return true;
26345                 }
26346             }
26347           else if (GET_CODE (XEXP (x, 0)) == MULT
26348                    && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26349             {
26350               HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26351               if (val == 2 || val == 4 || val == 8)
26352                 {
26353                   *total = cost->lea;
26354                   *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26355                   *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26356                   return true;
26357                 }
26358             }
26359           else if (GET_CODE (XEXP (x, 0)) == PLUS)
26360             {
26361               *total = cost->lea;
26362               *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26363               *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26364               *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26365               return true;
26366             }
26367         }
26368       /* FALLTHRU */
26369 
26370     case MINUS:
26371       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26372         {
26373           /* ??? SSE cost should be used here.  */
26374           *total = cost->fadd;
26375           return false;
26376         }
26377       else if (X87_FLOAT_MODE_P (mode))
26378         {
26379           *total = cost->fadd;
26380           return false;
26381         }
26382       else if (FLOAT_MODE_P (mode))
26383         {
26384           /* ??? SSE vector cost should be used here.  */
26385           *total = cost->fadd;
26386           return false;
26387         }
26388       /* FALLTHRU */
26389 
26390     case AND:
26391     case IOR:
26392     case XOR:
26393       if (!TARGET_64BIT && mode == DImode)
26394         {
26395           *total = (cost->add * 2
26396                     + (rtx_cost (XEXP (x, 0), outer_code, speed)
26397                        << (GET_MODE (XEXP (x, 0)) != DImode))
26398                     + (rtx_cost (XEXP (x, 1), outer_code, speed)
26399                        << (GET_MODE (XEXP (x, 1)) != DImode)));
26400           return true;
26401         }
26402       /* FALLTHRU */
26403 
26404     case NEG:
26405       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26406         {
26407           /* ??? SSE cost should be used here.  */
26408           *total = cost->fchs;
26409           return false;
26410         }
26411       else if (X87_FLOAT_MODE_P (mode))
26412         {
26413           *total = cost->fchs;
26414           return false;
26415         }
26416       else if (FLOAT_MODE_P (mode))
26417         {
26418           /* ??? SSE vector cost should be used here.  */
26419           *total = cost->fchs;
26420           return false;
26421         }
26422       /* FALLTHRU */
26423 
26424     case NOT:
26425       if (!TARGET_64BIT && mode == DImode)
26426         *total = cost->add * 2;
26427       else
26428         *total = cost->add;
26429       return false;
26430 
26431     case COMPARE:
26432       if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26433           && XEXP (XEXP (x, 0), 1) == const1_rtx
26434           && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26435           && XEXP (x, 1) == const0_rtx)
26436         {
26437           /* This kind of construct is implemented using test[bwl].
26438              Treat it as if we had an AND.  */
26439           *total = (cost->add
26440                     + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26441                     + rtx_cost (const1_rtx, outer_code, speed));
26442           return true;
26443         }
26444       return false;
26445 
26446     case FLOAT_EXTEND:
26447       if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26448         *total = 0;
26449       return false;
26450 
26451     case ABS:
26452       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26453         /* ??? SSE cost should be used here.  */
26454         *total = cost->fabs;
26455       else if (X87_FLOAT_MODE_P (mode))
26456         *total = cost->fabs;
26457       else if (FLOAT_MODE_P (mode))
26458         /* ??? SSE vector cost should be used here.  */
26459         *total = cost->fabs;
26460       return false;
26461 
26462     case SQRT:
26463       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26464         /* ??? SSE cost should be used here.  */
26465         *total = cost->fsqrt;
26466       else if (X87_FLOAT_MODE_P (mode))
26467         *total = cost->fsqrt;
26468       else if (FLOAT_MODE_P (mode))
26469         /* ??? SSE vector cost should be used here.  */
26470         *total = cost->fsqrt;
26471       return false;
26472 
26473     case UNSPEC:
26474       if (XINT (x, 1) == UNSPEC_TP)
26475         *total = 0;
26476       return false;
26477 
26478     default:
26479       return false;
26480     }
26481 }
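
/* Worked example (illustrative): for
   (plus:SI (mult:SI (reg:SI) (const_int 4)) (const_int 8)) the PLUS case
   above notices the scale 4 is one of {2, 4, 8} and prices the whole
   expression as a single lea (cost->lea plus the operand costs) instead
   of a multiply followed by an add.  */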
26482 
26483 #if TARGET_MACHO
26484 
26485 static int current_machopic_label_num;
26486 
26487 /* Given a symbol name and its associated stub, write out the
26488    definition of the stub.  */
26489 
26490 void
26491 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26492 {
26493   unsigned int length;
26494   char *binder_name, *symbol_name, lazy_ptr_name[32];
26495   int label = ++current_machopic_label_num;
26496 
26497   /* For 64-bit we shouldn't get here.  */
26498   gcc_assert (!TARGET_64BIT);
26499 
26500   /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
26501   symb = (*targetm.strip_name_encoding) (symb);
26502 
26503   length = strlen (stub);
26504   binder_name = XALLOCAVEC (char, length + 32);
26505   GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26506 
26507   length = strlen (symb);
26508   symbol_name = XALLOCAVEC (char, length + 32);
26509   GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26510 
26511   sprintf (lazy_ptr_name, "L%d$lz", label);
26512 
26513   if (MACHOPIC_PURE)
26514     switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26515   else
26516     switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26517 
26518   fprintf (file, "%s:\n", stub);
26519   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26520 
26521   if (MACHOPIC_PURE)
26522     {
26523       fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26524       fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26525       fprintf (file, "\tjmp\t*%%edx\n");
26526     }
26527   else
26528     fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
26529 
26530   fprintf (file, "%s:\n", binder_name);
26531 
26532   if (MACHOPIC_PURE)
26533     {
26534       fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26535       fprintf (file, "\tpushl\t%%eax\n");
26536     }
26537   else
26538     fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26539 
26540   fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
26541 
26542   switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26543   fprintf (file, "%s:\n", lazy_ptr_name);
26544   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26545   fprintf (file, "\t.long %s\n", binder_name);
26546 }
26547 
26548 void
26549 darwin_x86_file_end (void)
26550 {
26551   darwin_file_end ();
26552   ix86_file_end ();
26553 }
26554 #endif /* TARGET_MACHO */
26555 
26556 /* Order the registers for the register allocator.  */
26557 
26558 void
26559 x86_order_regs_for_local_alloc (void)
26560 {
26561    int pos = 0;
26562    int i;
26563 
26564    /* First allocate the local general purpose registers.  */
26565    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26566      if (GENERAL_REGNO_P (i) && call_used_regs[i])
26567         reg_alloc_order [pos++] = i;
26568 
26569    /* Global general purpose registers.  */
26570    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26571      if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26572         reg_alloc_order [pos++] = i;
26573 
26574    /* x87 registers come first in case we are doing FP math
26575       using them.  */
26576    if (!TARGET_SSE_MATH)
26577      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26578        reg_alloc_order [pos++] = i;
26579 
26580    /* SSE registers.  */
26581    for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26582      reg_alloc_order [pos++] = i;
26583    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26584      reg_alloc_order [pos++] = i;
26585 
26586    /* x87 registers.  */
26587    if (TARGET_SSE_MATH)
26588      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26589        reg_alloc_order [pos++] = i;
26590 
26591    for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26592      reg_alloc_order [pos++] = i;
26593 
26594    /* Initialize the rest of the array, as we do not allocate some
26595       registers at all.  */
26596    while (pos < FIRST_PSEUDO_REGISTER)
26597      reg_alloc_order [pos++] = 0;
26598 }
26599 
26600 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
26601    struct attribute_spec.handler.  */
26602 static tree
26603 ix86_handle_abi_attribute (tree *node, tree name,
26604                               tree args ATTRIBUTE_UNUSED,
26605                               int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26606 {
26607   if (TREE_CODE (*node) != FUNCTION_TYPE
26608       && TREE_CODE (*node) != METHOD_TYPE
26609       && TREE_CODE (*node) != FIELD_DECL
26610       && TREE_CODE (*node) != TYPE_DECL)
26611     {
26612       warning (OPT_Wattributes, "%qs attribute only applies to functions",
26613                IDENTIFIER_POINTER (name));
26614       *no_add_attrs = true;
26615       return NULL_TREE;
26616     }
26617   if (!TARGET_64BIT)
26618     {
26619       warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
26620                IDENTIFIER_POINTER (name));
26621       *no_add_attrs = true;
26622       return NULL_TREE;
26623     }
26624 
26625   /* The ms_abi and sysv_abi attributes are mutually exclusive.  */
26626   if (is_attribute_p ("ms_abi", name))
26627     {
26628       if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26629         {
26630           error ("ms_abi and sysv_abi attributes are not compatible");
26631         }
26632 
26633       return NULL_TREE;
26634     }
26635   else if (is_attribute_p ("sysv_abi", name))
26636     {
26637       if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26638         {
26639           error ("ms_abi and sysv_abi attributes are not compatible");
26640         }
26641 
26642       return NULL_TREE;
26643     }
26644 
26645   return NULL_TREE;
26646 }
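
/* For reference (illustrative, hypothetical declarations): on a 64-bit
   target the handler above accepts

     int f (int) __attribute__ ((ms_abi));
     int g (int) __attribute__ ((sysv_abi));

   and emits the "not compatible" error if one declaration carries both
   attributes.  */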
26647 
26648 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26649    struct attribute_spec.handler.  */
26650 static tree
26651 ix86_handle_struct_attribute (tree *node, tree name,
26652                               tree args ATTRIBUTE_UNUSED,
26653                               int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26654 {
26655   tree *type = NULL;
26656   if (DECL_P (*node))
26657     {
26658       if (TREE_CODE (*node) == TYPE_DECL)
26659         type = &TREE_TYPE (*node);
26660     }
26661   else
26662     type = node;
26663 
26664   if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26665                  || TREE_CODE (*type) == UNION_TYPE)))
26666     {
26667       warning (OPT_Wattributes, "%qs attribute ignored",
26668                IDENTIFIER_POINTER (name));
26669       *no_add_attrs = true;
26670     }
26671 
26672   else if ((is_attribute_p ("ms_struct", name)
26673             && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26674            || ((is_attribute_p ("gcc_struct", name)
26675                 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26676     {
26677       warning (OPT_Wattributes, "%qs incompatible attribute ignored",
26678                IDENTIFIER_POINTER (name));
26679       *no_add_attrs = true;
26680     }
26681 
26682   return NULL_TREE;
26683 }
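
/* For reference (illustrative, hypothetical type): the handler above
   accepts

     struct s { char c; double d; } __attribute__ ((ms_struct));

   and warns if the same type also carries gcc_struct, or if either
   attribute is applied to something other than a struct or union.  */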
26684 
26685 static bool
26686 ix86_ms_bitfield_layout_p (const_tree record_type)
26687 {
26688   return (TARGET_MS_BITFIELD_LAYOUT &&
26689           !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
26690     || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
26691 }
26692 
26693 /* Returns an expression indicating where the this parameter is
26694    located on entry to the FUNCTION.  */
26695 
26696 static rtx
26697 x86_this_parameter (tree function)
26698 {
26699   tree type = TREE_TYPE (function);
26700   bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
26701   int nregs;
26702 
26703   if (TARGET_64BIT)
26704     {
26705       const int *parm_regs;
26706 
26707       if (ix86_function_type_abi (type) == MS_ABI)
26708         parm_regs = x86_64_ms_abi_int_parameter_registers;
26709       else
26710         parm_regs = x86_64_int_parameter_registers;
26711       return gen_rtx_REG (DImode, parm_regs[aggr]);
26712     }
26713 
26714   nregs = ix86_function_regparm (type, function);
26715 
26716   if (nregs > 0 && !stdarg_p (type))
26717     {
26718       int regno;
26719 
26720       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
26721         regno = aggr ? DX_REG : CX_REG;
26722       else
26723         {
26724           regno = AX_REG;
26725           if (aggr)
26726             {
26727               regno = DX_REG;
26728               if (nregs == 1)
26729                 return gen_rtx_MEM (SImode,
26730                                     plus_constant (stack_pointer_rtx, 4));
26731             }
26732         }
26733       return gen_rtx_REG (SImode, regno);
26734     }
26735 
26736   return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
26737 }
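
/* Example (illustrative): for a 32-bit fastcall function, THIS arrives
   in %ecx, or in %edx when the return value is an aggregate returned via
   a hidden pointer; without register parameters it is found on the stack
   at 4(%esp), or at 8(%esp) past the hidden return pointer.  */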
26738 
26739 /* Determine whether x86_output_mi_thunk can succeed.  */
26740 
26741 static bool
26742 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
26743                          HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
26744                          HOST_WIDE_INT vcall_offset, const_tree function)
26745 {
26746   /* 64-bit can handle anything.  */
26747   if (TARGET_64BIT)
26748     return true;
26749 
26750   /* For 32-bit, everything's fine if we have one free register.  */
26751   if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
26752     return true;
26753 
26754   /* Need a free register for vcall_offset.  */
26755   if (vcall_offset)
26756     return false;
26757 
26758   /* Need a free register for GOT references.  */
26759   if (flag_pic && !(*targetm.binds_local_p) (function))
26760     return false;
26761 
26762   /* Otherwise ok.  */
26763   return true;
26764 }
26765 
26766 /* Output the assembler code for a thunk function.  THUNK_DECL is the
26767    declaration for the thunk function itself, FUNCTION is the decl for
26768    the target function.  DELTA is an immediate constant offset to be
26769    added to THIS.  If VCALL_OFFSET is nonzero, the word at
26770    *(*this + vcall_offset) should be added to THIS.  */
26771 
26772 static void
26773 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
26774                      tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
26775                      HOST_WIDE_INT vcall_offset, tree function)
26776 {
26777   rtx xops[3];
26778   rtx this_param = x86_this_parameter (function);
26779   rtx this_reg, tmp;
26780 
26781   /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
26782      pull it in now and let DELTA benefit.  */
26783   if (REG_P (this_param))
26784     this_reg = this_param;
26785   else if (vcall_offset)
26786     {
26787       /* Put the this parameter into %eax.  */
26788       xops[0] = this_param;
26789       xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
26790       output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26791     }
26792   else
26793     this_reg = NULL_RTX;
26794 
26795   /* Adjust the this parameter by a fixed constant.  */
26796   if (delta)
26797     {
26798       xops[0] = GEN_INT (delta);
26799       xops[1] = this_reg ? this_reg : this_param;
26800       if (TARGET_64BIT)
26801         {
26802           if (!x86_64_general_operand (xops[0], DImode))
26803             {
26804               tmp = gen_rtx_REG (DImode, R10_REG);
26805               xops[1] = tmp;
26806               output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
26807               xops[0] = tmp;
26808               xops[1] = this_param;
26809             }
26810           output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
26811         }
26812       else
26813         output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
26814     }
26815 
26816   /* Adjust the this parameter by a value stored in the vtable.  */
26817   if (vcall_offset)
26818     {
26819       if (TARGET_64BIT)
26820         tmp = gen_rtx_REG (DImode, R10_REG);
26821       else
26822         {
26823           int tmp_regno = CX_REG;
26824           if (lookup_attribute ("fastcall",
26825                                 TYPE_ATTRIBUTES (TREE_TYPE (function))))
26826             tmp_regno = AX_REG;
26827           tmp = gen_rtx_REG (SImode, tmp_regno);
26828         }
26829 
26830       xops[0] = gen_rtx_MEM (Pmode, this_reg);
26831       xops[1] = tmp;
26832       output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26833 
26834       /* Adjust the this parameter.  */
26835       xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
26836       if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
26837         {
26838           rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
26839           xops[0] = GEN_INT (vcall_offset);
26840           xops[1] = tmp2;
26841           output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
26842           xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
26843         }
26844       xops[1] = this_reg;
26845       output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
26846     }
26847 
26848   /* If necessary, drop THIS back to its stack slot.  */
26849   if (this_reg && this_reg != this_param)
26850     {
26851       xops[0] = this_reg;
26852       xops[1] = this_param;
26853       output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26854     }
26855 
26856   xops[0] = XEXP (DECL_RTL (function), 0);
26857   if (TARGET_64BIT)
26858     {
26859       if (!flag_pic || (*targetm.binds_local_p) (function))
26860         output_asm_insn ("jmp\t%P0", xops);
26861       /* All thunks should be in the same object as their target,
26862          and thus binds_local_p should be true.  */
26863       else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
26864         gcc_unreachable ();
26865       else
26866         {
26867           tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
26868           tmp = gen_rtx_CONST (Pmode, tmp);
26869           tmp = gen_rtx_MEM (QImode, tmp);
26870           xops[0] = tmp;
26871           output_asm_insn ("jmp\t%A0", xops);
26872         }
26873     }
26874   else
26875     {
26876       if (!flag_pic || (*targetm.binds_local_p) (function))
26877         output_asm_insn ("jmp\t%P0", xops);
26878       else
26879 #if TARGET_MACHO
26880         if (TARGET_MACHO)
26881           {
26882             rtx sym_ref = XEXP (DECL_RTL (function), 0);
26883             tmp = (gen_rtx_SYMBOL_REF
26884                    (Pmode,
26885                     machopic_indirection_name (sym_ref, /*stub_p=*/true)));
26886             tmp = gen_rtx_MEM (QImode, tmp);
26887             xops[0] = tmp;
26888             output_asm_insn ("jmp\t%0", xops);
26889           }
26890         else
26891 #endif /* TARGET_MACHO */
26892         {
26893           tmp = gen_rtx_REG (SImode, CX_REG);
26894           output_set_got (tmp, NULL_RTX);
26895 
26896           xops[1] = tmp;
26897           output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
26898           output_asm_insn ("jmp\t{*}%1", xops);
26899         }
26900     }
26901 }
26902 
26903 static void
26904 x86_file_start (void)
26905 {
26906   default_file_start ();
26907 #if TARGET_MACHO
26908   darwin_file_start ();
26909 #endif
26910   if (X86_FILE_START_VERSION_DIRECTIVE)
26911     fputs ("\t.version\t\"01.01\"\n", asm_out_file);
26912   if (X86_FILE_START_FLTUSED)
26913     fputs ("\t.global\t__fltused\n", asm_out_file);
26914   if (ix86_asm_dialect == ASM_INTEL)
26915     fputs ("\t.intel_syntax noprefix\n", asm_out_file);
26916 }
26917 
26918 int
26919 x86_field_alignment (tree field, int computed)
26920 {
26921   enum machine_mode mode;
26922   tree type = TREE_TYPE (field);
26923 
26924   if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
26925     return computed;
26926   mode = TYPE_MODE (strip_array_types (type));
26927   if (mode == DFmode || mode == DCmode
26928       || GET_MODE_CLASS (mode) == MODE_INT
26929       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
26930     return MIN (32, computed);
26931   return computed;
26932 }
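
/* Worked example (illustrative): on ia32 without -malign-double the
   function above caps a double field at 32-bit alignment, so in

     struct s { char c; double d; };

   the member d lands at offset 4 instead of 8 and sizeof (struct s)
   is 12 rather than 16.  */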
26933 
26934 /* Output assembler code to FILE to increment profiler label # LABELNO
26935    for profiling a function entry.  */
26936 void
26937 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
26938 {
26939   if (TARGET_64BIT)
26940     {
26941 #ifndef NO_PROFILE_COUNTERS
26942       fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
26943 #endif
26944 
26945       if (DEFAULT_ABI == SYSV_ABI && flag_pic)
26946         fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
26947       else
26948         fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
26949     }
26950   else if (flag_pic)
26951     {
26952 #ifndef NO_PROFILE_COUNTERS
26953       fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
26954                LPREFIX, labelno, PROFILE_COUNT_REGISTER);
26955 #endif
26956       fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
26957     }
26958   else
26959     {
26960 #ifndef NO_PROFILE_COUNTERS
26961       fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
26962                PROFILE_COUNT_REGISTER);
26963 #endif
26964       fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
26965     }
26966 }
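
/* Example output (illustrative, assuming MCOUNT_NAME expands to
   "mcount"): a 64-bit SYSV PIC compile emits
   "call *mcount@GOTPCREL(%rip)", while a 32-bit non-PIC compile emits a
   plain "call mcount", in both cases optionally preceded by the profile
   counter setup guarded by NO_PROFILE_COUNTERS above.  */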
26967 
26968 /* We don't have exact information about the insn sizes, but we may assume
26969    quite safely that we are informed about all 1-byte insns and memory
26970    address sizes.  This is enough to eliminate unnecessary padding in
26971    99% of cases.  */
26972 
26973 static int
26974 min_insn_size (rtx insn)
26975 {
26976   int l = 0;
26977 
26978   if (!INSN_P (insn) || !active_insn_p (insn))
26979     return 0;
26980 
26981   /* Discard alignments we've emitted, and jump tables.  */
26982   if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
26983       && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
26984     return 0;
26985   if (JUMP_P (insn)
26986       && (GET_CODE (PATTERN (insn)) == ADDR_VEC
26987           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
26988     return 0;
26989 
26990   /* Important case - calls are always 5 bytes.
26991      It is common to have many calls in a row.  */
26992   if (CALL_P (insn)
26993       && symbolic_reference_mentioned_p (PATTERN (insn))
26994       && !SIBLING_CALL_P (insn))
26995     return 5;
26996   if (get_attr_length (insn) <= 1)
26997     return 1;
26998 
26999   /* For normal instructions we may rely on the sizes of addresses
27000      and the presence of a symbol to require 4 bytes of encoding.
27001      This is not the case for jumps, where references are PC relative.  */
27002   if (!JUMP_P (insn))
27003     {
27004       l = get_attr_length_address (insn);
27005       if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27006         l = 4;
27007     }
27008   if (l)
27009     return 1 + l;
27010   else
27011     return 2;
27012 }
27013 
27014 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
27015    16 byte window.  */
27016 
27017 static void
27018 ix86_avoid_jump_misspredicts (void)
27019 {
27020   rtx insn, start = get_insns ();
27021   int nbytes = 0, njumps = 0;
27022   int isjump = 0;
27023 
27024   /* Look for all minimal intervals of instructions containing 4 jumps.
27025      The intervals are bounded by START and INSN.  NBYTES is the total
27026      size of instructions in the interval including INSN and not including
27027      START.  When NBYTES is smaller than 16 bytes, it is possible
27028      that the end of START and INSN ends up in the same 16byte page.
27029 
27030      The smallest offset in the page INSN can start is the case where START
27031      ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
27032      We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
27033      */
27034   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
27035     {
27036 
27037       nbytes += min_insn_size (insn);
27038       if (dump_file)
27039         fprintf(dump_file, "Insn %i estimated to %i bytes\n",
27040                 INSN_UID (insn), min_insn_size (insn));
27041       if ((JUMP_P (insn)
27042            && GET_CODE (PATTERN (insn)) != ADDR_VEC
27043            && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27044           || CALL_P (insn))
27045         njumps++;
27046       else
27047         continue;
27048 
27049       while (njumps > 3)
27050         {
27051           start = NEXT_INSN (start);
27052           if ((JUMP_P (start)
27053                && GET_CODE (PATTERN (start)) != ADDR_VEC
27054                && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27055               || CALL_P (start))
27056             njumps--, isjump = 1;
27057           else
27058             isjump = 0;
27059           nbytes -= min_insn_size (start);
27060         }
27061       gcc_assert (njumps >= 0);
27062       if (dump_file)
27063         fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27064                 INSN_UID (start), INSN_UID (insn), nbytes);
27065 
27066       if (njumps == 3 && isjump && nbytes < 16)
27067         {
27068           int padsize = 15 - nbytes + min_insn_size (insn);
27069 
27070           if (dump_file)
27071             fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27072                      INSN_UID (insn), padsize);
27073           emit_insn_before (gen_align (GEN_INT (padsize)), insn);
27074         }
27075     }
27076 }
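
/* Worked example (illustrative): suppose the interval already holds
   three branches, the insn that just left the window was itself a jump
   (isjump), nbytes == 12, and the incoming jump is 2 bytes long.  Then
   padsize == 15 - 12 + 2 == 5, so a 5-byte align is emitted and the
   fourth branch cannot fall into the same 16-byte window.  */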
27077 
27078 /* The AMD Athlon works faster when RET is not the destination of a
27079    conditional jump or directly preceded by another jump instruction.
27080    We avoid the penalty by inserting a NOP just before the RET
27081    instruction in such cases.  */
27082 static void
27083 ix86_pad_returns (void)
27084 {
27085   edge e;
27086   edge_iterator ei;
27087 
27088   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27089     {
27090       basic_block bb = e->src;
27091       rtx ret = BB_END (bb);
27092       rtx prev;
27093       bool replace = false;
27094 
27095       if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27096           || optimize_bb_for_size_p (bb))
27097         continue;
27098       for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27099         if (active_insn_p (prev) || LABEL_P (prev))
27100           break;
27101       if (prev && LABEL_P (prev))
27102         {
27103           edge e;
27104           edge_iterator ei;
27105 
27106           FOR_EACH_EDGE (e, ei, bb->preds)
27107             if (EDGE_FREQUENCY (e) && e->src->index >= 0
27108                 && !(e->flags & EDGE_FALLTHRU))
27109               replace = true;
27110         }
27111       if (!replace)
27112         {
27113           prev = prev_active_insn (ret);
27114           if (prev
27115               && ((JUMP_P (prev) && any_condjump_p (prev))
27116                   || CALL_P (prev)))
27117             replace = true;
27118           /* Empty functions get branch mispredicts even when the jump
27119              destination is not visible to us.  */
27120           if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
27121             replace = true;
27122         }
27123       if (replace)
27124         {
27125           emit_insn_before (gen_return_internal_long (), ret);
27126           delete_insn (ret);
27127         }
27128     }
27129 }
27130 
27131 /* Implement machine specific optimizations.  We implement padding of returns
27132    for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window.  */
27133 static void
27134 ix86_reorg (void)
27135 {
27136   if (TARGET_PAD_RETURNS && optimize
27137       && optimize_function_for_speed_p (cfun))
27138     ix86_pad_returns ();
27139   if (TARGET_FOUR_JUMP_LIMIT && optimize
27140       && optimize_function_for_speed_p (cfun))
27141     ix86_avoid_jump_misspredicts ();
27142 }
27143 
27144 /* Return nonzero when a QImode register that must be represented via a REX
27145    prefix is used.  */
27146 bool
27147 x86_extended_QIreg_mentioned_p (rtx insn)
27148 {
27149   int i;
27150   extract_insn_cached (insn);
27151   for (i = 0; i < recog_data.n_operands; i++)
27152     if (REG_P (recog_data.operand[i])
27153         && REGNO (recog_data.operand[i]) > BX_REG)
27154        return true;
27155   return false;
27156 }
27157 
27158 /* Return nonzero when P points to a register encoded via a REX prefix.
27159    Called via for_each_rtx.  */
27160 static int
27161 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27162 {
27163    unsigned int regno;
27164    if (!REG_P (*p))
27165      return 0;
27166    regno = REGNO (*p);
27167    return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27168 }
27169 
27170 /* Return true when INSN mentions a register that must be encoded using
27171    a REX prefix.  */
27172 bool
27173 x86_extended_reg_mentioned_p (rtx insn)
27174 {
27175   return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27176                        extended_reg_mentioned_1, NULL);
27177 }
27178 
27179 /* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
27180    optabs would emit if we didn't have TFmode patterns.  */
27181 
27182 void
27183 x86_emit_floatuns (rtx operands[2])
27184 {
27185   rtx neglab, donelab, i0, i1, f0, in, out;
27186   enum machine_mode mode, inmode;
27187 
27188   inmode = GET_MODE (operands[1]);
27189   gcc_assert (inmode == SImode || inmode == DImode);
27190 
27191   out = operands[0];
27192   in = force_reg (inmode, operands[1]);
27193   mode = GET_MODE (out);
27194   neglab = gen_label_rtx ();
27195   donelab = gen_label_rtx ();
27196   f0 = gen_reg_rtx (mode);
27197 
27198   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27199 
27200   expand_float (out, in, 0);
27201 
27202   emit_jump_insn (gen_jump (donelab));
27203   emit_barrier ();
27204 
27205   emit_label (neglab);
27206 
27207   i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27208                             1, OPTAB_DIRECT);
27209   i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27210                             1, OPTAB_DIRECT);
27211   i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27212 
27213   expand_float (f0, i0, 0);
27214 
27215   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27216 
27217   emit_label (donelab);
27218 }
27219 
27220 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
27221    with all elements equal to VAR.  Return true if successful.  */
27222 
27223 static bool
27224 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27225                                    rtx target, rtx val)
27226 {
27227   enum machine_mode hmode, smode, wsmode, wvmode;
27228   rtx x;
27229 
27230   switch (mode)
27231     {
27232     case V2SImode:
27233     case V2SFmode:
27234       if (!mmx_ok)
27235         return false;
27236       /* FALLTHRU */
27237 
27238     case V2DFmode:
27239     case V2DImode:
27240     case V4SFmode:
27241     case V4SImode:
27242       val = force_reg (GET_MODE_INNER (mode), val);
27243       x = gen_rtx_VEC_DUPLICATE (mode, val);
27244       emit_insn (gen_rtx_SET (VOIDmode, target, x));
27245       return true;
27246 
27247     case V4HImode:
27248       if (!mmx_ok)
27249         return false;
27250       if (TARGET_SSE || TARGET_3DNOW_A)
27251         {
27252           val = gen_lowpart (SImode, val);
27253           x = gen_rtx_TRUNCATE (HImode, val);
27254           x = gen_rtx_VEC_DUPLICATE (mode, x);
27255           emit_insn (gen_rtx_SET (VOIDmode, target, x));
27256           return true;
27257         }
27258       else
27259         {
27260           smode = HImode;
27261           wsmode = SImode;
27262           wvmode = V2SImode;
27263           goto widen;
27264         }
27265 
27266     case V8QImode:
27267       if (!mmx_ok)
27268         return false;
27269       smode = QImode;
27270       wsmode = HImode;
27271       wvmode = V4HImode;
27272       goto widen;
27273     case V8HImode:
27274       if (TARGET_SSE2)
27275         {
27276           rtx tmp1, tmp2;
27277           /* Extend HImode to SImode using a paradoxical SUBREG.  */
27278           tmp1 = gen_reg_rtx (SImode);
27279           emit_move_insn (tmp1, gen_lowpart (SImode, val));
          /* Insert the SImode value as the low element of a V4SImode vector.  */
27281           tmp2 = gen_reg_rtx (V4SImode);
27282           tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27283                                     gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27284                                     CONST0_RTX (V4SImode),
27285                                     const1_rtx);
27286           emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27287           /* Cast the V4SImode vector back to a V8HImode vector.  */
27288           tmp1 = gen_reg_rtx (V8HImode);
27289           emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27290           /* Duplicate the low short through the whole low SImode word.  */
27291           emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27292           /* Cast the V8HImode vector back to a V4SImode vector.  */
27293           tmp2 = gen_reg_rtx (V4SImode);
27294           emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27295           /* Replicate the low element of the V4SImode vector.  */
27296           emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
          /* Cast the V4SImode vector back to V8HImode, and store in target.  */
27298           emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
27299           return true;
27300         }
27301       smode = HImode;
27302       wsmode = SImode;
27303       wvmode = V4SImode;
27304       goto widen;
27305     case V16QImode:
27306       if (TARGET_SSE2)
27307         {
27308           rtx tmp1, tmp2;
27309           /* Extend QImode to SImode using a paradoxical SUBREG.  */
27310           tmp1 = gen_reg_rtx (SImode);
27311           emit_move_insn (tmp1, gen_lowpart (SImode, val));
          /* Insert the SImode value as the low element of a V4SImode vector.  */
27313           tmp2 = gen_reg_rtx (V4SImode);
27314           tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27315                                     gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27316                                     CONST0_RTX (V4SImode),
27317                                     const1_rtx);
27318           emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27319           /* Cast the V4SImode vector back to a V16QImode vector.  */
27320           tmp1 = gen_reg_rtx (V16QImode);
27321           emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27322           /* Duplicate the low byte through the whole low SImode word.  */
27323           emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27324           emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27325           /* Cast the V16QImode vector back to a V4SImode vector.  */
27326           tmp2 = gen_reg_rtx (V4SImode);
27327           emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27328           /* Replicate the low element of the V4SImode vector.  */
27329           emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
          /* Cast the V4SImode vector back to V16QImode, and store in target.  */
27331           emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
27332           return true;
27333         }
27334       smode = QImode;
27335       wsmode = HImode;
27336       wvmode = V8HImode;
27337       goto widen;
27338     widen:
27339       /* Replicate the value once into the next wider mode and recurse.  */
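      /* For instance, a QImode value 0xab becomes the HImode value
         0xabab via (0xab << 8) | 0xab; the recursive call then fills
         the whole wider vector with that value.  */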
27340       val = convert_modes (wsmode, smode, val, true);
27341       x = expand_simple_binop (wsmode, ASHIFT, val,
27342                                GEN_INT (GET_MODE_BITSIZE (smode)),
27343                                NULL_RTX, 1, OPTAB_LIB_WIDEN);
27344       val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27345 
27346       x = gen_reg_rtx (wvmode);
27347       if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27348         gcc_unreachable ();
27349       emit_move_insn (target, gen_lowpart (mode, x));
27350       return true;
27351 
27352     case V4DFmode:
27353       hmode = V2DFmode;
27354       goto half;
27355     case V4DImode:
27356       hmode = V2DImode;
27357       goto half;
27358     case V8SFmode:
27359       hmode = V4SFmode;
27360       goto half;
27361     case V8SImode:
27362       hmode = V4SImode;
27363       goto half;
27364     case V16HImode:
27365       hmode = V8HImode;
27366       goto half;
27367     case V32QImode:
27368       hmode = V16QImode;
27369       goto half;
27370 half:
27371       {
27372         rtx tmp = gen_reg_rtx (hmode);
27373         ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27374         emit_insn (gen_rtx_SET (VOIDmode, target,
27375                                 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
27376       }
27377       return true;
27378 
27379     default:
27380       return false;
27381     }
27382 }
27383 
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and whose other elements are zero.
   Return true if successful.  */
27387 
27388 static bool
27389 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27390                                      rtx target, rtx var, int one_var)
27391 {
27392   enum machine_mode vsimode;
27393   rtx new_target;
27394   rtx x, tmp;
27395   bool use_vector_set = false;
27396 
27397   switch (mode)
27398     {
27399     case V2DImode:
27400       /* For SSE4.1, we normally use vector set.  But if the second
27401          element is zero and inter-unit moves are OK, we use movq
27402          instead.  */
27403       use_vector_set = (TARGET_64BIT
27404                         && TARGET_SSE4_1
27405                         && !(TARGET_INTER_UNIT_MOVES
27406                              && one_var == 0));
27407       break;
27408     case V16QImode:
27409     case V4SImode:
27410     case V4SFmode:
27411       use_vector_set = TARGET_SSE4_1;
27412       break;
27413     case V8HImode:
27414       use_vector_set = TARGET_SSE2;
27415       break;
27416     case V4HImode:
27417       use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27418       break;
27419     case V32QImode:
27420     case V16HImode:
27421     case V8SImode:
27422     case V8SFmode:
27423     case V4DFmode:
27424       use_vector_set = TARGET_AVX;
27425       break;
27426     case V4DImode:
27427       /* Use ix86_expand_vector_set in 64bit mode only.  */
27428       use_vector_set = TARGET_AVX && TARGET_64BIT;
27429       break;
27430     default:
27431       break;
27432     }
27433 
27434   if (use_vector_set)
27435     {
27436       emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27437       var = force_reg (GET_MODE_INNER (mode), var);
27438       ix86_expand_vector_set (mmx_ok, target, var, one_var);
27439       return true; 
27440     }
27441 
27442   switch (mode)
27443     {
27444     case V2SFmode:
27445     case V2SImode:
27446       if (!mmx_ok)
27447         return false;
27448       /* FALLTHRU */
27449 
27450     case V2DFmode:
27451     case V2DImode:
27452       if (one_var != 0)
27453         return false;
27454       var = force_reg (GET_MODE_INNER (mode), var);
27455       x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27456       emit_insn (gen_rtx_SET (VOIDmode, target, x));
27457       return true;
27458 
27459     case V4SFmode:
27460     case V4SImode:
27461       if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27462         new_target = gen_reg_rtx (mode);
27463       else
27464         new_target = target;
27465       var = force_reg (GET_MODE_INNER (mode), var);
27466       x = gen_rtx_VEC_DUPLICATE (mode, var);
27467       x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27468       emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27469       if (one_var != 0)
27470         {
27471           /* We need to shuffle the value to the correct position, so
27472              create a new pseudo to store the intermediate result.  */
27473 
27474           /* With SSE2, we can use the integer shuffle insns.  */
27475           if (mode != V4SFmode && TARGET_SSE2)
27476             {
27477               emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27478                                             GEN_INT (1),
27479                                             GEN_INT (one_var == 1 ? 0 : 1),
27480                                             GEN_INT (one_var == 2 ? 0 : 1),
27481                                             GEN_INT (one_var == 3 ? 0 : 1)));
27482               if (target != new_target)
27483                 emit_move_insn (target, new_target);
27484               return true;
27485             }
27486 
27487           /* Otherwise convert the intermediate result to V4SFmode and
27488              use the SSE1 shuffle instructions.  */
27489           if (mode != V4SFmode)
27490             {
27491               tmp = gen_reg_rtx (V4SFmode);
27492               emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27493             }
27494           else
27495             tmp = new_target;
27496 
27497           emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27498                                        GEN_INT (1),
27499                                        GEN_INT (one_var == 1 ? 0 : 1),
27500                                        GEN_INT (one_var == 2 ? 0+4 : 1+4),
27501                                        GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27502 
27503           if (mode != V4SFmode)
27504             emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27505           else if (tmp != target)
27506             emit_move_insn (target, tmp);
27507         }
27508       else if (target != new_target)
27509         emit_move_insn (target, new_target);
27510       return true;
27511 
27512     case V8HImode:
27513     case V16QImode:
27514       vsimode = V4SImode;
27515       goto widen;
27516     case V4HImode:
27517     case V8QImode:
27518       if (!mmx_ok)
27519         return false;
27520       vsimode = V2SImode;
27521       goto widen;
27522     widen:
27523       if (one_var != 0)
27524         return false;
27525 
27526       /* Zero extend the variable element to SImode and recurse.  */
27527       var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27528 
27529       x = gen_reg_rtx (vsimode);
27530       if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27531                                                 var, one_var))
27532         gcc_unreachable ();
27533 
27534       emit_move_insn (target, gen_lowpart (mode, x));
27535       return true;
27536 
27537     default:
27538       return false;
27539     }
27540 }
27541 
27542 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
27543    consisting of the values in VALS.  It is known that all elements
27544    except ONE_VAR are constants.  Return true if successful.  */
27545 
27546 static bool
27547 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
27548                                  rtx target, rtx vals, int one_var)
27549 {
27550   rtx var = XVECEXP (vals, 0, one_var);
27551   enum machine_mode wmode;
27552   rtx const_vec, x;
27553 
27554   const_vec = copy_rtx (vals);
27555   XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
27556   const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
27557 
27558   switch (mode)
27559     {
27560     case V2DFmode:
27561     case V2DImode:
27562     case V2SFmode:
27563     case V2SImode:
27564       /* For the two element vectors, it's just as easy to use
27565          the general case.  */
27566       return false;
27567 
27568     case V4DImode:
27569       /* Use ix86_expand_vector_set in 64bit mode only.  */
27570       if (!TARGET_64BIT)
27571         return false;
27572     case V4DFmode:
27573     case V8SFmode:
27574     case V8SImode:
27575     case V16HImode:
27576     case V32QImode:
27577     case V4SFmode:
27578     case V4SImode:
27579     case V8HImode:
27580     case V4HImode:
27581       break;
27582 
27583     case V16QImode:
27584       if (TARGET_SSE4_1)
27585         break;
27586       wmode = V8HImode;
27587       goto widen;
27588     case V8QImode:
27589       wmode = V4HImode;
27590       goto widen;
27591     widen:
27592       /* There's no way to set one QImode entry easily.  Combine
27593          the variable value with its adjacent constant value, and
27594          promote to an HImode set.  */
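      /* Illustration: for one_var == 3 in V8QImode, the variable byte
         is the high half of HImode element 1 (one_var >> 1), so it is
         shifted left by 8 and IORed with the low byte of its constant
         neighbor before the HImode vector-set below.  */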
27595       x = XVECEXP (vals, 0, one_var ^ 1);
27596       if (one_var & 1)
27597         {
27598           var = convert_modes (HImode, QImode, var, true);
27599           var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
27600                                      NULL_RTX, 1, OPTAB_LIB_WIDEN);
27601           x = GEN_INT (INTVAL (x) & 0xff);
27602         }
27603       else
27604         {
27605           var = convert_modes (HImode, QImode, var, true);
27606           x = gen_int_mode (INTVAL (x) << 8, HImode);
27607         }
27608       if (x != const0_rtx)
27609         var = expand_simple_binop (HImode, IOR, var, x, var,
27610                                    1, OPTAB_LIB_WIDEN);
27611 
27612       x = gen_reg_rtx (wmode);
27613       emit_move_insn (x, gen_lowpart (wmode, const_vec));
27614       ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
27615 
27616       emit_move_insn (target, gen_lowpart (mode, x));
27617       return true;
27618 
27619     default:
27620       return false;
27621     }
27622 
27623   emit_move_insn (target, const_vec);
27624   ix86_expand_vector_set (mmx_ok, target, var, one_var);
27625   return true;
27626 }
27627 
27628 /* A subroutine of ix86_expand_vector_init_general.  Use vector
27629    concatenate to handle the most general case: all values variable,
27630    and none identical.  */
27631 
27632 static void
27633 ix86_expand_vector_init_concat (enum machine_mode mode,
27634                                 rtx target, rtx *ops, int n)
27635 {
27636   enum machine_mode cmode, hmode = VOIDmode;
27637   rtx first[8], second[4];
27638   rtvec v;
27639   int i, j;
27640 
27641   switch (n)
27642     {
27643     case 2:
27644       switch (mode)
27645         {
27646         case V8SImode:
27647           cmode = V4SImode;
27648           break;
27649         case V8SFmode:
27650           cmode = V4SFmode;
27651           break;
27652         case V4DImode:
27653           cmode = V2DImode;
27654           break;
27655         case V4DFmode:
27656           cmode = V2DFmode;
27657           break;
27658         case V4SImode:
27659           cmode = V2SImode;
27660           break;
27661         case V4SFmode:
27662           cmode = V2SFmode;
27663           break;
27664         case V2DImode:
27665           cmode = DImode;
27666           break;
27667         case V2SImode:
27668           cmode = SImode;
27669           break;
27670         case V2DFmode:
27671           cmode = DFmode;
27672           break;
27673         case V2SFmode:
27674           cmode = SFmode;
27675           break;
27676         default:
27677           gcc_unreachable ();
27678         }
27679 
27680       if (!register_operand (ops[1], cmode))
27681         ops[1] = force_reg (cmode, ops[1]);
27682       if (!register_operand (ops[0], cmode))
27683         ops[0] = force_reg (cmode, ops[0]);
27684       emit_insn (gen_rtx_SET (VOIDmode, target,
27685                               gen_rtx_VEC_CONCAT (mode, ops[0],
27686                                                   ops[1])));
27687       break;
27688 
27689     case 4:
27690       switch (mode)
27691         {
27692         case V4DImode:
27693           cmode = V2DImode;
27694           break;
27695         case V4DFmode:
27696           cmode = V2DFmode;
27697           break;
27698         case V4SImode:
27699           cmode = V2SImode;
27700           break;
27701         case V4SFmode:
27702           cmode = V2SFmode;
27703           break;
27704         default:
27705           gcc_unreachable ();
27706         }
27707       goto half;
27708 
27709     case 8:
27710       switch (mode)
27711         {
27712         case V8SImode:
27713           cmode = V2SImode;
27714           hmode = V4SImode;
27715           break;
27716         case V8SFmode:
27717           cmode = V2SFmode;
27718           hmode = V4SFmode;
27719           break;
27720         default:
27721           gcc_unreachable ();
27722         }
27723       goto half;
27724 
27725 half:
27726       /* FIXME: We process inputs backward to help RA.  PR 36222.  */
27727       i = n - 1;
27728       j = (n >> 1) - 1;
27729       for (; i > 0; i -= 2, j--)
27730         {
27731           first[j] = gen_reg_rtx (cmode);
27732           v = gen_rtvec (2, ops[i - 1], ops[i]);
27733           ix86_expand_vector_init (false, first[j],
27734                                    gen_rtx_PARALLEL (cmode, v));
27735         }
27736 
27737       n >>= 1;
27738       if (n > 2)
27739         {
27740           gcc_assert (hmode != VOIDmode);
27741           for (i = j = 0; i < n; i += 2, j++)
27742             {
27743               second[j] = gen_reg_rtx (hmode);
27744               ix86_expand_vector_init_concat (hmode, second [j],
27745                                               &first [i], 2);
27746             }
27747           n >>= 1;
27748           ix86_expand_vector_init_concat (mode, target, second, n);
27749         }
27750       else
27751         ix86_expand_vector_init_concat (mode, target, first, n);
27752       break;
27753 
27754     default:
27755       gcc_unreachable ();
27756     }
27757 }
27758 
27759 /* A subroutine of ix86_expand_vector_init_general.  Use vector
27760    interleave to handle the most general case: all values variable,
27761    and none identical.  */
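
/* For V16QImode, for example, each adjacent pair of scalar elements is
   first packed into one register (the first zero-extended into the low
   lane, the second inserted as element 1), and the partial vectors are
   then merged pairwise with low-interleave operations until a single
   full-width vector remains.  */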
27762 
27763 static void
27764 ix86_expand_vector_init_interleave (enum machine_mode mode,
27765                                     rtx target, rtx *ops, int n)
27766 {
27767   enum machine_mode first_imode, second_imode, third_imode, inner_mode;
27768   int i, j;
27769   rtx op0, op1;
27770   rtx (*gen_load_even) (rtx, rtx, rtx);
27771   rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
27772   rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
27773   
27774   switch (mode)
27775     {
27776     case V8HImode:
27777       gen_load_even = gen_vec_setv8hi;
27778       gen_interleave_first_low = gen_vec_interleave_lowv4si;
27779       gen_interleave_second_low = gen_vec_interleave_lowv2di;
27780       inner_mode = HImode;
27781       first_imode = V4SImode;
27782       second_imode = V2DImode;
27783       third_imode = VOIDmode;
27784       break;
27785     case V16QImode:
27786       gen_load_even = gen_vec_setv16qi;
27787       gen_interleave_first_low = gen_vec_interleave_lowv8hi;
27788       gen_interleave_second_low = gen_vec_interleave_lowv4si;
27789       inner_mode = QImode;
27790       first_imode = V8HImode;
27791       second_imode = V4SImode;
27792       third_imode = V2DImode;
27793       break;
27794     default:
27795       gcc_unreachable ();
27796     }
27797      
27798   for (i = 0; i < n; i++)
27799     {
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
27801       op0 = gen_reg_rtx (SImode);
27802       emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
27803 
      /* Insert the SImode value as the low element of a V4SImode vector.  */
27805       op1 = gen_reg_rtx (V4SImode);
27806       op0 = gen_rtx_VEC_MERGE (V4SImode,
27807                                gen_rtx_VEC_DUPLICATE (V4SImode,
27808                                                       op0),
27809                                CONST0_RTX (V4SImode),
27810                                const1_rtx);
27811       emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
27812 
      /* Cast the V4SImode vector back to a vector in the original mode.  */
27814       op0 = gen_reg_rtx (mode);
27815       emit_move_insn (op0, gen_lowpart (mode, op1));
27816       
      /* Load the even element into the second position.  */
27818       emit_insn ((*gen_load_even) (op0,
27819                                    force_reg (inner_mode,
27820                                               ops [i + i + 1]),
27821                                    const1_rtx));
27822 
27823       /* Cast vector to FIRST_IMODE vector.  */
27824       ops[i] = gen_reg_rtx (first_imode);
27825       emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
27826     }
27827 
27828   /* Interleave low FIRST_IMODE vectors.  */
27829   for (i = j = 0; i < n; i += 2, j++)
27830     {
27831       op0 = gen_reg_rtx (first_imode);
27832       emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
27833 
27834       /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
27835       ops[j] = gen_reg_rtx (second_imode);
27836       emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
27837     }
27838 
27839   /* Interleave low SECOND_IMODE vectors.  */
27840   switch (second_imode)
27841     {
27842     case V4SImode:
27843       for (i = j = 0; i < n / 2; i += 2, j++)
27844         {
27845           op0 = gen_reg_rtx (second_imode);
27846           emit_insn ((*gen_interleave_second_low) (op0, ops[i],
27847                                                    ops[i + 1]));
27848 
27849           /* Cast the SECOND_IMODE vector to the THIRD_IMODE
27850              vector.  */
27851           ops[j] = gen_reg_rtx (third_imode);
27852           emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
27853         }
27854       second_imode = V2DImode;
27855       gen_interleave_second_low = gen_vec_interleave_lowv2di;
27856       /* FALLTHRU */
27857 
27858     case V2DImode:
27859       op0 = gen_reg_rtx (second_imode);
27860       emit_insn ((*gen_interleave_second_low) (op0, ops[0],
27861                                                ops[1]));
27862 
      /* Cast the SECOND_IMODE vector back to a vector in the original
         mode.  */
27865       emit_insn (gen_rtx_SET (VOIDmode, target,
27866                               gen_lowpart (mode, op0)));
27867       break;
27868 
27869     default:
27870       gcc_unreachable ();
27871     }
27872 }
27873 
27874 /* A subroutine of ix86_expand_vector_init.  Handle the most general case:
27875    all values variable, and none identical.  */
27876 
27877 static void
27878 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
27879                                  rtx target, rtx vals)
27880 {
27881   rtx ops[32], op0, op1;
27882   enum machine_mode half_mode = VOIDmode;
27883   int n, i;
27884 
27885   switch (mode)
27886     {
27887     case V2SFmode:
27888     case V2SImode:
27889       if (!mmx_ok && !TARGET_SSE)
27890         break;
27891       /* FALLTHRU */
27892 
27893     case V8SFmode:
27894     case V8SImode:
27895     case V4DFmode:
27896     case V4DImode:
27897     case V4SFmode:
27898     case V4SImode:
27899     case V2DFmode:
27900     case V2DImode:
27901       n = GET_MODE_NUNITS (mode);
27902       for (i = 0; i < n; i++)
27903         ops[i] = XVECEXP (vals, 0, i);
27904       ix86_expand_vector_init_concat (mode, target, ops, n);
27905       return;
27906 
27907     case V32QImode:
27908       half_mode = V16QImode;
27909       goto half;
27910 
27911     case V16HImode:
27912       half_mode = V8HImode;
27913       goto half;
27914 
27915 half:
27916       n = GET_MODE_NUNITS (mode);
27917       for (i = 0; i < n; i++)
27918         ops[i] = XVECEXP (vals, 0, i);
27919       op0 = gen_reg_rtx (half_mode);
27920       op1 = gen_reg_rtx (half_mode);
27921       ix86_expand_vector_init_interleave (half_mode, op0, ops,
27922                                           n >> 2);
27923       ix86_expand_vector_init_interleave (half_mode, op1,
27924                                           &ops [n >> 1], n >> 2);
27925       emit_insn (gen_rtx_SET (VOIDmode, target,
27926                               gen_rtx_VEC_CONCAT (mode, op0, op1)));
27927       return;
27928 
27929     case V16QImode:
27930       if (!TARGET_SSE4_1)
27931         break;
27932       /* FALLTHRU */
27933 
27934     case V8HImode:
27935       if (!TARGET_SSE2)
27936         break;
27937 
27938       /* Don't use ix86_expand_vector_init_interleave if we can't
27939          move from GPR to SSE register directly.  */ 
27940       if (!TARGET_INTER_UNIT_MOVES)
27941         break;
27942 
27943       n = GET_MODE_NUNITS (mode);
27944       for (i = 0; i < n; i++)
27945         ops[i] = XVECEXP (vals, 0, i);
27946       ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
27947       return;
27948 
27949     case V4HImode:
27950     case V8QImode:
27951       break;
27952 
27953     default:
27954       gcc_unreachable ();
27955     }
27956 
27957     {
27958       int i, j, n_elts, n_words, n_elt_per_word;
27959       enum machine_mode inner_mode;
27960       rtx words[4], shift;
27961 
27962       inner_mode = GET_MODE_INNER (mode);
27963       n_elts = GET_MODE_NUNITS (mode);
27964       n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
27965       n_elt_per_word = n_elts / n_words;
27966       shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
27967 
27968       for (i = 0; i < n_words; ++i)
27969         {
27970           rtx word = NULL_RTX;
27971 
27972           for (j = 0; j < n_elt_per_word; ++j)
27973             {
27974               rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
27975               elt = convert_modes (word_mode, inner_mode, elt, true);
27976 
27977               if (j == 0)
27978                 word = elt;
27979               else
27980                 {
27981                   word = expand_simple_binop (word_mode, ASHIFT, word, shift,
27982                                               word, 1, OPTAB_LIB_WIDEN);
27983                   word = expand_simple_binop (word_mode, IOR, word, elt,
27984                                               word, 1, OPTAB_LIB_WIDEN);
27985                 }
27986             }
27987 
27988           words[i] = word;
27989         }
27990 
27991       if (n_words == 1)
27992         emit_move_insn (target, gen_lowpart (mode, words[0]));
27993       else if (n_words == 2)
27994         {
27995           rtx tmp = gen_reg_rtx (mode);
27996           emit_clobber (tmp);
27997           emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
27998           emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
27999           emit_move_insn (target, tmp);
28000         }
28001       else if (n_words == 4)
28002         {
28003           rtx tmp = gen_reg_rtx (V4SImode);
28004           gcc_assert (word_mode == SImode);
28005           vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28006           ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28007           emit_move_insn (target, gen_lowpart (mode, tmp));
28008         }
28009       else
28010         gcc_unreachable ();
28011     }
28012 }
28013 
28014 /* Initialize vector TARGET via VALS.  Suppress the use of MMX
28015    instructions unless MMX_OK is true.  */
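
/* The strategy, in order of preference: load fully-constant vectors
   from the constant pool; broadcast when all elements are equal; load
   from the pool and overwrite the single variable element when exactly
   one element varies; otherwise fall back to the general expansion.  */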
28016 
28017 void
28018 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28019 {
28020   enum machine_mode mode = GET_MODE (target);
28021   enum machine_mode inner_mode = GET_MODE_INNER (mode);
28022   int n_elts = GET_MODE_NUNITS (mode);
28023   int n_var = 0, one_var = -1;
28024   bool all_same = true, all_const_zero = true;
28025   int i;
28026   rtx x;
28027 
28028   for (i = 0; i < n_elts; ++i)
28029     {
28030       x = XVECEXP (vals, 0, i);
28031       if (!(CONST_INT_P (x)
28032             || GET_CODE (x) == CONST_DOUBLE
28033             || GET_CODE (x) == CONST_FIXED))
28034         n_var++, one_var = i;
28035       else if (x != CONST0_RTX (inner_mode))
28036         all_const_zero = false;
28037       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28038         all_same = false;
28039     }
28040 
28041   /* Constants are best loaded from the constant pool.  */
28042   if (n_var == 0)
28043     {
28044       emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28045       return;
28046     }
28047 
28048   /* If all values are identical, broadcast the value.  */
28049   if (all_same
28050       && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28051                                             XVECEXP (vals, 0, 0)))
28052     return;
28053 
28054   /* Values where only one field is non-constant are best loaded from
28055      the pool and overwritten via move later.  */
28056   if (n_var == 1)
28057     {
28058       if (all_const_zero
28059           && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28060                                                   XVECEXP (vals, 0, one_var),
28061                                                   one_var))
28062         return;
28063 
28064       if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
28065         return;
28066     }
28067 
28068   ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
28069 }
28070 
28071 void
28072 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28073 {
28074   enum machine_mode mode = GET_MODE (target);
28075   enum machine_mode inner_mode = GET_MODE_INNER (mode);
28076   enum machine_mode half_mode;
28077   bool use_vec_merge = false;
28078   rtx tmp;
28079   static rtx (*gen_extract[6][2]) (rtx, rtx)
28080     = {
28081         { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28082         { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28083         { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28084         { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28085         { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28086         { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28087       };
28088   static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28089     = {
28090         { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28091         { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28092         { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28093         { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28094         { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28095         { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
28096       };
28097   int i, j, n;
28098 
28099   switch (mode)
28100     {
28101     case V2SFmode:
28102     case V2SImode:
28103       if (mmx_ok)
28104         {
28105           tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28106           ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28107           if (elt == 0)
28108             tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28109           else
28110             tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28111           emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28112           return;
28113         }
28114       break;
28115 
28116     case V2DImode:
28117       use_vec_merge = TARGET_SSE4_1;
28118       if (use_vec_merge)
28119         break;
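      /* FALLTHRU */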
28120 
28121     case V2DFmode:
28122       {
28123         rtx op0, op1;
28124 
28125         /* For the two element vectors, we implement a VEC_CONCAT with
28126            the extraction of the other element.  */
28127 
28128         tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28129         tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28130 
28131         if (elt == 0)
28132           op0 = val, op1 = tmp;
28133         else
28134           op0 = tmp, op1 = val;
28135 
28136         tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28137         emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28138       }
28139       return;
28140 
28141     case V4SFmode:
28142       use_vec_merge = TARGET_SSE4_1;
28143       if (use_vec_merge)
28144         break;
28145 
28146       switch (elt)
28147         {
28148         case 0:
28149           use_vec_merge = true;
28150           break;
28151 
28152         case 1:
28153           /* tmp = target = A B C D */
28154           tmp = copy_to_reg (target);
28155           /* target = A A B B */
28156           emit_insn (gen_sse_unpcklps (target, target, target));
28157           /* target = X A B B */
28158           ix86_expand_vector_set (false, target, val, 0);
28159           /* target = A X C D  */
28160           emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28161                                        GEN_INT (1), GEN_INT (0),
28162                                        GEN_INT (2+4), GEN_INT (3+4)));
28163           return;
28164 
28165         case 2:
28166           /* tmp = target = A B C D */
28167           tmp = copy_to_reg (target);
28168           /* tmp = X B C D */
28169           ix86_expand_vector_set (false, tmp, val, 0);
28170           /* target = A B X D */
28171           emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28172                                        GEN_INT (0), GEN_INT (1),
28173                                        GEN_INT (0+4), GEN_INT (3+4)));
28174           return;
28175 
28176         case 3:
28177           /* tmp = target = A B C D */
28178           tmp = copy_to_reg (target);
28179           /* tmp = X B C D */
28180           ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B C X */
28182           emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28183                                        GEN_INT (0), GEN_INT (1),
28184                                        GEN_INT (2+4), GEN_INT (0+4)));
28185           return;
28186 
28187         default:
28188           gcc_unreachable ();
28189         }
28190       break;
28191 
28192     case V4SImode:
28193       use_vec_merge = TARGET_SSE4_1;
28194       if (use_vec_merge)
28195         break;
28196 
28197       /* Element 0 handled by vec_merge below.  */
28198       if (elt == 0)
28199         {
28200           use_vec_merge = true;
28201           break;
28202         }
28203 
28204       if (TARGET_SSE2)
28205         {
28206           /* With SSE2, use integer shuffles to swap element 0 and ELT,
28207              store into element 0, then shuffle them back.  */
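          /* The shuffle order swaps element 0 with element ELT and is
             its own inverse, so applying it twice restores the original
             layout with the new value in place; e.g. for ELT == 2 the
             order is {2, 1, 0, 3}.  */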
28208 
28209           rtx order[4];
28210 
28211           order[0] = GEN_INT (elt);
28212           order[1] = const1_rtx;
28213           order[2] = const2_rtx;
28214           order[3] = GEN_INT (3);
28215           order[elt] = const0_rtx;
28216 
28217           emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28218                                         order[1], order[2], order[3]));
28219 
28220           ix86_expand_vector_set (false, target, val, 0);
28221 
28222           emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28223                                         order[1], order[2], order[3]));
28224         }
28225       else
28226         {
28227           /* For SSE1, we have to reuse the V4SF code.  */
28228           ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28229                                   gen_lowpart (SFmode, val), elt);
28230         }
28231       return;
28232 
28233     case V8HImode:
28234       use_vec_merge = TARGET_SSE2;
28235       break;
28236     case V4HImode:
28237       use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28238       break;
28239 
28240     case V16QImode:
28241       use_vec_merge = TARGET_SSE4_1;
28242       break;
28243 
28244     case V8QImode:
28245       break;
28246 
28247     case V32QImode:
28248       half_mode = V16QImode;
28249       j = 0;
28250       n = 16;
28251       goto half;
28252 
28253     case V16HImode:
28254       half_mode = V8HImode;
28255       j = 1;
28256       n = 8;
28257       goto half;
28258 
28259     case V8SImode:
28260       half_mode = V4SImode;
28261       j = 2;
28262       n = 4;
28263       goto half;
28264 
28265     case V4DImode:
28266       half_mode = V2DImode;
28267       j = 3;
28268       n = 2;
28269       goto half;
28270 
28271     case V8SFmode:
28272       half_mode = V4SFmode;
28273       j = 4;
28274       n = 4;
28275       goto half;
28276 
28277     case V4DFmode:
28278       half_mode = V2DFmode;
28279       j = 5;
28280       n = 2;
28281       goto half;
28282 
28283 half:
28284       /* Compute offset.  */
28285       i = elt / n;
28286       elt %= n;
28287 
28288       gcc_assert (i <= 1);
28289 
28290       /* Extract the half.  */
28291       tmp = gen_reg_rtx (half_mode);
28292       emit_insn ((*gen_extract[j][i]) (tmp, target));
28293 
28294       /* Put val in tmp at elt.  */
28295       ix86_expand_vector_set (false, tmp, val, elt);
28296 
28297       /* Put it back.  */
28298       emit_insn ((*gen_insert[j][i]) (target, target, tmp));
28299       return;
28300 
28301     default:
28302       break;
28303     }
28304 
28305   if (use_vec_merge)
28306     {
28307       tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28308       tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28309       emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28310     }
28311   else
28312     {
28313       rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28314 
28315       emit_move_insn (mem, target);
28316 
28317       tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28318       emit_move_insn (tmp, val);
28319 
28320       emit_move_insn (target, mem);
28321     }
28322 }
28323 
28324 void
28325 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28326 {
28327   enum machine_mode mode = GET_MODE (vec);
28328   enum machine_mode inner_mode = GET_MODE_INNER (mode);
28329   bool use_vec_extr = false;
28330   rtx tmp;
28331 
28332   switch (mode)
28333     {
28334     case V2SImode:
28335     case V2SFmode:
28336       if (!mmx_ok)
28337         break;
28338       /* FALLTHRU */
28339 
28340     case V2DFmode:
28341     case V2DImode:
28342       use_vec_extr = true;
28343       break;
28344 
28345     case V4SFmode:
28346       use_vec_extr = TARGET_SSE4_1;
28347       if (use_vec_extr)
28348         break;
28349 
28350       switch (elt)
28351         {
28352         case 0:
28353           tmp = vec;
28354           break;
28355 
28356         case 1:
28357         case 3:
28358           tmp = gen_reg_rtx (mode);
28359           emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28360                                        GEN_INT (elt), GEN_INT (elt),
28361                                        GEN_INT (elt+4), GEN_INT (elt+4)));
28362           break;
28363 
28364         case 2:
28365           tmp = gen_reg_rtx (mode);
28366           emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28367           break;
28368 
28369         default:
28370           gcc_unreachable ();
28371         }
28372       vec = tmp;
28373       use_vec_extr = true;
28374       elt = 0;
28375       break;
28376 
28377     case V4SImode:
28378       use_vec_extr = TARGET_SSE4_1;
28379       if (use_vec_extr)
28380         break;
28381 
28382       if (TARGET_SSE2)
28383         {
28384           switch (elt)
28385             {
28386             case 0:
28387               tmp = vec;
28388               break;
28389 
28390             case 1:
28391             case 3:
28392               tmp = gen_reg_rtx (mode);
28393               emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28394                                             GEN_INT (elt), GEN_INT (elt),
28395                                             GEN_INT (elt), GEN_INT (elt)));
28396               break;
28397 
28398             case 2:
28399               tmp = gen_reg_rtx (mode);
28400               emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28401               break;
28402 
28403             default:
28404               gcc_unreachable ();
28405             }
28406           vec = tmp;
28407           use_vec_extr = true;
28408           elt = 0;
28409         }
28410       else
28411         {
28412           /* For SSE1, we have to reuse the V4SF code.  */
28413           ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28414                                       gen_lowpart (V4SFmode, vec), elt);
28415           return;
28416         }
28417       break;
28418 
28419     case V8HImode:
28420       use_vec_extr = TARGET_SSE2;
28421       break;
28422     case V4HImode:
28423       use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28424       break;
28425 
28426     case V16QImode:
28427       use_vec_extr = TARGET_SSE4_1;
28428       break;
28429 
28430     case V8QImode:
28431       /* ??? Could extract the appropriate HImode element and shift.  */
28432     default:
28433       break;
28434     }
28435 
28436   if (use_vec_extr)
28437     {
28438       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28439       tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28440 
28441       /* Let the rtl optimizers know about the zero extension performed.  */
28442       if (inner_mode == QImode || inner_mode == HImode)
28443         {
28444           tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28445           target = gen_lowpart (SImode, target);
28446         }
28447 
28448       emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28449     }
28450   else
28451     {
28452       rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28453 
28454       emit_move_insn (mem, vec);
28455 
28456       tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28457       emit_move_insn (target, tmp);
28458     }
28459 }
28460 
28461 /* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
28462    pattern to reduce; DEST is the destination; IN is the input vector.  */
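
/* The lanes are folded pairwise: movhlps copies elements 2 and 3 of IN
   into the low half of a temporary, FN combines those with elements 0
   and 1, shufps broadcasts element 1 of that partial result, and a
   final FN leaves the complete reduction in element 0 of DEST.  */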
28463 
28464 void
28465 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28466 {
28467   rtx tmp1, tmp2, tmp3;
28468 
28469   tmp1 = gen_reg_rtx (V4SFmode);
28470   tmp2 = gen_reg_rtx (V4SFmode);
28471   tmp3 = gen_reg_rtx (V4SFmode);
28472 
28473   emit_insn (gen_sse_movhlps (tmp1, in, in));
28474   emit_insn (fn (tmp2, tmp1, in));
28475 
28476   emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28477                                GEN_INT (1), GEN_INT (1),
28478                                GEN_INT (1+4), GEN_INT (1+4)));
28479   emit_insn (fn (dest, tmp2, tmp3));
28480 }
28481 
28482 /* Target hook for scalar_mode_supported_p.  */
28483 static bool
28484 ix86_scalar_mode_supported_p (enum machine_mode mode)
28485 {
28486   if (DECIMAL_FLOAT_MODE_P (mode))
28487     return true;
28488   else if (mode == TFmode)
28489     return true;
28490   else
28491     return default_scalar_mode_supported_p (mode);
28492 }
28493 
28494 /* Implements target hook vector_mode_supported_p.  */
28495 static bool
28496 ix86_vector_mode_supported_p (enum machine_mode mode)
28497 {
28498   if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28499     return true;
28500   if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28501     return true;
28502   if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28503     return true;
28504   if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28505     return true;
28506   if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28507     return true;
28508   return false;
28509 }
28510 
28511 /* Target hook for c_mode_for_suffix.  */
28512 static enum machine_mode
28513 ix86_c_mode_for_suffix (char suffix)
28514 {
28515   if (suffix == 'q')
28516     return TFmode;
28517   if (suffix == 'w')
28518     return XFmode;
28519 
28520   return VOIDmode;
28521 }
28522 
28523 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28524 
28525    We do this in the new i386 backend to maintain source compatibility
28526    with the old cc0-based compiler.  */
28527 
28528 static tree
28529 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28530                       tree inputs ATTRIBUTE_UNUSED,
28531                       tree clobbers)
28532 {
28533   clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28534                         clobbers);
28535   clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28536                         clobbers);
28537   return clobbers;
28538 }
28539 
/* Implements the target hook targetm.asm.encode_section_info.  This
   is not used by NetWare.  */
28542 
28543 static void ATTRIBUTE_UNUSED
28544 ix86_encode_section_info (tree decl, rtx rtl, int first)
28545 {
28546   default_encode_section_info (decl, rtl, first);
28547 
28548   if (TREE_CODE (decl) == VAR_DECL
28549       && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
28550       && ix86_in_large_data_p (decl))
28551     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
28552 }
28553 
28554 /* Worker function for REVERSE_CONDITION.  */
28555 
28556 enum rtx_code
28557 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
28558 {
28559   return (mode != CCFPmode && mode != CCFPUmode
28560           ? reverse_condition (code)
28561           : reverse_condition_maybe_unordered (code));
28562 }
28563 
28564 /* Output code to perform an x87 FP register move, from OPERANDS[1]
28565    to OPERANDS[0].  */
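
/* The popping fstp form is used when the source register dies in INSN,
   so the x87 register stack stays balanced without a separate ffree.  */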
28566 
28567 const char *
28568 output_387_reg_move (rtx insn, rtx *operands)
28569 {
28570   if (REG_P (operands[0]))
28571     {
28572       if (REG_P (operands[1])
28573           && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28574         {
28575           if (REGNO (operands[0]) == FIRST_STACK_REG)
28576             return output_387_ffreep (operands, 0);
28577           return "fstp\t%y0";
28578         }
28579       if (STACK_TOP_P (operands[0]))
28580         return "fld%z1\t%y1";
28581       return "fst\t%y0";
28582     }
28583   else if (MEM_P (operands[0]))
28584     {
28585       gcc_assert (REG_P (operands[1]));
28586       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28587         return "fstp%z0\t%y0";
28588       else
28589         {
28590           /* There is no non-popping store to memory for XFmode.
28591              So if we need one, follow the store with a load.  */
28592           if (GET_MODE (operands[0]) == XFmode)
28593             return "fstp%z0\t%y0\n\tfld%z0\t%y0";
28594           else
28595             return "fst%z0\t%y0";
28596         }
28597     }
28598   else
28599     gcc_unreachable();
28600 }
28601 
/* Output code to perform a conditional jump to LABEL, if the C2 flag in the
   FP status register is set.  */
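
/* fnstsw stores the FP status word into AX.  With SAHF the condition
   bits move into EFLAGS, where C2 maps to the parity flag and is
   tested as UNORDERED; otherwise bit 2 (mask 0x04) of the status
   word's high byte, which holds C2, is tested directly.  */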
28604 
28605 void
28606 ix86_emit_fp_unordered_jump (rtx label)
28607 {
28608   rtx reg = gen_reg_rtx (HImode);
28609   rtx temp;
28610 
28611   emit_insn (gen_x86_fnstsw_1 (reg));
28612 
28613   if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
28614     {
28615       emit_insn (gen_x86_sahf_1 (reg));
28616 
28617       temp = gen_rtx_REG (CCmode, FLAGS_REG);
28618       temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
28619     }
28620   else
28621     {
28622       emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
28623 
28624       temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28625       temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
28626     }
28627 
28628   temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
28629                               gen_rtx_LABEL_REF (VOIDmode, label),
28630                               pc_rtx);
28631   temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
28632 
28633   emit_jump_insn (temp);
28634   predict_jump (REG_BR_PROB_BASE * 10 / 100);
28635 }
28636 
28637 /* Output code to perform a log1p XFmode calculation.  */
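
/* fyl2xp1 computes y * log2 (x + 1) but is only specified for
   |x| < 1 - sqrt (2) / 2 (about 0.2929, the constant compared against
   below); with y = ln (2) loaded by fldln2 it yields ln (1 + x).
   Larger arguments take the plain fyl2x path on 1 + x instead.  */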
28638 
28639 void ix86_emit_i387_log1p (rtx op0, rtx op1)
28640 {
28641   rtx label1 = gen_label_rtx ();
28642   rtx label2 = gen_label_rtx ();
28643 
28644   rtx tmp = gen_reg_rtx (XFmode);
28645   rtx tmp2 = gen_reg_rtx (XFmode);
28646 
28647   emit_insn (gen_absxf2 (tmp, op1));
28648   emit_insn (gen_cmpxf (tmp,
28649     CONST_DOUBLE_FROM_REAL_VALUE (
28650        REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
28651        XFmode)));
28652   emit_jump_insn (gen_bge (label1));
28653 
28654   emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28655   emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
28656   emit_jump (label2);
28657 
28658   emit_label (label1);
28659   emit_move_insn (tmp, CONST1_RTX (XFmode));
28660   emit_insn (gen_addxf3 (tmp, op1, tmp));
28661   emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28662   emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
28663 
28664   emit_label (label2);
28665 }
28666 
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */
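
/* A Newton-Raphson step for f(x) = 1/x - b refines an estimate x0 of
   the reciprocal as x1 = x0 * (2 - b * x0); one step roughly doubles
   the number of correct bits in the hardware rcp estimate, and the
   quotient is then obtained as a * x1.  */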
28669 
28670 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
28671 {
28672   rtx x0, x1, e0, e1, two;
28673 
28674   x0 = gen_reg_rtx (mode);
28675   e0 = gen_reg_rtx (mode);
28676   e1 = gen_reg_rtx (mode);
28677   x1 = gen_reg_rtx (mode);
28678 
28679   two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
28680 
28681   if (VECTOR_MODE_P (mode))
28682     two = ix86_build_const_vector (SFmode, true, two);
28683 
28684   two = force_reg (mode, two);
28685 
28686   /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
28687 
28688   /* x0 = rcp(b) estimate */
28689   emit_insn (gen_rtx_SET (VOIDmode, x0,
28690                           gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
28691                                           UNSPEC_RCP)));
28692   /* e0 = x0 * b */
28693   emit_insn (gen_rtx_SET (VOIDmode, e0,
28694                           gen_rtx_MULT (mode, x0, b)));
28695   /* e1 = 2. - e0 */
28696   emit_insn (gen_rtx_SET (VOIDmode, e1,
28697                           gen_rtx_MINUS (mode, two, e0)));
28698   /* x1 = x0 * e1 */
28699   emit_insn (gen_rtx_SET (VOIDmode, x1,
28700                           gen_rtx_MULT (mode, x0, e1)));
28701   /* res = a * x1 */
28702   emit_insn (gen_rtx_SET (VOIDmode, res,
28703                           gen_rtx_MULT (mode, a, x1)));
28704 }
28705 
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */
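
/* A Newton-Raphson step for f(x) = 1/x^2 - a refines an estimate x0 of
   rsqrt (a) as x1 = x0 * (3 - a * x0 * x0) / 2, rewritten below as
   -0.5 * x0 * (a * x0 * x0 - 3) so that the constants -3 and -0.5 are
   materialized once; multiplying by a first turns the same refinement
   into an approximation of sqrt (a).  */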
28708 
28709 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
28710                          bool recip)
28711 {
28712   rtx x0, e0, e1, e2, e3, mthree, mhalf;
28713   REAL_VALUE_TYPE r;
28714 
28715   x0 = gen_reg_rtx (mode);
28716   e0 = gen_reg_rtx (mode);
28717   e1 = gen_reg_rtx (mode);
28718   e2 = gen_reg_rtx (mode);
28719   e3 = gen_reg_rtx (mode);
28720 
28721   real_from_integer (&r, VOIDmode, -3, -1, 0);
28722   mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28723 
28724   real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
28725   mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28726 
28727   if (VECTOR_MODE_P (mode))
28728     {
28729       mthree = ix86_build_const_vector (SFmode, true, mthree);
28730       mhalf = ix86_build_const_vector (SFmode, true, mhalf);
28731     }
28732 
28733   /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
28734      rsqrt(a) = -0.5     * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
28735 
28736   /* x0 = rsqrt(a) estimate */
28737   emit_insn (gen_rtx_SET (VOIDmode, x0,
28738                           gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
28739                                           UNSPEC_RSQRT)));
28740 
  /* If a == 0.0, mask out the infinite rsqrt (a) estimate so that
     sqrt (0.0) yields 0.0 rather than 0.0 * inf = NaN.  */
28742   if (!recip)
28743     {
28744       rtx zero, mask;
28745 
28746       zero = gen_reg_rtx (mode);
28747       mask = gen_reg_rtx (mode);
28748 
28749       zero = force_reg (mode, CONST0_RTX(mode));
28750       emit_insn (gen_rtx_SET (VOIDmode, mask,
28751                               gen_rtx_NE (mode, zero, a)));
28752 
28753       emit_insn (gen_rtx_SET (VOIDmode, x0,
28754                               gen_rtx_AND (mode, x0, mask)));
28755     }
28756 
28757   /* e0 = x0 * a */
28758   emit_insn (gen_rtx_SET (VOIDmode, e0,
28759                           gen_rtx_MULT (mode, x0, a)));
28760   /* e1 = e0 * x0 */
28761   emit_insn (gen_rtx_SET (VOIDmode, e1,
28762                           gen_rtx_MULT (mode, e0, x0)));
28763 
28764   /* e2 = e1 - 3. */
28765   mthree = force_reg (mode, mthree);
28766   emit_insn (gen_rtx_SET (VOIDmode, e2,
28767                           gen_rtx_PLUS (mode, e1, mthree)));
28768 
28769   mhalf = force_reg (mode, mhalf);
28770   if (recip)
28771     /* e3 = -.5 * x0 */
28772     emit_insn (gen_rtx_SET (VOIDmode, e3,
28773                             gen_rtx_MULT (mode, x0, mhalf)));
28774   else
28775     /* e3 = -.5 * e0 */
28776     emit_insn (gen_rtx_SET (VOIDmode, e3,
28777                             gen_rtx_MULT (mode, e0, mhalf)));
28778   /* ret = e2 * e3 */
28779   emit_insn (gen_rtx_SET (VOIDmode, res,
28780                           gen_rtx_MULT (mode, e2, e3)));
28781 }
28782 
28783 /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
28784 
28785 static void ATTRIBUTE_UNUSED
28786 i386_solaris_elf_named_section (const char *name, unsigned int flags,
28787                                 tree decl)
28788 {
28789   /* With Binutils 2.15, the "@unwind" marker must be specified on
28790      every occurrence of the ".eh_frame" section, not just the first
28791      one.  */
28792   if (TARGET_64BIT
28793       && strcmp (name, ".eh_frame") == 0)
28794     {
28795       fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
28796                flags & SECTION_WRITE ? "aw" : "a");
28797       return;
28798     }
28799   default_elf_asm_named_section (name, flags, decl);
28800 }
28801 
28802 /* Return the mangling of TYPE if it is an extended fundamental type.  */
28803 
28804 static const char *
28805 ix86_mangle_type (const_tree type)
28806 {
28807   type = TYPE_MAIN_VARIANT (type);
28808 
28809   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28810       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28811     return NULL;
28812 
28813   switch (TYPE_MODE (type))
28814     {
28815     case TFmode:
28816       /* __float128 is "g".  */
28817       return "g";
28818     case XFmode:
28819       /* "long double" or __float80 is "e".  */
28820       return "e";
28821     default:
28822       return NULL;
28823     }
28824 }
28825 
28826 /* For 32-bit code we can save PIC register setup by using
28827    __stack_chk_fail_local hidden function instead of calling
28828    __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
28829    register, so it is better to call __stack_chk_fail directly.  */
28830 
28831 static tree
28832 ix86_stack_protect_fail (void)
28833 {
28834   return TARGET_64BIT
28835          ? default_external_stack_protect_fail ()
28836          : default_hidden_stack_protect_fail ();
28837 }
28838 
28839 /* Select a format to encode pointers in exception handling data.  CODE
28840    is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
28841    true if the symbol may be affected by dynamic relocations.
28842 
28843    ??? All x86 object file formats are capable of representing this.
28844    After all, the relocation needed is the same as for the call insn.
28845    Whether or not a particular assembler allows us to enter such, I
28846    guess we'll have to see.  */
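/* For instance, 32-bit PIC code gets DW_EH_PE_pcrel | DW_EH_PE_sdata4,
   with DW_EH_PE_indirect or'ed in for global symbols; non-PIC 64-bit
   small-model code gets DW_EH_PE_udata4.  */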
28847 int
28848 asm_preferred_eh_data_format (int code, int global)
28849 {
28850   if (flag_pic)
28851     {
28852       int type = DW_EH_PE_sdata8;
28853       if (!TARGET_64BIT
28854           || ix86_cmodel == CM_SMALL_PIC
28855           || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
28856         type = DW_EH_PE_sdata4;
28857       return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
28858     }
28859   if (ix86_cmodel == CM_SMALL
28860       || (ix86_cmodel == CM_MEDIUM && code))
28861     return DW_EH_PE_udata4;
28862   return DW_EH_PE_absptr;
28863 }
28864 
/* Expand copysign: combine the sign of SIGN with the positive value
   ABS_VALUE and store the result in RESULT.  If MASK is non-null, it
   shall be a mask that masks out the sign bit, as produced by
   ix86_expand_sse_fabs.  */
28868 static void
28869 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
28870 {
28871   enum machine_mode mode = GET_MODE (sign);
28872   rtx sgn = gen_reg_rtx (mode);
28873   if (mask == NULL_RTX)
28874     {
28875       mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
28876       if (!VECTOR_MODE_P (mode))
28877         {
28878           /* We need to generate a scalar mode mask in this case.  */
28879           rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28880           tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28881           mask = gen_reg_rtx (mode);
28882           emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
28883         }
28884     }
28885   else
28886     mask = gen_rtx_NOT (mode, mask);
28887   emit_insn (gen_rtx_SET (VOIDmode, sgn,
28888                           gen_rtx_AND (mode, mask, sign)));
28889   emit_insn (gen_rtx_SET (VOIDmode, result,
28890                           gen_rtx_IOR (mode, abs_value, sgn)));
28891 }
28892 
28893 /* Expand fabs (OP0) and return a new rtx that holds the result.  The
28894    mask for masking out the sign-bit is stored in *SMASK, if that is
28895    non-null.  */
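/* E.g. for DFmode the mask built below is 0x7fffffffffffffff (all bits
   except the sign bit), so the AND clears the sign; for SFmode it is
   0x7fffffff.  */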
28896 static rtx
28897 ix86_expand_sse_fabs (rtx op0, rtx *smask)
28898 {
28899   enum machine_mode mode = GET_MODE (op0);
28900   rtx xa, mask;
28901 
28902   xa = gen_reg_rtx (mode);
28903   mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
28904   if (!VECTOR_MODE_P (mode))
28905     {
28906       /* We need to generate a scalar mode mask in this case.  */
28907       rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28908       tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28909       mask = gen_reg_rtx (mode);
28910       emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
28911     }
28912   emit_insn (gen_rtx_SET (VOIDmode, xa,
28913                           gen_rtx_AND (mode, op0, mask)));
28914 
28915   if (smask)
28916     *smask = mask;
28917 
28918   return xa;
28919 }
28920 
28921 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
28922    swapping the operands if SWAP_OPERANDS is true.  The expanded
28923    code is a forward jump to a newly created label in case the
28924    comparison is true.  The generated label rtx is returned.  */
28925 static rtx
28926 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
28927                                   bool swap_operands)
28928 {
28929   rtx label, tmp;
28930 
28931   if (swap_operands)
28932     {
28933       tmp = op0;
28934       op0 = op1;
28935       op1 = tmp;
28936     }
28937 
28938   label = gen_label_rtx ();
28939   tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
28940   emit_insn (gen_rtx_SET (VOIDmode, tmp,
28941                           gen_rtx_COMPARE (CCFPUmode, op0, op1)));
28942   tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
28943   tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
28944                               gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
28945   tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
28946   JUMP_LABEL (tmp) = label;
28947 
28948   return label;
28949 }
28950 
28951 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
28952    using comparison code CODE.  Operands are swapped for the comparison if
28953    SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
28954 static rtx
28955 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
28956                               bool swap_operands)
28957 {
28958   enum machine_mode mode = GET_MODE (op0);
28959   rtx mask = gen_reg_rtx (mode);
28960 
28961   if (swap_operands)
28962     {
28963       rtx tmp = op0;
28964       op0 = op1;
28965       op1 = tmp;
28966     }
28967 
28968   if (mode == DFmode)
28969     emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
28970                                     gen_rtx_fmt_ee (code, mode, op0, op1)));
28971   else
28972     emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
28973                                    gen_rtx_fmt_ee (code, mode, op0, op1)));
28974 
28975   return mask;
28976 }
28977 
28978 /* Generate and return a rtx of mode MODE for 2**n where n is the number
28979    of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
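/* That is, 2**52 == 4503599627370496.0 for DFmode and 2**23 ==
   8388608.0 for SFmode; at and above this magnitude the format has no
   fractional bits, which is what the rounding sequences below rely
   on.  */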
28980 static rtx
28981 ix86_gen_TWO52 (enum machine_mode mode)
28982 {
28983   REAL_VALUE_TYPE TWO52r;
28984   rtx TWO52;
28985 
28986   real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
28987   TWO52 = const_double_from_real_value (TWO52r, mode);
28988   TWO52 = force_reg (mode, TWO52);
28989 
28990   return TWO52;
28991 }
28992 
28993 /* Expand SSE sequence for computing lround from OP1 storing
28994    into OP0.  */
28995 void
28996 ix86_expand_lround (rtx op0, rtx op1)
28997 {
28998   /* C code for the stuff we're doing below:
28999        tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29000        return (long)tmp;
29001    */
29002   enum machine_mode mode = GET_MODE (op1);
29003   const struct real_format *fmt;
29004   REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29005   rtx adj;
29006 
29007   /* load nextafter (0.5, 0.0) */
29008   fmt = REAL_MODE_FORMAT (mode);
29009   real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29010   REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
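  /* For binary64, fmt->p is 53, so pred_half is 0.5 - 2**-54, the
     largest double below 0.5; using it instead of 0.5 keeps inputs
     like 0.49999999999999994 from rounding up to 1.  */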
29011 
  /* adj = copysign (nextafter (0.5, 0.0), op1) */
29013   adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29014   ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29015 
29016   /* adj = op1 + adj */
29017   adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29018 
29019   /* op0 = (imode)adj */
29020   expand_fix (op0, adj, 0);
29021 }
29022 
/* Expand SSE2 sequence for computing lfloor (when DO_FLOOR) or lceil
   from OP1, storing into OP0.  */
29025 void
29026 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29027 {
29028   /* C code for the stuff we're doing below (for do_floor):
29029         xi = (long)op1;
29030         xi -= (double)xi > op1 ? 1 : 0;
29031         return xi;
29032    */
29033   enum machine_mode fmode = GET_MODE (op1);
29034   enum machine_mode imode = GET_MODE (op0);
29035   rtx ireg, freg, label, tmp;
29036 
  /* ireg = (long)op1 */
29038   ireg = gen_reg_rtx (imode);
29039   expand_fix (ireg, op1, 0);
29040 
  /* freg = (double)ireg */
29042   freg = gen_reg_rtx (fmode);
29043   expand_float (freg, ireg, 0);
29044 
29045   /* ireg = (freg > op1) ? ireg - 1 : ireg */
29046   label = ix86_expand_sse_compare_and_jump (UNLE,
29047                                             freg, op1, !do_floor);
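  /* The jump is taken, skipping the adjustment, when freg <= op1 for
     floor (or op1 <= freg for ceil, via the swapped operands),
     including the unordered case.  */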
29048   tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29049                              ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29050   emit_move_insn (ireg, tmp);
29051 
29052   emit_label (label);
29053   LABEL_NUSES (label) = 1;
29054 
29055   emit_move_insn (op0, ireg);
29056 }
29057 
29058 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29059    result in OPERAND0.  */
29060 void
29061 ix86_expand_rint (rtx operand0, rtx operand1)
29062 {
29063   /* C code for the stuff we're doing below:
29064         xa = fabs (operand1);
29065         if (!isless (xa, 2**52))
29066           return operand1;
29067         xa = xa + 2**52 - 2**52;
29068         return copysign (xa, operand1);
29069    */
29070   enum machine_mode mode = GET_MODE (operand0);
29071   rtx res, xa, label, TWO52, mask;
29072 
29073   res = gen_reg_rtx (mode);
29074   emit_move_insn (res, operand1);
29075 
29076   /* xa = abs (operand1) */
29077   xa = ix86_expand_sse_fabs (res, &mask);
29078 
29079   /* if (!isless (xa, TWO52)) goto label; */
29080   TWO52 = ix86_gen_TWO52 (mode);
29081   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29082 
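  /* xa + TWO52 - TWO52 rounds xa to an integer: at magnitude 2**52
     the mantissa holds no fractional bits, so the addition rounds in
     the current (round-to-nearest-even) mode, exactly what rint
     wants.  */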
29083   xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29084   xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29085 
29086   ix86_sse_copysign_to_positive (res, xa, res, mask);
29087 
29088   emit_label (label);
29089   LABEL_NUSES (label) = 1;
29090 
29091   emit_move_insn (operand0, res);
29092 }
29093 
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  This variant avoids DImode truncation, so it also works
   for DFmode on 32-bit targets.  */
29096 void
29097 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29098 {
29099   /* C code for the stuff we expand below.
29100         double xa = fabs (x), x2;
29101         if (!isless (xa, TWO52))
29102           return x;
29103         xa = xa + TWO52 - TWO52;
29104         x2 = copysign (xa, x);
29105      Compensate.  Floor:
29106         if (x2 > x)
29107           x2 -= 1;
29108      Compensate.  Ceil:
29109         if (x2 < x)
29110           x2 -= -1;
29111         return x2;
29112    */
29113   enum machine_mode mode = GET_MODE (operand0);
29114   rtx xa, TWO52, tmp, label, one, res, mask;
29115 
29116   TWO52 = ix86_gen_TWO52 (mode);
29117 
29118   /* Temporary for holding the result, initialized to the input
29119      operand to ease control flow.  */
29120   res = gen_reg_rtx (mode);
29121   emit_move_insn (res, operand1);
29122 
29123   /* xa = abs (operand1) */
29124   xa = ix86_expand_sse_fabs (res, &mask);
29125 
29126   /* if (!isless (xa, TWO52)) goto label; */
29127   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29128 
29129   /* xa = xa + TWO52 - TWO52; */
29130   xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29131   xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29132 
29133   /* xa = copysign (xa, operand1) */
29134   ix86_sse_copysign_to_positive (xa, xa, res, mask);
29135 
29136   /* generate 1.0 or -1.0 */
29137   one = force_reg (mode,
29138                    const_double_from_real_value (do_floor
29139                                                  ? dconst1 : dconstm1, mode));
29140 
29141   /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29142   tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29143   emit_insn (gen_rtx_SET (VOIDmode, tmp,
29144                           gen_rtx_AND (mode, one, tmp)));
29145   /* We always need to subtract here to preserve signed zero.  */
29146   tmp = expand_simple_binop (mode, MINUS,
29147                              xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29148   emit_move_insn (res, tmp);
29149 
29150   emit_label (label);
29151   LABEL_NUSES (label) = 1;
29152 
29153   emit_move_insn (operand0, res);
29154 }
29155 
29156 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29157    into OPERAND0.  */
29158 void
29159 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29160 {
29161   /* C code for the stuff we expand below.
29162         double xa = fabs (x), x2;
29163         if (!isless (xa, TWO52))
29164           return x;
29165         x2 = (double)(long)x;
29166      Compensate.  Floor:
29167         if (x2 > x)
29168           x2 -= 1;
29169      Compensate.  Ceil:
29170         if (x2 < x)
29171           x2 += 1;
29172         if (HONOR_SIGNED_ZEROS (mode))
29173           return copysign (x2, x);
29174         return x2;
29175    */
29176   enum machine_mode mode = GET_MODE (operand0);
29177   rtx xa, xi, TWO52, tmp, label, one, res, mask;
29178 
29179   TWO52 = ix86_gen_TWO52 (mode);
29180 
29181   /* Temporary for holding the result, initialized to the input
29182      operand to ease control flow.  */
29183   res = gen_reg_rtx (mode);
29184   emit_move_insn (res, operand1);
29185 
29186   /* xa = abs (operand1) */
29187   xa = ix86_expand_sse_fabs (res, &mask);
29188 
29189   /* if (!isless (xa, TWO52)) goto label; */
29190   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29191 
29192   /* xa = (double)(long)x */
29193   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29194   expand_fix (xi, res, 0);
29195   expand_float (xa, xi, 0);
29196 
29197   /* generate 1.0 */
29198   one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29199 
29200   /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29201   tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29202   emit_insn (gen_rtx_SET (VOIDmode, tmp,
29203                           gen_rtx_AND (mode, one, tmp)));
29204   tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29205                              xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29206   emit_move_insn (res, tmp);
29207 
29208   if (HONOR_SIGNED_ZEROS (mode))
29209     ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29210 
29211   emit_label (label);
29212   LABEL_NUSES (label) = 1;
29213 
29214   emit_move_insn (operand0, res);
29215 }
29216 
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  This sequence works without relying on DImode
   truncation via cvttsd2siq, which is only available on 64-bit
   targets.  */
29220 void
29221 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29222 {
29223   /* C code for the stuff we expand below.
        double xa = fabs (x), xa2, dxa, x2;
29225         if (!isless (xa, TWO52))
29226           return x;
29227      Using the absolute value and copying back sign makes
29228      -0.0 -> -0.0 correct.
29229         xa2 = xa + TWO52 - TWO52;
29230      Compensate.
29231         dxa = xa2 - xa;
29232         if (dxa <= -0.5)
29233           xa2 += 1;
29234         else if (dxa > 0.5)
29235           xa2 -= 1;
29236         x2 = copysign (xa2, x);
29237         return x2;
29238    */
29239   enum machine_mode mode = GET_MODE (operand0);
29240   rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29241 
29242   TWO52 = ix86_gen_TWO52 (mode);
29243 
29244   /* Temporary for holding the result, initialized to the input
29245      operand to ease control flow.  */
29246   res = gen_reg_rtx (mode);
29247   emit_move_insn (res, operand1);
29248 
29249   /* xa = abs (operand1) */
29250   xa = ix86_expand_sse_fabs (res, &mask);
29251 
29252   /* if (!isless (xa, TWO52)) goto label; */
29253   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29254 
29255   /* xa2 = xa + TWO52 - TWO52; */
29256   xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29257   xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29258 
29259   /* dxa = xa2 - xa; */
29260   dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29261 
29262   /* generate 0.5, 1.0 and -0.5 */
29263   half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29264   one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29265   mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29266                                0, OPTAB_DIRECT);
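  /* (1.0 and -0.5 are derived arithmetically from 0.5, presumably to
     avoid two further constant-pool loads.)  */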
29267 
29268   /* Compensate.  */
29270   /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29271   tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29272   emit_insn (gen_rtx_SET (VOIDmode, tmp,
29273                           gen_rtx_AND (mode, one, tmp)));
29274   xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29275   /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29276   tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29277   emit_insn (gen_rtx_SET (VOIDmode, tmp,
29278                           gen_rtx_AND (mode, one, tmp)));
29279   xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29280 
29281   /* res = copysign (xa2, operand1) */
29282   ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29283 
29284   emit_label (label);
29285   LABEL_NUSES (label) = 1;
29286 
29287   emit_move_insn (operand0, res);
29288 }
29289 
29290 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29291    into OPERAND0.  */
29292 void
29293 ix86_expand_trunc (rtx operand0, rtx operand1)
29294 {
29295   /* C code for SSE variant we expand below.
29296         double xa = fabs (x), x2;
29297         if (!isless (xa, TWO52))
29298           return x;
29299         x2 = (double)(long)x;
29300         if (HONOR_SIGNED_ZEROS (mode))
29301           return copysign (x2, x);
29302         return x2;
29303    */
29304   enum machine_mode mode = GET_MODE (operand0);
29305   rtx xa, xi, TWO52, label, res, mask;
29306 
29307   TWO52 = ix86_gen_TWO52 (mode);
29308 
29309   /* Temporary for holding the result, initialized to the input
29310      operand to ease control flow.  */
29311   res = gen_reg_rtx (mode);
29312   emit_move_insn (res, operand1);
29313 
29314   /* xa = abs (operand1) */
29315   xa = ix86_expand_sse_fabs (res, &mask);
29316 
29317   /* if (!isless (xa, TWO52)) goto label; */
29318   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29319 
29320   /* x = (double)(long)x */
29321   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29322   expand_fix (xi, res, 0);
29323   expand_float (res, xi, 0);
29324 
29325   if (HONOR_SIGNED_ZEROS (mode))
29326     ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29327 
29328   emit_label (label);
29329   LABEL_NUSES (label) = 1;
29330 
29331   emit_move_insn (operand0, res);
29332 }
29333 
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  This variant avoids DImode truncation, so it also
   works for DFmode on 32-bit targets.  */
29336 void
29337 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29338 {
29339   enum machine_mode mode = GET_MODE (operand0);
29340   rtx xa, mask, TWO52, label, one, res, smask, tmp;
29341 
29342   /* C code for SSE variant we expand below.
        double xa = fabs (x), xa2, x2;
29344         if (!isless (xa, TWO52))
29345           return x;
29346         xa2 = xa + TWO52 - TWO52;
29347      Compensate:
29348         if (xa2 > xa)
29349           xa2 -= 1.0;
29350         x2 = copysign (xa2, x);
29351         return x2;
29352    */
29353 
29354   TWO52 = ix86_gen_TWO52 (mode);
29355 
29356   /* Temporary for holding the result, initialized to the input
29357      operand to ease control flow.  */
29358   res = gen_reg_rtx (mode);
29359   emit_move_insn (res, operand1);
29360 
29361   /* xa = abs (operand1) */
29362   xa = ix86_expand_sse_fabs (res, &smask);
29363 
29364   /* if (!isless (xa, TWO52)) goto label; */
29365   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29366 
29367   /* res = xa + TWO52 - TWO52; */
29368   tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29369   tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29370   emit_move_insn (res, tmp);
29371 
29372   /* generate 1.0 */
29373   one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29374 
  /* Compensate: res = res - (res > xa ? 1 : 0)  */
29376   mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29377   emit_insn (gen_rtx_SET (VOIDmode, mask,
29378                           gen_rtx_AND (mode, mask, one)));
29379   tmp = expand_simple_binop (mode, MINUS,
29380                              res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29381   emit_move_insn (res, tmp);
29382 
29383   /* res = copysign (res, operand1) */
29384   ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29385 
29386   emit_label (label);
29387   LABEL_NUSES (label) = 1;
29388 
29389   emit_move_insn (operand0, res);
29390 }
29391 
29392 /* Expand SSE sequence for computing round from OPERAND1 storing
29393    into OPERAND0.  */
29394 void
29395 ix86_expand_round (rtx operand0, rtx operand1)
29396 {
29397   /* C code for the stuff we're doing below:
29398         double xa = fabs (x);
29399         if (!isless (xa, TWO52))
29400           return x;
29401         xa = (double)(long)(xa + nextafter (0.5, 0.0));
29402         return copysign (xa, x);
29403    */
29404   enum machine_mode mode = GET_MODE (operand0);
29405   rtx res, TWO52, xa, label, xi, half, mask;
29406   const struct real_format *fmt;
29407   REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29408 
29409   /* Temporary for holding the result, initialized to the input
29410      operand to ease control flow.  */
29411   res = gen_reg_rtx (mode);
29412   emit_move_insn (res, operand1);
29413 
29414   TWO52 = ix86_gen_TWO52 (mode);
29415   xa = ix86_expand_sse_fabs (res, &mask);
29416   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29417 
29418   /* load nextafter (0.5, 0.0) */
29419   fmt = REAL_MODE_FORMAT (mode);
29420   real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29421   REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29422 
  /* xa = xa + nextafter (0.5, 0.0) */
29424   half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29425   xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29426 
29427   /* xa = (double)(int64_t)xa */
29428   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29429   expand_fix (xi, xa, 0);
29430   expand_float (xa, xi, 0);
29431 
29432   /* res = copysign (xa, operand1) */
29433   ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29434 
29435   emit_label (label);
29436   LABEL_NUSES (label) = 1;
29437 
29438   emit_move_insn (operand0, res);
29439 }
29440 
29441 
/* Validate whether an SSE5 instruction's operand combination is valid.
   OPERANDS is the array of operands.
   NUM is the number of operands.
   USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
   NUM_MEMORY is the maximum number of memory operands to accept.
   When COMMUTATIVE is set, operands 1 and 2 can be swapped.  */
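/* For instance, a 4-operand fmaddss of the form
   xmm1, xmm1, xmm2, xmm3/mem (last operand in memory) gives
   mem_mask == (1 << 3) and mem_count == 1 below.  */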
29448 
29449 bool
29450 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29451                       bool uses_oc0, int num_memory, bool commutative)
29452 {
29453   int mem_mask;
29454   int mem_count;
29455   int i;
29456 
29457   /* Count the number of memory arguments */
29458   mem_mask = 0;
29459   mem_count = 0;
29460   for (i = 0; i < num; i++)
29461     {
29462       enum machine_mode mode = GET_MODE (operands[i]);
29463       if (register_operand (operands[i], mode))
29464         ;
29465 
29466       else if (memory_operand (operands[i], mode))
29467         {
29468           mem_mask |= (1 << i);
29469           mem_count++;
29470         }
29471 
29472       else
29473         {
29474           rtx pattern = PATTERN (insn);
29475 
29476           /* allow 0 for pcmov */
29477           if (GET_CODE (pattern) != SET
29478               || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29479               || i < 2
29480               || operands[i] != CONST0_RTX (mode))
29481             return false;
29482         }
29483     }
29484 
29485   /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29486      a memory operation.  */
29487   if (num_memory < 0)
29488     {
29489       num_memory = -num_memory;
29490       if ((mem_mask & (1 << (num-1))) != 0)
29491         {
29492           mem_mask &= ~(1 << (num-1));
29493           mem_count--;
29494         }
29495     }
29496 
29497   /* If there were no memory operations, allow the insn */
29498   if (mem_mask == 0)
29499     return true;
29500 
29501   /* Do not allow the destination register to be a memory operand.  */
29502   else if (mem_mask & (1 << 0))
29503     return false;
29504 
  /* If there are too many memory operations, disallow the instruction.
     While the hardware only allows one memory reference, before register
     allocation we sometimes allow two memory operations for certain insns
     so that code like the following can be optimized:

        float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }

     or similar cases that are vectorized into using the fmaddss
     instruction.  */
29514   else if (mem_count > num_memory)
29515     return false;
29516 
29517   /* Don't allow more than one memory operation if not optimizing.  */
29518   else if (mem_count > 1 && !optimize)
29519     return false;
29520 
29521   else if (num == 4 && mem_count == 1)
29522     {
29523       /* formats (destination is the first argument), example fmaddss:
29524          xmm1, xmm1, xmm2, xmm3/mem
29525          xmm1, xmm1, xmm2/mem, xmm3
29526          xmm1, xmm2, xmm3/mem, xmm1
29527          xmm1, xmm2/mem, xmm3, xmm1 */
29528       if (uses_oc0)
29529         return ((mem_mask == (1 << 1))
29530                 || (mem_mask == (1 << 2))
29531                 || (mem_mask == (1 << 3)));
29532 
29533       /* format, example pmacsdd:
29534          xmm1, xmm2, xmm3/mem, xmm1 */
29535       if (commutative)
29536         return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
29537       else
29538         return (mem_mask == (1 << 2));
29539     }
29540 
29541   else if (num == 4 && num_memory == 2)
29542     {
      /* If there are two memory operations, we can load one of the memory ops
         into the destination register.  This is for optimizing the
         multiply/add ops, for which the combiner has optimized both the
         multiply and the add insns to have a memory operation.  We have to be
         careful that the destination doesn't overlap with the inputs.  */
29548       rtx op0 = operands[0];
29549 
29550       if (reg_mentioned_p (op0, operands[1])
29551           || reg_mentioned_p (op0, operands[2])
29552           || reg_mentioned_p (op0, operands[3]))
29553         return false;
29554 
29555       /* formats (destination is the first argument), example fmaddss:
29556          xmm1, xmm1, xmm2, xmm3/mem
29557          xmm1, xmm1, xmm2/mem, xmm3
29558          xmm1, xmm2, xmm3/mem, xmm1
29559          xmm1, xmm2/mem, xmm3, xmm1
29560 
29561          For the oc0 case, we will load either operands[1] or operands[3] into
29562          operands[0], so any combination of 2 memory operands is ok.  */
29563       if (uses_oc0)
29564         return true;
29565 
29566       /* format, example pmacsdd:
29567          xmm1, xmm2, xmm3/mem, xmm1
29568 
29569          For the integer multiply/add instructions be more restrictive and
29570          require operands[2] and operands[3] to be the memory operands.  */
29571       if (commutative)
        return (mem_mask == ((1 << 1) | (1 << 3))
                || mem_mask == ((1 << 2) | (1 << 3)));
29573       else
29574         return (mem_mask == ((1 << 2) | (1 << 3)));
29575     }
29576 
29577   else if (num == 3 && num_memory == 1)
29578     {
29579       /* formats, example protb:
29580          xmm1, xmm2, xmm3/mem
29581          xmm1, xmm2/mem, xmm3 */
29582       if (uses_oc0)
29583         return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
29584 
29585       /* format, example comeq:
29586          xmm1, xmm2, xmm3/mem */
29587       else
29588         return (mem_mask == (1 << 2));
29589     }
29590 
29591   else
29592     gcc_unreachable ();
29593 
29594   return false;
29595 }
29596 
29597 
/* Fix up an SSE5 instruction that has 2 memory input references into a
   form the hardware will allow by using the destination register to load
   one of the memory operands.  Presently this is used by the multiply/add
   routines to allow 2 memory references.  */
29602 
29603 void
29604 ix86_expand_sse5_multiple_memory (rtx operands[],
29605                                   int num,
29606                                   enum machine_mode mode)
29607 {
29608   rtx op0 = operands[0];
29609   if (num != 4
29610       || memory_operand (op0, mode)
29611       || reg_mentioned_p (op0, operands[1])
29612       || reg_mentioned_p (op0, operands[2])
29613       || reg_mentioned_p (op0, operands[3]))
29614     gcc_unreachable ();
29615 
29616   /* For 2 memory operands, pick either operands[1] or operands[3] to move into
29617      the destination register.  */
29618   if (memory_operand (operands[1], mode))
29619     {
29620       emit_move_insn (op0, operands[1]);
29621       operands[1] = op0;
29622     }
29623   else if (memory_operand (operands[3], mode))
29624     {
29625       emit_move_insn (op0, operands[3]);
29626       operands[3] = op0;
29627     }
29628   else
29629     gcc_unreachable ();
29632 }
29633 
29634 
29635 /* Table of valid machine attributes.  */
29636 static const struct attribute_spec ix86_attribute_table[] =
29637 {
29638   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29639   /* Stdcall attribute says callee is responsible for popping arguments
29640      if they are not variable.  */
29641   { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
29642   /* Fastcall attribute says callee is responsible for popping arguments
29643      if they are not variable.  */
29644   { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
29645   /* Cdecl attribute says the callee is a normal C declaration */
29646   { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
29647   /* Regparm attribute specifies how many integer arguments are to be
29648      passed in registers.  */
29649   { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
29650   /* Sseregparm attribute says we are using x86_64 calling conventions
29651      for FP arguments.  */
29652   { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29653   /* force_align_arg_pointer says this function realigns the stack at entry.  */
29654   { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29655     false, true,  true, ix86_handle_cconv_attribute },
29656 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29657   { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29658   { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29659   { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
29660 #endif
29661   { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
29662   { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
29663 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29664   SUBTARGET_ATTRIBUTE_TABLE,
29665 #endif
29666   /* ms_abi and sysv_abi calling convention function attributes.  */
29667   { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29668   { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29669   /* End element.  */
29670   { NULL,        0, 0, false, false, false, NULL }
29671 };
29672 
29673 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
29674 static int
29675 x86_builtin_vectorization_cost (bool runtime_test)
29676 {
  /* If the branch of the runtime test is taken - i.e., the vectorized
     version is skipped - this incurs a misprediction cost (because the
     vectorized version is expected to be the fall-through).  So we subtract
     the latency of a mispredicted branch from the costs that are incurred
     when the vectorized version is executed.

     TODO: The values in the individual target tables have to be tuned or new
     fields may be needed.  E.g., on K8, the default branch path is the
     not-taken path.  If the taken path is predicted correctly, the minimum
     penalty of going down the taken path is 1 cycle.  If the taken path is
     not predicted correctly, then the minimum penalty is 10 cycles.  */
29688 
  if (runtime_test)
    return -(ix86_cost->cond_taken_branch_cost);
  else
    return 0;
29695 }
29696 
/* Return the calling-ABI-specific va_list type node appropriate for
   FNDECL.  */
29699 
29700 tree
29701 ix86_fn_abi_va_list (tree fndecl)
29702 {
29703   int abi;
29704 
29705   if (!TARGET_64BIT)
29706     return va_list_type_node;
29707   gcc_assert (fndecl != NULL_TREE);
29708   abi = ix86_function_abi ((const_tree) fndecl);
29709 
29710   if (abi == MS_ABI)
29711     return ms_va_list_type_node;
29712   else
29713     return sysv_va_list_type_node;
29714 }
29715 
/* Returns the canonical va_list type specified by TYPE.  If there is
   no valid TYPE provided, it returns NULL_TREE.  */
29718 
29719 tree
29720 ix86_canonical_va_list_type (tree type)
29721 {
29722   tree wtype, htype;
29723 
29724   /* Resolve references and pointers to va_list type.  */
29725   if (INDIRECT_REF_P (type))
29726     type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
29728     type = TREE_TYPE (type);
29729 
29730   if (TARGET_64BIT)
29731     {
      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
29734       htype = type;
29735       if (TREE_CODE (wtype) == ARRAY_TYPE)
29736         {
29737           /* If va_list is an array type, the argument may have decayed
29738              to a pointer type, e.g. by being passed to another function.
29739              In that case, unwrap both types so that we can compare the
29740              underlying records.  */
29741           if (TREE_CODE (htype) == ARRAY_TYPE
29742               || POINTER_TYPE_P (htype))
29743             {
29744               wtype = TREE_TYPE (wtype);
29745               htype = TREE_TYPE (htype);
29746             }
29747         }
29748       if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29749         return va_list_type_node;
      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
29752       htype = type;
29753       if (TREE_CODE (wtype) == ARRAY_TYPE)
29754         {
29755           /* If va_list is an array type, the argument may have decayed
29756              to a pointer type, e.g. by being passed to another function.
29757              In that case, unwrap both types so that we can compare the
29758              underlying records.  */
29759           if (TREE_CODE (htype) == ARRAY_TYPE
29760               || POINTER_TYPE_P (htype))
29761             {
29762               wtype = TREE_TYPE (wtype);
29763               htype = TREE_TYPE (htype);
29764             }
29765         }
29766       if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29767         return sysv_va_list_type_node;
      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
29770       htype = type;
29771       if (TREE_CODE (wtype) == ARRAY_TYPE)
29772         {
29773           /* If va_list is an array type, the argument may have decayed
29774              to a pointer type, e.g. by being passed to another function.
29775              In that case, unwrap both types so that we can compare the
29776              underlying records.  */
29777           if (TREE_CODE (htype) == ARRAY_TYPE
29778               || POINTER_TYPE_P (htype))
29779             {
29780               wtype = TREE_TYPE (wtype);
29781               htype = TREE_TYPE (htype);
29782             }
29783         }
29784       if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29785         return ms_va_list_type_node;
29786       return NULL_TREE;
29787     }
29788   return std_canonical_va_list_type (type);
29789 }
29790 
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal name.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */
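/* Typical use (a sketch; record_va_list_type is a hypothetical
   consumer on the front-end side):

     const char *name;
     tree type;
     int i;

     for (i = 0; ix86_enum_va_list (i, &name, &type); i++)
       record_va_list_type (name, type);
 */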
29798 
29799 int
29800 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
29801 {
29802   if (!TARGET_64BIT)
29803     return 0;
  switch (idx)
    {
    case 0:
      *ptree = ms_va_list_type_node;
      *pname = "__builtin_ms_va_list";
      break;
    case 1:
      *ptree = sysv_va_list_type_node;
      *pname = "__builtin_sysv_va_list";
      break;
    default:
      return 0;
    }
29816   return 1;
29817 }
29818 
29819 /* Initialize the GCC target structure.  */
29820 #undef TARGET_RETURN_IN_MEMORY
29821 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
29822 
29823 #undef TARGET_ATTRIBUTE_TABLE
29824 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
29825 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29826 #  undef TARGET_MERGE_DECL_ATTRIBUTES
29827 #  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
29828 #endif
29829 
29830 #undef TARGET_COMP_TYPE_ATTRIBUTES
29831 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
29832 
29833 #undef TARGET_INIT_BUILTINS
29834 #define TARGET_INIT_BUILTINS ix86_init_builtins
29835 #undef TARGET_EXPAND_BUILTIN
29836 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
29837 
29838 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
29839 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
29840   ix86_builtin_vectorized_function
29841 
29842 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
29843 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
29844 
29845 #undef TARGET_BUILTIN_RECIPROCAL
29846 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
29847 
29848 #undef TARGET_ASM_FUNCTION_EPILOGUE
29849 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
29850 
29851 #undef TARGET_ENCODE_SECTION_INFO
29852 #ifndef SUBTARGET_ENCODE_SECTION_INFO
29853 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
29854 #else
29855 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
29856 #endif
29857 
29858 #undef TARGET_ASM_OPEN_PAREN
29859 #define TARGET_ASM_OPEN_PAREN ""
29860 #undef TARGET_ASM_CLOSE_PAREN
29861 #define TARGET_ASM_CLOSE_PAREN ""
29862 
29863 #undef TARGET_ASM_ALIGNED_HI_OP
29864 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
29865 #undef TARGET_ASM_ALIGNED_SI_OP
29866 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
29867 #ifdef ASM_QUAD
29868 #undef TARGET_ASM_ALIGNED_DI_OP
29869 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
29870 #endif
29871 
29872 #undef TARGET_ASM_UNALIGNED_HI_OP
29873 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
29874 #undef TARGET_ASM_UNALIGNED_SI_OP
29875 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
29876 #undef TARGET_ASM_UNALIGNED_DI_OP
29877 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
29878 
29879 #undef TARGET_SCHED_ADJUST_COST
29880 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
29881 #undef TARGET_SCHED_ISSUE_RATE
29882 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
29883 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
29884 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
29885   ia32_multipass_dfa_lookahead
29886 
29887 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
29888 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
29889 
29890 #ifdef HAVE_AS_TLS
29891 #undef TARGET_HAVE_TLS
29892 #define TARGET_HAVE_TLS true
29893 #endif
29894 #undef TARGET_CANNOT_FORCE_CONST_MEM
29895 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
29896 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
29897 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
29898 
29899 #undef TARGET_DELEGITIMIZE_ADDRESS
29900 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
29901 
29902 #undef TARGET_MS_BITFIELD_LAYOUT_P
29903 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
29904 
29905 #if TARGET_MACHO
29906 #undef TARGET_BINDS_LOCAL_P
29907 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
29908 #endif
29909 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29910 #undef TARGET_BINDS_LOCAL_P
29911 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
29912 #endif
29913 
29914 #undef TARGET_ASM_OUTPUT_MI_THUNK
29915 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
29916 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
29917 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
29918 
29919 #undef TARGET_ASM_FILE_START
29920 #define TARGET_ASM_FILE_START x86_file_start
29921 
29922 #undef TARGET_DEFAULT_TARGET_FLAGS
29923 #define TARGET_DEFAULT_TARGET_FLAGS     \
29924   (TARGET_DEFAULT                       \
29925    | TARGET_SUBTARGET_DEFAULT           \
29926    | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
29927 
29928 #undef TARGET_HANDLE_OPTION
29929 #define TARGET_HANDLE_OPTION ix86_handle_option
29930 
29931 #undef TARGET_RTX_COSTS
29932 #define TARGET_RTX_COSTS ix86_rtx_costs
29933 #undef TARGET_ADDRESS_COST
29934 #define TARGET_ADDRESS_COST ix86_address_cost
29935 
29936 #undef TARGET_FIXED_CONDITION_CODE_REGS
29937 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
29938 #undef TARGET_CC_MODES_COMPATIBLE
29939 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
29940 
29941 #undef TARGET_MACHINE_DEPENDENT_REORG
29942 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
29943 
29944 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
29945 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
29946 
29947 #undef TARGET_BUILD_BUILTIN_VA_LIST
29948 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
29949 
29950 #undef TARGET_FN_ABI_VA_LIST
29951 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
29952 
29953 #undef TARGET_CANONICAL_VA_LIST_TYPE
29954 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
29955 
29956 #undef TARGET_EXPAND_BUILTIN_VA_START
29957 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
29958 
29959 #undef TARGET_MD_ASM_CLOBBERS
29960 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
29961 
29962 #undef TARGET_PROMOTE_PROTOTYPES
29963 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
29964 #undef TARGET_STRUCT_VALUE_RTX
29965 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
29966 #undef TARGET_SETUP_INCOMING_VARARGS
29967 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
29968 #undef TARGET_MUST_PASS_IN_STACK
29969 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
29970 #undef TARGET_PASS_BY_REFERENCE
29971 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
29972 #undef TARGET_INTERNAL_ARG_POINTER
29973 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
29974 #undef TARGET_UPDATE_STACK_BOUNDARY
29975 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
29976 #undef TARGET_GET_DRAP_RTX
29977 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
29978 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
29979 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
29980 #undef TARGET_STRICT_ARGUMENT_NAMING
29981 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
29982 
29983 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
29984 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
29985 
29986 #undef TARGET_SCALAR_MODE_SUPPORTED_P
29987 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
29988 
29989 #undef TARGET_VECTOR_MODE_SUPPORTED_P
29990 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
29991 
29992 #undef TARGET_C_MODE_FOR_SUFFIX
29993 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
29994 
29995 #ifdef HAVE_AS_TLS
29996 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
29997 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
29998 #endif
29999 
30000 #ifdef SUBTARGET_INSERT_ATTRIBUTES
30001 #undef TARGET_INSERT_ATTRIBUTES
30002 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
30003 #endif
30004 
30005 #undef TARGET_MANGLE_TYPE
30006 #define TARGET_MANGLE_TYPE ix86_mangle_type
30007 
30008 #undef TARGET_STACK_PROTECT_FAIL
30009 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
30010 
30011 #undef TARGET_FUNCTION_VALUE
30012 #define TARGET_FUNCTION_VALUE ix86_function_value
30013 
30014 #undef TARGET_SECONDARY_RELOAD
30015 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
30016 
30017 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
30018 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
30019 
30020 #undef TARGET_SET_CURRENT_FUNCTION
30021 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
30022 
30023 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
30024 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
30025 
30026 #undef TARGET_OPTION_SAVE
30027 #define TARGET_OPTION_SAVE ix86_function_specific_save
30028 
30029 #undef TARGET_OPTION_RESTORE
30030 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
30031 
30032 #undef TARGET_OPTION_PRINT
30033 #define TARGET_OPTION_PRINT ix86_function_specific_print
30034 
30035 #undef TARGET_OPTION_CAN_INLINE_P
30036 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
30037 
30038 #undef TARGET_EXPAND_TO_RTL_HOOK
30039 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
30040 
30041 struct gcc_target targetm = TARGET_INITIALIZER;
30042 
30043 #include "gt-i386.h"