/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "c-common.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"

static int x86_builtin_vectorization_cost (bool);
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)                                        \
  ((mode) == QImode ? 0                                         \
   : (mode) == HImode ? 1                                       \
   : (mode) == SImode ? 2                                       \
   : (mode) == DImode ? 3                                       \
   : 4)
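
/* Usage sketch (illustrative, not part of the original file): the cost
   tables below carry one entry per integer mode, so a caller would pick
   the multiply-start cost for MODE roughly as

     cost = ix86_cost->mult_init[MODE_INDEX (mode)];

   assuming the ix86_cost pointer and the mult_init/divide arrays of
   struct processor_costs declared in i386.h.  Any mode other than
   QImode, HImode, SImode or DImode falls through to the trailing
   "other" slot at index 4.  */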

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
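
/* A quick worked check of that assumption (not in the original file):
   with COSTS_N_INSNS (N) == (N) * 4, a 2-byte add costs
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so the byte-based size
   costs below stay on the same scale as the cycle-based tables that
   follow.  */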

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
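
/* How to read the stringop_algs initializers below (a sketch; the
   authoritative declaration is expected in i386.h):

     {alg_for_unknown_size, {{max_0, alg_0}, ..., {-1, alg_last}}}

   The first member picks the algorithm when the block size is not a
   compile-time constant; each {max, alg} pair handles known sizes up to
   max bytes, and max == -1 terminates the table, covering all larger
   sizes.  DUMMY_STRINGOP_ALGS fills the second (64-bit) slot for
   tunings where 64-bit code generation is irrelevant.  */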

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),                    /* cost of an add instruction */
  COSTS_N_BYTES (3),                    /* cost of a lea instruction */
  COSTS_N_BYTES (2),                    /* variable shift costs */
  COSTS_N_BYTES (3),                    /* constant shift costs */
  {COSTS_N_BYTES (3),                   /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),                   /*                               HI */
   COSTS_N_BYTES (3),                   /*                               SI */
   COSTS_N_BYTES (3),                   /*                               DI */
   COSTS_N_BYTES (5)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),                   /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),                   /*                          HI */
   COSTS_N_BYTES (3),                   /*                          SI */
   COSTS_N_BYTES (3),                   /*                          DI */
   COSTS_N_BYTES (5)},                  /*                       other */
  COSTS_N_BYTES (3),                    /* cost of movsx */
  COSTS_N_BYTES (3),                    /* cost of movzx */
  0,                                    /* "large" insn */
  2,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {2, 2, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 2, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 2},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {2, 2, 2},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  3,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {3, 3},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  3,                                    /* cost of moving SSE register */
  {3, 3, 3},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {3, 3, 3},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of l1 cache  */
  0,                                    /* size of l2 cache  */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_BYTES (2),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),                    /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar_load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  1,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  1,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};
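
/* How one of these tables gets selected (a sketch of the expected
   option-override logic later in this file; ix86_tune and
   processor_target_table come from that machinery and are not defined
   here):

     ix86_cost = optimize_size ? &ix86_size_cost
                               : processor_target_table[ix86_tune].cost;

   so the byte-based table above wins when optimizing for size, and the
   per-CPU cycle-based tables below win under -mtune.  */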

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (3),                    /* variable shift costs */
  COSTS_N_INSNS (2),                    /* constant shift costs */
  {COSTS_N_INSNS (6),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),                   /*                               HI */
   COSTS_N_INSNS (6),                   /*                               SI */
   COSTS_N_INSNS (6),                   /*                               DI */
   COSTS_N_INSNS (6)},                  /*                               other */
  COSTS_N_INSNS (1),                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),                  /*                          HI */
   COSTS_N_INSNS (23),                  /*                          SI */
   COSTS_N_INSNS (23),                  /*                          DI */
   COSTS_N_INSNS (23)},                 /*                          other */
  COSTS_N_INSNS (3),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  15,                                   /* "large" insn */
  3,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {8, 8, 8},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {8, 8, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of l1 cache  */
  0,                                    /* size of l2 cache  */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (23),                   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),                   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),                   /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),                   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),                  /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar_load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (3),                    /* variable shift costs */
  COSTS_N_INSNS (2),                    /* constant shift costs */
  {COSTS_N_INSNS (12),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),                  /*                               HI */
   COSTS_N_INSNS (12),                  /*                               SI */
   COSTS_N_INSNS (12),                  /*                               DI */
   COSTS_N_INSNS (12)},                 /*                               other */
  1,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),                  /*                          HI */
   COSTS_N_INSNS (40),                  /*                          SI */
   COSTS_N_INSNS (40),                  /*                          DI */
   COSTS_N_INSNS (40)},                 /*                          other */
  COSTS_N_INSNS (3),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  15,                                   /* "large" insn */
  3,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {8, 8, 8},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {8, 8, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  4,                                    /* size of l1 cache.  486 has 8kB cache
                                           shared for code and data, so 4kB is
                                           not really precise.  */
  4,                                    /* size of l2 cache  */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),                   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),                   /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar_load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (4),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (11),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),                  /*                               HI */
   COSTS_N_INSNS (11),                  /*                               SI */
   COSTS_N_INSNS (11),                  /*                               DI */
   COSTS_N_INSNS (11)},                 /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),                  /*                          HI */
   COSTS_N_INSNS (25),                  /*                          SI */
   COSTS_N_INSNS (25),                  /*                          DI */
   COSTS_N_INSNS (25)},                 /*                          other */
  COSTS_N_INSNS (3),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  8,                                    /* "large" insn */
  6,                                    /* MOVE_RATIO */
  6,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  8,                                    /* cost of moving MMX register */
  {8, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {8, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  8,                                    /* size of l2 cache  */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),                   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar_load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (4),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (4),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (4)},                  /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),                  /*                          HI */
   COSTS_N_INSNS (17),                  /*                          SI */
   COSTS_N_INSNS (17),                  /*                          DI */
   COSTS_N_INSNS (17)},                 /*                          other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  6,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 2, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {2, 2, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  256,                                  /* size of l2 cache  */
  32,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned to 8
     bytes (we ensure the alignment).  For small blocks an inline loop is
     still a noticeable win; for bigger blocks either rep movsl or rep
     movsb is the way to go.  Rep movsb apparently has a more expensive
     startup time in the CPU, but after 4K the difference is down in the
     noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar_load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (2),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (7),                   /*                               SI */
   COSTS_N_INSNS (7),                   /*                               DI */
   COSTS_N_INSNS (7)},                  /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),                  /*                          HI */
   COSTS_N_INSNS (39),                  /*                          SI */
   COSTS_N_INSNS (39),                  /*                          DI */
   COSTS_N_INSNS (39)},                 /*                          other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  4,                                    /* MOVE_RATIO */
  1,                                    /* cost for loading QImode using movzbl */
  {1, 1, 1},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {1, 1, 1},                            /* cost of storing integer registers */
  1,                                    /* cost of reg,reg fld/fst */
  {1, 1, 1},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 6, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  1,                                    /* cost of moving MMX register */
  {1, 1},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {1, 1},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  1,                                    /* cost of moving SSE register */
  {1, 1, 1},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {1, 1, 1},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  1,                                    /* MMX or SSE register to integer */
  64,                                   /* size of l1 cache.  */
  128,                                  /* size of l2 cache.  */
  32,                                   /* size of prefetch block */
  1,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),                   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),                   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar_load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (3),                   /*                               DI */
   COSTS_N_INSNS (3)},                  /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),                  /*                          HI */
   COSTS_N_INSNS (18),                  /*                          SI */
   COSTS_N_INSNS (18),                  /*                          DI */
   COSTS_N_INSNS (18)},                 /*                          other */
  COSTS_N_INSNS (2),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  8,                                    /* "large" insn */
  4,                                    /* MOVE_RATIO */
  3,                                    /* cost for loading QImode using movzbl */
  {4, 5, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 3, 2},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {6, 6, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 4},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {2, 2, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  6,                                    /* MMX or SSE register to integer */
  32,                                   /* size of l1 cache.  */
  32,                                   /* size of l2 cache.  Some models
                                           have integrated l2 cache, but
                                           optimizing for k6 is not important
                                           enough to worry about that.  */
  32,                                   /* size of prefetch block */
  1,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (2),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar_load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (5),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),                   /*                               HI */
   COSTS_N_INSNS (5),                   /*                               SI */
   COSTS_N_INSNS (5),                   /*                               DI */
   COSTS_N_INSNS (5)},                  /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /*                          HI */
   COSTS_N_INSNS (42),                  /*                          SI */
   COSTS_N_INSNS (74),                  /*                          DI */
   COSTS_N_INSNS (74)},                 /*                          other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 4},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 6},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  64,                                   /* size of l1 cache.  */
  256,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  5,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative
     to loops) than K8 does.  Alignment becomes important after 8 bytes
     for memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar_load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (5)},                  /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /*                          HI */
   COSTS_N_INSNS (42),                  /*                          SI */
   COSTS_N_INSNS (74),                  /*                          DI */
   COSTS_N_INSNS (74)},                 /*                          other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 3, 6},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  64,                                   /* size of l1 cache.  */
  512,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (leaving the number of
     prefetches entirely unlimited would probably not be a good idea
     either, as their execution also takes some time).  */
  100,                                  /* number of parallel prefetches */
  3,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                                    /* scalar_stmt_cost.  */
  2,                                    /* scalar_load_cost.  */
  2,                                    /* scalar_store_cost.  */
  5,                                    /* vec_stmt_cost.  */
  0,                                    /* vec_to_scalar_cost.  */
  2,                                    /* scalar_to_vec_cost.  */
  2,                                    /* vec_align_load_cost.  */
  3,                                    /* vec_unalign_load_cost.  */
  3,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  2,                                    /* cond_not_taken_branch_cost.  */
};

struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (5)},                  /*                               other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),                  /*                          HI */
   COSTS_N_INSNS (51),                  /*                          SI */
   COSTS_N_INSNS (83),                  /*                          DI */
   COSTS_N_INSNS (83)},                 /*                          other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 3},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
                                        /* On K8
                                            MOVD reg64, xmmreg  Double  FSTORE 4
                                            MOVD reg32, xmmreg  Double  FSTORE 4
                                           On AMDFAM10
                                            MOVD reg64, xmmreg  Double  FADD 3
                                                                1/1  1/1
                                            MOVD reg32, xmmreg  Double  FADD 3
                                                                1/1  1/1 */
  64,                                   /* size of l1 cache.  */
  512,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (leaving the number of
     prefetches entirely unlimited would probably not be a good idea
     either, as their execution also takes some time).  */
  100,                                  /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */

  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks,
     but for very small blocks it is better to use a loop.  For large
     blocks, a libcall can do nontemporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                                    /* scalar_stmt_cost.  */
  2,                                    /* scalar_load_cost.  */
  2,                                    /* scalar_store_cost.  */
  6,                                    /* vec_stmt_cost.  */
  0,                                    /* vec_to_scalar_cost.  */
  2,                                    /* scalar_to_vec_cost.  */
  2,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  2,                                    /* vec_store_cost.  */
  2,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};
 821 
 822 static const
 823 struct processor_costs pentium4_cost = {
 824   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 825   COSTS_N_INSNS (3),                    /* cost of a lea instruction */
 826   COSTS_N_INSNS (4),                    /* variable shift costs */
 827   COSTS_N_INSNS (4),                    /* constant shift costs */
 828   {COSTS_N_INSNS (15),                  /* cost of starting multiply for QI */
 829    COSTS_N_INSNS (15),                  /*                               HI */
 830    COSTS_N_INSNS (15),                  /*                               SI */
 831    COSTS_N_INSNS (15),                  /*                               DI */
 832    COSTS_N_INSNS (15)},                 /*                               other */
 833   0,                                    /* cost of multiply per each bit set */
 834   {COSTS_N_INSNS (56),                  /* cost of a divide/mod for QI */
 835    COSTS_N_INSNS (56),                  /*                          HI */
 836    COSTS_N_INSNS (56),                  /*                          SI */
 837    COSTS_N_INSNS (56),                  /*                          DI */
 838    COSTS_N_INSNS (56)},                 /*                          other */
 839   COSTS_N_INSNS (1),                    /* cost of movsx */
 840   COSTS_N_INSNS (1),                    /* cost of movzx */
 841   16,                                   /* "large" insn */
 842   6,                                    /* MOVE_RATIO */
 843   2,                                    /* cost for loading QImode using movzbl */
 844   {4, 5, 4},                            /* cost of loading integer registers
 845                                            in QImode, HImode and SImode.
 846                                            Relative to reg-reg move (2).  */
 847   {2, 3, 2},                            /* cost of storing integer registers */
 848   2,                                    /* cost of reg,reg fld/fst */
 849   {2, 2, 6},                            /* cost of loading fp registers
 850                                            in SFmode, DFmode and XFmode */
 851   {4, 4, 6},                            /* cost of storing fp registers
 852                                            in SFmode, DFmode and XFmode */
 853   2,                                    /* cost of moving MMX register */
 854   {2, 2},                               /* cost of loading MMX registers
 855                                            in SImode and DImode */
 856   {2, 2},                               /* cost of storing MMX registers
 857                                            in SImode and DImode */
 858   12,                                   /* cost of moving SSE register */
 859   {12, 12, 12},                         /* cost of loading SSE registers
 860                                            in SImode, DImode and TImode */
 861   {2, 2, 8},                            /* cost of storing SSE registers
 862                                            in SImode, DImode and TImode */
 863   10,                                   /* MMX or SSE register to integer */
 864   8,                                    /* size of l1 cache.  */
 865   256,                                  /* size of l2 cache.  */
 866   64,                                   /* size of prefetch block */
 867   6,                                    /* number of parallel prefetches */
 868   2,                                    /* Branch cost */
 869   COSTS_N_INSNS (5),                    /* cost of FADD and FSUB insns.  */
 870   COSTS_N_INSNS (7),                    /* cost of FMUL instruction.  */
 871   COSTS_N_INSNS (43),                   /* cost of FDIV instruction.  */
 872   COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
 873   COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
 874   COSTS_N_INSNS (43),                   /* cost of FSQRT instruction.  */
 875   {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
 876    DUMMY_STRINGOP_ALGS},
 877   {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
 878    {-1, libcall}}},
 879    DUMMY_STRINGOP_ALGS},
 880   1,                                    /* scalar_stmt_cost.  */
 881   1,                                    /* scalar load_cost.  */
 882   1,                                    /* scalar_store_cost.  */
 883   1,                                    /* vec_stmt_cost.  */
 884   1,                                    /* vec_to_scalar_cost.  */
 885   1,                                    /* scalar_to_vec_cost.  */
 886   1,                                    /* vec_align_load_cost.  */
 887   2,                                    /* vec_unalign_load_cost.  */
 888   1,                                    /* vec_store_cost.  */
 889   3,                                    /* cond_taken_branch_cost.  */
 890   1,                                    /* cond_not_taken_branch_cost.  */
 891 };
 892 
 893 static const
 894 struct processor_costs nocona_cost = {
 895   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 896   COSTS_N_INSNS (1),                    /* cost of a lea instruction */
 897   COSTS_N_INSNS (1),                    /* variable shift costs */
 898   COSTS_N_INSNS (1),                    /* constant shift costs */
 899   {COSTS_N_INSNS (10),                  /* cost of starting multiply for QI */
 900    COSTS_N_INSNS (10),                  /*                               HI */
 901    COSTS_N_INSNS (10),                  /*                               SI */
 902    COSTS_N_INSNS (10),                  /*                               DI */
 903    COSTS_N_INSNS (10)},                 /*                               other */
 904   0,                                    /* cost of multiply per each bit set */
 905   {COSTS_N_INSNS (66),                  /* cost of a divide/mod for QI */
 906    COSTS_N_INSNS (66),                  /*                          HI */
 907    COSTS_N_INSNS (66),                  /*                          SI */
 908    COSTS_N_INSNS (66),                  /*                          DI */
 909    COSTS_N_INSNS (66)},                 /*                          other */
 910   COSTS_N_INSNS (1),                    /* cost of movsx */
 911   COSTS_N_INSNS (1),                    /* cost of movzx */
 912   16,                                   /* "large" insn */
 913   17,                                   /* MOVE_RATIO */
 914   4,                                    /* cost for loading QImode using movzbl */
 915   {4, 4, 4},                            /* cost of loading integer registers
 916                                            in QImode, HImode and SImode.
 917                                            Relative to reg-reg move (2).  */
 918   {4, 4, 4},                            /* cost of storing integer registers */
 919   3,                                    /* cost of reg,reg fld/fst */
 920   {12, 12, 12},                         /* cost of loading fp registers
 921                                            in SFmode, DFmode and XFmode */
 922   {4, 4, 4},                            /* cost of storing fp registers
 923                                            in SFmode, DFmode and XFmode */
 924   6,                                    /* cost of moving MMX register */
 925   {12, 12},                             /* cost of loading MMX registers
 926                                            in SImode and DImode */
 927   {12, 12},                             /* cost of storing MMX registers
 928                                            in SImode and DImode */
 929   6,                                    /* cost of moving SSE register */
 930   {12, 12, 12},                         /* cost of loading SSE registers
 931                                            in SImode, DImode and TImode */
 932   {12, 12, 12},                         /* cost of storing SSE registers
 933                                            in SImode, DImode and TImode */
 934   8,                                    /* MMX or SSE register to integer */
 935   8,                                    /* size of l1 cache.  */
 936   1024,                                 /* size of l2 cache.  */
 937   128,                                  /* size of prefetch block */
 938   8,                                    /* number of parallel prefetches */
 939   1,                                    /* Branch cost */
 940   COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
 941   COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
 942   COSTS_N_INSNS (40),                   /* cost of FDIV instruction.  */
 943   COSTS_N_INSNS (3),                    /* cost of FABS instruction.  */
 944   COSTS_N_INSNS (3),                    /* cost of FCHS instruction.  */
 945   COSTS_N_INSNS (44),                   /* cost of FSQRT instruction.  */
 946   {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
 947    {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
 948               {100000, unrolled_loop}, {-1, libcall}}}},
 949   {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
 950    {-1, libcall}}},
 951    {libcall, {{24, loop}, {64, unrolled_loop},
 952               {8192, rep_prefix_8_byte}, {-1, libcall}}}},
 953   1,                                    /* scalar_stmt_cost.  */
 954   1,                                    /* scalar load_cost.  */
 955   1,                                    /* scalar_store_cost.  */
 956   1,                                    /* vec_stmt_cost.  */
 957   1,                                    /* vec_to_scalar_cost.  */
 958   1,                                    /* scalar_to_vec_cost.  */
 959   1,                                    /* vec_align_load_cost.  */
 960   2,                                    /* vec_unalign_load_cost.  */
 961   1,                                    /* vec_store_cost.  */
 962   3,                                    /* cond_taken_branch_cost.  */
 963   1,                                    /* cond_not_taken_branch_cost.  */
 964 };
 965 
 966 static const
 967 struct processor_costs core2_cost = {
 968   COSTS_N_INSNS (1),                    /* cost of an add instruction */
 969   COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
 970   COSTS_N_INSNS (1),                    /* variable shift costs */
 971   COSTS_N_INSNS (1),                    /* constant shift costs */
 972   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
 973    COSTS_N_INSNS (3),                   /*                               HI */
 974    COSTS_N_INSNS (3),                   /*                               SI */
 975    COSTS_N_INSNS (3),                   /*                               DI */
 976    COSTS_N_INSNS (3)},                  /*                               other */
 977   0,                                    /* cost of multiply per each bit set */
 978   {COSTS_N_INSNS (22),                  /* cost of a divide/mod for QI */
 979    COSTS_N_INSNS (22),                  /*                          HI */
 980    COSTS_N_INSNS (22),                  /*                          SI */
 981    COSTS_N_INSNS (22),                  /*                          DI */
 982    COSTS_N_INSNS (22)},                 /*                          other */
 983   COSTS_N_INSNS (1),                    /* cost of movsx */
 984   COSTS_N_INSNS (1),                    /* cost of movzx */
 985   8,                                    /* "large" insn */
 986   16,                                   /* MOVE_RATIO */
 987   2,                                    /* cost for loading QImode using movzbl */
 988   {6, 6, 6},                            /* cost of loading integer registers
 989                                            in QImode, HImode and SImode.
 990                                            Relative to reg-reg move (2).  */
 991   {4, 4, 4},                            /* cost of storing integer registers */
 992   2,                                    /* cost of reg,reg fld/fst */
 993   {6, 6, 6},                            /* cost of loading fp registers
 994                                            in SFmode, DFmode and XFmode */
 995   {4, 4, 4},                            /* cost of storing fp registers
 996                                            in SFmode, DFmode and XFmode */
 997   2,                                    /* cost of moving MMX register */
 998   {6, 6},                               /* cost of loading MMX registers
 999                                            in SImode and DImode */
1000   {4, 4},                               /* cost of storing MMX registers
1001                                            in SImode and DImode */
1002   2,                                    /* cost of moving SSE register */
1003   {6, 6, 6},                            /* cost of loading SSE registers
1004                                            in SImode, DImode and TImode */
1005   {4, 4, 4},                            /* cost of storing SSE registers
1006                                            in SImode, DImode and TImode */
1007   2,                                    /* MMX or SSE register to integer */
1008   32,                                   /* size of l1 cache.  */
1009   2048,                                 /* size of l2 cache.  */
1010   128,                                  /* size of prefetch block */
1011   8,                                    /* number of parallel prefetches */
1012   3,                                    /* Branch cost */
1013   COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
1014   COSTS_N_INSNS (5),                    /* cost of FMUL instruction.  */
1015   COSTS_N_INSNS (32),                   /* cost of FDIV instruction.  */
1016   COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
1017   COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
1018   COSTS_N_INSNS (58),                   /* cost of FSQRT instruction.  */
1019   {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1020    {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1021               {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1022   {{libcall, {{8, loop}, {15, unrolled_loop},
1023               {2048, rep_prefix_4_byte}, {-1, libcall}}},
1024    {libcall, {{24, loop}, {32, unrolled_loop},
1025               {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1026   1,                                    /* scalar_stmt_cost.  */
1027   1,                                    /* scalar load_cost.  */
1028   1,                                    /* scalar_store_cost.  */
1029   1,                                    /* vec_stmt_cost.  */
1030   1,                                    /* vec_to_scalar_cost.  */
1031   1,                                    /* scalar_to_vec_cost.  */
1032   1,                                    /* vec_align_load_cost.  */
1033   2,                                    /* vec_unalign_load_cost.  */
1034   1,                                    /* vec_store_cost.  */
1035   3,                                    /* cond_taken_branch_cost.  */
1036   1,                                    /* cond_not_taken_branch_cost.  */
1037 };
1038 
1039 /* Generic64 should produce code tuned for Nocona and K8.  */
1040 static const
1041 struct processor_costs generic64_cost = {
1042   COSTS_N_INSNS (1),                    /* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results
     in the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
1047   COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
1048   COSTS_N_INSNS (1),                    /* variable shift costs */
1049   COSTS_N_INSNS (1),                    /* constant shift costs */
1050   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
1051    COSTS_N_INSNS (4),                   /*                               HI */
1052    COSTS_N_INSNS (3),                   /*                               SI */
1053    COSTS_N_INSNS (4),                   /*                               DI */
1054    COSTS_N_INSNS (2)},                  /*                               other */
1055   0,                                    /* cost of multiply per each bit set */
1056   {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
1057    COSTS_N_INSNS (26),                  /*                          HI */
1058    COSTS_N_INSNS (42),                  /*                          SI */
1059    COSTS_N_INSNS (74),                  /*                          DI */
1060    COSTS_N_INSNS (74)},                 /*                          other */
1061   COSTS_N_INSNS (1),                    /* cost of movsx */
1062   COSTS_N_INSNS (1),                    /* cost of movzx */
1063   8,                                    /* "large" insn */
1064   17,                                   /* MOVE_RATIO */
1065   4,                                    /* cost for loading QImode using movzbl */
1066   {4, 4, 4},                            /* cost of loading integer registers
1067                                            in QImode, HImode and SImode.
1068                                            Relative to reg-reg move (2).  */
1069   {4, 4, 4},                            /* cost of storing integer registers */
1070   4,                                    /* cost of reg,reg fld/fst */
1071   {12, 12, 12},                         /* cost of loading fp registers
1072                                            in SFmode, DFmode and XFmode */
1073   {6, 6, 8},                            /* cost of storing fp registers
1074                                            in SFmode, DFmode and XFmode */
1075   2,                                    /* cost of moving MMX register */
1076   {8, 8},                               /* cost of loading MMX registers
1077                                            in SImode and DImode */
1078   {8, 8},                               /* cost of storing MMX registers
1079                                            in SImode and DImode */
1080   2,                                    /* cost of moving SSE register */
1081   {8, 8, 8},                            /* cost of loading SSE registers
1082                                            in SImode, DImode and TImode */
1083   {8, 8, 8},                            /* cost of storing SSE registers
1084                                            in SImode, DImode and TImode */
1085   5,                                    /* MMX or SSE register to integer */
1086   32,                                   /* size of l1 cache.  */
1087   512,                                  /* size of l2 cache.  */
1088   64,                                   /* size of prefetch block */
1089   6,                                    /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when
     this value is increased to the perhaps more appropriate value of 5.  */
1092   3,                                    /* Branch cost */
1093   COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
1094   COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
1095   COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
1096   COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
1097   COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
1098   COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
1099   {DUMMY_STRINGOP_ALGS,
1100    {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101   {DUMMY_STRINGOP_ALGS,
1102    {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1103   1,                                    /* scalar_stmt_cost.  */
1104   1,                                    /* scalar load_cost.  */
1105   1,                                    /* scalar_store_cost.  */
1106   1,                                    /* vec_stmt_cost.  */
1107   1,                                    /* vec_to_scalar_cost.  */
1108   1,                                    /* scalar_to_vec_cost.  */
1109   1,                                    /* vec_align_load_cost.  */
1110   2,                                    /* vec_unalign_load_cost.  */
1111   1,                                    /* vec_store_cost.  */
1112   3,                                    /* cond_taken_branch_cost.  */
1113   1,                                    /* cond_not_taken_branch_cost.  */
1114 };
1115 
/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
   and K8.  */
1117 static const
1118 struct processor_costs generic32_cost = {
1119   COSTS_N_INSNS (1),                    /* cost of an add instruction */
1120   COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
1121   COSTS_N_INSNS (1),                    /* variable shift costs */
1122   COSTS_N_INSNS (1),                    /* constant shift costs */
1123   {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
1124    COSTS_N_INSNS (4),                   /*                               HI */
1125    COSTS_N_INSNS (3),                   /*                               SI */
1126    COSTS_N_INSNS (4),                   /*                               DI */
1127    COSTS_N_INSNS (2)},                  /*                               other */
1128   0,                                    /* cost of multiply per each bit set */
1129   {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
1130    COSTS_N_INSNS (26),                  /*                          HI */
1131    COSTS_N_INSNS (42),                  /*                          SI */
1132    COSTS_N_INSNS (74),                  /*                          DI */
1133    COSTS_N_INSNS (74)},                 /*                          other */
1134   COSTS_N_INSNS (1),                    /* cost of movsx */
1135   COSTS_N_INSNS (1),                    /* cost of movzx */
1136   8,                                    /* "large" insn */
1137   17,                                   /* MOVE_RATIO */
1138   4,                                    /* cost for loading QImode using movzbl */
1139   {4, 4, 4},                            /* cost of loading integer registers
1140                                            in QImode, HImode and SImode.
1141                                            Relative to reg-reg move (2).  */
1142   {4, 4, 4},                            /* cost of storing integer registers */
1143   4,                                    /* cost of reg,reg fld/fst */
1144   {12, 12, 12},                         /* cost of loading fp registers
1145                                            in SFmode, DFmode and XFmode */
1146   {6, 6, 8},                            /* cost of storing fp registers
1147                                            in SFmode, DFmode and XFmode */
1148   2,                                    /* cost of moving MMX register */
1149   {8, 8},                               /* cost of loading MMX registers
1150                                            in SImode and DImode */
1151   {8, 8},                               /* cost of storing MMX registers
1152                                            in SImode and DImode */
1153   2,                                    /* cost of moving SSE register */
1154   {8, 8, 8},                            /* cost of loading SSE registers
1155                                            in SImode, DImode and TImode */
1156   {8, 8, 8},                            /* cost of storing SSE registers
1157                                            in SImode, DImode and TImode */
1158   5,                                    /* MMX or SSE register to integer */
1159   32,                                   /* size of l1 cache.  */
1160   256,                                  /* size of l2 cache.  */
1161   64,                                   /* size of prefetch block */
1162   6,                                    /* number of parallel prefetches */
1163   3,                                    /* Branch cost */
1164   COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
1165   COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
1166   COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
1167   COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
1168   COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
1169   COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
1170   {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171    DUMMY_STRINGOP_ALGS},
1172   {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1173    DUMMY_STRINGOP_ALGS},
1174   1,                                    /* scalar_stmt_cost.  */
1175   1,                                    /* scalar load_cost.  */
1176   1,                                    /* scalar_store_cost.  */
1177   1,                                    /* vec_stmt_cost.  */
1178   1,                                    /* vec_to_scalar_cost.  */
1179   1,                                    /* scalar_to_vec_cost.  */
1180   1,                                    /* vec_align_load_cost.  */
1181   2,                                    /* vec_unalign_load_cost.  */
1182   1,                                    /* vec_store_cost.  */
1183   3,                                    /* cond_taken_branch_cost.  */
1184   1,                                    /* cond_not_taken_branch_cost.  */
1185 };
1186 
1187 const struct processor_costs *ix86_cost = &pentium_cost;
1188 
1189 /* Processor feature/optimization bitmasks.  */
1190 #define m_386 (1<<PROCESSOR_I386)
1191 #define m_486 (1<<PROCESSOR_I486)
1192 #define m_PENT (1<<PROCESSOR_PENTIUM)
1193 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1194 #define m_PENT4  (1<<PROCESSOR_PENTIUM4)
1195 #define m_NOCONA  (1<<PROCESSOR_NOCONA)
1196 #define m_CORE2  (1<<PROCESSOR_CORE2)
1197 
1198 #define m_GEODE  (1<<PROCESSOR_GEODE)
1199 #define m_K6  (1<<PROCESSOR_K6)
1200 #define m_K6_GEODE  (m_K6 | m_GEODE)
1201 #define m_K8  (1<<PROCESSOR_K8)
1202 #define m_ATHLON  (1<<PROCESSOR_ATHLON)
1203 #define m_ATHLON_K8  (m_K8 | m_ATHLON)
1204 #define m_AMDFAM10  (1<<PROCESSOR_AMDFAM10)
1205 #define m_AMD_MULTIPLE  (m_K8 | m_ATHLON | m_AMDFAM10)
1206 
1207 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1208 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1209 
/* Generic instruction choice should be a common subset of the supported
   CPUs (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
1212 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1213 
1214 /* Feature tests against the various tunings.  */
1215 unsigned char ix86_tune_features[X86_TUNE_LAST];
1216 
1217 /* Feature tests against the various tunings used to create ix86_tune_features
1218    based on the processor mask.  */
1219 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling it for Generic64 seems like a good code-size
     tradeoff.  We can't enable it for 32-bit generic because it does not
     work well with PPro-based chips.  */
1224   m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1225 
1226   /* X86_TUNE_PUSH_MEMORY */
1227   m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1228   | m_NOCONA | m_CORE2 | m_GENERIC,
1229 
1230   /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1231   m_486 | m_PENT,
1232 
1233   /* X86_TUNE_UNROLL_STRLEN */
1234   m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1235 
1236   /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1237   m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1238 
  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation results, but after the P4 shipped no performance benefit
     was observed with branch hints; they also increase code size.  As a
     result, icc never generates branch hints.  */
1243   0,
1244 
1245   /* X86_TUNE_DOUBLE_WITH_ADD */
1246   ~m_386,
1247 
1248   /* X86_TUNE_USE_SAHF */
1249   m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1250   | m_NOCONA | m_CORE2 | m_GENERIC,
1251 
1252   /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1253      partial dependencies.  */
1254   m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1255   | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1256 
  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on the Generic32 compilation setting as well.  However,
     in the current implementation partial register stalls are not
     eliminated very well - they can be introduced via subregs synthesized
     by combine and can happen in caller/callee saving sequences.  Because
     this option pays back little on PPro-based chips and conflicts with
     the partial register dependencies used by Athlon/P4-based chips, it is
     better to leave it off for generic32 for now.  */
1265   m_PPRO,
1266 
1267   /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1268   m_CORE2 | m_GENERIC,
1269 
1270   /* X86_TUNE_USE_HIMODE_FIOP */
1271   m_386 | m_486 | m_K6_GEODE,
1272 
1273   /* X86_TUNE_USE_SIMODE_FIOP */
1274   ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1275 
1276   /* X86_TUNE_USE_MOV0 */
1277   m_K6,
1278 
1279   /* X86_TUNE_USE_CLTD */
1280   ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1281 
1282   /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
1283   m_PENT4,
1284 
1285   /* X86_TUNE_SPLIT_LONG_MOVES */
1286   m_PPRO,
1287 
1288   /* X86_TUNE_READ_MODIFY_WRITE */
1289   ~m_PENT,
1290 
1291   /* X86_TUNE_READ_MODIFY */
1292   ~(m_PENT | m_PPRO),
1293 
1294   /* X86_TUNE_PROMOTE_QIMODE */
1295   m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1296   | m_GENERIC /* | m_PENT4 ? */,
1297 
1298   /* X86_TUNE_FAST_PREFIX */
1299   ~(m_PENT | m_486 | m_386),
1300 
1301   /* X86_TUNE_SINGLE_STRINGOP */
1302   m_386 | m_PENT4 | m_NOCONA,
1303 
1304   /* X86_TUNE_QIMODE_MATH */
1305   ~0,
1306 
1307   /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1308      register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
1309      might be considered for Generic32 if our scheme for avoiding partial
1310      stalls was more effective.  */
1311   ~m_PPRO,
1312 
1313   /* X86_TUNE_PROMOTE_QI_REGS */
1314   0,
1315 
1316   /* X86_TUNE_PROMOTE_HI_REGS */
1317   m_PPRO,
1318 
1319   /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
1320   m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1321 
1322   /* X86_TUNE_ADD_ESP_8 */
1323   m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1324   | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1325 
1326   /* X86_TUNE_SUB_ESP_4 */
1327   m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1328 
1329   /* X86_TUNE_SUB_ESP_8 */
1330   m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1331   | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1332 
1333   /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1334      for DFmode copies */
1335   ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1336     | m_GENERIC | m_GEODE),
1337 
1338   /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1339   m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1340 
  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict between PPro/Pentium4-based chips, which treat 128-bit SSE
     registers as single units, and K8-based chips, which divide SSE
     registers into two 64-bit halves.  This knob promotes all store
     destinations to 128 bits to allow register renaming on 128-bit SSE
     units, but usually results in one extra microop on 64-bit SSE units.
     Experimental results show that disabling this option on P4 brings
     over a 20% SPECfp regression, while enabling it on K8 brings a
     roughly 2.4% regression that can be partly masked by careful
     scheduling of moves.  */
1350   m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1351 
1352   /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1353   m_AMDFAM10,
1354 
  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where types and dependencies
     are resolved on SSE register parts instead of whole registers, so we
     may maintain just the lower part of scalar values in the proper
     format, leaving the upper part undefined.  */
1359   m_ATHLON_K8,
1360 
1361   /* X86_TUNE_SSE_TYPELESS_STORES */
1362   m_AMD_MULTIPLE,
1363 
1364   /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1365   m_PPRO | m_PENT4 | m_NOCONA,
1366 
1367   /* X86_TUNE_MEMORY_MISMATCH_STALL */
1368   m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1369 
1370   /* X86_TUNE_PROLOGUE_USING_MOVE */
1371   m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1372 
1373   /* X86_TUNE_EPILOGUE_USING_MOVE */
1374   m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1375 
1376   /* X86_TUNE_SHIFT1 */
1377   ~m_486,
1378 
1379   /* X86_TUNE_USE_FFREEP */
1380   m_AMD_MULTIPLE,
1381 
1382   /* X86_TUNE_INTER_UNIT_MOVES */
1383   ~(m_AMD_MULTIPLE | m_GENERIC),
1384 
1385   /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1386   ~(m_AMDFAM10),
1387 
  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in a 16-byte window.  */
1390   m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1391 
1392   /* X86_TUNE_SCHEDULE */
1393   m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1394 
1395   /* X86_TUNE_USE_BT */
1396   m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1397 
1398   /* X86_TUNE_USE_INCDEC */
1399   ~(m_PENT4 | m_NOCONA | m_GENERIC),
1400 
1401   /* X86_TUNE_PAD_RETURNS */
1402   m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1403 
1404   /* X86_TUNE_EXT_80387_CONSTANTS */
1405   m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1406 
1407   /* X86_TUNE_SHORTEN_X87_SSE */
1408   ~m_K8,
1409 
1410   /* X86_TUNE_AVOID_VECTOR_DECODE */
1411   m_K8 | m_GENERIC64,
1412 
  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
     HImode and SImode multiplies, but the 386 and 486 do HImode multiplies
     faster.  */
1415   ~(m_386 | m_486),
1416 
1417   /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1418      vector path on AMD machines.  */
1419   m_K8 | m_GENERIC64 | m_AMDFAM10,
1420 
1421   /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1422      machines.  */
1423   m_K8 | m_GENERIC64 | m_AMDFAM10,
1424 
  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than via MOV.  */
1427   m_PENT,
1428 
1429   /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1430      but one byte longer.  */
1431   m_PENT,
1432 
  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a
     memory operand that cannot be represented using a modRM byte.  The
     XOR replacement is long decoded, so this split helps here as well.  */
1436   m_K6,
1437 
1438   /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1439      from FP to FP. */
1440   m_AMDFAM10 | m_GENERIC,
1441 
1442   /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1443      from integer to FP. */
1444   m_AMDFAM10,
1445 
1446   /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1447      with a subsequent conditional jump instruction into a single
1448      compare-and-branch uop.  */
1449   m_CORE2,
1450 };
1451 
1452 /* Feature tests against the various architecture variations.  */
1453 unsigned char ix86_arch_features[X86_ARCH_LAST];
1454 
1455 /* Feature tests against the various architecture variations, used to create
1456    ix86_arch_features based on the processor mask.  */
1457 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1458   /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro.  */
1459   ~(m_386 | m_486 | m_PENT | m_K6),
1460 
1461   /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
1462   ~m_386,
1463 
1464   /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1465   ~(m_386 | m_486),
1466 
1467   /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
1468   ~m_386,
1469 
1470   /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
1471   ~m_386,
1472 };
1473 
1474 static const unsigned int x86_accumulate_outgoing_args
1475   = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1476 
1477 static const unsigned int x86_arch_always_fancy_math_387
1478   = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1479     | m_NOCONA | m_CORE2 | m_GENERIC;
1480 
1481 static enum stringop_alg stringop_alg = no_stringop;
1482 
/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
1486 #define FAST_PROLOGUE_INSN_COUNT 20
1487 
/* Names for the 8-bit (low), 8-bit (high), and 16-bit registers,
   respectively.  */
1489 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1490 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1491 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1492 
1493 /* Array of the smallest class containing reg number REGNO, indexed by
1494    REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
1495 
1496 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1497 {
1498   /* ax, dx, cx, bx */
1499   AREG, DREG, CREG, BREG,
1500   /* si, di, bp, sp */
1501   SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1502   /* FP registers */
1503   FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1504   FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1505   /* arg pointer */
1506   NON_Q_REGS,
1507   /* flags, fpsr, fpcr, frame */
1508   NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1509   /* SSE registers */
1510   SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1511   SSE_REGS, SSE_REGS,
1512   /* MMX registers */
1513   MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1514   MMX_REGS, MMX_REGS,
1515   /* REX registers */
1516   NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1517   NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1518   /* SSE REX registers */
1519   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1520   SSE_REGS, SSE_REGS,
1521 };
1522 
1523 /* The "default" register map used in 32bit mode.  */
1524 
1525 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1526 {
1527   0, 2, 1, 3, 6, 7, 4, 5,               /* general regs */
1528   12, 13, 14, 15, 16, 17, 18, 19,       /* fp regs */
1529   -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, fpcr, frame */
1530   21, 22, 23, 24, 25, 26, 27, 28,       /* SSE */
1531   29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
1532   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
1533   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
1534 };
1535 
1536 /* The "default" register map used in 64bit mode.  */
1537 
1538 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1539 {
1540   0, 1, 2, 3, 4, 5, 6, 7,               /* general regs */
1541   33, 34, 35, 36, 37, 38, 39, 40,       /* fp regs */
1542   -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, fpcr, frame */
1543   17, 18, 19, 20, 21, 22, 23, 24,       /* SSE */
1544   41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
1545   8,9,10,11,12,13,14,15,                /* extended integer registers */
1546   25, 26, 27, 28, 29, 30, 31, 32,       /* extended SSE registers */
1547 };
1548 
1549 /* Define the register numbers to be used in Dwarf debugging information.
1550    The SVR4 reference port C compiler uses the following register numbers
1551    in its Dwarf output code:
1552         0 for %eax (gcc regno = 0)
1553         1 for %ecx (gcc regno = 2)
1554         2 for %edx (gcc regno = 1)
1555         3 for %ebx (gcc regno = 3)
1556         4 for %esp (gcc regno = 7)
1557         5 for %ebp (gcc regno = 6)
1558         6 for %esi (gcc regno = 4)
1559         7 for %edi (gcc regno = 5)
1560    The following three DWARF register numbers are never generated by
1561    the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1562    believes these numbers have these meanings.
1563         8  for %eip    (no gcc equivalent)
1564         9  for %eflags (gcc regno = 17)
1565         10 for %trapno (no gcc equivalent)
1566    It is not at all clear how we should number the FP stack registers
1567    for the x86 architecture.  If the version of SDB on x86/svr4 were
1568    a bit less brain dead with respect to floating-point then we would
1569    have a precedent to follow with respect to DWARF register numbers
1570    for x86 FP registers, but the SDB on x86/svr4 is so completely
1571    broken with respect to FP registers that it is hardly worth thinking
1572    of it as something to strive for compatibility with.
1573    The version of x86/svr4 SDB I have at the moment does (partially)
1574    seem to believe that DWARF register number 11 is associated with
1575    the x86 register %st(0), but that's about all.  Higher DWARF
1576    register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to
   understand that it should say that a variable lives in %st(0) (when
1579    asked via an `=' command) if we said it was in DWARF regno 11,
1580    but SDB still prints garbage when asked for the value of the
1581    variable in question (via a `/' command).
1582    (Also note that the labels SDB prints for various FP stack regs
1583    when doing an `x' command are all wrong.)
1584    Note that these problems generally don't affect the native SVR4
1585    C compiler because it doesn't allow the use of -O with -g and
1586    because when it is *not* optimizing, it allocates a memory
1587    location for each floating-point variable, and the memory
1588    location is what gets described in the DWARF AT_location
1589    attribute for the variable in question.
1590    Regardless of the severe mental illness of the x86/svr4 SDB, we
1591    do something sensible here and we use the following DWARF
1592    register numbers.  Note that these are all stack-top-relative
1593    numbers.
1594         11 for %st(0) (gcc regno = 8)
1595         12 for %st(1) (gcc regno = 9)
1596         13 for %st(2) (gcc regno = 10)
1597         14 for %st(3) (gcc regno = 11)
1598         15 for %st(4) (gcc regno = 12)
1599         16 for %st(5) (gcc regno = 13)
1600         17 for %st(6) (gcc regno = 14)
1601         18 for %st(7) (gcc regno = 15)
1602 */
1603 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1604 {
1605   0, 2, 1, 3, 6, 7, 5, 4,               /* general regs */
1606   11, 12, 13, 14, 15, 16, 17, 18,       /* fp regs */
1607   -1, 9, -1, -1, -1,                    /* arg, flags, fpsr, fpcr, frame */
1608   21, 22, 23, 24, 25, 26, 27, 28,       /* SSE registers */
1609   29, 30, 31, 32, 33, 34, 35, 36,       /* MMX registers */
1610   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
1611   -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
1612 };
1613 
1614 /* Test and compare insns in i386.md store the information needed to
1615    generate branch and scc insns here.  */
1616 
1617 rtx ix86_compare_op0 = NULL_RTX;
1618 rtx ix86_compare_op1 = NULL_RTX;
1619 rtx ix86_compare_emitted = NULL_RTX;
1620 
1621 /* Define parameter passing and return registers.  */
1622 
1623 static int const x86_64_int_parameter_registers[6] =
1624 {
1625   DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1626 };
1627 
1628 static int const x86_64_ms_abi_int_parameter_registers[4] =
1629 {
1630   CX_REG, DX_REG, R8_REG, R9_REG
1631 };
1632 
1633 static int const x86_64_int_return_registers[4] =
1634 {
1635   AX_REG, DX_REG, DI_REG, SI_REG
1636 };
1637 
1638 /* Define the structure for the machine field in struct function.  */
1639 
1640 struct stack_local_entry GTY(())
1641 {
1642   unsigned short mode;
1643   unsigned short n;
1644   rtx rtl;
1645   struct stack_local_entry *next;
1646 };
1647 
1648 /* Structure describing stack frame layout.
1649    Stack grows downward:
1650 
1651    [arguments]
1652                                               <- ARG_POINTER
1653    saved pc
1654 
1655    saved frame pointer if frame_pointer_needed
1656                                               <- HARD_FRAME_POINTER
1657    [-msave-args]
1658 
1659    [padding0]
1660 
1661    [saved regs]
1662 
1663    [padding05]
1664 
1665    [saved SSE regs]
1666 
1667    [padding1]          \
1668                         )
1669    [va_arg registers]  (
1670                         > to_allocate              <- FRAME_POINTER
1671    [frame]             (
1672                         )
1673    [padding2]          /
1674   */
1675 struct ix86_frame
1676 {
1677   int nmsave_args;
1678   int padding0;
1679   int nsseregs;
1680   int padding05;
1681   int nregs;
1682   int padding1;
1683   int va_arg_size;
1684   HOST_WIDE_INT frame;
1685   int padding2;
1686   int outgoing_arguments_size;
1687   int red_zone_size;
1688 
1689   HOST_WIDE_INT to_allocate;
1690   /* The offsets relative to ARG_POINTER.  */
1691   HOST_WIDE_INT frame_pointer_offset;
1692   HOST_WIDE_INT hard_frame_pointer_offset;
1693   HOST_WIDE_INT stack_pointer_offset;
1694 
1695   /* When save_regs_using_mov is set, emit prologue using
1696      move instead of push instructions.  */
1697   bool save_regs_using_mov;
1698 };
1699 
1700 /* Code model option.  */
1701 enum cmodel ix86_cmodel;
1702 /* Asm dialect.  */
1703 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1704 /* TLS dialects.  */
1705 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1706 
1707 /* Which unit we are generating floating point math for.  */
1708 enum fpmath_unit ix86_fpmath;
1709 
1710 /* Which cpu are we scheduling for.  */
1711 enum attr_cpu ix86_schedule;
1712 
1713 /* Which cpu are we optimizing for.  */
1714 enum processor_type ix86_tune;
1715 
1716 /* Which instruction set architecture to use.  */
1717 enum processor_type ix86_arch;
1718 
/* Nonzero if the SSE prefetch instruction is not a NOP.  */
1720 int x86_prefetch_sse;
1721 
1722 /* ix86_regparm_string as a number */
1723 static int ix86_regparm;
1724 
1725 /* -mstackrealign option */
1726 extern int ix86_force_align_arg_pointer;
1727 static const char ix86_force_align_arg_pointer_string[]
1728   = "force_align_arg_pointer";
1729 
1730 static rtx (*ix86_gen_leave) (void);
1731 static rtx (*ix86_gen_pop1) (rtx);
1732 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1733 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1734 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1735 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1736 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1737 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
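
/* These hooks are pointed at the SImode or DImode insn generators when
   options are processed.  As a sketch of the intent (the actual
   assignments live in override_options later in this file):

     ix86_gen_add3 = TARGET_64BIT ? gen_adddi3 : gen_addsi3;
     ix86_gen_sub3 = TARGET_64BIT ? gen_subdi3 : gen_subsi3;
*/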
1738 
1739 /* Preferred alignment for stack boundary in bits.  */
1740 unsigned int ix86_preferred_stack_boundary;
1741 
1742 /* Alignment for incoming stack boundary in bits specified at
1743    command line.  */
1744 static unsigned int ix86_user_incoming_stack_boundary;
1745 
1746 /* Default alignment for incoming stack boundary in bits.  */
1747 static unsigned int ix86_default_incoming_stack_boundary;
1748 
1749 /* Alignment for incoming stack boundary in bits.  */
1750 unsigned int ix86_incoming_stack_boundary;
1751 
1752 /* Values 1-5: see jump.c */
1753 int ix86_branch_cost;
1754 
/* Calling-ABI-specific va_list type nodes.  */
1756 static GTY(()) tree sysv_va_list_type_node;
1757 static GTY(()) tree ms_va_list_type_node;
1758 
1759 /* Variables which are this size or smaller are put in the data/bss
1760    or ldata/lbss sections.  */
1761 
1762 int ix86_section_threshold = 65536;
1763 
1764 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
1765 char internal_label_prefix[16];
1766 int internal_label_prefix_len;
1767 
1768 /* Fence to use after loop using movnt.  */
1769 tree x86_mfence;
1770 
1771 static int ix86_nsaved_args (void);
1772 
/* Register class used for passing a given 64-bit part of an argument.
   These represent classes as documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class; gcc
   just uses SFmode or DFmode moves instead of DImode to avoid
   reformatting penalties.

   Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half contains only padding).  */
1780 enum x86_64_reg_class
1781   {
1782     X86_64_NO_CLASS,
1783     X86_64_INTEGER_CLASS,
1784     X86_64_INTEGERSI_CLASS,
1785     X86_64_SSE_CLASS,
1786     X86_64_SSESF_CLASS,
1787     X86_64_SSEDF_CLASS,
1788     X86_64_SSEUP_CLASS,
1789     X86_64_X87_CLASS,
1790     X86_64_X87UP_CLASS,
1791     X86_64_COMPLEX_X87_CLASS,
1792     X86_64_MEMORY_CLASS
1793   };
1794 
1795 #define MAX_CLASSES 4
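
/* As a worked illustration of the classification (the algorithm itself
   is implemented by classify_argument below): a structure such as

     struct { double d; int i; };

   occupies two eightbytes and is classified as
   { X86_64_SSEDF_CLASS, X86_64_INTEGERSI_CLASS }: the double travels in
   an SSE register via a DFmode move, the int in the low half of an
   integer register via an SImode move.  */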
1796 
1797 /* Table of constants used by fldpi, fldln2, etc....  */
1798 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1799 static bool ext_80387_constants_init = 0;
1800 
1801 
1802 static struct machine_function * ix86_init_machine_status (void);
1803 static rtx ix86_function_value (const_tree, const_tree, bool);
1804 static int ix86_function_regparm (const_tree, const_tree);
1805 static void ix86_compute_frame_layout (struct ix86_frame *);
1806 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1807                                                  rtx, rtx, int);
1808 static void ix86_add_new_builtins (int);
1809 
1810 enum ix86_function_specific_strings
1811 {
1812   IX86_FUNCTION_SPECIFIC_ARCH,
1813   IX86_FUNCTION_SPECIFIC_TUNE,
1814   IX86_FUNCTION_SPECIFIC_FPMATH,
1815   IX86_FUNCTION_SPECIFIC_MAX
1816 };
1817 
1818 static char *ix86_target_string (int, int, const char *, const char *,
1819                                  const char *, bool);
1820 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1821 static void ix86_function_specific_save (struct cl_target_option *);
1822 static void ix86_function_specific_restore (struct cl_target_option *);
1823 static void ix86_function_specific_print (FILE *, int,
1824                                           struct cl_target_option *);
1825 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1826 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1827 static bool ix86_can_inline_p (tree, tree);
1828 static void ix86_set_current_function (tree);
1829 
1830 static void pro_epilogue_adjust_stack (rtx, rtx, rtx, int);
1831 
1832 
1833 /* The svr4 ABI for the i386 says that records and unions are returned
1834    in memory.  */
1835 #ifndef DEFAULT_PCC_STRUCT_RETURN
1836 #define DEFAULT_PCC_STRUCT_RETURN 1
1837 #endif
1838 
1839 /* Whether -mtune= or -march= were specified */
1840 static int ix86_tune_defaulted;
1841 static int ix86_arch_specified;
1842 
1843 /* Bit flags that specify the ISA we are compiling for.  */
1844 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1845 
1846 /* A mask of ix86_isa_flags that includes bit X if X
1847    was set or cleared on the command line.  */
1848 static int ix86_isa_flags_explicit;
1849 
1850 /* Define a set of ISAs which are available when a given ISA is
1851    enabled.  MMX and SSE ISAs are handled separately.  */
1852 
1853 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1854 #define OPTION_MASK_ISA_3DNOW_SET \
1855   (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1856 
1857 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1858 #define OPTION_MASK_ISA_SSE2_SET \
1859   (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1860 #define OPTION_MASK_ISA_SSE3_SET \
1861   (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1862 #define OPTION_MASK_ISA_SSSE3_SET \
1863   (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1864 #define OPTION_MASK_ISA_SSE4_1_SET \
1865   (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1866 #define OPTION_MASK_ISA_SSE4_2_SET \
1867   (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1868 #define OPTION_MASK_ISA_AVX_SET \
1869   (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1870 #define OPTION_MASK_ISA_FMA_SET \
1871   (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1872 
1873 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1874    as -msse4.2.  */
1875 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1876 
1877 #define OPTION_MASK_ISA_SSE4A_SET \
1878   (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1879 #define OPTION_MASK_ISA_SSE5_SET \
1880   (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1881 
/* AES and PCLMUL need SSE2 because they use xmm registers.  */
1883 #define OPTION_MASK_ISA_AES_SET \
1884   (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1885 #define OPTION_MASK_ISA_PCLMUL_SET \
1886   (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1887 
1888 #define OPTION_MASK_ISA_ABM_SET \
1889   (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1890 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1891 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1892 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1893 
1894 /* Define a set of ISAs which aren't available when a given ISA is
1895    disabled.  MMX and SSE ISAs are handled separately.  */
1896 
1897 #define OPTION_MASK_ISA_MMX_UNSET \
1898   (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1899 #define OPTION_MASK_ISA_3DNOW_UNSET \
1900   (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1901 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1902 
1903 #define OPTION_MASK_ISA_SSE_UNSET \
1904   (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1905 #define OPTION_MASK_ISA_SSE2_UNSET \
1906   (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1907 #define OPTION_MASK_ISA_SSE3_UNSET \
1908   (OPTION_MASK_ISA_SSE3 \
1909    | OPTION_MASK_ISA_SSSE3_UNSET \
1910    | OPTION_MASK_ISA_SSE4A_UNSET )
1911 #define OPTION_MASK_ISA_SSSE3_UNSET \
1912   (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1913 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1914   (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1915 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1916   (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1917 #define OPTION_MASK_ISA_AVX_UNSET \
1918   (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
1919 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
1920 
/* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
   as -mno-sse4.1.  */
1923 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1924 
1925 #define OPTION_MASK_ISA_SSE4A_UNSET \
1926   (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1927 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1928 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
1929 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
1930 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
1931 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
1932 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
1933 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
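
/* For instance, under the definitions above OPTION_MASK_ISA_SSE4_1_SET
   expands to the union of the SSE4_1, SSSE3, SSE3, SSE2 and SSE bits, so
   -msse4.1 implicitly enables every earlier SSE generation.  Conversely,
   OPTION_MASK_ISA_SSE2_UNSET drags down everything that depends on SSE2
   (SSE3, SSSE3, SSE4.1, SSE4.2, AVX, FMA, SSE4A and SSE5).  This is the
   pattern the option handler below applies for each -m<isa> flag.  */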
1934 
1935 /* Vectorization library interface and handlers.  */
1936 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1937 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1938 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1939 
1940 /* Processor target table, indexed by processor number */
1941 struct ptt
1942 {
1943   const struct processor_costs *cost;           /* Processor costs */
1944   const int align_loop;                         /* Default alignments.  */
1945   const int align_loop_max_skip;
1946   const int align_jump;
1947   const int align_jump_max_skip;
1948   const int align_func;
1949 };
1950 
1951 static const struct ptt processor_target_table[PROCESSOR_max] =
1952 {
1953   {&i386_cost, 4, 3, 4, 3, 4},
1954   {&i486_cost, 16, 15, 16, 15, 16},
1955   {&pentium_cost, 16, 7, 16, 7, 16},
1956   {&pentiumpro_cost, 16, 15, 16, 10, 16},
1957   {&geode_cost, 0, 0, 0, 0, 0},
1958   {&k6_cost, 32, 7, 32, 7, 32},
1959   {&athlon_cost, 16, 7, 16, 7, 16},
1960   {&pentium4_cost, 0, 0, 0, 0, 0},
1961   {&k8_cost, 16, 7, 16, 7, 16},
1962   {&nocona_cost, 0, 0, 0, 0, 0},
1963   {&core2_cost, 16, 10, 16, 10, 16},
1964   {&generic32_cost, 16, 7, 16, 7, 16},
1965   {&generic64_cost, 16, 10, 16, 10, 16},
1966   {&amdfam10_cost, 32, 24, 32, 7, 32}
1967 };
1968 
1969 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
1970 {
1971   "generic",
1972   "i386",
1973   "i486",
1974   "pentium",
1975   "pentium-mmx",
1976   "pentiumpro",
1977   "pentium2",
1978   "pentium3",
1979   "pentium4",
1980   "pentium-m",
1981   "prescott",
1982   "nocona",
1983   "core2",
1984   "geode",
1985   "k6",
1986   "k6-2",
1987   "k6-3",
1988   "athlon",
1989   "athlon-4",
1990   "k8",
1991   "amdfam10"
1992 };
1993 
1994 /* Implement TARGET_HANDLE_OPTION.  */
1995 
1996 static bool
1997 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1998 {
1999   switch (code)
2000     {
2001     case OPT_mmmx:
2002       if (value)
2003         {
2004           ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2005           ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2006         }
2007       else
2008         {
2009           ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2010           ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2011         }
2012       return true;
2013 
2014     case OPT_m3dnow:
2015       if (value)
2016         {
2017           ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2018           ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2019         }
2020       else
2021         {
2022           ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2023           ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2024         }
2025       return true;
2026 
2027     case OPT_m3dnowa:
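      /* -m3dnowa is not independently selectable; the 3DNow! Athlon
	 extensions are only enabled implicitly via -march.  Returning
	 false rejects an explicit use of the switch.  */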
2028       return false;
2029 
2030     case OPT_msse:
2031       if (value)
2032         {
2033           ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2034           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2035         }
2036       else
2037         {
2038           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2039           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2040         }
2041       return true;
2042 
2043     case OPT_msse2:
2044       if (value)
2045         {
2046           ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2047           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2048         }
2049       else
2050         {
2051           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2052           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2053         }
2054       return true;
2055 
2056     case OPT_msse3:
2057       if (value)
2058         {
2059           ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2060           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2061         }
2062       else
2063         {
2064           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2065           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2066         }
2067       return true;
2068 
2069     case OPT_mssse3:
2070       if (value)
2071         {
2072           ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2073           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2074         }
2075       else
2076         {
2077           ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2078           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2079         }
2080       return true;
2081 
2082     case OPT_msse4_1:
2083       if (value)
2084         {
2085           ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2086           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2087         }
2088       else
2089         {
2090           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2091           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2092         }
2093       return true;
2094 
2095     case OPT_msse4_2:
2096       if (value)
2097         {
2098           ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2099           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2100         }
2101       else
2102         {
2103           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2104           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2105         }
2106       return true;
2107 
2108     case OPT_mavx:
2109       if (value)
2110         {
2111           ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2112           ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2113         }
2114       else
2115         {
2116           ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2117           ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2118         }
2119       return true;
2120 
2121     case OPT_mfma:
2122       if (value)
2123         {
2124           ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2125           ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2126         }
2127       else
2128         {
2129           ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2130           ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2131         }
2132       return true;
2133 
2134     case OPT_msse4:
2135       ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2136       ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2137       return true;
2138 
2139     case OPT_mno_sse4:
2140       ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2141       ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2142       return true;
2143 
2144     case OPT_msse4a:
2145       if (value)
2146         {
2147           ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2148           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2149         }
2150       else
2151         {
2152           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2153           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2154         }
2155       return true;
2156 
2157     case OPT_msse5:
2158       if (value)
2159         {
2160           ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2161           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2162         }
2163       else
2164         {
2165           ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2166           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2167         }
2168       return true;
2169 
2170     case OPT_mabm:
2171       if (value)
2172         {
2173           ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2174           ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2175         }
2176       else
2177         {
2178           ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2179           ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2180         }
2181       return true;
2182 
2183     case OPT_mpopcnt:
2184       if (value)
2185         {
2186           ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2187           ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2188         }
2189       else
2190         {
2191           ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2192           ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2193         }
2194       return true;
2195 
2196     case OPT_msahf:
2197       if (value)
2198         {
2199           ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2200           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2201         }
2202       else
2203         {
2204           ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2205           ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2206         }
2207       return true;
2208 
2209     case OPT_mcx16:
2210       if (value)
2211         {
2212           ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2213           ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2214         }
2215       else
2216         {
2217           ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2218           ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2219         }
2220       return true;
2221 
2222     case OPT_maes:
2223       if (value)
2224         {
2225           ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2226           ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2227         }
2228       else
2229         {
2230           ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2231           ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2232         }
2233       return true;
2234 
2235     case OPT_mpclmul:
2236       if (value)
2237         {
2238           ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2239           ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2240         }
2241       else
2242         {
2243           ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2244           ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2245         }
2246       return true;
2247 
2248     default:
2249       return true;
2250     }
2251 }
2252 
/* Return a string that documents the current -m options.  The caller is
   responsible for freeing the string.  */
2255 
2256 static char *
2257 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2258                     const char *fpmath, bool add_nl_p)
2259 {
2260   struct ix86_target_opts
2261   {
2262     const char *option;         /* option string */
    int mask;                   /* ISA option mask */
2264   };
2265 
  /* This table is ordered so that options like -msse5 or -msse4.2 that
     imply preceding options are matched first.  */
2268   static struct ix86_target_opts isa_opts[] =
2269   {
2270     { "-m64",           OPTION_MASK_ISA_64BIT },
2271     { "-msse5",         OPTION_MASK_ISA_SSE5 },
2272     { "-msse4a",        OPTION_MASK_ISA_SSE4A },
2273     { "-msse4.2",       OPTION_MASK_ISA_SSE4_2 },
2274     { "-msse4.1",       OPTION_MASK_ISA_SSE4_1 },
2275     { "-mssse3",        OPTION_MASK_ISA_SSSE3 },
2276     { "-msse3",         OPTION_MASK_ISA_SSE3 },
2277     { "-msse2",         OPTION_MASK_ISA_SSE2 },
2278     { "-msse",          OPTION_MASK_ISA_SSE },
2279     { "-m3dnow",        OPTION_MASK_ISA_3DNOW },
2280     { "-m3dnowa",       OPTION_MASK_ISA_3DNOW_A },
2281     { "-mmmx",          OPTION_MASK_ISA_MMX },
2282     { "-mabm",          OPTION_MASK_ISA_ABM },
2283     { "-mpopcnt",       OPTION_MASK_ISA_POPCNT },
2284     { "-maes",          OPTION_MASK_ISA_AES },
2285     { "-mpclmul",       OPTION_MASK_ISA_PCLMUL },
2286   };
2287 
2288   /* Flag options.  */
2289   static struct ix86_target_opts flag_opts[] =
2290   {
2291     { "-m128bit-long-double",           MASK_128BIT_LONG_DOUBLE },
2292     { "-m80387",                        MASK_80387 },
2293     { "-maccumulate-outgoing-args",     MASK_ACCUMULATE_OUTGOING_ARGS },
2294     { "-malign-double",                 MASK_ALIGN_DOUBLE },
2295     { "-mcld",                          MASK_CLD },
2296     { "-mfp-ret-in-387",                MASK_FLOAT_RETURNS },
2297     { "-mieee-fp",                      MASK_IEEE_FP },
2298     { "-minline-all-stringops",         MASK_INLINE_ALL_STRINGOPS },
2299     { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2300     { "-mms-bitfields",                 MASK_MS_BITFIELD_LAYOUT },
2301     { "-mno-align-stringops",           MASK_NO_ALIGN_STRINGOPS },
2302     { "-mno-fancy-math-387",            MASK_NO_FANCY_MATH_387 },
2303     { "-mno-fused-madd",                MASK_NO_FUSED_MADD },
2304     { "-mno-push-args",                 MASK_NO_PUSH_ARGS },
2305     { "-mno-red-zone",                  MASK_NO_RED_ZONE },
2306     { "-momit-leaf-frame-pointer",      MASK_OMIT_LEAF_FRAME_POINTER },
2307     { "-mrecip",                        MASK_RECIP },
2308     { "-mrtd",                          MASK_RTD },
2309     { "-msseregparm",                   MASK_SSEREGPARM },
2310     { "-mstack-arg-probe",              MASK_STACK_PROBE },
2311     { "-mtls-direct-seg-refs",          MASK_TLS_DIRECT_SEG_REFS },
2312   };
2313 
2314   const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2315 
2316   char isa_other[40];
2317   char target_other[40];
2318   unsigned num = 0;
2319   unsigned i, j;
2320   char *ret;
2321   char *ptr;
2322   size_t len;
2323   size_t line_len;
2324   size_t sep_len;
2325 
2326   memset (opts, '\0', sizeof (opts));
2327 
2328   /* Add -march= option.  */
2329   if (arch)
2330     {
2331       opts[num][0] = "-march=";
2332       opts[num++][1] = arch;
2333     }
2334 
2335   /* Add -mtune= option.  */
2336   if (tune)
2337     {
2338       opts[num][0] = "-mtune=";
2339       opts[num++][1] = tune;
2340     }
2341 
2342   /* Pick out the options in isa options.  */
2343   for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2344     {
2345       if ((isa & isa_opts[i].mask) != 0)
2346         {
2347           opts[num++][0] = isa_opts[i].option;
2348           isa &= ~ isa_opts[i].mask;
2349         }
2350     }
2351 
2352   if (isa && add_nl_p)
2353     {
2354       opts[num++][0] = isa_other;
2355       sprintf (isa_other, "(other isa: 0x%x)", isa);
2356     }
2357 
2358   /* Add flag options.  */
2359   for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2360     {
2361       if ((flags & flag_opts[i].mask) != 0)
2362         {
2363           opts[num++][0] = flag_opts[i].option;
2364           flags &= ~ flag_opts[i].mask;
2365         }
2366     }
2367 
2368   if (flags && add_nl_p)
2369     {
2370       opts[num++][0] = target_other;
      sprintf (target_other, "(other flags: 0x%x)", flags);
2372     }
2373 
2374   /* Add -fpmath= option.  */
2375   if (fpmath)
2376     {
2377       opts[num][0] = "-mfpmath=";
2378       opts[num++][1] = fpmath;
2379     }
2380 
2381   /* Any options?  */
2382   if (num == 0)
2383     return NULL;
2384 
2385   gcc_assert (num < ARRAY_SIZE (opts));
2386 
2387   /* Size the string.  */
2388   len = 0;
2389   sep_len = (add_nl_p) ? 3 : 1;
2390   for (i = 0; i < num; i++)
2391     {
2392       len += sep_len;
2393       for (j = 0; j < 2; j++)
2394         if (opts[i][j])
2395           len += strlen (opts[i][j]);
2396     }
2397 
2398   /* Build the string.  */
2399   ret = ptr = (char *) xmalloc (len);
2400   line_len = 0;
2401 
2402   for (i = 0; i < num; i++)
2403     {
2404       size_t len2[2];
2405 
2406       for (j = 0; j < 2; j++)
2407         len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2408 
2409       if (i != 0)
2410         {
2411           *ptr++ = ' ';
2412           line_len++;
2413 
2414           if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2415             {
2416               *ptr++ = '\\';
2417               *ptr++ = '\n';
2418               line_len = 0;
2419             }
2420         }
2421 
2422       for (j = 0; j < 2; j++)
2423         if (opts[i][j])
2424           {
2425             memcpy (ptr, opts[i][j], len2[j]);
2426             ptr += len2[j];
2427             line_len += len2[j];
2428           }
2429     }
2430 
2431   *ptr = '\0';
2432   gcc_assert (ret + len >= ptr);
2433 
2434   return ret;
2435 }
2436 
2437 /* Function that is callable from the debugger to print the current
2438    options.  */
2439 void
2440 ix86_debug_options (void)
2441 {
2442   char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2443                                    ix86_arch_string, ix86_tune_string,
2444                                    ix86_fpmath_string, true);
2445 
2446   if (opts)
2447     {
2448       fprintf (stderr, "%s\n\n", opts);
2449       free (opts);
2450     }
2451   else
2452     fprintf (stderr, "<no options>\n\n");
2453 
2454   return;
2455 }
2456 
2457 /* Sometimes certain combinations of command options do not make
2458    sense on a particular target machine.  You can define a macro
2459    `OVERRIDE_OPTIONS' to take account of this.  This macro, if
2460    defined, is executed once just after all the command options have
2461    been parsed.
2462 
2463    Don't use this macro to turn on various extra optimizations for
2464    `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
2465 
2466 void
2467 override_options (bool main_args_p)
2468 {
2469   int i;
2470   unsigned int ix86_arch_mask, ix86_tune_mask;
2471   const char *prefix;
2472   const char *suffix;
2473   const char *sw;
2474 
2475   /* Comes from final.c -- no real reason to change it.  */
2476 #define MAX_CODE_ALIGN 16
2477 
2478   enum pta_flags
2479     {
2480       PTA_SSE = 1 << 0,
2481       PTA_SSE2 = 1 << 1,
2482       PTA_SSE3 = 1 << 2,
2483       PTA_MMX = 1 << 3,
2484       PTA_PREFETCH_SSE = 1 << 4,
2485       PTA_3DNOW = 1 << 5,
2486       PTA_3DNOW_A = 1 << 6,
2487       PTA_64BIT = 1 << 7,
2488       PTA_SSSE3 = 1 << 8,
2489       PTA_CX16 = 1 << 9,
2490       PTA_POPCNT = 1 << 10,
2491       PTA_ABM = 1 << 11,
2492       PTA_SSE4A = 1 << 12,
2493       PTA_NO_SAHF = 1 << 13,
2494       PTA_SSE4_1 = 1 << 14,
2495       PTA_SSE4_2 = 1 << 15,
2496       PTA_SSE5 = 1 << 16,
2497       PTA_AES = 1 << 17,
2498       PTA_PCLMUL = 1 << 18,
2499       PTA_AVX = 1 << 19,
2500       PTA_FMA = 1 << 20 
2501     };
2502 
2503   static struct pta
2504     {
2505       const char *const name;           /* processor name or nickname.  */
2506       const enum processor_type processor;
2507       const enum attr_cpu schedule;
2508       const unsigned /*enum pta_flags*/ flags;
2509     }
2510   const processor_alias_table[] =
2511     {
2512       {"i386", PROCESSOR_I386, CPU_NONE, 0},
2513       {"i486", PROCESSOR_I486, CPU_NONE, 0},
2514       {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2515       {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2516       {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2517       {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2518       {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2519       {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2520       {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2521       {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2522       {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2523       {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2524       {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2525         PTA_MMX | PTA_SSE},
2526       {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2527         PTA_MMX | PTA_SSE},
2528       {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2529         PTA_MMX | PTA_SSE | PTA_SSE2},
2530       {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
        PTA_MMX | PTA_SSE | PTA_SSE2},
2532       {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2533         PTA_MMX | PTA_SSE | PTA_SSE2},
2534       {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2535         PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2536       {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2537         PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2538         | PTA_CX16 | PTA_NO_SAHF},
2539       {"core2", PROCESSOR_CORE2, CPU_CORE2,
2540         PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2541         | PTA_SSSE3 | PTA_CX16},
2542       {"geode", PROCESSOR_GEODE, CPU_GEODE,
        PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2544       {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2545       {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2546       {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2547       {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2548         PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2549       {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2550         PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2551       {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2552         PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2553       {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2554         PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2555       {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2556         PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2557       {"x86-64", PROCESSOR_K8, CPU_K8,
2558         PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2559       {"k8", PROCESSOR_K8, CPU_K8,
2560         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2561         | PTA_SSE2 | PTA_NO_SAHF},
2562       {"k8-sse3", PROCESSOR_K8, CPU_K8,
2563         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2564         | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2565       {"opteron", PROCESSOR_K8, CPU_K8,
2566         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2567         | PTA_SSE2 | PTA_NO_SAHF},
2568       {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2569         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2570         | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2571       {"athlon64", PROCESSOR_K8, CPU_K8,
2572         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2573         | PTA_SSE2 | PTA_NO_SAHF},
2574       {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2575         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2576         | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2577       {"athlon-fx", PROCESSOR_K8, CPU_K8,
2578         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2579         | PTA_SSE2 | PTA_NO_SAHF},
2580       {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2581         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2582         | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2583       {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2584         PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2585         | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2586       {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2587         0 /* flags are only used for -march switch.  */ },
2588       {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2589         PTA_64BIT /* flags are only used for -march switch.  */ },
2590     };
2591 
2592   int const pta_size = ARRAY_SIZE (processor_alias_table);
2593 
  /* Set up prefix/suffix so the error messages refer to either the
     command-line argument or the attribute(target).  */
2596   if (main_args_p)
2597     {
2598       prefix = "-m";
2599       suffix = "";
2600       sw = "switch";
2601     }
2602   else
2603     {
2604       prefix = "option(\"";
2605       suffix = "\")";
2606       sw = "attribute";
2607     }
2608 
2609 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2610   SUBTARGET_OVERRIDE_OPTIONS;
2611 #endif
2612 
2613 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2614   SUBSUBTARGET_OVERRIDE_OPTIONS;
2615 #endif
2616 
  /* -fPIC is the default for 64-bit Mach-O (Darwin).  */
2618   if (TARGET_MACHO && TARGET_64BIT)
2619     flag_pic = 2;
2620 
2621   /* Set the default values for switches whose default depends on TARGET_64BIT
2622      in case they weren't overwritten by command line options.  */
2623   if (TARGET_64BIT)
2624     {
2625       /* Mach-O doesn't support omitting the frame pointer for now.  */
2626       if (flag_omit_frame_pointer == 2)
2627         flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2628       if (flag_asynchronous_unwind_tables == 2)
2629         flag_asynchronous_unwind_tables = 1;
2630       if (flag_pcc_struct_return == 2)
2631         flag_pcc_struct_return = 0;
2632     }
2633   else
2634     {
2635       if (flag_omit_frame_pointer == 2)
2636         flag_omit_frame_pointer = 0;
2637       if (flag_asynchronous_unwind_tables == 2)
2638         flag_asynchronous_unwind_tables = 0;
2639       if (flag_pcc_struct_return == 2)
2640         flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2641     }
2642 
2643   /* Need to check -mtune=generic first.  */
2644   if (ix86_tune_string)
2645     {
2646       if (!strcmp (ix86_tune_string, "generic")
2647           || !strcmp (ix86_tune_string, "i686")
          /* As special support for cross compilers we read -mtune=native
             as -mtune=generic.  With native compilers we won't see
             -mtune=native, as it will already have been rewritten by the
             driver.  */
2651           || !strcmp (ix86_tune_string, "native"))
2652         {
2653           if (TARGET_64BIT)
2654             ix86_tune_string = "generic64";
2655           else
2656             ix86_tune_string = "generic32";
2657         }
2658       /* If this call is for setting the option attribute, allow the
2659          generic32/generic64 that was previously set.  */
2660       else if (!main_args_p
2661                && (!strcmp (ix86_tune_string, "generic32")
2662                    || !strcmp (ix86_tune_string, "generic64")))
2663         ;
2664       else if (!strncmp (ix86_tune_string, "generic", 7))
2665         error ("bad value (%s) for %stune=%s %s",
2666                ix86_tune_string, prefix, suffix, sw);
2667     }
2668   else
2669     {
2670       if (ix86_arch_string)
2671         ix86_tune_string = ix86_arch_string;
2672       if (!ix86_tune_string)
2673         {
2674           ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2675           ix86_tune_defaulted = 1;
2676         }
2677 
2678       /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
2679          need to use a sensible tune option.  */
2680       if (!strcmp (ix86_tune_string, "generic")
2681           || !strcmp (ix86_tune_string, "x86-64")
2682           || !strcmp (ix86_tune_string, "i686"))
2683         {
2684           if (TARGET_64BIT)
2685             ix86_tune_string = "generic64";
2686           else
2687             ix86_tune_string = "generic32";
2688         }
2689     }
2690   if (ix86_stringop_string)
2691     {
2692       if (!strcmp (ix86_stringop_string, "rep_byte"))
2693         stringop_alg = rep_prefix_1_byte;
2694       else if (!strcmp (ix86_stringop_string, "libcall"))
2695         stringop_alg = libcall;
2696       else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2697         stringop_alg = rep_prefix_4_byte;
2698       else if (!strcmp (ix86_stringop_string, "rep_8byte")
2699                && TARGET_64BIT)
2700         /* rep; movq isn't available in 32-bit code.  */
2701         stringop_alg = rep_prefix_8_byte;
2702       else if (!strcmp (ix86_stringop_string, "byte_loop"))
2703         stringop_alg = loop_1_byte;
2704       else if (!strcmp (ix86_stringop_string, "loop"))
2705         stringop_alg = loop;
2706       else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2707         stringop_alg = unrolled_loop;
2708       else
2709         error ("bad value (%s) for %sstringop-strategy=%s %s",
2710                ix86_stringop_string, prefix, suffix, sw);
2711     }
2712   if (!strcmp (ix86_tune_string, "x86-64"))
2713     warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated.  Use "
2714              "%stune=k8%s or %stune=generic%s instead as appropriate.",
2715              prefix, suffix, prefix, suffix, prefix, suffix);
2716 
2717   if (!ix86_arch_string)
2718     ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2719   else
2720     ix86_arch_specified = 1;
2721 
2722   if (!strcmp (ix86_arch_string, "generic"))
2723     error ("generic CPU can be used only for %stune=%s %s",
2724            prefix, suffix, sw);
2725   if (!strncmp (ix86_arch_string, "generic", 7))
2726     error ("bad value (%s) for %sarch=%s %s",
2727            ix86_arch_string, prefix, suffix, sw);
2728 
2729   if (ix86_cmodel_string != 0)
2730     {
2731       if (!strcmp (ix86_cmodel_string, "small"))
2732         ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2733       else if (!strcmp (ix86_cmodel_string, "medium"))
2734         ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2735       else if (!strcmp (ix86_cmodel_string, "large"))
2736         ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2737       else if (flag_pic)
2738         error ("code model %s does not support PIC mode", ix86_cmodel_string);
2739       else if (!strcmp (ix86_cmodel_string, "32"))
2740         ix86_cmodel = CM_32;
2741       else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2742         ix86_cmodel = CM_KERNEL;
2743       else
2744         error ("bad value (%s) for %scmodel=%s %s",
2745                ix86_cmodel_string, prefix, suffix, sw);
2746     }
2747   else
2748     {
2749       /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2750          use of rip-relative addressing.  This eliminates fixups that
2751          would otherwise be needed if this object is to be placed in a
2752          DLL, and is essentially just as efficient as direct addressing.  */
2753       if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2754         ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2755       else if (TARGET_64BIT)
2756         ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2757       else
2758         ix86_cmodel = CM_32;
2759     }
2760   if (ix86_asm_string != 0)
2761     {
2762       if (! TARGET_MACHO
2763           && !strcmp (ix86_asm_string, "intel"))
2764         ix86_asm_dialect = ASM_INTEL;
2765       else if (!strcmp (ix86_asm_string, "att"))
2766         ix86_asm_dialect = ASM_ATT;
2767       else
2768         error ("bad value (%s) for %sasm=%s %s",
2769                ix86_asm_string, prefix, suffix, sw);
2770     }
2771   if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2772     error ("code model %qs not supported in the %s bit mode",
2773            ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2774   if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2775     sorry ("%i-bit mode not compiled in",
2776            (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2777 
2778   for (i = 0; i < pta_size; i++)
2779     if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2780       {
2781         ix86_schedule = processor_alias_table[i].schedule;
2782         ix86_arch = processor_alias_table[i].processor;
2783         /* Default cpu tuning to the architecture.  */
2784         ix86_tune = ix86_arch;
2785 
2786         if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2787           error ("CPU you selected does not support x86-64 "
2788                  "instruction set");
2789 
2790         if (processor_alias_table[i].flags & PTA_MMX
2791             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2792           ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2793         if (processor_alias_table[i].flags & PTA_3DNOW
2794             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2795           ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2796         if (processor_alias_table[i].flags & PTA_3DNOW_A
2797             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2798           ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2799         if (processor_alias_table[i].flags & PTA_SSE
2800             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2801           ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2802         if (processor_alias_table[i].flags & PTA_SSE2
2803             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2804           ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2805         if (processor_alias_table[i].flags & PTA_SSE3
2806             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2807           ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2808         if (processor_alias_table[i].flags & PTA_SSSE3
2809             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2810           ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2811         if (processor_alias_table[i].flags & PTA_SSE4_1
2812             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2813           ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2814         if (processor_alias_table[i].flags & PTA_SSE4_2
2815             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2816           ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2817         if (processor_alias_table[i].flags & PTA_AVX
2818             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2819           ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2820         if (processor_alias_table[i].flags & PTA_FMA
2821             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2822           ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2823         if (processor_alias_table[i].flags & PTA_SSE4A
2824             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2825           ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2826         if (processor_alias_table[i].flags & PTA_SSE5
2827             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2828           ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2829         if (processor_alias_table[i].flags & PTA_ABM
2830             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2831           ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2832         if (processor_alias_table[i].flags & PTA_CX16
2833             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2834           ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2835         if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2836             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2837           ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2838         if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2839             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2840           ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2841         if (processor_alias_table[i].flags & PTA_AES
2842             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2843           ix86_isa_flags |= OPTION_MASK_ISA_AES;
2844         if (processor_alias_table[i].flags & PTA_PCLMUL
2845             && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2846           ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2847         if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2848           x86_prefetch_sse = true;
2849 
2850         break;
2851       }
2852 
2853   if (i == pta_size)
2854     error ("bad value (%s) for %sarch=%s %s",
2855            ix86_arch_string, prefix, suffix, sw);
2856 
2857   ix86_arch_mask = 1u << ix86_arch;
2858   for (i = 0; i < X86_ARCH_LAST; ++i)
2859     ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2860 
2861   for (i = 0; i < pta_size; i++)
2862     if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2863       {
2864         ix86_schedule = processor_alias_table[i].schedule;
2865         ix86_tune = processor_alias_table[i].processor;
2866         if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2867           {
2868             if (ix86_tune_defaulted)
2869               {
2870                 ix86_tune_string = "x86-64";
2871                 for (i = 0; i < pta_size; i++)
2872                   if (! strcmp (ix86_tune_string,
2873                                 processor_alias_table[i].name))
2874                     break;
2875                 ix86_schedule = processor_alias_table[i].schedule;
2876                 ix86_tune = processor_alias_table[i].processor;
2877               }
2878             else
2879               error ("CPU you selected does not support x86-64 "
2880                      "instruction set");
2881           }
2882 
2883         /* Intel CPUs have always interpreted SSE prefetch instructions as
2884            NOPs; so, we can enable SSE prefetch instructions even when
2885            -mtune (rather than -march) points us to a processor that has them.
2886            However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2887            higher processors.  */
2888         if (TARGET_CMOVE
2889             && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2890           x86_prefetch_sse = true;
2891         break;
2892       }
2893   if (i == pta_size)
2894     error ("bad value (%s) for %stune=%s %s",
2895            ix86_tune_string, prefix, suffix, sw);
2896 
2897   ix86_tune_mask = 1u << ix86_tune;
2898   for (i = 0; i < X86_TUNE_LAST; ++i)
2899     ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2900 
2901   if (optimize_size)
2902     ix86_cost = &ix86_size_cost;
2903   else
2904     ix86_cost = processor_target_table[ix86_tune].cost;
2905 
2906   /* Arrange to set up i386_stack_locals for all functions.  */
2907   init_machine_status = ix86_init_machine_status;
2908 
2909   /* Validate -mregparm= value.  */
2910   if (ix86_regparm_string)
2911     {
2912       if (TARGET_64BIT)
2913         warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
2914       i = atoi (ix86_regparm_string);
2915       if (i < 0 || i > REGPARM_MAX)
2916         error ("%sregparm=%d%s is not between 0 and %d",
2917                prefix, i, suffix, REGPARM_MAX);
2918       else
2919         ix86_regparm = i;
2920     }
2921   if (TARGET_64BIT)
2922     ix86_regparm = REGPARM_MAX;
2923 
2924   /* If the user has provided any of the -malign-* options,
2925      warn and use that value only if -falign-* is not set.
2926      Remove this code in GCC 3.2 or later.  */
2927   if (ix86_align_loops_string)
2928     {
2929       warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
2930                prefix, suffix, suffix);
2931       if (align_loops == 0)
2932         {
2933           i = atoi (ix86_align_loops_string);
2934           if (i < 0 || i > MAX_CODE_ALIGN)
2935             error ("%salign-loops=%d%s is not between 0 and %d",
2936                    prefix, i, suffix, MAX_CODE_ALIGN);
2937           else
2938             align_loops = 1 << i;
2939         }
2940     }
2941 
2942   if (ix86_align_jumps_string)
2943     {
2944       warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
2945                prefix, suffix, suffix);
2946       if (align_jumps == 0)
2947         {
2948           i = atoi (ix86_align_jumps_string);
2949           if (i < 0 || i > MAX_CODE_ALIGN)
2950             error ("%salign-loops=%d%s is not between 0 and %d",
2951                    prefix, i, suffix, MAX_CODE_ALIGN);
2952           else
2953             align_jumps = 1 << i;
2954         }
2955     }
2956 
2957   if (ix86_align_funcs_string)
2958     {
2959       warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
2960                prefix, suffix, suffix);
2961       if (align_functions == 0)
2962         {
2963           i = atoi (ix86_align_funcs_string);
2964           if (i < 0 || i > MAX_CODE_ALIGN)
2965             error ("%salign-loops=%d%s is not between 0 and %d",
2966                    prefix, i, suffix, MAX_CODE_ALIGN);
2967           else
2968             align_functions = 1 << i;
2969         }
2970     }
2971 
2972   /* Default align_* from the processor table.  */
2973   if (align_loops == 0)
2974     {
2975       align_loops = processor_target_table[ix86_tune].align_loop;
2976       align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2977     }
2978   if (align_jumps == 0)
2979     {
2980       align_jumps = processor_target_table[ix86_tune].align_jump;
2981       align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2982     }
2983   if (align_functions == 0)
2984     {
2985       align_functions = processor_target_table[ix86_tune].align_func;
2986     }
2987 
2988   /* Validate -mbranch-cost= value, or provide default.  */
2989   ix86_branch_cost = ix86_cost->branch_cost;
2990   if (ix86_branch_cost_string)
2991     {
2992       i = atoi (ix86_branch_cost_string);
2993       if (i < 0 || i > 5)
2994         error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
2995       else
2996         ix86_branch_cost = i;
2997     }
2998   if (ix86_section_threshold_string)
2999     {
3000       i = atoi (ix86_section_threshold_string);
3001       if (i < 0)
3002         error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3003       else
3004         ix86_section_threshold = i;
3005     }
3006 
3007   if (ix86_tls_dialect_string)
3008     {
3009       if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3010         ix86_tls_dialect = TLS_DIALECT_GNU;
3011       else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3012         ix86_tls_dialect = TLS_DIALECT_GNU2;
3013       else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3014         ix86_tls_dialect = TLS_DIALECT_SUN;
3015       else
3016         error ("bad value (%s) for %stls-dialect=%s %s",
3017                ix86_tls_dialect_string, prefix, suffix, sw);
3018     }
3019 
3020   if (ix87_precision_string)
3021     {
3022       i = atoi (ix87_precision_string);
3023       if (i != 32 && i != 64 && i != 80)
3024         error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3025     }
3026 
3027   if (TARGET_64BIT)
3028     {
3029       target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3030 
3031       /* Enable by default the SSE and MMX builtins.  Do allow the user to
3032          explicitly disable any of these.  In particular, disabling SSE and
3033          MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
        ix86_isa_flags
          |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
               | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3038 
3039       if (TARGET_RTD)
3040         warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3041     }
3042   else
3043     {
3044       target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3045 
      if (!ix86_arch_specified)
        ix86_isa_flags
          |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3049 
      /* The i386 ABI does not specify a red zone.  It still makes sense to
         use one when the programmer takes care to keep the stack from being
         destroyed.  */
3052       if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3053         target_flags |= MASK_NO_RED_ZONE;
3054     }
3055 
3056   /* Keep nonleaf frame pointers.  */
3057   if (flag_omit_frame_pointer)
3058     target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3059   else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3060     flag_omit_frame_pointer = 1;
3061 
3062   /* If we're doing fast math, we don't care about comparison order
3063      wrt NaNs.  This lets us use a shorter comparison sequence.  */
3064   if (flag_finite_math_only)
3065     target_flags &= ~MASK_IEEE_FP;
3066 
3067   /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3068      since the insns won't need emulation.  */
3069   if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3070     target_flags &= ~MASK_NO_FANCY_MATH_387;
3071 
3072   /* Likewise, if the target doesn't have a 387, or we've specified
3073      software floating point, don't use 387 inline intrinsics.  */
3074   if (!TARGET_80387)
3075     target_flags |= MASK_NO_FANCY_MATH_387;
3076 
3077   /* Turn on MMX builtins for -msse.  */
3078   if (TARGET_SSE)
3079     {
3080       ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3081       x86_prefetch_sse = true;
3082     }
3083 
3084   /* Turn on popcnt instruction for -msse4.2 or -mabm.  */
3085   if (TARGET_SSE4_2 || TARGET_ABM)
3086     ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3087 
  if (!TARGET_64BIT && TARGET_SAVE_ARGS)
    error ("-msave-args makes no sense in 32-bit mode");
3090 
3091   /* Validate -mpreferred-stack-boundary= value or default it to
3092      PREFERRED_STACK_BOUNDARY_DEFAULT.  */
3093   ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3094   if (ix86_preferred_stack_boundary_string)
3095     {
3096       i = atoi (ix86_preferred_stack_boundary_string);
3097       if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3098         error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3099                prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3100       else
3101         ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3102     }
3103 
3104   /* Set the default value for -mstackrealign.  */
3105   if (ix86_force_align_arg_pointer == -1)
3106     ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3107 
3108   /* Validate -mincoming-stack-boundary= value or default it to
3109      MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
3110   if (ix86_force_align_arg_pointer)
3111     ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3112   else
3113     ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3114   ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3115   if (ix86_incoming_stack_boundary_string)
3116     {
3117       i = atoi (ix86_incoming_stack_boundary_string);
3118       if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3119         error ("-mincoming-stack-boundary=%d is not between %d and 12",
3120                i, TARGET_64BIT ? 4 : 2);
3121       else
3122         {
3123           ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3124           ix86_incoming_stack_boundary
3125             = ix86_user_incoming_stack_boundary;
3126         }
3127     }
3128 
3129   /* Accept -msseregparm only if at least SSE support is enabled.  */
3130   if (TARGET_SSEREGPARM
3131       && ! TARGET_SSE)
3132     error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3133 
3134   ix86_fpmath = TARGET_FPMATH_DEFAULT;
3135   if (ix86_fpmath_string != 0)
3136     {
3137       if (! strcmp (ix86_fpmath_string, "387"))
3138         ix86_fpmath = FPMATH_387;
3139       else if (! strcmp (ix86_fpmath_string, "sse"))
3140         {
3141           if (!TARGET_SSE)
3142             {
3143               warning (0, "SSE instruction set disabled, using 387 arithmetics");
3144               ix86_fpmath = FPMATH_387;
3145             }
3146           else
3147             ix86_fpmath = FPMATH_SSE;
3148         }
3149       else if (! strcmp (ix86_fpmath_string, "387,sse")
3150                || ! strcmp (ix86_fpmath_string, "387+sse")
3151                || ! strcmp (ix86_fpmath_string, "sse,387")
3152                || ! strcmp (ix86_fpmath_string, "sse+387")
3153                || ! strcmp (ix86_fpmath_string, "both"))
3154         {
3155           if (!TARGET_SSE)
3156             {
3157               warning (0, "SSE instruction set disabled, using 387 arithmetics");
3158               ix86_fpmath = FPMATH_387;
3159             }
3160           else if (!TARGET_80387)
3161             {
3162               warning (0, "387 instruction set disabled, using SSE arithmetics");
3163               ix86_fpmath = FPMATH_SSE;
3164             }
3165           else
3166             ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3167         }
3168       else
3169         error ("bad value (%s) for %sfpmath=%s %s",
3170                ix86_fpmath_string, prefix, suffix, sw);
3171     }
3172 
3173   /* If the i387 is disabled, then do not return values in it. */
3174   if (!TARGET_80387)
3175     target_flags &= ~MASK_FLOAT_RETURNS;
3176 
3177   /* Use external vectorized library in vectorizing intrinsics.  */
3178   if (ix86_veclibabi_string)
3179     {
3180       if (strcmp (ix86_veclibabi_string, "svml") == 0)
3181         ix86_veclib_handler = ix86_veclibabi_svml;
3182       else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3183         ix86_veclib_handler = ix86_veclibabi_acml;
3184       else
3185         error ("unknown vectorization library ABI type (%s) for "
3186                "%sveclibabi=%s %s", ix86_veclibabi_string,
3187                prefix, suffix, sw);
3188     }
3189 
3190   if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3191       && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3192       && !optimize_size)
3193     target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3194 
3195   /* ??? Unwind info is not correct around the CFG unless either a frame
3196      pointer is present or M_A_O_A is set.  Fixing this requires rewriting
3197      unwind info generation to be aware of the CFG and propagating states
3198      around edges.  */
3199   if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3200        || flag_exceptions || flag_non_call_exceptions)
3201       && flag_omit_frame_pointer
3202       && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3203     {
3204       if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3205         warning (0, "unwind tables currently require either a frame pointer "
3206                  "or %saccumulate-outgoing-args%s for correctness",
3207                  prefix, suffix);
3208       target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3209     }
3210 
3211   /* If stack probes are required, the space used for large function
3212      arguments on the stack must also be probed, so enable
3213      -maccumulate-outgoing-args so this happens in the prologue.  */
3214   if (TARGET_STACK_PROBE
3215       && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3216     {
3217       if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3218         warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3219                  "for correctness", prefix, suffix);
3220       target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3221     }
3222 
  /* For sane SSE instruction set generation we need the fcomi instruction.
     It is safe to enable all CMOV instructions.  */
3225   if (TARGET_SSE)
3226     TARGET_CMOVE = 1;
3227 
3228   /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
3229   {
3230     char *p;
3231     ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3232     p = strchr (internal_label_prefix, 'X');
3233     internal_label_prefix_len = p - internal_label_prefix;
3234     *p = '\0';
3235   }
3236 
  /* When no scheduling description is available, disable the scheduler
     pass so it won't slow down compilation and make x87 code slower.  */
3239   if (!TARGET_SCHEDULE)
3240     flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3241 
3242   if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3243     set_param_value ("simultaneous-prefetches",
3244                      ix86_cost->simultaneous_prefetches);
3245   if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3246     set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3247   if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3248     set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3249   if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3250     set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3251 
3252   /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3253      can be optimized to ap = __builtin_next_arg (0).  */
3254   if (!TARGET_64BIT)
3255     targetm.expand_builtin_va_start = NULL;
3256 
3257   if (TARGET_64BIT)
3258     {
3259       ix86_gen_leave = gen_leave_rex64;
3260       ix86_gen_pop1 = gen_popdi1;
3261       ix86_gen_add3 = gen_adddi3;
3262       ix86_gen_sub3 = gen_subdi3;
3263       ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3264       ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3265       ix86_gen_monitor = gen_sse3_monitor64;
3266       ix86_gen_andsp = gen_anddi3;
3267     }
3268   else
3269     {
3270       ix86_gen_leave = gen_leave;
3271       ix86_gen_pop1 = gen_popsi1;
3272       ix86_gen_add3 = gen_addsi3;
3273       ix86_gen_sub3 = gen_subsi3;
3274       ix86_gen_sub3_carry = gen_subsi3_carry;
3275       ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3276       ix86_gen_monitor = gen_sse3_monitor;
3277       ix86_gen_andsp = gen_andsi3;
3278     }
3279 
3280 #ifdef USE_IX86_CLD
3281   /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
3282   if (!TARGET_64BIT)
3283     target_flags |= MASK_CLD & ~target_flags_explicit;
3284 #endif
3285 
  /* Save the initial options in case the user uses function-specific
     options.  */
3287   if (main_args_p)
3288     target_option_default_node = target_option_current_node
3289       = build_target_option_node ();
3290 }
3291 
3292 /* Update register usage after having seen the compiler flags.  */
3293 
3294 void
3295 ix86_conditional_register_usage (void)
3296 {
3297   int i;
3298   unsigned int j;
3299 
3300   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3301     {
3302       if (fixed_regs[i] > 1)
3303         fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3304       if (call_used_regs[i] > 1)
3305         call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3306     }
3307 
3308   /* The PIC register, if it exists, is fixed.  */
3309   j = PIC_OFFSET_TABLE_REGNUM;
3310   if (j != INVALID_REGNUM)
3311     fixed_regs[j] = call_used_regs[j] = 1;
3312 
3313   /* The MS_ABI changes the set of call-used registers.  */
3314   if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3315     {
3316       call_used_regs[SI_REG] = 0;
3317       call_used_regs[DI_REG] = 0;
3318       call_used_regs[XMM6_REG] = 0;
3319       call_used_regs[XMM7_REG] = 0;
3320       for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3321         call_used_regs[i] = 0;
3322     }
3323 
3324   /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3325      other call-clobbered regs for 64-bit.  */
3326   if (TARGET_64BIT)
3327     {
3328       CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3329 
3330       for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3331         if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3332             && call_used_regs[i])
3333           SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3334     }
3335 
3336   /* If MMX is disabled, squash the registers.  */
3337   if (! TARGET_MMX)
3338     for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3339       if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3340         fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3341 
3342   /* If SSE is disabled, squash the registers.  */
3343   if (! TARGET_SSE)
3344     for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3345       if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3346         fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3347 
3348   /* If the FPU is disabled, squash the registers.  */
3349   if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3350     for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3351       if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3352         fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3353 
3354   /* If 32-bit, squash the 64-bit registers.  */
3355   if (! TARGET_64BIT)
3356     {
3357       for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3358         reg_names[i] = "";
3359       for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3360         reg_names[i] = "";
3361     }
3362 }
3363 
3364 
/* Save the current options.  */
3366 
3367 static void
3368 ix86_function_specific_save (struct cl_target_option *ptr)
3369 {
3370   gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3371   gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3372   gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3373   gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3374   gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
3375 
3376   ptr->arch = ix86_arch;
3377   ptr->schedule = ix86_schedule;
3378   ptr->tune = ix86_tune;
3379   ptr->fpmath = ix86_fpmath;
3380   ptr->branch_cost = ix86_branch_cost;
3381   ptr->tune_defaulted = ix86_tune_defaulted;
3382   ptr->arch_specified = ix86_arch_specified;
3383   ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3384   ptr->target_flags_explicit = target_flags_explicit;
3385 }
3386 
/* Restore the current options.  */
3388 
3389 static void
3390 ix86_function_specific_restore (struct cl_target_option *ptr)
3391 {
3392   enum processor_type old_tune = ix86_tune;
3393   enum processor_type old_arch = ix86_arch;
3394   unsigned int ix86_arch_mask, ix86_tune_mask;
3395   int i;
3396 
3397   ix86_arch = ptr->arch;
3398   ix86_schedule = ptr->schedule;
3399   ix86_tune = ptr->tune;
3400   ix86_fpmath = ptr->fpmath;
3401   ix86_branch_cost = ptr->branch_cost;
3402   ix86_tune_defaulted = ptr->tune_defaulted;
3403   ix86_arch_specified = ptr->arch_specified;
3404   ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3405   target_flags_explicit = ptr->target_flags_explicit;
3406 
  /* Recreate the arch feature tests if the arch changed.  */
3408   if (old_arch != ix86_arch)
3409     {
3410       ix86_arch_mask = 1u << ix86_arch;
3411       for (i = 0; i < X86_ARCH_LAST; ++i)
3412         ix86_arch_features[i]
3413           = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3414     }
3415 
  /* Recreate the tune optimization tests.  */
3417   if (old_tune != ix86_tune)
3418     {
3419       ix86_tune_mask = 1u << ix86_tune;
3420       for (i = 0; i < X86_TUNE_LAST; ++i)
3421         ix86_tune_features[i]
3422           = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3423     }
3424 }
3425 
/* Print the current options.  */
3427 
3428 static void
3429 ix86_function_specific_print (FILE *file, int indent,
3430                               struct cl_target_option *ptr)
3431 {
3432   char *target_string
3433     = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3434                           NULL, NULL, NULL, false);
3435 
3436   fprintf (file, "%*sarch = %d (%s)\n",
3437            indent, "",
3438            ptr->arch,
3439            ((ptr->arch < TARGET_CPU_DEFAULT_max)
3440             ? cpu_names[ptr->arch]
3441             : "<unknown>"));
3442 
3443   fprintf (file, "%*stune = %d (%s)\n",
3444            indent, "",
3445            ptr->tune,
3446            ((ptr->tune < TARGET_CPU_DEFAULT_max)
3447             ? cpu_names[ptr->tune]
3448             : "<unknown>"));
3449 
3450   fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3451            (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3452            (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3453   fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3454 
3455   if (target_string)
3456     {
3457       fprintf (file, "%*s%s\n", indent, "", target_string);
3458       free (target_string);
3459     }
3460 }
3461 
3462 
/* Inner function to process the attribute((target(...))) arguments: take an
   argument and set the current options from it.  If the argument is a list,
   recursively go over the list.  */
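
/* For example, a declaration such as

     int foo (int) __attribute__ ((target ("sse4.2,no-fused-madd")));

   (foo being an illustrative name) arrives here as the STRING_CST
   "sse4.2,no-fused-madd"; each comma-separated token is looked up in the
   attrs[] table below, with a leading "no-" inverting the option.  */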
3466 
3467 static bool
3468 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3469 {
3470   char *next_optstr;
3471   bool ret = true;
3472 
3473 #define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3474 #define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3475 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3476 #define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }
3477 
3478   enum ix86_opt_type
3479   {
3480     ix86_opt_unknown,
3481     ix86_opt_yes,
3482     ix86_opt_no,
3483     ix86_opt_str,
3484     ix86_opt_isa
3485   };
3486 
3487   static const struct
3488   {
3489     const char *string;
3490     size_t len;
3491     enum ix86_opt_type type;
3492     int opt;
3493     int mask;
3494   } attrs[] = {
3495     /* isa options */
3496     IX86_ATTR_ISA ("3dnow",     OPT_m3dnow),
3497     IX86_ATTR_ISA ("abm",       OPT_mabm),
3498     IX86_ATTR_ISA ("aes",       OPT_maes),
3499     IX86_ATTR_ISA ("avx",       OPT_mavx),
3500     IX86_ATTR_ISA ("mmx",       OPT_mmmx),
3501     IX86_ATTR_ISA ("pclmul",    OPT_mpclmul),
3502     IX86_ATTR_ISA ("popcnt",    OPT_mpopcnt),
3503     IX86_ATTR_ISA ("sse",       OPT_msse),
3504     IX86_ATTR_ISA ("sse2",      OPT_msse2),
3505     IX86_ATTR_ISA ("sse3",      OPT_msse3),
3506     IX86_ATTR_ISA ("sse4",      OPT_msse4),
3507     IX86_ATTR_ISA ("sse4.1",    OPT_msse4_1),
3508     IX86_ATTR_ISA ("sse4.2",    OPT_msse4_2),
3509     IX86_ATTR_ISA ("sse4a",     OPT_msse4a),
3510     IX86_ATTR_ISA ("sse5",      OPT_msse5),
3511     IX86_ATTR_ISA ("ssse3",     OPT_mssse3),
3512 
3513     /* string options */
3514     IX86_ATTR_STR ("arch=",     IX86_FUNCTION_SPECIFIC_ARCH),
3515     IX86_ATTR_STR ("fpmath=",   IX86_FUNCTION_SPECIFIC_FPMATH),
3516     IX86_ATTR_STR ("tune=",     IX86_FUNCTION_SPECIFIC_TUNE),
3517 
3518     /* flag options */
3519     IX86_ATTR_YES ("cld",
3520                    OPT_mcld,
3521                    MASK_CLD),
3522 
3523     IX86_ATTR_NO ("fancy-math-387",
3524                   OPT_mfancy_math_387,
3525                   MASK_NO_FANCY_MATH_387),
3526 
3527     IX86_ATTR_NO ("fused-madd",
3528                   OPT_mfused_madd,
3529                   MASK_NO_FUSED_MADD),
3530 
3531     IX86_ATTR_YES ("ieee-fp",
3532                    OPT_mieee_fp,
3533                    MASK_IEEE_FP),
3534 
3535     IX86_ATTR_YES ("inline-all-stringops",
3536                    OPT_minline_all_stringops,
3537                    MASK_INLINE_ALL_STRINGOPS),
3538 
3539     IX86_ATTR_YES ("inline-stringops-dynamically",
3540                    OPT_minline_stringops_dynamically,
3541                    MASK_INLINE_STRINGOPS_DYNAMICALLY),
3542 
3543     IX86_ATTR_NO ("align-stringops",
3544                   OPT_mno_align_stringops,
3545                   MASK_NO_ALIGN_STRINGOPS),
3546 
3547     IX86_ATTR_YES ("recip",
3548                    OPT_mrecip,
3549                    MASK_RECIP),
3550 
3551   };
3552 
3553   /* If this is a list, recurse to get the options.  */
3554   if (TREE_CODE (args) == TREE_LIST)
3555     {
3556       bool ret = true;
3557 
3558       for (; args; args = TREE_CHAIN (args))
3559         if (TREE_VALUE (args)
3560             && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3561           ret = false;
3562 
3563       return ret;
3564     }
3565 
3566   else if (TREE_CODE (args) != STRING_CST)
3567     gcc_unreachable ();
3568 
3569   /* Handle multiple arguments separated by commas.  */
3570   next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3571 
3572   while (next_optstr && *next_optstr != '\0')
3573     {
3574       char *p = next_optstr;
3575       char *orig_p = p;
3576       char *comma = strchr (next_optstr, ',');
3577       const char *opt_string;
3578       size_t len, opt_len;
3579       int opt;
3580       bool opt_set_p;
3581       char ch;
3582       unsigned i;
3583       enum ix86_opt_type type = ix86_opt_unknown;
3584       int mask = 0;
3585 
3586       if (comma)
3587         {
3588           *comma = '\0';
3589           len = comma - next_optstr;
3590           next_optstr = comma + 1;
3591         }
3592       else
3593         {
3594           len = strlen (p);
3595           next_optstr = NULL;
3596         }
3597 
3598       /* Recognize no-xxx.  */
3599       if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3600         {
3601           opt_set_p = false;
3602           p += 3;
3603           len -= 3;
3604         }
3605       else
3606         opt_set_p = true;
3607 
3608       /* Find the option.  */
3609       ch = *p;
3610       opt = N_OPTS;
3611       for (i = 0; i < ARRAY_SIZE (attrs); i++)
3612         {
3613           type = attrs[i].type;
3614           opt_len = attrs[i].len;
3615           if (ch == attrs[i].string[0]
3616               && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3617               && memcmp (p, attrs[i].string, opt_len) == 0)
3618             {
3619               opt = attrs[i].opt;
3620               mask = attrs[i].mask;
3621               opt_string = attrs[i].string;
3622               break;
3623             }
3624         }
3625 
3626       /* Process the option.  */
3627       if (opt == N_OPTS)
3628         {
3629           error ("attribute(target(\"%s\")) is unknown", orig_p);
3630           ret = false;
3631         }
3632 
3633       else if (type == ix86_opt_isa)
3634         ix86_handle_option (opt, p, opt_set_p);
3635 
3636       else if (type == ix86_opt_yes || type == ix86_opt_no)
3637         {
3638           if (type == ix86_opt_no)
3639             opt_set_p = !opt_set_p;
3640 
3641           if (opt_set_p)
3642             target_flags |= mask;
3643           else
3644             target_flags &= ~mask;
3645         }
3646 
3647       else if (type == ix86_opt_str)
3648         {
3649           if (p_strings[opt])
3650             {
3651               error ("option(\"%s\") was already specified", opt_string);
3652               ret = false;
3653             }
3654           else
3655             p_strings[opt] = xstrdup (p + opt_len);
3656         }
3657 
3658       else
3659         gcc_unreachable ();
3660     }
3661 
3662   return ret;
3663 }
3664 
3665 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
3666 
3667 tree
3668 ix86_valid_target_attribute_tree (tree args)
3669 {
3670   const char *orig_arch_string = ix86_arch_string;
3671   const char *orig_tune_string = ix86_tune_string;
3672   const char *orig_fpmath_string = ix86_fpmath_string;
3673   int orig_tune_defaulted = ix86_tune_defaulted;
3674   int orig_arch_specified = ix86_arch_specified;
3675   char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3676   tree t = NULL_TREE;
3677   int i;
3678   struct cl_target_option *def
3679     = TREE_TARGET_OPTION (target_option_default_node);
3680 
3681   /* Process each of the options on the chain.  */
3682   if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3683     return NULL_TREE;
3684 
  /* If the changed options differ from the defaults, rerun override_options,
     and then save the options away.  The string options are attribute
     options, and will be undone when we copy the save structure.  */
3688   if (ix86_isa_flags != def->ix86_isa_flags
3689       || target_flags != def->target_flags
3690       || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3691       || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3692       || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3693     {
3694       /* If we are using the default tune= or arch=, undo the string assigned,
3695          and use the default.  */
3696       if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3697         ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3698       else if (!orig_arch_specified)
3699         ix86_arch_string = NULL;
3700 
3701       if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3702         ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3703       else if (orig_tune_defaulted)
3704         ix86_tune_string = NULL;
3705 
3706       /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
3707       if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3708         ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3709       else if (!TARGET_64BIT && TARGET_SSE)
3710         ix86_fpmath_string = "sse,387";
3711 
      /* Do any overrides, such as arch=xxx or tune=xxx.  */
3713       override_options (false);
3714 
3715       /* Add any builtin functions with the new isa if any.  */
3716       ix86_add_new_builtins (ix86_isa_flags);
3717 
3718       /* Save the current options unless we are validating options for
3719          #pragma.  */
3720       t = build_target_option_node ();
3721 
3722       ix86_arch_string = orig_arch_string;
3723       ix86_tune_string = orig_tune_string;
3724       ix86_fpmath_string = orig_fpmath_string;
3725 
      /* Free up memory allocated to hold the strings.  */
3727       for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3728         if (option_strings[i])
3729           free (option_strings[i]);
3730     }
3731 
3732   return t;
3733 }
3734 
3735 /* Hook to validate attribute((target("string"))).  */
3736 
3737 static bool
3738 ix86_valid_target_attribute_p (tree fndecl,
3739                                tree ARG_UNUSED (name),
3740                                tree args,
3741                                int ARG_UNUSED (flags))
3742 {
3743   struct cl_target_option cur_target;
3744   bool ret = true;
3745   tree old_optimize = build_optimization_node ();
3746   tree new_target, new_optimize;
3747   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3748 
3749   /* If the function changed the optimization levels as well as setting target
3750      options, start with the optimizations specified.  */
3751   if (func_optimize && func_optimize != old_optimize)
3752     cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3753 
3754   /* The target attributes may also change some optimization flags, so update
3755      the optimization options if necessary.  */
3756   cl_target_option_save (&cur_target);
3757   new_target = ix86_valid_target_attribute_tree (args);
3758   new_optimize = build_optimization_node ();
3759 
3760   if (!new_target)
3761     ret = false;
3762 
3763   else if (fndecl)
3764     {
3765       DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3766 
3767       if (old_optimize != new_optimize)
3768         DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
3769     }
3770 
3771   cl_target_option_restore (&cur_target);
3772 
3773   if (old_optimize != new_optimize)
3774     cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3775 
3776   return ret;
3777 }
3778 
3779 
3780 /* Hook to determine if one function can safely inline another.  */
3781 
3782 static bool
3783 ix86_can_inline_p (tree caller, tree callee)
3784 {
3785   bool ret = false;
3786   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3787   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3788 
3789   /* If callee has no option attributes, then it is ok to inline.  */
3790   if (!callee_tree)
3791     ret = true;
3792 
  /* If the caller has no option attributes but the callee does, then it is
     not ok to inline.  */
3795   else if (!caller_tree)
3796     ret = false;
3797 
3798   else
3799     {
3800       struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3801       struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3802 
      /* Callee's isa options should be a subset of the caller's, i.e. an
         SSE5 function can inline an SSE2 function but an SSE2 function
         can't inline an SSE5 function.  */
3806       if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3807           != callee_opts->ix86_isa_flags)
3808         ret = false;
3809 
3810       /* See if we have the same non-isa options.  */
3811       else if (caller_opts->target_flags != callee_opts->target_flags)
3812         ret = false;
3813 
3814       /* See if arch, tune, etc. are the same.  */
3815       else if (caller_opts->arch != callee_opts->arch)
3816         ret = false;
3817 
3818       else if (caller_opts->tune != callee_opts->tune)
3819         ret = false;
3820 
3821       else if (caller_opts->fpmath != callee_opts->fpmath)
3822         ret = false;
3823 
3824       else if (caller_opts->branch_cost != callee_opts->branch_cost)
3825         ret = false;
3826 
3827       else
3828         ret = true;
3829     }
3830 
3831   return ret;
3832 }
3833 
3834 
3835 /* Remember the last target of ix86_set_current_function.  */
3836 static GTY(()) tree ix86_previous_fndecl;
3837 
3838 /* Establish appropriate back-end context for processing the function
3839    FNDECL.  The argument might be NULL to indicate processing at top
3840    level, outside of any function scope.  */
3841 static void
3842 ix86_set_current_function (tree fndecl)
3843 {
3844   /* Only change the context if the function changes.  This hook is called
3845      several times in the course of compiling a function, and we don't want to
3846      slow things down too much or call target_reinit when it isn't safe.  */
3847   if (fndecl && fndecl != ix86_previous_fndecl)
3848     {
3849       tree old_tree = (ix86_previous_fndecl
3850                        ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3851                        : NULL_TREE);
3852 
3853       tree new_tree = (fndecl
3854                        ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3855                        : NULL_TREE);
3856 
3857       ix86_previous_fndecl = fndecl;
3858       if (old_tree == new_tree)
3859         ;
3860 
3861       else if (new_tree)
3862         {
3863           cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
3864           target_reinit ();
3865         }
3866 
3867       else if (old_tree)
3868         {
3869           struct cl_target_option *def
3870             = TREE_TARGET_OPTION (target_option_current_node);
3871 
3872           cl_target_option_restore (def);
3873           target_reinit ();
3874         }
3875     }
3876 }
3877 
3878 
3879 /* Return true if this goes in large data/bss.  */
3880 
3881 static bool
3882 ix86_in_large_data_p (tree exp)
3883 {
3884   if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3885     return false;
3886 
3887   /* Functions are never large data.  */
3888   if (TREE_CODE (exp) == FUNCTION_DECL)
3889     return false;
3890 
3891   if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3892     {
3893       const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3894       if (strcmp (section, ".ldata") == 0
3895           || strcmp (section, ".lbss") == 0)
3896         return true;
3897       return false;
3898     }
3899   else
3900     {
3901       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3902 
3903       /* If this is an incomplete type with size 0, then we can't put it
3904          in data because it might be too big when completed.  */
3905       if (!size || size > ix86_section_threshold)
3906         return true;
3907     }
3908 
3909   return false;
3910 }
3911 
3912 /* Switch to the appropriate section for output of DECL.
3913    DECL is either a `VAR_DECL' node or a constant of some sort.
3914    RELOC indicates whether forming the initial value of DECL requires
3915    link-time relocations.  */
3916 
3917 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
3918         ATTRIBUTE_UNUSED;
3919 
3920 static section *
3921 x86_64_elf_select_section (tree decl, int reloc,
3922                            unsigned HOST_WIDE_INT align)
3923 {
3924   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3925       && ix86_in_large_data_p (decl))
3926     {
3927       const char *sname = NULL;
3928       unsigned int flags = SECTION_WRITE;
3929       switch (categorize_decl_for_section (decl, reloc))
3930         {
3931         case SECCAT_DATA:
3932           sname = ".ldata";
3933           break;
3934         case SECCAT_DATA_REL:
3935           sname = ".ldata.rel";
3936           break;
3937         case SECCAT_DATA_REL_LOCAL:
3938           sname = ".ldata.rel.local";
3939           break;
3940         case SECCAT_DATA_REL_RO:
3941           sname = ".ldata.rel.ro";
3942           break;
3943         case SECCAT_DATA_REL_RO_LOCAL:
3944           sname = ".ldata.rel.ro.local";
3945           break;
3946         case SECCAT_BSS:
3947           sname = ".lbss";
3948           flags |= SECTION_BSS;
3949           break;
3950         case SECCAT_RODATA:
3951         case SECCAT_RODATA_MERGE_STR:
3952         case SECCAT_RODATA_MERGE_STR_INIT:
3953         case SECCAT_RODATA_MERGE_CONST:
3954           sname = ".lrodata";
3955           flags = 0;
3956           break;
3957         case SECCAT_SRODATA:
3958         case SECCAT_SDATA:
3959         case SECCAT_SBSS:
3960           gcc_unreachable ();
3961         case SECCAT_TEXT:
3962         case SECCAT_TDATA:
3963         case SECCAT_TBSS:
          /* We don't split these for medium model.  Place them into
             default sections and hope for the best.  */
3966           break;
3967         case SECCAT_EMUTLS_VAR:
3968         case SECCAT_EMUTLS_TMPL:
3969           gcc_unreachable ();
3970         }
3971       if (sname)
3972         {
3973           /* We might get called with string constants, but get_named_section
3974              doesn't like them as they are not DECLs.  Also, we need to set
3975              flags in that case.  */
3976           if (!DECL_P (decl))
3977             return get_section (sname, flags, NULL);
3978           return get_named_section (decl, sname, reloc);
3979         }
3980     }
3981   return default_elf_select_section (decl, reloc, align);
3982 }
3983 
3984 /* Build up a unique section name, expressed as a
3985    STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of DECL requires
3987    link-time relocations.  */
3988 
3989 static void ATTRIBUTE_UNUSED
3990 x86_64_elf_unique_section (tree decl, int reloc)
3991 {
3992   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3993       && ix86_in_large_data_p (decl))
3994     {
3995       const char *prefix = NULL;
3996       /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
3997       bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
3998 
3999       switch (categorize_decl_for_section (decl, reloc))
4000         {
4001         case SECCAT_DATA:
4002         case SECCAT_DATA_REL:
4003         case SECCAT_DATA_REL_LOCAL:
4004         case SECCAT_DATA_REL_RO:
4005         case SECCAT_DATA_REL_RO_LOCAL:
4006           prefix = one_only ? ".ld" : ".ldata";
4007           break;
4008         case SECCAT_BSS:
4009           prefix = one_only ? ".lb" : ".lbss";
4010           break;
4011         case SECCAT_RODATA:
4012         case SECCAT_RODATA_MERGE_STR:
4013         case SECCAT_RODATA_MERGE_STR_INIT:
4014         case SECCAT_RODATA_MERGE_CONST:
4015           prefix = one_only ? ".lr" : ".lrodata";
4016           break;
4017         case SECCAT_SRODATA:
4018         case SECCAT_SDATA:
4019         case SECCAT_SBSS:
4020           gcc_unreachable ();
4021         case SECCAT_TEXT:
4022         case SECCAT_TDATA:
4023         case SECCAT_TBSS:
          /* We don't split these for medium model.  Place them into
             default sections and hope for the best.  */
4026           break;
4027         case SECCAT_EMUTLS_VAR:
4028           prefix = targetm.emutls.var_section;
4029           break;
4030         case SECCAT_EMUTLS_TMPL:
4031           prefix = targetm.emutls.tmpl_section;
4032           break;
4033         }
4034       if (prefix)
4035         {
4036           const char *name, *linkonce;
4037           char *string;
4038 
4039           name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4040           name = targetm.strip_name_encoding (name);
4041           
4042           /* If we're using one_only, then there needs to be a .gnu.linkonce
4043              prefix to the section name.  */
4044           linkonce = one_only ? ".gnu.linkonce" : "";
4045   
4046           string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4047           
4048           DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4049           return;
4050         }
4051     }
4052   default_unique_section (decl, reloc);
4053 }
4054 
4055 #ifdef COMMON_ASM_OP
4056 /* This says how to output assembler code to declare an
4057    uninitialized external linkage data object.
4058 
   For medium model x86-64 we need to use the .largecomm directive for
   large objects.  */
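
/* For instance, with -mcmodel=medium an uninitialized object larger than
   ix86_section_threshold (say, a 128K buffer named buf, with 32-byte
   alignment) would be emitted roughly as

     .largecomm	buf,131072,32

   while smaller objects keep using the ordinary COMMON_ASM_OP.  */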
4061 void
4062 x86_elf_aligned_common (FILE *file,
4063                         const char *name, unsigned HOST_WIDE_INT size,
4064                         int align)
4065 {
4066   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4067       && size > (unsigned int)ix86_section_threshold)
4068     fprintf (file, ".largecomm\t");
4069   else
4070     fprintf (file, "%s", COMMON_ASM_OP);
4071   assemble_name (file, name);
4072   fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4073            size, align / BITS_PER_UNIT);
4074 }
4075 #endif
4076 
4077 /* Utility function for targets to use in implementing
4078    ASM_OUTPUT_ALIGNED_BSS.  */
4079 
4080 void
4081 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4082                         const char *name, unsigned HOST_WIDE_INT size,
4083                         int align)
4084 {
4085   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4086       && size > (unsigned int)ix86_section_threshold)
4087     switch_to_section (get_named_section (decl, ".lbss", 0));
4088   else
4089     switch_to_section (bss_section);
4090   ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4091 #ifdef ASM_DECLARE_OBJECT_NAME
4092   last_assemble_variable_decl = decl;
4093   ASM_DECLARE_OBJECT_NAME (file, name, decl);
4094 #else
  /* The standard thing is just to output a label for the object.  */
4096   ASM_OUTPUT_LABEL (file, name);
4097 #endif /* ASM_DECLARE_OBJECT_NAME */
4098   ASM_OUTPUT_SKIP (file, size ? size : 1);
4099 }
4100 
4101 void
4102 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4103 {
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem of too few registers even worse.  */
4106 #ifdef INSN_SCHEDULING
4107   if (level > 1)
4108     flag_schedule_insns = 0;
4109 #endif
4110 
4111   if (TARGET_MACHO)
4112     /* The Darwin libraries never set errno, so we might as well
4113        avoid calling them when that's the only reason we would.  */
4114     flag_errno_math = 0;
4115 
  /* The default values of these switches depend on TARGET_64BIT, which is
     not known at this moment.  Mark these values with 2 and let the user
     override them.  If there is no command line option specifying them,
     we will set the defaults in override_options.  */
4120   if (optimize >= 1)
4121     flag_omit_frame_pointer = 2;
4122   flag_pcc_struct_return = 2;
4123   flag_asynchronous_unwind_tables = 2;
4124   flag_vect_cost_model = 1;
4125 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4126   SUBTARGET_OPTIMIZATION_OPTIONS;
4127 #endif
4128 }
4129 
4130 /* Decide whether we can make a sibling call to a function.  DECL is the
4131    declaration of the function being targeted by the call and EXP is the
4132    CALL_EXPR representing the call.  */
4133 
4134 static bool
4135 ix86_function_ok_for_sibcall (tree decl, tree exp)
4136 {
4137   tree type, decl_or_type;
4138   rtx a, b;
4139 
4140   /* If we are generating position-independent code, we cannot sibcall
4141      optimize any indirect call, or a direct call to a global function,
4142      as the PLT requires %ebx be live.  */
4143   if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4144     return false;
4145 
4146   /* If we need to align the outgoing stack, then sibcalling would
4147      unalign the stack, which may break the called function.  */
4148   if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4149     return false;
4150 
4151   if (decl)
4152     {
4153       decl_or_type = decl;
4154       type = TREE_TYPE (decl);
4155     }
4156   else
4157     {
4158       /* We're looking at the CALL_EXPR, we need the type of the function.  */
4159       type = CALL_EXPR_FN (exp);                /* pointer expression */
4160       type = TREE_TYPE (type);                  /* pointer type */
4161       type = TREE_TYPE (type);                  /* function type */
4162       decl_or_type = type;
4163     }
4164 
  /* Check that the return value locations are the same.  For example,
     if we are returning floats on the 80387 register stack, we cannot
4167      make a sibcall from a function that doesn't return a float to a
4168      function that does or, conversely, from a function that does return
4169      a float to a function that doesn't; the necessary stack adjustment
4170      would not be executed.  This is also the place we notice
4171      differences in the return value ABI.  Note that it is ok for one
4172      of the functions to have void return type as long as the return
4173      value of the other is passed in a register.  */
4174   a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4175   b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4176                            cfun->decl, false);
4177   if (STACK_REG_P (a) || STACK_REG_P (b))
4178     {
4179       if (!rtx_equal_p (a, b))
4180         return false;
4181     }
4182   else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4183     ;
4184   else if (!rtx_equal_p (a, b))
4185     return false;
4186 
4187   if (TARGET_64BIT)
4188     {
4189       /* The SYSV ABI has more call-clobbered registers;
4190          disallow sibcalls from MS to SYSV.  */
4191       if (cfun->machine->call_abi == MS_ABI
4192           && ix86_function_type_abi (type) == SYSV_ABI)
4193         return false;
4194     }
4195   else
4196     {
4197       /* If this call is indirect, we'll need to be able to use a
4198          call-clobbered register for the address of the target function.
4199          Make sure that all such registers are not used for passing
4200          parameters.  Note that DLLIMPORT functions are indirect.  */
4201       if (!decl
4202           || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4203         {
4204           if (ix86_function_regparm (type, NULL) >= 3)
4205             {
4206               /* ??? Need to count the actual number of registers to be used,
4207                  not the possible number of registers.  Fix later.  */
4208               return false;
4209             }
4210         }
4211     }
4212 
4213   /* Otherwise okay.  That also includes certain types of indirect calls.  */
4214   return true;
4215 }
4216 
4217 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4218    calling convention attributes;
4219    arguments as in struct attribute_spec.handler.  */
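
/* As an illustration, on 32-bit targets a declaration such as

     int __attribute__ ((regparm (3))) f (int a, int b, int c);

   passes A, B and C in %eax, %edx and %ecx, while

     int __attribute__ ((fastcall)) g (int a, int b);

   passes A and B in %ecx and %edx (f and g being illustrative names); the
   checks below reject meaningless combinations such as fastcall together
   with regparm.  */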
4220 
4221 static tree
4222 ix86_handle_cconv_attribute (tree *node, tree name,
4223                                    tree args,
4224                                    int flags ATTRIBUTE_UNUSED,
4225                                    bool *no_add_attrs)
4226 {
4227   if (TREE_CODE (*node) != FUNCTION_TYPE
4228       && TREE_CODE (*node) != METHOD_TYPE
4229       && TREE_CODE (*node) != FIELD_DECL
4230       && TREE_CODE (*node) != TYPE_DECL)
4231     {
4232       warning (OPT_Wattributes, "%qs attribute only applies to functions",
4233                IDENTIFIER_POINTER (name));
4234       *no_add_attrs = true;
4235       return NULL_TREE;
4236     }
4237 
4238   /* Can combine regparm with all attributes but fastcall.  */
4239   if (is_attribute_p ("regparm", name))
4240     {
4241       tree cst;
4242 
4243       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4244         {
4245           error ("fastcall and regparm attributes are not compatible");
4246         }
4247 
4248       cst = TREE_VALUE (args);
4249       if (TREE_CODE (cst) != INTEGER_CST)
4250         {
4251           warning (OPT_Wattributes,
4252                    "%qs attribute requires an integer constant argument",
4253                    IDENTIFIER_POINTER (name));
4254           *no_add_attrs = true;
4255         }
4256       else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4257         {
4258           warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4259                    IDENTIFIER_POINTER (name), REGPARM_MAX);
4260           *no_add_attrs = true;
4261         }
4262 
4263       return NULL_TREE;
4264     }
4265 
4266   if (TARGET_64BIT)
4267     {
4268       /* Do not warn when emulating the MS ABI.  */
      if (TREE_CODE (*node) != FUNCTION_TYPE
          || ix86_function_type_abi (*node) != MS_ABI)
4270         warning (OPT_Wattributes, "%qs attribute ignored",
4271                  IDENTIFIER_POINTER (name));
4272       *no_add_attrs = true;
4273       return NULL_TREE;
4274     }
4275 
4276   /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
4277   if (is_attribute_p ("fastcall", name))
4278     {
4279       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4280         {
4281           error ("fastcall and cdecl attributes are not compatible");
4282         }
4283       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4284         {
4285           error ("fastcall and stdcall attributes are not compatible");
4286         }
4287       if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4288         {
4289           error ("fastcall and regparm attributes are not compatible");
4290         }
4291     }
4292 
4293   /* Can combine stdcall with fastcall (redundant), regparm and
4294      sseregparm.  */
4295   else if (is_attribute_p ("stdcall", name))
4296     {
4297       if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4298         {
4299           error ("stdcall and cdecl attributes are not compatible");
4300         }
4301       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4302         {
4303           error ("stdcall and fastcall attributes are not compatible");
4304         }
4305     }
4306 
4307   /* Can combine cdecl with regparm and sseregparm.  */
4308   else if (is_attribute_p ("cdecl", name))
4309     {
4310       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4311         {
4312           error ("stdcall and cdecl attributes are not compatible");
4313         }
4314       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4315         {
4316           error ("fastcall and cdecl attributes are not compatible");
4317         }
4318     }
4319 
4320   /* Can combine sseregparm with all attributes.  */
4321 
4322   return NULL_TREE;
4323 }
4324 
4325 /* Return 0 if the attributes for two types are incompatible, 1 if they
4326    are compatible, and 2 if they are nearly compatible (which causes a
4327    warning to be generated).  */
4328 
4329 static int
4330 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4331 {
4332   /* Check for mismatch of non-default calling convention.  */
4333   const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4334 
4335   if (TREE_CODE (type1) != FUNCTION_TYPE
4336       && TREE_CODE (type1) != METHOD_TYPE)
4337     return 1;
4338 
4339   /* Check for mismatched fastcall/regparm types.  */
4340   if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4341        != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4342       || (ix86_function_regparm (type1, NULL)
4343           != ix86_function_regparm (type2, NULL)))
4344     return 0;
4345 
4346   /* Check for mismatched sseregparm types.  */
4347   if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4348       != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4349     return 0;
4350 
4351   /* Check for mismatched return types (cdecl vs stdcall).  */
4352   if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4353       != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4354     return 0;
4355 
4356   return 1;
4357 }
4358 
4359 /* Return the regparm value for a function with the indicated TYPE and DECL.
4360    DECL may be NULL when calling function indirectly
4361    or considering a libcall.  */
4362 
4363 static int
4364 ix86_function_regparm (const_tree type, const_tree decl)
4365 {
4366   tree attr;
4367   int regparm;
4368 
4369   static bool error_issued;
4370 
4371   if (TARGET_64BIT)
4372     return (ix86_function_type_abi (type) == SYSV_ABI
4373             ? X86_64_REGPARM_MAX : X64_REGPARM_MAX);
4374 
4375   regparm = ix86_regparm;
4376   attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4377   if (attr)
4378     {
4379       regparm
4380         = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4381 
4382       if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4383         {
          /* We can't use regparm(3) for nested functions because
             they pass the static chain pointer in the %ecx register.  */
4386           if (!error_issued && regparm == 3
4387               && decl_function_context (decl)
4388               && !DECL_NO_STATIC_CHAIN (decl))
4389             {
4390               error ("nested functions are limited to 2 register parameters");
4391               error_issued = true;
4392               return 0;
4393             }
4394         }
4395 
4396       return regparm;
4397     }
4398 
4399   if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4400     return 2;
4401 
4402   /* Use register calling convention for local functions when possible.  */
4403   if (decl
4404       && TREE_CODE (decl) == FUNCTION_DECL
4405       && optimize
4406       && !profile_flag)
4407     {
4408       /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
4409       struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4410       if (i && i->local)
4411         {
4412           int local_regparm, globals = 0, regno;
4413           struct function *f;
4414 
4415           /* Make sure no regparm register is taken by a
4416              fixed register variable.  */
4417           for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4418             if (fixed_regs[local_regparm])
4419               break;
4420 
          /* We can't use regparm(3) for nested functions as they pass
             the static chain pointer in the third argument register.  */
4423           if (local_regparm == 3
4424               && decl_function_context (decl)
4425               && !DECL_NO_STATIC_CHAIN (decl))
4426             local_regparm = 2;
4427 
          /* If the function realigns its stack pointer, the prologue will
4429              clobber %ecx.  If we've already generated code for the callee,
4430              the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4431              scanning the attributes for the self-realigning property.  */
4432           f = DECL_STRUCT_FUNCTION (decl);
          /* Since the current internal arg pointer won't conflict with
             parameter-passing regs, there is no need to change stack
             realignment or adjust the regparm number.

             Each fixed register usage increases register pressure, so
             fewer registers should be used for argument passing.  This
             functionality can be overridden by an explicit regparm
             value.  */
4441           for (regno = 0; regno <= DI_REG; regno++)
4442             if (fixed_regs[regno])
4443               globals++;
4444 
4445           local_regparm
4446             = globals < local_regparm ? local_regparm - globals : 0;
4447 
4448           if (local_regparm > regparm)
4449             regparm = local_regparm;
4450         }
4451     }
4452 
4453   return regparm;
4454 }
4455 
4456 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4457    DFmode (2) arguments in SSE registers for a function with the
4458    indicated TYPE and DECL.  DECL may be NULL when calling function
4459    indirectly or considering a libcall.  Otherwise return 0.  */
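
/* For example, with -msse2 a declaration such as

     double __attribute__ ((sseregparm)) scale (double x, float y);

   (scale being an illustrative name) passes X and Y in SSE registers
   instead of on the stack, which is why the attribute is rejected below
   when SSE is not enabled.  */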
4460 
4461 static int
4462 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4463 {
4464   gcc_assert (!TARGET_64BIT);
4465 
4466   /* Use SSE registers to pass SFmode and DFmode arguments if requested
4467      by the sseregparm attribute.  */
4468   if (TARGET_SSEREGPARM
4469       || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4470     {
4471       if (!TARGET_SSE)
4472         {
4473           if (warn)
4474             {
4475               if (decl)
4476                 error ("Calling %qD with attribute sseregparm without "
4477                        "SSE/SSE2 enabled", decl);
4478               else
4479                 error ("Calling %qT with attribute sseregparm without "
4480                        "SSE/SSE2 enabled", type);
4481             }
4482           return 0;
4483         }
4484 
4485       return 2;
4486     }
4487 
4488   /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4489      (and DFmode for SSE2) arguments in SSE registers.  */
4490   if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4491     {
4492       /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
4493       struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4494       if (i && i->local)
4495         return TARGET_SSE2 ? 2 : 1;
4496     }
4497 
4498   return 0;
4499 }
4500 
4501 /* Return true if EAX is live at the start of the function.  Used by
4502    ix86_expand_prologue to determine if we need special help before
4503    calling allocate_stack_worker.  */
4504 
4505 static bool
4506 ix86_eax_live_at_start_p (void)
4507 {
4508   /* Cheat.  Don't bother working forward from ix86_function_regparm
4509      to the function type to whether an actual argument is located in
4510      eax.  Instead just look at cfg info, which is still close enough
4511      to correct at this point.  This gives false positives for broken
4512      functions that might use uninitialized data that happens to be
4513      allocated in eax, but who cares?  */
4514   return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4515 }
4516 
4517 /* Value is the number of bytes of arguments automatically
4518    popped when returning from a subroutine call.
4519    FUNDECL is the declaration node of the function (as a tree),
4520    FUNTYPE is the data type of the function (as a tree),
4521    or for a library call it is an identifier node for the subroutine name.
4522    SIZE is the number of bytes of arguments passed on the stack.
4523 
4524    On the 80386, the RTD insn may be used to pop them if the number
4525      of args is fixed, but if the number is variable then the caller
4526      must pop them all.  RTD can't be used for library calls now
4527      because the library is compiled with the Unix compiler.
4528    Use of RTD is a selectable option, since it is incompatible with
4529    standard Unix calling sequences.  If the option is not selected,
4530    the caller must always pop the args.
4531 
4532    The attribute stdcall is equivalent to RTD on a per module basis.  */
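
/* For example, a non-variadic stdcall function taking two int arguments
   returns with "ret $8" and thus pops SIZE == 8 bytes itself, whereas a
   cdecl function returns with a plain "ret" and leaves those 8 bytes for
   the caller to pop.  */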
4533 
4534 int
4535 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4536 {
4537   int rtd;
4538 
4539   /* None of the 64-bit ABIs pop arguments.  */
4540   if (TARGET_64BIT)
4541     return 0;
4542 
4543   rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4544 
4545   /* Cdecl functions override -mrtd, and never pop the stack.  */
4546   if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4547     {
      /* Stdcall and fastcall functions will pop the stack if not
         taking variable args.  */
4550       if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4551           || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4552         rtd = 1;
4553 
4554       if (rtd && ! stdarg_p (funtype))
4555         return size;
4556     }
4557 
4558   /* Lose any fake structure return argument if it is passed on the stack.  */
4559   if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4560       && !KEEP_AGGREGATE_RETURN_POINTER)
4561     {
4562       int nregs = ix86_function_regparm (funtype, fundecl);
4563       if (nregs == 0)
4564         return GET_MODE_SIZE (Pmode);
4565     }
4566 
4567   return 0;
4568 }
4569 
4570 /* Argument support functions.  */
4571 
/* Return true when register REGNO may be used to pass function parameters.  */
4573 bool
4574 ix86_function_arg_regno_p (int regno)
4575 {
4576   int i;
4577   const int *parm_regs;
4578 
4579   if (!TARGET_64BIT)
4580     {
4581       if (TARGET_MACHO)
4582         return (regno < REGPARM_MAX
4583                 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4584       else
4585         return (regno < REGPARM_MAX
4586                 || (TARGET_MMX && MMX_REGNO_P (regno)
4587                     && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4588                 || (TARGET_SSE && SSE_REGNO_P (regno)
4589                     && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4590     }
4591 
4592   if (TARGET_MACHO)
4593     {
4594       if (SSE_REGNO_P (regno) && TARGET_SSE)
4595         return true;
4596     }
4597   else
4598     {
4599       if (TARGET_SSE && SSE_REGNO_P (regno)
4600           && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4601         return true;
4602     }
4603 
  /* TODO: The function should depend on the current function's ABI, but
     builtins.c would need updating then.  Therefore we use the default
     ABI.  */
4607 
4608   /* RAX is used as hidden argument to va_arg functions.  */
4609   if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
4610     return true;
4611 
4612   if (DEFAULT_ABI == MS_ABI)
4613     parm_regs = x86_64_ms_abi_int_parameter_registers;
4614   else
4615     parm_regs = x86_64_int_parameter_registers;
4616   for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
4617                                          : X86_64_REGPARM_MAX); i++)
4618     if (regno == parm_regs[i])
4619       return true;
4620   return false;
4621 }
4622 
/* Return true if we do not know how to pass TYPE solely in registers.  */
4624 
4625 static bool
4626 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4627 {
4628   if (must_pass_in_stack_var_size_or_pad (mode, type))
4629     return true;
4630 
4631   /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
4632      The layout_type routine is crafty and tries to trick us into passing
4633      currently unsupported vector types on the stack by using TImode.  */
4634   return (!TARGET_64BIT && mode == TImode
4635           && type && TREE_CODE (type) != VECTOR_TYPE);
4636 }
4637 
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, which depends on
   the ABI in use.  */
4641 int
4642 ix86_reg_parm_stack_space (const_tree fndecl)
4643 {
4644   int call_abi = SYSV_ABI;
4645   if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4646     call_abi = ix86_function_abi (fndecl);
4647   else
4648     call_abi = ix86_function_type_abi (fndecl);
4649   if (call_abi == MS_ABI)
4650     return 32;
4651   return 0;
4652 }
4653 
/* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the call
   ABI used.  */
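
/* For example, on a target whose default is SYSV_ABI, a prototype such as

     void __attribute__ ((ms_abi)) wincb (void *);

   (wincb being an illustrative name) yields MS_ABI here, and the sysv_abi
   attribute works symmetrically when the default is MS_ABI.  */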
4656 int
4657 ix86_function_type_abi (const_tree fntype)
4658 {
4659   if (TARGET_64BIT && fntype != NULL)
4660     {
4661       int abi;
4662       if (DEFAULT_ABI == SYSV_ABI)
4663         abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
4664       else
4665         abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
4666 
4667       return abi;
4668     }
4669   return DEFAULT_ABI;
4670 }
4671 
4672 int
4673 ix86_function_abi (const_tree fndecl)
4674 {
4675   if (! fndecl)
4676     return DEFAULT_ABI;
4677   return ix86_function_type_abi (TREE_TYPE (fndecl));
4678 }
4679 
/* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the call
   ABI used.  */
4682 int
4683 ix86_cfun_abi (void)
4684 {
4685   if (! cfun || ! TARGET_64BIT)
4686     return DEFAULT_ABI;
4687   return cfun->machine->call_abi;
4688 }
4689 
4690 /* regclass.c  */
4691 extern void init_regs (void);
4692 
/* Implementation of the call ABI switching target hook.  Set the call
   register sets specific to FNDECL.  See also CONDITIONAL_REGISTER_USAGE
   for more details.  */
4696 void
4697 ix86_call_abi_override (const_tree fndecl)
4698 {
4699   if (fndecl == NULL_TREE)
4700     cfun->machine->call_abi = DEFAULT_ABI;
4701   else
4702     cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4703 }
4704 
/* The MS and SYSV ABIs have different sets of call-used registers.  Avoid
   expensive re-initialization via init_regs each time we switch function
   context, since this is needed only during RTL expansion.  */
4708 static void
4709 ix86_maybe_switch_abi (void)
4710 {
  if (TARGET_64BIT
      && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4713     reinit_regs ();
4714 }
4715 
4716 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4717    for a call to a function whose data type is FNTYPE.
4718    For a library call, FNTYPE is 0.  */
4719 
4720 void
4721 init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
4722                       tree fntype,      /* tree ptr for function decl */
4723                       rtx libname,      /* SYMBOL_REF of library name or 0 */
4724                       tree fndecl)
4725 {
4726   struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4727   memset (cum, 0, sizeof (*cum));
4728 
4729   if (fndecl)
4730    cum->call_abi = ix86_function_abi (fndecl);
4731   else
4732    cum->call_abi = ix86_function_type_abi (fntype);
4733   /* Set up the number of registers to use for passing arguments.  */
4734 
4735   if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4736     sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
4737            "or subtarget optimization implying it");
4738   cum->nregs = ix86_regparm;
4739   if (TARGET_64BIT)
4740     {
4741       if (cum->call_abi != DEFAULT_ABI)
4742         cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
4743                                              : X64_REGPARM_MAX;
4744     }
4745   if (TARGET_SSE)
4746     {
4747       cum->sse_nregs = SSE_REGPARM_MAX;
4748       if (TARGET_64BIT)
4749         {
4750           if (cum->call_abi != DEFAULT_ABI)
4751             cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4752                                                      : X64_SSE_REGPARM_MAX;
4753         }
4754     }
4755   if (TARGET_MMX)
4756     cum->mmx_nregs = MMX_REGPARM_MAX;
4757   cum->warn_avx = true;
4758   cum->warn_sse = true;
4759   cum->warn_mmx = true;
4760 
  /* Because the type might mismatch between the caller and the callee, we
     need to use the actual type of the function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive
     helping K&R code.
     FIXME: once the type system is fixed, we won't need this code anymore.  */
4767   if (i && i->local)
4768     fntype = TREE_TYPE (fndecl);
4769   cum->maybe_vaarg = (fntype
4770                       ? (!prototype_p (fntype) || stdarg_p (fntype))
4771                       : !libname);
4772 
4773   if (!TARGET_64BIT)
4774     {
4775       /* If there are variable arguments, then we won't pass anything
4776          in registers in 32-bit mode. */
4777       if (stdarg_p (fntype))
4778         {
4779           cum->nregs = 0;
4780           cum->sse_nregs = 0;
4781           cum->mmx_nregs = 0;
4782           cum->warn_avx = 0;
4783           cum->warn_sse = 0;
4784           cum->warn_mmx = 0;
4785           return;
4786         }
4787 
4788       /* Use ecx and edx registers if function has fastcall attribute,
4789          else look for regparm information.  */
4790       if (fntype)
4791         {
4792           if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4793             {
4794               cum->nregs = 2;
4795               cum->fastcall = 1;
4796             }
4797           else
4798             cum->nregs = ix86_function_regparm (fntype, fndecl);
4799         }
4800 
4801       /* Set up the number of SSE registers used for passing SFmode
4802          and DFmode arguments.  Warn for mismatching ABI.  */
4803       cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4804     }
4805 }
4806 
4807 /* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
4808    But in the case of vector types, it is some vector mode.
4809 
4810    When we have only some of our vector isa extensions enabled, then there
4811    are some modes for which vector_mode_supported_p is false.  For these
4812    modes, the generic vector support in gcc will choose some non-vector mode
4813    in order to implement the type.  By computing the natural mode, we'll
4814    select the proper ABI location for the operand and not depend on whatever
4815    the middle-end decides to do with these vector types.
4816 
   The middle-end can't deal with vector types larger than 16 bytes.  In
   this case, we return the original mode and warn about the ABI change if
   CUM isn't NULL.  */
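
/* For example, a generic vector type such as

     typedef int v4si __attribute__ ((vector_size (16)));

   has the natural mode V4SImode even when SSE is disabled and the
   middle-end would fall back to a non-vector mode, so the ABI location
   of a V4SI argument does not change with the enabled ISA.  */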
4820 
4821 static enum machine_mode
4822 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4823 {
4824   enum machine_mode mode = TYPE_MODE (type);
4825 
4826   if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4827     {
4828       HOST_WIDE_INT size = int_size_in_bytes (type);
4829       if ((size == 8 || size == 16 || size == 32)
4830           /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
4831           && TYPE_VECTOR_SUBPARTS (type) > 1)
4832         {
4833           enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4834 
4835           if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4836             mode = MIN_MODE_VECTOR_FLOAT;
4837           else
4838             mode = MIN_MODE_VECTOR_INT;
4839 
4840           /* Get the mode which has this inner mode and number of units.  */
4841           for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4842             if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4843                 && GET_MODE_INNER (mode) == innermode)
4844               {
4845                 if (size == 32 && !TARGET_AVX)
4846                   {
4847                     static bool warnedavx;
4848 
4849                     if (cum
4850                         && !warnedavx 
4851                         && cum->warn_avx)
4852                       {
4853                         warnedavx = true;
4854                         warning (0, "AVX vector argument without AVX "
4855                                  "enabled changes the ABI");
4856                       }
4857                     return TYPE_MODE (type);
4858                   }
4859                 else
4860                   return mode;
4861               }
4862 
4863           gcc_unreachable ();
4864         }
4865     }
4866 
4867   return mode;
4868 }
4869 
4870 /* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
4871    this may not agree with the mode that the type system has chosen for the
4872    register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
4873    go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
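
/* For instance, for a BLKmode operand whose natural mode is V2SFmode the
   result is a one-element PARALLEL wrapping (reg:V2SF) at offset 0,
   rather than a bare hard register in the wrong mode.  */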
4874 
4875 static rtx
4876 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4877                      unsigned int regno)
4878 {
4879   rtx tmp;
4880 
4881   if (orig_mode != BLKmode)
4882     tmp = gen_rtx_REG (orig_mode, regno);
4883   else
4884     {
4885       tmp = gen_rtx_REG (mode, regno);
4886       tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4887       tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4888     }
4889 
4890   return tmp;
4891 }
4892 
/* x86-64 register passing implementation.  See the x86-64 ABI for details.
   The goal of this code is to classify each 8-byte chunk of the incoming
   argument by register class and assign registers accordingly.  */
4896 
4897 /* Return the union class of CLASS1 and CLASS2.
4898    See the x86-64 PS ABI for details.  */
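
/* For example, a struct containing an int and a float in the same 8-byte
   chunk merges INTEGERSI with SSESF, which rule #4 below resolves to
   INTEGERSI, so the chunk is passed in a general-purpose register.  */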
4899 
4900 static enum x86_64_reg_class
4901 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4902 {
4903   /* Rule #1: If both classes are equal, this is the resulting class.  */
4904   if (class1 == class2)
4905     return class1;
4906 
4907   /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4908      the other class.  */
4909   if (class1 == X86_64_NO_CLASS)
4910     return class2;
4911   if (class2 == X86_64_NO_CLASS)
4912     return class1;
4913 
4914   /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
4915   if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4916     return X86_64_MEMORY_CLASS;
4917 
4918   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
4919   if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4920       || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4921     return X86_64_INTEGERSI_CLASS;
4922   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4923       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4924     return X86_64_INTEGER_CLASS;
4925 
4926   /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4927      MEMORY is used.  */
4928   if (class1 == X86_64_X87_CLASS
4929       || class1 == X86_64_X87UP_CLASS
4930       || class1 == X86_64_COMPLEX_X87_CLASS
4931       || class2 == X86_64_X87_CLASS
4932       || class2 == X86_64_X87UP_CLASS
4933       || class2 == X86_64_COMPLEX_X87_CLASS)
4934     return X86_64_MEMORY_CLASS;
4935 
4936   /* Rule #6: Otherwise class SSE is used.  */
4937   return X86_64_SSE_CLASS;
4938 }
4939 
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled with the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies the
   offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */
4951 
4952 static int
4953 classify_argument (enum machine_mode mode, const_tree type,
4954                    enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4955 {
4956   HOST_WIDE_INT bytes =
4957     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4958   int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4959 
4960   /* Variable sized entities are always passed/returned in memory.  */
4961   if (bytes < 0)
4962     return 0;
4963 
4964   if (mode != VOIDmode
4965       && targetm.calls.must_pass_in_stack (mode, type))
4966     return 0;
4967 
4968   if (type && AGGREGATE_TYPE_P (type))
4969     {
4970       int i;
4971       tree field;
4972       enum x86_64_reg_class subclasses[MAX_CLASSES];
4973 
4974       /* On x86-64 we pass structures larger than 32 bytes on the stack.  */
4975       if (bytes > 32)
4976         return 0;
4977 
4978       for (i = 0; i < words; i++)
4979         classes[i] = X86_64_NO_CLASS;
4980 
      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
         signal the memory class, so handle this as a special case.  */
4983       if (!words)
4984         {
4985           classes[0] = X86_64_NO_CLASS;
4986           return 1;
4987         }
4988 
4989       /* Classify each field of record and merge classes.  */
4990       switch (TREE_CODE (type))
4991         {
4992         case RECORD_TYPE:
          /* Now merge the fields of the structure.  */
4994           for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4995             {
4996               if (TREE_CODE (field) == FIELD_DECL)
4997                 {
4998                   int num;
4999 
5000                   if (TREE_TYPE (field) == error_mark_node)
5001                     continue;
5002 
5003                   /* Bitfields are always classified as integer.  Handle them
5004                      early, since later code would consider them to be
5005                      misaligned integers.  */
5006                   if (DECL_BIT_FIELD (field))
5007                     {
5008                       for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5009                            i < ((int_bit_position (field) + (bit_offset % 64))
5010                                 + tree_low_cst (DECL_SIZE (field), 0)
5011                                 + 63) / 8 / 8; i++)
5012                         classes[i] =
5013                           merge_classes (X86_64_INTEGER_CLASS,
5014                                          classes[i]);
5015                     }
5016                   else
5017                     {
5018                       type = TREE_TYPE (field);
5019 
                      /* A flexible array member is ignored.  */
5021                       if (TYPE_MODE (type) == BLKmode
5022                           && TREE_CODE (type) == ARRAY_TYPE
5023                           && TYPE_SIZE (type) == NULL_TREE
5024                           && TYPE_DOMAIN (type) != NULL_TREE
5025                           && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5026                               == NULL_TREE))
5027                         {
5028                           static bool warned;
5029                           
5030                           if (!warned && warn_psabi)
5031                             {
5032                               warned = true;
5033                               inform (input_location,
5034                                       "The ABI of passing struct with"
5035                                       " a flexible array member has"
5036                                       " changed in GCC 4.4");
5037                             }
5038                           continue;
5039                         }
5040                       num = classify_argument (TYPE_MODE (type), type,
5041                                                subclasses,
5042                                                (int_bit_position (field)
5043                                                 + bit_offset) % 256);
5044                       if (!num)
5045                         return 0;
5046                       for (i = 0; i < num; i++)
5047                         {
5048                           int pos =
5049                             (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5050                           classes[i + pos] =
5051                             merge_classes (subclasses[i], classes[i + pos]);
5052                         }
5053                     }
5054                 }
5055             }
5056           break;
5057 
5058         case ARRAY_TYPE:
5059           /* Arrays are handled as small records.  */
5060           {
5061             int num;
5062             num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5063                                      TREE_TYPE (type), subclasses, bit_offset);
5064             if (!num)
5065               return 0;
5066 
5067             /* The partial classes are now full classes.  */
5068             if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5069               subclasses[0] = X86_64_SSE_CLASS;
5070             if (subclasses[0] == X86_64_INTEGERSI_CLASS
5071                 && !((bit_offset % 64) == 0 && bytes == 4))
5072               subclasses[0] = X86_64_INTEGER_CLASS;
5073 
5074             for (i = 0; i < words; i++)
5075               classes[i] = subclasses[i % num];
5076 
5077             break;
5078           }
5079         case UNION_TYPE:
5080         case QUAL_UNION_TYPE:
        /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
5083           for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5084             {
5085               if (TREE_CODE (field) == FIELD_DECL)
5086                 {
5087                   int num;
5088 
5089                   if (TREE_TYPE (field) == error_mark_node)
5090                     continue;
5091 
5092                   num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5093                                            TREE_TYPE (field), subclasses,
5094                                            bit_offset);
5095                   if (!num)
5096                     return 0;
5097                   for (i = 0; i < num; i++)
5098                     classes[i] = merge_classes (subclasses[i], classes[i]);
5099                 }
5100             }
5101           break;
5102 
5103         default:
5104           gcc_unreachable ();
5105         }
5106 
5107       if (words > 2)
5108         {
          /* When the size exceeds 16 bytes, if the first eightbyte isn't
             X86_64_SSE_CLASS or any other eightbyte isn't
             X86_64_SSEUP_CLASS, everything should be passed in
             memory.  */
          if (classes[0] != X86_64_SSE_CLASS)
            return 0;
5115 
5116           for (i = 1; i < words; i++)
5117             if (classes[i] != X86_64_SSEUP_CLASS)
5118               return 0;
5119         }
5120 
5121       /* Final merger cleanup.  */
5122       for (i = 0; i < words; i++)
5123         {
5124           /* If one class is MEMORY, everything should be passed in
5125              memory.  */
5126           if (classes[i] == X86_64_MEMORY_CLASS)
5127             return 0;
5128 
          /* X86_64_SSEUP_CLASS should always be preceded by
             X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
5131           if (classes[i] == X86_64_SSEUP_CLASS
5132               && classes[i - 1] != X86_64_SSE_CLASS
5133               && classes[i - 1] != X86_64_SSEUP_CLASS)
5134             {
5135               /* The first one should never be X86_64_SSEUP_CLASS.  */
5136               gcc_assert (i != 0);
5137               classes[i] = X86_64_SSE_CLASS;
5138             }
5139 
          /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
             everything should be passed in memory.  */
5142           if (classes[i] == X86_64_X87UP_CLASS
5143               && (classes[i - 1] != X86_64_X87_CLASS))
5144             {
5145               static bool warned;
5146 
5147               /* The first one should never be X86_64_X87UP_CLASS.  */
5148               gcc_assert (i != 0);
5149               if (!warned && warn_psabi)
5150                 {
5151                   warned = true;
5152                   inform (input_location,
5153                           "The ABI of passing union with long double"
5154                           " has changed in GCC 4.4");
5155                 }
5156               return 0;
5157             }
5158         }
5159       return words;
5160     }
5161 
  /* Compute the alignment needed.  We align all types to their natural
     boundaries, with the exception of XFmode, which is treated here as
     aligned to 128 bits.  */
5164   if (mode != VOIDmode && mode != BLKmode)
5165     {
5166       int mode_alignment = GET_MODE_BITSIZE (mode);
5167 
5168       if (mode == XFmode)
5169         mode_alignment = 128;
5170       else if (mode == XCmode)
5171         mode_alignment = 256;
5172       if (COMPLEX_MODE_P (mode))
5173         mode_alignment /= 2;
      /* Misaligned fields are always passed in memory.  */
5175       if (bit_offset % mode_alignment)
5176         return 0;
5177     }
5178 
  /* For V1xx modes, just use the base mode.  */
5180   if (VECTOR_MODE_P (mode) && mode != V1DImode
5181       && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5182     mode = GET_MODE_INNER (mode);
5183 
5184   /* Classification of atomic types.  */
5185   switch (mode)
5186     {
5187     case SDmode:
5188     case DDmode:
5189       classes[0] = X86_64_SSE_CLASS;
5190       return 1;
5191     case TDmode:
5192       classes[0] = X86_64_SSE_CLASS;
5193       classes[1] = X86_64_SSEUP_CLASS;
5194       return 2;
5195     case DImode:
5196     case SImode:
5197     case HImode:
5198     case QImode:
5199     case CSImode:
5200     case CHImode:
5201     case CQImode:
5202       {
        int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5204 
5205         if (size <= 32)
5206           {
5207             classes[0] = X86_64_INTEGERSI_CLASS;
5208             return 1;
5209           }
5210         else if (size <= 64)
5211           {
5212             classes[0] = X86_64_INTEGER_CLASS;
5213             return 1;
5214           }
5215         else if (size <= 64+32)
5216           {
5217             classes[0] = X86_64_INTEGER_CLASS;
5218             classes[1] = X86_64_INTEGERSI_CLASS;
5219             return 2;
5220           }
5221         else if (size <= 64+64)
5222           {
5223             classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5224             return 2;
5225           }
5226         else
5227           gcc_unreachable ();
5228       }
5229     case CDImode:
5230     case TImode:
5231       classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5232       return 2;
5233     case COImode:
5234     case OImode:
5235       /* OImode shouldn't be used directly.  */
5236       gcc_unreachable ();
5237     case CTImode:
5238       return 0;
5239     case SFmode:
5240       if (!(bit_offset % 64))
5241         classes[0] = X86_64_SSESF_CLASS;
5242       else
5243         classes[0] = X86_64_SSE_CLASS;
5244       return 1;
5245     case DFmode:
5246       classes[0] = X86_64_SSEDF_CLASS;
5247       return 1;
5248     case XFmode:
5249       classes[0] = X86_64_X87_CLASS;
5250       classes[1] = X86_64_X87UP_CLASS;
5251       return 2;
5252     case TFmode:
5253       classes[0] = X86_64_SSE_CLASS;
5254       classes[1] = X86_64_SSEUP_CLASS;
5255       return 2;
5256     case SCmode:
5257       classes[0] = X86_64_SSE_CLASS;
5258       if (!(bit_offset % 64))
5259         return 1;
5260       else
5261         {
5262           static bool warned;
5263 
5264           if (!warned && warn_psabi)
5265             {
5266               warned = true;
5267               inform (input_location,
5268                       "The ABI of passing structure with complex float"
5269                       " member has changed in GCC 4.4");
5270             }
5271           classes[1] = X86_64_SSESF_CLASS;
5272           return 2;
5273         }
5274     case DCmode:
5275       classes[0] = X86_64_SSEDF_CLASS;
5276       classes[1] = X86_64_SSEDF_CLASS;
5277       return 2;
5278     case XCmode:
5279       classes[0] = X86_64_COMPLEX_X87_CLASS;
5280       return 1;
5281     case TCmode:
      /* This mode is larger than 16 bytes.  */
5283       return 0;
5284     case V8SFmode:
5285     case V8SImode:
5286     case V32QImode:
5287     case V16HImode:
5288     case V4DFmode:
5289     case V4DImode:
5290       classes[0] = X86_64_SSE_CLASS;
5291       classes[1] = X86_64_SSEUP_CLASS;
5292       classes[2] = X86_64_SSEUP_CLASS;
5293       classes[3] = X86_64_SSEUP_CLASS;
5294       return 4;
5295     case V4SFmode:
5296     case V4SImode:
5297     case V16QImode:
5298     case V8HImode:
5299     case V2DFmode:
5300     case V2DImode:
5301       classes[0] = X86_64_SSE_CLASS;
5302       classes[1] = X86_64_SSEUP_CLASS;
5303       return 2;
5304     case V1DImode:
5305     case V2SFmode:
5306     case V2SImode:
5307     case V4HImode:
5308     case V8QImode:
5309       classes[0] = X86_64_SSE_CLASS;
5310       return 1;
5311     case BLKmode:
5312     case VOIDmode:
5313       return 0;
5314     default:
5315       gcc_assert (VECTOR_MODE_P (mode));
5316 
5317       if (bytes > 16)
5318         return 0;
5319 
5320       gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5321 
5322       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5323         classes[0] = X86_64_INTEGERSI_CLASS;
5324       else
5325         classes[0] = X86_64_INTEGER_CLASS;
5326       classes[1] = X86_64_INTEGER_CLASS;
5327       return 1 + (bytes > 8);
5328     }
5329 }
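
/* A worked example (sketch, not compiled): for

     struct s { float a, b, c, d; };

   classify_argument returns 2 with classes[0] = classes[1] =
   X86_64_SSE_CLASS: each eightbyte holds two floats, and merging
   X86_64_SSESF_CLASS with X86_64_SSE_CLASS yields X86_64_SSE_CLASS
   (rule #6), so the struct is passed in two SSE registers with two
   floats packed in each.  */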
5330 
/* Examine the argument and set the number of registers required in each
   class.  Return 0 iff the parameter should be passed in memory.  */
5333 static int
5334 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5335                   int *int_nregs, int *sse_nregs)
5336 {
5337   enum x86_64_reg_class regclass[MAX_CLASSES];
5338   int n = classify_argument (mode, type, regclass, 0);
5339 
5340   *int_nregs = 0;
5341   *sse_nregs = 0;
5342   if (!n)
5343     return 0;
5344   for (n--; n >= 0; n--)
5345     switch (regclass[n])
5346       {
5347       case X86_64_INTEGER_CLASS:
5348       case X86_64_INTEGERSI_CLASS:
5349         (*int_nregs)++;
5350         break;
5351       case X86_64_SSE_CLASS:
5352       case X86_64_SSESF_CLASS:
5353       case X86_64_SSEDF_CLASS:
5354         (*sse_nregs)++;
5355         break;
5356       case X86_64_NO_CLASS:
5357       case X86_64_SSEUP_CLASS:
5358         break;
5359       case X86_64_X87_CLASS:
5360       case X86_64_X87UP_CLASS:
5361         if (!in_return)
5362           return 0;
5363         break;
5364       case X86_64_COMPLEX_X87_CLASS:
5365         return in_return ? 2 : 0;
5366       case X86_64_MEMORY_CLASS:
5367         gcc_unreachable ();
5368       }
5369   return 1;
5370 }
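
/* For instance (illustrative): for struct s { double d; long l; } this
   sets *int_nregs = 1 and *sse_nregs = 1, while __int128 needs
   *int_nregs = 2 and *sse_nregs = 0.  The X87 classes yield 0 for
   arguments because long double values are only ever returned in x87
   registers, never passed in them.  */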
5371 
/* Construct a container for the argument as used by the GCC calling
   interface.  See FUNCTION_ARG for a detailed description.  */
5374 
5375 static rtx
5376 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5377                      const_tree type, int in_return, int nintregs, int nsseregs,
5378                      const int *intreg, int sse_regno)
5379 {
  /* Static flags recording whether each error below was already issued.  */
5381   static bool issued_sse_arg_error;
5382   static bool issued_sse_ret_error;
5383   static bool issued_x87_ret_error;
5384 
5385   enum machine_mode tmpmode;
5386   int bytes =
5387     (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5388   enum x86_64_reg_class regclass[MAX_CLASSES];
5389   int n;
5390   int i;
5391   int nexps = 0;
5392   int needed_sseregs, needed_intregs;
5393   rtx exp[MAX_CLASSES];
5394   rtx ret;
5395 
5396   n = classify_argument (mode, type, regclass, 0);
5397   if (!n)
5398     return NULL;
5399   if (!examine_argument (mode, type, in_return, &needed_intregs,
5400                          &needed_sseregs))
5401     return NULL;
5402   if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5403     return NULL;
5404 
5405   /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
5406      some less clueful developer tries to use floating-point anyway.  */
5407   if (needed_sseregs && !TARGET_SSE)
5408     {
5409       if (in_return)
5410         {
5411           if (!issued_sse_ret_error)
5412             {
5413               error ("SSE register return with SSE disabled");
5414               issued_sse_ret_error = true;
5415             }
5416         }
5417       else if (!issued_sse_arg_error)
5418         {
5419           error ("SSE register argument with SSE disabled");
5420           issued_sse_arg_error = true;
5421         }
5422       return NULL;
5423     }
5424 
5425   /* Likewise, error if the ABI requires us to return values in the
5426      x87 registers and the user specified -mno-80387.  */
5427   if (!TARGET_80387 && in_return)
5428     for (i = 0; i < n; i++)
5429       if (regclass[i] == X86_64_X87_CLASS
5430           || regclass[i] == X86_64_X87UP_CLASS
5431           || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5432         {
5433           if (!issued_x87_ret_error)
5434             {
5435               error ("x87 register return with x87 disabled");
5436               issued_x87_ret_error = true;
5437             }
5438           return NULL;
5439         }
5440 
  /* First construct the simple cases.  Avoid SCmode, since we want to
     use a single register to pass this type.  */
5443   if (n == 1 && mode != SCmode)
5444     switch (regclass[0])
5445       {
5446       case X86_64_INTEGER_CLASS:
5447       case X86_64_INTEGERSI_CLASS:
5448         return gen_rtx_REG (mode, intreg[0]);
5449       case X86_64_SSE_CLASS:
5450       case X86_64_SSESF_CLASS:
5451       case X86_64_SSEDF_CLASS:
5452         if (mode != BLKmode)
5453           return gen_reg_or_parallel (mode, orig_mode, 
5454                                       SSE_REGNO (sse_regno));
5455         break;
5456       case X86_64_X87_CLASS:
5457       case X86_64_COMPLEX_X87_CLASS:
5458         return gen_rtx_REG (mode, FIRST_STACK_REG);
5459       case X86_64_NO_CLASS:
5460         /* Zero sized array, struct or class.  */
5461         return NULL;
5462       default:
5463         gcc_unreachable ();
5464       }
5465   if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5466       && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5467     return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5468   if (n == 4
5469       && regclass[0] == X86_64_SSE_CLASS
5470       && regclass[1] == X86_64_SSEUP_CLASS
5471       && regclass[2] == X86_64_SSEUP_CLASS
5472       && regclass[3] == X86_64_SSEUP_CLASS
5473       && mode != BLKmode)
5474     return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5475 
5476   if (n == 2
5477       && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5478     return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5479   if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5480       && regclass[1] == X86_64_INTEGER_CLASS
5481       && (mode == CDImode || mode == TImode || mode == TFmode)
5482       && intreg[0] + 1 == intreg[1])
5483     return gen_rtx_REG (mode, intreg[0]);
5484 
5485   /* Otherwise figure out the entries of the PARALLEL.  */
5486   for (i = 0; i < n; i++)
5487     {
5488       int pos;
5489 
5490       switch (regclass[i])
5491         {
5492           case X86_64_NO_CLASS:
5493             break;
5494           case X86_64_INTEGER_CLASS:
5495           case X86_64_INTEGERSI_CLASS:
5496             /* Merge TImodes on aligned occasions here too.  */
5497             if (i * 8 + 8 > bytes)
5498               tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5499             else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5500               tmpmode = SImode;
5501             else
5502               tmpmode = DImode;
            /* We've requested a size (e.g. 24 bits) for which no
               integer mode exists.  Use DImode.  */
5504             if (tmpmode == BLKmode)
5505               tmpmode = DImode;
5506             exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5507                                                gen_rtx_REG (tmpmode, *intreg),
5508                                                GEN_INT (i*8));
5509             intreg++;
5510             break;
5511           case X86_64_SSESF_CLASS:
5512             exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5513                                                gen_rtx_REG (SFmode,
5514                                                             SSE_REGNO (sse_regno)),
5515                                                GEN_INT (i*8));
5516             sse_regno++;
5517             break;
5518           case X86_64_SSEDF_CLASS:
5519             exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5520                                                gen_rtx_REG (DFmode,
5521                                                             SSE_REGNO (sse_regno)),
5522                                                GEN_INT (i*8));
5523             sse_regno++;
5524             break;
5525           case X86_64_SSE_CLASS:
5526             pos = i;
5527             switch (n)
5528               {
5529               case 1:
5530                 tmpmode = DImode;
5531                 break;
5532               case 2:
5533                 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5534                   {
5535                     tmpmode = TImode;
5536                     i++;
5537                   }
5538                 else
5539                   tmpmode = DImode;
5540                 break;
5541               case 4:
5542                 gcc_assert (i == 0
5543                             && regclass[1] == X86_64_SSEUP_CLASS
5544                             && regclass[2] == X86_64_SSEUP_CLASS
5545                             && regclass[3] == X86_64_SSEUP_CLASS);
5546                 tmpmode = OImode;
5547                 i += 3;
5548                 break;
5549               default:
5550                 gcc_unreachable ();
5551               }
5552             exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5553                                                gen_rtx_REG (tmpmode,
5554                                                             SSE_REGNO (sse_regno)),
5555                                                GEN_INT (pos*8));
5556             sse_regno++;
5557             break;
5558           default:
5559             gcc_unreachable ();
5560         }
5561     }
5562 
5563   /* Empty aligned struct, union or class.  */
5564   if (nexps == 0)
5565     return NULL;
5566 
  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5568   for (i = 0; i < nexps; i++)
5569     XVECEXP (ret, 0, i) = exp [i];
5570   return ret;
5571 }
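
/* A sketch of the result (abbreviated, illustrative RTL): for
   struct s { double d; long l; } passed as the first argument,
   construct_container produces roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   i.e. the double travels in %xmm0 and the long in %rdi, at byte
   offsets 0 and 8 within the object.  */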
5572 
5573 /* Update the data in CUM to advance over an argument of mode MODE
5574    and data type TYPE.  (TYPE is null for libcalls where that information
5575    may not be available.)  */
5576 
5577 static void
5578 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5579                          tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5580 {
5581   switch (mode)
5582     {
5583     default:
5584       break;
5585 
5586     case BLKmode:
5587       if (bytes < 0)
5588         break;
5589       /* FALLTHRU */
5590 
5591     case DImode:
5592     case SImode:
5593     case HImode:
5594     case QImode:
5595       cum->words += words;
5596       cum->nregs -= words;
5597       cum->regno += words;
5598 
5599       if (cum->nregs <= 0)
5600         {
5601           cum->nregs = 0;
5602           cum->regno = 0;
5603         }
5604       break;
5605 
5606     case OImode:
5607       /* OImode shouldn't be used directly.  */
5608       gcc_unreachable ();
5609 
5610     case DFmode:
5611       if (cum->float_in_sse < 2)
5612         break;
5613     case SFmode:
5614       if (cum->float_in_sse < 1)
5615         break;
5616       /* FALLTHRU */
5617 
5618     case V8SFmode:
5619     case V8SImode:
5620     case V32QImode:
5621     case V16HImode:
5622     case V4DFmode:
5623     case V4DImode:
5624     case TImode:
5625     case V16QImode:
5626     case V8HImode:
5627     case V4SImode:
5628     case V2DImode:
5629     case V4SFmode:
5630     case V2DFmode:
5631       if (!type || !AGGREGATE_TYPE_P (type))
5632         {
5633           cum->sse_words += words;
5634           cum->sse_nregs -= 1;
5635           cum->sse_regno += 1;
5636           if (cum->sse_nregs <= 0)
5637             {
5638               cum->sse_nregs = 0;
5639               cum->sse_regno = 0;
5640             }
5641         }
5642       break;
5643 
5644     case V8QImode:
5645     case V4HImode:
5646     case V2SImode:
5647     case V2SFmode:
5648     case V1DImode:
5649       if (!type || !AGGREGATE_TYPE_P (type))
5650         {
5651           cum->mmx_words += words;
5652           cum->mmx_nregs -= 1;
5653           cum->mmx_regno += 1;
5654           if (cum->mmx_nregs <= 0)
5655             {
5656               cum->mmx_nregs = 0;
5657               cum->mmx_regno = 0;
5658             }
5659         }
5660       break;
5661     }
5662 }
5663 
5664 static void
5665 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5666                          tree type, HOST_WIDE_INT words, int named)
5667 {
5668   int int_nregs, sse_nregs;
5669 
5670   /* Unnamed 256bit vector mode parameters are passed on stack.  */
5671   if (!named && VALID_AVX256_REG_MODE (mode))
5672     return;
5673 
5674   if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5675     cum->words += words;
5676   else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5677     {
5678       cum->nregs -= int_nregs;
5679       cum->sse_nregs -= sse_nregs;
5680       cum->regno += int_nregs;
5681       cum->sse_regno += sse_nregs;
5682     }
5683   else
5684     cum->words += words;
5685 }
5686 
5687 static void
5688 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5689                             HOST_WIDE_INT words)
5690 {
  /* Anything other than 1, 2, 4 or 8 bytes should have been passed
     indirectly (by reference).  */
5692   gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5693 
5694   cum->words += words;
5695   if (cum->nregs > 0)
5696     {
5697       cum->nregs -= 1;
5698       cum->regno += 1;
5699     }
5700 }
5701 
5702 void
5703 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5704                       tree type, int named)
5705 {
5706   HOST_WIDE_INT bytes, words;
5707 
5708   if (mode == BLKmode)
5709     bytes = int_size_in_bytes (type);
5710   else
5711     bytes = GET_MODE_SIZE (mode);
5712   words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5713 
5714   if (type)
5715     mode = type_natural_mode (type, NULL);
5716 
5717   if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5718     function_arg_advance_ms_64 (cum, bytes, words);
5719   else if (TARGET_64BIT)
5720     function_arg_advance_64 (cum, mode, type, words, named);
5721   else
5722     function_arg_advance_32 (cum, mode, type, bytes, words);
5723 }
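
/* Illustrative walkthrough: advancing over `int, double' on x86-64
   SysV consumes one integer and one SSE register (cum->regno and
   cum->sse_regno each move by one); once examine_argument needs more
   registers than remain, the argument goes to the stack and only
   cum->words grows.  */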
5724 
5725 /* Define where to put the arguments to a function.
5726    Value is zero to push the argument on the stack,
5727    or a hard register in which to store the argument.
5728 
5729    MODE is the argument's machine mode.
5730    TYPE is the data type of the argument (as a tree).
5731     This is null for libcalls where that information may
5732     not be available.
5733    CUM is a variable of type CUMULATIVE_ARGS which gives info about
5734     the preceding args and about the function being called.
5735    NAMED is nonzero if this argument is a named parameter
5736     (otherwise it is an extra parameter matching an ellipsis).  */
5737 
5738 static rtx
5739 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5740                  enum machine_mode orig_mode, tree type,
5741                  HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5742 {
5743   static bool warnedsse, warnedmmx;
5744 
  /* Avoid the AL setting used by the Unix64 ABI.  */
5746   if (mode == VOIDmode)
5747     return constm1_rtx;
5748 
5749   switch (mode)
5750     {
5751     default:
5752       break;
5753 
5754     case BLKmode:
5755       if (bytes < 0)
5756         break;
5757       /* FALLTHRU */
5758     case DImode:
5759     case SImode:
5760     case HImode:
5761     case QImode:
5762       if (words <= cum->nregs)
5763         {
5764           int regno = cum->regno;
5765 
          /* Fastcall allocates the first two DWORD (SImode) or
             smaller arguments to ECX and EDX, unless the argument
             has aggregate type.  */
5769           if (cum->fastcall)
5770             {
5771               if (mode == BLKmode
5772                   || mode == DImode
5773                   || (type && AGGREGATE_TYPE_P (type)))
5774                 break;
5775 
              /* ECX, not EAX, is the first allocated register.  */
5777               if (regno == AX_REG)
5778                 regno = CX_REG;
5779             }
5780           return gen_rtx_REG (mode, regno);
5781         }
5782       break;
5783 
5784     case DFmode:
5785       if (cum->float_in_sse < 2)
5786         break;
5787     case SFmode:
5788       if (cum->float_in_sse < 1)
5789         break;
5790       /* FALLTHRU */
5791     case TImode:
5792       /* In 32bit, we pass TImode in xmm registers.  */
5793     case V16QImode:
5794     case V8HImode:
5795     case V4SImode:
5796     case V2DImode:
5797     case V4SFmode:
5798     case V2DFmode:
5799       if (!type || !AGGREGATE_TYPE_P (type))
5800         {
5801           if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5802             {
5803               warnedsse = true;
5804               warning (0, "SSE vector argument without SSE enabled "
5805                        "changes the ABI");
5806             }
5807           if (cum->sse_nregs)
5808             return gen_reg_or_parallel (mode, orig_mode,
5809                                         cum->sse_regno + FIRST_SSE_REG);
5810         }
5811       break;
5812 
5813     case OImode:
5814       /* OImode shouldn't be used directly.  */
5815       gcc_unreachable ();
5816 
5817     case V8SFmode:
5818     case V8SImode:
5819     case V32QImode:
5820     case V16HImode:
5821     case V4DFmode:
5822     case V4DImode:
5823       if (!type || !AGGREGATE_TYPE_P (type))
5824         {
5825           if (cum->sse_nregs)
5826             return gen_reg_or_parallel (mode, orig_mode,
5827                                         cum->sse_regno + FIRST_SSE_REG);
5828         }
5829       break;
5830 
5831     case V8QImode:
5832     case V4HImode:
5833     case V2SImode:
5834     case V2SFmode:
5835     case V1DImode:
5836       if (!type || !AGGREGATE_TYPE_P (type))
5837         {
5838           if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5839             {
5840               warnedmmx = true;
5841               warning (0, "MMX vector argument without MMX enabled "
5842                        "changes the ABI");
5843             }
5844           if (cum->mmx_nregs)
5845             return gen_reg_or_parallel (mode, orig_mode,
5846                                         cum->mmx_regno + FIRST_MMX_REG);
5847         }
5848       break;
5849     }
5850 
5851   return NULL_RTX;
5852 }
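
/* Example (illustrative): given

     void __attribute__((fastcall)) f (int a, int b, int c);

   the fastcall path above places A in %ecx and B in %edx; C no longer
   fits in cum->nregs and is pushed on the stack.  */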
5853 
5854 static rtx
5855 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5856                  enum machine_mode orig_mode, tree type, int named)
5857 {
  /* Handle the hidden AL argument containing the number of SSE registers
     used for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
                    ? (cum->sse_nregs < 0
                       ? (cum->call_abi == DEFAULT_ABI
                          ? SSE_REGPARM_MAX
                          : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
                                                     : X64_SSE_REGPARM_MAX))
                       : cum->sse_regno)
                    : -1);
5869 
5870   switch (mode)
5871     {
5872     default:
5873       break;
5874 
5875     case V8SFmode:
5876     case V8SImode:
5877     case V32QImode:
5878     case V16HImode:
5879     case V4DFmode:
5880     case V4DImode:
5881       /* Unnamed 256bit vector mode parameters are passed on stack.  */
5882       if (!named)
5883         return NULL;
5884       break;
5885     }
5886 
5887   return construct_container (mode, orig_mode, type, 0, cum->nregs,
5888                               cum->sse_nregs,
5889                               &x86_64_int_parameter_registers [cum->regno],
5890                               cum->sse_regno);
5891 }
5892 
5893 static rtx
5894 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5895                     enum machine_mode orig_mode, int named,
5896                     HOST_WIDE_INT bytes)
5897 {
5898   unsigned int regno;
5899 
  /* We need to add clobbers for MS_ABI -> SYSV ABI calls in expand_call.
     We use the value -2 to specify that the current function call is
     MS_ABI.  */
5902   if (mode == VOIDmode)
5903     return GEN_INT (-2);
5904 
5905   /* If we've run out of registers, it goes on the stack.  */
5906   if (cum->nregs == 0)
5907     return NULL_RTX;
5908 
5909   regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5910 
5911   /* Only floating point modes are passed in anything but integer regs.  */
5912   if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5913     {
5914       if (named)
5915         regno = cum->regno + FIRST_SSE_REG;
5916       else
5917         {
5918           rtx t1, t2;
5919 
5920           /* Unnamed floating parameters are passed in both the
5921              SSE and integer registers.  */
5922           t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5923           t2 = gen_rtx_REG (mode, regno);
5924           t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5925           t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5926           return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5927         }
5928     }
  /* Handle aggregate types passed in registers.  */
5930   if (orig_mode == BLKmode)
5931     {
5932       if (bytes > 0 && bytes <= 8)
5933         mode = (bytes > 4 ? DImode : SImode);
5934       if (mode == BLKmode)
5935         mode = DImode;
5936     }
5937 
5938   return gen_reg_or_parallel (mode, orig_mode, regno);
5939 }
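
/* Sketch of the unnamed-float case above (illustrative): a double
   passed to a variadic MS-ABI function in the second argument slot is
   duplicated into both %xmm1 and %rdx, so the callee can spill the
   integer register area without knowing the argument's type.  */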
5940 
5941 rtx
5942 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5943               tree type, int named)
5944 {
5945   enum machine_mode mode = omode;
5946   HOST_WIDE_INT bytes, words;
5947 
5948   if (mode == BLKmode)
5949     bytes = int_size_in_bytes (type);
5950   else
5951     bytes = GET_MODE_SIZE (mode);
5952   words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5953 
5954   /* To simplify the code below, represent vector types with a vector mode
5955      even if MMX/SSE are not active.  */
5956   if (type && TREE_CODE (type) == VECTOR_TYPE)
5957     mode = type_natural_mode (type, cum);
5958 
5959   if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5960     return function_arg_ms_64 (cum, mode, omode, named, bytes);
5961   else if (TARGET_64BIT)
5962     return function_arg_64 (cum, mode, omode, type, named);
5963   else
5964     return function_arg_32 (cum, mode, omode, type, bytes, words);
5965 }
5966 
5967 /* A C expression that indicates when an argument must be passed by
5968    reference.  If nonzero for an argument, a copy of that argument is
5969    made in memory and a pointer to the argument is passed instead of
5970    the argument itself.  The pointer is passed in whatever way is
5971    appropriate for passing a pointer to that type.  */
5972 
5973 static bool
5974 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5975                         enum machine_mode mode ATTRIBUTE_UNUSED,
5976                         const_tree type, bool named ATTRIBUTE_UNUSED)
5977 {
5978   /* See Windows x64 Software Convention.  */
5979   if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5980     {
5981       int msize = (int) GET_MODE_SIZE (mode);
5982       if (type)
5983         {
5984           /* Arrays are passed by reference.  */
5985           if (TREE_CODE (type) == ARRAY_TYPE)
5986             return true;
5987 
5988           if (AGGREGATE_TYPE_P (type))
5989             {
5990               /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
5991                  are passed by reference.  */
5992               msize = int_size_in_bytes (type);
5993             }
5994         }
5995 
5996       /* __m128 is passed by reference.  */
5997       switch (msize) {
5998       case 1: case 2: case 4: case 8:
5999         break;
6000       default:
6001         return true;
6002       }
6003     }
  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
    return true;

  return false;
6008 }
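
/* For example (grounded in the checks above): under the Windows x64
   convention an array, an __m128 value, or a 12-byte struct is passed
   by reference, while aggregates of exactly 1, 2, 4 or 8 bytes are
   passed by value.  */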
6009 
6010 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6011    ABI.  */
6012 static bool
6013 contains_aligned_value_p (tree type)
6014 {
6015   enum machine_mode mode = TYPE_MODE (type);
6016   if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6017        || mode == TDmode
6018        || mode == TFmode
6019        || mode == TCmode)
6020       && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6021     return true;
6022   if (TYPE_ALIGN (type) < 128)
6023     return false;
6024 
6025   if (AGGREGATE_TYPE_P (type))
6026     {
6027       /* Walk the aggregates recursively.  */
6028       switch (TREE_CODE (type))
6029         {
6030         case RECORD_TYPE:
6031         case UNION_TYPE:
6032         case QUAL_UNION_TYPE:
6033           {
6034             tree field;
6035 
6036             /* Walk all the structure fields.  */
6037             for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6038               {
6039                 if (TREE_CODE (field) == FIELD_DECL
6040                     && contains_aligned_value_p (TREE_TYPE (field)))
6041                   return true;
6042               }
6043             break;
6044           }
6045 
6046         case ARRAY_TYPE:
          /* Just in case some language passes arrays by value.  */
6048           if (contains_aligned_value_p (TREE_TYPE (type)))
6049             return true;
6050           break;
6051 
6052         default:
6053           gcc_unreachable ();
6054         }
6055     }
6056   return false;
6057 }
6058 
6059 /* Gives the alignment boundary, in bits, of an argument with the
6060    specified mode and type.  */
6061 
6062 int
6063 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6064 {
6065   int align;
6066   if (type)
6067     {
      /* Calls use the canonical type, so convert TYPE to its canonical
         form if needed.  */
6070       if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6071         type = TYPE_CANONICAL (type);
6072       align = TYPE_ALIGN (type);
6073     }
6074   else
6075     align = GET_MODE_ALIGNMENT (mode);
6076   if (align < PARM_BOUNDARY)
6077     align = PARM_BOUNDARY;
6078   /* In 32bit, only _Decimal128 and __float128 are aligned to their
6079      natural boundaries.  */
6080   if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6081     {
6082       /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
6083          make an exception for SSE modes since these require 128bit
6084          alignment.
6085 
6086          The handling here differs from field_alignment.  ICC aligns MMX
6087          arguments to 4 byte boundaries, while structure fields are aligned
6088          to 8 byte boundaries.  */
6089       if (!type)
6090         {
6091           if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6092             align = PARM_BOUNDARY;
6093         }
6094       else
6095         {
6096           if (!contains_aligned_value_p (type))
6097             align = PARM_BOUNDARY;
6098         }
6099     }
6100   if (align > BIGGEST_ALIGNMENT)
6101     align = BIGGEST_ALIGNMENT;
6102   return align;
6103 }
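
/* E.g. (illustrative): on ia32 a plain int argument gets PARM_BOUNDARY
   (32 bits), while __m128, _Decimal128 and __float128 arguments keep
   their natural 128-bit boundary via the checks above.  */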
6104 
/* Return true if REGNO is a possible register number for a function
   value.  */
6106 
6107 bool
6108 ix86_function_value_regno_p (int regno)
6109 {
6110   switch (regno)
6111     {
6112     case 0:
6113       return true;
6114 
6115     case FIRST_FLOAT_REG:
      /* TODO: The function should depend on the current function's ABI,
         but builtins.c would need updating then.  Therefore we use the
         default ABI.  */
6119       if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
6120         return false;
6121       return TARGET_FLOAT_RETURNS_IN_80387;
6122 
6123     case FIRST_SSE_REG:
6124       return TARGET_SSE;
6125 
6126     case FIRST_MMX_REG:
6127       if (TARGET_MACHO || TARGET_64BIT)
6128         return false;
6129       return TARGET_MMX;
6130     }
6131 
6132   return false;
6133 }
6134 
6135 /* Define how to find the value returned by a function.
6136    VALTYPE is the data type of the value (as a tree).
6137    If the precise function being called is known, FUNC is its FUNCTION_DECL;
6138    otherwise, FUNC is 0.  */
6139 
6140 static rtx
6141 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6142                    const_tree fntype, const_tree fn)
6143 {
6144   unsigned int regno;
6145 
  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when MMX is not available.  However,
     some ABIs may require the result to be returned as if it had
     DImode.  */
6149   if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6150     regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6151 
  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when SSE is not available.  However, some ABIs
     may require the result to be returned as if it had integer TImode.  */
6155   else if (mode == TImode
6156            || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6157     regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6158 
  /* 32-byte vector modes in %ymm0.  */
6160   else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6161     regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6162 
6163   /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
6164   else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6165     regno = FIRST_FLOAT_REG;
6166   else
6167     /* Most things go in %eax.  */
6168     regno = AX_REG;
6169 
6170   /* Override FP return register with %xmm0 for local functions when
6171      SSE math is enabled or for functions with sseregparm attribute.  */
6172   if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6173     {
6174       int sse_level = ix86_function_sseregparm (fntype, fn, false);
6175       if ((sse_level >= 1 && mode == SFmode)
6176           || (sse_level == 2 && mode == DFmode))
6177         regno = FIRST_SSE_REG;
6178     }
6179 
6180   /* OImode shouldn't be used directly.  */
6181   gcc_assert (mode != OImode);
6182 
6183   return gen_rtx_REG (orig_mode, regno);
6184 }
6185 
6186 static rtx
6187 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6188                    const_tree valtype)
6189 {
6190   rtx ret;
6191 
6192   /* Handle libcalls, which don't provide a type node.  */
6193   if (valtype == NULL)
6194     {
6195       switch (mode)
6196         {
6197         case SFmode:
6198         case SCmode:
6199         case DFmode:
6200         case DCmode:
6201         case TFmode:
6202         case SDmode:
6203         case DDmode:
6204         case TDmode:
6205           return gen_rtx_REG (mode, FIRST_SSE_REG);
6206         case XFmode:
6207         case XCmode:
6208           return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6209         case TCmode:
6210           return NULL;
6211         default:
6212           return gen_rtx_REG (mode, AX_REG);
6213         }
6214     }
6215 
6216   ret = construct_container (mode, orig_mode, valtype, 1,
6217                              X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6218                              x86_64_int_return_registers, 0);
6219 
  /* For zero sized structures, construct_container returns NULL, but we
     need to keep the rest of the compiler happy by returning a
     meaningful value.  */
6222   if (!ret)
6223     ret = gen_rtx_REG (orig_mode, AX_REG);
6224 
6225   return ret;
6226 }
6227 
6228 static rtx
6229 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6230 {
6231   unsigned int regno = AX_REG;
6232 
6233   if (TARGET_SSE)
6234     {
6235       switch (GET_MODE_SIZE (mode))
6236         {
6237         case 16:
          if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6239              && !COMPLEX_MODE_P (mode))
6240             regno = FIRST_SSE_REG;
6241           break;
6242         case 8:
6243         case 4:
6244           if (mode == SFmode || mode == DFmode)
6245             regno = FIRST_SSE_REG;
6246           break;
6247         default:
6248           break;
6249         }
6250     }
6251   return gen_rtx_REG (orig_mode, regno);
6252 }
6253 
6254 static rtx
6255 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6256                        enum machine_mode orig_mode, enum machine_mode mode)
6257 {
6258   const_tree fn, fntype;
6259 
6260   fn = NULL_TREE;
6261   if (fntype_or_decl && DECL_P (fntype_or_decl))
6262     fn = fntype_or_decl;
6263   fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6264 
6265   if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6266     return function_value_ms_64 (orig_mode, mode);
6267   else if (TARGET_64BIT)
6268     return function_value_64 (orig_mode, mode, valtype);
6269   else
6270     return function_value_32 (orig_mode, mode, fntype, fn);
6271 }
6272 
6273 static rtx
6274 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6275                      bool outgoing ATTRIBUTE_UNUSED)
6276 {
6277   enum machine_mode mode, orig_mode;
6278 
6279   orig_mode = TYPE_MODE (valtype);
6280   mode = type_natural_mode (valtype, NULL);
6281   return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6282 }
6283 
6284 rtx
6285 ix86_libcall_value (enum machine_mode mode)
6286 {
6287   return ix86_function_value_1 (NULL, NULL, mode, mode);
6288 }
6289 
6290 /* Return true iff type is returned in memory.  */
6291 
6292 static int ATTRIBUTE_UNUSED
6293 return_in_memory_32 (const_tree type, enum machine_mode mode)
6294 {
6295   HOST_WIDE_INT size;
6296 
6297   if (mode == BLKmode)
6298     return 1;
6299 
6300   size = int_size_in_bytes (type);
6301 
6302   if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6303     return 0;
6304 
6305   if (VECTOR_MODE_P (mode) || mode == TImode)
6306     {
6307       /* User-created vectors small enough to fit in EAX.  */
6308       if (size < 8)
6309         return 0;
6310 
      /* MMX/3dNow values are returned in MM0,
         except when it doesn't exist.  */
6313       if (size == 8)
6314         return (TARGET_MMX ? 0 : 1);
6315 
6316       /* SSE values are returned in XMM0, except when it doesn't exist.  */
6317       if (size == 16)
6318         return (TARGET_SSE ? 0 : 1);
6319 
6320       /* AVX values are returned in YMM0, except when it doesn't exist.  */
6321       if (size == 32)
6322         return TARGET_AVX ? 0 : 1;
6323     }
6324 
6325   if (mode == XFmode)
6326     return 0;
6327 
6328   if (size > 12)
6329     return 1;
6330 
6331   /* OImode shouldn't be used directly.  */
6332   gcc_assert (mode != OImode);
6333 
6334   return 0;
6335 }
6336 
6337 static int ATTRIBUTE_UNUSED
6338 return_in_memory_64 (const_tree type, enum machine_mode mode)
6339 {
6340   int needed_intregs, needed_sseregs;
6341   return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6342 }
6343 
6344 static int ATTRIBUTE_UNUSED
6345 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6346 {
6347   HOST_WIDE_INT size = int_size_in_bytes (type);
6348 
6349   /* __m128 is returned in xmm0.  */
6350   if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6351       && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6352     return 0;
6353 
  /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes.  */
6355   return (size != 1 && size != 2 && size != 4 && size != 8);
6356 }
6357 
6358 static bool
6359 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6360 {
6361 #ifdef SUBTARGET_RETURN_IN_MEMORY
6362   return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6363 #else
6364   const enum machine_mode mode = type_natural_mode (type, NULL);
6365  
6366   if (TARGET_64BIT)
6367     {
6368       if (ix86_function_type_abi (fntype) == MS_ABI)
6369         return return_in_memory_ms_64 (type, mode);
6370       else
6371         return return_in_memory_64 (type, mode);
6372     }
6373   else
6374     return return_in_memory_32 (type, mode);
6375 #endif
6376 }
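
/* For example (illustrative): struct s { int a, b, c, d; } (16 bytes)
   is returned in memory on ia32 because its size exceeds 12 bytes, but
   in the %rax:%rdx pair under the 64-bit SysV ABI, where it classifies
   as two INTEGER eightbytes.  */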
6377 
/* Return true iff TYPE is returned in memory.  This version is used
   on Solaris 10.  It is similar to the generic ix86_return_in_memory,
   but differs notably in that when MMX is available, 8-byte vectors
   are returned in memory, rather than in MMX registers.  */
6382 
6383 bool
6384 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6385 {
6386   int size;
6387   enum machine_mode mode = type_natural_mode (type, NULL);
6388 
6389   if (TARGET_64BIT)
6390     return return_in_memory_64 (type, mode);
6391 
6392   if (mode == BLKmode)
6393     return 1;
6394 
6395   size = int_size_in_bytes (type);
6396 
6397   if (VECTOR_MODE_P (mode))
6398     {
6399       /* Return in memory only if MMX registers *are* available.  This
6400          seems backwards, but it is consistent with the existing
6401          Solaris x86 ABI.  */
6402       if (size == 8)
6403         return TARGET_MMX;
6404       if (size == 16)
6405         return !TARGET_SSE;
6406     }
6407   else if (mode == TImode)
6408     return !TARGET_SSE;
6409   else if (mode == XFmode)
6410     return 0;
6411 
6412   return size > 12;
6413 }
6414 
/* When returning SSE vector types, we have a choice of either
     (1) being ABI incompatible with a -march switch, or
     (2) generating an error.
6418    Given no good solution, I think the safest thing is one warning.
6419    The user won't be able to use -Werror, but....
6420 
6421    Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6422    called in response to actually generating a caller or callee that
6423    uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
6424    via aggregate_value_p for general type probing from tree-ssa.  */
6425 
6426 static rtx
6427 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6428 {
6429   static bool warnedsse, warnedmmx;
6430 
6431   if (!TARGET_64BIT && type)
6432     {
6433       /* Look at the return type of the function, not the function type.  */
6434       enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6435 
6436       if (!TARGET_SSE && !warnedsse)
6437         {
6438           if (mode == TImode
6439               || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6440             {
6441               warnedsse = true;
6442               warning (0, "SSE vector return without SSE enabled "
6443                        "changes the ABI");
6444             }
6445         }
6446 
6447       if (!TARGET_MMX && !warnedmmx)
6448         {
6449           if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6450             {
6451               warnedmmx = true;
6452               warning (0, "MMX vector return without MMX enabled "
6453                        "changes the ABI");
6454             }
6455         }
6456     }
6457 
6458   return NULL;
6459 }
6460 
6461 
6462 /* Create the va_list data type.  */
6463 
/* Return the calling-convention-specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */
6466 
6467 static tree
6468 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6469 {
6470   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6471 
  /* For i386 we use a plain pointer to the argument area.  */
6473   if (!TARGET_64BIT || abi == MS_ABI)
6474     return build_pointer_type (char_type_node);
6475 
6476   record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6477   type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6478 
6479   f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6480                       unsigned_type_node);
6481   f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6482                       unsigned_type_node);
6483   f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6484                       ptr_type_node);
6485   f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
6486                       ptr_type_node);
6487 
6488   va_list_gpr_counter_field = f_gpr;
6489   va_list_fpr_counter_field = f_fpr;
6490 
6491   DECL_FIELD_CONTEXT (f_gpr) = record;
6492   DECL_FIELD_CONTEXT (f_fpr) = record;
6493   DECL_FIELD_CONTEXT (f_ovf) = record;
6494   DECL_FIELD_CONTEXT (f_sav) = record;
6495 
6496   TREE_CHAIN (record) = type_decl;
6497   TYPE_NAME (record) = type_decl;
6498   TYPE_FIELDS (record) = f_gpr;
6499   TREE_CHAIN (f_gpr) = f_fpr;
6500   TREE_CHAIN (f_fpr) = f_ovf;
6501   TREE_CHAIN (f_ovf) = f_sav;
6502 
6503   layout_type (record);
6504 
6505   /* The correct type is an array type of one element.  */
6506   return build_array_type (record, build_index_type (size_zero_node));
6507 }
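
/* The record laid out above corresponds to the familiar SysV x86-64
   declaration (shown for illustration only):

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];
   */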
6508 
/* Set up the builtin va_list data type and, for 64-bit, the additional
   calling-convention-specific va_list data types.  */
6511 
6512 static tree
6513 ix86_build_builtin_va_list (void)
6514 {
6515   tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI);
6516 
  /* Initialize the ABI-specific va_list builtin types.  */
6518   if (TARGET_64BIT)
6519     {
6520       tree t;
6521       if (DEFAULT_ABI == MS_ABI)
6522         {
6523           t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6524           if (TREE_CODE (t) != RECORD_TYPE)
6525             t = build_variant_type_copy (t);
6526           sysv_va_list_type_node = t;
6527         }
6528       else
6529         {
6530           t = ret;
6531           if (TREE_CODE (t) != RECORD_TYPE)
6532             t = build_variant_type_copy (t);
6533           sysv_va_list_type_node = t;
6534         }
6535       if (DEFAULT_ABI != MS_ABI)
6536         {
6537           t = ix86_build_builtin_va_list_abi (MS_ABI);
6538           if (TREE_CODE (t) != RECORD_TYPE)
6539             t = build_variant_type_copy (t);
6540           ms_va_list_type_node = t;
6541         }
6542       else
6543         {
6544           t = ret;
6545           if (TREE_CODE (t) != RECORD_TYPE)
6546             t = build_variant_type_copy (t);
6547           ms_va_list_type_node = t;
6548         }
6549     }
6550 
6551   return ret;
6552 }
6553 
6554 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
6555 
6556 static void
6557 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6558 {
6559   rtx save_area, mem;
6560   rtx label;
6561   rtx label_ref;
6562   rtx tmp_reg;
6563   rtx nsse_reg;
6564   alias_set_type set;
6565   int i;
6566   int regparm = ix86_regparm;
6567 
6568   if (cum->call_abi != DEFAULT_ABI)
6569     regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6570 
6571   /* GPR size of varargs save area.  */
6572   if (cfun->va_list_gpr_size)
6573     ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6574   else
6575     ix86_varargs_gpr_size = 0;
6576 
6577   /* FPR size of varargs save area.  We don't need it if we don't pass
6578      anything in SSE registers.  */
6579   if (cum->sse_nregs && cfun->va_list_fpr_size)
6580     ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6581   else
6582     ix86_varargs_fpr_size = 0;
6583 
6584   if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6585     return;
6586 
6587   save_area = frame_pointer_rtx;
6588   set = get_varargs_alias_set ();
6589 
6590   for (i = cum->regno;
6591        i < regparm
6592        && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6593        i++)
6594     {
6595       mem = gen_rtx_MEM (Pmode,
6596                          plus_constant (save_area, i * UNITS_PER_WORD));
6597       MEM_NOTRAP_P (mem) = 1;
6598       set_mem_alias_set (mem, set);
6599       emit_move_insn (mem, gen_rtx_REG (Pmode,
6600                                         x86_64_int_parameter_registers[i]));
6601     }
6602 
6603   if (ix86_varargs_fpr_size)
6604     {
      /* The stack must be 16-byte aligned for the FP register save
         area.  */
6606       if (crtl->stack_alignment_needed < 128)
6607         crtl->stack_alignment_needed = 128;
6608 
      /* Now emit code to save SSE registers.  The AX parameter contains the
         number of SSE parameter registers used to call this function.  We use
         the sse_prologue_save insn template, which produces a computed jump
         across the SSE saves.  We need some preparation work to get this
         working.  */
6613 
6614       label = gen_label_rtx ();
6615       label_ref = gen_rtx_LABEL_REF (Pmode, label);
6616 
      /* Compute the address to jump to:
         label - eax*4 + nnamed_sse_arguments*4, or
         label - eax*5 + nnamed_sse_arguments*5 for AVX.  */
6620       tmp_reg = gen_reg_rtx (Pmode);
6621       nsse_reg = gen_reg_rtx (Pmode);
6622       emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6623       emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6624                               gen_rtx_MULT (Pmode, nsse_reg,
6625                                             GEN_INT (4))));
6626 
6627       /* vmovaps is one byte longer than movaps.  */
6628       if (TARGET_AVX)
6629         emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6630                                 gen_rtx_PLUS (Pmode, tmp_reg,
6631                                               nsse_reg)));
6632 
6633       if (cum->sse_regno)
6634         emit_move_insn
6635           (nsse_reg,
6636            gen_rtx_CONST (DImode,
6637                           gen_rtx_PLUS (DImode,
6638                                         label_ref,
6639                                         GEN_INT (cum->sse_regno
6640                                                  * (TARGET_AVX ? 5 : 4)))));
6641       else
6642         emit_move_insn (nsse_reg, label_ref);
6643       emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6644 
      /* Compute the address of the memory block we save into.  We always use
         a pointer pointing 127 bytes after the first byte to store; this
         keeps each instruction limited to 4 bytes (5 bytes for AVX) by using
         a one-byte displacement.  */
6649       tmp_reg = gen_reg_rtx (Pmode);
6650       emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6651                               plus_constant (save_area,
6652                                              ix86_varargs_gpr_size + 127)));
6653       mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6654       MEM_NOTRAP_P (mem) = 1;
6655       set_mem_alias_set (mem, set);
6656       set_mem_align (mem, BITS_PER_WORD);
6657 
6658       /* And finally do the dirty job!  */
6659       emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6660                                         GEN_INT (cum->sse_regno), label));
6661     }
6662 }
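
/* A sketch of the varargs save area laid out above, assuming the usual
   SysV values X86_64_REGPARM_MAX == 6 and X86_64_SSE_REGPARM_MAX == 8
   and that both areas are needed (offsets are relative to the
   frame-pointer-based save_area):

       0 ..  47    %rdi, %rsi, %rdx, %rcx, %r8, %r9    (6 x 8 bytes)
      48 .. 175    %xmm0 .. %xmm7                      (8 x 16 bytes)

   The gp_offset and fp_offset fields of va_list index into this block.  */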
6663 
6664 static void
6665 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6666 {
6667   alias_set_type set = get_varargs_alias_set ();
6668   int i;
6669 
6670   for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6671     {
6672       rtx reg, mem;
6673 
6674       mem = gen_rtx_MEM (Pmode,
6675                          plus_constant (virtual_incoming_args_rtx,
6676                                         i * UNITS_PER_WORD));
6677       MEM_NOTRAP_P (mem) = 1;
6678       set_mem_alias_set (mem, set);
6679 
6680       reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6681       emit_move_insn (mem, reg);
6682     }
6683 }
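
/* Under the MS ABI the caller always reserves stack slots (the 32-byte
   "home" area) for the four register arguments, so the loop above simply
   spills %rcx, %rdx, %r8 and %r9 into their home slots; unlike the SysV
   case there is no separate SSE save area.  */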
6684 
6685 static void
6686 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6687                              tree type, int *pretend_size ATTRIBUTE_UNUSED,
6688                              int no_rtl)
6689 {
6690   CUMULATIVE_ARGS next_cum;
6691   tree fntype;
6692 
  /* This argument doesn't appear to be used anymore, which is good,
     because the old code here didn't suppress rtl generation.  */
6695   gcc_assert (!no_rtl);
6696 
6697   if (!TARGET_64BIT)
6698     return;
6699 
6700   fntype = TREE_TYPE (current_function_decl);
6701 
6702   /* For varargs, we do not want to skip the dummy va_dcl argument.
6703      For stdargs, we do want to skip the last named argument.  */
6704   next_cum = *cum;
6705   if (stdarg_p (fntype))
6706     function_arg_advance (&next_cum, mode, type, 1);
6707 
6708   if (cum->call_abi == MS_ABI)
6709     setup_incoming_varargs_ms_64 (&next_cum);
6710   else
6711     setup_incoming_varargs_64 (&next_cum);
6712 }
6713 
/* Return true if TYPE's va_list is a plain char pointer.  */
6715 
6716 static bool
6717 is_va_list_char_pointer (tree type)
6718 {
6719   tree canonic;
6720 
6721   /* For 32-bit it is always true.  */
6722   if (!TARGET_64BIT)
6723     return true;
6724   canonic = ix86_canonical_va_list_type (type);
6725   return (canonic == ms_va_list_type_node
6726           || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node));
6727 }
6728 
6729 /* Implement va_start.  */
6730 
6731 static void
6732 ix86_va_start (tree valist, rtx nextarg)
6733 {
6734   HOST_WIDE_INT words, n_gpr, n_fpr;
6735   tree f_gpr, f_fpr, f_ovf, f_sav;
6736   tree gpr, fpr, ovf, sav, t;
6737   tree type;
6738 
  /* Only the 64-bit target needs something special.  */
6740   if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6741     {
6742       std_expand_builtin_va_start (valist, nextarg);
6743       return;
6744     }
6745 
6746   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6747   f_fpr = TREE_CHAIN (f_gpr);
6748   f_ovf = TREE_CHAIN (f_fpr);
6749   f_sav = TREE_CHAIN (f_ovf);
6750 
6751   valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6752   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6753   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6754   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6755   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6756 
6757   /* Count number of gp and fp argument registers used.  */
6758   words = crtl->args.info.words;
6759   n_gpr = crtl->args.info.regno;
6760   n_fpr = crtl->args.info.sse_regno;
6761 
6762   if (cfun->va_list_gpr_size)
6763     {
6764       type = TREE_TYPE (gpr);
6765       t = build2 (MODIFY_EXPR, type,
6766                   gpr, build_int_cst (type, n_gpr * 8));
6767       TREE_SIDE_EFFECTS (t) = 1;
6768       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6769     }
6770 
6771   if (TARGET_SSE && cfun->va_list_fpr_size)
6772     {
6773       type = TREE_TYPE (fpr);
6774       t = build2 (MODIFY_EXPR, type, fpr,
6775                   build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6776       TREE_SIDE_EFFECTS (t) = 1;
6777       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6778     }
6779 
6780   /* Find the overflow area.  */
6781   type = TREE_TYPE (ovf);
6782   t = make_tree (type, crtl->args.internal_arg_pointer);
6783   if (words != 0)
6784     t = build2 (POINTER_PLUS_EXPR, type, t,
6785                 size_int (words * UNITS_PER_WORD));
6786   t = build2 (MODIFY_EXPR, type, ovf, t);
6787   TREE_SIDE_EFFECTS (t) = 1;
6788   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6789 
6790   if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6791     {
      /* Find the register save area.
         The function prologue saves it right above the stack frame.  */
6794       type = TREE_TYPE (sav);
6795       t = make_tree (type, frame_pointer_rtx);
6796       if (!ix86_varargs_gpr_size)
6797         t = build2 (POINTER_PLUS_EXPR, type, t,
6798                     size_int (-8 * X86_64_REGPARM_MAX));
6799       t = build2 (MODIFY_EXPR, type, sav, t);
6800       TREE_SIDE_EFFECTS (t) = 1;
6801       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6802     }
6803 }
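
/* In effect, for a 64-bit SysV target the expansion above behaves like
   the following pseudo-C (a sketch; 8 == UNITS_PER_WORD here):

     ap->gp_offset = n_gpr * 8;
     ap->fp_offset = 8 * X86_64_REGPARM_MAX + n_fpr * 16;
     ap->overflow_arg_area = incoming_arg_ptr + words * 8;
     ap->reg_save_area = frame_pointer
                         - (gpr_area_omitted ? 8 * X86_64_REGPARM_MAX : 0);
*/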
6804 
6805 /* Implement va_arg.  */
6806 
6807 static tree
6808 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6809                       gimple_seq *post_p)
6810 {
6811   static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6812   tree f_gpr, f_fpr, f_ovf, f_sav;
6813   tree gpr, fpr, ovf, sav, t;
6814   int size, rsize;
6815   tree lab_false, lab_over = NULL_TREE;
6816   tree addr, t2;
6817   rtx container;
6818   int indirect_p = 0;
6819   tree ptrtype;
6820   enum machine_mode nat_mode;
6821   int arg_boundary;
6822 
  /* Only the 64-bit target needs something special.  */
6824   if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6825     return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6826 
6827   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6828   f_fpr = TREE_CHAIN (f_gpr);
6829   f_ovf = TREE_CHAIN (f_fpr);
6830   f_sav = TREE_CHAIN (f_ovf);
6831 
6832   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6833                 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6834   valist = build_va_arg_indirect_ref (valist);
6835   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6836   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6837   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6838 
6839   indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6840   if (indirect_p)
6841     type = build_pointer_type (type);
6842   size = int_size_in_bytes (type);
6843   rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6844 
6845   nat_mode = type_natural_mode (type, NULL);
6846   switch (nat_mode)
6847     {
6848     case V8SFmode:
6849     case V8SImode:
6850     case V32QImode:
6851     case V16HImode:
6852     case V4DFmode:
6853     case V4DImode:
      /* Unnamed 256-bit vector mode parameters are passed on the stack.  */
6855       if (ix86_cfun_abi () == SYSV_ABI)
6856         {
6857           container = NULL;
6858           break;
6859         }
6860 
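      /* FALLTHRU */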
6861     default:
6862       container = construct_container (nat_mode, TYPE_MODE (type),
6863                                        type, 0, X86_64_REGPARM_MAX,
6864                                        X86_64_SSE_REGPARM_MAX, intreg,
6865                                        0);
6866       break;
6867     }
6868 
6869   /* Pull the value out of the saved registers.  */
6870 
6871   addr = create_tmp_var (ptr_type_node, "addr");
6872   DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6873 
6874   if (container)
6875     {
6876       int needed_intregs, needed_sseregs;
6877       bool need_temp;
6878       tree int_addr, sse_addr;
6879 
6880       lab_false = create_artificial_label ();
6881       lab_over = create_artificial_label ();
6882 
6883       examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
6884 
6885       need_temp = (!REG_P (container)
6886                    && ((needed_intregs && TYPE_ALIGN (type) > 64)
6887                        || TYPE_ALIGN (type) > 128));
6888 
      /* In case we are passing a structure, verify that it is a consecutive
         block in the register save area.  If not, we need to do moves.  */
6891       if (!need_temp && !REG_P (container))
6892         {
          /* Verify that all registers are strictly consecutive.  */
6894           if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6895             {
6896               int i;
6897 
6898               for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6899                 {
6900                   rtx slot = XVECEXP (container, 0, i);
6901                   if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6902                       || INTVAL (XEXP (slot, 1)) != i * 16)
6903                     need_temp = 1;
6904                 }
6905             }
6906           else
6907             {
6908               int i;
6909 
6910               for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6911                 {
6912                   rtx slot = XVECEXP (container, 0, i);
6913                   if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6914                       || INTVAL (XEXP (slot, 1)) != i * 8)
6915                     need_temp = 1;
6916                 }
6917             }
6918         }
6919       if (!need_temp)
6920         {
6921           int_addr = addr;
6922           sse_addr = addr;
6923         }
6924       else
6925         {
6926           int_addr = create_tmp_var (ptr_type_node, "int_addr");
6927           DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6928           sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6929           DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6930         }
6931 
6932       /* First ensure that we fit completely in registers.  */
6933       if (needed_intregs)
6934         {
6935           t = build_int_cst (TREE_TYPE (gpr),
6936                              (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6937           t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6938           t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6939           t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6940           gimplify_and_add (t, pre_p);
6941         }
6942       if (needed_sseregs)
6943         {
6944           t = build_int_cst (TREE_TYPE (fpr),
6945                              (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6946                              + X86_64_REGPARM_MAX * 8);
6947           t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6948           t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6949           t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6950           gimplify_and_add (t, pre_p);
6951         }
6952 
6953       /* Compute index to start of area used for integer regs.  */
6954       if (needed_intregs)
6955         {
6956           /* int_addr = gpr + sav; */
6957           t = fold_convert (sizetype, gpr);
6958           t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6959           gimplify_assign (int_addr, t, pre_p);
6960         }
6961       if (needed_sseregs)
6962         {
6963           /* sse_addr = fpr + sav; */
6964           t = fold_convert (sizetype, fpr);
6965           t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6966           gimplify_assign (sse_addr, t, pre_p);
6967         }
6968       if (need_temp)
6969         {
6970           int i;
6971           tree temp = create_tmp_var (type, "va_arg_tmp");
6972 
6973           /* addr = &temp; */
6974           t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6975           gimplify_assign (addr, t, pre_p);
6976 
6977           for (i = 0; i < XVECLEN (container, 0); i++)
6978             {
6979               rtx slot = XVECEXP (container, 0, i);
6980               rtx reg = XEXP (slot, 0);
6981               enum machine_mode mode = GET_MODE (reg);
6982               tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6983               tree addr_type = build_pointer_type (piece_type);
6984               tree daddr_type = build_pointer_type_for_mode (piece_type,
6985                                                              ptr_mode, true);
6986               tree src_addr, src;
6987               int src_offset;
6988               tree dest_addr, dest;
6989 
6990               if (SSE_REGNO_P (REGNO (reg)))
6991                 {
6992                   src_addr = sse_addr;
6993                   src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
6994                 }
6995               else
6996                 {
6997                   src_addr = int_addr;
6998                   src_offset = REGNO (reg) * 8;
6999                 }
7000               src_addr = fold_convert (addr_type, src_addr);
7001               src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7002                                       size_int (src_offset));
7003               src = build_va_arg_indirect_ref (src_addr);
7004 
7005               dest_addr = fold_convert (daddr_type, addr);
7006               dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7007                                        size_int (INTVAL (XEXP (slot, 1))));
7008               dest = build_va_arg_indirect_ref (dest_addr);
7009 
7010               gimplify_assign (dest, src, pre_p);
7011             }
7012         }
7013 
7014       if (needed_intregs)
7015         {
7016           t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7017                       build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7018           gimplify_assign (gpr, t, pre_p);
7019         }
7020 
7021       if (needed_sseregs)
7022         {
7023           t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7024                       build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7025           gimplify_assign (fpr, t, pre_p);
7026         }
7027 
7028       gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7029 
7030       gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7031     }
7032 
7033   /* ... otherwise out of the overflow area.  */
7034 
  /* When we align a parameter on the stack for the caller, if the
     parameter's alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will
     be aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We match the callee here
     with the caller.  */
7039   arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7040   if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7041     arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7042 
7043   /* Care for on-stack alignment if needed.  */
7044   if (arg_boundary <= 64
7045       || integer_zerop (TYPE_SIZE (type)))
7046     t = ovf;
  else
7048     {
7049       HOST_WIDE_INT align = arg_boundary / 8;
7050       t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7051                   size_int (align - 1));
7052       t = fold_convert (sizetype, t);
7053       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7054                   size_int (-align));
7055       t = fold_convert (TREE_TYPE (ovf), t);
7056     }
7057   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7058   gimplify_assign (addr, t, pre_p);
7059 
7060   t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7061               size_int (rsize * UNITS_PER_WORD));
7062   gimplify_assign (unshare_expr (ovf), t, pre_p);
7063 
7064   if (container)
7065     gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7066 
7067   ptrtype = build_pointer_type (type);
7068   addr = fold_convert (ptrtype, addr);
7069 
7070   if (indirect_p)
7071     addr = build_va_arg_indirect_ref (addr);
7072   return build_va_arg_indirect_ref (addr);
7073 }
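
/* The gimple sequence built above corresponds roughly to the following
   pseudo-C (a sketch, using the usual 6 GP / 8 SSE argument registers):

     if (   (needed_intregs && gp_offset > 48 - needed_intregs * 8)
         || (needed_sseregs && fp_offset > 176 - needed_sseregs * 16))
       goto stack;                      -- value does not fit in registers
     addr = reg_save_area + offset;     -- possibly copied via a temporary
     gp_offset += needed_intregs * 8;
     fp_offset += needed_sseregs * 16;
     goto done;
   stack:
     addr = align (overflow_arg_area, arg_boundary);
     overflow_arg_area = addr + rsize * 8;
   done:
     result = *(type *) addr;
*/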
7074 
7075 /* Return nonzero if OPNUM's MEM should be matched
7076    in movabs* patterns.  */
7077 
7078 int
7079 ix86_check_movabs (rtx insn, int opnum)
7080 {
7081   rtx set, mem;
7082 
7083   set = PATTERN (insn);
7084   if (GET_CODE (set) == PARALLEL)
7085     set = XVECEXP (set, 0, 0);
7086   gcc_assert (GET_CODE (set) == SET);
7087   mem = XEXP (set, opnum);
7088   while (GET_CODE (mem) == SUBREG)
7089     mem = SUBREG_REG (mem);
7090   gcc_assert (MEM_P (mem));
7091   return (volatile_ok || !MEM_VOLATILE_P (mem));
7092 }
7093 
7094 /* Initialize the table of extra 80387 mathematical constants.  */
7095 
7096 static void
7097 init_ext_80387_constants (void)
7098 {
7099   static const char * cst[5] =
7100   {
7101     "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
7102     "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
7103     "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
7104     "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
7105     "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
7106   };
7107   int i;
7108 
7109   for (i = 0; i < 5; i++)
7110     {
7111       real_from_string (&ext_80387_constants_table[i], cst[i]);
7112       /* Ensure each constant is rounded to XFmode precision.  */
7113       real_convert (&ext_80387_constants_table[i],
7114                     XFmode, &ext_80387_constants_table[i]);
7115     }
7116 
7117   ext_80387_constants_init = 1;
7118 }
7119 
/* Return a positive code (see standard_80387_constant_opcode) if the
   constant can be loaded with a special instruction, 0 if it cannot,
   and -1 if X is not a CONST_DOUBLE in an 80387 floating point mode.  */
7122 
7123 int
7124 standard_80387_constant_p (rtx x)
7125 {
7126   enum machine_mode mode = GET_MODE (x);
7127 
7128   REAL_VALUE_TYPE r;
7129 
7130   if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7131     return -1;
7132 
7133   if (x == CONST0_RTX (mode))
7134     return 1;
7135   if (x == CONST1_RTX (mode))
7136     return 2;
7137 
7138   REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7139 
7140   /* For XFmode constants, try to find a special 80387 instruction when
7141      optimizing for size or on those CPUs that benefit from them.  */
7142   if (mode == XFmode
7143       && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7144     {
7145       int i;
7146 
7147       if (! ext_80387_constants_init)
7148         init_ext_80387_constants ();
7149 
7150       for (i = 0; i < 5; i++)
7151         if (real_identical (&r, &ext_80387_constants_table[i]))
7152           return i + 3;
7153     }
7154 
  /* A load of the constant -0.0 or -1.0 will be split into an
     fldz;fchs or fld1;fchs sequence.  */
7157   if (real_isnegzero (&r))
7158     return 8;
7159   if (real_identical (&r, &dconstm1))
7160     return 9;
7161 
7162   return 0;
7163 }
7164 
7165 /* Return the opcode of the special instruction to be used to load
7166    the constant X.  */
7167 
7168 const char *
7169 standard_80387_constant_opcode (rtx x)
7170 {
7171   switch (standard_80387_constant_p (x))
7172     {
7173     case 1:
7174       return "fldz";
7175     case 2:
7176       return "fld1";
7177     case 3:
7178       return "fldlg2";
7179     case 4:
7180       return "fldln2";
7181     case 5:
7182       return "fldl2e";
7183     case 6:
7184       return "fldl2t";
7185     case 7:
7186       return "fldpi";
7187     case 8:
7188     case 9:
7189       return "#";
7190     default:
7191       gcc_unreachable ();
7192     }
7193 }
7194 
7195 /* Return the CONST_DOUBLE representing the 80387 constant that is
7196    loaded by the specified special instruction.  The argument IDX
7197    matches the return value from standard_80387_constant_p.  */
7198 
7199 rtx
7200 standard_80387_constant_rtx (int idx)
7201 {
7202   int i;
7203 
7204   if (! ext_80387_constants_init)
7205     init_ext_80387_constants ();
7206 
7207   switch (idx)
7208     {
7209     case 3:
7210     case 4:
7211     case 5:
7212     case 6:
7213     case 7:
7214       i = idx - 3;
7215       break;
7216 
7217     default:
7218       gcc_unreachable ();
7219     }
7220 
7221   return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7222                                        XFmode);
7223 }
7224 
/* Return 1 if MODE is a valid mode for SSE.  */
7226 static int
7227 standard_sse_mode_p (enum machine_mode mode)
7228 {
7229   switch (mode)
7230     {
7231     case V16QImode:
7232     case V8HImode:
7233     case V4SImode:
7234     case V2DImode:
7235     case V4SFmode:
7236     case V2DFmode:
7237       return 1;
7238 
7239     default:
7240       return 0;
7241     }
7242 }
7243 
/* Return 1 if X is all 0s.  Return 2 if X is all 1s in a 128-bit SSE
   mode and SSE2 is enabled, or 3 if X is all 1s in a 256-bit AVX mode
   and AVX is enabled; return the corresponding negative value if the
   mode matches but the required ISA is not enabled.  */
7247 
7248 int
7249 standard_sse_constant_p (rtx x)
7250 {
7251   enum machine_mode mode = GET_MODE (x);
7252 
7253   if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7254     return 1;
7255   if (vector_all_ones_operand (x, mode))
7256     {
7257       if (standard_sse_mode_p (mode))
7258         return TARGET_SSE2 ? 2 : -2;
7259       else if (VALID_AVX256_REG_MODE (mode))
7260         return TARGET_AVX ? 3 : -3;
7261     }
7262 
7263   return 0;
7264 }
7265 
7266 /* Return the opcode of the special instruction to be used to load
7267    the constant X.  */
7268 
7269 const char *
7270 standard_sse_constant_opcode (rtx insn, rtx x)
7271 {
7272   switch (standard_sse_constant_p (x))
7273     {
7274     case 1:
7275       switch (get_attr_mode (insn))
7276         {
7277         case MODE_V4SF:
7278           return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7279         case MODE_V2DF:
7280           return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7281         case MODE_TI:
7282           return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7283         case MODE_V8SF:
7284           return "vxorps\t%x0, %x0, %x0";
7285         case MODE_V4DF:
7286           return "vxorpd\t%x0, %x0, %x0";
7287         case MODE_OI:
7288           return "vpxor\t%x0, %x0, %x0";
7289         default:
7290           gcc_unreachable ();
7291         }
7292     case 2:
7293       if (TARGET_AVX)
7294         switch (get_attr_mode (insn))
7295           {
7296           case MODE_V4SF:
7297           case MODE_V2DF:
7298           case MODE_TI:
            return "vpcmpeqd\t%0, %0, %0";
7301           default:
7302             gcc_unreachable ();
7303         }
7304       else
7305         return "pcmpeqd\t%0, %0";
7306     }
7307   gcc_unreachable ();
7308 }
7309 
/* Return 1 if OP contains a symbol reference.  */
7311 
7312 int
7313 symbolic_reference_mentioned_p (rtx op)
7314 {
7315   const char *fmt;
7316   int i;
7317 
7318   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7319     return 1;
7320 
7321   fmt = GET_RTX_FORMAT (GET_CODE (op));
7322   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7323     {
7324       if (fmt[i] == 'E')
7325         {
7326           int j;
7327 
7328           for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7329             if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7330               return 1;
7331         }
7332 
7333       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7334         return 1;
7335     }
7336 
7337   return 0;
7338 }
7339 
7340 /* Return 1 if it is appropriate to emit `ret' instructions in the
7341    body of a function.  Do this only if the epilogue is simple, needing a
7342    couple of insns.  Prior to reloading, we can't tell how many registers
7343    must be saved, so return 0 then.  Return 0 if there is no frame
7344    marker to de-allocate.  */
7345 
7346 int
7347 ix86_can_use_return_insn_p (void)
7348 {
7349   struct ix86_frame frame;
7350 
7351   if (! reload_completed || frame_pointer_needed)
7352     return 0;
7353 
  /* Don't allow more than 32768 bytes of popped arguments, since that's
     all we can do with one instruction.  */
7356   if (crtl->args.pops_args
7357       && crtl->args.size >= 32768)
7358     return 0;
7359 
7360   ix86_compute_frame_layout (&frame);
  return (frame.to_allocate == 0 && frame.padding05 == 0
          && frame.nmsave_args == 0 && (frame.nregs + frame.nsseregs) == 0);
7363 }
7364 
7365 /* Value should be nonzero if functions must have frame pointers.
7366    Zero means the frame pointer need not be set up (and parms may
7367    be accessed via the stack pointer) in functions that seem suitable.  */
7368 
7369 int
7370 ix86_frame_pointer_required (void)
7371 {
7372   /* If we accessed previous frames, then the generated code expects
7373      to be able to access the saved ebp value in our frame.  */
7374   if (cfun->machine->accesses_prev_frame)
7375     return 1;
7376 
  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
7379   if (SUBTARGET_FRAME_POINTER_REQUIRED)
7380     return 1;
7381 
7382   if (TARGET_SAVE_ARGS)
    return 1;
7384 
7385   /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7386      the frame pointer by default.  Turn it back on now if we've not
7387      got a leaf function.  */
7388   if (TARGET_OMIT_LEAF_FRAME_POINTER
7389       && (!current_function_is_leaf
7390           || ix86_current_function_calls_tls_descriptor))
7391     return 1;
7392 
7393   if (crtl->profile)
7394     return 1;
7395 
7396   return 0;
7397 }
7398 
7399 /* Record that the current function accesses previous call frames.  */
7400 
7401 void
7402 ix86_setup_frame_addresses (void)
7403 {
7404   cfun->machine->accesses_prev_frame = 1;
7405 }
7406 
7407 #ifndef USE_HIDDEN_LINKONCE
7408 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7409 #  define USE_HIDDEN_LINKONCE 1
7410 # else
7411 #  define USE_HIDDEN_LINKONCE 0
7412 # endif
7413 #endif
7414 
7415 static int pic_labels_used;
7416 
/* Fill in the label name that should be used for a pc thunk for
   the given register.  */
7419 
7420 static void
7421 get_pc_thunk_name (char name[32], unsigned int regno)
7422 {
7423   gcc_assert (!TARGET_64BIT);
7424 
7425   if (USE_HIDDEN_LINKONCE)
7426     sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7427   else
7428     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7429 }
7430 
7431 
/* At the end of the file, emit the -fpic pc thunks; each one loads its
   register with the return address of the caller and then returns.  */
7434 
7435 void
7436 ix86_file_end (void)
7437 {
7438   rtx xops[2];
7439   int regno;
7440 
7441   for (regno = 0; regno < 8; ++regno)
7442     {
7443       char name[32];
7444 
7445       if (! ((pic_labels_used >> regno) & 1))
7446         continue;
7447 
7448       get_pc_thunk_name (name, regno);
7449 
7450 #if TARGET_MACHO
7451       if (TARGET_MACHO)
7452         {
7453           switch_to_section (darwin_sections[text_coal_section]);
7454           fputs ("\t.weak_definition\t", asm_out_file);
7455           assemble_name (asm_out_file, name);
7456           fputs ("\n\t.private_extern\t", asm_out_file);
7457           assemble_name (asm_out_file, name);
7458           fputs ("\n", asm_out_file);
7459           ASM_OUTPUT_LABEL (asm_out_file, name);
7460         }
7461       else
7462 #endif
7463       if (USE_HIDDEN_LINKONCE)
7464         {
7465           tree decl;
7466 
7467           decl = build_decl (FUNCTION_DECL, get_identifier (name),
7468                              error_mark_node);
7469           TREE_PUBLIC (decl) = 1;
7470           TREE_STATIC (decl) = 1;
7471           DECL_ONE_ONLY (decl) = 1;
7472 
7473           (*targetm.asm_out.unique_section) (decl, 0);
7474           switch_to_section (get_named_section (decl, NULL, 0));
7475 
7476           (*targetm.asm_out.globalize_label) (asm_out_file, name);
7477           fputs ("\t.hidden\t", asm_out_file);
7478           assemble_name (asm_out_file, name);
7479           fputc ('\n', asm_out_file);
7480           ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7481         }
7482       else
7483         {
7484           switch_to_section (text_section);
7485           ASM_OUTPUT_LABEL (asm_out_file, name);
7486         }
7487 
7488       xops[0] = gen_rtx_REG (Pmode, regno);
7489       xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7490       output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7491       output_asm_insn ("ret", xops);
7492     }
7493 
7494   if (NEED_INDICATE_EXEC_STACK)
7495     file_end_indicate_exec_stack ();
7496 }
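
/* The thunk emitted above is simply (AT&T syntax, %ebx as an example):

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   i.e. it loads the caller's return address -- the address of the
   instruction following the call -- into the register.  */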
7497 
7498 /* Emit code for the SET_GOT patterns.  */
7499 
7500 const char *
7501 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7502 {
7503   rtx xops[3];
7504 
7505   xops[0] = dest;
7506 
7507   if (TARGET_VXWORKS_RTP && flag_pic)
7508     {
7509       /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
7510       xops[2] = gen_rtx_MEM (Pmode,
7511                              gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7512       output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7513 
7514       /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7515          Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7516          an unadorned address.  */
7517       xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7518       SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7519       output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7520       return "";
7521     }
7522 
7523   xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7524 
7525   if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7526     {
7527       xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7528 
7529       if (!flag_pic)
7530         output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7531       else
7532         output_asm_insn ("call\t%a2", xops);
7533 
7534 #if TARGET_MACHO
7535       /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7536          is what will be referenced by the Mach-O PIC subsystem.  */
7537       if (!label)
7538         ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7539 #endif
7540 
7541       (*targetm.asm_out.internal_label) (asm_out_file, "L",
7542                                  CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7543 
7544       if (flag_pic)
7545         output_asm_insn ("pop%z0\t%0", xops);
7546     }
7547   else
7548     {
7549       char name[32];
7550       get_pc_thunk_name (name, REGNO (dest));
7551       pic_labels_used |= 1 << REGNO (dest);
7552 
7553       xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7554       xops[2] = gen_rtx_MEM (QImode, xops[2]);
7555       output_asm_insn ("call\t%X2", xops);
7556       /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
7557          is what will be referenced by the Mach-O PIC subsystem.  */
7558 #if TARGET_MACHO
7559       if (!label)
7560         ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7561       else
7562         targetm.asm_out.internal_label (asm_out_file, "L",
7563                                            CODE_LABEL_NUMBER (label));
7564 #endif
7565     }
7566 
7567   if (TARGET_MACHO)
7568     return "";
7569 
7570   if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7571     output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7572   else
7573     output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7574 
7575   return "";
7576 }
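
/* Typical assembly produced by the above for a 32-bit PIC prologue
   (sketches):

   without deep branch prediction:
       call    .L2
   .L2:
       popl    %ebx
       addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   with deep branch prediction:
       call    __i686.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx
*/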
7577 
/* Generate a "push" pattern for input ARG.  */
7579 
7580 static rtx
7581 gen_push (rtx arg)
7582 {
7583   return gen_rtx_SET (VOIDmode,
7584                       gen_rtx_MEM (Pmode,
7585                                    gen_rtx_PRE_DEC (Pmode,
7586                                                     stack_pointer_rtx)),
7587                       arg);
7588 }
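
/* I.e. gen_push builds the canonical push RTL, which the backend's push
   patterns match as a single "push" instruction; for 32-bit code:

     (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI arg))
*/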
7589 
/* Return the number of an unused call-clobbered register if one is
   available for the entire function, or INVALID_REGNUM otherwise.  */
7592 
7593 static unsigned int
7594 ix86_select_alt_pic_regnum (void)
7595 {
7596   if (current_function_is_leaf && !crtl->profile
7597       && !ix86_current_function_calls_tls_descriptor)
7598     {
7599       int i, drap;
7600       /* Can't use the same register for both PIC and DRAP.  */
7601       if (crtl->drap_reg)
7602         drap = REGNO (crtl->drap_reg);
7603       else
7604         drap = -1;
7605       for (i = 2; i >= 0; --i)
7606         if (i != drap && !df_regs_ever_live_p (i))
7607           return i;
7608     }
7609 
7610   return INVALID_REGNUM;
7611 }
7612 
7613 /* Return 1 if we need to save REGNO.  */
7614 static int
7615 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7616 {
7617   if (pic_offset_table_rtx
7618       && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7619       && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7620           || crtl->profile
7621           || crtl->calls_eh_return
7622           || crtl->uses_const_pool))
7623     {
7624       if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7625         return 0;
7626       return 1;
7627     }
7628 
7629   if (crtl->calls_eh_return && maybe_eh_return)
7630     {
7631       unsigned i;
7632       for (i = 0; ; i++)
7633         {
7634           unsigned test = EH_RETURN_DATA_REGNO (i);
7635           if (test == INVALID_REGNUM)
7636             break;
7637           if (test == regno)
7638             return 1;
7639         }
7640     }
7641 
7642   if (crtl->drap_reg
7643       && regno == REGNO (crtl->drap_reg))
7644     return 1;
7645 
7646   return (df_regs_ever_live_p (regno)
7647           && !call_used_regs[regno]
7648           && !fixed_regs[regno]
7649           && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7650 }
7651 
/* Return the number of saved general purpose registers.  */
7653 
7654 static int
7655 ix86_nsaved_regs (void)
7656 {
7657   int nregs = 0;
7658   int regno;
7659 
7660   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7661     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs++;
7663   return nregs;
7664 }
7665 
/* Return the number of saved SSE registers.  */
7667 
7668 static int
7669 ix86_nsaved_sseregs (void)
7670 {
7671   int nregs = 0;
7672   int regno;
7673 
7674   if (ix86_cfun_abi () != MS_ABI)
7675     return 0;
7676   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7677     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs++;
7679   return nregs;
7680 }
7681 
7682 /* Given FROM and TO register numbers, say whether this elimination is
7683    allowed.  If stack alignment is needed, we can only replace argument
7684    pointer with hard frame pointer, or replace frame pointer with stack
7685    pointer.  Otherwise, frame pointer elimination is automatically
7686    handled and all other eliminations are valid.  */
7687 
7688 int
7689 ix86_can_eliminate (int from, int to)
7690 {
7691   if (stack_realign_fp)
7692     return ((from == ARG_POINTER_REGNUM
7693              && to == HARD_FRAME_POINTER_REGNUM)
7694             || (from == FRAME_POINTER_REGNUM
7695                 && to == STACK_POINTER_REGNUM));
7696   else
7697     return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7698 }
7699 
7700 /* Return the offset between two registers, one to be eliminated, and the other
7701    its replacement, at the start of a routine.  */
7702 
7703 HOST_WIDE_INT
7704 ix86_initial_elimination_offset (int from, int to)
7705 {
7706   struct ix86_frame frame;
7707   ix86_compute_frame_layout (&frame);
7708 
7709   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7710     return frame.hard_frame_pointer_offset;
7711   else if (from == FRAME_POINTER_REGNUM
7712            && to == HARD_FRAME_POINTER_REGNUM)
7713     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7714   else
7715     {
7716       gcc_assert (to == STACK_POINTER_REGNUM);
7717 
7718       if (from == ARG_POINTER_REGNUM)
7719         return frame.stack_pointer_offset;
7720 
7721       gcc_assert (from == FRAME_POINTER_REGNUM);
7722       return frame.stack_pointer_offset - frame.frame_pointer_offset;
7723     }
7724 }
7725 
7726 /* In a dynamically-aligned function, we can't know the offset from
7727    stack pointer to frame pointer, so we must ensure that setjmp
7728    eliminates fp against the hard fp (%ebp) rather than trying to
7729    index from %esp up to the top of the frame across a gap that is
7730    of unknown (at compile-time) size.  */
7731 static rtx
7732 ix86_builtin_setjmp_frame_value (void)
7733 {
7734   return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7735 }
7736 
/* Fill the ix86_frame structure describing the frame of the currently
   compiled function.  */
7738 
7739 static void
7740 ix86_compute_frame_layout (struct ix86_frame *frame)
7741 {
7742   HOST_WIDE_INT total_size;
7743   unsigned int stack_alignment_needed;
7744   HOST_WIDE_INT offset;
7745   unsigned int preferred_alignment;
7746   HOST_WIDE_INT size = get_frame_size ();
7747 
7748   frame->nregs = ix86_nsaved_regs ();
7749   frame->nsseregs = ix86_nsaved_sseregs ();
7750   frame->nmsave_args = ix86_nsaved_args ();
7751   total_size = size;
7752 
7753   stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7754   preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7755 
  /* The MS ABI seems to require stack alignment to always be 16, except
     for function prologues.  */
7758   if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7759     {
7760       preferred_alignment = 16;
7761       stack_alignment_needed = 16;
7762       crtl->preferred_stack_boundary = 128;
7763       crtl->stack_alignment_needed = 128;
7764     }
7765 
7766   gcc_assert (!size || stack_alignment_needed);
7767   gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7768   gcc_assert (preferred_alignment <= stack_alignment_needed);
7769 
  /* During reload iteration the number of registers saved can change.
     Recompute the value as needed.  Do not recompute when the number of
     registers didn't change, as reload makes multiple calls to this
     function and does not expect the decision to change within a single
     iteration.  */
7774   if (!optimize_function_for_size_p (cfun)
7775       && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7776     {
7777       int count = frame->nregs;
7778 
7779       cfun->machine->use_fast_prologue_epilogue_nregs = count;
7780       /* The fast prologue uses move instead of push to save registers.  This
7781          is significantly longer, but also executes faster as modern hardware
7782          can execute the moves in parallel, but can't do that for push/pop.
7783 
         Be careful about choosing which prologue to emit:  When the
         function takes many instructions to execute, we may use the slow
         version, as well as when the function is known to be outside a hot
         spot (the latter is known with feedback only).  Weight the size of
         the function by the number of registers to save, as it is cheap to
         use one or two push instructions but very slow to use many of
         them.  */
7790       if (count)
7791         count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7792       if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7793           || (flag_branch_probabilities
7794               && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7795         cfun->machine->use_fast_prologue_epilogue = false;
7796       else
7797         cfun->machine->use_fast_prologue_epilogue
7798            = !expensive_function_p (count);
7799     }
7800   if (TARGET_PROLOGUE_USING_MOVE
7801       && cfun->machine->use_fast_prologue_epilogue)
7802     frame->save_regs_using_mov = true;
7803   else
7804     frame->save_regs_using_mov = false;
7805 
7806   if (TARGET_SAVE_ARGS)
7807     {
7808       cfun->machine->use_fast_prologue_epilogue = true;
7809       frame->save_regs_using_mov = true;
7810     }
7811 
7812   /* Skip return address and saved base pointer.  */
7813   offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7814 
7815   frame->hard_frame_pointer_offset = offset;
7816 
  /* Round the offset up to the stack alignment, because the realigned
     frame starts from here.  */
  if (stack_realign_fp)
    offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
7821 
7822   /* Argument save area */
7823   if (TARGET_SAVE_ARGS)
7824     {
7825       offset += frame->nmsave_args * UNITS_PER_WORD;
7826       frame->padding0 = (frame->nmsave_args % 2) * UNITS_PER_WORD;
7827       offset += frame->padding0;
7828     }
7829   else
7830     frame->padding0 = 0;
7831 
7832   /* Register save area */
7833   offset += frame->nregs * UNITS_PER_WORD;
7834 
7835   /* Align SSE reg save area.  */
7836   if (frame->nsseregs)
7837     frame->padding05 = ((offset + 16 - 1) & -16) - offset;
7838   else
7839     frame->padding05 = 0;
7840   
7841   /* SSE register save area.  */
7842   offset += frame->padding05 + frame->nsseregs * 16;
7843 
7844   /* Va-arg area */
7845   frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7846   offset += frame->va_arg_size;
7847 
7848   /* Align start of frame for local function.  */
7849   frame->padding1 = ((offset + stack_alignment_needed - 1)
7850                      & -stack_alignment_needed) - offset;
7851 
7852   offset += frame->padding1;
7853 
7854   /* Frame pointer points here.  */
7855   frame->frame_pointer_offset = offset;
7856 
7857   offset += size;
7858 
  /* Add the outgoing arguments area.  It can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when the function calls alloca.  The
     alloca expander assumes that the last crtl->outgoing_args_size bytes
     of the stack frame are unused.  */
7864   if (ACCUMULATE_OUTGOING_ARGS
7865       && (!current_function_is_leaf || cfun->calls_alloca
7866           || ix86_current_function_calls_tls_descriptor))
7867     {
7868       offset += crtl->outgoing_args_size;
7869       frame->outgoing_arguments_size = crtl->outgoing_args_size;
7870     }
7871   else
7872     frame->outgoing_arguments_size = 0;
7873 
7874   /* Align stack boundary.  Only needed if we're calling another function
7875      or using alloca.  */
7876   if (!current_function_is_leaf || cfun->calls_alloca
7877       || ix86_current_function_calls_tls_descriptor)
7878     frame->padding2 = ((offset + preferred_alignment - 1)
7879                        & -preferred_alignment) - offset;
7880   else
7881     frame->padding2 = 0;
7882 
7883   offset += frame->padding2;
7884 
7885   /* We've reached end of stack frame.  */
7886   frame->stack_pointer_offset = offset;
7887 
7888   /* Size prologue needs to allocate.  */
7889   frame->to_allocate =
7890     (size + frame->padding1 + frame->padding2
7891      + frame->outgoing_arguments_size + frame->va_arg_size);
7892 
7893   if (!TARGET_SAVE_ARGS
7894       && ((!frame->to_allocate && frame->nregs <= 1)
7895           || (TARGET_64BIT
7896               && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000)))
7897     frame->save_regs_using_mov = false;
7898 
7899   if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
7900       && current_function_sp_is_unchanging
7901       && current_function_is_leaf
7902       && !ix86_current_function_calls_tls_descriptor)
7903     {
7904       frame->red_zone_size = frame->to_allocate;
7905       if (frame->save_regs_using_mov)
7906         {
7907           frame->red_zone_size
7908             += (frame->nregs + frame->nmsave_args) * UNITS_PER_WORD;
7909           frame->red_zone_size += frame->padding0;
7910         }
7911       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7912         frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7913     }
7914   else
7915     frame->red_zone_size = 0;
7916   frame->to_allocate -= frame->red_zone_size;
7917   frame->stack_pointer_offset -= frame->red_zone_size;
7918 #if 0
7919   fprintf (stderr, "\n");
7920   fprintf (stderr, "size: %ld\n", (long)size);
7921   fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7922   fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7923   fprintf (stderr, "nmsave_args: %ld\n", (long)frame->nmsave_args);
7924   fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
  fprintf (stderr, "padding05: %ld\n", (long)frame->padding05);
7926   fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7927   fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7928   fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7929   fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7930   fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7931   fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7932   fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7933   fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7934            (long)frame->hard_frame_pointer_offset);
7935   fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7936   fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7937   fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7938   fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7939 #endif
7940 }
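
/* A sketch of the resulting frame layout, from higher to lower addresses
   (any area may be empty):

       incoming arguments
       return address
       saved frame pointer       <- hard_frame_pointer_offset
       saved argument registers + padding0   (TARGET_SAVE_ARGS only)
       saved GP registers
       padding05 + saved SSE registers
       va_arg register save area
       padding1
       local variables           <- frame_pointer_offset
       outgoing arguments
       padding2                  <- stack_pointer_offset

   to_allocate is the part below the register saves that the prologue
   allocates explicitly, less whatever fits in the red zone.  */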
7941 
7942 
7943 /* Emit code to save registers in the prologue.  */
7944 
7945 static void
7946 ix86_emit_save_regs (void)
7947 {
7948   unsigned int regno;
7949   rtx insn;
7950 
7951   if (TARGET_SAVE_ARGS)
7952     {
7953       int i;
7954       int nsaved = ix86_nsaved_args ();
7955       int start = cfun->returns_struct;
7956       for (i = start; i < start + nsaved; i++)
7957         {
7958           regno = x86_64_int_parameter_registers[i];
7959           insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7960           RTX_FRAME_RELATED_P (insn) = 1;
7961         }
7962       if (nsaved % 2 != 0)
7963         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7964                                    GEN_INT (-UNITS_PER_WORD), -1);
7965     }
7966 
  for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
7968     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7969       {
7970         insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7971         RTX_FRAME_RELATED_P (insn) = 1;
7972       }
7973 }
7974 
/* Emit code to save registers using MOV insns.  The first register
   is stored at POINTER + OFFSET.  */
7977 static void
7978 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7979 {
7980   unsigned int regno;
7981   rtx insn;
7982 
7983   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7984     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7985       {
7986         insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7987                                                Pmode, offset),
7988                                gen_rtx_REG (Pmode, regno));
7989         RTX_FRAME_RELATED_P (insn) = 1;
7990         offset += UNITS_PER_WORD;
7991       }
7992 
7993   if (TARGET_SAVE_ARGS)
7994     {
7995       int i;
7996       int nsaved = ix86_nsaved_args ();
7997       int start = cfun->returns_struct;
7998       if (nsaved % 2 != 0)
7999         offset += UNITS_PER_WORD;
8000       for (i = start + nsaved - 1; i >= start; i--)
8001         {
8002           regno = x86_64_int_parameter_registers[i];
8003           insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
8004                                                  Pmode, offset),
8005                                  gen_rtx_REG (Pmode, regno));
8006           RTX_FRAME_RELATED_P (insn) = 1;
8007           offset += UNITS_PER_WORD;
8008         }
8009     }
8010 }
8011 
/* Emit code to save SSE registers using MOV insns.  The first register
   is stored at POINTER + OFFSET.  */
8014 static void
8015 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8016 {
8017   unsigned int regno;
8018   rtx insn;
8019   rtx mem;
8020 
8021   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8022     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8023       {
8024         mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
8025         set_mem_align (mem, 128);
8026         insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
8027         RTX_FRAME_RELATED_P (insn) = 1;
8028         offset += 16;
8029       }
8030 }
8031 
/* Expand a prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame
   related, zero if the %r11 register is live and cannot be freely used,
   and positive otherwise.  */
8037 
8038 static void
8039 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
8040 {
8041   rtx insn;
8042 
8043   if (! TARGET_64BIT)
8044     insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
8045   else if (x86_64_immediate_operand (offset, DImode))
8046     insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
8047   else
8048     {
8049       rtx r11;
8050       /* r11 is used by indirect sibcall return as well, set before the
8051          epilogue and used after the epilogue.  ATM indirect sibcall
8052          shouldn't be used together with huge frame sizes in one
8053          function because of the frame_size check in sibcall.c.  */
8054       gcc_assert (style);
8055       r11 = gen_rtx_REG (DImode, R11_REG);
8056       insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8057       if (style < 0)
8058         RTX_FRAME_RELATED_P (insn) = 1;
8059       insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8060                                                                offset));
8061     }
8062   if (style < 0)
8063     RTX_FRAME_RELATED_P (insn) = 1;
8064 }
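
/* Note that on x86-64 an adjustment whose offset does not fit in a signed
   32-bit immediate cannot be encoded in a single add; the code above
   therefore materializes the offset in %r11 first and then performs a
   register-to-register adjustment via the *_rex64_2 pattern.  */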
8065 
/* Find an available register to be used as the dynamic realign argument
   pointer register.  Such a register will be written in the prologue and
   used at the beginning of the body, so it must not be
        1. a parameter passing register.
        2. the GOT pointer.
   We reuse the static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has a
   shorter encoding.

   Return: the regno of the chosen register.  */
8076 
8077 static unsigned int 
8078 find_drap_reg (void)
8079 {
8080   tree decl = cfun->decl;
8081 
8082   if (TARGET_64BIT)
8083     {
      /* Use R13 for a nested function or a function that needs a static
         chain.  Since a function with a tail call may use any caller-saved
         register in the epilogue, DRAP must not use a caller-saved
         register in that case.  */
8088       if ((decl_function_context (decl)
8089            && !DECL_NO_STATIC_CHAIN (decl))
8090           || crtl->tail_call_emit)
8091         return R13_REG;
8092 
8093       return R10_REG;
8094     }
8095   else
8096     {
      /* Use DI for a nested function or a function that needs a static
         chain.  Since a function with a tail call may use any caller-saved
         register in the epilogue, DRAP must not use a caller-saved
         register in that case.  */
8101       if ((decl_function_context (decl)
8102            && !DECL_NO_STATIC_CHAIN (decl))
8103           || crtl->tail_call_emit)
8104         return DI_REG;
8105     
8106       /* Reuse static chain register if it isn't used for parameter
8107          passing.  */
8108       if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8109           && !lookup_attribute ("fastcall",
8110                                 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8111         return CX_REG;
8112       else
8113         return DI_REG;
8114     }
8115 }
8116 
8117 /* Update incoming stack boundary and estimated stack alignment.  */
8118 
8119 static void
8120 ix86_update_stack_boundary (void)
8121 {
  /* Prefer the one specified at the command line.  */
8123   ix86_incoming_stack_boundary 
8124     = (ix86_user_incoming_stack_boundary
8125        ? ix86_user_incoming_stack_boundary
8126        : ix86_default_incoming_stack_boundary);
8127 
  /* Incoming stack alignment can be changed on individual functions
     via the force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
8131   if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8132       && lookup_attribute (ix86_force_align_arg_pointer_string,
8133                            TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8134     ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8135 
8136   /* The incoming stack frame has to be aligned at least at
8137      parm_stack_boundary.  */
8138   if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8139     ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8140 
  /* The stack at the entry of main is aligned by the runtime.  We use
     the smallest incoming stack boundary.  */
8143   if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8144       && DECL_NAME (current_function_decl)
8145       && MAIN_NAME_P (DECL_NAME (current_function_decl))
8146       && DECL_FILE_SCOPE_P (current_function_decl))
8147     ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8148 
  /* x86-64 varargs needs 16-byte stack alignment for the register save
     area.  */
8151   if (TARGET_64BIT
8152       && cfun->stdarg
8153       && crtl->stack_alignment_estimated < 128)
8154     crtl->stack_alignment_estimated = 128;
8155 }
8156 
8157 /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
8158    needed or an rtx for DRAP otherwise.  */
8159 
8160 static rtx
8161 ix86_get_drap_rtx (void)
8162 {
8163   if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8164     crtl->need_drap = true;
8165 
8166   if (stack_realign_drap)
8167     {
      /* Assign DRAP to vDRAP and return vDRAP.  */
8169       unsigned int regno = find_drap_reg ();
8170       rtx drap_vreg;
8171       rtx arg_ptr;
8172       rtx seq, insn;
8173 
8174       arg_ptr = gen_rtx_REG (Pmode, regno);
8175       crtl->drap_reg = arg_ptr;
8176 
8177       start_sequence ();
8178       drap_vreg = copy_to_reg (arg_ptr);
8179       seq = get_insns ();
8180       end_sequence ();
8181       
8182       insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8183       RTX_FRAME_RELATED_P (insn) = 1;
8184       return drap_vreg;
8185     }
8186   else
8187     return NULL;
8188 }
8189 
8190 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
8191 
8192 static rtx
8193 ix86_internal_arg_pointer (void)
8194 {
8195   return virtual_incoming_args_rtx;
8196 }
8197 
8198 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
8199    This is called from dwarf2out.c to emit call frame instructions
8200    for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
8201 static void
8202 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
8203 {
8204   rtx unspec = SET_SRC (pattern);
8205   gcc_assert (GET_CODE (unspec) == UNSPEC);
8206 
8207   switch (index)
8208     {
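    /* UNSPEC_REG_SAVE records that a register was saved in another
       register; UNSPEC_DEF_CFA redefines the CFA register and offset.
       Dispatch each to the corresponding dwarf2out entry point.  */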
8209     case UNSPEC_REG_SAVE:
8210       dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
8211                               SET_DEST (pattern));
8212       break;
8213     case UNSPEC_DEF_CFA:
8214       dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
8215                          INTVAL (XVECEXP (unspec, 0, 0)));
8216       break;
8217     default:
8218       gcc_unreachable ();
8219     }
8220 }
8221 
/* Finalize the stack_realign_needed flag, which guides generation of the
   prologue/epilogue in the correct form.  */
8224 static void 
8225 ix86_finalize_stack_realign_flags (void)
8226 {
  /* Check whether stack realignment is really needed after reload, and
     store the result in cfun.  */
8229   unsigned int incoming_stack_boundary
8230     = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8231        ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8232   unsigned int stack_realign = (incoming_stack_boundary
8233                                 < (current_function_is_leaf
8234                                    ? crtl->max_used_stack_slot_alignment
8235                                    : crtl->stack_alignment_needed));
8236 
8237   if (crtl->stack_realign_finalized)
8238     {
      /* Once stack_realign_needed is finalized, we can no longer
         change it.  */
8241       gcc_assert (crtl->stack_realign_needed == stack_realign);
8242     }
8243   else
8244     {
8245       crtl->stack_realign_needed = stack_realign;
8246       crtl->stack_realign_finalized = true;
8247     }
8248 }
8249 
8250 /* Expand the prologue into a bunch of separate insns.  */
8251 
8252 void
8253 ix86_expand_prologue (void)
8254 {
8255   rtx insn;
8256   bool pic_reg_used;
8257   struct ix86_frame frame;
8258   HOST_WIDE_INT allocate;
8259 
8260   ix86_finalize_stack_realign_flags ();
8261 
  /* The DRAP must not coexist with stack_realign_fp.  */
8263   gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8264 
8265   ix86_compute_frame_layout (&frame);
8266 
  /* Emit prologue code to adjust stack alignment and set up the DRAP,
     in case the DRAP is needed and stack realignment is really needed
     after reload.  */
8269   if (crtl->drap_reg && crtl->stack_realign_needed)
8270     {
8271       rtx x, y;
8272       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8273       int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8274                               ? 0 : UNITS_PER_WORD);
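      /* The incoming argument pointer is one word above the return
         address, plus another word if the prologue pushes the DRAP
         register below.  */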
8275 
8276       gcc_assert (stack_realign_drap);
8277 
8278       /* Grab the argument pointer.  */
8279       x = plus_constant (stack_pointer_rtx, 
8280                          (UNITS_PER_WORD + param_ptr_offset));
8281       y = crtl->drap_reg;
8282 
      /* We only need to push the parameter pointer reg if it is a
         callee-saved register, since we are about to clobber it.  */
8285       if (!call_used_regs[REGNO (crtl->drap_reg)])
8286         {
          /* Push the arg pointer reg.  */
8288           insn = emit_insn (gen_push (y));
8289           RTX_FRAME_RELATED_P (insn) = 1;
8290         }
8291 
8292       insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8293       RTX_FRAME_RELATED_P (insn) = 1; 
8294 
8295       /* Align the stack.  */
8296       insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8297                                            stack_pointer_rtx,
8298                                            GEN_INT (-align_bytes)));
8299       RTX_FRAME_RELATED_P (insn) = 1;
8300 
8301       /* Replicate the return address on the stack so that return
8302          address can be reached via (argp - 1) slot.  This is needed
8303          to implement macro RETURN_ADDR_RTX and intrinsic function
8304          expand_builtin_return_addr etc.  */
8305       x = crtl->drap_reg;
8306       x = gen_frame_mem (Pmode,
8307                          plus_constant (x, -UNITS_PER_WORD));
8308       insn = emit_insn (gen_push (x));
8309       RTX_FRAME_RELATED_P (insn) = 1;
8310     }
8311 
8312   /* Note: AT&T enter does NOT have reversed args.  Enter is probably
8313      slower on all targets.  Also sdb doesn't like it.  */
8314 
8315   if (frame_pointer_needed)
8316     {
8317       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8318       RTX_FRAME_RELATED_P (insn) = 1;
8319 
8320       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8321       RTX_FRAME_RELATED_P (insn) = 1;
8322     }
8323 
8324   if (stack_realign_fp)
8325     {
8326       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8327       gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8328 
8329       /* Align the stack.  */
8330       insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8331                                            stack_pointer_rtx,
8332                                            GEN_INT (-align_bytes)));
8333       RTX_FRAME_RELATED_P (insn) = 1;
8334     }
8335 
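  /* ALLOCATE covers the local frame, the SSE register save area, and
     its alignment padding; when registers are saved with moves rather
     than pushes, their space is added below as well.  */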
8336   allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding05;
8337 
8338   if (!frame.save_regs_using_mov)
8339     ix86_emit_save_regs ();
8340   else
8341     allocate += (frame.nregs + frame.nmsave_args) * UNITS_PER_WORD
8342       + frame.padding0;
8343 
  /* When using the red zone we may start saving registers before
     allocating the stack frame, saving one cycle of the prologue.
     However, avoid doing this if we will have to probe the stack, since
     at least on x86-64 the stack probe can turn into a call that
     clobbers a red zone location.  */
8349   if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8350       && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8351     ix86_emit_save_regs_using_mov ((frame_pointer_needed
8352                                      && !crtl->stack_realign_needed) 
8353                                    ? hard_frame_pointer_rtx
8354                                    : stack_pointer_rtx,
8355                                    -(frame.nregs + frame.nmsave_args)
8356                                    * UNITS_PER_WORD - frame.padding0);
8357 
8358   if (allocate == 0)
8359     ;
8360   else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8361     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8362                                GEN_INT (-allocate), -1);
8363   else
8364     {
8365       rtx eax = gen_rtx_REG (Pmode, AX_REG);
8366       bool eax_live;
8367       rtx t;
8368 
8369       if (cfun->machine->call_abi == MS_ABI)
8370         eax_live = false;
8371       else
8372         eax_live = ix86_eax_live_at_start_p ();
8373 
8374       if (eax_live)
8375         {
8376           emit_insn (gen_push (eax));
8377           allocate -= UNITS_PER_WORD;
8378         }
8379 
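      /* The allocation size is passed to the stack probe worker in eax.  */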
8380       emit_move_insn (eax, GEN_INT (allocate));
8381 
8382       if (TARGET_64BIT)
8383         insn = gen_allocate_stack_worker_64 (eax, eax);
8384       else
8385         insn = gen_allocate_stack_worker_32 (eax, eax);
8386       insn = emit_insn (insn);
8387       RTX_FRAME_RELATED_P (insn) = 1;
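      /* The worker pattern does not expose the stack adjustment, so
         attach a REG_FRAME_RELATED_EXPR note describing the net effect
         for the unwinder.  */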
8388       t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8389       t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8390       REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
8391                                             t, REG_NOTES (insn));
8392 
8393       if (eax_live)
8394         {
8395           if (frame_pointer_needed)
8396             t = plus_constant (hard_frame_pointer_rtx,
8397                                allocate
8398                                - frame.to_allocate
8399                                - frame.nregs * UNITS_PER_WORD);
8400           else
8401             t = plus_constant (stack_pointer_rtx, allocate);
8402           emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
8403         }
8404     }
8405 
8406   if (frame.save_regs_using_mov
8407       && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8408          && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8409     {
      if (!TARGET_SAVE_ARGS
          && (!frame_pointer_needed
8412            || !(frame.to_allocate + frame.padding05)
8413            || crtl->stack_realign_needed))
8414         ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8415                                        frame.to_allocate
8416                                        + frame.nsseregs * 16 + frame.padding05);
8417       else
8418         /* XXX: Does this need help for SSE? */
8419         ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8420                                        -(frame.nregs + frame.nmsave_args)
8421                                        * UNITS_PER_WORD - frame.padding0);
8422     }
  /* XXX: Do these need help for save-args?  */
8424   if (!frame_pointer_needed
8425       || !(frame.to_allocate + frame.padding0)
8426       || crtl->stack_realign_needed)
8427     ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8428                                        frame.to_allocate);
8429   else
8430     ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8431                                        - frame.nregs * UNITS_PER_WORD
8432                                        - frame.nsseregs * 16
8433                                        - frame.padding05);
8434 
8435   pic_reg_used = false;
8436   if (pic_offset_table_rtx
8437       && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8438           || crtl->profile))
8439     {
8440       unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8441 
8442       if (alt_pic_reg_used != INVALID_REGNUM)
8443         SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8444 
8445       pic_reg_used = true;
8446     }
8447 
8448   if (pic_reg_used)
8449     {
8450       if (TARGET_64BIT)
8451         {
8452           if (ix86_cmodel == CM_LARGE_PIC)
8453             {
8454               rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8455               rtx label = gen_label_rtx ();
8456               emit_label (label);
8457               LABEL_PRESERVE_P (label) = 1;
8458               gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8459               insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8460               insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8461               insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8462                                             pic_offset_table_rtx, tmp_reg));
8463             }
8464           else
8465             insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8466         }
8467       else
8468         insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8469     }
8470 
8471   /* In the pic_reg_used case, make sure that the got load isn't deleted
8472      when mcount needs it.  Blockage to avoid call movement across mcount
8473      call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8474      note.  */
8475   if (crtl->profile && pic_reg_used)
8476     emit_insn (gen_prologue_use (pic_offset_table_rtx));
8477 
8478   if (crtl->drap_reg && !crtl->stack_realign_needed)
8479     {
      /* vDRAP was set up, but after reload it turns out that stack
         realignment isn't necessary.  Emit prologue code here to set up
         the DRAP without the stack realignment adjustment.  */
8483       int drap_bp_offset = UNITS_PER_WORD * 2;
8484       rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8485       insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8486     }
8487 
  /* Prevent instructions from being scheduled into the register save push
     sequence when access to the red zone area is done through the frame
     pointer.  The offset between the frame pointer and the stack pointer
     is calculated relative to the value of the stack pointer at the end
     of the function prologue, and moving instructions that access the
     red zone area via the frame pointer inside the push sequence violates
     this assumption.  */
8494   if (frame_pointer_needed && frame.red_zone_size)
8495     emit_insn (gen_memory_blockage ());
8496 
8497   /* Emit cld instruction if stringops are used in the function.  */
8498   if (TARGET_CLD && ix86_current_function_needs_cld)
8499     emit_insn (gen_cld ());
8500 }
8501 
8502 /* Emit code to restore saved registers using MOV insns.  First register
8503    is restored from POINTER + OFFSET.  */
8504 static void
8505 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8506                                   int maybe_eh_return)
8507 {
8508   int regno;
8509   rtx base_address = gen_rtx_MEM (Pmode, pointer);
8510 
8511   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8512     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8513       {
        /* Ensure that adjust_address won't be forced to produce a pointer
           out of the range allowed by the x86-64 instruction set.  */
8516         if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8517           {
8518             rtx r11;
8519 
8520             r11 = gen_rtx_REG (DImode, R11_REG);
8521             emit_move_insn (r11, GEN_INT (offset));
8522             emit_insn (gen_adddi3 (r11, r11, pointer));
8523             base_address = gen_rtx_MEM (Pmode, r11);
8524             offset = 0;
8525           }
8526         emit_move_insn (gen_rtx_REG (Pmode, regno),
8527                         adjust_address (base_address, Pmode, offset));
8528         offset += UNITS_PER_WORD;
8529       }
8530 }
8531 
/* Emit code to restore saved SSE registers using MOV insns.  First
   register is restored from POINTER + OFFSET.  */
8534 static void
8535 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8536                                       int maybe_eh_return)
8537 {
8538   int regno;
8539   rtx base_address = gen_rtx_MEM (TImode, pointer);
8540   rtx mem;
8541 
8542   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8543     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8544       {
        /* Ensure that adjust_address won't be forced to produce a pointer
           out of the range allowed by the x86-64 instruction set.  */
8547         if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8548           {
8549             rtx r11;
8550 
8551             r11 = gen_rtx_REG (DImode, R11_REG);
8552             emit_move_insn (r11, GEN_INT (offset));
8553             emit_insn (gen_adddi3 (r11, r11, pointer));
8554             base_address = gen_rtx_MEM (TImode, r11);
8555             offset = 0;
8556           }
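        /* The SSE save area is 16-byte aligned, so the restore can use
           aligned moves.  */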
8557         mem = adjust_address (base_address, TImode, offset);
8558         set_mem_align (mem, 128);
8559         emit_move_insn (gen_rtx_REG (TImode, regno), mem);
8560         offset += 16;
8561       }
8562 }
8563 
8564 /* Restore function stack, frame, and registers.  */
8565 
8566 void
8567 ix86_expand_epilogue (int style)
8568 {
8569   int regno;
8570   int sp_valid;
8571   struct ix86_frame frame;
8572   HOST_WIDE_INT offset;
8573 
8574   ix86_finalize_stack_realign_flags ();
8575 
  /* When the stack is realigned, SP must be valid.  */
8577   sp_valid = (!frame_pointer_needed
8578               || current_function_sp_is_unchanging
8579               || stack_realign_fp);
8580 
8581   ix86_compute_frame_layout (&frame);
8582 
8583   /* See the comment about red zone and frame
8584      pointer usage in ix86_expand_prologue.  */
8585   if (frame_pointer_needed && frame.red_zone_size)
8586     emit_insn (gen_memory_blockage ()); 
8587 
8588   /* Calculate start of saved registers relative to ebp.  Special care
8589      must be taken for the normal return case of a function using
8590      eh_return: the eax and edx registers are marked as saved, but not
8591      restored along this path.  */
8592   offset = frame.nregs + frame.nmsave_args;
8593   if (crtl->calls_eh_return && style != 2)
8594     offset -= 2;
8595   offset *= -UNITS_PER_WORD;
8596   offset -= frame.nsseregs * 16 + frame.padding05 + frame.padding0;
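  /* OFFSET is now the (negative) displacement from the frame pointer
     down to the low end of the register save area.  */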
8597 
  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's less
     work than reloading sp and popping the register.

     The default code results in a stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well, especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in future.  */
8608   if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8609       || (TARGET_EPILOGUE_USING_MOVE
8610           && cfun->machine->use_fast_prologue_epilogue
8611           && ((frame.nregs + frame.nsseregs) > 1
8612               || (frame.to_allocate + frame.padding0) != 0))
8613       || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
8614           && (frame.to_allocate + frame.padding0) != 0)
8615       || (frame_pointer_needed && TARGET_USE_LEAVE
8616           && cfun->machine->use_fast_prologue_epilogue
8617           && (frame.nregs + frame.nsseregs) == 1)
8618       || crtl->calls_eh_return)
8619     {
      /* Restore registers.  We can use ebp or esp to address the memory
         locations.  If both are available, default to ebp, since offsets
         are known to be small.  The only exception is esp pointing
         directly to the end of the block of saved registers, where we
         may simplify the addressing mode.

         If we are realigning the stack with both bp and sp, the register
         restores can't be addressed through bp; sp must be used
         instead.  */
8628 
8629       if (!frame_pointer_needed
8630           || (sp_valid && !(frame.to_allocate + frame.padding0))
8631           || stack_realign_fp)
8632         {
8633           ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8634                                                 frame.to_allocate, style == 2);
8635           ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8636                                             frame.to_allocate
8637                                             + frame.nsseregs * 16
8638                                             + frame.padding05, style == 2);
8639         }
8640       else
8641         {
8642           ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8643                                                 offset, style == 2);
8644           ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8645                                             offset
8646                                             + frame.nsseregs * 16
8647                                             + frame.padding05, style == 2);
8648         }
8649 
8650       /* eh_return epilogues need %ecx added to the stack pointer.  */
8651       if (style == 2)
8652         {
8653           rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8654 
8655           /* Stack align doesn't work with eh_return.  */
8656           gcc_assert (!crtl->stack_realign_needed);
8657 
8658           if (frame_pointer_needed)
8659             {
8660               tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8661               tmp = plus_constant (tmp, UNITS_PER_WORD);
8662               emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8663 
8664               tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8665               emit_move_insn (hard_frame_pointer_rtx, tmp);
8666 
8667               pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8668                                          const0_rtx, style);
8669             }
8670           else
8671             {
8672               tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8673               tmp = plus_constant (tmp, (frame.to_allocate
8674                                          + (frame.nregs + frame.nmsave_args)
8675                                            * UNITS_PER_WORD
8676                                          + frame.nsseregs * 16
8677                                          + frame.padding05 + frame.padding0));
8678               emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8679             }
8680         }
8681       else if (!frame_pointer_needed)
8682         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8683                                    GEN_INT (frame.to_allocate
8684                                             + (frame.nregs + frame.nmsave_args)
8685                                               * UNITS_PER_WORD
8686                                             + frame.nsseregs * 16
8687                                             + frame.padding05 + frame.padding0),
8688                                    style);
8689       /* If not an i386, mov & pop is faster than "leave".  */
8690       else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8691                || !cfun->machine->use_fast_prologue_epilogue)
8692         emit_insn ((*ix86_gen_leave) ());
8693       else
8694         {
8695           pro_epilogue_adjust_stack (stack_pointer_rtx,
8696                                      hard_frame_pointer_rtx,
8697                                      const0_rtx, style);
8698 
8699           emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8700         }
8701     }
8702   else
8703     {
      /* The first step is to deallocate the stack frame so that we can
         pop the registers.

         If we realign the stack with the frame pointer, then the stack
         pointer can't be recovered via lea $offset(%bp), %sp, because
         there is a padding area between bp and sp for the realignment.
         "add $to_allocate, %sp" must be used instead.  */
8711       if (!sp_valid)
8712         {
8713           gcc_assert (frame_pointer_needed);
8714           gcc_assert (!stack_realign_fp);
8715           pro_epilogue_adjust_stack (stack_pointer_rtx,
8716                                      hard_frame_pointer_rtx,
8717                                      GEN_INT (offset), style);
8718           ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8719                                                 0, style == 2);
8720           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8721                                      GEN_INT (frame.nsseregs * 16 +
8722                                        frame.padding0), style);
8723         }
8724       else if (frame.to_allocate || frame.padding0 || frame.nsseregs)
8725         {
8726           ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8727                                                 frame.to_allocate,
8728                                                 style == 2);
8729           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8730                                      GEN_INT (frame.to_allocate
8731                                               + frame.nsseregs * 16
8732                                               + frame.padding05), style);
8733         }
8734 
8735       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8736         if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8737           emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8738 
8739       /* XXX: Needs adjustment for SSE regs? */
8740       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8741                                  GEN_INT (frame.nmsave_args * UNITS_PER_WORD
8742                                           + frame.padding0), style);
8743       if (frame_pointer_needed)
8744         {
8745           /* Leave results in shorter dependency chains on CPUs that are
8746              able to grok it fast.  */
8747           if (TARGET_USE_LEAVE)
8748             emit_insn ((*ix86_gen_leave) ());
8749           else
8750             {
              /* When stack realignment really happens, we must restore
                 the stack pointer from the hard frame pointer if we are
                 not using leave.  */
8754               if (stack_realign_fp)
8755                 pro_epilogue_adjust_stack (stack_pointer_rtx,
8756                                            hard_frame_pointer_rtx,
8757                                            const0_rtx, style);
8758               emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8759             }
8760         }
8761     }
8762 
8763   if (crtl->drap_reg && crtl->stack_realign_needed)
8764     {
8765       int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8766                               ? 0 : UNITS_PER_WORD);
8767       gcc_assert (stack_realign_drap);
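      /* Recover the stack pointer from the DRAP: the argument pointer
         is one word above the return address, plus another word if the
         prologue pushed the DRAP register.  */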
8768       emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8769                                    crtl->drap_reg,
8770                                    GEN_INT (-(UNITS_PER_WORD
8771                                               + param_ptr_offset))));
8772       if (!call_used_regs[REGNO (crtl->drap_reg)])
8773         emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8775     }
8776 
8777   /* Sibcall epilogues don't want a return instruction.  */
8778   if (style == 0)
8779     return;
8780 
8781   if (crtl->args.pops_args && crtl->args.size)
8782     {
8783       rtx popc = GEN_INT (crtl->args.pops_args);
8784 
      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
         return address, do an explicit add, and jump indirectly to the
         caller.  */
8788 
8789       if (crtl->args.pops_args >= 65536)
8790         {
8791           rtx ecx = gen_rtx_REG (SImode, CX_REG);
8792 
8793           /* There is no "pascal" calling convention in any 64bit ABI.  */
8794           gcc_assert (!TARGET_64BIT);
8795 
8796           emit_insn (gen_popsi1 (ecx));
8797           emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8798           emit_jump_insn (gen_return_indirect_internal (ecx));
8799         }
8800       else
8801         emit_jump_insn (gen_return_pop_internal (popc));
8802     }
8803   else
8804     emit_jump_insn (gen_return_internal ());
8805 }
8806 
/* Reset global state from the function's potential modifications.  */
8808 
8809 static void
8810 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8811                                HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8812 {
8813   if (pic_offset_table_rtx)
8814     SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8815 #if TARGET_MACHO
8816   /* Mach-O doesn't support labels at the end of objects, so if
8817      it looks like we might want one, insert a NOP.  */
8818   {
8819     rtx insn = get_last_insn ();
8820     while (insn
8821            && NOTE_P (insn)
8822            && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8823       insn = PREV_INSN (insn);
8824     if (insn
8825         && (LABEL_P (insn)
8826             || (NOTE_P (insn)
8827                 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8828       fputs ("\tnop\n", file);
8829   }
8830 #endif
8831 
8832 }
8833 
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but is still used for computing the length of a lea
   instruction.  */
8838 
8839 int
8840 ix86_decompose_address (rtx addr, struct ix86_address *out)
8841 {
8842   rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8843   rtx base_reg, index_reg;
8844   HOST_WIDE_INT scale = 1;
8845   rtx scale_rtx = NULL_RTX;
8846   int retval = 1;
8847   enum ix86_address_seg seg = SEG_DEFAULT;
8848 
8849   if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8850     base = addr;
8851   else if (GET_CODE (addr) == PLUS)
8852     {
8853       rtx addends[4], op;
8854       int n = 0, i;
8855 
8856       op = addr;
8857       do
8858         {
8859           if (n >= 4)
8860             return 0;
8861           addends[n++] = XEXP (op, 1);
8862           op = XEXP (op, 0);
8863         }
8864       while (GET_CODE (op) == PLUS);
8865       if (n >= 4)
8866         return 0;
8867       addends[n] = op;
8868 
8869       for (i = n; i >= 0; --i)
8870         {
8871           op = addends[i];
8872           switch (GET_CODE (op))
8873             {
8874             case MULT:
8875               if (index)
8876                 return 0;
8877               index = XEXP (op, 0);
8878               scale_rtx = XEXP (op, 1);
8879               break;
8880 
8881             case UNSPEC:
8882               if (XINT (op, 1) == UNSPEC_TP
8883                   && TARGET_TLS_DIRECT_SEG_REFS
8884                   && seg == SEG_DEFAULT)
8885                 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8886               else
8887                 return 0;
8888               break;
8889 
8890             case REG:
8891             case SUBREG:
8892               if (!base)
8893                 base = op;
8894               else if (!index)
8895                 index = op;
8896               else
8897                 return 0;
8898               break;
8899 
8900             case CONST:
8901             case CONST_INT:
8902             case SYMBOL_REF:
8903             case LABEL_REF:
8904               if (disp)
8905                 return 0;
8906               disp = op;
8907               break;
8908 
8909             default:
8910               return 0;
8911             }
8912         }
8913     }
8914   else if (GET_CODE (addr) == MULT)
8915     {
8916       index = XEXP (addr, 0);           /* index*scale */
8917       scale_rtx = XEXP (addr, 1);
8918     }
8919   else if (GET_CODE (addr) == ASHIFT)
8920     {
8921       rtx tmp;
8922 
8923       /* We're called for lea too, which implements ashift on occasion.  */
8924       index = XEXP (addr, 0);
8925       tmp = XEXP (addr, 1);
8926       if (!CONST_INT_P (tmp))
8927         return 0;
8928       scale = INTVAL (tmp);
8929       if ((unsigned HOST_WIDE_INT) scale > 3)
8930         return 0;
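      /* lea encodes shift counts 0-3 as scale factors 1, 2, 4 and 8.  */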
8931       scale = 1 << scale;
8932       retval = -1;
8933     }
8934   else
8935     disp = addr;                        /* displacement */
8936 
8937   /* Extract the integral value of scale.  */
8938   if (scale_rtx)
8939     {
8940       if (!CONST_INT_P (scale_rtx))
8941         return 0;
8942       scale = INTVAL (scale_rtx);
8943     }
8944 
8945   base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8946   index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8947 
  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
8949   if (base_reg && index_reg && scale == 1
8950       && (index_reg == arg_pointer_rtx
8951           || index_reg == frame_pointer_rtx
8952           || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
8953     {
8954       rtx tmp;
8955       tmp = base, base = index, index = tmp;
8956       tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8957     }
8958 
8959   /* Special case: %ebp cannot be encoded as a base without a displacement.  */
8960   if ((base_reg == hard_frame_pointer_rtx
8961        || base_reg == frame_pointer_rtx
8962        || base_reg == arg_pointer_rtx) && !disp)
8963     disp = const0_rtx;
8964 
  /* Special case: on K6, [%esi] forces the instruction to be vector
     decoded.  Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
8969   if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8970       && base_reg && !index_reg && !disp
8971       && REG_P (base_reg)
8972       && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8973     disp = const0_rtx;
8974 
8975   /* Special case: encode reg+reg instead of reg*2.  */
8976   if (!base && index && scale && scale == 2)
8977     base = index, base_reg = index_reg, scale = 1;
8978 
8979   /* Special case: scaling cannot be encoded without base or displacement.  */
8980   if (!base && !disp && index && scale != 1)
8981     disp = const0_rtx;
8982 
8983   out->base = base;
8984   out->index = index;
8985   out->disp = disp;
8986   out->scale = scale;
8987   out->seg = seg;
8988 
8989   return retval;
8990 }
8991 
/* Return the cost of the memory address X.
   For i386, it is better to use a complex address than to let gcc copy
   the address into a register and make a new pseudo.  But not if the
   address requires two registers - that would mean more pseudos with
   longer lifetimes.  */
8997 static int
8998 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8999 {
9000   struct ix86_address parts;
9001   int cost = 1;
9002   int ok = ix86_decompose_address (x, &parts);
9003 
9004   gcc_assert (ok);
9005 
9006   if (parts.base && GET_CODE (parts.base) == SUBREG)
9007     parts.base = SUBREG_REG (parts.base);
9008   if (parts.index && GET_CODE (parts.index) == SUBREG)
9009     parts.index = SUBREG_REG (parts.index);
9010 
9011   /* Attempt to minimize number of registers in the address.  */
9012   if ((parts.base
9013        && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
9014       || (parts.index
9015           && (!REG_P (parts.index)
9016               || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
9017     cost++;
9018 
9019   if (parts.base
9020       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
9021       && parts.index
9022       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
9023       && parts.base != parts.index)
9024     cost++;
9025 
  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoding.  Increase the cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be
     worthwhile to split such addresses or even refuse them at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last cases may be avoidable by explicitly coding the
     zero into the memory address, but I don't have an AMD-K6 machine
     handy to check this theory.  */
9040 
9041   if (TARGET_K6
9042       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
9043           || (parts.disp && !parts.base && parts.index && parts.scale != 1)
9044           || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
9045     cost += 10;
9046 
9047   return cost;
9048 }
9049 
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   use.  */
9053 
9054 static bool
9055 darwin_local_data_pic (rtx disp)
9056 {
9057   return (GET_CODE (disp) == UNSPEC
9058           && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
9059 }
9060 
9061 /* Determine if a given RTX is a valid constant.  We already know this
9062    satisfies CONSTANT_P.  */
9063 
9064 bool
9065 legitimate_constant_p (rtx x)
9066 {
9067   switch (GET_CODE (x))
9068     {
9069     case CONST:
9070       x = XEXP (x, 0);
9071 
9072       if (GET_CODE (x) == PLUS)
9073         {
9074           if (!CONST_INT_P (XEXP (x, 1)))
9075             return false;
9076           x = XEXP (x, 0);
9077         }
9078 
9079       if (TARGET_MACHO && darwin_local_data_pic (x))
9080         return true;
9081 
9082       /* Only some unspecs are valid as "constants".  */
9083       if (GET_CODE (x) == UNSPEC)
9084         switch (XINT (x, 1))
9085           {
9086           case UNSPEC_GOT:
9087           case UNSPEC_GOTOFF:
9088           case UNSPEC_PLTOFF:
9089             return TARGET_64BIT;
9090           case UNSPEC_TPOFF:
9091           case UNSPEC_NTPOFF:
9092             x = XVECEXP (x, 0, 0);
9093             return (GET_CODE (x) == SYMBOL_REF
9094                     && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9095           case UNSPEC_DTPOFF:
9096             x = XVECEXP (x, 0, 0);
9097             return (GET_CODE (x) == SYMBOL_REF
9098                     && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9099           default:
9100             return false;
9101           }
9102 
9103       /* We must have drilled down to a symbol.  */
9104       if (GET_CODE (x) == LABEL_REF)
9105         return true;
9106       if (GET_CODE (x) != SYMBOL_REF)
9107         return false;
9108       /* FALLTHRU */
9109 
9110     case SYMBOL_REF:
9111       /* TLS symbols are never valid.  */
9112       if (SYMBOL_REF_TLS_MODEL (x))
9113         return false;
9114 
9115       /* DLLIMPORT symbols are never valid.  */
9116       if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9117           && SYMBOL_REF_DLLIMPORT_P (x))
9118         return false;
9119       break;
9120 
9121     case CONST_DOUBLE:
9122       if (GET_MODE (x) == TImode
9123           && x != CONST0_RTX (TImode)
9124           && !TARGET_64BIT)
9125         return false;
9126       break;
9127 
9128     case CONST_VECTOR:
9129       if (x == CONST0_RTX (GET_MODE (x)))
9130         return true;
9131       return false;
9132 
9133     default:
9134       break;
9135     }
9136 
9137   /* Otherwise we handle everything else in the move patterns.  */
9138   return true;
9139 }
9140 
9141 /* Determine if it's legal to put X into the constant pool.  This
9142    is not possible for the address of thread-local symbols, which
9143    is checked above.  */
9144 
9145 static bool
9146 ix86_cannot_force_const_mem (rtx x)
9147 {
9148   /* We can always put integral constants and vectors in memory.  */
9149   switch (GET_CODE (x))
9150     {
9151     case CONST_INT:
9152     case CONST_DOUBLE:
9153     case CONST_VECTOR:
9154       return false;
9155 
9156     default:
9157       break;
9158     }
9159   return !legitimate_constant_p (x);
9160 }
9161 
9162 /* Determine if a given RTX is a valid constant address.  */
9163 
9164 bool
9165 constant_address_p (rtx x)
9166 {
9167   return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
9168 }
9169 
9170 /* Return number of arguments to be saved on the stack with
9171    -msave-args.  */
9172 
9173 static int
9174 ix86_nsaved_args (void)
9175 {
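  /* crtl->args.info.regno counts the argument registers consumed on
     entry; when the function returns an aggregate in memory, the first
     register holds the hidden return-value pointer rather than a user
     argument, hence the subtraction.  (This reading of the fields is an
     assumption.)  */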
9176   if (TARGET_SAVE_ARGS)
9177     return crtl->args.info.regno - cfun->returns_struct;
9178   else
9179     return 0;
9180 }
9181 
9182 /* Nonzero if the constant value X is a legitimate general operand
9183    when generating PIC code.  It is given that flag_pic is on and
9184    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
9185 bool
9186 legitimate_pic_operand_p (rtx x)
9187 {
9188   rtx inner;
9189 
9190   switch (GET_CODE (x))
9191     {
9192     case CONST:
9193       inner = XEXP (x, 0);
9194       if (GET_CODE (inner) == PLUS
9195           && CONST_INT_P (XEXP (inner, 1)))
9196         inner = XEXP (inner, 0);
9197 
9198       /* Only some unspecs are valid as "constants".  */
9199       if (GET_CODE (inner) == UNSPEC)
9200         switch (XINT (inner, 1))
9201           {
9202           case UNSPEC_GOT:
9203           case UNSPEC_GOTOFF:
9204           case UNSPEC_PLTOFF:
9205             return TARGET_64BIT;
9206           case UNSPEC_TPOFF:
9207             x = XVECEXP (inner, 0, 0);
9208             return (GET_CODE (x) == SYMBOL_REF
9209                     && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9210           case UNSPEC_MACHOPIC_OFFSET:
9211             return legitimate_pic_address_disp_p (x);
9212           default:
9213             return false;
9214           }
9215       /* FALLTHRU */
9216 
9217     case SYMBOL_REF:
9218     case LABEL_REF:
9219       return legitimate_pic_address_disp_p (x);
9220 
9221     default:
9222       return true;
9223     }
9224 }
9225 
9226 /* Determine if a given CONST RTX is a valid memory displacement
9227    in PIC mode.  */
9228 
9229 int
9230 legitimate_pic_address_disp_p (rtx disp)
9231 {
9232   bool saw_plus;
9233 
9234   /* In 64bit mode we can allow direct addresses of symbols and labels
9235      when they are not dynamic symbols.  */
9236   if (TARGET_64BIT)
9237     {
9238       rtx op0 = disp, op1;
9239 
9240       switch (GET_CODE (disp))
9241         {
9242         case LABEL_REF:
9243           return true;
9244 
9245         case CONST:
9246           if (GET_CODE (XEXP (disp, 0)) != PLUS)
9247             break;
9248           op0 = XEXP (XEXP (disp, 0), 0);
9249           op1 = XEXP (XEXP (disp, 0), 1);
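          /* Limit the constant offset to +/-16MB so that symbol+offset
             stays safely representable in a 32-bit relocation.  */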
9250           if (!CONST_INT_P (op1)
9251               || INTVAL (op1) >= 16*1024*1024
9252               || INTVAL (op1) < -16*1024*1024)
9253             break;
9254           if (GET_CODE (op0) == LABEL_REF)
9255             return true;
9256           if (GET_CODE (op0) != SYMBOL_REF)
9257             break;
9258           /* FALLTHRU */
9259 
9260         case SYMBOL_REF:
9261           /* TLS references should always be enclosed in UNSPEC.  */
9262           if (SYMBOL_REF_TLS_MODEL (op0))
9263             return false;
9264           if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9265               && ix86_cmodel != CM_LARGE_PIC)
9266             return true;
9267           break;
9268 
9269         default:
9270           break;
9271         }
9272     }
9273   if (GET_CODE (disp) != CONST)
9274     return 0;
9275   disp = XEXP (disp, 0);
9276 
9277   if (TARGET_64BIT)
9278     {
      /* It is unsafe to allow PLUS expressions.  This limits the allowed
         distance of GOT table references.  We should not need these
         anyway.  */
9281       if (GET_CODE (disp) != UNSPEC
9282           || (XINT (disp, 1) != UNSPEC_GOTPCREL
9283               && XINT (disp, 1) != UNSPEC_GOTOFF
9284               && XINT (disp, 1) != UNSPEC_PLTOFF))
9285         return 0;
9286 
9287       if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9288           && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
9289         return 0;
9290       return 1;
9291     }
9292 
9293   saw_plus = false;
9294   if (GET_CODE (disp) == PLUS)
9295     {
9296       if (!CONST_INT_P (XEXP (disp, 1)))
9297         return 0;
9298       disp = XEXP (disp, 0);
9299       saw_plus = true;
9300     }
9301 
9302   if (TARGET_MACHO && darwin_local_data_pic (disp))
9303     return 1;
9304 
9305   if (GET_CODE (disp) != UNSPEC)
9306     return 0;
9307 
9308   switch (XINT (disp, 1))
9309     {
9310     case UNSPEC_GOT:
9311       if (saw_plus)
9312         return false;
9313       /* We need to check for both symbols and labels because VxWorks loads
9314          text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
9315          details.  */
9316       return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9317               || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9318     case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64-bit mode since it is always 64 bits wide
         when used.  While the ABI also specifies a 32-bit relocation, we
         don't produce it in the small PIC model at all.  */
9322       if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9323            || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9324           && !TARGET_64BIT)
9325         return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9326       return false;
9327     case UNSPEC_GOTTPOFF:
9328     case UNSPEC_GOTNTPOFF:
9329     case UNSPEC_INDNTPOFF:
9330       if (saw_plus)
9331         return false;
9332       disp = XVECEXP (disp, 0, 0);
9333       return (GET_CODE (disp) == SYMBOL_REF
9334               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9335     case UNSPEC_NTPOFF:
9336       disp = XVECEXP (disp, 0, 0);
9337       return (GET_CODE (disp) == SYMBOL_REF
9338               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9339     case UNSPEC_DTPOFF:
9340       disp = XVECEXP (disp, 0, 0);
9341       return (GET_CODE (disp) == SYMBOL_REF
9342               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9343     }
9344 
9345   return 0;
9346 }
9347 
9348 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
9349    memory address for an instruction.  The MODE argument is the machine mode
9350    for the MEM expression that wants to use this address.
9351 
   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS
   should convert common non-canonical forms to canonical form so that
   they will be recognized.  */
9355 
9356 int
9357 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9358                       rtx addr, int strict)
9359 {
9360   struct ix86_address parts;
9361   rtx base, index, disp;
9362   HOST_WIDE_INT scale;
9363   const char *reason = NULL;
9364   rtx reason_rtx = NULL_RTX;
9365 
9366   if (ix86_decompose_address (addr, &parts) <= 0)
9367     {
9368       reason = "decomposition failed";
9369       goto report_error;
9370     }
9371 
9372   base = parts.base;
9373   index = parts.index;
9374   disp = parts.disp;
9375   scale = parts.scale;
9376 
9377   /* Validate base register.
9378 
9379      Don't allow SUBREG's that span more than a word here.  It can lead to spill
9380      failures when the base is one word out of a two word structure, which is
9381      represented internally as a DImode int.  */
9382 
9383   if (base)
9384     {
9385       rtx reg;
9386       reason_rtx = base;
9387 
9388       if (REG_P (base))
9389         reg = base;
9390       else if (GET_CODE (base) == SUBREG
9391                && REG_P (SUBREG_REG (base))
9392                && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9393                   <= UNITS_PER_WORD)
9394         reg = SUBREG_REG (base);
9395       else
9396         {
9397           reason = "base is not a register";
9398           goto report_error;
9399         }
9400 
9401       if (GET_MODE (base) != Pmode)
9402         {
9403           reason = "base is not in Pmode";
9404           goto report_error;
9405         }
9406 
9407       if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9408           || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9409         {
9410           reason = "base is not valid";
9411           goto report_error;
9412         }
9413     }
9414 
9415   /* Validate index register.
9416 
9417      Don't allow SUBREG's that span more than a word here -- same as above.  */
9418 
9419   if (index)
9420     {
9421       rtx reg;
9422       reason_rtx = index;
9423 
9424       if (REG_P (index))
9425         reg = index;
9426       else if (GET_CODE (index) == SUBREG
9427                && REG_P (SUBREG_REG (index))
9428                && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9429                   <= UNITS_PER_WORD)
9430         reg = SUBREG_REG (index);
9431       else
9432         {
9433           reason = "index is not a register";
9434           goto report_error;
9435         }
9436 
9437       if (GET_MODE (index) != Pmode)
9438         {
9439           reason = "index is not in Pmode";
9440           goto report_error;
9441         }
9442 
9443       if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9444           || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9445         {
9446           reason = "index is not valid";
9447           goto report_error;
9448         }
9449     }
9450 
9451   /* Validate scale factor.  */
9452   if (scale != 1)
9453     {
9454       reason_rtx = GEN_INT (scale);
9455       if (!index)
9456         {
9457           reason = "scale without index";
9458           goto report_error;
9459         }
9460 
9461       if (scale != 2 && scale != 4 && scale != 8)
9462         {
9463           reason = "scale is not a valid multiplier";
9464           goto report_error;
9465         }
9466     }
9467 
9468   /* Validate displacement.  */
9469   if (disp)
9470     {
9471       reason_rtx = disp;
9472 
9473       if (GET_CODE (disp) == CONST
9474           && GET_CODE (XEXP (disp, 0)) == UNSPEC
9475           && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9476         switch (XINT (XEXP (disp, 0), 1))
9477           {
          /* Refuse GOTOFF and GOT in 64-bit mode since they are always
             64 bits wide when used.  While the ABI also specifies 32-bit
             relocations, we don't produce them at all and use IP-relative
             addressing instead.  */
9481           case UNSPEC_GOT:
9482           case UNSPEC_GOTOFF:
9483             gcc_assert (flag_pic);
9484             if (!TARGET_64BIT)
9485               goto is_legitimate_pic;
9486             reason = "64bit address unspec";
9487             goto report_error;
9488 
9489           case UNSPEC_GOTPCREL:
9490             gcc_assert (flag_pic);
9491             goto is_legitimate_pic;
9492 
9493           case UNSPEC_GOTTPOFF:
9494           case UNSPEC_GOTNTPOFF:
9495           case UNSPEC_INDNTPOFF:
9496           case UNSPEC_NTPOFF:
9497           case UNSPEC_DTPOFF:
9498             break;
9499 
9500           default:
9501             reason = "invalid address unspec";
9502             goto report_error;
9503           }
9504 
9505       else if (SYMBOLIC_CONST (disp)
9506                && (flag_pic
9507                    || (TARGET_MACHO
9508 #if TARGET_MACHO
9509                        && MACHOPIC_INDIRECT
9510                        && !machopic_operand_p (disp)
9511 #endif
9512                )))
9513         {
9514 
9515         is_legitimate_pic:
9516           if (TARGET_64BIT && (index || base))
9517             {
9518               /* foo@dtpoff(%rX) is ok.  */
9519               if (GET_CODE (disp) != CONST
9520                   || GET_CODE (XEXP (disp, 0)) != PLUS
9521                   || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9522                   || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9523                   || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9524                       && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9525                 {
9526                   reason = "non-constant pic memory reference";
9527                   goto report_error;
9528                 }
9529             }
9530           else if (! legitimate_pic_address_disp_p (disp))
9531             {
9532               reason = "displacement is an invalid pic construct";
9533               goto report_error;
9534             }
9535 
          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is a good idea, unfortunately these constructs
             may be created by the "adds using lea" optimization for
             incorrect code like:

             int a;
             int foo(int i)
               {
                 return *(&a+i);
               }

             This code is nonsensical, but results in addressing the
             GOT table with a pic_offset_table_rtx base.  We can't
             just refuse it easily, since it gets matched by the
             "addsi3" pattern, which later gets split to lea in the
             case the output register differs from the input.  While
             this could be handled by a separate addsi pattern for this
             case that never results in lea, disabling this test seems
             to be the easier and correct fix for the crash.  */
9557         }
9558       else if (GET_CODE (disp) != LABEL_REF
9559                && !CONST_INT_P (disp)
9560                && (GET_CODE (disp) != CONST
9561                    || !legitimate_constant_p (disp))
9562                && (GET_CODE (disp) != SYMBOL_REF
9563                    || !legitimate_constant_p (disp)))
9564         {
9565           reason = "displacement is not constant";
9566           goto report_error;
9567         }
9568       else if (TARGET_64BIT
9569                && !x86_64_immediate_operand (disp, VOIDmode))
9570         {
9571           reason = "displacement is out of range";
9572           goto report_error;
9573         }
9574     }
9575 
9576   /* Everything looks valid.  */
9577   return TRUE;
9578 
9579  report_error:
9580   return FALSE;
9581 }
9582 
9583 /* Return a unique alias set for the GOT.  */
9584 
9585 static alias_set_type
9586 ix86_GOT_alias_set (void)
9587 {
9588   static alias_set_type set = -1;
9589   if (set == -1)
9590     set = new_alias_set ();
9591   return set;
9592 }
9593 
9594 /* Return a legitimate reference for ORIG (an address) using the
9595    register REG.  If REG is 0, a new pseudo is generated.
9596 
9597    There are two types of references that must be handled:
9598 
9599    1. Global data references must load the address from the GOT, via
9600       the PIC reg.  An insn is emitted to do this load, and the reg is
9601       returned.
9602 
9603    2. Static data references, constant pool addresses, and code labels
9604       compute the address as an offset from the GOT, whose base is in
9605       the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
9606       differentiate them from global data objects.  The returned
9607       address is the PIC reg + an unspec constant.
9608 
9609    GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
9610    reg also appears in the address.  */
9611 
9612 static rtx
9613 legitimize_pic_address (rtx orig, rtx reg)
9614 {
9615   rtx addr = orig;
9616   rtx new_rtx = orig;
9617   rtx base;
9618 
9619 #if TARGET_MACHO
9620   if (TARGET_MACHO && !TARGET_64BIT)
9621     {
9622       if (reg == 0)
9623         reg = gen_reg_rtx (Pmode);
9624       /* Use the generic Mach-O PIC machinery.  */
9625       return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9626     }
9627 #endif
9628 
9629   if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9630     new_rtx = addr;
9631   else if (TARGET_64BIT
9632            && ix86_cmodel != CM_SMALL_PIC
9633            && gotoff_operand (addr, Pmode))
9634     {
9635       rtx tmpreg;
9636       /* This symbol may be referenced via a displacement from the PIC
9637          base address (@GOTOFF).  */
9638 
9639       if (reload_in_progress)
9640         df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9641       if (GET_CODE (addr) == CONST)
9642         addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
                                    UNSPEC_GOTOFF);
          new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
        }
      else
        new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9651       new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9652       if (!reg)
9653         tmpreg = gen_reg_rtx (Pmode);
9654       else
9655         tmpreg = reg;
9656       emit_move_insn (tmpreg, new_rtx);
9657 
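      /* Add the PIC base to the @GOTOFF constant; if the caller supplied
         a destination register, leave the final address there.  */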
9658       if (reg != 0)
9659         {
9660           new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9661                                          tmpreg, 1, OPTAB_DIRECT);
9662           new_rtx = reg;
9663         }
      else
        new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9665     }
9666   else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9667     {
9668       /* This symbol may be referenced via a displacement from the PIC
9669          base address (@GOTOFF).  */
9670 
9671       if (reload_in_progress)
9672         df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9673       if (GET_CODE (addr) == CONST)
9674         addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
                                    UNSPEC_GOTOFF);
          new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
        }
      else
        new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9683       new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9684       new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9685 
9686       if (reg != 0)
9687         {
9688           emit_move_insn (reg, new_rtx);
9689           new_rtx = reg;
9690         }
9691     }
9692   else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9693            /* We can't use @GOTOFF for text labels on VxWorks;
9694               see gotoff_operand.  */
9695            || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9696     {
9697       if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9698         {
9699           if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9700             return legitimize_dllimport_symbol (addr, true);
9701           if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9702               && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9703               && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9704             {
9705               rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9706               return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
9707             }
9708         }
9709 
9710       if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9711         {
9712           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9713           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9714           new_rtx = gen_const_mem (Pmode, new_rtx);
9715           set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9716 
9717           if (reg == 0)
9718             reg = gen_reg_rtx (Pmode);
          /* Use gen_movsi directly; otherwise the address would be
             loaded into a register and CSEd.  We don't want to CSE this
             address itself; instead we CSE the addresses loaded from the
             GOT table, so skip this.  */
9722           emit_insn (gen_movsi (reg, new_rtx));
9723           new_rtx = reg;
9724         }
9725       else
9726         {
9727           /* This symbol must be referenced via a load from the
9728              Global Offset Table (@GOT).  */
9729 
9730           if (reload_in_progress)
9731             df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9732           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9733           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9734           if (TARGET_64BIT)
9735             new_rtx = force_reg (Pmode, new_rtx);
9736           new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9737           new_rtx = gen_const_mem (Pmode, new_rtx);
9738           set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9739 
9740           if (reg == 0)
9741             reg = gen_reg_rtx (Pmode);
9742           emit_move_insn (reg, new_rtx);
9743           new_rtx = reg;
9744         }
9745     }
9746   else
9747     {
9748       if (CONST_INT_P (addr)
9749           && !x86_64_immediate_operand (addr, VOIDmode))
9750         {
9751           if (reg)
9752             {
9753               emit_move_insn (reg, addr);
9754               new_rtx = reg;
9755             }
9756           else
9757             new_rtx = force_reg (Pmode, addr);
9758         }
9759       else if (GET_CODE (addr) == CONST)
9760         {
9761           addr = XEXP (addr, 0);
9762 
          /* We must match what we generated earlier.  Assume the only
             UNSPECs that can get here are ours; not that we could do
             anything with them anyway....  */
9766           if (GET_CODE (addr) == UNSPEC
9767               || (GET_CODE (addr) == PLUS
9768                   && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9769             return orig;
9770           gcc_assert (GET_CODE (addr) == PLUS);
9771         }
9772       if (GET_CODE (addr) == PLUS)
9773         {
9774           rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9775 
9776           /* Check first to see if this is a constant offset from a @GOTOFF
9777              symbol reference.  */
9778           if (gotoff_operand (op0, Pmode)
9779               && CONST_INT_P (op1))
9780             {
9781               if (!TARGET_64BIT)
9782                 {
9783                   if (reload_in_progress)
9784                     df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9785                   new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9786                                             UNSPEC_GOTOFF);
9787                   new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9788                   new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9789                   new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9790 
9791                   if (reg != 0)
9792                     {
9793                       emit_move_insn (reg, new_rtx);
9794                       new_rtx = reg;
9795                     }
9796                 }
9797               else
9798                 {
9799                   if (INTVAL (op1) < -16*1024*1024
9800                       || INTVAL (op1) >= 16*1024*1024)
9801                     {
9802                       if (!x86_64_immediate_operand (op1, Pmode))
9803                         op1 = force_reg (Pmode, op1);
9804                       new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
9805                     }
9806                 }
9807             }
9808           else
9809             {
9810               base = legitimize_pic_address (XEXP (addr, 0), reg);
9811               new_rtx  = legitimize_pic_address (XEXP (addr, 1),
9812                                                  base == reg ? NULL_RTX : reg);
9813 
9814               if (CONST_INT_P (new_rtx))
9815                 new_rtx = plus_constant (base, INTVAL (new_rtx));
9816               else
9817                 {
9818                   if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9819                     {
9820                       base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9821                       new_rtx = XEXP (new_rtx, 1);
9822                     }
9823                   new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9824                 }
9825             }
9826         }
9827     }
9828   return new_rtx;
9829 }
9830 
9831 /* Load the thread pointer.  If TO_REG is true, force it into a register.  */
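/* (On GNU/Linux targets the thread pointer lives in a segment register,
   %gs for 32-bit and %fs for 64-bit, so the UNSPEC_TP reference below
   typically ends up as an access relative to that segment.  Informal
   note, not a claim about every TLS-capable target.)  */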
9832 
9833 static rtx
9834 get_thread_pointer (int to_reg)
9835 {
9836   rtx tp, reg, insn;
9837 
9838   tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9839   if (!to_reg)
9840     return tp;
9841 
9842   reg = gen_reg_rtx (Pmode);
9843   insn = gen_rtx_SET (VOIDmode, reg, tp);
9844   insn = emit_insn (insn);
9845 
9846   return reg;
9847 }
9848 
9849 /* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
9850    false if we expect this to be used for a memory address and true if
9851    we expect to load the address into a register.  */
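
/* Rough summary of the sequences generated below (sketches only; the
   exact insns depend on TARGET_64BIT, TARGET_GNU2_TLS and -fpic):

     global-dynamic: call __tls_get_addr through the
                     tls_global_dynamic_{32,64} patterns;
     local-dynamic:  one __tls_get_addr call for the module base, then a
                     sym@DTPOFF offset per variable;
     initial-exec:   load sym@GOTTPOFF/@GOTNTPOFF from the GOT and
                     combine with the thread pointer;
     local-exec:     combine sym@TPOFF/@NTPOFF with the thread pointer
                     directly.  */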
9852 
9853 static rtx
9854 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9855 {
9856   rtx dest, base, off, pic, tp;
9857   int type;
9858 
9859   switch (model)
9860     {
9861     case TLS_MODEL_GLOBAL_DYNAMIC:
9862       dest = gen_reg_rtx (Pmode);
9863       tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9864 
9865       if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9866         {
9867           rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9868 
9869           start_sequence ();
9870           emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9871           insns = get_insns ();
9872           end_sequence ();
9873 
9874           RTL_CONST_CALL_P (insns) = 1;
9875           emit_libcall_block (insns, dest, rax, x);
9876         }
9877       else if (TARGET_64BIT && TARGET_GNU2_TLS)
9878         emit_insn (gen_tls_global_dynamic_64 (dest, x));
9879       else
9880         emit_insn (gen_tls_global_dynamic_32 (dest, x));
9881 
9882       if (TARGET_GNU2_TLS)
9883         {
9884           dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9885 
9886           set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9887         }
9888       break;
9889 
9890     case TLS_MODEL_LOCAL_DYNAMIC:
9891       base = gen_reg_rtx (Pmode);
9892       tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9893 
9894       if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9895         {
9896           rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9897 
9898           start_sequence ();
9899           emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9900           insns = get_insns ();
9901           end_sequence ();
9902 
9903           note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9904           note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9905           RTL_CONST_CALL_P (insns) = 1;
9906           emit_libcall_block (insns, base, rax, note);
9907         }
9908       else if (TARGET_64BIT && TARGET_GNU2_TLS)
9909         emit_insn (gen_tls_local_dynamic_base_64 (base));
9910       else
9911         emit_insn (gen_tls_local_dynamic_base_32 (base));
9912 
9913       if (TARGET_GNU2_TLS)
9914         {
9915           rtx x = ix86_tls_module_base ();
9916 
9917           set_unique_reg_note (get_last_insn (), REG_EQUIV,
9918                                gen_rtx_MINUS (Pmode, x, tp));
9919         }
9920 
9921       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9922       off = gen_rtx_CONST (Pmode, off);
9923 
9924       dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9925 
9926       if (TARGET_GNU2_TLS)
9927         {
9928           dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9929 
9930           set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9931         }
9932 
9933       break;
9934 
9935     case TLS_MODEL_INITIAL_EXEC:
9936       if (TARGET_64BIT)
9937         {
9938           pic = NULL;
9939           type = UNSPEC_GOTNTPOFF;
9940         }
9941       else if (flag_pic)
9942         {
9943           if (reload_in_progress)
9944             df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9945           pic = pic_offset_table_rtx;
9946           type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9947         }
9948       else if (!TARGET_ANY_GNU_TLS)
9949         {
9950           pic = gen_reg_rtx (Pmode);
9951           emit_insn (gen_set_got (pic));
9952           type = UNSPEC_GOTTPOFF;
9953         }
9954       else
9955         {
9956           pic = NULL;
9957           type = UNSPEC_INDNTPOFF;
9958         }
9959 
9960       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9961       off = gen_rtx_CONST (Pmode, off);
9962       if (pic)
9963         off = gen_rtx_PLUS (Pmode, pic, off);
9964       off = gen_const_mem (Pmode, off);
9965       set_mem_alias_set (off, ix86_GOT_alias_set ());
9966 
9967       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9968         {
9969           base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9970           off = force_reg (Pmode, off);
9971           return gen_rtx_PLUS (Pmode, base, off);
9972         }
9973       else
9974         {
9975           base = get_thread_pointer (true);
9976           dest = gen_reg_rtx (Pmode);
9977           emit_insn (gen_subsi3 (dest, base, off));
9978         }
9979       break;
9980 
9981     case TLS_MODEL_LOCAL_EXEC:
9982       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9983                             (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9984                             ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9985       off = gen_rtx_CONST (Pmode, off);
9986 
9987       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9988         {
9989           base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9990           return gen_rtx_PLUS (Pmode, base, off);
9991         }
9992       else
9993         {
9994           base = get_thread_pointer (true);
9995           dest = gen_reg_rtx (Pmode);
9996           emit_insn (gen_subsi3 (dest, base, off));
9997         }
9998       break;
9999 
10000     default:
10001       gcc_unreachable ();
10002     }
10003 
10004   return dest;
10005 }
10006 
10007 /* Create or return the unique __imp_DECL dllimport symbol corresponding
10008    to symbol DECL.  */
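
/* E.g. with the default "_" user label prefix, a reference to a
   dllimport'd `foo' becomes a load from the linker-provided import slot
   `__imp__foo'; fastcall symbols (FASTCALL_PREFIX) and targets with an
   empty user label prefix get `__imp_' instead.  (Illustrative; see the
   prefix selection in get_dllimport_decl.)  */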
10009 
10010 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
10011   htab_t dllimport_map;
10012 
10013 static tree
10014 get_dllimport_decl (tree decl)
10015 {
10016   struct tree_map *h, in;
10017   void **loc;
10018   const char *name;
10019   const char *prefix;
10020   size_t namelen, prefixlen;
10021   char *imp_name;
10022   tree to;
10023   rtx rtl;
10024 
10025   if (!dllimport_map)
10026     dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
10027 
10028   in.hash = htab_hash_pointer (decl);
10029   in.base.from = decl;
10030   loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
10031   h = (struct tree_map *) *loc;
10032   if (h)
10033     return h->to;
10034 
10035   *loc = h = GGC_NEW (struct tree_map);
10036   h->hash = in.hash;
10037   h->base.from = decl;
10038   h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
10039   DECL_ARTIFICIAL (to) = 1;
10040   DECL_IGNORED_P (to) = 1;
10041   DECL_EXTERNAL (to) = 1;
10042   TREE_READONLY (to) = 1;
10043 
10044   name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
10045   name = targetm.strip_name_encoding (name);
10046   prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
10047     ? "*__imp_" : "*__imp__";
10048   namelen = strlen (name);
10049   prefixlen = strlen (prefix);
10050   imp_name = (char *) alloca (namelen + prefixlen + 1);
10051   memcpy (imp_name, prefix, prefixlen);
10052   memcpy (imp_name + prefixlen, name, namelen + 1);
10053 
10054   name = ggc_alloc_string (imp_name, namelen + prefixlen);
10055   rtl = gen_rtx_SYMBOL_REF (Pmode, name);
10056   SET_SYMBOL_REF_DECL (rtl, to);
10057   SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
10058 
10059   rtl = gen_const_mem (Pmode, rtl);
10060   set_mem_alias_set (rtl, ix86_GOT_alias_set ());
10061 
10062   SET_DECL_RTL (to, rtl);
10063   SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
10064 
10065   return to;
10066 }
10067 
/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result to be a register.  */
10070 
10071 static rtx
10072 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
10073 {
10074   tree imp_decl;
10075   rtx x;
10076 
10077   gcc_assert (SYMBOL_REF_DECL (symbol));
10078   imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10079 
10080   x = DECL_RTL (imp_decl);
10081   if (want_reg)
10082     x = force_reg (Pmode, x);
10083   return x;
10084 }
10085 
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This function is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE is passed so that this function can use GO_IF_LEGITIMATE_ADDRESS.

   It is always safe for this function to do nothing.  It exists to recognize
   opportunities to optimize the output.
10098 
10099    For the 80386, we handle X+REG by loading X into a register R and
10100    using R+REG.  R will go in a general reg and indexing will be used.
10101    However, if REG is a broken-out memory address or multiplication,
10102    nothing needs to be done because REG can certainly go in a general reg.
10103 
10104    When -fpic is used, special handling is needed for symbolic references.
10105    See comments by legitimize_pic_address in i386.c for details.  */
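
/* For instance, an address such as (plus (ashift reg 2) (plus reg cst))
   is rewritten step by step below into the canonical
   (plus (plus (mult reg 4) reg) cst) form that matches the machine's
   base + index*scale + displacement addressing.  (A sketch, not an
   exhaustive list of the transformations.)  */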
10106 
10107 rtx
10108 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
10109 {
10110   int changed = 0;
10111   unsigned log;
10112 
10113   log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10114   if (log)
10115     return legitimize_tls_address (x, (enum tls_model) log, false);
10116   if (GET_CODE (x) == CONST
10117       && GET_CODE (XEXP (x, 0)) == PLUS
10118       && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10119       && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10120     {
10121       rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10122                                       (enum tls_model) log, false);
10123       return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10124     }
10125 
10126   if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10127     {
10128       if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10129         return legitimize_dllimport_symbol (x, true);
10130       if (GET_CODE (x) == CONST
10131           && GET_CODE (XEXP (x, 0)) == PLUS
10132           && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10133           && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10134         {
10135           rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10136           return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10137         }
10138     }
10139 
10140   if (flag_pic && SYMBOLIC_CONST (x))
10141     return legitimize_pic_address (x, 0);
10142 
  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
10144   if (GET_CODE (x) == ASHIFT
10145       && CONST_INT_P (XEXP (x, 1))
10146       && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10147     {
10148       changed = 1;
10149       log = INTVAL (XEXP (x, 1));
10150       x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10151                         GEN_INT (1 << log));
10152     }
10153 
10154   if (GET_CODE (x) == PLUS)
10155     {
10156       /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
10157 
10158       if (GET_CODE (XEXP (x, 0)) == ASHIFT
10159           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10160           && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10161         {
10162           changed = 1;
10163           log = INTVAL (XEXP (XEXP (x, 0), 1));
10164           XEXP (x, 0) = gen_rtx_MULT (Pmode,
10165                                       force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10166                                       GEN_INT (1 << log));
10167         }
10168 
10169       if (GET_CODE (XEXP (x, 1)) == ASHIFT
10170           && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10171           && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10172         {
10173           changed = 1;
10174           log = INTVAL (XEXP (XEXP (x, 1), 1));
10175           XEXP (x, 1) = gen_rtx_MULT (Pmode,
10176                                       force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10177                                       GEN_INT (1 << log));
10178         }
10179 
10180       /* Put multiply first if it isn't already.  */
10181       if (GET_CODE (XEXP (x, 1)) == MULT)
10182         {
10183           rtx tmp = XEXP (x, 0);
10184           XEXP (x, 0) = XEXP (x, 1);
10185           XEXP (x, 1) = tmp;
10186           changed = 1;
10187         }
10188 
10189       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10190          into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
10191          created by virtual register instantiation, register elimination, and
10192          similar optimizations.  */
10193       if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10194         {
10195           changed = 1;
10196           x = gen_rtx_PLUS (Pmode,
10197                             gen_rtx_PLUS (Pmode, XEXP (x, 0),
10198                                           XEXP (XEXP (x, 1), 0)),
10199                             XEXP (XEXP (x, 1), 1));
10200         }
10201 
10202       /* Canonicalize
10203          (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10204          into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
10205       else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10206                && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10207                && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10208                && CONSTANT_P (XEXP (x, 1)))
10209         {
10210           rtx constant;
10211           rtx other = NULL_RTX;
10212 
10213           if (CONST_INT_P (XEXP (x, 1)))
10214             {
10215               constant = XEXP (x, 1);
10216               other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10217             }
10218           else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10219             {
10220               constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10221               other = XEXP (x, 1);
10222             }
10223           else
10224             constant = 0;
10225 
10226           if (constant)
10227             {
10228               changed = 1;
10229               x = gen_rtx_PLUS (Pmode,
10230                                 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10231                                               XEXP (XEXP (XEXP (x, 0), 1), 0)),
10232                                 plus_constant (other, INTVAL (constant)));
10233             }
10234         }
10235 
10236       if (changed && legitimate_address_p (mode, x, FALSE))
10237         return x;
10238 
10239       if (GET_CODE (XEXP (x, 0)) == MULT)
10240         {
10241           changed = 1;
10242           XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10243         }
10244 
10245       if (GET_CODE (XEXP (x, 1)) == MULT)
10246         {
10247           changed = 1;
10248           XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10249         }
10250 
10251       if (changed
10252           && REG_P (XEXP (x, 1))
10253           && REG_P (XEXP (x, 0)))
10254         return x;
10255 
10256       if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10257         {
10258           changed = 1;
10259           x = legitimize_pic_address (x, 0);
10260         }
10261 
10262       if (changed && legitimate_address_p (mode, x, FALSE))
10263         return x;
10264 
10265       if (REG_P (XEXP (x, 0)))
10266         {
10267           rtx temp = gen_reg_rtx (Pmode);
10268           rtx val  = force_operand (XEXP (x, 1), temp);
10269           if (val != temp)
10270             emit_move_insn (temp, val);
10271 
10272           XEXP (x, 1) = temp;
10273           return x;
10274         }
10275 
10276       else if (REG_P (XEXP (x, 1)))
10277         {
10278           rtx temp = gen_reg_rtx (Pmode);
10279           rtx val  = force_operand (XEXP (x, 0), temp);
10280           if (val != temp)
10281             emit_move_insn (temp, val);
10282 
10283           XEXP (x, 0) = temp;
10284           return x;
10285         }
10286     }
10287 
10288   return x;
10289 }
10290 
10291 /* Print an integer constant expression in assembler syntax.  Addition
10292    and subtraction are the only arithmetic that may appear in these
10293    expressions.  FILE is the stdio stream to write to, X is the rtx, and
10294    CODE is the operand print code from the output string.  */
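
/* E.g. (const (plus (unspec [foo] UNSPEC_GOTOFF) (const_int 4))) is
   printed as `foo@GOTOFF+4'.  (Illustrative only.)  */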
10295 
10296 static void
10297 output_pic_addr_const (FILE *file, rtx x, int code)
10298 {
10299   char buf[256];
10300 
10301   switch (GET_CODE (x))
10302     {
10303     case PC:
10304       gcc_assert (flag_pic);
10305       putc ('.', file);
10306       break;
10307 
10308     case SYMBOL_REF:
10309       if (! TARGET_MACHO || TARGET_64BIT)
10310         output_addr_const (file, x);
10311       else
10312         {
10313           const char *name = XSTR (x, 0);
10314 
10315           /* Mark the decl as referenced so that cgraph will
10316              output the function.  */
10317           if (SYMBOL_REF_DECL (x))
10318             mark_decl_referenced (SYMBOL_REF_DECL (x));
10319 
10320 #if TARGET_MACHO
10321           if (MACHOPIC_INDIRECT
10322               && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10323             name = machopic_indirection_name (x, /*stub_p=*/true);
10324 #endif
10325           assemble_name (file, name);
10326         }
10327       if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10328           && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10329         fputs ("@PLT", file);
10330       break;
10331 
10332     case LABEL_REF:
10333       x = XEXP (x, 0);
10334       /* FALLTHRU */
10335     case CODE_LABEL:
10336       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10337       assemble_name (asm_out_file, buf);
10338       break;
10339 
10340     case CONST_INT:
10341       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10342       break;
10343 
10344     case CONST:
10345       /* This used to output parentheses around the expression,
10346          but that does not work on the 386 (either ATT or BSD assembler).  */
10347       output_pic_addr_const (file, XEXP (x, 0), code);
10348       break;
10349 
10350     case CONST_DOUBLE:
10351       if (GET_MODE (x) == VOIDmode)
10352         {
10353           /* We can use %d if the number is <32 bits and positive.  */
10354           if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10355             fprintf (file, "0x%lx%08lx",
10356                      (unsigned long) CONST_DOUBLE_HIGH (x),
10357                      (unsigned long) CONST_DOUBLE_LOW (x));
10358           else
10359             fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10360         }
10361       else
10362         /* We can't handle floating point constants;
10363            PRINT_OPERAND must handle them.  */
10364         output_operand_lossage ("floating constant misused");
10365       break;
10366 
10367     case PLUS:
10368       /* Some assemblers need integer constants to appear first.  */
10369       if (CONST_INT_P (XEXP (x, 0)))
10370         {
10371           output_pic_addr_const (file, XEXP (x, 0), code);
10372           putc ('+', file);
10373           output_pic_addr_const (file, XEXP (x, 1), code);
10374         }
10375       else
10376         {
10377           gcc_assert (CONST_INT_P (XEXP (x, 1)));
10378           output_pic_addr_const (file, XEXP (x, 1), code);
10379           putc ('+', file);
10380           output_pic_addr_const (file, XEXP (x, 0), code);
10381         }
10382       break;
10383 
10384     case MINUS:
10385       if (!TARGET_MACHO)
10386         putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10387       output_pic_addr_const (file, XEXP (x, 0), code);
10388       putc ('-', file);
10389       output_pic_addr_const (file, XEXP (x, 1), code);
10390       if (!TARGET_MACHO)
10391         putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
10392       break;
10393 
10394      case UNSPEC:
10395        gcc_assert (XVECLEN (x, 0) == 1);
10396        output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10397        switch (XINT (x, 1))
10398         {
10399         case UNSPEC_GOT:
10400           fputs ("@GOT", file);
10401           break;
10402         case UNSPEC_GOTOFF:
10403           fputs ("@GOTOFF", file);
10404           break;
10405         case UNSPEC_PLTOFF:
10406           fputs ("@PLTOFF", file);
10407           break;
10408         case UNSPEC_GOTPCREL:
10409           fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10410                  "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10411           break;
10412         case UNSPEC_GOTTPOFF:
10413           /* FIXME: This might be @TPOFF in Sun ld too.  */
10414           fputs ("@GOTTPOFF", file);
10415           break;
10416         case UNSPEC_TPOFF:
10417           fputs ("@TPOFF", file);
10418           break;
10419         case UNSPEC_NTPOFF:
10420           if (TARGET_64BIT)
10421             fputs ("@TPOFF", file);
10422           else
10423             fputs ("@NTPOFF", file);
10424           break;
10425         case UNSPEC_DTPOFF:
10426           fputs ("@DTPOFF", file);
10427           break;
10428         case UNSPEC_GOTNTPOFF:
10429           if (TARGET_64BIT)
10430             fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10431                    "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10432           else
10433             fputs ("@GOTNTPOFF", file);
10434           break;
10435         case UNSPEC_INDNTPOFF:
10436           fputs ("@INDNTPOFF", file);
10437           break;
10438 #if TARGET_MACHO
10439         case UNSPEC_MACHOPIC_OFFSET:
10440           putc ('-', file);
10441           machopic_output_function_base_name (file);
10442           break;
10443 #endif
10444         default:
10445           output_operand_lossage ("invalid UNSPEC as operand");
10446           break;
10447         }
10448        break;
10449 
10450     default:
10451       output_operand_lossage ("invalid expression as operand");
10452     }
10453 }
10454 
10455 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10456    We need to emit DTP-relative relocations.  */
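
/* E.g. with the usual "\t.long\t" ASM_LONG, SIZE == 4 emits
   `.long foo@DTPOFF' while SIZE == 8 emits `.long foo@DTPOFF, 0'
   (low part followed by a zero high part).  */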
10457 
10458 static void ATTRIBUTE_UNUSED
10459 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10460 {
10461   fputs (ASM_LONG, file);
10462   output_addr_const (file, x);
10463   fputs ("@DTPOFF", file);
10464   switch (size)
10465     {
10466     case 4:
10467       break;
10468     case 8:
10469       fputs (", 0", file);
10470       break;
10471     default:
10472       gcc_unreachable ();
10473    }
10474 }
10475 
10476 /* Return true if X is a representation of the PIC register.  This copes
10477    with calls from ix86_find_base_term, where the register might have
10478    been replaced by a cselib value.  */
10479 
10480 static bool
10481 ix86_pic_register_p (rtx x)
10482 {
10483   if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
10484     return (pic_offset_table_rtx
10485             && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10486   else
10487     return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10488 }
10489 
10490 /* In the name of slightly smaller debug output, and to cater to
10491    general assembler lossage, recognize PIC+GOTOFF and turn it back
10492    into a direct symbol reference.
10493 
10494    On Darwin, this is necessary to avoid a crash, because Darwin
10495    has a different PIC label for each routine but the DWARF debugging
10496    information is not associated with any particular routine, so it's
10497    necessary to remove references to the PIC label from RTL stored by
10498    the DWARF output code.  */
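
/* E.g. (plus %ebx (const (unspec [foo] UNSPEC_GOTOFF))) is turned back
   into plain `foo', with any register and constant addends reattached
   afterwards.  (A sketch of the 32-bit case handled below.)  */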
10499 
10500 static rtx
10501 ix86_delegitimize_address (rtx orig_x)
10502 {
10503   rtx x = orig_x;
10504   /* reg_addend is NULL or a multiple of some register.  */
10505   rtx reg_addend = NULL_RTX;
10506   /* const_addend is NULL or a const_int.  */
10507   rtx const_addend = NULL_RTX;
10508   /* This is the result, or NULL.  */
10509   rtx result = NULL_RTX;
10510 
10511   if (MEM_P (x))
10512     x = XEXP (x, 0);
10513 
10514   if (TARGET_64BIT)
10515     {
10516       if (GET_CODE (x) != CONST
10517           || GET_CODE (XEXP (x, 0)) != UNSPEC
10518           || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10519           || !MEM_P (orig_x))
10520         return orig_x;
10521       return XVECEXP (XEXP (x, 0), 0, 0);
10522     }
10523 
10524   if (GET_CODE (x) != PLUS
10525       || GET_CODE (XEXP (x, 1)) != CONST)
10526     return orig_x;
10527 
10528   if (ix86_pic_register_p (XEXP (x, 0)))
10529     /* %ebx + GOT/GOTOFF */
10530     ;
10531   else if (GET_CODE (XEXP (x, 0)) == PLUS)
10532     {
10533       /* %ebx + %reg * scale + GOT/GOTOFF */
10534       reg_addend = XEXP (x, 0);
10535       if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10536         reg_addend = XEXP (reg_addend, 1);
10537       else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10538         reg_addend = XEXP (reg_addend, 0);
10539       else
10540         return orig_x;
10541       if (!REG_P (reg_addend)
10542           && GET_CODE (reg_addend) != MULT
10543           && GET_CODE (reg_addend) != ASHIFT)
10544         return orig_x;
10545     }
10546   else
10547     return orig_x;
10548 
10549   x = XEXP (XEXP (x, 1), 0);
10550   if (GET_CODE (x) == PLUS
10551       && CONST_INT_P (XEXP (x, 1)))
10552     {
10553       const_addend = XEXP (x, 1);
10554       x = XEXP (x, 0);
10555     }
10556 
10557   if (GET_CODE (x) == UNSPEC
10558       && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10559           || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10560     result = XVECEXP (x, 0, 0);
10561 
10562   if (TARGET_MACHO && darwin_local_data_pic (x)
10563       && !MEM_P (orig_x))
10564     result = XVECEXP (x, 0, 0);
10565 
10566   if (! result)
10567     return orig_x;
10568 
10569   if (const_addend)
10570     result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10571   if (reg_addend)
10572     result = gen_rtx_PLUS (Pmode, reg_addend, result);
10573   return result;
10574 }
10575 
10576 /* If X is a machine specific address (i.e. a symbol or label being
10577    referenced as a displacement from the GOT implemented using an
10578    UNSPEC), then return the base term.  Otherwise return X.  */
10579 
10580 rtx
10581 ix86_find_base_term (rtx x)
10582 {
10583   rtx term;
10584 
10585   if (TARGET_64BIT)
10586     {
10587       if (GET_CODE (x) != CONST)
10588         return x;
10589       term = XEXP (x, 0);
10590       if (GET_CODE (term) == PLUS
10591           && (CONST_INT_P (XEXP (term, 1))
10592               || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10593         term = XEXP (term, 0);
10594       if (GET_CODE (term) != UNSPEC
10595           || XINT (term, 1) != UNSPEC_GOTPCREL)
10596         return x;
10597 
10598       return XVECEXP (term, 0, 0);
10599     }
10600 
10601   return ix86_delegitimize_address (x);
10602 }
10603 
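/* Print to FILE the assembler condition-code suffix (e.g. "e", "ne",
   "b") corresponding to comparison CODE in condition mode MODE.
   REVERSE means print the reversed condition.  FP selects the spelling
   used by fcmov where it differs from the integer one (e.g. "nbe"
   instead of "a").  */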
10604 static void
10605 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10606                     int fp, FILE *file)
10607 {
10608   const char *suffix;
10609 
10610   if (mode == CCFPmode || mode == CCFPUmode)
10611     {
10612       enum rtx_code second_code, bypass_code;
10613       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10614       gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10615       code = ix86_fp_compare_code_to_integer (code);
10616       mode = CCmode;
10617     }
10618   if (reverse)
10619     code = reverse_condition (code);
10620 
10621   switch (code)
10622     {
10623     case EQ:
10624       switch (mode)
10625         {
10626         case CCAmode:
10627           suffix = "a";
10628           break;
10629 
10630         case CCCmode:
10631           suffix = "c";
10632           break;
10633 
10634         case CCOmode:
10635           suffix = "o";
10636           break;
10637 
10638         case CCSmode:
10639           suffix = "s";
10640           break;
10641 
10642         default:
10643           suffix = "e";
10644         }
10645       break;
10646     case NE:
10647       switch (mode)
10648         {
10649         case CCAmode:
10650           suffix = "na";
10651           break;
10652 
10653         case CCCmode:
10654           suffix = "nc";
10655           break;
10656 
10657         case CCOmode:
10658           suffix = "no";
10659           break;
10660 
10661         case CCSmode:
10662           suffix = "ns";
10663           break;
10664 
10665         default:
10666           suffix = "ne";
10667         }
10668       break;
10669     case GT:
10670       gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10671       suffix = "g";
10672       break;
10673     case GTU:
10674       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10675          Those same assemblers have the same but opposite lossage on cmov.  */
10676       if (mode == CCmode)
10677         suffix = fp ? "nbe" : "a";
10678       else if (mode == CCCmode)
10679         suffix = "b";
10680       else
10681         gcc_unreachable ();
10682       break;
10683     case LT:
10684       switch (mode)
10685         {
10686         case CCNOmode:
10687         case CCGOCmode:
10688           suffix = "s";
10689           break;
10690 
10691         case CCmode:
10692         case CCGCmode:
10693           suffix = "l";
10694           break;
10695 
10696         default:
10697           gcc_unreachable ();
10698         }
10699       break;
10700     case LTU:
10701       gcc_assert (mode == CCmode || mode == CCCmode);
10702       suffix = "b";
10703       break;
10704     case GE:
10705       switch (mode)
10706         {
10707         case CCNOmode:
10708         case CCGOCmode:
10709           suffix = "ns";
10710           break;
10711 
10712         case CCmode:
10713         case CCGCmode:
10714           suffix = "ge";
10715           break;
10716 
10717         default:
10718           gcc_unreachable ();
10719         }
10720       break;
10721     case GEU:
10722       /* ??? As above.  */
10723       gcc_assert (mode == CCmode || mode == CCCmode);
10724       suffix = fp ? "nb" : "ae";
10725       break;
10726     case LE:
10727       gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10728       suffix = "le";
10729       break;
10730     case LEU:
10731       /* ??? As above.  */
10732       if (mode == CCmode)
10733         suffix = "be";
10734       else if (mode == CCCmode)
10735         suffix = fp ? "nb" : "ae";
10736       else
10737         gcc_unreachable ();
10738       break;
10739     case UNORDERED:
10740       suffix = fp ? "u" : "p";
10741       break;
10742     case ORDERED:
10743       suffix = fp ? "nu" : "np";
10744       break;
10745     default:
10746       gcc_unreachable ();
10747     }
10748   fputs (suffix, file);
10749 }
10750 
10751 /* Print the name of register X to FILE based on its machine mode and number.
10752    If CODE is 'w', pretend the mode is HImode.
10753    If CODE is 'b', pretend the mode is QImode.
10754    If CODE is 'k', pretend the mode is SImode.
10755    If CODE is 'q', pretend the mode is DImode.
10756    If CODE is 'x', pretend the mode is V4SFmode.
10757    If CODE is 't', pretend the mode is V8SFmode.
10758    If CODE is 'h', pretend the reg is the 'high' byte register.
10759    If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   If CODE is 'd', duplicate the operand for an AVX instruction.
10761  */
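
/* E.g. for (reg:SI ax), code 'b' prints `al', 'w' prints `ax', 'k'
   prints `eax' and 'q' prints `rax' (the last only on 64-bit targets).
   Illustrative; see the size dispatch below.  */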
10762 
10763 void
10764 print_reg (rtx x, int code, FILE *file)
10765 {
10766   const char *reg;
10767   bool duplicated = code == 'd' && TARGET_AVX;
10768 
10769   gcc_assert (x == pc_rtx
10770               || (REGNO (x) != ARG_POINTER_REGNUM
10771                   && REGNO (x) != FRAME_POINTER_REGNUM
10772                   && REGNO (x) != FLAGS_REG
10773                   && REGNO (x) != FPSR_REG
10774                   && REGNO (x) != FPCR_REG));
10775 
10776   if (ASSEMBLER_DIALECT == ASM_ATT)
10777     putc ('%', file);
10778 
10779   if (x == pc_rtx)
10780     {
10781       gcc_assert (TARGET_64BIT);
10782       fputs ("rip", file);
10783       return;
10784     }
10785 
10786   if (code == 'w' || MMX_REG_P (x))
10787     code = 2;
10788   else if (code == 'b')
10789     code = 1;
10790   else if (code == 'k')
10791     code = 4;
10792   else if (code == 'q')
10793     code = 8;
10794   else if (code == 'y')
10795     code = 3;
10796   else if (code == 'h')
10797     code = 0;
10798   else if (code == 'x')
10799     code = 16;
10800   else if (code == 't')
10801     code = 32;
10802   else
10803     code = GET_MODE_SIZE (GET_MODE (x));
10804 
  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers.  */
10807   if (REX_INT_REG_P (x))
10808     {
10809       gcc_assert (TARGET_64BIT);
10810       switch (code)
10811         {
10812           case 0:
10813             error ("extended registers have no high halves");
10814             break;
10815           case 1:
10816             fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10817             break;
10818           case 2:
10819             fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10820             break;
10821           case 4:
10822             fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10823             break;
10824           case 8:
10825             fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10826             break;
10827           default:
10828             error ("unsupported operand size for extended register");
10829             break;
10830         }
10831       return;
10832     }
10833 
10834   reg = NULL;
10835   switch (code)
10836     {
10837     case 3:
10838       if (STACK_TOP_P (x))
10839         {
10840           reg = "st(0)";
10841           break;
10842         }
10843       /* FALLTHRU */
10844     case 8:
10845     case 4:
10846     case 12:
10847       if (! ANY_FP_REG_P (x))
10848         putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10849       /* FALLTHRU */
10850     case 16:
10851     case 2:
10852     normal:
10853       reg = hi_reg_name[REGNO (x)];
10854       break;
10855     case 1:
10856       if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10857         goto normal;
10858       reg = qi_reg_name[REGNO (x)];
10859       break;
10860     case 0:
10861       if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10862         goto normal;
10863       reg = qi_high_reg_name[REGNO (x)];
10864       break;
10865     case 32:
10866       if (SSE_REG_P (x))
10867         {
10868           gcc_assert (!duplicated);
10869           putc ('y', file);
10870           fputs (hi_reg_name[REGNO (x)] + 1, file);
10871           return;
10872         }
10873       break;
10874     default:
10875       gcc_unreachable ();
10876     }
10877 
10878   fputs (reg, file);
10879   if (duplicated)
10880     {
10881       if (ASSEMBLER_DIALECT == ASM_ATT)
10882         fprintf (file, ", %%%s", reg);
10883       else
10884         fprintf (file, ", %s", reg);
10885     }
10886 }
10887 
10888 /* Locate some local-dynamic symbol still in use by this function
10889    so that we can print its name in some tls_local_dynamic_base
10890    pattern.  */
10891 
10892 static int
10893 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10894 {
10895   rtx x = *px;
10896 
10897   if (GET_CODE (x) == SYMBOL_REF
10898       && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10899     {
10900       cfun->machine->some_ld_name = XSTR (x, 0);
10901       return 1;
10902     }
10903 
10904   return 0;
10905 }
10906 
10907 static const char *
10908 get_some_local_dynamic_name (void)
10909 {
10910   rtx insn;
10911 
10912   if (cfun->machine->some_ld_name)
10913     return cfun->machine->some_ld_name;
10914 
10915   for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10916     if (INSN_P (insn)
10917         && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10918       return cfun->machine->some_ld_name;
10919 
10920   gcc_unreachable ();
10921 }
10922 
10923 /* Meaning of CODE:
10924    L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10925    C -- print opcode suffix for set/cmov insn.
10926    c -- like C, but print reversed condition
10927    F,f -- likewise, but for floating-point.
10928    O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10929         otherwise nothing
10930    R -- print the prefix for register names.
10931    z -- print the opcode suffix for the size of the current operand.
10932    * -- print a star (in certain assembler syntax)
10933    A -- print an absolute memory reference.
10934    w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
        delimiter.
10937    b -- print the QImode name of the register for the indicated operand.
10938         %b0 would print %al if operands[0] is reg 0.
10939    w --  likewise, print the HImode name of the register.
10940    k --  likewise, print the SImode name of the register.
10941    q --  likewise, print the DImode name of the register.
10942    x --  likewise, print the V4SFmode name of the register.
10943    t --  likewise, print the V8SFmode name of the register.
10944    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10945    y -- print "st(0)" instead of "st" as a register.
10946    d -- print duplicated register operand for AVX instruction.
10947    D -- print condition for SSE cmp instruction.
10948    P -- if PIC, print an @PLT suffix.
10949    X -- don't print any sort of PIC '@' suffix for a symbol.
10950    & -- print some in-use local-dynamic symbol name.
10951    H -- print a memory address offset by 8; used for sse high-parts
10952    Y -- print condition for SSE5 com* instruction.
10953    + -- print a branch hint as 'cs' or 'ds' prefix
10954    ; -- print a semicolon (after prefixes due to bug in older gas).
10955  */
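
/* So an insn template such as "mov{l}\t{%1, %k0|%k0, %1}" (a
   hypothetical example) uses `k' to force the SImode register name of
   operand 0.  */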
10956 
10957 void
10958 print_operand (FILE *file, rtx x, int code)
10959 {
10960   if (code)
10961     {
10962       switch (code)
10963         {
10964         case '*':
10965           if (ASSEMBLER_DIALECT == ASM_ATT)
10966             putc ('*', file);
10967           return;
10968 
10969         case '&':
10970           assemble_name (file, get_some_local_dynamic_name ());
10971           return;
10972 
10973         case 'A':
10974           switch (ASSEMBLER_DIALECT)
10975             {
10976             case ASM_ATT:
10977               putc ('*', file);
10978               break;
10979 
10980             case ASM_INTEL:
              /* Intel syntax.  For absolute addresses, registers should not
                 be surrounded by brackets.  */
10983               if (!REG_P (x))
10984                 {
10985                   putc ('[', file);
10986                   PRINT_OPERAND (file, x, 0);
10987                   putc (']', file);
10988                   return;
10989                 }
10990               break;
10991 
10992             default:
10993               gcc_unreachable ();
10994             }
10995 
10996           PRINT_OPERAND (file, x, 0);
10997           return;
10998 
        case 'L':
11001           if (ASSEMBLER_DIALECT == ASM_ATT)
11002             putc ('l', file);
11003           return;
11004 
11005         case 'W':
11006           if (ASSEMBLER_DIALECT == ASM_ATT)
11007             putc ('w', file);
11008           return;
11009 
11010         case 'B':
11011           if (ASSEMBLER_DIALECT == ASM_ATT)
11012             putc ('b', file);
11013           return;
11014 
11015         case 'Q':
11016           if (ASSEMBLER_DIALECT == ASM_ATT)
11017             putc ('l', file);
11018           return;
11019 
11020         case 'S':
11021           if (ASSEMBLER_DIALECT == ASM_ATT)
11022             putc ('s', file);
11023           return;
11024 
11025         case 'T':
11026           if (ASSEMBLER_DIALECT == ASM_ATT)
11027             putc ('t', file);
11028           return;
11029 
11030         case 'z':
11031           /* 387 opcodes don't get size suffixes if the operands are
11032              registers.  */
11033           if (STACK_REG_P (x))
11034             return;
11035 
11036           /* Likewise if using Intel opcodes.  */
11037           if (ASSEMBLER_DIALECT == ASM_INTEL)
11038             return;
11039 
          /* Derive the opcode suffix from the size of the operand.  */
11041           switch (GET_MODE_SIZE (GET_MODE (x)))
11042             {
11043             case 1:
11044               putc ('b', file);
11045               return;
11046 
11047             case 2:
11048               if (MEM_P (x))
11049                 {
11050 #ifdef HAVE_GAS_FILDS_FISTS
11051                   putc ('s', file);
11052 #endif
11053                   return;
11054                 }
11055               else
11056                 putc ('w', file);
11057               return;
11058 
11059             case 4:
11060               if (GET_MODE (x) == SFmode)
11061                 {
11062                   putc ('s', file);
11063                   return;
11064                 }
11065               else
11066                 putc ('l', file);
11067               return;
11068 
11069             case 12:
11070             case 16:
11071               putc ('t', file);
11072               return;
11073 
11074             case 8:
11075               if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11076                 {
11077                   if (MEM_P (x))
11078                     {
11079 #ifdef GAS_MNEMONICS
11080                       putc ('q', file);
11081 #else
11082                       putc ('l', file);
11083                       putc ('l', file);
11084 #endif
11085                     }
11086                   else
11087                     putc ('q', file);
11088                 }
11089               else
11090                 putc ('l', file);
11091               return;
11092 
11093             default:
11094               gcc_unreachable ();
11095             }
11096 
11097         case 'd':
11098         case 'b':
11099         case 'w':
11100         case 'k':
11101         case 'q':
11102         case 'h':
11103         case 't':
11104         case 'y':
11105         case 'x':
11106         case 'X':
11107         case 'P':
11108           break;
11109 
11110         case 's':
11111           if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11112             {
11113               PRINT_OPERAND (file, x, 0);
11114               fputs (", ", file);
11115             }
11116           return;
11117 
11118         case 'D':
          /* Little bit of braindamage here.  The SSE compare instructions
             use completely different names for the comparisons than the
             fp conditional moves do.  */
11122           if (TARGET_AVX)
11123             {
11124               switch (GET_CODE (x))
11125                 {
11126                 case EQ:
11127                   fputs ("eq", file);
11128                   break;
11129                 case UNEQ:
11130                   fputs ("eq_us", file);
11131                   break;
11132                 case LT:
11133                   fputs ("lt", file);
11134                   break;
11135                 case UNLT:
11136                   fputs ("nge", file);
11137                   break;
11138                 case LE:
11139                   fputs ("le", file);
11140                   break;
11141                 case UNLE:
11142                   fputs ("ngt", file);
11143                   break;
11144                 case UNORDERED:
11145                   fputs ("unord", file);
11146                   break;
11147                 case NE:
11148                   fputs ("neq", file);
11149                   break;
11150                 case LTGT:
11151                   fputs ("neq_oq", file);
11152                   break;
11153                 case GE:
11154                   fputs ("ge", file);
11155                   break;
11156                 case UNGE:
11157                   fputs ("nlt", file);
11158                   break;
11159                 case GT:
11160                   fputs ("gt", file);
11161                   break;
11162                 case UNGT:
11163                   fputs ("nle", file);
11164                   break;
11165                 case ORDERED:
11166                   fputs ("ord", file);
11167                   break;
11168                 default:
11169                   output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11170                   return;
11171                 }
11172             }
11173           else
11174             {
11175               switch (GET_CODE (x))
11176                 {
11177                 case EQ:
11178                 case UNEQ:
11179                   fputs ("eq", file);
11180                   break;
11181                 case LT:
11182                 case UNLT:
11183                   fputs ("lt", file);
11184                   break;
11185                 case LE:
11186                 case UNLE:
11187                   fputs ("le", file);
11188                   break;
11189                 case UNORDERED:
11190                   fputs ("unord", file);
11191                   break;
11192                 case NE:
11193                 case LTGT:
11194                   fputs ("neq", file);
11195                   break;
11196                 case UNGE:
11197                 case GE:
11198                   fputs ("nlt", file);
11199                   break;
11200                 case UNGT:
11201                 case GT:
11202                   fputs ("nle", file);
11203                   break;
11204                 case ORDERED:
11205                   fputs ("ord", file);
11206                   break;
11207                 default:
11208                   output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11209                   return;
11210                 }
11211             }
11212           return;
11213         case 'O':
11214 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11215           if (ASSEMBLER_DIALECT == ASM_ATT)
11216             {
11217               switch (GET_MODE (x))
11218                 {
11219                 case HImode: putc ('w', file); break;
11220                 case SImode:
11221                 case SFmode: putc ('l', file); break;
11222                 case DImode:
11223                 case DFmode: putc ('q', file); break;
11224                 default: gcc_unreachable ();
11225                 }
11226               putc ('.', file);
11227             }
11228 #endif
11229           return;
11230         case 'C':
11231           if (!COMPARISON_P (x))
11232             {
11233               output_operand_lossage ("operand is neither a constant nor a "
11234                                       "condition code, invalid operand code "
11235                                       "'C'");
11236               return;
11237             }
11238           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11239           return;
11240         case 'F':
11241           if (!COMPARISON_P (x))
11242             {
11243               output_operand_lossage ("operand is neither a constant nor a "
11244                                       "condition code, invalid operand code "
11245                                       "'F'");
11246               return;
11247             }
11248 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11249           if (ASSEMBLER_DIALECT == ASM_ATT)
11250             putc ('.', file);
11251 #endif
11252           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11253           return;
11254 
11255           /* Like above, but reverse condition */
11256         case 'c':
11257           /* Check to see if argument to %c is really a constant
11258              and not a condition code which needs to be reversed.  */
11259           if (!COMPARISON_P (x))
11260             {
11261               output_operand_lossage ("operand is neither a constant nor a "
11262                                       "condition code, invalid operand "
11263                                       "code 'c'");
11264               return;
11265             }
11266           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11267           return;
11268         case 'f':
11269           if (!COMPARISON_P (x))
11270             {
11271               output_operand_lossage ("operand is neither a constant nor a "
11272                                       "condition code, invalid operand "
11273                                       "code 'f'");
11274               return;
11275             }
11276 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11277           if (ASSEMBLER_DIALECT == ASM_ATT)
11278             putc ('.', file);
11279 #endif
11280           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
11281           return;
11282 
11283         case 'H':
11284           /* It doesn't actually matter what mode we use here, as we're
11285              only going to use this for printing.  */
11286           x = adjust_address_nv (x, DImode, 8);
11287           break;
11288 
11289         case '+':
11290           {
11291             rtx x;
11292 
            if (!optimize
                || optimize_function_for_size_p (cfun)
                || !TARGET_BRANCH_PREDICTION_HINTS)
11295               return;
11296 
11297             x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11298             if (x)
11299               {
11300                 int pred_val = INTVAL (XEXP (x, 0));
11301 
11302                 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11303                     || pred_val > REG_BR_PROB_BASE * 55 / 100)
11304                   {
11305                     int taken = pred_val > REG_BR_PROB_BASE / 2;
11306                     int cputaken = final_forward_branch_p (current_output_insn) == 0;
11307 
11308                     /* Emit hints only in the case default branch prediction
11309                        heuristics would fail.  */
11310                     if (taken != cputaken)
11311                       {
11312                         /* We use 3e (DS) prefix for taken branches and
11313                            2e (CS) prefix for not taken branches.  */
11314                         if (taken)
11315                           fputs ("ds ; ", file);
11316                         else
11317                           fputs ("cs ; ", file);
11318                       }
11319                   }
11320               }
11321             return;
11322           }
11323 
11324         case 'Y':
11325           switch (GET_CODE (x))
11326             {
11327             case NE:
11328               fputs ("neq", file);
11329               break;
11330             case EQ:
11331               fputs ("eq", file);
11332               break;
11333             case GE:
11334             case GEU:
11335               fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11336               break;
11337             case GT:
11338             case GTU:
11339               fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11340               break;
11341             case LE:
11342             case LEU:
11343               fputs ("le", file);
11344               break;
11345             case LT:
11346             case LTU:
11347               fputs ("lt", file);
11348               break;
11349             case UNORDERED:
11350               fputs ("unord", file);
11351               break;
11352             case ORDERED:
11353               fputs ("ord", file);
11354               break;
11355             case UNEQ:
11356               fputs ("ueq", file);
11357               break;
11358             case UNGE:
11359               fputs ("nlt", file);
11360               break;
11361             case UNGT:
11362               fputs ("nle", file);
11363               break;
11364             case UNLE:
11365               fputs ("ule", file);
11366               break;
11367             case UNLT:
11368               fputs ("ult", file);
11369               break;
11370             case LTGT:
11371               fputs ("une", file);
11372               break;
11373             default:
              output_operand_lossage ("operand is not a condition code, "
                                      "invalid operand code 'Y'");
11375               return;
11376             }
11377           return;
11378 
11379         case ';':
11380 #if TARGET_MACHO
11381           fputs (" ; ", file);
11382 #else
11383           fputc (' ', file);
11384 #endif
11385           return;
11386 
11387         default:
11388             output_operand_lossage ("invalid operand code '%c'", code);
11389         }
11390     }
11391 
11392   if (REG_P (x))
11393     print_reg (x, code, file);
11394 
11395   else if (MEM_P (x))
11396     {
11397       /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
11398       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11399           && GET_MODE (x) != BLKmode)
11400         {
11401           const char * size;
11402           switch (GET_MODE_SIZE (GET_MODE (x)))
11403             {
11404             case 1: size = "BYTE"; break;
11405             case 2: size = "WORD"; break;
11406             case 4: size = "DWORD"; break;
11407             case 8: size = "QWORD"; break;
11408             case 12: size = "TBYTE"; break;
11409             case 16:
11410               if (GET_MODE (x) == XFmode)
11411                 size = "TBYTE";
11412               else
11413                 size = "XMMWORD";
11414               break;
11415             case 32: size = "YMMWORD"; break;
11416             default:
11417               gcc_unreachable ();
11418             }
11419 
11420           /* Check for explicit size override (codes 'b', 'w' and 'k')  */
11421           if (code == 'b')
11422             size = "BYTE";
11423           else if (code == 'w')
11424             size = "WORD";
11425           else if (code == 'k')
11426             size = "DWORD";
11427 
11428           fputs (size, file);
11429           fputs (" PTR ", file);
11430         }
11431 
11432       x = XEXP (x, 0);
11433       /* Avoid (%rip) for call operands.  */
11434       if (CONSTANT_ADDRESS_P (x) && code == 'P'
11435           && !CONST_INT_P (x))
11436         output_addr_const (file, x);
11437       else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11438         output_operand_lossage ("invalid constraints for operand");
11439       else
11440         output_address (x);
11441     }
11442 
11443   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11444     {
11445       REAL_VALUE_TYPE r;
11446       long l;
11447 
11448       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11449       REAL_VALUE_TO_TARGET_SINGLE (r, l);
11450 
11451       if (ASSEMBLER_DIALECT == ASM_ATT)
11452         putc ('$', file);
11453       fprintf (file, "0x%08lx", (long unsigned int) l);
11454     }
11455 
11456   /* These float cases don't actually occur as immediate operands.  */
11457   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11458     {
11459       char dstr[30];
11460 
11461       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11462       fprintf (file, "%s", dstr);
11463     }
11464 
11465   else if (GET_CODE (x) == CONST_DOUBLE
11466            && GET_MODE (x) == XFmode)
11467     {
11468       char dstr[30];
11469 
11470       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11471       fprintf (file, "%s", dstr);
11472     }
11473 
11474   else
11475     {
11476       /* We have patterns that allow zero sets of memory, for instance.
11477          In 64-bit mode, we should probably support all 8-byte vectors,
11478          since we can in fact encode that into an immediate.  */
11479       if (GET_CODE (x) == CONST_VECTOR)
11480         {
11481           gcc_assert (x == CONST0_RTX (GET_MODE (x)));
11482           x = const0_rtx;
11483         }
11484 
11485       if (code != 'P')
11486         {
11487           if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11488             {
11489               if (ASSEMBLER_DIALECT == ASM_ATT)
11490                 putc ('$', file);
11491             }
11492           else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11493                    || GET_CODE (x) == LABEL_REF)
11494             {
11495               if (ASSEMBLER_DIALECT == ASM_ATT)
11496                 putc ('$', file);
11497               else
11498                 fputs ("OFFSET FLAT:", file);
11499             }
11500         }
11501       if (CONST_INT_P (x))
11502         fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11503       else if (flag_pic)
11504         output_pic_addr_const (file, x, code);
11505       else
11506         output_addr_const (file, x);
11507     }
11508 }
11509 
11510 /* Print a memory operand whose address is ADDR.  */
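/* For example, the address (plus (reg %ebp) (const_int -4)) prints as
   "-4(%ebp)" in AT&T syntax and "[ebp-4]" in Intel syntax; with an
   index register and scale it becomes "-4(%ebp,%ecx,4)" versus
   "[ebp-4+ecx*4]".  */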
11511 
11512 void
11513 print_operand_address (FILE *file, rtx addr)
11514 {
11515   struct ix86_address parts;
11516   rtx base, index, disp;
11517   int scale;
11518   int ok = ix86_decompose_address (addr, &parts);
11519 
11520   gcc_assert (ok);
11521 
11522   base = parts.base;
11523   index = parts.index;
11524   disp = parts.disp;
11525   scale = parts.scale;
11526 
11527   switch (parts.seg)
11528     {
11529     case SEG_DEFAULT:
11530       break;
11531     case SEG_FS:
11532     case SEG_GS:
11533       if (ASSEMBLER_DIALECT == ASM_ATT)
11534         putc ('%', file);
11535       fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11536       break;
11537     default:
11538       gcc_unreachable ();
11539     }
11540 
  /* Use RIP-relative addressing, which is one byte shorter, in 64-bit mode.  */
11542   if (TARGET_64BIT && !base && !index)
11543     {
11544       rtx symbol = disp;
11545 
11546       if (GET_CODE (disp) == CONST
11547           && GET_CODE (XEXP (disp, 0)) == PLUS
11548           && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11549         symbol = XEXP (XEXP (disp, 0), 0);
11550 
11551       if (GET_CODE (symbol) == LABEL_REF
11552           || (GET_CODE (symbol) == SYMBOL_REF
11553               && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11554         base = pc_rtx;
11555     }
11556   if (!base && !index)
11557     {
      /* A displacement-only address requires special attention.  */
11559 
11560       if (CONST_INT_P (disp))
11561         {
11562           if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11563             fputs ("ds:", file);
11564           fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11565         }
11566       else if (flag_pic)
11567         output_pic_addr_const (file, disp, 0);
11568       else
11569         output_addr_const (file, disp);
11570     }
11571   else
11572     {
11573       if (ASSEMBLER_DIALECT == ASM_ATT)
11574         {
11575           if (disp)
11576             {
11577               if (flag_pic)
11578                 output_pic_addr_const (file, disp, 0);
11579               else if (GET_CODE (disp) == LABEL_REF)
11580                 output_asm_label (disp);
11581               else
11582                 output_addr_const (file, disp);
11583             }
11584 
11585           putc ('(', file);
11586           if (base)
11587             print_reg (base, 0, file);
11588           if (index)
11589             {
11590               putc (',', file);
11591               print_reg (index, 0, file);
11592               if (scale != 1)
11593                 fprintf (file, ",%d", scale);
11594             }
11595           putc (')', file);
11596         }
11597       else
11598         {
11599           rtx offset = NULL_RTX;
11600 
11601           if (disp)
11602             {
11603               /* Pull out the offset of a symbol; print any symbol itself.  */
11604               if (GET_CODE (disp) == CONST
11605                   && GET_CODE (XEXP (disp, 0)) == PLUS
11606                   && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11607                 {
11608                   offset = XEXP (XEXP (disp, 0), 1);
11609                   disp = gen_rtx_CONST (VOIDmode,
11610                                         XEXP (XEXP (disp, 0), 0));
11611                 }
11612 
11613               if (flag_pic)
11614                 output_pic_addr_const (file, disp, 0);
11615               else if (GET_CODE (disp) == LABEL_REF)
11616                 output_asm_label (disp);
11617               else if (CONST_INT_P (disp))
11618                 offset = disp;
11619               else
11620                 output_addr_const (file, disp);
11621             }
11622 
11623           putc ('[', file);
11624           if (base)
11625             {
11626               print_reg (base, 0, file);
11627               if (offset)
11628                 {
11629                   if (INTVAL (offset) >= 0)
11630                     putc ('+', file);
11631                   fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11632                 }
11633             }
11634           else if (offset)
11635             fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11636           else
11637             putc ('0', file);
11638 
11639           if (index)
11640             {
11641               putc ('+', file);
11642               print_reg (index, 0, file);
11643               if (scale != 1)
11644                 fprintf (file, "*%d", scale);
11645             }
11646           putc (']', file);
11647         }
11648     }
11649 }
11650 
11651 bool
11652 output_addr_const_extra (FILE *file, rtx x)
11653 {
11654   rtx op;
11655 
11656   if (GET_CODE (x) != UNSPEC)
11657     return false;
11658 
11659   op = XVECEXP (x, 0, 0);
11660   switch (XINT (x, 1))
11661     {
11662     case UNSPEC_GOTTPOFF:
11663       output_addr_const (file, op);
11664       /* FIXME: This might be @TPOFF in Sun ld.  */
11665       fputs ("@GOTTPOFF", file);
11666       break;
11667     case UNSPEC_TPOFF:
11668       output_addr_const (file, op);
11669       fputs ("@TPOFF", file);
11670       break;
11671     case UNSPEC_NTPOFF:
11672       output_addr_const (file, op);
11673       if (TARGET_64BIT)
11674         fputs ("@TPOFF", file);
11675       else
11676         fputs ("@NTPOFF", file);
11677       break;
11678     case UNSPEC_DTPOFF:
11679       output_addr_const (file, op);
11680       fputs ("@DTPOFF", file);
11681       break;
11682     case UNSPEC_GOTNTPOFF:
11683       output_addr_const (file, op);
11684       if (TARGET_64BIT)
11685         fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11686                "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11687       else
11688         fputs ("@GOTNTPOFF", file);
11689       break;
11690     case UNSPEC_INDNTPOFF:
11691       output_addr_const (file, op);
11692       fputs ("@INDNTPOFF", file);
11693       break;
11694 #if TARGET_MACHO
11695     case UNSPEC_MACHOPIC_OFFSET:
11696       output_addr_const (file, op);
11697       putc ('-', file);
11698       machopic_output_function_base_name (file);
11699       break;
11700 #endif
11701 
11702     default:
11703       return false;
11704     }
11705 
11706   return true;
11707 }
11708 
11709 /* Split one or more DImode RTL references into pairs of SImode
11710    references.  The RTL can be REG, offsettable MEM, integer constant, or
11711    CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
11712    split and "num" is its length.  lo_half and hi_half are output arrays
11713    that parallel "operands".  */
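/* For example, a DImode MEM at address ADDR becomes an SImode MEM at
   ADDR for the low half and an SImode MEM at ADDR+4 for the high half,
   matching the little-endian layout assumed below.  */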
11714 
11715 void
11716 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11717 {
11718   while (num--)
11719     {
11720       rtx op = operands[num];
11721 
      /* simplify_subreg refuses to split volatile memory addresses,
         but we still have to handle them.  */
11724       if (MEM_P (op))
11725         {
11726           lo_half[num] = adjust_address (op, SImode, 0);
11727           hi_half[num] = adjust_address (op, SImode, 4);
11728         }
11729       else
11730         {
11731           lo_half[num] = simplify_gen_subreg (SImode, op,
11732                                               GET_MODE (op) == VOIDmode
11733                                               ? DImode : GET_MODE (op), 0);
11734           hi_half[num] = simplify_gen_subreg (SImode, op,
11735                                               GET_MODE (op) == VOIDmode
11736                                               ? DImode : GET_MODE (op), 4);
11737         }
11738     }
11739 }
11740 /* Split one or more TImode RTL references into pairs of DImode
11741    references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
11743    split and "num" is its length.  lo_half and hi_half are output arrays
11744    that parallel "operands".  */
11745 
11746 void
11747 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11748 {
11749   while (num--)
11750     {
11751       rtx op = operands[num];
11752 
      /* simplify_subreg refuses to split volatile memory addresses, but we
         still have to handle them.  */
11755       if (MEM_P (op))
11756         {
11757           lo_half[num] = adjust_address (op, DImode, 0);
11758           hi_half[num] = adjust_address (op, DImode, 8);
11759         }
11760       else
11761         {
11762           lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11763           hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11764         }
11765     }
11766 }
11767 
11768 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11769    MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
11770    is the expression of the binary operation.  The output may either be
11771    emitted here, or returned to the caller, like all output_* functions.
11772 
11773    There is no guarantee that the operands are the same mode, as they
11774    might be within FLOAT or FLOAT_EXTEND expressions.  */
11775 
11776 #ifndef SYSV386_COMPAT
11777 /* Set to 1 for compatibility with brain-damaged assemblers.  No-one
11778    wants to fix the assemblers because that causes incompatibility
11779    with gcc.  No-one wants to fix gcc because that causes
11780    incompatibility with assemblers...  You can use the option of
11781    -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
11782 #define SYSV386_COMPAT 1
11783 #endif
11784 
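/* For example, a DFmode SSE addition yields "addsd\t{%2, %0|%0, %2}"
   (or the three-operand "vaddsd\t{%2, %1, %0|%0, %1, %2}" under AVX),
   while an x87 register-register add with st(0) as destination yields
   "fadd\t{%y2, %0|%0, %y2}".  */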
11785 const char *
11786 output_387_binary_op (rtx insn, rtx *operands)
11787 {
11788   static char buf[40];
11789   const char *p;
11790   const char *ssep;
  int is_sse = (SSE_REG_P (operands[0]) || SSE_REG_P (operands[1])
                || SSE_REG_P (operands[2]));
11792 
11793 #ifdef ENABLE_CHECKING
11794   /* Even if we do not want to check the inputs, this documents input
11795      constraints.  Which helps in understanding the following code.  */
11796   if (STACK_REG_P (operands[0])
11797       && ((REG_P (operands[1])
11798            && REGNO (operands[0]) == REGNO (operands[1])
11799            && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11800           || (REG_P (operands[2])
11801               && REGNO (operands[0]) == REGNO (operands[2])
11802               && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11803       && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11804     ; /* ok */
11805   else
11806     gcc_assert (is_sse);
11807 #endif
11808 
11809   switch (GET_CODE (operands[3]))
11810     {
11811     case PLUS:
11812       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11813           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11814         p = "fiadd";
11815       else
11816         p = "fadd";
11817       ssep = "vadd";
11818       break;
11819 
11820     case MINUS:
11821       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11822           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11823         p = "fisub";
11824       else
11825         p = "fsub";
11826       ssep = "vsub";
11827       break;
11828 
11829     case MULT:
11830       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11831           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11832         p = "fimul";
11833       else
11834         p = "fmul";
11835       ssep = "vmul";
11836       break;
11837 
11838     case DIV:
11839       if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11840           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11841         p = "fidiv";
11842       else
11843         p = "fdiv";
11844       ssep = "vdiv";
11845       break;
11846 
11847     default:
11848       gcc_unreachable ();
11849     }
11850 
11851   if (is_sse)
11852    {
11853      if (TARGET_AVX)
11854        {
11855          strcpy (buf, ssep);
11856          if (GET_MODE (operands[0]) == SFmode)
11857            strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11858          else
11859            strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11860        }
11861      else
11862        {
11863          strcpy (buf, ssep + 1);
11864          if (GET_MODE (operands[0]) == SFmode)
11865            strcat (buf, "ss\t{%2, %0|%0, %2}");
11866          else
11867            strcat (buf, "sd\t{%2, %0|%0, %2}");
11868        }
11869       return buf;
11870    }
11871   strcpy (buf, p);
11872 
11873   switch (GET_CODE (operands[3]))
11874     {
11875     case MULT:
11876     case PLUS:
11877       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11878         {
11879           rtx temp = operands[2];
11880           operands[2] = operands[1];
11881           operands[1] = temp;
11882         }
11883 
      /* We now know that operands[0] == operands[1].  */
11885 
11886       if (MEM_P (operands[2]))
11887         {
11888           p = "%z2\t%2";
11889           break;
11890         }
11891 
11892       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11893         {
11894           if (STACK_TOP_P (operands[0]))
11895             /* How is it that we are storing to a dead operand[2]?
11896                Well, presumably operands[1] is dead too.  We can't
11897                store the result to st(0) as st(0) gets popped on this
11898                instruction.  Instead store to operands[2] (which I
11899                think has to be st(1)).  st(1) will be popped later.
11900                gcc <= 2.8.1 didn't have this check and generated
11901                assembly code that the Unixware assembler rejected.  */
11902             p = "p\t{%0, %2|%2, %0}";   /* st(1) = st(0) op st(1); pop */
11903           else
11904             p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
11905           break;
11906         }
11907 
11908       if (STACK_TOP_P (operands[0]))
11909         p = "\t{%y2, %0|%0, %y2}";      /* st(0) = st(0) op st(r2) */
11910       else
11911         p = "\t{%2, %0|%0, %2}";        /* st(r1) = st(r1) op st(0) */
11912       break;
11913 
11914     case MINUS:
11915     case DIV:
11916       if (MEM_P (operands[1]))
11917         {
11918           p = "r%z1\t%1";
11919           break;
11920         }
11921 
11922       if (MEM_P (operands[2]))
11923         {
11924           p = "%z2\t%2";
11925           break;
11926         }
11927 
11928       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11929         {
11930 #if SYSV386_COMPAT
11931           /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11932              derived assemblers, confusingly reverse the direction of
11933              the operation for fsub{r} and fdiv{r} when the
11934              destination register is not st(0).  The Intel assembler
11935              doesn't have this brain damage.  Read !SYSV386_COMPAT to
11936              figure out what the hardware really does.  */
11937           if (STACK_TOP_P (operands[0]))
11938             p = "{p\t%0, %2|rp\t%2, %0}";
11939           else
11940             p = "{rp\t%2, %0|p\t%0, %2}";
11941 #else
11942           if (STACK_TOP_P (operands[0]))
11943             /* As above for fmul/fadd, we can't store to st(0).  */
11944             p = "rp\t{%0, %2|%2, %0}";  /* st(1) = st(0) op st(1); pop */
11945           else
11946             p = "p\t{%2, %0|%0, %2}";   /* st(r1) = st(r1) op st(0); pop */
11947 #endif
11948           break;
11949         }
11950 
11951       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11952         {
11953 #if SYSV386_COMPAT
11954           if (STACK_TOP_P (operands[0]))
11955             p = "{rp\t%0, %1|p\t%1, %0}";
11956           else
11957             p = "{p\t%1, %0|rp\t%0, %1}";
11958 #else
11959           if (STACK_TOP_P (operands[0]))
11960             p = "p\t{%0, %1|%1, %0}";   /* st(1) = st(1) op st(0); pop */
11961           else
11962             p = "rp\t{%1, %0|%0, %1}";  /* st(r2) = st(0) op st(r2); pop */
11963 #endif
11964           break;
11965         }
11966 
11967       if (STACK_TOP_P (operands[0]))
11968         {
11969           if (STACK_TOP_P (operands[1]))
11970             p = "\t{%y2, %0|%0, %y2}";  /* st(0) = st(0) op st(r2) */
11971           else
11972             p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11973           break;
11974         }
11975       else if (STACK_TOP_P (operands[1]))
11976         {
11977 #if SYSV386_COMPAT
11978           p = "{\t%1, %0|r\t%0, %1}";
11979 #else
11980           p = "r\t{%1, %0|%0, %1}";     /* st(r2) = st(0) op st(r2) */
11981 #endif
11982         }
11983       else
11984         {
11985 #if SYSV386_COMPAT
11986           p = "{r\t%2, %0|\t%0, %2}";
11987 #else
11988           p = "\t{%2, %0|%0, %2}";      /* st(r1) = st(r1) op st(0) */
11989 #endif
11990         }
11991       break;
11992 
11993     default:
11994       gcc_unreachable ();
11995     }
11996 
11997   strcat (buf, p);
11998   return buf;
11999 }
12000 
12001 /* Return needed mode for entity in optimize_mode_switching pass.  */
12002 
12003 int
12004 ix86_mode_needed (int entity, rtx insn)
12005 {
12006   enum attr_i387_cw mode;
12007 
  /* The mode UNINITIALIZED is used to store the control word after a
     function call or ASM pattern.  The mode ANY specifies that the
     function has no requirements on the control word and makes no
     changes in the bits we are interested in.  */
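  /* For example, an insn that truncates a float to an integer carries
     the i387_cw attribute value "trunc", so for the I387_TRUNC entity
     this function returns I387_CW_TRUNC.  */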
12012 
12013   if (CALL_P (insn)
12014       || (NONJUMP_INSN_P (insn)
12015           && (asm_noperands (PATTERN (insn)) >= 0
12016               || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
12017     return I387_CW_UNINITIALIZED;
12018 
12019   if (recog_memoized (insn) < 0)
12020     return I387_CW_ANY;
12021 
12022   mode = get_attr_i387_cw (insn);
12023 
12024   switch (entity)
12025     {
12026     case I387_TRUNC:
12027       if (mode == I387_CW_TRUNC)
12028         return mode;
12029       break;
12030 
12031     case I387_FLOOR:
12032       if (mode == I387_CW_FLOOR)
12033         return mode;
12034       break;
12035 
12036     case I387_CEIL:
12037       if (mode == I387_CW_CEIL)
12038         return mode;
12039       break;
12040 
12041     case I387_MASK_PM:
12042       if (mode == I387_CW_MASK_PM)
12043         return mode;
12044       break;
12045 
12046     default:
12047       gcc_unreachable ();
12048     }
12049 
12050   return I387_CW_ANY;
12051 }
12052 
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  MODE selects the control word contents to set up;
   the result is stored in the stack slot that corresponds to MODE.  */
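/* In the x87 control word, bits 10-11 form the rounding-control field
   (mask 0x0c00): 00 = round to nearest, 01 = round down, 10 = round up,
   11 = truncate; bit 5 (0x0020) masks the precision exception.  The
   constants below follow directly from this layout.  */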
12056 
12057 void
12058 emit_i387_cw_initialization (int mode)
12059 {
12060   rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
12061   rtx new_mode;
12062 
12063   enum ix86_stack_slot slot;
12064 
12065   rtx reg = gen_reg_rtx (HImode);
12066 
12067   emit_insn (gen_x86_fnstcw_1 (stored_mode));
12068   emit_move_insn (reg, copy_rtx (stored_mode));
12069 
12070   if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12071       || optimize_function_for_size_p (cfun))
12072     {
12073       switch (mode)
12074         {
12075         case I387_CW_TRUNC:
12076           /* round toward zero (truncate) */
12077           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12078           slot = SLOT_CW_TRUNC;
12079           break;
12080 
12081         case I387_CW_FLOOR:
12082           /* round down toward -oo */
12083           emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12084           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12085           slot = SLOT_CW_FLOOR;
12086           break;
12087 
12088         case I387_CW_CEIL:
12089           /* round up toward +oo */
12090           emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12091           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12092           slot = SLOT_CW_CEIL;
12093           break;
12094 
12095         case I387_CW_MASK_PM:
12096           /* mask precision exception for nearbyint() */
12097           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12098           slot = SLOT_CW_MASK_PM;
12099           break;
12100 
12101         default:
12102           gcc_unreachable ();
12103         }
12104     }
12105   else
12106     {
12107       switch (mode)
12108         {
12109         case I387_CW_TRUNC:
12110           /* round toward zero (truncate) */
12111           emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12112           slot = SLOT_CW_TRUNC;
12113           break;
12114 
12115         case I387_CW_FLOOR:
12116           /* round down toward -oo */
12117           emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12118           slot = SLOT_CW_FLOOR;
12119           break;
12120 
12121         case I387_CW_CEIL:
12122           /* round up toward +oo */
12123           emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12124           slot = SLOT_CW_CEIL;
12125           break;
12126 
12127         case I387_CW_MASK_PM:
12128           /* mask precision exception for nearbyint() */
12129           emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12130           slot = SLOT_CW_MASK_PM;
12131           break;
12132 
12133         default:
12134           gcc_unreachable ();
12135         }
12136     }
12137 
12138   gcc_assert (slot < MAX_386_STACK_LOCALS);
12139 
12140   new_mode = assign_386_stack_local (HImode, slot);
12141   emit_move_insn (new_mode, reg);
12142 }
12143 
12144 /* Output code for INSN to convert a float to a signed int.  OPERANDS
12145    are the insn operands.  The output may be [HSD]Imode and the input
12146    operand may be [SDX]Fmode.  */
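/* For example, for an SImode output that needs a non-default rounding
   mode, the emitted sequence has the shape

       fldcw   %3      ; install the new control word
       fistpl  %0      ; convert to integer and pop
       fldcw   %2      ; restore the original control word

   with the exact variants chosen by the code below.  */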
12147 
12148 const char *
12149 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12150 {
12151   int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12152   int dimode_p = GET_MODE (operands[0]) == DImode;
12153   int round_mode = get_attr_i387_cw (insn);
12154 
12155   /* Jump through a hoop or two for DImode, since the hardware has no
12156      non-popping instruction.  We used to do this a different way, but
12157      that was somewhat fragile and broke with post-reload splitters.  */
12158   if ((dimode_p || fisttp) && !stack_top_dies)
12159     output_asm_insn ("fld\t%y1", operands);
12160 
12161   gcc_assert (STACK_TOP_P (operands[1]));
12162   gcc_assert (MEM_P (operands[0]));
12163   gcc_assert (GET_MODE (operands[1]) != TFmode);
12164 
12165   if (fisttp)
    output_asm_insn ("fisttp%z0\t%0", operands);
12167   else
12168     {
12169       if (round_mode != I387_CW_ANY)
12170         output_asm_insn ("fldcw\t%3", operands);
12171       if (stack_top_dies || dimode_p)
12172         output_asm_insn ("fistp%z0\t%0", operands);
12173       else
12174         output_asm_insn ("fist%z0\t%0", operands);
12175       if (round_mode != I387_CW_ANY)
12176         output_asm_insn ("fldcw\t%2", operands);
12177     }
12178 
12179   return "";
12180 }
12181 
12182 /* Output code for x87 ffreep insn.  The OPNO argument, which may only
12183    have the values zero or one, indicates the ffreep insn's operand
12184    from the OPERANDS array.  */
12185 
12186 static const char *
12187 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12188 {
12189   if (TARGET_USE_FFREEP)
12190 #ifdef HAVE_AS_IX86_FFREEP
12191     return opno ? "ffreep\t%y1" : "ffreep\t%y0";
12192 #else
12193     {
12194       static char retval[32];
12195       int regno = REGNO (operands[opno]);
12196 
12197       gcc_assert (FP_REGNO_P (regno));
12198 
12199       regno -= FIRST_STACK_REG;
12200 
12201       snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
12202       return retval;
12203     }
12204 #endif
12205 
12206   return opno ? "fstp\t%y1" : "fstp\t%y0";
12207 }
12208 
12209 
12210 /* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
12211    should be used.  UNORDERED_P is true when fucom should be used.  */
12212 
12213 const char *
12214 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12215 {
12216   int stack_top_dies;
12217   rtx cmp_op0, cmp_op1;
12218   int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
12219 
12220   if (eflags_p)
12221     {
12222       cmp_op0 = operands[0];
12223       cmp_op1 = operands[1];
12224     }
12225   else
12226     {
12227       cmp_op0 = operands[1];
12228       cmp_op1 = operands[2];
12229     }
12230 
12231   if (is_sse)
12232     {
12233       static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12234       static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12235       static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12236       static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12237 
12238       if (GET_MODE (operands[0]) == SFmode)
12239         if (unordered_p)
12240           return &ucomiss[TARGET_AVX ? 0 : 1];
12241         else
12242           return &comiss[TARGET_AVX ? 0 : 1];
12243       else
12244         if (unordered_p)
12245           return &ucomisd[TARGET_AVX ? 0 : 1];
12246         else
12247           return &comisd[TARGET_AVX ? 0 : 1];
12248     }
12249 
12250   gcc_assert (STACK_TOP_P (cmp_op0));
12251 
12252   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12253 
12254   if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12255     {
12256       if (stack_top_dies)
12257         {
12258           output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12259           return output_387_ffreep (operands, 1);
12260         }
12261       else
12262         return "ftst\n\tfnstsw\t%0";
12263     }
12264 
12265   if (STACK_REG_P (cmp_op1)
12266       && stack_top_dies
12267       && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12268       && REGNO (cmp_op1) != FIRST_STACK_REG)
12269     {
      /* If the top of the 387 stack dies, and the other operand is
         also a stack register that dies, then this must be a
         `fcompp' float compare.  */
12273 
12274       if (eflags_p)
12275         {
12276           /* There is no double popping fcomi variant.  Fortunately,
12277              eflags is immune from the fstp's cc clobbering.  */
12278           if (unordered_p)
12279             output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12280           else
12281             output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12282           return output_387_ffreep (operands, 0);
12283         }
12284       else
12285         {
12286           if (unordered_p)
12287             return "fucompp\n\tfnstsw\t%0";
12288           else
12289             return "fcompp\n\tfnstsw\t%0";
12290         }
12291     }
12292   else
12293     {
12294       /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
12295 
12296       static const char * const alt[16] =
12297       {
12298         "fcom%z2\t%y2\n\tfnstsw\t%0",
12299         "fcomp%z2\t%y2\n\tfnstsw\t%0",
12300         "fucom%z2\t%y2\n\tfnstsw\t%0",
12301         "fucomp%z2\t%y2\n\tfnstsw\t%0",
12302 
12303         "ficom%z2\t%y2\n\tfnstsw\t%0",
12304         "ficomp%z2\t%y2\n\tfnstsw\t%0",
12305         NULL,
12306         NULL,
12307 
12308         "fcomi\t{%y1, %0|%0, %y1}",
12309         "fcomip\t{%y1, %0|%0, %y1}",
12310         "fucomi\t{%y1, %0|%0, %y1}",
12311         "fucomip\t{%y1, %0|%0, %y1}",
12312 
12313         NULL,
12314         NULL,
12315         NULL,
12316         NULL
12317       };
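      /* For example, eflags_p = 1, a floating-point operand, an
         ordered compare and a dying stack top give mask 9, which
         selects "fcomip\t{%y1, %0|%0, %y1}" above.  */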
12318 
12319       int mask;
12320       const char *ret;
12321 
12322       mask  = eflags_p << 3;
12323       mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12324       mask |= unordered_p << 1;
12325       mask |= stack_top_dies;
12326 
12327       gcc_assert (mask < 16);
12328       ret = alt[mask];
12329       gcc_assert (ret);
12330 
12331       return ret;
12332     }
12333 }
12334 
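/* Output an absolute address-vector element for internal label number
   VALUE; on ELF targets this is typically ".long .L<value>" (or
   ".quad" in 64-bit mode).  */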
12335 void
12336 ix86_output_addr_vec_elt (FILE *file, int value)
12337 {
12338   const char *directive = ASM_LONG;
12339 
12340 #ifdef ASM_QUAD
12341   if (TARGET_64BIT)
12342     directive = ASM_QUAD;
12343 #else
12344   gcc_assert (!TARGET_64BIT);
12345 #endif
12346 
12347   fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
12348 }
12349 
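/* Output a relative address-vector element, e.g. ".long .L5-.L2", or
   a @GOTOFF/GOT-relative form for 32-bit PIC jump tables.  */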
12350 void
12351 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12352 {
12353   const char *directive = ASM_LONG;
12354 
12355 #ifdef ASM_QUAD
12356   if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12357     directive = ASM_QUAD;
12358 #else
12359   gcc_assert (!TARGET_64BIT);
12360 #endif
12361   /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
12362   if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12363     fprintf (file, "%s%s%d-%s%d\n",
12364              directive, LPREFIX, value, LPREFIX, rel);
12365   else if (HAVE_AS_GOTOFF_IN_DATA)
12366     fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12367 #if TARGET_MACHO
12368   else if (TARGET_MACHO)
12369     {
12370       fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12371       machopic_output_function_base_name (file);
12372       fprintf(file, "\n");
12373     }
12374 #endif
12375   else
12376     asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12377                  ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12378 }
12379 
12380 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
12381    for the target.  */
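/* For example, "xorl %eax, %eax" is a 2-byte encoding while
   "movl $0, %eax" takes 5 bytes; the xor form (which clobbers the
   flags) is chosen below unless the target prefers the mov form.  */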
12382 
12383 void
12384 ix86_expand_clear (rtx dest)
12385 {
12386   rtx tmp;
12387 
12388   /* We play register width games, which are only valid after reload.  */
12389   gcc_assert (reload_completed);
12390 
12391   /* Avoid HImode and its attendant prefix byte.  */
12392   if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12393     dest = gen_rtx_REG (SImode, REGNO (dest));
12394   tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12395 
12396   /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
12397   if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
12398     {
12399       rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12400       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12401     }
12402 
12403   emit_insn (tmp);
12404 }
12405 
12406 /* X is an unchanging MEM.  If it is a constant pool reference, return
12407    the constant pool rtx, else NULL.  */
12408 
12409 rtx
12410 maybe_get_pool_constant (rtx x)
12411 {
12412   x = ix86_delegitimize_address (XEXP (x, 0));
12413 
12414   if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12415     return get_pool_constant (x);
12416 
12417   return NULL_RTX;
12418 }
12419 
12420 void
12421 ix86_expand_move (enum machine_mode mode, rtx operands[])
12422 {
12423   rtx op0, op1;
12424   enum tls_model model;
12425 
12426   op0 = operands[0];
12427   op1 = operands[1];
12428 
12429   if (GET_CODE (op1) == SYMBOL_REF)
12430     {
12431       model = SYMBOL_REF_TLS_MODEL (op1);
12432       if (model)
12433         {
12434           op1 = legitimize_tls_address (op1, model, true);
12435           op1 = force_operand (op1, op0);
12436           if (op1 == op0)
12437             return;
12438         }
12439       else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12440                && SYMBOL_REF_DLLIMPORT_P (op1))
12441         op1 = legitimize_dllimport_symbol (op1, false);
12442     }
12443   else if (GET_CODE (op1) == CONST
12444            && GET_CODE (XEXP (op1, 0)) == PLUS
12445            && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12446     {
12447       rtx addend = XEXP (XEXP (op1, 0), 1);
12448       rtx symbol = XEXP (XEXP (op1, 0), 0);
12449       rtx tmp = NULL;
12450 
12451       model = SYMBOL_REF_TLS_MODEL (symbol);
12452       if (model)
12453         tmp = legitimize_tls_address (symbol, model, true);
12454       else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12455                && SYMBOL_REF_DLLIMPORT_P (symbol))
12456         tmp = legitimize_dllimport_symbol (symbol, true);
12457 
12458       if (tmp)
12459         {
12460           tmp = force_operand (tmp, NULL);
12461           tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12462                                      op0, 1, OPTAB_DIRECT);
12463           if (tmp == op0)
12464             return;
12465         }
12466     }
12467 
12468   if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12469     {
12470       if (TARGET_MACHO && !TARGET_64BIT)
12471         {
12472 #if TARGET_MACHO
12473           if (MACHOPIC_PURE)
12474             {
12475               rtx temp = ((reload_in_progress
12476                            || ((op0 && REG_P (op0))
12477                                && mode == Pmode))
12478                           ? op0 : gen_reg_rtx (Pmode));
12479               op1 = machopic_indirect_data_reference (op1, temp);
12480               op1 = machopic_legitimize_pic_address (op1, mode,
12481                                                      temp == op1 ? 0 : temp);
12482             }
12483           else if (MACHOPIC_INDIRECT)
12484             op1 = machopic_indirect_data_reference (op1, 0);
12485           if (op0 == op1)
12486             return;
12487 #endif
12488         }
12489       else
12490         {
12491           if (MEM_P (op0))
12492             op1 = force_reg (Pmode, op1);
12493           else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12494             {
12495               rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12496               op1 = legitimize_pic_address (op1, reg);
12497               if (op0 == op1)
12498                 return;
12499             }
12500         }
12501     }
12502   else
12503     {
12504       if (MEM_P (op0)
12505           && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12506               || !push_operand (op0, mode))
12507           && MEM_P (op1))
12508         op1 = force_reg (mode, op1);
12509 
12510       if (push_operand (op0, mode)
12511           && ! general_no_elim_operand (op1, mode))
12512         op1 = copy_to_mode_reg (mode, op1);
12513 
      /* Force large constants in 64-bit compilation into a register
         so that they get CSEd.  */
12516       if (can_create_pseudo_p ()
12517           && (mode == DImode) && TARGET_64BIT
12518           && immediate_operand (op1, mode)
12519           && !x86_64_zext_immediate_operand (op1, VOIDmode)
12520           && !register_operand (op0, mode)
12521           && optimize)
12522         op1 = copy_to_mode_reg (mode, op1);
12523 
12524       if (can_create_pseudo_p ()
12525           && FLOAT_MODE_P (mode)
12526           && GET_CODE (op1) == CONST_DOUBLE)
12527         {
12528           /* If we are loading a floating point constant to a register,
12529              force the value to memory now, since we'll get better code
12530              out the back end.  */
12531 
12532           op1 = validize_mem (force_const_mem (mode, op1));
12533           if (!register_operand (op0, mode))
12534             {
12535               rtx temp = gen_reg_rtx (mode);
12536               emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12537               emit_move_insn (op0, temp);
12538               return;
12539             }
12540         }
12541     }
12542 
12543   emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12544 }
12545 
12546 void
12547 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12548 {
12549   rtx op0 = operands[0], op1 = operands[1];
12550   unsigned int align = GET_MODE_ALIGNMENT (mode);
12551 
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register; once we have that information we may be able
     to handle some of them more efficiently.  */
12556   if (can_create_pseudo_p ()
12557       && register_operand (op0, mode)
12558       && (CONSTANT_P (op1)
12559           || (GET_CODE (op1) == SUBREG
12560               && CONSTANT_P (SUBREG_REG (op1))))
12561       && standard_sse_constant_p (op1) <= 0)
12562     op1 = validize_mem (force_const_mem (mode, op1));
12563 
  /* We need to check memory alignment for SSE mode since attributes
     can make operands unaligned.  */
12566   if (can_create_pseudo_p ()
12567       && SSE_REG_MODE_P (mode)
12568       && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12569           || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12570     {
12571       rtx tmp[2];
12572 
12573       /* ix86_expand_vector_move_misalign() does not like constants ... */
12574       if (CONSTANT_P (op1)
12575           || (GET_CODE (op1) == SUBREG
12576               && CONSTANT_P (SUBREG_REG (op1))))
12577         op1 = validize_mem (force_const_mem (mode, op1));
12578 
12579       /* ... nor both arguments in memory.  */
12580       if (!register_operand (op0, mode)
12581           && !register_operand (op1, mode))
12582         op1 = force_reg (mode, op1);
12583 
12584       tmp[0] = op0; tmp[1] = op1;
12585       ix86_expand_vector_move_misalign (mode, tmp);
12586       return;
12587     }
12588 
  /* If neither operand is a register, force operand1 into a register.  */
12590   if (can_create_pseudo_p ()
12591       && !register_operand (op0, mode)
12592       && !register_operand (op1, mode))
12593     {
12594       emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12595       return;
12596     }
12597 
12598   emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12599 }
12600 
12601 /* Implement the movmisalign patterns for SSE.  Non-SSE modes go
12602    straight to ix86_expand_vector_move.  */
12603 /* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
12605        movaps reg, reg
12606      else
12607        movss reg, reg
12608      if (x86_sse_partial_reg_dependency == true)
12609        movapd reg, reg
12610      else
12611        movsd reg, reg
12612 
12613    Code generation for scalar loads of double precision data:
12614      if (x86_sse_split_regs == true)
12615        movlpd mem, reg      (gas syntax)
12616      else
12617        movsd mem, reg
12618 
12619    Code generation for unaligned packed loads of single precision data
12620    (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12621      if (x86_sse_unaligned_move_optimal)
12622        movups mem, reg
12623 
12624      if (x86_sse_partial_reg_dependency == true)
12625        {
12626          xorps  reg, reg
12627          movlps mem, reg
12628          movhps mem+8, reg
12629        }
12630      else
12631        {
12632          movlps mem, reg
12633          movhps mem+8, reg
12634        }
12635 
12636    Code generation for unaligned packed loads of double precision data
12637    (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12638      if (x86_sse_unaligned_move_optimal)
12639        movupd mem, reg
12640 
12641      if (x86_sse_split_regs == true)
12642        {
12643          movlpd mem, reg
12644          movhpd mem+8, reg
12645        }
12646      else
12647        {
12648          movsd  mem, reg
12649          movhpd mem+8, reg
12650        }
12651  */
12652 
12653 void
12654 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
12655 {
12656   rtx op0, op1, m;
12657 
12658   op0 = operands[0];
12659   op1 = operands[1];
12660 
12661   if (TARGET_AVX)
12662     {
12663       switch (GET_MODE_CLASS (mode))
12664         {
12665         case MODE_VECTOR_INT:
12666         case MODE_INT:
12667           switch (GET_MODE_SIZE (mode))
12668             {
12669             case 16:
12670               op0 = gen_lowpart (V16QImode, op0);
12671               op1 = gen_lowpart (V16QImode, op1);
12672               emit_insn (gen_avx_movdqu (op0, op1));
12673               break;
12674             case 32:
12675               op0 = gen_lowpart (V32QImode, op0);
12676               op1 = gen_lowpart (V32QImode, op1);
12677               emit_insn (gen_avx_movdqu256 (op0, op1));
12678               break;
12679             default:
12680               gcc_unreachable ();
12681             }
12682           break;
12683         case MODE_VECTOR_FLOAT:
12684           op0 = gen_lowpart (mode, op0);
12685           op1 = gen_lowpart (mode, op1);
12686 
12687           switch (mode)
            {
12689             case V4SFmode:
12690               emit_insn (gen_avx_movups (op0, op1));
12691               break;
12692             case V8SFmode:
12693               emit_insn (gen_avx_movups256 (op0, op1));
12694               break;
12695             case V2DFmode:
12696               emit_insn (gen_avx_movupd (op0, op1));
12697               break;
12698             case V4DFmode:
12699               emit_insn (gen_avx_movupd256 (op0, op1));
12700               break;
12701             default:
12702               gcc_unreachable ();
12703             }
12704           break;
12705 
12706         default:
12707           gcc_unreachable ();
12708         }
12709 
12710       return;
12711     }
12712 
12713   if (MEM_P (op1))
12714     {
12715       /* If we're optimizing for size, movups is the smallest.  */
12716       if (optimize_insn_for_size_p ())
12717         {
12718           op0 = gen_lowpart (V4SFmode, op0);
12719           op1 = gen_lowpart (V4SFmode, op1);
12720           emit_insn (gen_sse_movups (op0, op1));
12721           return;
12722         }
12723 
12724       /* ??? If we have typed data, then it would appear that using
12725          movdqu is the only way to get unaligned data loaded with
12726          integer type.  */
12727       if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12728         {
12729           op0 = gen_lowpart (V16QImode, op0);
12730           op1 = gen_lowpart (V16QImode, op1);
12731           emit_insn (gen_sse2_movdqu (op0, op1));
12732           return;
12733         }
12734 
12735       if (TARGET_SSE2 && mode == V2DFmode)
12736         {
12737           rtx zero;
12738 
12739           if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12740             {
12741               op0 = gen_lowpart (V2DFmode, op0);
12742               op1 = gen_lowpart (V2DFmode, op1);
12743               emit_insn (gen_sse2_movupd (op0, op1));
12744               return;
12745             }
12746 
12747           /* When SSE registers are split into halves, we can avoid
12748              writing to the top half twice.  */
12749           if (TARGET_SSE_SPLIT_REGS)
12750             {
12751               emit_clobber (op0);
12752               zero = op0;
12753             }
12754           else
12755             {
12756               /* ??? Not sure about the best option for the Intel chips.
12757                  The following would seem to satisfy; the register is
12758                  entirely cleared, breaking the dependency chain.  We
12759                  then store to the upper half, with a dependency depth
12760                  of one.  A rumor has it that Intel recommends two movsd
12761                  followed by an unpacklpd, but this is unconfirmed.  And
12762                  given that the dependency depth of the unpacklpd would
12763                  still be one, I'm not sure why this would be better.  */
12764               zero = CONST0_RTX (V2DFmode);
12765             }
12766 
12767           m = adjust_address (op1, DFmode, 0);
12768           emit_insn (gen_sse2_loadlpd (op0, zero, m));
12769           m = adjust_address (op1, DFmode, 8);
12770           emit_insn (gen_sse2_loadhpd (op0, op0, m));
12771         }
12772       else
12773         {
12774           if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12775             {
12776               op0 = gen_lowpart (V4SFmode, op0);
12777               op1 = gen_lowpart (V4SFmode, op1);
12778               emit_insn (gen_sse_movups (op0, op1));
12779               return;
12780             }
12781 
12782           if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12783             emit_move_insn (op0, CONST0_RTX (mode));
12784           else
12785             emit_clobber (op0);
12786 
12787           if (mode != V4SFmode)
12788             op0 = gen_lowpart (V4SFmode, op0);
12789           m = adjust_address (op1, V2SFmode, 0);
12790           emit_insn (gen_sse_loadlps (op0, op0, m));
12791           m = adjust_address (op1, V2SFmode, 8);
12792           emit_insn (gen_sse_loadhps (op0, op0, m));
12793         }
12794     }
12795   else if (MEM_P (op0))
12796     {
12797       /* If we're optimizing for size, movups is the smallest.  */
12798       if (optimize_insn_for_size_p ())
12799         {
12800           op0 = gen_lowpart (V4SFmode, op0);
12801           op1 = gen_lowpart (V4SFmode, op1);
12802           emit_insn (gen_sse_movups (op0, op1));
12803           return;
12804         }
12805 
12806       /* ??? Similar to above, only less clear because of quote
12807          typeless stores unquote.  */
12808       if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12809           && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12810         {
12811           op0 = gen_lowpart (V16QImode, op0);
12812           op1 = gen_lowpart (V16QImode, op1);
12813           emit_insn (gen_sse2_movdqu (op0, op1));
12814           return;
12815         }
12816 
12817       if (TARGET_SSE2 && mode == V2DFmode)
12818         {
12819           m = adjust_address (op0, DFmode, 0);
12820           emit_insn (gen_sse2_storelpd (m, op1));
12821           m = adjust_address (op0, DFmode, 8);
12822           emit_insn (gen_sse2_storehpd (m, op1));
12823         }
12824       else
12825         {
12826           if (mode != V4SFmode)
12827             op1 = gen_lowpart (V4SFmode, op1);
12828           m = adjust_address (op0, V2SFmode, 0);
12829           emit_insn (gen_sse_storelps (m, op1));
12830           m = adjust_address (op0, V2SFmode, 8);
12831           emit_insn (gen_sse_storehps (m, op1));
12832         }
12833     }
12834   else
12835     gcc_unreachable ();
12836 }
12837 
12838 /* Expand a push in MODE.  This is some mode for which we do not support
12839    proper push instructions, at least from the registers that we expect
12840    the value to live in.  */
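/* For instance, pushing a DFmode value expands to an explicit stack
   adjustment followed by a store, roughly:

       subl  $8, %esp
       movsd %xmm0, (%esp)

   (an illustrative sketch; the actual insns are produced by the
   expanders used below).  */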
12841 
12842 void
12843 ix86_expand_push (enum machine_mode mode, rtx x)
12844 {
12845   rtx tmp;
12846 
12847   tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12848                              GEN_INT (-GET_MODE_SIZE (mode)),
12849                              stack_pointer_rtx, 1, OPTAB_DIRECT);
12850   if (tmp != stack_pointer_rtx)
12851     emit_move_insn (stack_pointer_rtx, tmp);
12852 
12853   tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12854 
  /* When we push an operand onto the stack, it has to be aligned at
     least at the function argument boundary.  However, since we don't
     have the argument type, we can't determine the actual argument
     boundary.  */
12859   emit_move_insn (tmp, x);
12860 }
12861 
12862 /* Helper function of ix86_fixup_binary_operands to canonicalize
12863    operand order.  Returns true if the operands should be swapped.  */
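/* For example, given (set (reg A) (plus (reg B) (reg A))), the sources
   are swapped so that src1 matches the destination, enabling the
   two-address "add" form.  */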
12864 
12865 static bool
12866 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12867                              rtx operands[])
12868 {
12869   rtx dst = operands[0];
12870   rtx src1 = operands[1];
12871   rtx src2 = operands[2];
12872 
12873   /* If the operation is not commutative, we can't do anything.  */
12874   if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12875     return false;
12876 
12877   /* Highest priority is that src1 should match dst.  */
12878   if (rtx_equal_p (dst, src1))
12879     return false;
12880   if (rtx_equal_p (dst, src2))
12881     return true;
12882 
12883   /* Next highest priority is that immediate constants come second.  */
12884   if (immediate_operand (src2, mode))
12885     return false;
12886   if (immediate_operand (src1, mode))
12887     return true;
12888 
12889   /* Lowest priority is that memory references should come second.  */
12890   if (MEM_P (src2))
12891     return false;
12892   if (MEM_P (src1))
12893     return true;
12894 
12895   return false;
12896 }
12897 
12898 
12899 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
12900    destination to use for the operation.  If different from the true
12901    destination in operands[0], a copy operation will be required.  */
12902 
12903 rtx
12904 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12905                             rtx operands[])
12906 {
12907   rtx dst = operands[0];
12908   rtx src1 = operands[1];
12909   rtx src2 = operands[2];
12910 
12911   /* Canonicalize operand order.  */
12912   if (ix86_swap_binary_operands_p (code, mode, operands))
12913     {
12914       rtx temp;
12915 
12916       /* It is invalid to swap operands of different modes.  */
12917       gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12918 
12919       temp = src1;
12920       src1 = src2;
12921       src2 = temp;
12922     }
12923 
12924   /* Both source operands cannot be in memory.  */
12925   if (MEM_P (src1) && MEM_P (src2))
12926     {
12927       /* Optimization: Only read from memory once.  */
12928       if (rtx_equal_p (src1, src2))
12929         {
12930           src2 = force_reg (mode, src2);
12931           src1 = src2;
12932         }
12933       else
12934         src2 = force_reg (mode, src2);
12935     }
12936 
12937   /* If the destination is memory, and we do not have matching source
12938      operands, do things in registers.  */
12939   if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12940     dst = gen_reg_rtx (mode);
12941 
12942   /* Source 1 cannot be a constant.  */
12943   if (CONSTANT_P (src1))
12944     src1 = force_reg (mode, src1);
12945 
12946   /* Source 1 cannot be a non-matching memory.  */
12947   if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12948     src1 = force_reg (mode, src1);
12949 
12950   operands[1] = src1;
12951   operands[2] = src2;
12952   return dst;
12953 }
12954 
12955 /* Similarly, but assume that the destination has already been
12956    set up properly.  */
12957 
12958 void
12959 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12960                                     enum machine_mode mode, rtx operands[])
12961 {
12962   rtx dst = ix86_fixup_binary_operands (code, mode, operands);
12963   gcc_assert (dst == operands[0]);
12964 }
12965 
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow 3 separate
   memory references (one output, two input) in a single insn.  */
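/* Note that x86 two-address insns accept at most one memory operand:
   e.g. "addl %eax, (%ebx)" and "addl (%ebx), %eax" are valid forms,
   while "addl (%ecx), (%ebx)" is not encodable.  */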
12969 
12970 void
12971 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12972                              rtx operands[])
12973 {
12974   rtx src1, src2, dst, op, clob;
12975 
12976   dst = ix86_fixup_binary_operands (code, mode, operands);
12977   src1 = operands[1];
12978   src2 = operands[2];
12979 
  /* Emit the instruction.  */
12982   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12983   if (reload_in_progress)
12984     {
12985       /* Reload doesn't know about the flags register, and doesn't know that
12986          it doesn't want to clobber it.  We can only do this with PLUS.  */
12987       gcc_assert (code == PLUS);
12988       emit_insn (op);
12989     }
12990   else
12991     {
12992       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12993       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12994     }
12995 
12996   /* Fix up the destination if needed.  */
12997   if (dst != operands[0])
12998     emit_move_insn (operands[0], dst);
12999 }
13000 
13001 /* Return TRUE or FALSE depending on whether the binary operator meets the
13002    appropriate constraints.  */
13003 
13004 int
13005 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
13006                          rtx operands[3])
13007 {
13008   rtx dst = operands[0];
13009   rtx src1 = operands[1];
13010   rtx src2 = operands[2];
13011 
13012   /* Both source operands cannot be in memory.  */
13013   if (MEM_P (src1) && MEM_P (src2))
13014     return 0;
13015 
13016   /* Canonicalize operand order for commutative operators.  */
13017   if (ix86_swap_binary_operands_p (code, mode, operands))
13018     {
13019       rtx temp = src1;
13020       src1 = src2;
13021       src2 = temp;
13022     }
13023 
13024   /* If the destination is memory, we must have a matching source operand.  */
13025   if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    return 0;
13027 
13028   /* Source 1 cannot be a constant.  */
13029   if (CONSTANT_P (src1))
13030     return 0;
13031 
13032   /* Source 1 cannot be a non-matching memory.  */
13033   if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13034     return 0;
13035 
13036   return 1;
13037 }
13038 
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow 2 separate
   memory references (one output, one input) in a single insn.  */
13042 
13043 void
13044 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
13045                             rtx operands[])
13046 {
13047   int matching_memory;
13048   rtx src, dst, op, clob;
13049 
13050   dst = operands[0];
13051   src = operands[1];
13052 
13053   /* If the destination is memory, and we do not have matching source
13054      operands, do things in registers.  */
13055   matching_memory = 0;
13056   if (MEM_P (dst))
13057     {
13058       if (rtx_equal_p (dst, src))
13059         matching_memory = 1;
13060       else
13061         dst = gen_reg_rtx (mode);
13062     }
13063 
  /* When the source operand is memory, the destination must match.  */
13065   if (MEM_P (src) && !matching_memory)
13066     src = force_reg (mode, src);
13067 
  /* Emit the instruction.  */
13070   op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
13071   if (reload_in_progress || code == NOT)
13072     {
13073       /* Reload doesn't know about the flags register, and doesn't know that
13074          it doesn't want to clobber it.  */
13075       gcc_assert (code == NOT);
13076       emit_insn (op);
13077     }
13078   else
13079     {
13080       clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13081       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13082     }
13083 
13084   /* Fix up the destination if needed.  */
13085   if (dst != operands[0])
13086     emit_move_insn (operands[0], dst);
13087 }
13088 
13089 /* Return TRUE or FALSE depending on whether the unary operator meets the
13090    appropriate constraints.  */
13091 
13092 int
13093 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13094                         enum machine_mode mode ATTRIBUTE_UNUSED,
13095                         rtx operands[2] ATTRIBUTE_UNUSED)
13096 {
  /* If one of the operands is memory, source and destination must match.  */
13098   if ((MEM_P (operands[0])
13099        || MEM_P (operands[1]))
13100       && ! rtx_equal_p (operands[0], operands[1]))
13101     return FALSE;
13102   return TRUE;
13103 }
13104 
13105 /* Post-reload splitter for converting an SF or DFmode value in an
13106    SSE register into an unsigned SImode.  */
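/* Per element the sequence computes: if x >= 2^31, subtract 2^31 before
   the signed cvtt conversion, then xor bit 31 back in.  E.g. (a sketch,
   for the V4SF path) with x = 3000000000.0: 3000000000.0 - 2^31 =
   852516352.0, cvttps2dq gives 852516352, and 852516352 ^ 0x80000000 =
   3000000000.  */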
13107 
13108 void
13109 ix86_split_convert_uns_si_sse (rtx operands[])
13110 {
13111   enum machine_mode vecmode;
13112   rtx value, large, zero_or_two31, input, two31, x;
13113 
13114   large = operands[1];
13115   zero_or_two31 = operands[2];
13116   input = operands[3];
13117   two31 = operands[4];
13118   vecmode = GET_MODE (large);
13119   value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13120 
13121   /* Load up the value into the low element.  We must ensure that the other
13122      elements are valid floats -- zero is the easiest such value.  */
13123   if (MEM_P (input))
13124     {
13125       if (vecmode == V4SFmode)
13126         emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13127       else
13128         emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
13129     }
13130   else
13131     {
13132       input = gen_rtx_REG (vecmode, REGNO (input));
13133       emit_move_insn (value, CONST0_RTX (vecmode));
13134       if (vecmode == V4SFmode)
13135         emit_insn (gen_sse_movss (value, value, input));
13136       else
13137         emit_insn (gen_sse2_movsd (value, value, input));
13138     }
13139 
13140   emit_move_insn (large, two31);
13141   emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
13142 
13143   x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13144   emit_insn (gen_rtx_SET (VOIDmode, large, x));
13145 
13146   x = gen_rtx_AND (vecmode, zero_or_two31, large);
13147   emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
13148 
13149   x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13150   emit_insn (gen_rtx_SET (VOIDmode, value, x));
13151 
13152   large = gen_rtx_REG (V4SImode, REGNO (large));
13153   emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
13154 
13155   x = gen_rtx_REG (V4SImode, REGNO (value));
13156   if (vecmode == V4SFmode)
13157     emit_insn (gen_sse2_cvttps2dq (x, value));
13158   else
13159     emit_insn (gen_sse2_cvttpd2dq (x, value));
13160   value = x;
13161 
13162   emit_insn (gen_xorv4si3 (value, value, large));
13163 }
13164 
13165 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13166    Expects the 64-bit DImode to be supplied in a pair of integral
13167    registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
13168    -mfpmath=sse, !optimize_size only.  */
13169 
13170 void
13171 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13172 {
13173   REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13174   rtx int_xmm, fp_xmm;
13175   rtx biases, exponents;
13176   rtx x;
13177 
13178   int_xmm = gen_reg_rtx (V4SImode);
13179   if (TARGET_INTER_UNIT_MOVES)
13180     emit_insn (gen_movdi_to_sse (int_xmm, input));
13181   else if (TARGET_SSE_SPLIT_REGS)
13182     {
13183       emit_clobber (int_xmm);
13184       emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13185     }
13186   else
13187     {
13188       x = gen_reg_rtx (V2DImode);
13189       ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13190       emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
13191     }
13192 
13193   x = gen_rtx_CONST_VECTOR (V4SImode,
13194                             gen_rtvec (4, GEN_INT (0x43300000UL),
13195                                        GEN_INT (0x45300000UL),
13196                                        const0_rtx, const0_rtx));
13197   exponents = validize_mem (force_const_mem (V4SImode, x));
13198 
  /* int_xmm = { 0x45300000UL, input/hi, 0x43300000UL, input/lo },
     listed from the highest element to the lowest.  */
13200   emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13201 
13202   /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13203      yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13204      Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13205      (0x1.0p84 + double(fp_value_hi_xmm)).
13206      Note these exponents differ by 32.  */
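
  /* As a numeric check (a sketch): for input hi:lo = 1:1, i.e. the value
     2^32 + 1, the two doubles are 0x1.0p52 + 1.0 and 0x1.0p84 + 2^32;
     after the bias subtraction below they become 1.0 and 4294967296.0,
     and the final add yields 4294967297.0 exactly.  */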
13207 
13208   fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13209 
13210   /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13211      in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
13212   real_ldexp (&bias_lo_rvt, &dconst1, 52);
13213   real_ldexp (&bias_hi_rvt, &dconst1, 84);
13214   biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13215   x = const_double_from_real_value (bias_hi_rvt, DFmode);
13216   biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13217   biases = validize_mem (force_const_mem (V2DFmode, biases));
13218   emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13219 
13220   /* Add the upper and lower DFmode values together.  */
13221   if (TARGET_SSE3)
13222     emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13223   else
13224     {
13225       x = copy_to_mode_reg (V2DFmode, fp_xmm);
13226       emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13227       emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13228     }
13229 
13230   ix86_expand_vector_extract (false, target, fp_xmm, 0);
13231 }
13232 
13233 /* Not used, but eases macroization of patterns.  */
13234 void
13235 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13236                                   rtx input ATTRIBUTE_UNUSED)
13237 {
13238   gcc_unreachable ();
13239 }
13240 
13241 /* Convert an unsigned SImode value into a DFmode.  Only currently used
13242    for SSE, but applicable anywhere.  */
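/* The trick (a sketch): adding -2^31 in SImode flips the value into
   signed range, and adding 2^31.0 back in DFmode undoes it exactly.
   E.g. input 0xFFFFFFFF wraps to 0x7FFFFFFF, converts to 2147483647.0,
   and 2147483647.0 + 2147483648.0 = 4294967295.0.  */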
13243 
13244 void
13245 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13246 {
13247   REAL_VALUE_TYPE TWO31r;
13248   rtx x, fp;
13249 
13250   x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13251                            NULL, 1, OPTAB_DIRECT);
13252 
13253   fp = gen_reg_rtx (DFmode);
13254   emit_insn (gen_floatsidf2 (fp, x));
13255 
13256   real_ldexp (&TWO31r, &dconst1, 31);
13257   x = const_double_from_real_value (TWO31r, DFmode);
13258 
13259   x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13260   if (x != target)
13261     emit_move_insn (target, x);
13262 }
13263 
13264 /* Convert a signed DImode value into a DFmode.  Only used for SSE in
13265    32-bit mode; otherwise we have a direct convert instruction.  */
13266 
13267 void
13268 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13269 {
13270   REAL_VALUE_TYPE TWO32r;
13271   rtx fp_lo, fp_hi, x;
13272 
13273   fp_lo = gen_reg_rtx (DFmode);
13274   fp_hi = gen_reg_rtx (DFmode);
13275 
13276   emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13277 
13278   real_ldexp (&TWO32r, &dconst1, 32);
13279   x = const_double_from_real_value (TWO32r, DFmode);
13280   fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
13281 
13282   ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13283 
13284   x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13285                            0, OPTAB_DIRECT);
13286   if (x != target)
13287     emit_move_insn (target, x);
13288 }
13289 
13290 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13291    For x86_32, -mfpmath=sse, !optimize_size only.  */
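/* Both 16-bit halves convert exactly, and fp_hi = hi * 2^16 is exact in
   SFmode, so the single rounding in the final PLUS gives the correctly
   rounded result; e.g. for 0x80000001, fp_hi = 0x1.0p31f, and adding
   fp_lo = 1.0f rounds back to 0x1.0p31f, the correctly rounded SFmode
   value of 2^31 + 1.  */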
13292 void
13293 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13294 {
13295   REAL_VALUE_TYPE ONE16r;
13296   rtx fp_hi, fp_lo, int_hi, int_lo, x;
13297 
13298   real_ldexp (&ONE16r, &dconst1, 16);
13299   x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
                                NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
                                NULL, 0, OPTAB_DIRECT);
13304   fp_hi = gen_reg_rtx (SFmode);
13305   fp_lo = gen_reg_rtx (SFmode);
13306   emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13307   emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13308   fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13309                                0, OPTAB_DIRECT);
13310   fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
13311                                0, OPTAB_DIRECT);
13312   if (!rtx_equal_p (target, fp_hi))
13313     emit_move_insn (target, fp_hi);
13314 }
13315 
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */
13319 
13320 rtx
13321 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
13322 {
13323   rtvec v;
13324   switch (mode)
13325     {
13326     case SImode:
13327       gcc_assert (vect);
13328       v = gen_rtvec (4, value, value, value, value);
13329       return gen_rtx_CONST_VECTOR (V4SImode, v);
13330 
13331     case DImode:
13332       gcc_assert (vect);
13333       v = gen_rtvec (2, value, value);
13334       return gen_rtx_CONST_VECTOR (V2DImode, v);
13335 
13336     case SFmode:
13337       if (vect)
13338         v = gen_rtvec (4, value, value, value, value);
13339       else
13340         v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13341                        CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13342       return gen_rtx_CONST_VECTOR (V4SFmode, v);
13343 
13344     case DFmode:
13345       if (vect)
13346         v = gen_rtvec (2, value, value);
13347       else
13348         v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13349       return gen_rtx_CONST_VECTOR (V2DFmode, v);
13350 
13351     default:
13352       gcc_unreachable ();
13353     }
13354 }
13355 
13356 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13357    and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
13358    for an SSE register.  If VECT is true, then replicate the mask for
13359    all elements of the vector register.  If INVERT is true, then create
13360    a mask excluding the sign bit.  */
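/* E.g. for DFmode with VECT this produces the vector constant
   { 0x8000000000000000, 0x8000000000000000 } (bitwise-complemented when
   INVERT), forced into an SSE register for use as an XOR or AND
   operand; without VECT only the low element carries the mask.  */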
13361 
13362 rtx
13363 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13364 {
13365   enum machine_mode vec_mode, imode;
13366   HOST_WIDE_INT hi, lo;
13367   int shift = 63;
13368   rtx v;
13369   rtx mask;
13370 
13371   /* Find the sign bit, sign extended to 2*HWI.  */
13372   switch (mode)
13373     {
13374     case SImode:
13375     case SFmode:
13376       imode = SImode;
13377       vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13378       lo = 0x80000000, hi = lo < 0;
13379       break;
13380 
13381     case DImode:
13382     case DFmode:
13383       imode = DImode;
13384       vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13385       if (HOST_BITS_PER_WIDE_INT >= 64)
13386         lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13387       else
13388         lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13389       break;
13390 
13391     case TImode:
13392     case TFmode:
13393       vec_mode = VOIDmode;
13394       if (HOST_BITS_PER_WIDE_INT >= 64)
13395         {
13396           imode = TImode;
13397           lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13398         }
13399       else
13400         {
13401           rtvec vec;
13402 
13403           imode = DImode;
13404           lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13405 
13406           if (invert)
13407             {
13408               lo = ~lo, hi = ~hi;
13409               v = constm1_rtx;
13410             }
13411           else
13412             v = const0_rtx;
13413 
13414           mask = immed_double_const (lo, hi, imode);
13415 
13416           vec = gen_rtvec (2, v, mask);
13417           v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13418           v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
13419 
13420           return v;
13421         }
      break;
13423 
13424     default:
13425       gcc_unreachable ();
13426     }
13427 
13428   if (invert)
13429     lo = ~lo, hi = ~hi;
13430 
13431   /* Force this value into the low part of a fp vector constant.  */
13432   mask = immed_double_const (lo, hi, imode);
13433   mask = gen_lowpart (mode, mask);
13434 
13435   if (vec_mode == VOIDmode)
13436     return force_reg (mode, mask);
13437 
13438   v = ix86_build_const_vector (mode, vect, mask);
13439   return force_reg (vec_mode, v);
13440 }
13441 
13442 /* Generate code for floating point ABS or NEG.  */
13443 
13444 void
13445 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
13446                                 rtx operands[])
13447 {
13448   rtx mask, set, use, clob, dst, src;
13449   bool use_sse = false;
13450   bool vector_mode = VECTOR_MODE_P (mode);
13451   enum machine_mode elt_mode = mode;
13452 
13453   if (vector_mode)
13454     {
13455       elt_mode = GET_MODE_INNER (mode);
13456       use_sse = true;
13457     }
13458   else if (mode == TFmode)
13459     use_sse = true;
13460   else if (TARGET_SSE_MATH)
13461     use_sse = SSE_FLOAT_MODE_P (mode);
13462 
13463   /* NEG and ABS performed with SSE use bitwise mask operations.
13464      Create the appropriate mask now.  */
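  /* That is, neg(x) = x ^ signbit and abs(x) = x & ~signbit; e.g. in
     SFmode, -1.25f = 0xBFA00000 = 0x3FA00000 ^ 0x80000000.  */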
13465   if (use_sse)
13466     mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
13467   else
13468     mask = NULL_RTX;
13469 
13470   dst = operands[0];
13471   src = operands[1];
13472 
13473   if (vector_mode)
13474     {
13475       set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
13476       set = gen_rtx_SET (VOIDmode, dst, set);
13477       emit_insn (set);
13478     }
13479   else
13480     {
13481       set = gen_rtx_fmt_e (code, mode, src);
13482       set = gen_rtx_SET (VOIDmode, dst, set);
13483       if (mask)
13484         {
13485           use = gen_rtx_USE (VOIDmode, mask);
13486           clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13487           emit_insn (gen_rtx_PARALLEL (VOIDmode,
13488                                        gen_rtvec (3, set, use, clob)));
13489         }
13490       else
13491         emit_insn (set);
13492     }
13493 }
13494 
13495 /* Expand a copysign operation.  Special case operand 0 being a constant.  */
13496 
13497 void
13498 ix86_expand_copysign (rtx operands[])
13499 {
13500   enum machine_mode mode;
13501   rtx dest, op0, op1, mask, nmask;
13502 
13503   dest = operands[0];
13504   op0 = operands[1];
13505   op1 = operands[2];
13506 
13507   mode = GET_MODE (dest);
13508 
13509   if (GET_CODE (op0) == CONST_DOUBLE)
13510     {
13511       rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
13512 
13513       if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13514         op0 = simplify_unary_operation (ABS, mode, op0, mode);
13515 
13516       if (mode == SFmode || mode == DFmode)
13517         {
13518           enum machine_mode vmode;
13519 
13520           vmode = mode == SFmode ? V4SFmode : V2DFmode;
13521 
13522           if (op0 == CONST0_RTX (mode))
13523             op0 = CONST0_RTX (vmode);
13524           else
13525             {
13526               rtvec v;
13527 
13528               if (mode == SFmode)
13529                 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13530                                CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13531               else
13532                 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
13533 
13534               op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
13535             }
13536         }
13537       else if (op0 != CONST0_RTX (mode))
13538         op0 = force_reg (mode, op0);
13539 
13540       mask = ix86_build_signbit_mask (mode, 0, 0);
13541 
13542       if (mode == SFmode)
13543         copysign_insn = gen_copysignsf3_const;
13544       else if (mode == DFmode)
13545         copysign_insn = gen_copysigndf3_const;
13546       else
13547         copysign_insn = gen_copysigntf3_const;
13548 
      emit_insn (copysign_insn (dest, op0, op1, mask));
13550     }
13551   else
13552     {
13553       rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13554 
13555       nmask = ix86_build_signbit_mask (mode, 0, 1);
13556       mask = ix86_build_signbit_mask (mode, 0, 0);
13557 
13558       if (mode == SFmode)
13559         copysign_insn = gen_copysignsf3_var;
13560       else if (mode == DFmode)
13561         copysign_insn = gen_copysigndf3_var;
13562       else
13563         copysign_insn = gen_copysigntf3_var;
13564 
13565       emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
13566     }
13567 }
13568 
13569 /* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
13570    be a constant, and so has already been expanded into a vector constant.  */
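/* The identity computed is copysign(c, y) = (y & signbit) | |c|, with
   |c| supplied as the vector constant op0; the insn pattern is assumed
   to tie the destination to op1, so the AND picks up y's sign bit.  */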
13571 
13572 void
13573 ix86_split_copysign_const (rtx operands[])
13574 {
13575   enum machine_mode mode, vmode;
13576   rtx dest, op0, op1, mask, x;
13577 
13578   dest = operands[0];
13579   op0 = operands[1];
13580   op1 = operands[2];
13581   mask = operands[3];
13582 
13583   mode = GET_MODE (dest);
13584   vmode = GET_MODE (mask);
13585 
13586   dest = simplify_gen_subreg (vmode, dest, mode, 0);
13587   x = gen_rtx_AND (vmode, dest, mask);
13588   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13589 
13590   if (op0 != CONST0_RTX (vmode))
13591     {
13592       x = gen_rtx_IOR (vmode, dest, op0);
13593       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13594     }
13595 }
13596 
13597 /* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
13598    so we have to do two masks.  */
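/* The identity computed is copysign(x, y) = (x & ~signbit) | (y & signbit),
   i.e. (op0 & nmask) | (op1 & mask); the alternatives below differ only
   in which operands the register allocator chose to overlap.  */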
13599 
13600 void
13601 ix86_split_copysign_var (rtx operands[])
13602 {
13603   enum machine_mode mode, vmode;
13604   rtx dest, scratch, op0, op1, mask, nmask, x;
13605 
13606   dest = operands[0];
13607   scratch = operands[1];
13608   op0 = operands[2];
13609   op1 = operands[3];
13610   nmask = operands[4];
13611   mask = operands[5];
13612 
13613   mode = GET_MODE (dest);
13614   vmode = GET_MODE (mask);
13615 
13616   if (rtx_equal_p (op0, op1))
13617     {
13618       /* Shouldn't happen often (it's useless, obviously), but when it does
13619          we'd generate incorrect code if we continue below.  */
13620       emit_move_insn (dest, op0);
13621       return;
13622     }
13623 
13624   if (REG_P (mask) && REGNO (dest) == REGNO (mask))     /* alternative 0 */
13625     {
13626       gcc_assert (REGNO (op1) == REGNO (scratch));
13627 
13628       x = gen_rtx_AND (vmode, scratch, mask);
13629       emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13630 
13631       dest = mask;
13632       op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13633       x = gen_rtx_NOT (vmode, dest);
13634       x = gen_rtx_AND (vmode, x, op0);
13635       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13636     }
13637   else
13638     {
13639       if (REGNO (op1) == REGNO (scratch))               /* alternative 1,3 */
13640         {
13641           x = gen_rtx_AND (vmode, scratch, mask);
13642         }
13643       else                                              /* alternative 2,4 */
13644         {
13645           gcc_assert (REGNO (mask) == REGNO (scratch));
13646           op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13647           x = gen_rtx_AND (vmode, scratch, op1);
13648         }
13649       emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13650 
13651       if (REGNO (op0) == REGNO (dest))                  /* alternative 1,2 */
13652         {
13653           dest = simplify_gen_subreg (vmode, op0, mode, 0);
13654           x = gen_rtx_AND (vmode, dest, nmask);
13655         }
13656       else                                              /* alternative 3,4 */
13657         {
13658           gcc_assert (REGNO (nmask) == REGNO (dest));
13659           dest = nmask;
13660           op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13661           x = gen_rtx_AND (vmode, dest, op0);
13662         }
13663       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13664     }
13665 
13666   x = gen_rtx_IOR (vmode, dest, scratch);
13667   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13668 }
13669 
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and whether the
   CC mode is at least as constrained as REQ_MODE.  */
13673 
13674 int
13675 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13676 {
13677   rtx set;
13678   enum machine_mode set_mode;
13679 
13680   set = PATTERN (insn);
13681   if (GET_CODE (set) == PARALLEL)
13682     set = XVECEXP (set, 0, 0);
13683   gcc_assert (GET_CODE (set) == SET);
13684   gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13685 
13686   set_mode = GET_MODE (SET_DEST (set));
13687   switch (set_mode)
13688     {
13689     case CCNOmode:
13690       if (req_mode != CCNOmode
13691           && (req_mode != CCmode
13692               || XEXP (SET_SRC (set), 1) != const0_rtx))
13693         return 0;
13694       break;
13695     case CCmode:
13696       if (req_mode == CCGCmode)
13697         return 0;
13698       /* FALLTHRU */
13699     case CCGCmode:
13700       if (req_mode == CCGOCmode || req_mode == CCNOmode)
13701         return 0;
13702       /* FALLTHRU */
13703     case CCGOCmode:
13704       if (req_mode == CCZmode)
13705         return 0;
13706       /* FALLTHRU */
13707     case CCAmode:
13708     case CCCmode:
13709     case CCOmode:
13710     case CCSmode:
13711     case CCZmode:
13712       break;
13713 
13714     default:
13715       gcc_unreachable ();
13716     }
13717 
13718   return (GET_MODE (SET_SRC (set)) == set_mode);
13719 }
13720 
13721 /* Generate insn patterns to do an integer compare of OPERANDS.  */
13722 
13723 static rtx
13724 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
13725 {
13726   enum machine_mode cmpmode;
13727   rtx tmp, flags;
13728 
13729   cmpmode = SELECT_CC_MODE (code, op0, op1);
13730   flags = gen_rtx_REG (cmpmode, FLAGS_REG);
13731 
13732   /* This is very simple, but making the interface the same as in the
13733      FP case makes the rest of the code easier.  */
13734   tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
13735   emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
13736 
13737   /* Return the test that should be put into the flags user, i.e.
13738      the bcc, scc, or cmov instruction.  */
13739   return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
13740 }
13741 
13742 /* Figure out whether to use ordered or unordered fp comparisons.
13743    Return the appropriate mode to use.  */
13744 
13745 enum machine_mode
13746 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
13747 {
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
13753   return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
13754 }
13755 
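/* Return the CC mode to use for comparing OP0 and OP1 with CODE.  E.g. a
   signed x < y selects CCGCmode, while x < 0 can use CCGOCmode, in which
   the overflow flag is not meaningful.  */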
13756 enum machine_mode
13757 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
13758 {
13759   enum machine_mode mode = GET_MODE (op0);
13760 
13761   if (SCALAR_FLOAT_MODE_P (mode))
13762     {
13763       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
13764       return ix86_fp_compare_mode (code);
13765     }
13766 
13767   switch (code)
13768     {
13769       /* Only zero flag is needed.  */
13770     case EQ:                    /* ZF=0 */
13771     case NE:                    /* ZF!=0 */
13772       return CCZmode;
13773       /* Codes needing carry flag.  */
13774     case GEU:                   /* CF=0 */
13775     case LTU:                   /* CF=1 */
13776       /* Detect overflow checks.  They need just the carry flag.  */
13777       if (GET_CODE (op0) == PLUS
13778           && rtx_equal_p (op1, XEXP (op0, 0)))
13779         return CCCmode;
13780       else
13781         return CCmode;
13782     case GTU:                   /* CF=0 & ZF=0 */
13783     case LEU:                   /* CF=1 | ZF=1 */
13784       /* Detect overflow checks.  They need just the carry flag.  */
13785       if (GET_CODE (op0) == MINUS
13786           && rtx_equal_p (op1, XEXP (op0, 0)))
13787         return CCCmode;
13788       else
13789         return CCmode;
      /* Codes possibly doable with only the sign flag when
         comparing against zero.  */
13792     case GE:                    /* SF=OF   or   SF=0 */
13793     case LT:                    /* SF<>OF  or   SF=1 */
13794       if (op1 == const0_rtx)
13795         return CCGOCmode;
13796       else
13797         /* For other cases Carry flag is not required.  */
13798         return CCGCmode;
      /* Codes doable only with the sign flag when comparing
         against zero, but we lack a jump instruction for them,
         so we need to use relational tests against overflow,
         which thus needs to be zero.  */
13803     case GT:                    /* ZF=0 & SF=OF */
13804     case LE:                    /* ZF=1 | SF<>OF */
13805       if (op1 == const0_rtx)
13806         return CCNOmode;
13807       else
13808         return CCGCmode;
      /* The strcmp pattern does (use flags) and combine may ask us
         for a proper mode.  */
13811     case USE:
13812       return CCmode;
13813     default:
13814       gcc_unreachable ();
13815     }
13816 }
13817 
13818 /* Return the fixed registers used for condition codes.  */
13819 
13820 static bool
13821 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13822 {
13823   *p1 = FLAGS_REG;
13824   *p2 = FPSR_REG;
13825   return true;
13826 }
13827 
13828 /* If two condition code modes are compatible, return a condition code
13829    mode which is compatible with both.  Otherwise, return
13830    VOIDmode.  */
13831 
13832 static enum machine_mode
13833 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
13834 {
13835   if (m1 == m2)
13836     return m1;
13837 
13838   if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
13839     return VOIDmode;
13840 
13841   if ((m1 == CCGCmode && m2 == CCGOCmode)
13842       || (m1 == CCGOCmode && m2 == CCGCmode))
13843     return CCGCmode;
13844 
13845   switch (m1)
13846     {
13847     default:
13848       gcc_unreachable ();
13849 
13850     case CCmode:
13851     case CCGCmode:
13852     case CCGOCmode:
13853     case CCNOmode:
13854     case CCAmode:
13855     case CCCmode:
13856     case CCOmode:
13857     case CCSmode:
13858     case CCZmode:
13859       switch (m2)
13860         {
13861         default:
13862           return VOIDmode;
13863 
13864         case CCmode:
13865         case CCGCmode:
13866         case CCGOCmode:
13867         case CCNOmode:
13868         case CCAmode:
13869         case CCCmode:
13870         case CCOmode:
13871         case CCSmode:
13872         case CCZmode:
13873           return CCmode;
13874         }
13875 
13876     case CCFPmode:
13877     case CCFPUmode:
13878       /* These are only compatible with themselves, which we already
13879          checked above.  */
13880       return VOIDmode;
13881     }
13882 }
13883 
/* Split comparison code CODE into comparisons we can do using branch
   instructions.  BYPASS_CODE is the comparison code for a branch that
   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
   is not required, its value is set to UNKNOWN.
   We never require more than two branches.  */
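/* For example, LT under TARGET_IEEE_FP becomes *first_code = UNLT with
   *bypass_code = UNORDERED: branch around the target when the operands
   compare unordered, then take it when CF=1 (below).  */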
13889 
13890 void
13891 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
13892                           enum rtx_code *first_code,
13893                           enum rtx_code *second_code)
13894 {
13895   *first_code = code;
13896   *bypass_code = UNKNOWN;
13897   *second_code = UNKNOWN;
13898 
13899   /* The fcomi comparison sets flags as follows:
13900 
13901      cmp    ZF PF CF
13902      >      0  0  0
13903      <      0  0  1
13904      =      1  0  0
13905      un     1  1  1 */
13906 
13907   switch (code)
13908     {
13909     case GT:                    /* GTU - CF=0 & ZF=0 */
13910     case GE:                    /* GEU - CF=0 */
13911     case ORDERED:               /* PF=0 */
13912     case UNORDERED:             /* PF=1 */
13913     case UNEQ:                  /* EQ - ZF=1 */
13914     case UNLT:                  /* LTU - CF=1 */
13915     case UNLE:                  /* LEU - CF=1 | ZF=1 */
13916     case LTGT:                  /* EQ - ZF=0 */
13917       break;
13918     case LT:                    /* LTU - CF=1 - fails on unordered */
13919       *first_code = UNLT;
13920       *bypass_code = UNORDERED;
13921       break;
13922     case LE:                    /* LEU - CF=1 | ZF=1 - fails on unordered */
13923       *first_code = UNLE;
13924       *bypass_code = UNORDERED;
13925       break;
13926     case EQ:                    /* EQ - ZF=1 - fails on unordered */
13927       *first_code = UNEQ;
13928       *bypass_code = UNORDERED;
13929       break;
13930     case NE:                    /* NE - ZF=0 - fails on unordered */
13931       *first_code = LTGT;
13932       *second_code = UNORDERED;
13933       break;
13934     case UNGE:                  /* GEU - CF=0 - fails on unordered */
13935       *first_code = GE;
13936       *second_code = UNORDERED;
13937       break;
13938     case UNGT:                  /* GTU - CF=0 & ZF=0 - fails on unordered */
13939       *first_code = GT;
13940       *second_code = UNORDERED;
13941       break;
13942     default:
13943       gcc_unreachable ();
13944     }
13945   if (!TARGET_IEEE_FP)
13946     {
13947       *second_code = UNKNOWN;
13948       *bypass_code = UNKNOWN;
13949     }
13950 }
13951 
/* Return the cost of a comparison done using fcom + arithmetic operations
   on AX.  All of the following functions use the number of instructions as
   a cost metric.  In the future this should be tweaked to compute bytes for
   optimize_size and take into account the performance of various
   instructions on various CPUs.  */
13956 static int
13957 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
13958 {
13959   if (!TARGET_IEEE_FP)
13960     return 4;
13961   /* The cost of code output by ix86_expand_fp_compare.  */
13962   switch (code)
13963     {
13964     case UNLE:
13965     case UNLT:
13966     case LTGT:
13967     case GT:
13968     case GE:
13969     case UNORDERED:
13970     case ORDERED:
13971     case UNEQ:
      return 4;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      return 5;
    case LE:
    case UNGT:
      return 6;
13984     default:
13985       gcc_unreachable ();
13986     }
13987 }
13988 
13989 /* Return cost of comparison done using fcomi operation.
13990    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
13991 static int
13992 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
13993 {
13994   enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not supported -
     this prevents gcc from using it.  */
13997   if (!TARGET_CMOVE)
13998     return 1024;
13999   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14000   return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
14001 }
14002 
14003 /* Return cost of comparison done using sahf operation.
14004    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
14005 static int
14006 ix86_fp_comparison_sahf_cost (enum rtx_code code)
14007 {
14008   enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not preferred -
     this prevents gcc from using it.  */
14011   if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
14012     return 1024;
14013   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14014   return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
14015 }
14016 
14017 /* Compute cost of the comparison done using any method.
14018    See ix86_fp_comparison_arithmetics_cost for the metrics.  */
14019 static int
14020 ix86_fp_comparison_cost (enum rtx_code code)
14021 {
  int fcomi_cost, sahf_cost, arithmetics_cost;
14023   int min;
14024 
14025   fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
14026   sahf_cost = ix86_fp_comparison_sahf_cost (code);
14027 
14028   min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
14029   if (min > sahf_cost)
14030     min = sahf_cost;
14031   if (min > fcomi_cost)
14032     min = fcomi_cost;
14033   return min;
14034 }
14035 
14036 /* Return true if we should use an FCOMI instruction for this
14037    fp comparison.  */
14038 
14039 int
14040 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
14041 {
14042   enum rtx_code swapped_code = swap_condition (code);
14043 
14044   return ((ix86_fp_comparison_cost (code)
14045            == ix86_fp_comparison_fcomi_cost (code))
14046           || (ix86_fp_comparison_cost (swapped_code)
14047               == ix86_fp_comparison_fcomi_cost (swapped_code)));
14048 }
14049 
14050 /* Swap, force into registers, or otherwise massage the two operands
14051    to a fp comparison.  The operands are updated in place; the new
14052    comparison code is returned.  */
14053 
14054 static enum rtx_code
14055 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14056 {
14057   enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14058   rtx op0 = *pop0, op1 = *pop1;
14059   enum machine_mode op_mode = GET_MODE (op0);
14060   int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14061 
14062   /* All of the unordered compare instructions only work on registers.
14063      The same is true of the fcomi compare instructions.  The XFmode
14064      compare instructions require registers except when comparing
14065      against zero or when converting operand 1 from fixed point to
14066      floating point.  */
14067 
14068   if (!is_sse
14069       && (fpcmp_mode == CCFPUmode
14070           || (op_mode == XFmode
14071               && ! (standard_80387_constant_p (op0) == 1
14072                     || standard_80387_constant_p (op1) == 1)
14073               && GET_CODE (op1) != FLOAT)
14074           || ix86_use_fcomi_compare (code)))
14075     {
14076       op0 = force_reg (op_mode, op0);
14077       op1 = force_reg (op_mode, op1);
14078     }
14079   else
14080     {
14081       /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
14082          things around if they appear profitable, otherwise force op0
14083          into a register.  */
14084 
14085       if (standard_80387_constant_p (op0) == 0
14086           || (MEM_P (op0)
14087               && ! (standard_80387_constant_p (op1) == 0
14088                     || MEM_P (op1))))
14089         {
14090           rtx tmp;
14091           tmp = op0, op0 = op1, op1 = tmp;
14092           code = swap_condition (code);
14093         }
14094 
14095       if (!REG_P (op0))
14096         op0 = force_reg (op_mode, op0);
14097 
14098       if (CONSTANT_P (op1))
14099         {
14100           int tmp = standard_80387_constant_p (op1);
14101           if (tmp == 0)
14102             op1 = validize_mem (force_const_mem (op_mode, op1));
14103           else if (tmp == 1)
14104             {
14105               if (TARGET_CMOVE)
14106                 op1 = force_reg (op_mode, op1);
14107             }
14108           else
14109             op1 = force_reg (op_mode, op1);
14110         }
14111     }
14112 
14113   /* Try to rearrange the comparison to make it cheaper.  */
14114   if (ix86_fp_comparison_cost (code)
14115       > ix86_fp_comparison_cost (swap_condition (code))
14116       && (REG_P (op1) || can_create_pseudo_p ()))
14117     {
14118       rtx tmp;
14119       tmp = op0, op0 = op1, op1 = tmp;
14120       code = swap_condition (code);
14121       if (!REG_P (op0))
14122         op0 = force_reg (op_mode, op0);
14123     }
14124 
14125   *pop0 = op0;
14126   *pop1 = op1;
14127   return code;
14128 }
14129 
/* Convert a comparison code we use to represent an FP comparison into the
   integer code that will result in a proper branch.  Return UNKNOWN if no
   such code is available.  */
14133 
14134 enum rtx_code
14135 ix86_fp_compare_code_to_integer (enum rtx_code code)
14136 {
14137   switch (code)
14138     {
14139     case GT:
14140       return GTU;
14141     case GE:
14142       return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
14159     default:
14160       return UNKNOWN;
14161     }
14162 }
14163 
14164 /* Generate insn patterns to do a floating point compare of OPERANDS.  */
14165 
14166 static rtx
14167 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
14168                         rtx *second_test, rtx *bypass_test)
14169 {
14170   enum machine_mode fpcmp_mode, intcmp_mode;
14171   rtx tmp, tmp2;
14172   int cost = ix86_fp_comparison_cost (code);
14173   enum rtx_code bypass_code, first_code, second_code;
14174 
14175   fpcmp_mode = ix86_fp_compare_mode (code);
14176   code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14177 
14178   if (second_test)
14179     *second_test = NULL_RTX;
14180   if (bypass_test)
14181     *bypass_test = NULL_RTX;
14182 
14183   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14184 
14185   /* Do fcomi/sahf based test when profitable.  */
14186   if (ix86_fp_comparison_arithmetics_cost (code) > cost
14187       && (bypass_code == UNKNOWN || bypass_test)
14188       && (second_code == UNKNOWN || second_test))
14189     {
14190       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14191       tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14192                          tmp);
14193       if (TARGET_CMOVE)
14194         emit_insn (tmp);
14195       else
14196         {
14197           gcc_assert (TARGET_SAHF);
14198 
14199           if (!scratch)
14200             scratch = gen_reg_rtx (HImode);
14201           tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14202 
14203           emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14204         }
14205 
14206       /* The FP codes work out to act like unsigned.  */
14207       intcmp_mode = fpcmp_mode;
14208       code = first_code;
14209       if (bypass_code != UNKNOWN)
14210         *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
14211                                        gen_rtx_REG (intcmp_mode, FLAGS_REG),
14212                                        const0_rtx);
14213       if (second_code != UNKNOWN)
14214         *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
14215                                        gen_rtx_REG (intcmp_mode, FLAGS_REG),
14216                                        const0_rtx);
14217     }
14218   else
14219     {
14220       /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
14221       tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14222       tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14223       if (!scratch)
14224         scratch = gen_reg_rtx (HImode);
14225       emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14226 
      /* In the unordered case, we have to check C2 for NaNs, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */
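      /* After fnstsw, AH holds the FPU status-word bits C0 = 0x01,
         C2 = 0x04 and C3 = 0x40 (C1 = 0x02 is unused here); the 0x45
         constant used below thus tests C0|C2|C3 at once.  */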
14231 
14232       intcmp_mode = CCNOmode;
14233       switch (code)
14234         {
14235         case GT:
14236         case UNGT:
14237           if (code == GT || !TARGET_IEEE_FP)
14238             {
14239               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14240               code = EQ;
14241             }
14242           else
14243             {
14244               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14245               emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14246               emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14247               intcmp_mode = CCmode;
14248               code = GEU;
14249             }
14250           break;
14251         case LT:
14252         case UNLT:
14253           if (code == LT && TARGET_IEEE_FP)
14254             {
14255               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14256               emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14257               intcmp_mode = CCmode;
14258               code = EQ;
14259             }
14260           else
14261             {
14262               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14263               code = NE;
14264             }
14265           break;
14266         case GE:
14267         case UNGE:
14268           if (code == GE || !TARGET_IEEE_FP)
14269             {
14270               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14271               code = EQ;
14272             }
14273           else
14274             {
14275               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14276               emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14277                                              GEN_INT (0x01)));
14278               code = NE;
14279             }
14280           break;
14281         case LE:
14282         case UNLE:
14283           if (code == LE && TARGET_IEEE_FP)
14284             {
14285               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14286               emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14287               emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14288               intcmp_mode = CCmode;
14289               code = LTU;
14290             }
14291           else
14292             {
14293               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14294               code = NE;
14295             }
14296           break;
14297         case EQ:
14298         case UNEQ:
14299           if (code == EQ && TARGET_IEEE_FP)
14300             {
14301               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14302               emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14303               intcmp_mode = CCmode;
14304               code = EQ;
14305             }
14306           else
14307             {
14308               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14309               code = NE;
14310               break;
14311             }
14312           break;
14313         case NE:
14314         case LTGT:
14315           if (code == NE && TARGET_IEEE_FP)
14316             {
14317               emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14318               emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14319                                              GEN_INT (0x40)));
14320               code = NE;
14321             }
14322           else
14323             {
14324               emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14325               code = EQ;
14326             }
14327           break;
14328 
14329         case UNORDERED:
14330           emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14331           code = NE;
14332           break;
14333         case ORDERED:
14334           emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14335           code = EQ;
14336           break;
14337 
14338         default:
14339           gcc_unreachable ();
14340         }
14341     }
14342 
14343   /* Return the test that should be put into the flags user, i.e.
14344      the bcc, scc, or cmov instruction.  */
14345   return gen_rtx_fmt_ee (code, VOIDmode,
14346                          gen_rtx_REG (intcmp_mode, FLAGS_REG),
14347                          const0_rtx);
14348 }
14349 
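/* Expand the comparison CODE between ix86_compare_op0 and
   ix86_compare_op1 (or a previously emitted compare recorded in
   ix86_compare_emitted) and return the rtx to put into the flags user.
   SECOND_TEST and BYPASS_TEST are filled in as by
   ix86_expand_fp_compare when extra FP tests are required.  */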
14350 rtx
14351 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14352 {
14353   rtx op0, op1, ret;
14354   op0 = ix86_compare_op0;
14355   op1 = ix86_compare_op1;
14356 
14357   if (second_test)
14358     *second_test = NULL_RTX;
14359   if (bypass_test)
14360     *bypass_test = NULL_RTX;
14361 
14362   if (ix86_compare_emitted)
14363     {
14364       ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
14365       ix86_compare_emitted = NULL_RTX;
14366     }
14367   else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14368     {
14369       gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14370       ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14371                                     second_test, bypass_test);
14372     }
14373   else
14374     ret = ix86_expand_int_compare (code, op0, op1);
14375 
14376   return ret;
14377 }
14378 
14379 /* Return true if the CODE will result in nontrivial jump sequence.  */
14380 bool
14381 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14382 {
14383   enum rtx_code bypass_code, first_code, second_code;
14384   if (!TARGET_CMOVE)
14385     return true;
14386   ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14387   return bypass_code != UNKNOWN || second_code != UNKNOWN;
14388 }
14389 
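/* Expand a conditional branch: compare ix86_compare_op0 with
   ix86_compare_op1 using CODE and jump to LABEL when it holds.  */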
14390 void
14391 ix86_expand_branch (enum rtx_code code, rtx label)
14392 {
14393   rtx tmp;
14394 
  /* If we have emitted a compare insn, go straight to simple.
     ix86_expand_compare won't emit anything if ix86_compare_emitted
     is non-NULL.  */
14398   if (ix86_compare_emitted)
14399     goto simple;
14400 
14401   switch (GET_MODE (ix86_compare_op0))
14402     {
14403     case QImode:
14404     case HImode:
14405     case SImode:
14406       simple:
14407       tmp = ix86_expand_compare (code, NULL, NULL);
14408       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14409                                   gen_rtx_LABEL_REF (VOIDmode, label),
14410                                   pc_rtx);
14411       emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14412       return;
14413 
14414     case SFmode:
14415     case DFmode:
14416     case XFmode:
14417       {
14418         rtvec vec;
14419         int use_fcomi;
14420         enum rtx_code bypass_code, first_code, second_code;
14421 
14422         code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14423                                              &ix86_compare_op1);
14424 
14425         ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14426 
        /* Check whether we will use the natural sequence with one jump.  If
           so, we can expand the jump early.  Otherwise delay expansion by
           creating a compound insn so as not to confuse the optimizers.  */
14430         if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14431           {
14432             ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
14433                                   gen_rtx_LABEL_REF (VOIDmode, label),
14434                                   pc_rtx, NULL_RTX, NULL_RTX);
14435           }
14436         else
14437           {
14438             tmp = gen_rtx_fmt_ee (code, VOIDmode,
14439                                   ix86_compare_op0, ix86_compare_op1);
14440             tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14441                                         gen_rtx_LABEL_REF (VOIDmode, label),
14442                                         pc_rtx);
14443             tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
14444 
14445             use_fcomi = ix86_use_fcomi_compare (code);
14446             vec = rtvec_alloc (3 + !use_fcomi);
14447             RTVEC_ELT (vec, 0) = tmp;
14448             RTVEC_ELT (vec, 1)
14449               = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
14450             RTVEC_ELT (vec, 2)
14451               = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
14452             if (! use_fcomi)
14453               RTVEC_ELT (vec, 3)
14454                 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
14455 
14456             emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
14457           }
14458         return;
14459       }
14460 
    case DImode:
      if (TARGET_64BIT)
        goto simple;
      /* FALLTHRU */
    case TImode:
      /* Expand DImode/TImode branch into multiple compare+branch.  */
14466       {
14467         rtx lo[2], hi[2], label2;
14468         enum rtx_code code1, code2, code3;
14469         enum machine_mode submode;
14470 
14471         if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14472           {
14473             tmp = ix86_compare_op0;
14474             ix86_compare_op0 = ix86_compare_op1;
14475             ix86_compare_op1 = tmp;
14476             code = swap_condition (code);
14477           }
14478         if (GET_MODE (ix86_compare_op0) == DImode)
14479           {
14480             split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14481             split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14482             submode = SImode;
14483           }
14484         else
14485           {
14486             split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14487             split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14488             submode = DImode;
14489           }
14490 
14491         /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14492            avoid two branches.  This costs one extra insn, so disable when
14493            optimizing for size.  */
14494 
14495         if ((code == EQ || code == NE)
14496             && (!optimize_insn_for_size_p ()
14497                 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14498           {
14499             rtx xor0, xor1;
14500 
14501             xor1 = hi[0];
14502             if (hi[1] != const0_rtx)
14503               xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14504                                    NULL_RTX, 0, OPTAB_WIDEN);
14505 
14506             xor0 = lo[0];
14507             if (lo[1] != const0_rtx)
14508               xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14509                                    NULL_RTX, 0, OPTAB_WIDEN);
14510 
14511             tmp = expand_binop (submode, ior_optab, xor1, xor0,
14512                                 NULL_RTX, 0, OPTAB_WIDEN);
14513 
14514             ix86_compare_op0 = tmp;
14515             ix86_compare_op1 = const0_rtx;
14516             ix86_expand_branch (code, label);
14517             return;
14518           }
14519 
        /* Otherwise, if we are doing less-than or greater-or-equal and
           op1 is a constant whose low word is zero, we can just examine
           the high word.  Similarly, a low word of all ones handles
           less-or-equal and greater-than.  */
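
        /* For example, the unsigned DImode test "x < 0x500000000",
           whose low word is zero, reduces to "hi(x) < 5"; likewise
           "x <= 0x4ffffffff", whose low word is all ones, reduces to
           "hi(x) <= 4".  */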
14524 
14525         if (CONST_INT_P (hi[1]))
14526           switch (code)
14527             {
14528             case LT: case LTU: case GE: case GEU:
14529               if (lo[1] == const0_rtx)
14530                 {
14531                   ix86_compare_op0 = hi[0];
14532                   ix86_compare_op1 = hi[1];
14533                   ix86_expand_branch (code, label);
14534                   return;
14535                 }
14536               break;
14537             case LE: case LEU: case GT: case GTU:
14538               if (lo[1] == constm1_rtx)
14539                 {
14540                   ix86_compare_op0 = hi[0];
14541                   ix86_compare_op1 = hi[1];
14542                   ix86_expand_branch (code, label);
14543                   return;
14544                 }
14545               break;
14546             default:
14547               break;
14548             }
14549 
14550         /* Otherwise, we need two or three jumps.  */
14551 
14552         label2 = gen_label_rtx ();
14553 
14554         code1 = code;
14555         code2 = swap_condition (code);
14556         code3 = unsigned_condition (code);
14557 
14558         switch (code)
14559           {
14560           case LT: case GT: case LTU: case GTU:
14561             break;
14562 
14563           case LE:   code1 = LT;  code2 = GT;  break;
14564           case GE:   code1 = GT;  code2 = LT;  break;
14565           case LEU:  code1 = LTU; code2 = GTU; break;
14566           case GEU:  code1 = GTU; code2 = LTU; break;
14567 
14568           case EQ:   code1 = UNKNOWN; code2 = NE;  break;
14569           case NE:   code2 = UNKNOWN; break;
14570 
14571           default:
14572             gcc_unreachable ();
14573           }
14574 
14575         /*
14576          * a < b =>
14577          *    if (hi(a) < hi(b)) goto true;
14578          *    if (hi(a) > hi(b)) goto false;
14579          *    if (lo(a) < lo(b)) goto true;
14580          *  false:
14581          */
14582 
14583         ix86_compare_op0 = hi[0];
14584         ix86_compare_op1 = hi[1];
14585 
14586         if (code1 != UNKNOWN)
14587           ix86_expand_branch (code1, label);
14588         if (code2 != UNKNOWN)
14589           ix86_expand_branch (code2, label2);
14590 
14591         ix86_compare_op0 = lo[0];
14592         ix86_compare_op1 = lo[1];
14593         ix86_expand_branch (code3, label);
14594 
14595         if (code2 != UNKNOWN)
14596           emit_label (label2);
14597         return;
14598       }
14599 
14600     default:
14601       gcc_unreachable ();
14602     }
14603 }
14604 
14605 /* Split branch based on floating point condition.  */
14606 void
14607 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14608                       rtx target1, rtx target2, rtx tmp, rtx pushed)
14609 {
14610   rtx second, bypass;
14611   rtx label = NULL_RTX;
14612   rtx condition;
14613   int bypass_probability = -1, second_probability = -1, probability = -1;
14614   rtx i;
14615 
14616   if (target2 != pc_rtx)
14617     {
14618       rtx tmp = target2;
14619       code = reverse_condition_maybe_unordered (code);
14620       target2 = target1;
14621       target1 = tmp;
14622     }
14623 
14624   condition = ix86_expand_fp_compare (code, op1, op2,
14625                                       tmp, &second, &bypass);
14626 
14627   /* Remove pushed operand from stack.  */
14628   if (pushed)
14629     ix86_free_from_memory (GET_MODE (pushed));
14630 
14631   if (split_branch_probability >= 0)
14632     {
      /* Distribute the probabilities across the jumps.
         Assume that BYPASS and SECOND always test for UNORDERED.  */
14636       probability = split_branch_probability;
14637 
      /* A probability of 1 is low enough that there is no need to
         update the main probability.  Later we may run some experiments
         to see whether unordered results are more frequent in practice.  */
14641       if (bypass)
14642         bypass_probability = 1;
14643       if (second)
14644         second_probability = 1;
14645     }
14646   if (bypass != NULL_RTX)
14647     {
14648       label = gen_label_rtx ();
14649       i = emit_jump_insn (gen_rtx_SET
14650                           (VOIDmode, pc_rtx,
14651                            gen_rtx_IF_THEN_ELSE (VOIDmode,
14652                                                  bypass,
14653                                                  gen_rtx_LABEL_REF (VOIDmode,
14654                                                                     label),
14655                                                  pc_rtx)));
14656       if (bypass_probability >= 0)
14657         REG_NOTES (i)
14658           = gen_rtx_EXPR_LIST (REG_BR_PROB,
14659                                GEN_INT (bypass_probability),
14660                                REG_NOTES (i));
14661     }
14662   i = emit_jump_insn (gen_rtx_SET
14663                       (VOIDmode, pc_rtx,
14664                        gen_rtx_IF_THEN_ELSE (VOIDmode,
14665                                              condition, target1, target2)));
14666   if (probability >= 0)
14667     REG_NOTES (i)
14668       = gen_rtx_EXPR_LIST (REG_BR_PROB,
14669                            GEN_INT (probability),
14670                            REG_NOTES (i));
14671   if (second != NULL_RTX)
14672     {
14673       i = emit_jump_insn (gen_rtx_SET
14674                           (VOIDmode, pc_rtx,
14675                            gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14676                                                  target2)));
14677       if (second_probability >= 0)
14678         REG_NOTES (i)
14679           = gen_rtx_EXPR_LIST (REG_BR_PROB,
14680                                GEN_INT (second_probability),
14681                                REG_NOTES (i));
14682     }
14683   if (label != NULL_RTX)
14684     emit_label (label);
14685 }
14686 
14687 int
14688 ix86_expand_setcc (enum rtx_code code, rtx dest)
14689 {
14690   rtx ret, tmp, tmpreg, equiv;
14691   rtx second_test, bypass_test;
14692 
14693   if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
14694     return 0; /* FAIL */
14695 
14696   gcc_assert (GET_MODE (dest) == QImode);
14697 
14698   ret = ix86_expand_compare (code, &second_test, &bypass_test);
14699   PUT_MODE (ret, QImode);
14700 
14701   tmp = dest;
14702   tmpreg = dest;
14703 
14704   emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
14705   if (bypass_test || second_test)
14706     {
14707       rtx test = second_test;
14708       int bypass = 0;
14709       rtx tmp2 = gen_reg_rtx (QImode);
14710       if (bypass_test)
14711         {
14712           gcc_assert (!second_test);
14713           test = bypass_test;
14714           bypass = 1;
14715           PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14716         }
14717       PUT_MODE (test, QImode);
14718       emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
14719 
14720       if (bypass)
14721         emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14722       else
14723         emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14724     }
14725 
14726   /* Attach a REG_EQUAL note describing the comparison result.  */
14727   if (ix86_compare_op0 && ix86_compare_op1)
14728     {
14729       equiv = simplify_gen_relational (code, QImode,
14730                                        GET_MODE (ix86_compare_op0),
14731                                        ix86_compare_op0, ix86_compare_op1);
14732       set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14733     }
14734 
14735   return 1; /* DONE */
14736 }
14737 
/* Expand a comparison that sets or clears the carry flag.  Return true
   when successful, storing the comparison into *POP.  */
14740 static bool
14741 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
14742 {
14743   enum machine_mode mode =
14744     GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
14745 
  /* Do not handle DImode compares that go through the special path.  */
14747   if (mode == (TARGET_64BIT ? TImode : DImode))
14748     return false;
14749 
14750   if (SCALAR_FLOAT_MODE_P (mode))
14751     {
14752       rtx second_test = NULL, bypass_test = NULL;
14753       rtx compare_op, compare_seq;
14754 
14755       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14756 
      /* Shortcut:  the following common codes never translate
         into carry flag compares.  */
14759       if (code == EQ || code == NE || code == UNEQ || code == LTGT
14760           || code == ORDERED || code == UNORDERED)
14761         return false;
14762 
      /* These comparisons require the zero flag; swap the operands so
         they won't need it.  */
14764       if ((code == GT || code == UNLE || code == LE || code == UNGT)
14765           && !TARGET_IEEE_FP)
14766         {
14767           rtx tmp = op0;
14768           op0 = op1;
14769           op1 = tmp;
14770           code = swap_condition (code);
14771         }
14772 
      /* Try to expand the comparison and verify that we end up with a
         carry flag based comparison.  This fails only when we decide to
         expand the comparison using arithmetic, which is not a very
         common scenario.  */
14777       start_sequence ();
14778       compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14779                                            &second_test, &bypass_test);
14780       compare_seq = get_insns ();
14781       end_sequence ();
14782 
14783       if (second_test || bypass_test)
14784         return false;
14785 
14786       if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14787           || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14788         code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
14789       else
14790         code = GET_CODE (compare_op);
14791 
14792       if (code != LTU && code != GEU)
14793         return false;
14794 
14795       emit_insn (compare_seq);
14796       *pop = compare_op;
14797       return true;
14798     }
14799 
14800   if (!INTEGRAL_MODE_P (mode))
14801     return false;
14802 
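  /* The rewrites below reduce the test to a pure carry flag test,
     i.e. LTU or GEU.  An illustrative summary (each conversion is
     subject to the overflow and mode checks in the code):

        a == 0  ->  (unsigned) a <  1
        a != 0  ->  (unsigned) a >= 1
        a >  C  ->  (unsigned) a >= C + 1
        a <= C  ->  (unsigned) a <  C + 1
        a >= 0  ->  (unsigned) a <  0x80000000
        a <  0  ->  (unsigned) a >= 0x80000000  */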
14803   switch (code)
14804     {
14805     case LTU:
14806     case GEU:
14807       break;
14808 
14809     /* Convert a==0 into (unsigned)a<1.  */
14810     case EQ:
14811     case NE:
14812       if (op1 != const0_rtx)
14813         return false;
14814       op1 = const1_rtx;
14815       code = (code == EQ ? LTU : GEU);
14816       break;
14817 
    /* Convert a>b into b<a or a>=b+1.  */
14819     case GTU:
14820     case LEU:
14821       if (CONST_INT_P (op1))
14822         {
14823           op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
          /* Bail out on overflow.  We could still swap the operands,
             but that would force the constant into a register.  */
14826           if (op1 == const0_rtx
14827               || !x86_64_immediate_operand (op1, GET_MODE (op1)))
14828             return false;
14829           code = (code == GTU ? GEU : LTU);
14830         }
14831       else
14832         {
14833           rtx tmp = op1;
14834           op1 = op0;
14835           op0 = tmp;
14836           code = (code == GTU ? LTU : GEU);
14837         }
14838       break;
14839 
14840     /* Convert a>=0 into (unsigned)a<0x80000000.  */
14841     case LT:
14842     case GE:
14843       if (mode == DImode || op1 != const0_rtx)
14844         return false;
14845       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14846       code = (code == LT ? GEU : LTU);
14847       break;
14848     case LE:
14849     case GT:
14850       if (mode == DImode || op1 != constm1_rtx)
14851         return false;
14852       op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14853       code = (code == LE ? GEU : LTU);
14854       break;
14855 
14856     default:
14857       return false;
14858     }
  /* Swapping the operands may cause a constant to appear as the first
     operand.  */
14860   if (!nonimmediate_operand (op0, VOIDmode))
14861     {
14862       if (!can_create_pseudo_p ())
14863         return false;
14864       op0 = force_reg (mode, op0);
14865     }
14866   ix86_compare_op0 = op0;
14867   ix86_compare_op1 = op1;
14868   *pop = ix86_expand_compare (code, NULL, NULL);
14869   gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
14870   return true;
14871 }
14872 
14873 int
14874 ix86_expand_int_movcc (rtx operands[])
14875 {
14876   enum rtx_code code = GET_CODE (operands[1]), compare_code;
14877   rtx compare_seq, compare_op;
14878   rtx second_test, bypass_test;
14879   enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
14881 
14882   start_sequence ();
14883   compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
14884   compare_seq = get_insns ();
14885   end_sequence ();
14886 
14887   compare_code = GET_CODE (compare_op);
14888 
14889   if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
14890       || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
14891     sign_bit_compare_p = true;
14892 
14893   /* Don't attempt mode expansion here -- if we had to expand 5 or 6
14894      HImode insns, we'd be swallowed in word prefix ops.  */
14895 
14896   if ((mode != HImode || TARGET_FAST_PREFIX)
14897       && (mode != (TARGET_64BIT ? TImode : DImode))
14898       && CONST_INT_P (operands[2])
14899       && CONST_INT_P (operands[3]))
14900     {
14901       rtx out = operands[0];
14902       HOST_WIDE_INT ct = INTVAL (operands[2]);
14903       HOST_WIDE_INT cf = INTVAL (operands[3]);
14904       HOST_WIDE_INT diff;
14905 
14906       diff = ct - cf;
      /* Sign bit compares are better done using shifts than using sbb.  */
14909       if (sign_bit_compare_p
14910           || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
14911                                              ix86_compare_op1, &compare_op))
14912         {
14913           /* Detect overlap between destination and compare sources.  */
14914           rtx tmp = out;
14915 
14916           if (!sign_bit_compare_p)
14917             {
14918               bool fpcmp = false;
14919 
14920               compare_code = GET_CODE (compare_op);
14921 
14922               if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14923                   || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14924                 {
14925                   fpcmp = true;
14926                   compare_code = ix86_fp_compare_code_to_integer (compare_code);
14927                 }
14928 
              /* To simplify the rest of the code, restrict to the GEU case.  */
14930               if (compare_code == LTU)
14931                 {
14932                   HOST_WIDE_INT tmp = ct;
14933                   ct = cf;
14934                   cf = tmp;
14935                   compare_code = reverse_condition (compare_code);
14936                   code = reverse_condition (code);
14937                 }
14938               else
14939                 {
14940                   if (fpcmp)
14941                     PUT_CODE (compare_op,
14942                               reverse_condition_maybe_unordered
14943                                 (GET_CODE (compare_op)));
14944                   else
14945                     PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
14946                 }
14947               diff = ct - cf;
14948 
14949               if (reg_overlap_mentioned_p (out, ix86_compare_op0)
14950                   || reg_overlap_mentioned_p (out, ix86_compare_op1))
14951                 tmp = gen_reg_rtx (mode);
14952 
14953               if (mode == DImode)
14954                 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
14955               else
14956                 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
14957             }
14958           else
14959             {
14960               if (code == GT || code == GE)
14961                 code = reverse_condition (code);
14962               else
14963                 {
14964                   HOST_WIDE_INT tmp = ct;
14965                   ct = cf;
14966                   cf = tmp;
14967                   diff = ct - cf;
14968                 }
14969               tmp = emit_store_flag (tmp, code, ix86_compare_op0,
14970                                      ix86_compare_op1, VOIDmode, 0, -1);
14971             }
14972 
14973           if (diff == 1)
14974             {
14975               /*
14976                * cmpl op0,op1
14977                * sbbl dest,dest
14978                * [addl dest, ct]
14979                *
14980                * Size 5 - 8.
14981                */
14982               if (ct)
14983                 tmp = expand_simple_binop (mode, PLUS,
14984                                            tmp, GEN_INT (ct),
14985                                            copy_rtx (tmp), 1, OPTAB_DIRECT);
14986             }
14987           else if (cf == -1)
14988             {
14989               /*
14990                * cmpl op0,op1
14991                * sbbl dest,dest
14992                * orl $ct, dest
14993                *
14994                * Size 8.
14995                */
14996               tmp = expand_simple_binop (mode, IOR,
14997                                          tmp, GEN_INT (ct),
14998                                          copy_rtx (tmp), 1, OPTAB_DIRECT);
14999             }
15000           else if (diff == -1 && ct)
15001             {
15002               /*
15003                * cmpl op0,op1
15004                * sbbl dest,dest
15005                * notl dest
15006                * [addl dest, cf]
15007                *
15008                * Size 8 - 11.
15009                */
15010               tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15011               if (cf)
15012                 tmp = expand_simple_binop (mode, PLUS,
15013                                            copy_rtx (tmp), GEN_INT (cf),
15014                                            copy_rtx (tmp), 1, OPTAB_DIRECT);
15015             }
15016           else
15017             {
15018               /*
15019                * cmpl op0,op1
15020                * sbbl dest,dest
15021                * [notl dest]
15022                * andl cf - ct, dest
15023                * [addl dest, ct]
15024                *
15025                * Size 8 - 11.
15026                */
15027 
15028               if (cf == 0)
15029                 {
15030                   cf = ct;
15031                   ct = 0;
15032                   tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15033                 }
15034 
15035               tmp = expand_simple_binop (mode, AND,
15036                                          copy_rtx (tmp),
15037                                          gen_int_mode (cf - ct, mode),
15038                                          copy_rtx (tmp), 1, OPTAB_DIRECT);
15039               if (ct)
15040                 tmp = expand_simple_binop (mode, PLUS,
15041                                            copy_rtx (tmp), GEN_INT (ct),
15042                                            copy_rtx (tmp), 1, OPTAB_DIRECT);
15043             }
15044 
15045           if (!rtx_equal_p (tmp, out))
15046             emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15047 
15048           return 1; /* DONE */
15049         }
15050 
15051       if (diff < 0)
15052         {
15053           enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15054 
15055           HOST_WIDE_INT tmp;
15056           tmp = ct, ct = cf, cf = tmp;
15057           diff = -diff;
15058 
15059           if (SCALAR_FLOAT_MODE_P (cmp_mode))
15060             {
15061               gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15062 
              /* We may be reversing an unordered compare to a normal
                 compare, which is not valid in general (we might convert
                 a non-trapping condition into a trapping one); however,
                 on i386 we currently emit all comparisons unordered.  */
15067               compare_code = reverse_condition_maybe_unordered (compare_code);
15068               code = reverse_condition_maybe_unordered (code);
15069             }
15070           else
15071             {
15072               compare_code = reverse_condition (compare_code);
15073               code = reverse_condition (code);
15074             }
15075         }
15076 
15077       compare_code = UNKNOWN;
15078       if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15079           && CONST_INT_P (ix86_compare_op1))
15080         {
15081           if (ix86_compare_op1 == const0_rtx
15082               && (code == LT || code == GE))
15083             compare_code = code;
15084           else if (ix86_compare_op1 == constm1_rtx)
15085             {
15086               if (code == LE)
15087                 compare_code = LT;
15088               else if (code == GT)
15089                 compare_code = GE;
15090             }
15091         }
15092 
15093       /* Optimize dest = (op0 < 0) ? -1 : cf.  */
15094       if (compare_code != UNKNOWN
15095           && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15096           && (cf == -1 || ct == -1))
15097         {
          /* If the lea code below could be used, only optimize
             when it results in a 2-insn sequence.  */
15100 
15101           if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15102                  || diff == 3 || diff == 5 || diff == 9)
15103               || (compare_code == LT && ct == -1)
15104               || (compare_code == GE && cf == -1))
15105             {
15106               /*
15107                * notl op1       (if necessary)
15108                * sarl $31, op1
15109                * orl cf, op1
15110                */
15111               if (ct != -1)
15112                 {
15113                   cf = ct;
15114                   ct = -1;
15115                   code = reverse_condition (code);
15116                 }
15117 
15118               out = emit_store_flag (out, code, ix86_compare_op0,
15119                                      ix86_compare_op1, VOIDmode, 0, -1);
15120 
15121               out = expand_simple_binop (mode, IOR,
15122                                          out, GEN_INT (cf),
15123                                          out, 1, OPTAB_DIRECT);
15124               if (out != operands[0])
15125                 emit_move_insn (operands[0], out);
15126 
15127               return 1; /* DONE */
15128             }
15129         }
15130 
15131 
15132       if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15133            || diff == 3 || diff == 5 || diff == 9)
15134           && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15135           && (mode != DImode
15136               || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15137         {
15138           /*
15139            * xorl dest,dest
15140            * cmpl op1,op2
15141            * setcc dest
15142            * lea cf(dest*(ct-cf)),dest
15143            *
15144            * Size 14.
15145            *
15146            * This also catches the degenerate setcc-only case.
15147            */
15148 
15149           rtx tmp;
15150           int nops;
15151 
15152           out = emit_store_flag (out, code, ix86_compare_op0,
15153                                  ix86_compare_op1, VOIDmode, 0, 1);
15154 
15155           nops = 0;
          /* On x86_64 the lea instruction operates on Pmode, so we need
             the arithmetic done in the proper mode to match.  */
15158           if (diff == 1)
15159             tmp = copy_rtx (out);
15160           else
15161             {
15162               rtx out1;
15163               out1 = copy_rtx (out);
15164               tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15165               nops++;
15166               if (diff & 1)
15167                 {
15168                   tmp = gen_rtx_PLUS (mode, tmp, out1);
15169                   nops++;
15170                 }
15171             }
15172           if (cf != 0)
15173             {
15174               tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15175               nops++;
15176             }
15177           if (!rtx_equal_p (tmp, out))
15178             {
15179               if (nops == 1)
15180                 out = force_operand (tmp, copy_rtx (out));
15181               else
15182                 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15183             }
15184           if (!rtx_equal_p (out, operands[0]))
15185             emit_move_insn (operands[0], copy_rtx (out));
15186 
15187           return 1; /* DONE */
15188         }
15189 
15190       /*
15191        * General case:                  Jumpful:
15192        *   xorl dest,dest               cmpl op1, op2
15193        *   cmpl op1, op2                movl ct, dest
15194        *   setcc dest                   jcc 1f
15195        *   decl dest                    movl cf, dest
15196        *   andl (cf-ct),dest            1:
15197        *   addl ct,dest
15198        *
15199        * Size 20.                       Size 14.
15200        *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern CPUs, so consider failing only if optimizing
       * for space.
15204        */
15205 
15206       if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15207           && BRANCH_COST (optimize_insn_for_speed_p (),
15208                           false) >= 2)
15209         {
15210           if (cf == 0)
15211             {
15212               enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15213 
15214               cf = ct;
15215               ct = 0;
15216 
15217               if (SCALAR_FLOAT_MODE_P (cmp_mode))
15218                 {
15219                   gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15220 
                  /* We may be reversing an unordered compare to a normal
                     compare, which is not valid in general (we might
                     convert a non-trapping condition into a trapping one);
                     however, on i386 we currently emit all comparisons
                     unordered.  */
15225                   code = reverse_condition_maybe_unordered (code);
15226                 }
15227               else
15228                 {
15229                   code = reverse_condition (code);
15230                   if (compare_code != UNKNOWN)
15231                     compare_code = reverse_condition (compare_code);
15232                 }
15233             }
15234 
15235           if (compare_code != UNKNOWN)
15236             {
15237               /* notl op1       (if needed)
15238                  sarl $31, op1
15239                  andl (cf-ct), op1
15240                  addl ct, op1
15241 
15242                  For x < 0 (resp. x <= -1) there will be no notl,
15243                  so if possible swap the constants to get rid of the
15244                  complement.
15245                  True/false will be -1/0 while code below (store flag
15246                  followed by decrement) is 0/-1, so the constants need
15247                  to be exchanged once more.  */
15248 
15249               if (compare_code == GE || !cf)
15250                 {
15251                   code = reverse_condition (code);
15252                   compare_code = LT;
15253                 }
15254               else
15255                 {
15256                   HOST_WIDE_INT tmp = cf;
15257                   cf = ct;
15258                   ct = tmp;
15259                 }
15260 
15261               out = emit_store_flag (out, code, ix86_compare_op0,
15262                                      ix86_compare_op1, VOIDmode, 0, -1);
15263             }
15264           else
15265             {
15266               out = emit_store_flag (out, code, ix86_compare_op0,
15267                                      ix86_compare_op1, VOIDmode, 0, 1);
15268 
15269               out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15270                                          copy_rtx (out), 1, OPTAB_DIRECT);
15271             }
15272 
15273           out = expand_simple_binop (mode, AND, copy_rtx (out),
15274                                      gen_int_mode (cf - ct, mode),
15275                                      copy_rtx (out), 1, OPTAB_DIRECT);
15276           if (ct)
15277             out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15278                                        copy_rtx (out), 1, OPTAB_DIRECT);
15279           if (!rtx_equal_p (out, operands[0]))
15280             emit_move_insn (operands[0], copy_rtx (out));
15281 
15282           return 1; /* DONE */
15283         }
15284     }
15285 
15286   if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15287     {
      /* Try a few more things with specific constants and a variable.  */
15289 
15290       optab op;
15291       rtx var, orig_out, out, tmp;
15292 
15293       if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15294         return 0; /* FAIL */
15295 
      /* If one of the two operands is an interesting constant, load a
         0 or -1 constant instead and mask the variable back in with a
         logical operation.  */
15298 
15299       if (CONST_INT_P (operands[2]))
15300         {
15301           var = operands[3];
15302           if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15303             operands[3] = constm1_rtx, op = and_optab;
15304           else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15305             operands[3] = const0_rtx, op = ior_optab;
15306           else
15307             return 0; /* FAIL */
15308         }
15309       else if (CONST_INT_P (operands[3]))
15310         {
15311           var = operands[2];
15312           if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15313             operands[2] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
15315             operands[2] = const0_rtx, op = ior_optab;
15316           else
15317             return 0; /* FAIL */
15318         }
15319       else
15320         return 0; /* FAIL */
15321 
15322       orig_out = operands[0];
15323       tmp = gen_reg_rtx (mode);
15324       operands[0] = tmp;
15325 
15326       /* Recurse to get the constant loaded.  */
15327       if (ix86_expand_int_movcc (operands) == 0)
15328         return 0; /* FAIL */
15329 
15330       /* Mask in the interesting variable.  */
15331       out = expand_binop (mode, op, var, tmp, orig_out, 0,
15332                           OPTAB_WIDEN);
15333       if (!rtx_equal_p (out, orig_out))
15334         emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15335 
15336       return 1; /* DONE */
15337     }
15338 
15339   /*
15340    * For comparison with above,
15341    *
15342    * movl cf,dest
15343    * movl ct,tmp
15344    * cmpl op1,op2
15345    * cmovcc tmp,dest
15346    *
15347    * Size 15.
15348    */
15349 
15350   if (! nonimmediate_operand (operands[2], mode))
15351     operands[2] = force_reg (mode, operands[2]);
15352   if (! nonimmediate_operand (operands[3], mode))
15353     operands[3] = force_reg (mode, operands[3]);
15354 
15355   if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15356     {
15357       rtx tmp = gen_reg_rtx (mode);
15358       emit_move_insn (tmp, operands[3]);
15359       operands[3] = tmp;
15360     }
15361   if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15362     {
15363       rtx tmp = gen_reg_rtx (mode);
15364       emit_move_insn (tmp, operands[2]);
15365       operands[2] = tmp;
15366     }
15367 
15368   if (! register_operand (operands[2], VOIDmode)
15369       && (mode == QImode
15370           || ! register_operand (operands[3], VOIDmode)))
15371     operands[2] = force_reg (mode, operands[2]);
15372 
15373   if (mode == QImode
15374       && ! register_operand (operands[3], VOIDmode))
15375     operands[3] = force_reg (mode, operands[3]);
15376 
15377   emit_insn (compare_seq);
15378   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15379                           gen_rtx_IF_THEN_ELSE (mode,
15380                                                 compare_op, operands[2],
15381                                                 operands[3])));
15382   if (bypass_test)
15383     emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15384                             gen_rtx_IF_THEN_ELSE (mode,
15385                                   bypass_test,
15386                                   copy_rtx (operands[3]),
15387                                   copy_rtx (operands[0]))));
15388   if (second_test)
15389     emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15390                             gen_rtx_IF_THEN_ELSE (mode,
15391                                   second_test,
15392                                   copy_rtx (operands[2]),
15393                                   copy_rtx (operands[0]))));
15394 
15395   return 1; /* DONE */
15396 }
15397 
15398 /* Swap, force into registers, or otherwise massage the two operands
15399    to an sse comparison with a mask result.  Thus we differ a bit from
15400    ix86_prepare_fp_compare_args which expects to produce a flags result.
15401 
15402    The DEST operand exists to help determine whether to commute commutative
15403    operators.  The POP0/POP1 operands are updated in place.  The new
15404    comparison code is returned, or UNKNOWN if not implementable.  */
15405 
15406 static enum rtx_code
15407 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15408                                   rtx *pop0, rtx *pop1)
15409 {
15410   rtx tmp;
15411 
15412   switch (code)
15413     {
15414     case LTGT:
15415     case UNEQ:
15416       /* We have no LTGT as an operator.  We could implement it with
15417          NE & ORDERED, but this requires an extra temporary.  It's
15418          not clear that it's worth it.  */
15419       return UNKNOWN;
15420 
15421     case LT:
15422     case LE:
15423     case UNGT:
15424     case UNGE:
15425       /* These are supported directly.  */
15426       break;
15427 
15428     case EQ:
15429     case NE:
15430     case UNORDERED:
15431     case ORDERED:
15432       /* For commutative operators, try to canonicalize the destination
15433          operand to be first in the comparison - this helps reload to
15434          avoid extra moves.  */
15435       if (!dest || !rtx_equal_p (dest, *pop1))
15436         break;
15437       /* FALLTHRU */
15438 
15439     case GE:
15440     case GT:
15441     case UNLE:
15442     case UNLT:
15443       /* These are not supported directly.  Swap the comparison operands
15444          to transform into something that is supported.  */
15445       tmp = *pop0;
15446       *pop0 = *pop1;
15447       *pop1 = tmp;
15448       code = swap_condition (code);
15449       break;
15450 
15451     default:
15452       gcc_unreachable ();
15453     }
15454 
15455   return code;
15456 }
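
/* For example, "a > b" has no direct SSE comparison encoding, so it is
   rewritten above as "b < a" (a swapped cmplt); the commutative
   EQ/NE/ORDERED/UNORDERED cases are swapped only when doing so lets
   DEST match the first operand, which helps reload avoid extra
   moves.  */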
15457 
15458 /* Detect conditional moves that exactly match min/max operational
15459    semantics.  Note that this is IEEE safe, as long as we don't
15460    interchange the operands.
15461 
15462    Returns FALSE if this conditional move doesn't match a MIN/MAX,
15463    and TRUE if the operation is successful and instructions are emitted.  */
15464 
15465 static bool
15466 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15467                            rtx cmp_op1, rtx if_true, rtx if_false)
15468 {
15469   enum machine_mode mode;
15470   bool is_min;
15471   rtx tmp;
15472 
15473   if (code == LT)
15474     ;
15475   else if (code == UNGE)
15476     {
15477       tmp = if_true;
15478       if_true = if_false;
15479       if_false = tmp;
15480     }
15481   else
15482     return false;
15483 
15484   if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15485     is_min = true;
15486   else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15487     is_min = false;
15488   else
15489     return false;
15490 
15491   mode = GET_MODE (dest);
15492 
15493   /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15494      but MODE may be a vector mode and thus not appropriate.  */
15495   if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
15496     {
15497       int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15498       rtvec v;
15499 
15500       if_true = force_reg (mode, if_true);
15501       v = gen_rtvec (2, if_true, if_false);
15502       tmp = gen_rtx_UNSPEC (mode, v, u);
15503     }
15504   else
15505     {
15506       code = is_min ? SMIN : SMAX;
15507       tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15508     }
15509 
15510   emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15511   return true;
15512 }
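
/* Note that minss/minps compute "op1 < op2 ? op1 : op2" and return the
   second operand when the comparison is unordered (likewise for
   maxss/maxps), which is exactly the pattern matched above -- hence the
   requirement that the operands not be interchanged.  */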
15513 
15514 /* Expand an sse vector comparison.  Return the register with the result.  */
15515 
15516 static rtx
15517 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15518                      rtx op_true, rtx op_false)
15519 {
15520   enum machine_mode mode = GET_MODE (dest);
15521   rtx x;
15522 
15523   cmp_op0 = force_reg (mode, cmp_op0);
15524   if (!nonimmediate_operand (cmp_op1, mode))
15525     cmp_op1 = force_reg (mode, cmp_op1);
15526 
15527   if (optimize
15528       || reg_overlap_mentioned_p (dest, op_true)
15529       || reg_overlap_mentioned_p (dest, op_false))
15530     dest = gen_reg_rtx (mode);
15531 
15532   x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15533   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15534 
15535   return dest;
15536 }
15537 
15538 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15539    operations.  This is used for both scalar and vector conditional moves.  */
15540 
15541 static void
15542 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15543 {
15544   enum machine_mode mode = GET_MODE (dest);
15545   rtx t2, t3, x;
15546 
15547   if (op_false == CONST0_RTX (mode))
15548     {
15549       op_true = force_reg (mode, op_true);
15550       x = gen_rtx_AND (mode, cmp, op_true);
15551       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15552     }
15553   else if (op_true == CONST0_RTX (mode))
15554     {
15555       op_false = force_reg (mode, op_false);
15556       x = gen_rtx_NOT (mode, cmp);
15557       x = gen_rtx_AND (mode, x, op_false);
15558       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15559     }
15560   else if (TARGET_SSE5)
15561     {
      rtx pcmov = gen_rtx_SET (VOIDmode, dest,
                               gen_rtx_IF_THEN_ELSE (mode, cmp,
                                                     op_true,
                                                     op_false));
15566       emit_insn (pcmov);
15567     }
15568   else
15569     {
15570       op_true = force_reg (mode, op_true);
15571       op_false = force_reg (mode, op_false);
15572 
15573       t2 = gen_reg_rtx (mode);
15574       if (optimize)
15575         t3 = gen_reg_rtx (mode);
15576       else
15577         t3 = dest;
15578 
15579       x = gen_rtx_AND (mode, op_true, cmp);
15580       emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15581 
15582       x = gen_rtx_NOT (mode, cmp);
15583       x = gen_rtx_AND (mode, x, op_false);
15584       emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15585 
15586       x = gen_rtx_IOR (mode, t3, t2);
15587       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15588     }
15589 }
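
/* The fallback case above is the classic mask idiom, using
   andps/andnps/orps for float vectors (pand/pandn/por for integer
   ones) to compute dest = (cmp & op_true) | (~cmp & op_false).  */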
15590 
15591 /* Expand a floating-point conditional move.  Return true if successful.  */
15592 
15593 int
15594 ix86_expand_fp_movcc (rtx operands[])
15595 {
15596   enum machine_mode mode = GET_MODE (operands[0]);
15597   enum rtx_code code = GET_CODE (operands[1]);
15598   rtx tmp, compare_op, second_test, bypass_test;
15599 
15600   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15601     {
15602       enum machine_mode cmode;
15603 
      /* Since we have no cmove for SSE registers, don't force bad
         register allocation just to gain access to it.  Deny movcc when
         the comparison mode doesn't match the move mode.  */
15607       cmode = GET_MODE (ix86_compare_op0);
15608       if (cmode == VOIDmode)
15609         cmode = GET_MODE (ix86_compare_op1);
15610       if (cmode != mode)
15611         return 0;
15612 
15613       code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15614                                                &ix86_compare_op0,
15615                                                &ix86_compare_op1);
15616       if (code == UNKNOWN)
15617         return 0;
15618 
15619       if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15620                                      ix86_compare_op1, operands[2],
15621                                      operands[3]))
15622         return 1;
15623 
15624       tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15625                                  ix86_compare_op1, operands[2], operands[3]);
15626       ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15627       return 1;
15628     }
15629 
15630   /* The floating point conditional move instructions don't directly
15631      support conditions resulting from a signed integer comparison.  */
15632 
15633   compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15634 
15638   if (!fcmov_comparison_operator (compare_op, VOIDmode))
15639     {
15640       gcc_assert (!second_test && !bypass_test);
15641       tmp = gen_reg_rtx (QImode);
15642       ix86_expand_setcc (code, tmp);
15643       code = NE;
15644       ix86_compare_op0 = tmp;
15645       ix86_compare_op1 = const0_rtx;
      compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15647     }
15648   if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15649     {
15650       tmp = gen_reg_rtx (mode);
15651       emit_move_insn (tmp, operands[3]);
15652       operands[3] = tmp;
15653     }
15654   if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15655     {
15656       tmp = gen_reg_rtx (mode);
15657       emit_move_insn (tmp, operands[2]);
15658       operands[2] = tmp;
15659     }
15660 
15661   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15662                           gen_rtx_IF_THEN_ELSE (mode, compare_op,
15663                                                 operands[2], operands[3])));
15664   if (bypass_test)
15665     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15666                             gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15667                                                   operands[3], operands[0])));
15668   if (second_test)
15669     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15670                             gen_rtx_IF_THEN_ELSE (mode, second_test,
15671                                                   operands[2], operands[0])));
15672 
15673   return 1;
15674 }
15675 
15676 /* Expand a floating-point vector conditional move; a vcond operation
15677    rather than a movcc operation.  */
15678 
15679 bool
15680 ix86_expand_fp_vcond (rtx operands[])
15681 {
15682   enum rtx_code code = GET_CODE (operands[3]);
15683   rtx cmp;
15684 
15685   code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15686                                            &operands[4], &operands[5]);
15687   if (code == UNKNOWN)
15688     return false;
15689 
15690   if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15691                                  operands[5], operands[1], operands[2]))
15692     return true;
15693 
15694   cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15695                              operands[1], operands[2]);
15696   ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15697   return true;
15698 }
15699 
15700 /* Expand a signed/unsigned integral vector conditional move.  */
15701 
15702 bool
15703 ix86_expand_int_vcond (rtx operands[])
15704 {
15705   enum machine_mode mode = GET_MODE (operands[0]);
15706   enum rtx_code code = GET_CODE (operands[3]);
15707   bool negate = false;
15708   rtx x, cop0, cop1;
15709 
15710   cop0 = operands[4];
15711   cop1 = operands[5];
15712 
15713   /* SSE5 supports all of the comparisons on all vector int types.  */
15714   if (!TARGET_SSE5)
15715     {
15716       /* Canonicalize the comparison to EQ, GT, GTU.  */
15717       switch (code)
15718         {
15719         case EQ:
15720         case GT:
15721         case GTU:
15722           break;
15723 
15724         case NE:
15725         case LE:
15726         case LEU:
15727           code = reverse_condition (code);
15728           negate = true;
15729           break;
15730 
15731         case GE:
15732         case GEU:
15733           code = reverse_condition (code);
15734           negate = true;
15735           /* FALLTHRU */
15736 
15737         case LT:
15738         case LTU:
15739           code = swap_condition (code);
15740           x = cop0, cop0 = cop1, cop1 = x;
15741           break;
15742 
15743         default:
15744           gcc_unreachable ();
15745         }
15746 
      /* V2DImode comparisons are supported only by SSE4.1/SSE4.2.  */
15748       if (mode == V2DImode)
15749         {
15750           switch (code)
15751             {
15752             case EQ:
15753               /* SSE4.1 supports EQ.  */
15754               if (!TARGET_SSE4_1)
15755                 return false;
15756               break;
15757 
15758             case GT:
15759             case GTU:
15760               /* SSE4.2 supports GT/GTU.  */
15761               if (!TARGET_SSE4_2)
15762                 return false;
15763               break;
15764 
15765             default:
15766               gcc_unreachable ();
15767             }
15768         }
15769 
      /* Unsigned parallel compare is not supported by the hardware.
         Play some tricks to turn this into a signed comparison.  */
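
      /* For V4SImode, e.g., "(unsigned) a > (unsigned) b" holds iff
         "(signed) (a - 0x80000000) > (signed) (b - 0x80000000)"; the
         subtraction below merely flips each element's sign bit.  */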
15773       if (code == GTU)
15774         {
15775           cop0 = force_reg (mode, cop0);
15776 
15777           switch (mode)
15778             {
15779             case V4SImode:
15780             case V2DImode:
15781                 {
15782                   rtx t1, t2, mask;
15783                   rtx (*gen_sub3) (rtx, rtx, rtx);
15784 
                  /* Subtract (-(INT_MAX) - 1), i.e. INT_MIN, from both
                     operands to make them signed.  */
15787                   mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
15788                                                   true, false);
15789                   gen_sub3 = (mode == V4SImode
15790                               ? gen_subv4si3 : gen_subv2di3);
15791                   t1 = gen_reg_rtx (mode);
15792                   emit_insn (gen_sub3 (t1, cop0, mask));
15793 
15794                   t2 = gen_reg_rtx (mode);
15795                   emit_insn (gen_sub3 (t2, cop1, mask));
15796 
15797                   cop0 = t1;
15798                   cop1 = t2;
15799                   code = GT;
15800                 }
15801               break;
15802 
15803             case V16QImode:
15804             case V8HImode:
15805               /* Perform a parallel unsigned saturating subtraction.  */
15806               x = gen_reg_rtx (mode);
15807               emit_insn (gen_rtx_SET (VOIDmode, x,
15808                                       gen_rtx_US_MINUS (mode, cop0, cop1)));
15809 
15810               cop0 = x;
15811               cop1 = CONST0_RTX (mode);
15812               code = EQ;
15813               negate = !negate;
15814               break;
15815 
15816             default:
15817               gcc_unreachable ();
15818             }
15819         }
15820     }
15821 
15822   x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
15823                            operands[1+negate], operands[2-negate]);
15824 
15825   ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
15826                          operands[2-negate]);
15827   return true;
15828 }
15829 
/* Unpack OPERANDS[1] into the next wider integer vector type.  UNSIGNED_P
   is true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */
15833 
15834 void
15835 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15836 {
15837   enum machine_mode imode = GET_MODE (operands[1]);
15838   rtx (*unpack)(rtx, rtx, rtx);
15839   rtx se, dest;
15840 
15841   switch (imode)
15842     {
15843     case V16QImode:
15844       if (high_p)
15845         unpack = gen_vec_interleave_highv16qi;
15846       else
15847         unpack = gen_vec_interleave_lowv16qi;
15848       break;
15849     case V8HImode:
15850       if (high_p)
15851         unpack = gen_vec_interleave_highv8hi;
15852       else
15853         unpack = gen_vec_interleave_lowv8hi;
15854       break;
15855     case V4SImode:
15856       if (high_p)
15857         unpack = gen_vec_interleave_highv4si;
15858       else
15859         unpack = gen_vec_interleave_lowv4si;
15860       break;
15861     default:
15862       gcc_unreachable ();
15863     }
15864 
15865   dest = gen_lowpart (imode, operands[0]);
15866 
15867   if (unsigned_p)
15868     se = force_reg (imode, CONST0_RTX (imode));
15869   else
15870     se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
15871                               operands[1], pc_rtx, pc_rtx);
15872 
15873   emit_insn (unpack (dest, operands[1], se));
15874 }
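
/* For instance, the V16QImode signed low-half case above materializes
   each byte's sign with pcmpgtb (0 > byte) and then interleaves the
   source bytes with their sign bytes via punpcklbw, producing
   sign-extended V8HImode elements.  */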
15875 
15876 /* This function performs the same task as ix86_expand_sse_unpack,
15877    but with SSE4.1 instructions.  */
15878 
15879 void
15880 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15881 {
15882   enum machine_mode imode = GET_MODE (operands[1]);
15883   rtx (*unpack)(rtx, rtx);
15884   rtx src, dest;
15885 
15886   switch (imode)
15887     {
15888     case V16QImode:
15889       if (unsigned_p)
15890         unpack = gen_sse4_1_zero_extendv8qiv8hi2;
15891       else
15892         unpack = gen_sse4_1_extendv8qiv8hi2;
15893       break;
15894     case V8HImode:
15895       if (unsigned_p)
15896         unpack = gen_sse4_1_zero_extendv4hiv4si2;
15897       else
15898         unpack = gen_sse4_1_extendv4hiv4si2;
15899       break;
15900     case V4SImode:
15901       if (unsigned_p)
15902         unpack = gen_sse4_1_zero_extendv2siv2di2;
15903       else
15904         unpack = gen_sse4_1_extendv2siv2di2;
15905       break;
15906     default:
15907       gcc_unreachable ();
15908     }
15909 
15910   dest = operands[0];
15911   if (high_p)
15912     {
15913       /* Shift higher 8 bytes to lower 8 bytes.  */
15914       src = gen_reg_rtx (imode);
15915       emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
15916                                    gen_lowpart (TImode, operands[1]),
15917                                    GEN_INT (64)));
15918     }
15919   else
15920     src = operands[1];
15921 
15922   emit_insn (unpack (dest, src));
15923 }
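
/* The patterns used above correspond to the SSE4.1 pmovsx/pmovzx
   instructions (e.g. pmovsxbw for the signed V16QImode case); the high
   half is handled by first shifting the upper 8 bytes down with
   psrldq.  */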
15924 
/* This function performs the same task as ix86_expand_sse_unpack,
   but with SSE5 instructions.  */
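
/* The pperm control bytes built below select, for each destination
   byte, either a byte of the source (PPERM_SRC | index), a zero byte
   (PPERM_ZERO), or the sign extension of a source byte
   (PPERM_SIGN | index).  */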
15927 
15928 void
15929 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15930 {
15931   enum machine_mode imode = GET_MODE (operands[1]);
15932   int pperm_bytes[16];
15933   int i;
15934   int h = (high_p) ? 8 : 0;
15935   int h2;
15936   int sign_extend;
15937   rtvec v = rtvec_alloc (16);
15938   rtvec vs;
15939   rtx x, p;
15940   rtx op0 = operands[0], op1 = operands[1];
15941 
15942   switch (imode)
15943     {
15944     case V16QImode:
15945       vs = rtvec_alloc (8);
15946       h2 = (high_p) ? 8 : 0;
15947       for (i = 0; i < 8; i++)
15948         {
15949           pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
15950           pperm_bytes[2*i+1] = ((unsigned_p)
15951                                 ? PPERM_ZERO
15952                                 : PPERM_SIGN | PPERM_SRC2 | i | h);
15953         }
15954 
15955       for (i = 0; i < 16; i++)
15956         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15957 
15958       for (i = 0; i < 8; i++)
15959         RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15960 
15961       p = gen_rtx_PARALLEL (VOIDmode, vs);
15962       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15963       if (unsigned_p)
15964         emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
15965       else
15966         emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
15967       break;
15968 
15969     case V8HImode:
15970       vs = rtvec_alloc (4);
15971       h2 = (high_p) ? 4 : 0;
15972       for (i = 0; i < 4; i++)
15973         {
15974           sign_extend = ((unsigned_p)
15975                          ? PPERM_ZERO
15976                          : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
15977           pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
15978           pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
15979           pperm_bytes[4*i+2] = sign_extend;
15980           pperm_bytes[4*i+3] = sign_extend;
15981         }
15982 
15983       for (i = 0; i < 16; i++)
15984         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15985 
15986       for (i = 0; i < 4; i++)
15987         RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15988 
15989       p = gen_rtx_PARALLEL (VOIDmode, vs);
15990       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15991       if (unsigned_p)
15992         emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
15993       else
15994         emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
15995       break;
15996 
15997     case V4SImode:
15998       vs = rtvec_alloc (2);
15999       h2 = (high_p) ? 2 : 0;
16000       for (i = 0; i < 2; i++)
16001         {
16002           sign_extend = ((unsigned_p)
16003                          ? PPERM_ZERO
16004                          : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16005           pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16006           pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16007           pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16008           pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16009           pperm_bytes[8*i+4] = sign_extend;
16010           pperm_bytes[8*i+5] = sign_extend;
16011           pperm_bytes[8*i+6] = sign_extend;
16012           pperm_bytes[8*i+7] = sign_extend;
16013         }
16014 
16015       for (i = 0; i < 16; i++)
16016         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16017 
16018       for (i = 0; i < 2; i++)
16019         RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16020 
16021       p = gen_rtx_PARALLEL (VOIDmode, vs);
16022       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16023       if (unsigned_p)
16024         emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16025       else
16026         emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16027       break;
16028 
16029     default:
16030       gcc_unreachable ();
16031     }
16032 
16033   return;
16034 }
16035 
/* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into
   the next narrower integer vector type.  */
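
/* For V16QImode, e.g., the control vector built below selects the low
   byte of each HImode element: bytes 0, 2, ..., 14 of OPERANDS[1] form
   the low half of the result and the same bytes of OPERANDS[2] form the
   high half.  */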
16038 void
16039 ix86_expand_sse5_pack (rtx operands[3])
16040 {
16041   enum machine_mode imode = GET_MODE (operands[0]);
16042   int pperm_bytes[16];
16043   int i;
16044   rtvec v = rtvec_alloc (16);
16045   rtx x;
16046   rtx op0 = operands[0];
16047   rtx op1 = operands[1];
16048   rtx op2 = operands[2];
16049 
16050   switch (imode)
16051     {
16052     case V16QImode:
16053       for (i = 0; i < 8; i++)
16054         {
16055           pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16056           pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16057         }
16058 
16059       for (i = 0; i < 16; i++)
16060         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16061 
16062       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16063       emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
16064       break;
16065 
16066     case V8HImode:
16067       for (i = 0; i < 4; i++)
16068         {
16069           pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16070           pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16071           pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16072           pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16073         }
16074 
16075       for (i = 0; i < 16; i++)
16076         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16077 
16078       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16079       emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
16080       break;
16081 
16082     case V4SImode:
16083       for (i = 0; i < 2; i++)
16084         {
16085           pperm_bytes[(4*i)+0]  = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16086           pperm_bytes[(4*i)+1]  = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16087           pperm_bytes[(4*i)+2]  = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16088           pperm_bytes[(4*i)+3]  = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16089           pperm_bytes[(4*i)+8]  = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16090           pperm_bytes[(4*i)+9]  = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16091           pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16092           pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16093         }
16094 
16095       for (i = 0; i < 16; i++)
16096         RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16097 
16098       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16099       emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
16100       break;
16101 
16102     default:
16103       gcc_unreachable ();
16104     }
16105 
16106   return;
16107 }
16108 
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
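/* For example, with unsigned A and B in %edi and %esi, the statement
   "if (a < b) c++;" can be compiled to roughly:

	cmpl	%esi, %edi	# CF <- (a < b), unsigned
	adcl	$0, %eax	# c += CF

   and the decrement case uses sbb similarly.  */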
16112 int
16113 ix86_expand_int_addcc (rtx operands[])
16114 {
16115   enum rtx_code code = GET_CODE (operands[1]);
16116   rtx compare_op;
16117   rtx val = const0_rtx;
16118   bool fpcmp = false;
16119   enum machine_mode mode = GET_MODE (operands[0]);
16120 
16121   if (operands[3] != const1_rtx
16122       && operands[3] != constm1_rtx)
16123     return 0;
16124   if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16125                                        ix86_compare_op1, &compare_op))
16126      return 0;
16127   code = GET_CODE (compare_op);
16128 
16129   if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16130       || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16131     {
16132       fpcmp = true;
16133       code = ix86_fp_compare_code_to_integer (code);
16134     }
16135 
16136   if (code != LTU)
16137     {
16138       val = constm1_rtx;
16139       if (fpcmp)
16140         PUT_CODE (compare_op,
16141                   reverse_condition_maybe_unordered
16142                     (GET_CODE (compare_op)));
16143       else
16144         PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16145     }
16146   PUT_MODE (compare_op, mode);
16147 
16148   /* Construct either adc or sbb insn.  */
16149   if ((code == LTU) == (operands[3] == constm1_rtx))
16150     {
16151       switch (GET_MODE (operands[0]))
16152         {
16153           case QImode:
16154             emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16155             break;
16156           case HImode:
16157             emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
16158             break;
16159           case SImode:
16160             emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16161             break;
16162           case DImode:
16163             emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16164             break;
16165           default:
16166             gcc_unreachable ();
16167         }
16168     }
16169   else
16170     {
16171       switch (GET_MODE (operands[0]))
16172         {
16173           case QImode:
16174             emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16175             break;
16176           case HImode:
16177             emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16178             break;
16179           case SImode:
16180             emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16181             break;
16182           case DImode:
16183             emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16184             break;
16185           default:
16186             gcc_unreachable ();
16187         }
16188     }
16189   return 1; /* DONE */
16190 }
16191 
16192 
/* Split OPERAND into word-sized parts: SImode parts on 32-bit targets,
   DImode parts on 64-bit ones.  Similar to split_di, but works for
   floating point parameters and non-offsettable memories.  For pushes,
   it returns just stack offsets; the values will be saved in the right
   order.  At most four parts are generated.  */
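/* For illustration: on a 32-bit target an XFmode operand yields three
   SImode parts and a DFmode operand two, while on a 64-bit target a
   TFmode or XFmode operand yields two parts, the first in DImode.  */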
16197 
16198 static int
16199 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
16200 {
16201   int size;
16202 
16203   if (!TARGET_64BIT)
    size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16205   else
16206     size = (GET_MODE_SIZE (mode) + 4) / 8;
16207 
16208   gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16209   gcc_assert (size >= 2 && size <= 4);
16210 
16211   /* Optimize constant pool reference to immediates.  This is used by fp
16212      moves, that force all constants to memory to allow combining.  */
16213   if (MEM_P (operand) && MEM_READONLY_P (operand))
16214     {
16215       rtx tmp = maybe_get_pool_constant (operand);
16216       if (tmp)
16217         operand = tmp;
16218     }
16219 
16220   if (MEM_P (operand) && !offsettable_memref_p (operand))
16221     {
      /* The only non-offsettable memories we handle are pushes.  */
16223       int ok = push_operand (operand, VOIDmode);
16224 
16225       gcc_assert (ok);
16226 
16227       operand = copy_rtx (operand);
16228       PUT_MODE (operand, Pmode);
16229       parts[0] = parts[1] = parts[2] = parts[3] = operand;
16230       return size;
16231     }
16232 
16233   if (GET_CODE (operand) == CONST_VECTOR)
16234     {
16235       enum machine_mode imode = int_mode_for_mode (mode);
16236       /* Caution: if we looked through a constant pool memory above,
16237          the operand may actually have a different mode now.  That's
16238          ok, since we want to pun this all the way back to an integer.  */
16239       operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16240       gcc_assert (operand != NULL);
16241       mode = imode;
16242     }
16243 
16244   if (!TARGET_64BIT)
16245     {
16246       if (mode == DImode)
16247         split_di (&operand, 1, &parts[0], &parts[1]);
16248       else
16249         {
16250           int i;
16251 
16252           if (REG_P (operand))
16253             {
16254               gcc_assert (reload_completed);
16255               for (i = 0; i < size; i++)
16256                 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16257             }
16258           else if (offsettable_memref_p (operand))
16259             {
16260               operand = adjust_address (operand, SImode, 0);
16261               parts[0] = operand;
16262               for (i = 1; i < size; i++)
16263                 parts[i] = adjust_address (operand, SImode, 4 * i);
16264             }
16265           else if (GET_CODE (operand) == CONST_DOUBLE)
16266             {
16267               REAL_VALUE_TYPE r;
16268               long l[4];
16269 
16270               REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16271               switch (mode)
16272                 {
16273                 case TFmode:
16274                   real_to_target (l, &r, mode);
16275                   parts[3] = gen_int_mode (l[3], SImode);
16276                   parts[2] = gen_int_mode (l[2], SImode);
16277                   break;
16278                 case XFmode:
16279                   REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16280                   parts[2] = gen_int_mode (l[2], SImode);
16281                   break;
16282                 case DFmode:
16283                   REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16284                   break;
16285                 default:
16286                   gcc_unreachable ();
16287                 }
16288               parts[1] = gen_int_mode (l[1], SImode);
16289               parts[0] = gen_int_mode (l[0], SImode);
16290             }
16291           else
16292             gcc_unreachable ();
16293         }
16294     }
16295   else
16296     {
16297       if (mode == TImode)
16298         split_ti (&operand, 1, &parts[0], &parts[1]);
16299       if (mode == XFmode || mode == TFmode)
16300         {
          enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
16302           if (REG_P (operand))
16303             {
16304               gcc_assert (reload_completed);
16305               parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16306               parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16307             }
16308           else if (offsettable_memref_p (operand))
16309             {
16310               operand = adjust_address (operand, DImode, 0);
16311               parts[0] = operand;
16312               parts[1] = adjust_address (operand, upper_mode, 8);
16313             }
16314           else if (GET_CODE (operand) == CONST_DOUBLE)
16315             {
16316               REAL_VALUE_TYPE r;
16317               long l[4];
16318 
16319               REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16320               real_to_target (l, &r, mode);
16321 
16322               /* Do not use shift by 32 to avoid warning on 32bit systems.  */
16323               if (HOST_BITS_PER_WIDE_INT >= 64)
16324                 parts[0]
16325                   = gen_int_mode
16326                       ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16327                        + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16328                        DImode);
16329               else
16330                 parts[0] = immed_double_const (l[0], l[1], DImode);
16331 
16332               if (upper_mode == SImode)
16333                 parts[1] = gen_int_mode (l[2], SImode);
16334               else if (HOST_BITS_PER_WIDE_INT >= 64)
16335                 parts[1]
16336                   = gen_int_mode
16337                       ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16338                        + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16339                        DImode);
16340               else
16341                 parts[1] = immed_double_const (l[2], l[3], DImode);
16342             }
16343           else
16344             gcc_unreachable ();
16345         }
16346     }
16347 
16348   return size;
16349 }
16350 
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   The destination parts are placed in operands 2-5 and the source
   parts in operands 6-9, in the order in which they must be copied;
   the moves are then emitted from there.  */
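/* For example, a DImode move on a 32-bit target becomes two SImode
   moves, ordered so that no part of the source is overwritten before
   it has been copied.  */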
16355 
16356 void
16357 ix86_split_long_move (rtx operands[])
16358 {
16359   rtx part[2][4];
16360   int nparts, i, j;
16361   int push = 0;
16362   int collisions = 0;
16363   enum machine_mode mode = GET_MODE (operands[0]);
16364   bool collisionparts[4];
16365 
  /* The DFmode expanders may ask us to move a double.
     For a 64-bit target this is a single move.  By handling that case
     here we simplify the i386.md splitters.  */
16369   if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16370     {
16371       /* Optimize constant pool reference to immediates.  This is used by
16372          fp moves, that force all constants to memory to allow combining.  */
16373 
16374       if (MEM_P (operands[1])
16375           && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16376           && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16377         operands[1] = get_pool_constant (XEXP (operands[1], 0));
16378       if (push_operand (operands[0], VOIDmode))
16379         {
16380           operands[0] = copy_rtx (operands[0]);
16381           PUT_MODE (operands[0], Pmode);
16382         }
16383       else
16384         operands[0] = gen_lowpart (DImode, operands[0]);
16385       operands[1] = gen_lowpart (DImode, operands[1]);
16386       emit_move_insn (operands[0], operands[1]);
16387       return;
16388     }
16389 
  /* The only non-offsettable memory we handle is a push.  */
16391   if (push_operand (operands[0], VOIDmode))
16392     push = 1;
16393   else
16394     gcc_assert (!MEM_P (operands[0])
16395                 || offsettable_memref_p (operands[0]));
16396 
16397   nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16398   ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16399 
  /* When emitting a push, take care with source operands located on
     the stack.  */
16401   if (push && MEM_P (operands[1])
16402       && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16403     {
16404       rtx src_base = XEXP (part[1][nparts - 1], 0);
16405 
16406       /* Compensate for the stack decrement by 4.  */
16407       if (!TARGET_64BIT && nparts == 3
16408           && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
16409         src_base = plus_constant (src_base, 4);
16410 
      /* src_base refers to the stack pointer and is
         automatically decremented by each emitted push.  */
16413       for (i = 0; i < nparts; i++)
16414         part[1][i] = change_address (part[1][i],
16415                                      GET_MODE (part[1][i]), src_base);
16416     }
16417 
  /* We need to do the copies in the right order in case an address
     register of the source overlaps the destination.  */
16420   if (REG_P (part[0][0]) && MEM_P (part[1][0]))
16421     {
16422       rtx tmp;
16423 
16424       for (i = 0; i < nparts; i++)
16425         {
16426           collisionparts[i]
16427             = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16428           if (collisionparts[i])
16429             collisions++;
16430         }
16431 
16432       /* Collision in the middle part can be handled by reordering.  */
16433       if (collisions == 1 && nparts == 3 && collisionparts [1])
16434         {
16435           tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16436           tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16437         }
16438       else if (collisions == 1
16439                && nparts == 4
16440                && (collisionparts [1] || collisionparts [2]))
16441         {
16442           if (collisionparts [1])
16443             {
16444               tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16445               tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16446             }
16447           else
16448             {
16449               tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16450               tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16451             }
16452         }
16453 
16454       /* If there are more collisions, we can't handle it by reordering.
16455          Do an lea to the last part and use only one colliding move.  */
16456       else if (collisions > 1)
16457         {
16458           rtx base;
16459 
16460           collisions = 1;
16461 
16462           base = part[0][nparts - 1];
16463 
16464           /* Handle the case when the last part isn't valid for lea.
16465              Happens in 64-bit mode storing the 12-byte XFmode.  */
16466           if (GET_MODE (base) != Pmode)
16467             base = gen_rtx_REG (Pmode, REGNO (base));
16468 
16469           emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16470           part[1][0] = replace_equiv_address (part[1][0], base);
16471           for (i = 1; i < nparts; i++)
16472             {
16473               tmp = plus_constant (base, UNITS_PER_WORD * i);
16474               part[1][i] = replace_equiv_address (part[1][i], tmp);
16475             }
16476         }
16477     }
16478 
16479   if (push)
16480     {
16481       if (!TARGET_64BIT)
16482         {
16483           if (nparts == 3)
16484             {
16485               if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16486                 emit_insn (gen_addsi3 (stack_pointer_rtx,
16487                                        stack_pointer_rtx, GEN_INT (-4)));
16488               emit_move_insn (part[0][2], part[1][2]);
16489             }
16490           else if (nparts == 4)
16491             {
16492               emit_move_insn (part[0][3], part[1][3]);
16493               emit_move_insn (part[0][2], part[1][2]);
16494             }
16495         }
16496       else
16497         {
          /* In 64-bit mode there is no 32-bit push available.  If the
             part is a register, that is OK - we just use the larger
             counterpart.  We also retype memory - this comes from an
             attempt to avoid a REX prefix when moving the second half
             of a TFmode value.  */
16502           if (GET_MODE (part[1][1]) == SImode)
16503             {
16504               switch (GET_CODE (part[1][1]))
16505                 {
16506                 case MEM:
16507                   part[1][1] = adjust_address (part[1][1], DImode, 0);
16508                   break;
16509 
16510                 case REG:
16511                   part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16512                   break;
16513 
16514                 default:
16515                   gcc_unreachable ();
16516                 }
16517 
16518               if (GET_MODE (part[1][0]) == SImode)
16519                 part[1][0] = part[1][1];
16520             }
16521         }
16522       emit_move_insn (part[0][1], part[1][1]);
16523       emit_move_insn (part[0][0], part[1][0]);
16524       return;
16525     }
16526 
  /* Choose the correct order so as not to overwrite the source before
     it is copied.  */
16528   if ((REG_P (part[0][0])
16529        && REG_P (part[1][1])
16530        && (REGNO (part[0][0]) == REGNO (part[1][1])
16531            || (nparts == 3
16532                && REGNO (part[0][0]) == REGNO (part[1][2]))
16533            || (nparts == 4
16534                && REGNO (part[0][0]) == REGNO (part[1][3]))))
16535       || (collisions > 0
16536           && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
16537     {
16538       for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16539         {
16540           operands[2 + i] = part[0][j];
16541           operands[6 + i] = part[1][j];
16542         }
16543     }
16544   else
16545     {
16546       for (i = 0; i < nparts; i++)
16547         {
16548           operands[2 + i] = part[0][i];
16549           operands[6 + i] = part[1][i];
16550         }
16551     }
16552 
16553   /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
16554   if (optimize_insn_for_size_p ())
16555     {
16556       for (j = 0; j < nparts - 1; j++)
16557         if (CONST_INT_P (operands[6 + j])
16558             && operands[6 + j] != const0_rtx
16559             && REG_P (operands[2 + j]))
16560           for (i = j; i < nparts - 1; i++)
16561             if (CONST_INT_P (operands[7 + i])
16562                 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16563               operands[7 + i] = operands[2 + j];
16564     }
16565 
16566   for (i = 0; i < nparts; i++)
16567     emit_move_insn (operands[2 + i], operands[6 + i]);
16568 
16569   return;
16570 }
16571 
16572 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16573    left shift by a constant, either using a single shift or
16574    a sequence of add instructions.  */
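/* E.g. a shift left by 2, where two adds are cheaper than a constant
   shift, becomes (for a part held in %eax, illustratively):

	addl	%eax, %eax
	addl	%eax, %eax  */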
16575 
16576 static void
16577 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
16578 {
16579   if (count == 1)
16580     {
16581       emit_insn ((mode == DImode
16582                   ? gen_addsi3
16583                   : gen_adddi3) (operand, operand, operand));
16584     }
16585   else if (!optimize_insn_for_size_p ()
16586            && count * ix86_cost->add <= ix86_cost->shift_const)
16587     {
16588       int i;
      for (i = 0; i < count; i++)
16590         {
16591           emit_insn ((mode == DImode
16592                       ? gen_addsi3
16593                       : gen_adddi3) (operand, operand, operand));
16594         }
16595     }
16596   else
16597     emit_insn ((mode == DImode
16598                 ? gen_ashlsi3
16599                 : gen_ashldi3) (operand, operand, GEN_INT (count)));
16600 }
16601 
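/* Split a double-word left shift (DImode on 32-bit, TImode on 64-bit
   targets) into operations on the two word-sized halves.  For a
   constant count below the word width this is, illustratively for
   DImode:

	shldl	$count, %low, %high	# high = high<<count | low>>(32-count)
	sall	$count, %low		# or an equivalent add chain

   while a count of the word width or more moves the low half into the
   high half, clears the low half, and shifts the high half by the
   remainder.  */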
16602 void
16603 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16604 {
16605   rtx low[2], high[2];
16606   int count;
16607   const int single_width = mode == DImode ? 32 : 64;
16608 
16609   if (CONST_INT_P (operands[2]))
16610     {
16611       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16612       count = INTVAL (operands[2]) & (single_width * 2 - 1);
16613 
16614       if (count >= single_width)
16615         {
16616           emit_move_insn (high[0], low[1]);
16617           emit_move_insn (low[0], const0_rtx);
16618 
16619           if (count > single_width)
16620             ix86_expand_ashl_const (high[0], count - single_width, mode);
16621         }
16622       else
16623         {
16624           if (!rtx_equal_p (operands[0], operands[1]))
16625             emit_move_insn (operands[0], operands[1]);
16626           emit_insn ((mode == DImode
16627                      ? gen_x86_shld
16628                      : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16629           ix86_expand_ashl_const (low[0], count, mode);
16630         }
16631       return;
16632     }
16633 
16634   (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16635 
16636   if (operands[1] == const1_rtx)
16637     {
      /* Assuming we've chosen QImode-capable registers, 1 << N
         can be done with two 32/64-bit shifts, no branches, no cmoves.  */
16640       if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16641         {
16642           rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16643 
16644           ix86_expand_clear (low[0]);
16645           ix86_expand_clear (high[0]);
16646           emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16647 
16648           d = gen_lowpart (QImode, low[0]);
16649           d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16650           s = gen_rtx_EQ (QImode, flags, const0_rtx);
16651           emit_insn (gen_rtx_SET (VOIDmode, d, s));
16652 
16653           d = gen_lowpart (QImode, high[0]);
16654           d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16655           s = gen_rtx_NE (QImode, flags, const0_rtx);
16656           emit_insn (gen_rtx_SET (VOIDmode, d, s));
16657         }
16658 
16659       /* Otherwise, we can get the same results by manually performing
16660          a bit extract operation on bit 5/6, and then performing the two
16661          shifts.  The two methods of getting 0/1 into low/high are exactly
16662          the same size.  Avoiding the shift in the bit extract case helps
16663          pentium4 a bit; no one else seems to care much either way.  */
16664       else
16665         {
16666           rtx x;
16667 
16668           if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16669             x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16670           else
16671             x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16672           emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
16673 
16674           emit_insn ((mode == DImode
16675                       ? gen_lshrsi3
16676                       : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16677           emit_insn ((mode == DImode
16678                       ? gen_andsi3
16679                       : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16680           emit_move_insn (low[0], high[0]);
16681           emit_insn ((mode == DImode
16682                       ? gen_xorsi3
16683                       : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16684         }
16685 
16686       emit_insn ((mode == DImode
16687                     ? gen_ashlsi3
16688                     : gen_ashldi3) (low[0], low[0], operands[2]));
16689       emit_insn ((mode == DImode
16690                     ? gen_ashlsi3
16691                     : gen_ashldi3) (high[0], high[0], operands[2]));
16692       return;
16693     }
16694 
16695   if (operands[1] == constm1_rtx)
16696     {
16697       /* For -1 << N, we can avoid the shld instruction, because we
16698          know that we're shifting 0...31/63 ones into a -1.  */
16699       emit_move_insn (low[0], constm1_rtx);
16700       if (optimize_insn_for_size_p ())
16701         emit_move_insn (high[0], low[0]);
16702       else
16703         emit_move_insn (high[0], constm1_rtx);
16704     }
16705   else
16706     {
16707       if (!rtx_equal_p (operands[0], operands[1]))
16708         emit_move_insn (operands[0], operands[1]);
16709 
16710       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16711       emit_insn ((mode == DImode
16712                   ? gen_x86_shld
16713                   : gen_x86_64_shld) (high[0], low[0], operands[2]));
16714     }
16715 
16716   emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
16717 
16718   if (TARGET_CMOVE && scratch)
16719     {
16720       ix86_expand_clear (scratch);
16721       emit_insn ((mode == DImode
16722                   ? gen_x86_shift_adj_1
16723                   : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
16724                                              scratch));
16725     }
16726   else
16727     emit_insn ((mode == DImode
16728                 ? gen_x86_shift_adj_2
16729                 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
16730 }
16731 
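/* Split a double-word arithmetic right shift into operations on the
   two halves.  The sign is propagated from the high half; e.g. a
   constant count of 2*width - 1 shifts the high half right
   arithmetically by width - 1 and copies the result into both
   halves.  */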
16732 void
16733 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16734 {
16735   rtx low[2], high[2];
16736   int count;
16737   const int single_width = mode == DImode ? 32 : 64;
16738 
16739   if (CONST_INT_P (operands[2]))
16740     {
16741       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16742       count = INTVAL (operands[2]) & (single_width * 2 - 1);
16743 
16744       if (count == single_width * 2 - 1)
16745         {
16746           emit_move_insn (high[0], high[1]);
16747           emit_insn ((mode == DImode
16748                       ? gen_ashrsi3
16749                       : gen_ashrdi3) (high[0], high[0],
16750                                       GEN_INT (single_width - 1)));
16751           emit_move_insn (low[0], high[0]);
16752 
16753         }
16754       else if (count >= single_width)
16755         {
16756           emit_move_insn (low[0], high[1]);
16757           emit_move_insn (high[0], low[0]);
16758           emit_insn ((mode == DImode
16759                       ? gen_ashrsi3
16760                       : gen_ashrdi3) (high[0], high[0],
16761                                       GEN_INT (single_width - 1)));
16762           if (count > single_width)
16763             emit_insn ((mode == DImode
16764                         ? gen_ashrsi3
16765                         : gen_ashrdi3) (low[0], low[0],
16766                                         GEN_INT (count - single_width)));
16767         }
16768       else
16769         {
16770           if (!rtx_equal_p (operands[0], operands[1]))
16771             emit_move_insn (operands[0], operands[1]);
16772           emit_insn ((mode == DImode
16773                       ? gen_x86_shrd
16774                       : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16775           emit_insn ((mode == DImode
16776                       ? gen_ashrsi3
16777                       : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
16778         }
16779     }
16780   else
16781     {
16782       if (!rtx_equal_p (operands[0], operands[1]))
16783         emit_move_insn (operands[0], operands[1]);
16784 
16785       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16786 
16787       emit_insn ((mode == DImode
16788                   ? gen_x86_shrd
16789                   : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16790       emit_insn ((mode == DImode
16791                   ? gen_ashrsi3
16792                   : gen_ashrdi3)  (high[0], high[0], operands[2]));
16793 
16794       if (TARGET_CMOVE && scratch)
16795         {
16796           emit_move_insn (scratch, high[0]);
16797           emit_insn ((mode == DImode
16798                       ? gen_ashrsi3
16799                       : gen_ashrdi3) (scratch, scratch,
16800                                       GEN_INT (single_width - 1)));
16801           emit_insn ((mode == DImode
16802                       ? gen_x86_shift_adj_1
16803                       : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16804                                                  scratch));
16805         }
16806       else
16807         emit_insn ((mode == DImode
16808                     ? gen_x86_shift_adj_3
16809                     : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
16810     }
16811 }
16812 
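/* Split a double-word logical right shift into operations on the two
   halves.  E.g. for a constant count of at least the word width, the
   high input half becomes the low result half, the high result half is
   cleared, and any remaining count shifts the low half.  */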
16813 void
16814 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
16815 {
16816   rtx low[2], high[2];
16817   int count;
16818   const int single_width = mode == DImode ? 32 : 64;
16819 
16820   if (CONST_INT_P (operands[2]))
16821     {
16822       (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16823       count = INTVAL (operands[2]) & (single_width * 2 - 1);
16824 
16825       if (count >= single_width)
16826         {
16827           emit_move_insn (low[0], high[1]);
16828           ix86_expand_clear (high[0]);
16829 
16830           if (count > single_width)
16831             emit_insn ((mode == DImode
16832                         ? gen_lshrsi3
16833                         : gen_lshrdi3) (low[0], low[0],
16834                                         GEN_INT (count - single_width)));
16835         }
16836       else
16837         {
16838           if (!rtx_equal_p (operands[0], operands[1]))
16839             emit_move_insn (operands[0], operands[1]);
16840           emit_insn ((mode == DImode
16841                       ? gen_x86_shrd
16842                       : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16843           emit_insn ((mode == DImode
16844                       ? gen_lshrsi3
16845                       : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
16846         }
16847     }
16848   else
16849     {
16850       if (!rtx_equal_p (operands[0], operands[1]))
16851         emit_move_insn (operands[0], operands[1]);
16852 
16853       (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16854 
16855       emit_insn ((mode == DImode
16856                   ? gen_x86_shrd
16857                   : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16858       emit_insn ((mode == DImode
16859                   ? gen_lshrsi3
16860                   : gen_lshrdi3) (high[0], high[0], operands[2]));
16861 
16862       /* Heh.  By reversing the arguments, we can reuse this pattern.  */
16863       if (TARGET_CMOVE && scratch)
16864         {
16865           ix86_expand_clear (scratch);
16866           emit_insn ((mode == DImode
16867                       ? gen_x86_shift_adj_1
16868                       : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16869                                                  scratch));
16870         }
16871       else
16872         emit_insn ((mode == DImode
16873                     ? gen_x86_shift_adj_2
16874                     : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
16875     }
16876 }
16877 
16878 /* Predict just emitted jump instruction to be taken with probability PROB.  */
16879 static void
16880 predict_jump (int prob)
16881 {
16882   rtx insn = get_last_insn ();
16883   gcc_assert (JUMP_P (insn));
16884   REG_NOTES (insn)
16885     = gen_rtx_EXPR_LIST (REG_BR_PROB,
16886                          GEN_INT (prob),
16887                          REG_NOTES (insn));
16888 }
16889 
/* Helper function for the string operations below.  Test whether
   VARIABLE is aligned to VALUE bytes; if it is, jump to the returned
   label.  */
16892 static rtx
16893 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
16894 {
16895   rtx label = gen_label_rtx ();
16896   rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
16897   if (GET_MODE (variable) == DImode)
16898     emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
16899   else
16900     emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
16901   emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
16902                            1, label);
16903   if (epilogue)
16904     predict_jump (REG_BR_PROB_BASE * 50 / 100);
16905   else
16906     predict_jump (REG_BR_PROB_BASE * 90 / 100);
16907   return label;
16908 }
16909 
/* Decrease COUNTREG by VALUE.  */
16911 static void
16912 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
16913 {
16914   if (GET_MODE (countreg) == DImode)
16915     emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
16916   else
16917     emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
16918 }
16919 
/* Zero-extend EXP, which may be in SImode, into a Pmode register.  */
16921 rtx
16922 ix86_zero_extend_to_Pmode (rtx exp)
16923 {
16924   rtx r;
16925   if (GET_MODE (exp) == VOIDmode)
16926     return force_reg (Pmode, exp);
16927   if (GET_MODE (exp) == Pmode)
16928     return copy_to_mode_reg (Pmode, exp);
16929   r = gen_reg_rtx (Pmode);
16930   emit_insn (gen_zero_extendsidi2 (r, exp));
16931   return r;
16932 }
16933 
16934 /* Divide COUNTREG by SCALE.  */
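/* E.g. when copying COUNTREG bytes in SImode chunks the rep count is
   COUNTREG >> 2; a constant count is simply divided by SCALE.  */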
16935 static rtx
16936 scale_counter (rtx countreg, int scale)
16937 {
16938   rtx sc;
16940 
16941   if (scale == 1)
16942     return countreg;
16943   if (CONST_INT_P (countreg))
16944     return GEN_INT (INTVAL (countreg) / scale);
16945   gcc_assert (REG_P (countreg));
16946 
16948   sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
16949                             GEN_INT (exact_log2 (scale)),
16950                             NULL, 1, OPTAB_DIRECT);
16951   return sc;
16952 }
16953 
16954 /* Return mode for the memcpy/memset loop counter.  Prefer SImode over
16955    DImode for constant loop counts.  */
16956 
16957 static enum machine_mode
16958 counter_mode (rtx count_exp)
16959 {
16960   if (GET_MODE (count_exp) != VOIDmode)
16961     return GET_MODE (count_exp);
16962   if (GET_CODE (count_exp) != CONST_INT)
16963     return Pmode;
16964   if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
16965     return DImode;
16966   return SImode;
16967 }
16968 
/* When SRCPTR is non-NULL, output a simple loop that copies memory from
   SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the
   overall size is COUNT, specified in bytes.  When SRCPTR is NULL,
   output an equivalent loop that sets memory to VALUE (expected to be
   in MODE).

   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide the MEM rtxes to feed proper aliasing
   info.  */
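/* A sketch of the emitted loop for the copy case (SRCPTR non-NULL),
   with chunk = GET_MODE_SIZE (MODE) * UNROLL:

	size = count & ~(chunk - 1);
	iter = 0;
     top:
	copy chunk bytes from srcptr + iter to destptr + iter;
	iter += chunk;
	if (iter < size) goto top;
	destptr += iter;  srcptr += iter;  */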
16976 
16977 
16978 static void
16979 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
16980                                rtx destptr, rtx srcptr, rtx value,
16981                                rtx count, enum machine_mode mode, int unroll,
16982                                int expected_size)
16983 {
16984   rtx out_label, top_label, iter, tmp;
16985   enum machine_mode iter_mode = counter_mode (count);
16986   rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
16987   rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
16988   rtx size;
16989   rtx x_addr;
16990   rtx y_addr;
16991   int i;
16992 
16993   top_label = gen_label_rtx ();
16994   out_label = gen_label_rtx ();
16995   iter = gen_reg_rtx (iter_mode);
16996 
16997   size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
16998                               NULL, 1, OPTAB_DIRECT);
16999   /* Those two should combine.  */
17000   if (piece_size == const1_rtx)
17001     {
17002       emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17003                                true, out_label);
17004       predict_jump (REG_BR_PROB_BASE * 10 / 100);
17005     }
17006   emit_move_insn (iter, const0_rtx);
17007 
17008   emit_label (top_label);
17009 
17010   tmp = convert_modes (Pmode, iter_mode, iter, true);
17011   x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17012   destmem = change_address (destmem, mode, x_addr);
17013 
17014   if (srcmem)
17015     {
17016       y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17017       srcmem = change_address (srcmem, mode, y_addr);
17018 
      /* When unrolling for chips that reorder memory reads and writes,
         we can save registers by using a single temporary.  Using four
         temporaries is also overkill in 32-bit mode; note that the
         "&& 0" below currently disables this path.  */
17022       if (!TARGET_64BIT && 0)
17023         {
17024           for (i = 0; i < unroll; i++)
17025             {
17026               if (i)
17027                 {
17028                   destmem =
17029                     adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17030                   srcmem =
17031                     adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17032                 }
17033               emit_move_insn (destmem, srcmem);
17034             }
17035         }
17036       else
17037         {
17038           rtx tmpreg[4];
17039           gcc_assert (unroll <= 4);
17040           for (i = 0; i < unroll; i++)
17041             {
17042               tmpreg[i] = gen_reg_rtx (mode);
17043               if (i)
17044                 {
17045                   srcmem =
17046                     adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17047                 }
17048               emit_move_insn (tmpreg[i], srcmem);
17049             }
17050           for (i = 0; i < unroll; i++)
17051             {
17052               if (i)
17053                 {
17054                   destmem =
17055                     adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17056                 }
17057               emit_move_insn (destmem, tmpreg[i]);
17058             }
17059         }
17060     }
17061   else
17062     for (i = 0; i < unroll; i++)
17063       {
17064         if (i)
17065           destmem =
17066             adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17067         emit_move_insn (destmem, value);
17068       }
17069 
17070   tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17071                              true, OPTAB_LIB_WIDEN);
17072   if (tmp != iter)
17073     emit_move_insn (iter, tmp);
17074 
17075   emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
17076                            true, top_label);
17077   if (expected_size != -1)
17078     {
17079       expected_size /= GET_MODE_SIZE (mode) * unroll;
17080       if (expected_size == 0)
17081         predict_jump (0);
17082       else if (expected_size > REG_BR_PROB_BASE)
17083         predict_jump (REG_BR_PROB_BASE - 1);
17084       else
17085         predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17086     }
17087   else
17088     predict_jump (REG_BR_PROB_BASE * 80 / 100);
17089   iter = ix86_zero_extend_to_Pmode (iter);
17090   tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17091                              true, OPTAB_LIB_WIDEN);
17092   if (tmp != destptr)
17093     emit_move_insn (destptr, tmp);
17094   if (srcptr)
17095     {
17096       tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17097                                  true, OPTAB_LIB_WIDEN);
17098       if (tmp != srcptr)
17099         emit_move_insn (srcptr, tmp);
17100     }
17101   emit_label (out_label);
17102 }
17103 
/* Output a "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
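/* E.g. for MODE == SImode this emits the equivalent of "rep movsl"
   with the byte count scaled down to a word count; DESTEXP and SRCEXP
   give the final pointer values so the RTL dataflow stays correct.  */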
17106 static void
17107 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17108                            rtx destptr, rtx srcptr,
17109                            rtx count,
17110                            enum machine_mode mode)
17111 {
17112   rtx destexp;
17113   rtx srcexp;
17114   rtx countreg;
17115 
  /* If the size is known and a multiple of 4, it is shorter to use
     word-sized rep movs.  */
17117   if (mode == QImode && CONST_INT_P (count)
17118       && !(INTVAL (count) & 3))
17119     mode = SImode;
17120 
17121   if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17122     destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17123   if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17124     srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17125   countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17126   if (mode != QImode)
17127     {
17128       destexp = gen_rtx_ASHIFT (Pmode, countreg,
17129                                 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17130       destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17131       srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17132                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17133       srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17134     }
17135   else
17136     {
17137       destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17138       srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
17139     }
17140   if (CONST_INT_P (count))
17141     {
17142       count = GEN_INT (INTVAL (count)
17143                        & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17144       destmem = shallow_copy_rtx (destmem);
17145       srcmem = shallow_copy_rtx (srcmem);
17146       set_mem_size (destmem, count);
17147       set_mem_size (srcmem, count);
17148     }
17149   else
17150     {
17151       if (MEM_SIZE (destmem))
17152         set_mem_size (destmem, NULL_RTX);
17153       if (MEM_SIZE (srcmem))
17154         set_mem_size (srcmem, NULL_RTX);
17155     }
17156   emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17157                           destexp, srcexp));
17158 }
17159 
/* Output a "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
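/* E.g. for MODE == SImode this emits the equivalent of "rep stosl".
   VALUE is the already-promoted word to store; ORIG_VALUE is the
   original value, used here only to detect the all-zero case when
   recording the known memory size.  */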
17162 static void
17163 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17164                             rtx count, enum machine_mode mode,
17165                             rtx orig_value)
17166 {
17167   rtx destexp;
17168   rtx countreg;
17169 
17170   if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17171     destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17172   value = force_reg (mode, gen_lowpart (mode, value));
17173   countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17174   if (mode != QImode)
17175     {
17176       destexp = gen_rtx_ASHIFT (Pmode, countreg,
17177                                 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17178       destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17179     }
17180   else
17181     destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17182   if (orig_value == const0_rtx && CONST_INT_P (count))
17183     {
17184       count = GEN_INT (INTVAL (count)
17185                        & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17186       destmem = shallow_copy_rtx (destmem);
17187       set_mem_size (destmem, count);
17188     }
17189   else if (MEM_SIZE (destmem))
17190     set_mem_size (destmem, NULL_RTX);
17191   emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
17192 }
17193 
17194 static void
17195 emit_strmov (rtx destmem, rtx srcmem,
17196              rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17197 {
17198   rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17199   rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17200   emit_insn (gen_strmov (destptr, dest, srcptr, src));
17201 }
17202 
17203 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
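/* For a constant COUNT the residue is emitted as straight-line moves
   selected by its low bits: e.g. a residue of 7 bytes becomes a 4-byte,
   a 2-byte and a 1-byte move.  */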
17204 static void
17205 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17206                         rtx destptr, rtx srcptr, rtx count, int max_size)
17207 {
17208   rtx src, dest;
17209   if (CONST_INT_P (count))
17210     {
17211       HOST_WIDE_INT countval = INTVAL (count);
17212       int offset = 0;
17213 
17214       if ((countval & 0x10) && max_size > 16)
17215         {
17216           if (TARGET_64BIT)
17217             {
17218               emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17219               emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17220             }
17221           else
17222             gcc_unreachable ();
17223           offset += 16;
17224         }
17225       if ((countval & 0x08) && max_size > 8)
17226         {
17227           if (TARGET_64BIT)
17228             emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17229           else
17230             {
17231               emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17232               emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17233             }
17234           offset += 8;
17235         }
17236       if ((countval & 0x04) && max_size > 4)
17237         {
17238           emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17239           offset += 4;
17240         }
17241       if ((countval & 0x02) && max_size > 2)
17242         {
17243           emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17244           offset += 2;
17245         }
17246       if ((countval & 0x01) && max_size > 1)
17247         {
17248           emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
17249           offset += 1;
17250         }
17251       return;
17252     }
17253   if (max_size > 8)
17254     {
17255       count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17256                                     count, 1, OPTAB_DIRECT);
17257       expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17258                                      count, QImode, 1, 4);
17259       return;
17260     }
17261 
  /* When single stringop instructions are available, we can cheaply
     advance the dest and src pointers.  Otherwise we save code size by
     maintaining an offset (zero is readily available from the preceding
     rep operation) and using x86 addressing modes.  */
17266   if (TARGET_SINGLE_STRINGOP)
17267     {
17268       if (max_size > 4)
17269         {
17270           rtx label = ix86_expand_aligntest (count, 4, true);
17271           src = change_address (srcmem, SImode, srcptr);
17272           dest = change_address (destmem, SImode, destptr);
17273           emit_insn (gen_strmov (destptr, dest, srcptr, src));
17274           emit_label (label);
17275           LABEL_NUSES (label) = 1;
17276         }
17277       if (max_size > 2)
17278         {
17279           rtx label = ix86_expand_aligntest (count, 2, true);
17280           src = change_address (srcmem, HImode, srcptr);
17281           dest = change_address (destmem, HImode, destptr);
17282           emit_insn (gen_strmov (destptr, dest, srcptr, src));
17283           emit_label (label);
17284           LABEL_NUSES (label) = 1;
17285         }
17286       if (max_size > 1)
17287         {
17288           rtx label = ix86_expand_aligntest (count, 1, true);
17289           src = change_address (srcmem, QImode, srcptr);
17290           dest = change_address (destmem, QImode, destptr);
17291           emit_insn (gen_strmov (destptr, dest, srcptr, src));
17292           emit_label (label);
17293           LABEL_NUSES (label) = 1;
17294         }
17295     }
17296   else
17297     {
17298       rtx offset = force_reg (Pmode, const0_rtx);
17299       rtx tmp;
17300 
17301       if (max_size > 4)
17302         {
17303           rtx label = ix86_expand_aligntest (count, 4, true);
17304           src = change_address (srcmem, SImode, srcptr);
17305           dest = change_address (destmem, SImode, destptr);
17306           emit_move_insn (dest, src);
17307           tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17308                                      true, OPTAB_LIB_WIDEN);
17309           if (tmp != offset)
17310             emit_move_insn (offset, tmp);
17311           emit_label (label);
17312           LABEL_NUSES (label) = 1;
17313         }
17314       if (max_size > 2)
17315         {
17316           rtx label = ix86_expand_aligntest (count, 2, true);
17317           tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17318           src = change_address (srcmem, HImode, tmp);
17319           tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17320           dest = change_address (destmem, HImode, tmp);
17321           emit_move_insn (dest, src);
17322           tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17323                                      true, OPTAB_LIB_WIDEN);
17324           if (tmp != offset)
17325             emit_move_insn (offset, tmp);
17326           emit_label (label);
17327           LABEL_NUSES (label) = 1;
17328         }
17329       if (max_size > 1)
17330         {
17331           rtx label = ix86_expand_aligntest (count, 1, true);
17332           tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17333           src = change_address (srcmem, QImode, tmp);
17334           tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17335           dest = change_address (destmem, QImode, tmp);
17336           emit_move_insn (dest, src);
17337           emit_label (label);
17338           LABEL_NUSES (label) = 1;
17339         }
17340     }
17341 }
17342 
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
17344 static void
17345 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17346                                  rtx count, int max_size)
17347 {
17348   count =
17349     expand_simple_binop (counter_mode (count), AND, count,
17350                          GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17351   expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17352                                  gen_lowpart (QImode, value), count, QImode,
17353                                  1, max_size / 2);
17354 }
17355 
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
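/* As with the copy epilogue above: a constant residual count becomes
   straight-line stores, while a variable count is handled with runtime
   alignment tests guarding 16/8/4/2/1-byte stores.  */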
17357 static void
17358 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17359 {
17360   rtx dest;
17361 
17362   if (CONST_INT_P (count))
17363     {
17364       HOST_WIDE_INT countval = INTVAL (count);
17365       int offset = 0;
17366 
17367       if ((countval & 0x10) && max_size > 16)
17368         {
17369           if (TARGET_64BIT)
17370             {
17371               dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17372               emit_insn (gen_strset (destptr, dest, value));
17373               dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17374               emit_insn (gen_strset (destptr, dest, value));
17375             }
17376           else
17377             gcc_unreachable ();
17378           offset += 16;
17379         }
17380       if ((countval & 0x08) && max_size > 8)
17381         {
17382           if (TARGET_64BIT)
17383             {
17384               dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17385               emit_insn (gen_strset (destptr, dest, value));
17386             }
17387           else
17388             {
17389               dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17390               emit_insn (gen_strset (destptr, dest, value));
17391               dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17392               emit_insn (gen_strset (destptr, dest, value));
17393             }
17394           offset += 8;
17395         }
17396       if ((countval & 0x04) && max_size > 4)
17397         {
17398           dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17399           emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17400           offset += 4;
17401         }
17402       if ((countval & 0x02) && max_size > 2)
17403         {
17404           dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17405           emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17406           offset += 2;
17407         }
17408       if ((countval & 0x01) && max_size > 1)
17409         {
17410           dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17411           emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17412           offset += 1;
17413         }
17414       return;
17415     }
17416   if (max_size > 32)
17417     {
17418       expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
17419       return;
17420     }
17421   if (max_size > 16)
17422     {
17423       rtx label = ix86_expand_aligntest (count, 16, true);
17424       if (TARGET_64BIT)
17425         {
17426           dest = change_address (destmem, DImode, destptr);
17427           emit_insn (gen_strset (destptr, dest, value));
17428           emit_insn (gen_strset (destptr, dest, value));
17429         }
17430       else
17431         {
17432           dest = change_address (destmem, SImode, destptr);
17433           emit_insn (gen_strset (destptr, dest, value));
17434           emit_insn (gen_strset (destptr, dest, value));
17435           emit_insn (gen_strset (destptr, dest, value));
17436           emit_insn (gen_strset (destptr, dest, value));
17437         }
17438       emit_label (label);
17439       LABEL_NUSES (label) = 1;
17440     }
17441   if (max_size > 8)
17442     {
17443       rtx label = ix86_expand_aligntest (count, 8, true);
17444       if (TARGET_64BIT)
17445         {
17446           dest = change_address (destmem, DImode, destptr);
17447           emit_insn (gen_strset (destptr, dest, value));
17448         }
17449       else
17450         {
17451           dest = change_address (destmem, SImode, destptr);
17452           emit_insn (gen_strset (destptr, dest, value));
17453           emit_insn (gen_strset (destptr, dest, value));
17454         }
17455       emit_label (label);
17456       LABEL_NUSES (label) = 1;
17457     }
17458   if (max_size > 4)
17459     {
17460       rtx label = ix86_expand_aligntest (count, 4, true);
17461       dest = change_address (destmem, SImode, destptr);
17462       emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17463       emit_label (label);
17464       LABEL_NUSES (label) = 1;
17465     }
17466   if (max_size > 2)
17467     {
17468       rtx label = ix86_expand_aligntest (count, 2, true);
17469       dest = change_address (destmem, HImode, destptr);
17470       emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17471       emit_label (label);
17472       LABEL_NUSES (label) = 1;
17473     }
17474   if (max_size > 1)
17475     {
17476       rtx label = ix86_expand_aligntest (count, 1, true);
17477       dest = change_address (destmem, QImode, destptr);
17478       emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17479       emit_label (label);
17480       LABEL_NUSES (label) = 1;
17481     }
17482 }
17483 
/* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
17486 static void
17487 expand_movmem_prologue (rtx destmem, rtx srcmem,
17488                         rtx destptr, rtx srcptr, rtx count,
17489                         int align, int desired_alignment)
17490 {
17491   if (align <= 1 && desired_alignment > 1)
17492     {
17493       rtx label = ix86_expand_aligntest (destptr, 1, false);
17494       srcmem = change_address (srcmem, QImode, srcptr);
17495       destmem = change_address (destmem, QImode, destptr);
17496       emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17497       ix86_adjust_counter (count, 1);
17498       emit_label (label);
17499       LABEL_NUSES (label) = 1;
17500     }
17501   if (align <= 2 && desired_alignment > 2)
17502     {
17503       rtx label = ix86_expand_aligntest (destptr, 2, false);
17504       srcmem = change_address (srcmem, HImode, srcptr);
17505       destmem = change_address (destmem, HImode, destptr);
17506       emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17507       ix86_adjust_counter (count, 2);
17508       emit_label (label);
17509       LABEL_NUSES (label) = 1;
17510     }
17511   if (align <= 4 && desired_alignment > 4)
17512     {
17513       rtx label = ix86_expand_aligntest (destptr, 4, false);
17514       srcmem = change_address (srcmem, SImode, srcptr);
17515       destmem = change_address (destmem, SImode, destptr);
17516       emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17517       ix86_adjust_counter (count, 4);
17518       emit_label (label);
17519       LABEL_NUSES (label) = 1;
17520     }
17521   gcc_assert (desired_alignment <= 8);
17522 }
17523 
/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
17526 static rtx
17527 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17528                                  int desired_align, int align_bytes)
17529 {
17530   rtx src = *srcp;
17531   rtx src_size, dst_size;
17532   int off = 0;
17533   int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17534   if (src_align_bytes >= 0)
17535     src_align_bytes = desired_align - src_align_bytes;
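  /* SRC_ALIGN_BYTES, when non-negative, mirrors ALIGN_BYTES for the
     source: the number of bytes (modulo DESIRED_ALIGN) after which SRC
     becomes DESIRED_ALIGN-aligned.  The comparisons of its low bits with
     those of ALIGN_BYTES below determine how much source alignment is
     known to hold after each prologue move.  */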
17536   src_size = MEM_SIZE (src);
17537   dst_size = MEM_SIZE (dst);
17538   if (align_bytes & 1)
17539     {
17540       dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17541       src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17542       off = 1;
17543       emit_insn (gen_strmov (destreg, dst, srcreg, src));
17544     }
17545   if (align_bytes & 2)
17546     {
17547       dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17548       src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17549       if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17550         set_mem_align (dst, 2 * BITS_PER_UNIT);
17551       if (src_align_bytes >= 0
17552           && (src_align_bytes & 1) == (align_bytes & 1)
17553           && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17554         set_mem_align (src, 2 * BITS_PER_UNIT);
17555       off = 2;
17556       emit_insn (gen_strmov (destreg, dst, srcreg, src));
17557     }
17558   if (align_bytes & 4)
17559     {
17560       dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17561       src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17562       if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17563         set_mem_align (dst, 4 * BITS_PER_UNIT);
17564       if (src_align_bytes >= 0)
17565         {
17566           unsigned int src_align = 0;
17567           if ((src_align_bytes & 3) == (align_bytes & 3))
17568             src_align = 4;
17569           else if ((src_align_bytes & 1) == (align_bytes & 1))
17570             src_align = 2;
17571           if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17572             set_mem_align (src, src_align * BITS_PER_UNIT);
17573         }
17574       off = 4;
17575       emit_insn (gen_strmov (destreg, dst, srcreg, src));
17576     }
17577   dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17578   src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17579   if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17580     set_mem_align (dst, desired_align * BITS_PER_UNIT);
17581   if (src_align_bytes >= 0)
17582     {
17583       unsigned int src_align = 0;
17584       if ((src_align_bytes & 7) == (align_bytes & 7))
17585         src_align = 8;
17586       else if ((src_align_bytes & 3) == (align_bytes & 3))
17587         src_align = 4;
17588       else if ((src_align_bytes & 1) == (align_bytes & 1))
17589         src_align = 2;
17590       if (src_align > (unsigned int) desired_align)
17591         src_align = desired_align;
17592       if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17593         set_mem_align (src, src_align * BITS_PER_UNIT);
17594     }
17595   if (dst_size)
17596     set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17597   if (src_size)
    set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
17599   *srcp = src;
17600   return dst;
17601 }
17602 
/* Set enough from DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
17605 static void
17606 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17607                         int align, int desired_alignment)
17608 {
17609   if (align <= 1 && desired_alignment > 1)
17610     {
17611       rtx label = ix86_expand_aligntest (destptr, 1, false);
17612       destmem = change_address (destmem, QImode, destptr);
17613       emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17614       ix86_adjust_counter (count, 1);
17615       emit_label (label);
17616       LABEL_NUSES (label) = 1;
17617     }
17618   if (align <= 2 && desired_alignment > 2)
17619     {
17620       rtx label = ix86_expand_aligntest (destptr, 2, false);
17621       destmem = change_address (destmem, HImode, destptr);
17622       emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17623       ix86_adjust_counter (count, 2);
17624       emit_label (label);
17625       LABEL_NUSES (label) = 1;
17626     }
17627   if (align <= 4 && desired_alignment > 4)
17628     {
17629       rtx label = ix86_expand_aligntest (destptr, 4, false);
17630       destmem = change_address (destmem, SImode, destptr);
17631       emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17632       ix86_adjust_counter (count, 4);
17633       emit_label (label);
17634       LABEL_NUSES (label) = 1;
17635     }
17636   gcc_assert (desired_alignment <= 8);
17637 }
17638 
/* Set enough from DST to align DST, known to be aligned by ALIGN, to
   DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
17641 static rtx
17642 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17643                                  int desired_align, int align_bytes)
17644 {
17645   int off = 0;
17646   rtx dst_size = MEM_SIZE (dst);
17647   if (align_bytes & 1)
17648     {
17649       dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17650       off = 1;
17651       emit_insn (gen_strset (destreg, dst,
17652                              gen_lowpart (QImode, value)));
17653     }
17654   if (align_bytes & 2)
17655     {
17656       dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17657       if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17658         set_mem_align (dst, 2 * BITS_PER_UNIT);
17659       off = 2;
17660       emit_insn (gen_strset (destreg, dst,
17661                              gen_lowpart (HImode, value)));
17662     }
17663   if (align_bytes & 4)
17664     {
17665       dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17666       if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17667         set_mem_align (dst, 4 * BITS_PER_UNIT);
17668       off = 4;
17669       emit_insn (gen_strset (destreg, dst,
17670                              gen_lowpart (SImode, value)));
17671     }
17672   dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17673   if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17674     set_mem_align (dst, desired_align * BITS_PER_UNIT);
17675   if (dst_size)
17676     set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17677   return dst;
17678 }
17679 
17680 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
17681 static enum stringop_alg
17682 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17683             int *dynamic_check)
17684 {
17685   const struct stringop_algs * algs;
17686   bool optimize_for_speed;
17687   /* Algorithms using the rep prefix want at least edi and ecx;
17688      additionally, memset wants eax and memcpy wants esi.  Don't
17689      consider such algorithms if the user has appropriated those
17690      registers for their own purposes.  */
17691   bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17692                              || (memset
17693                                  ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17694 
17695 #define ALG_USABLE_P(alg) (rep_prefix_usable                    \
17696                            || (alg != rep_prefix_1_byte         \
17697                                && alg != rep_prefix_4_byte      \
17698                                && alg != rep_prefix_8_byte))
17699   const struct processor_costs *cost;
17700   
17701   /* Even if the string operation call is cold, we still might spend a lot
17702      of time processing large blocks.  */
17703   if (optimize_function_for_size_p (cfun)
17704       || (optimize_insn_for_size_p ()
17705           && expected_size != -1 && expected_size < 256))
17706     optimize_for_speed = false;
17707   else
17708     optimize_for_speed = true;
17709 
17710   cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17711 
17712   *dynamic_check = -1;
17713   if (memset)
17714     algs = &cost->memset[TARGET_64BIT != 0];
17715   else
17716     algs = &cost->memcpy[TARGET_64BIT != 0];
17717   if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17718     return stringop_alg;
17719   /* rep; movq or rep; movl is the smallest variant.  */
17720   else if (!optimize_for_speed)
17721     {
17722       if (!count || (count & 3))
17723         return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
17724       else
17725         return rep_prefix_usable ? rep_prefix_4_byte : loop;
17726     }
  /* Very tiny blocks are best handled via the loop; REP is expensive to
     set up.  */
17729   else if (expected_size != -1 && expected_size < 4)
17730     return loop_1_byte;
17731   else if (expected_size != -1)
17732     {
17733       unsigned int i;
17734       enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17736         {
17737           /* We get here if the algorithms that were not libcall-based
17738              were rep-prefix based and we are unable to use rep prefixes
17739              based on global register usage.  Break out of the loop and
17740              use the heuristic below.  */
17741           if (algs->size[i].max == 0)
17742             break;
17743           if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
17744             {
17745               enum stringop_alg candidate = algs->size[i].alg;
17746 
17747               if (candidate != libcall && ALG_USABLE_P (candidate))
17748                 alg = candidate;
17749               /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
17750                  last non-libcall inline algorithm.  */
17751               if (TARGET_INLINE_ALL_STRINGOPS)
17752                 {
                  /* When the current size is best copied by a libcall,
                     but we are still forced to inline, run the heuristic
                     below that picks code for medium-sized blocks.  */
17756                   if (alg != libcall)
17757                     return alg;
17758                   break;
17759                 }
17760               else if (ALG_USABLE_P (candidate))
17761                 return candidate;
17762             }
17763         }
17764       gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
17765     }
  /* When asked to inline the call anyway, try to pick a meaningful choice.
     We look for the maximal size of a block that is faster to copy by hand
     and take blocks of at most that size, guessing that the average size
     will be roughly half of the block.
17770 
17771      If this turns out to be bad, we might simply specify the preferred
17772      choice in ix86_costs.  */
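  /* For example, if the cost tables mark inline copying profitable up to
     4096 bytes (the fallback cap below), we re-run decide_alg with an
     expected size of 2048, and with -minline-stringops-dynamically we also
     emit a runtime size check that calls the library for larger blocks.  */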
17773   if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17774       && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
17775     {
17776       int max = -1;
17777       enum stringop_alg alg;
17778       int i;
17779       bool any_alg_usable_p = true;
17780 
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17782         {
17783           enum stringop_alg candidate = algs->size[i].alg;
17784           any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
17785 
          if (candidate != libcall && candidate
              && ALG_USABLE_P (candidate))
            max = algs->size[i].max;
17789         }
17790       /* If there aren't any usable algorithms, then recursing on
17791          smaller sizes isn't going to find anything.  Just return the
17792          simple byte-at-a-time copy loop.  */
17793       if (!any_alg_usable_p)
17794         {
17795           /* Pick something reasonable.  */
17796           if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17797             *dynamic_check = 128;
17798           return loop_1_byte;
17799         }
17800       if (max == -1)
17801         max = 4096;
17802       alg = decide_alg (count, max / 2, memset, dynamic_check);
17803       gcc_assert (*dynamic_check == -1);
17804       gcc_assert (alg != libcall);
17805       if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17806         *dynamic_check = max;
17807       return alg;
17808     }
17809   return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
17810 #undef ALG_USABLE_P
17811 }
17812 
17813 /* Decide on alignment.  We know that the operand is already aligned to ALIGN
17814    (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
17815 static int
17816 decide_alignment (int align,
17817                   enum stringop_alg alg,
17818                   int expected_size)
17819 {
17820   int desired_align = 0;
17821   switch (alg)
17822     {
17823       case no_stringop:
17824         gcc_unreachable ();
17825       case loop:
17826       case unrolled_loop:
17827         desired_align = GET_MODE_SIZE (Pmode);
17828         break;
17829       case rep_prefix_8_byte:
17830         desired_align = 8;
17831         break;
17832       case rep_prefix_4_byte:
        /* PentiumPro has special logic triggering for 8 byte aligned blocks,
           copying a whole cacheline at once.  */
17835         if (TARGET_PENTIUMPRO)
17836           desired_align = 8;
17837         else
17838           desired_align = 4;
17839         break;
17840       case rep_prefix_1_byte:
        /* PentiumPro has special logic triggering for 8 byte aligned blocks,
           copying a whole cacheline at once.  */
17843         if (TARGET_PENTIUMPRO)
17844           desired_align = 8;
17845         else
17846           desired_align = 1;
17847         break;
17848       case loop_1_byte:
17849         desired_align = 1;
17850         break;
17851       case libcall:
17852         return 0;
17853     }
17854 
17855   if (optimize_size)
17856     desired_align = 1;
17857   if (desired_align < align)
17858     desired_align = align;
17859   if (expected_size != -1 && expected_size < 4)
17860     desired_align = align;
17861   return desired_align;
17862 }
17863 
17864 /* Return the smallest power of 2 greater than VAL.  */
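/* E.g. 4 for VAL == 3 and 8 for VAL == 4; the result is strictly greater
   than VAL.  */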
17865 static int
17866 smallest_pow2_greater_than (int val)
17867 {
17868   int ret = 1;
17869   while (ret <= val)
17870     ret <<= 1;
17871   return ret;
17872 }
17873 
17874 /* Expand string move (memcpy) operation.  Use i386 string operations when
17875    profitable.  expand_setmem contains similar code.  The code depends upon
17876    architecture, block size and alignment, but always has the same
17877    overall structure:
17878 
17879    1) Prologue guard: Conditional that jumps up to epilogues for small
17880       blocks that can be handled by epilogue alone.  This is faster but
      also needed for correctness, since the prologue assumes the block is
      larger than the desired alignment.
17883 
17884       Optional dynamic check for size and libcall for large
17885       blocks is emitted here too, with -minline-stringops-dynamically.
17886 
17887    2) Prologue: copy first few bytes in order to get destination aligned
17888       to DESIRED_ALIGN.  It is emitted only when ALIGN is less than
      DESIRED_ALIGN, and up to DESIRED_ALIGN - ALIGN bytes can be copied.
17890       We emit either a jump tree on power of two sized blocks, or a byte loop.
17891 
17892    3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
17893       with specified algorithm.
17894 
17895    4) Epilogue: code copying tail of the block that is too small to be
17896       handled by main body (or up to size guarded by prologue guard).  */
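/* As an illustrative example, assuming the unrolled_loop algorithm on a
   64-bit target: SIZE_NEEDED is 4 * GET_MODE_SIZE (Pmode) = 32 bytes and
   DESIRED_ALIGN is 8, so the prologue guard branches to the epilogue for
   blocks shorter than 32 bytes, the prologue copies up to 7 bytes to
   align the destination, the main loop copies 32-byte chunks, and the
   epilogue handles the remaining tail.  */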
17897 
17898 int
17899 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
17900                     rtx expected_align_exp, rtx expected_size_exp)
17901 {
17902   rtx destreg;
17903   rtx srcreg;
17904   rtx label = NULL;
17905   rtx tmp;
17906   rtx jump_around_label = NULL;
17907   HOST_WIDE_INT align = 1;
17908   unsigned HOST_WIDE_INT count = 0;
17909   HOST_WIDE_INT expected_size = -1;
17910   int size_needed = 0, epilogue_size_needed;
17911   int desired_align = 0, align_bytes = 0;
17912   enum stringop_alg alg;
17913   int dynamic_check;
17914   bool need_zero_guard = false;
17915 
17916   if (CONST_INT_P (align_exp))
17917     align = INTVAL (align_exp);
  /* The i386 can do misaligned accesses at a reasonably increased cost.  */
17919   if (CONST_INT_P (expected_align_exp)
17920       && INTVAL (expected_align_exp) > align)
17921     align = INTVAL (expected_align_exp);
  /* ALIGN is the minimum of destination and source alignment, but here we
     care only about destination alignment.  */
17924   else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
17925     align = MEM_ALIGN (dst) / BITS_PER_UNIT;
17926 
17927   if (CONST_INT_P (count_exp))
17928     count = expected_size = INTVAL (count_exp);
17929   if (CONST_INT_P (expected_size_exp) && count == 0)
17930     expected_size = INTVAL (expected_size_exp);
17931 
17932   /* Make sure we don't need to care about overflow later on.  */
17933   if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17934     return 0;
17935 
17936   /* Step 0: Decide on preferred algorithm, desired alignment and
17937      size of chunks to be copied by main loop.  */
17938 
17939   alg = decide_alg (count, expected_size, false, &dynamic_check);
17940   desired_align = decide_alignment (align, alg, expected_size);
17941 
17942   if (!TARGET_ALIGN_STRINGOPS)
17943     align = desired_align;
17944 
17945   if (alg == libcall)
17946     return 0;
17947   gcc_assert (alg != no_stringop);
17948   if (!count)
17949     count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
17950   destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
17951   srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
17952   switch (alg)
17953     {
17954     case libcall:
17955     case no_stringop:
17956       gcc_unreachable ();
17957     case loop:
17958       need_zero_guard = true;
17959       size_needed = GET_MODE_SIZE (Pmode);
17960       break;
17961     case unrolled_loop:
17962       need_zero_guard = true;
17963       size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
17964       break;
17965     case rep_prefix_8_byte:
17966       size_needed = 8;
17967       break;
17968     case rep_prefix_4_byte:
17969       size_needed = 4;
17970       break;
17971     case rep_prefix_1_byte:
17972       size_needed = 1;
17973       break;
17974     case loop_1_byte:
17975       need_zero_guard = true;
17976       size_needed = 1;
17977       break;
17978     }
17979 
17980   epilogue_size_needed = size_needed;
17981 
17982   /* Step 1: Prologue guard.  */
17983 
17984   /* Alignment code needs count to be in register.  */
17985   if (CONST_INT_P (count_exp) && desired_align > align)
17986     {
17987       if (INTVAL (count_exp) > desired_align
17988           && INTVAL (count_exp) > size_needed)
17989         {
17990           align_bytes
17991             = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
17992           if (align_bytes <= 0)
17993             align_bytes = 0;
17994           else
17995             align_bytes = desired_align - align_bytes;
17996         }
17997       if (align_bytes == 0)
17998         count_exp = force_reg (counter_mode (count_exp), count_exp);
17999     }
18000   gcc_assert (desired_align >= 1 && align >= 1);
18001 
18002   /* Ensure that alignment prologue won't copy past end of block.  */
18003   if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18004     {
18005       epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
         Make sure it is power of 2.  */
18008       epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
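      /* For instance, with SIZE_NEEDED == 16 and DESIRED_ALIGN - ALIGN == 7
         the epilogue may have to handle up to MAX (15, 7) == 15 bytes,
         which is rounded up to 16.  */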
18009 
18010       if (count)
18011         {
18012           if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18013             {
              /* If the main algorithm works on QImode, no epilogue is needed.
                 For small sizes just don't align anything.  */
18016               if (size_needed == 1)
18017                 desired_align = align;
18018               else
18019                 goto epilogue;
18020             }
18021         }
18022       else
18023         {
18024           label = gen_label_rtx ();
18025           emit_cmp_and_jump_insns (count_exp,
18026                                    GEN_INT (epilogue_size_needed),
18027                                    LTU, 0, counter_mode (count_exp), 1, label);
18028           if (expected_size == -1 || expected_size < epilogue_size_needed)
18029             predict_jump (REG_BR_PROB_BASE * 60 / 100);
18030           else
18031             predict_jump (REG_BR_PROB_BASE * 20 / 100);
18032         }
18033     }
18034 
  /* Emit code to decide at runtime whether a library call or inline code
     should be used.  */
18037   if (dynamic_check != -1)
18038     {
18039       if (CONST_INT_P (count_exp))
18040         {
18041           if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18042             {
18043               emit_block_move_via_libcall (dst, src, count_exp, false);
18044               count_exp = const0_rtx;
18045               goto epilogue;
18046             }
18047         }
18048       else
18049         {
18050           rtx hot_label = gen_label_rtx ();
18051           jump_around_label = gen_label_rtx ();
18052           emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18053                                    LEU, 0, GET_MODE (count_exp), 1, hot_label);
18054           predict_jump (REG_BR_PROB_BASE * 90 / 100);
18055           emit_block_move_via_libcall (dst, src, count_exp, false);
18056           emit_jump (jump_around_label);
18057           emit_label (hot_label);
18058         }
18059     }
18060 
18061   /* Step 2: Alignment prologue.  */
18062 
18063   if (desired_align > align)
18064     {
18065       if (align_bytes == 0)
18066         {
          /* Except for the first move in the epilogue, we no longer know
             the constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw away
             the info early.  */
18071           src = change_address (src, BLKmode, srcreg);
18072           dst = change_address (dst, BLKmode, destreg);
18073           expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18074                                   desired_align);
18075         }
18076       else
18077         {
18078           /* If we know how many bytes need to be stored before dst is
18079              sufficiently aligned, maintain aliasing info accurately.  */
18080           dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18081                                                  desired_align, align_bytes);
18082           count_exp = plus_constant (count_exp, -align_bytes);
18083           count -= align_bytes;
18084         }
18085       if (need_zero_guard
18086           && (count < (unsigned HOST_WIDE_INT) size_needed
18087               || (align_bytes == 0
18088                   && count < ((unsigned HOST_WIDE_INT) size_needed
18089                               + desired_align - align))))
18090         {
18091           /* It is possible that we copied enough so the main loop will not
18092              execute.  */
18093           gcc_assert (size_needed > 1);
18094           if (label == NULL_RTX)
18095             label = gen_label_rtx ();
18096           emit_cmp_and_jump_insns (count_exp,
18097                                    GEN_INT (size_needed),
18098                                    LTU, 0, counter_mode (count_exp), 1, label);
18099           if (expected_size == -1
18100               || expected_size < (desired_align - align) / 2 + size_needed)
18101             predict_jump (REG_BR_PROB_BASE * 20 / 100);
18102           else
18103             predict_jump (REG_BR_PROB_BASE * 60 / 100);
18104         }
18105     }
18106   if (label && size_needed == 1)
18107     {
18108       emit_label (label);
18109       LABEL_NUSES (label) = 1;
18110       label = NULL;
18111       epilogue_size_needed = 1;
18112     }
18113   else if (label == NULL_RTX)
18114     epilogue_size_needed = size_needed;
18115 
18116   /* Step 3: Main loop.  */
18117 
18118   switch (alg)
18119     {
18120     case libcall:
18121     case no_stringop:
18122       gcc_unreachable ();
18123     case loop_1_byte:
18124       expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18125                                      count_exp, QImode, 1, expected_size);
18126       break;
18127     case loop:
18128       expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18129                                      count_exp, Pmode, 1, expected_size);
18130       break;
18131     case unrolled_loop:
      /* Unroll only by a factor of 2 in 32-bit mode, since we don't have
         enough registers for 4 temporaries anyway.  */
18134       expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18135                                      count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18136                                      expected_size);
18137       break;
18138     case rep_prefix_8_byte:
18139       expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18140                                  DImode);
18141       break;
18142     case rep_prefix_4_byte:
18143       expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18144                                  SImode);
18145       break;
18146     case rep_prefix_1_byte:
18147       expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18148                                  QImode);
18149       break;
18150     }
  /* Properly adjust the offsets of src and dest memory for aliasing.  */
18152   if (CONST_INT_P (count_exp))
18153     {
18154       src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18155                                           (count / size_needed) * size_needed);
18156       dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18157                                           (count / size_needed) * size_needed);
18158     }
18159   else
18160     {
18161       src = change_address (src, BLKmode, srcreg);
18162       dst = change_address (dst, BLKmode, destreg);
18163     }
18164 
18165   /* Step 4: Epilogue to copy the remaining bytes.  */
18166  epilogue:
18167   if (label)
18168     {
      /* When the main loop is done, COUNT_EXP might hold original count,
         while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
         Epilogue code will actually copy
         COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.  Compensate if
         needed.  */
18173 
18174       if (size_needed < epilogue_size_needed)
18175         {
18176           tmp =
18177             expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18178                                  GEN_INT (size_needed - 1), count_exp, 1,
18179                                  OPTAB_DIRECT);
18180           if (tmp != count_exp)
18181             emit_move_insn (count_exp, tmp);
18182         }
18183       emit_label (label);
18184       LABEL_NUSES (label) = 1;
18185     }
18186 
18187   if (count_exp != const0_rtx && epilogue_size_needed > 1)
18188     expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18189                             epilogue_size_needed);
18190   if (jump_around_label)
18191     emit_label (jump_around_label);
18192   return 1;
18193 }
18194 
/* Helper function for memcpy.  For QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a multiplication by 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
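/* For example, for MODE == SImode and VAL holding 0x41, the result
   register holds 0x41414141; for DImode the byte is replicated across
   all eight bytes.  */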
18200 static rtx
18201 promote_duplicated_reg (enum machine_mode mode, rtx val)
18202 {
18203   enum machine_mode valmode = GET_MODE (val);
18204   rtx tmp;
18205   int nops = mode == DImode ? 3 : 2;
18206 
18207   gcc_assert (mode == SImode || mode == DImode);
18208   if (val == const0_rtx)
18209     return copy_to_mode_reg (mode, const0_rtx);
18210   if (CONST_INT_P (val))
18211     {
18212       HOST_WIDE_INT v = INTVAL (val) & 255;
18213 
18214       v |= v << 8;
18215       v |= v << 16;
18216       if (mode == DImode)
18217         v |= (v << 16) << 16;
18218       return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18219     }
18220 
18221   if (valmode == VOIDmode)
18222     valmode = QImode;
18223   if (valmode != QImode)
18224     val = gen_lowpart (QImode, val);
18225   if (mode == QImode)
18226     return val;
18227   if (!TARGET_PARTIAL_REG_STALL)
18228     nops--;
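  /* Pick whichever is cheaper according to the cost tables: replicating
     the byte with a multiply by 0x01010101 (0x0101010101010101 for
     DImode), or the explicit insv/shift-or sequence in the else branch.  */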
18229   if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18230       + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18231       <= (ix86_cost->shift_const + ix86_cost->add) * nops
18232           + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18233     {
18234       rtx reg = convert_modes (mode, QImode, val, true);
18235       tmp = promote_duplicated_reg (mode, const1_rtx);
18236       return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
18237                                   OPTAB_DIRECT);
18238     }
18239   else
18240     {
18241       rtx reg = convert_modes (mode, QImode, val, true);
18242 
18243       if (!TARGET_PARTIAL_REG_STALL)
18244         if (mode == SImode)
18245           emit_insn (gen_movsi_insv_1 (reg, reg));
18246         else
18247           emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18248       else
18249         {
18250           tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18251                                      NULL, 1, OPTAB_DIRECT);
18252           reg =
18253             expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18254         }
18255       tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18256                                  NULL, 1, OPTAB_DIRECT);
18257       reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18258       if (mode == SImode)
18259         return reg;
18260       tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18261                                  NULL, 1, OPTAB_DIRECT);
18262       reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18263       return reg;
18264     }
18265 }
18266 
18267 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18268    be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18269    alignment from ALIGN to DESIRED_ALIGN.  */
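/* E.g. a 64-bit main loop storing 16-byte chunks promotes VAL to DImode,
   while a pure byte loop with no alignment prologue leaves VAL as is.  */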
18270 static rtx
18271 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18272 {
18273   rtx promoted_val;
18274 
18275   if (TARGET_64BIT
18276       && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18277     promoted_val = promote_duplicated_reg (DImode, val);
18278   else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18279     promoted_val = promote_duplicated_reg (SImode, val);
18280   else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18281     promoted_val = promote_duplicated_reg (HImode, val);
18282   else
18283     promoted_val = val;
18284 
18285   return promoted_val;
18286 }
18287 
18288 /* Expand string clear operation (bzero).  Use i386 string operations when
18289    profitable.  See expand_movmem comment for explanation of individual
18290    steps performed.  */
18291 int
18292 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18293                     rtx expected_align_exp, rtx expected_size_exp)
18294 {
18295   rtx destreg;
18296   rtx label = NULL;
18297   rtx tmp;
18298   rtx jump_around_label = NULL;
18299   HOST_WIDE_INT align = 1;
18300   unsigned HOST_WIDE_INT count = 0;
18301   HOST_WIDE_INT expected_size = -1;
18302   int size_needed = 0, epilogue_size_needed;
18303   int desired_align = 0, align_bytes = 0;
18304   enum stringop_alg alg;
18305   rtx promoted_val = NULL;
18306   bool force_loopy_epilogue = false;
18307   int dynamic_check;
18308   bool need_zero_guard = false;
18309 
18310   if (CONST_INT_P (align_exp))
18311     align = INTVAL (align_exp);
  /* The i386 can do misaligned accesses at a reasonably increased cost.  */
18313   if (CONST_INT_P (expected_align_exp)
18314       && INTVAL (expected_align_exp) > align)
18315     align = INTVAL (expected_align_exp);
18316   if (CONST_INT_P (count_exp))
18317     count = expected_size = INTVAL (count_exp);
18318   if (CONST_INT_P (expected_size_exp) && count == 0)
18319     expected_size = INTVAL (expected_size_exp);
18320 
18321   /* Make sure we don't need to care about overflow later on.  */
18322   if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18323     return 0;
18324 
18325   /* Step 0: Decide on preferred algorithm, desired alignment and
18326      size of chunks to be copied by main loop.  */
18327 
18328   alg = decide_alg (count, expected_size, true, &dynamic_check);
18329   desired_align = decide_alignment (align, alg, expected_size);
18330 
18331   if (!TARGET_ALIGN_STRINGOPS)
18332     align = desired_align;
18333 
18334   if (alg == libcall)
18335     return 0;
18336   gcc_assert (alg != no_stringop);
18337   if (!count)
18338     count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18339   destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18340   switch (alg)
18341     {
18342     case libcall:
18343     case no_stringop:
18344       gcc_unreachable ();
18345     case loop:
18346       need_zero_guard = true;
18347       size_needed = GET_MODE_SIZE (Pmode);
18348       break;
18349     case unrolled_loop:
18350       need_zero_guard = true;
18351       size_needed = GET_MODE_SIZE (Pmode) * 4;
18352       break;
18353     case rep_prefix_8_byte:
18354       size_needed = 8;
18355       break;
18356     case rep_prefix_4_byte:
18357       size_needed = 4;
18358       break;
18359     case rep_prefix_1_byte:
18360       size_needed = 1;
18361       break;
18362     case loop_1_byte:
18363       need_zero_guard = true;
18364       size_needed = 1;
18365       break;
18366     }
18367   epilogue_size_needed = size_needed;
18368 
18369   /* Step 1: Prologue guard.  */
18370 
18371   /* Alignment code needs count to be in register.  */
18372   if (CONST_INT_P (count_exp) && desired_align > align)
18373     {
18374       if (INTVAL (count_exp) > desired_align
18375           && INTVAL (count_exp) > size_needed)
18376         {
18377           align_bytes
18378             = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18379           if (align_bytes <= 0)
18380             align_bytes = 0;
18381           else
18382             align_bytes = desired_align - align_bytes;
18383         }
18384       if (align_bytes == 0)
18385         {
18386           enum machine_mode mode = SImode;
18387           if (TARGET_64BIT && (count & ~0xffffffff))
18388             mode = DImode;
18389           count_exp = force_reg (mode, count_exp);
18390         }
18391     }
18392   /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in
     front of all the code).  */
18395   if (CONST_INT_P (val_exp))
18396     promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18397                                                    desired_align, align);
18398   /* Ensure that alignment prologue won't copy past end of block.  */
18399   if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18400     {
18401       epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18402       /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18403          Make sure it is power of 2.  */
18404       epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18405 
      /* To improve performance of small blocks, we jump around the VAL
         promoting code.  This means that if VAL is not a constant, the
         promoted value might not be available in the epilogue, forcing
         the byte loop variant.  */
18410       if (epilogue_size_needed > 2 && !promoted_val)
18411         force_loopy_epilogue = true;
18412       if (count)
18413         {
18414           if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18415             {
              /* If the main algorithm works on QImode, no epilogue is needed.
                 For small sizes just don't align anything.  */
18418               if (size_needed == 1)
18419                 desired_align = align;
18420               else
18421                 goto epilogue;
18422             }
18423         }
18424       else
18425         {
18426           label = gen_label_rtx ();
18427           emit_cmp_and_jump_insns (count_exp,
18428                                    GEN_INT (epilogue_size_needed),
18429                                    LTU, 0, counter_mode (count_exp), 1, label);
18430           if (expected_size == -1 || expected_size <= epilogue_size_needed)
18431             predict_jump (REG_BR_PROB_BASE * 60 / 100);
18432           else
18433             predict_jump (REG_BR_PROB_BASE * 20 / 100);
18434         }
18435     }
18436   if (dynamic_check != -1)
18437     {
18438       rtx hot_label = gen_label_rtx ();
18439       jump_around_label = gen_label_rtx ();
18440       emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18441                                LEU, 0, counter_mode (count_exp), 1, hot_label);
18442       predict_jump (REG_BR_PROB_BASE * 90 / 100);
18443       set_storage_via_libcall (dst, count_exp, val_exp, false);
18444       emit_jump (jump_around_label);
18445       emit_label (hot_label);
18446     }
18447 
18448   /* Step 2: Alignment prologue.  */
18449 
  /* Do the expensive promotion once we have branched off the small
     blocks.  */
18451   if (!promoted_val)
18452     promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18453                                                    desired_align, align);
18454   gcc_assert (desired_align >= 1 && align >= 1);
18455 
18456   if (desired_align > align)
18457     {
18458       if (align_bytes == 0)
18459         {
          /* Except for the first move in the epilogue, we no longer know
             the constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw away
             the info early.  */
18464           dst = change_address (dst, BLKmode, destreg);
18465           expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18466                                   desired_align);
18467         }
18468       else
18469         {
18470           /* If we know how many bytes need to be stored before dst is
18471              sufficiently aligned, maintain aliasing info accurately.  */
18472           dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18473                                                  desired_align, align_bytes);
18474           count_exp = plus_constant (count_exp, -align_bytes);
18475           count -= align_bytes;
18476         }
18477       if (need_zero_guard
18478           && (count < (unsigned HOST_WIDE_INT) size_needed
18479               || (align_bytes == 0
18480                   && count < ((unsigned HOST_WIDE_INT) size_needed
18481                               + desired_align - align))))
18482         {
18483           /* It is possible that we copied enough so the main loop will not
18484              execute.  */
18485           gcc_assert (size_needed > 1);
18486           if (label == NULL_RTX)
18487             label = gen_label_rtx ();
18488           emit_cmp_and_jump_insns (count_exp,
18489                                    GEN_INT (size_needed),
18490                                    LTU, 0, counter_mode (count_exp), 1, label);
18491           if (expected_size == -1
18492               || expected_size < (desired_align - align) / 2 + size_needed)
18493             predict_jump (REG_BR_PROB_BASE * 20 / 100);
18494           else
18495             predict_jump (REG_BR_PROB_BASE * 60 / 100);
18496         }
18497     }
18498   if (label && size_needed == 1)
18499     {
18500       emit_label (label);
18501       LABEL_NUSES (label) = 1;
18502       label = NULL;
18503       promoted_val = val_exp;
18504       epilogue_size_needed = 1;
18505     }
18506   else if (label == NULL_RTX)
18507     epilogue_size_needed = size_needed;
18508 
18509   /* Step 3: Main loop.  */
18510 
18511   switch (alg)
18512     {
18513     case libcall:
18514     case no_stringop:
18515       gcc_unreachable ();
18516     case loop_1_byte:
18517       expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18518                                      count_exp, QImode, 1, expected_size);
18519       break;
18520     case loop:
18521       expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18522                                      count_exp, Pmode, 1, expected_size);
18523       break;
18524     case unrolled_loop:
18525       expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18526                                      count_exp, Pmode, 4, expected_size);
18527       break;
18528     case rep_prefix_8_byte:
18529       expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18530                                   DImode, val_exp);
18531       break;
18532     case rep_prefix_4_byte:
18533       expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18534                                   SImode, val_exp);
18535       break;
18536     case rep_prefix_1_byte:
18537       expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18538                                   QImode, val_exp);
18539       break;
18540     }
  /* Properly adjust the offset of the destination memory for aliasing.  */
18542   if (CONST_INT_P (count_exp))
18543     dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18544                                         (count / size_needed) * size_needed);
18545   else
18546     dst = change_address (dst, BLKmode, destreg);
18547 
18548   /* Step 4: Epilogue to copy the remaining bytes.  */
18549 
18550   if (label)
18551     {
      /* When the main loop is done, COUNT_EXP might hold original count,
         while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
         Epilogue code will actually copy
         COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.  Compensate if
         needed.  */
18556 
18557       if (size_needed < epilogue_size_needed)
18558         {
18559           tmp =
18560             expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18561                                  GEN_INT (size_needed - 1), count_exp, 1,
18562                                  OPTAB_DIRECT);
18563           if (tmp != count_exp)
18564             emit_move_insn (count_exp, tmp);
18565         }
18566       emit_label (label);
18567       LABEL_NUSES (label) = 1;
18568     }
18569  epilogue:
18570   if (count_exp != const0_rtx && epilogue_size_needed > 1)
18571     {
18572       if (force_loopy_epilogue)
18573         expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18574                                          epilogue_size_needed);
18575       else
18576         expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18577                                 epilogue_size_needed);
18578     }
18579   if (jump_around_label)
18580     emit_label (jump_around_label);
18581   return 1;
18582 }
18583 
18584 /* Expand the appropriate insns for doing strlen if not just doing
18585    repnz; scasb
18586 
18587    out = result, initialized with the start address
18588    align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
18590         not aligned, otherwise undefined
18591 
   This is just the body.  It needs the initializations mentioned above and
   some address computation at the end.  These things are done in i386.md.  */
18594 
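/* Overall strategy: check at most 3 leading bytes one at a time until OUT
   is 4-byte aligned, then scan SImode words with the zero-byte test in the
   loop below, and finally locate the zero byte within the last word
   without branching.  */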
18595 static void
18596 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18597 {
18598   int align;
18599   rtx tmp;
18600   rtx align_2_label = NULL_RTX;
18601   rtx align_3_label = NULL_RTX;
18602   rtx align_4_label = gen_label_rtx ();
18603   rtx end_0_label = gen_label_rtx ();
18604   rtx mem;
18605   rtx tmpreg = gen_reg_rtx (SImode);
18606   rtx scratch = gen_reg_rtx (SImode);
18607   rtx cmp;
18608 
18609   align = 0;
18610   if (CONST_INT_P (align_rtx))
18611     align = INTVAL (align_rtx);
18612 
18613   /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
18614 
18615   /* Is there a known alignment and is it less than 4?  */
18616   if (align < 4)
18617     {
18618       rtx scratch1 = gen_reg_rtx (Pmode);
18619       emit_move_insn (scratch1, out);
18620       /* Is there a known alignment and is it not 2? */
18621       if (align != 2)
18622         {
18623           align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18624           align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18625 
18626           /* Leave just the 3 lower bits.  */
18627           align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18628                                     NULL_RTX, 0, OPTAB_WIDEN);
18629 
18630           emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18631                                    Pmode, 1, align_4_label);
18632           emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18633                                    Pmode, 1, align_2_label);
18634           emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18635                                    Pmode, 1, align_3_label);
18636         }
18637       else
18638         {
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check whether the pointer is already 4-byte aligned.  */
18641 
18642           align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18643                                     NULL_RTX, 0, OPTAB_WIDEN);
18644 
18645           emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18646                                    Pmode, 1, align_4_label);
18647         }
18648 
18649       mem = change_address (src, QImode, out);
18650 
18651       /* Now compare the bytes.  */
18652 
      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
18654       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18655                                QImode, 1, end_0_label);
18656 
18657       /* Increment the address.  */
18658       emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18659 
      /* Not needed with an alignment of 2.  */
18661       if (align != 2)
18662         {
18663           emit_label (align_2_label);
18664 
18665           emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18666                                    end_0_label);
18667 
18668           emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18669 
18670           emit_label (align_3_label);
18671         }
18672 
18673       emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18674                                end_0_label);
18675 
18676       emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18677     }
18678 
  /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
     align this loop; that only enlarges the program and does not speed it
     up.  */
18682   emit_label (align_4_label);
18683 
18684   mem = change_address (src, SImode, out);
18685   emit_move_insn (scratch, mem);
18686   emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18687 
  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */
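  /* Spelled out, the insns below compute
        (V - 0x01010101) & ~V & 0x80808080
     which is nonzero exactly when V contains a zero byte.  E.g. for
     V == 0x12005678: V - 0x01010101 == 0x10FF5577, ~V == 0xEDFFA987, and
     ANDing with 0x80808080 leaves 0x00800000, flagging the zero byte.  */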
18690 
18691   emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18692   emit_insn (gen_one_cmplsi2 (scratch, scratch));
18693   emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18694   emit_insn (gen_andsi3 (tmpreg, tmpreg,
18695                          gen_int_mode (0x80808080, SImode)));
18696   emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
18697                            align_4_label);
18698 
18699   if (TARGET_CMOVE)
18700     {
18701        rtx reg = gen_reg_rtx (SImode);
18702        rtx reg2 = gen_reg_rtx (Pmode);
18703        emit_move_insn (reg, tmpreg);
18704        emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18705 
18706        /* If zero is not in the first two bytes, move two bytes forward.  */
18707        emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18708        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18709        tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18710        emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18711                                gen_rtx_IF_THEN_ELSE (SImode, tmp,
18712                                                      reg,
18713                                                      tmpreg)));
18714        /* Emit lea manually to avoid clobbering of flags.  */
18715        emit_insn (gen_rtx_SET (SImode, reg2,
18716                                gen_rtx_PLUS (Pmode, out, const2_rtx)));
18717 
18718        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18719        tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18720        emit_insn (gen_rtx_SET (VOIDmode, out,
18721                                gen_rtx_IF_THEN_ELSE (Pmode, tmp,
18722                                                      reg2,
18723                                                      out)));
18724 
18725     }
18726   else
18727     {
18728        rtx end_2_label = gen_label_rtx ();
18729        /* Is zero in the first two bytes? */
18730 
18731        emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18732        tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18733        tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
18734        tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18735                             gen_rtx_LABEL_REF (VOIDmode, end_2_label),
18736                             pc_rtx);
18737        tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18738        JUMP_LABEL (tmp) = end_2_label;
18739 
18740        /* Not in the first two.  Move two bytes forward.  */
18741        emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
18742        emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
18743 
18744        emit_label (end_2_label);
18745 
18746     }
18747 
  /* Avoid a branch when fixing up OUT for the position of the zero byte.  */
18749   tmpreg = gen_lowpart (QImode, tmpreg);
18750   emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
18751   cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18752   emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
18753 
18754   emit_label (end_0_label);
18755 }
18756 
18757 /* Expand strlen.  */
18758 
18759 int
18760 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
18761 {
18762   rtx addr, scratch1, scratch2, scratch3, scratch4;
18763 
  /* The generic case of the strlen expander is long.  Avoid its
     expansion unless TARGET_INLINE_ALL_STRINGOPS.  */
18766 
18767   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18768       && !TARGET_INLINE_ALL_STRINGOPS
18769       && !optimize_insn_for_size_p ()
18770       && (!CONST_INT_P (align) || INTVAL (align) < 4))
18771     return 0;
18772 
18773   addr = force_reg (Pmode, XEXP (src, 0));
18774   scratch1 = gen_reg_rtx (Pmode);
18775 
18776   if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18777       && !optimize_insn_for_size_p ())
18778     {
18779       /* Well it seems that some optimizer does not combine a call like
18780          foo(strlen(bar), strlen(bar));
         when the move and the subtraction are done here.  It does calculate
18782          the length just once when these instructions are done inside of
18783          output_strlen_unroll().  But I think since &bar[strlen(bar)] is
18784          often used and I use one fewer register for the lifetime of
18785          output_strlen_unroll() this is better.  */
18786 
18787       emit_move_insn (out, addr);
18788 
18789       ix86_expand_strlensi_unroll_1 (out, src, align);
18790 
18791       /* strlensi_unroll_1 returns the address of the zero at the end of
18792          the string, like memchr(), so compute the length by subtracting
18793          the start address.  */
18794       emit_insn ((*ix86_gen_sub3) (out, out, addr));
18795     }
18796   else
18797     {
18798       rtx unspec;
18799 
18800       /* Can't use this if the user has appropriated eax, ecx, or edi.  */
18801       if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
18802         return false;
18803 
18804       scratch2 = gen_reg_rtx (Pmode);
18805       scratch3 = gen_reg_rtx (Pmode);
18806       scratch4 = force_reg (Pmode, constm1_rtx);
18807 
18808       emit_move_insn (scratch3, addr);
18809       eoschar = force_reg (QImode, eoschar);
18810 
18811       src = replace_equiv_address_nv (src, scratch3);
18812 
18813       /* If .md starts supporting :P, this can be done in .md.  */
18814       unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
18815                                                  scratch4), UNSPEC_SCAS);
18816       emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
18817       emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
18818       emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
18819     }
18820   return 1;
18821 }
18822 
/* For a given symbol (function), construct code to compute the address
   of its PLT entry in the large x86-64 PIC model.  */
18825 rtx
18826 construct_plt_address (rtx symbol)
18827 {
18828   rtx tmp = gen_reg_rtx (Pmode);
18829   rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
18830 
18831   gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
18832   gcc_assert (ix86_cmodel == CM_LARGE_PIC);
18833 
18834   emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
18835   emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
18836   return tmp;
18837 }
18838 
18839 void
18840 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
18841                   rtx callarg2,
18842                   rtx pop, int sibcall)
18843 {
18844   rtx use = NULL, call;
18845 
18846   if (pop == const0_rtx)
18847     pop = NULL;
18848   gcc_assert (!TARGET_64BIT || !pop);
18849 
18850   if (TARGET_MACHO && !TARGET_64BIT)
18851     {
18852 #if TARGET_MACHO
18853       if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
18854         fnaddr = machopic_indirect_call_target (fnaddr);
18855 #endif
18856     }
18857   else
18858     {
18859       /* Static functions and indirect calls don't need the pic register.  */
18860       if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
18861           && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18862           && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
18863         use_reg (&use, pic_offset_table_rtx);
18864     }
18865 
18866   if (TARGET_64BIT && INTVAL (callarg2) >= 0)
18867     {
18868       rtx al = gen_rtx_REG (QImode, AX_REG);
18869       emit_move_insn (al, callarg2);
18870       use_reg (&use, al);
18871     }
18872 
18873   if (ix86_cmodel == CM_LARGE_PIC
18874       && GET_CODE (fnaddr) == MEM
18875       && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18876       && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
18877     fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
18878   else if (sibcall
18879            ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
18880            : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
18881     {
18882       fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18883       fnaddr = gen_rtx_MEM (QImode, fnaddr);
18884     }
18885 
18886   call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
18887   if (retval)
18888     call = gen_rtx_SET (VOIDmode, retval, call);
18889   if (pop)
18890     {
18891       pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
18892       pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
18893       call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
18894     }
18895   if (TARGET_64BIT
18896       && ix86_cfun_abi () == MS_ABI
18897       && (!callarg2 || INTVAL (callarg2) != -2))
18898     {
      /* XMM6-XMM15 and the SI and DI registers are call-saved in the
         MS ABI but not in the SYSV ABI, so we must represent that they
         are clobbered by calls to SYSV functions.  */
18901       static int clobbered_registers[] = {
18902         XMM6_REG, XMM7_REG, XMM8_REG,
18903         XMM9_REG, XMM10_REG, XMM11_REG,
18904         XMM12_REG, XMM13_REG, XMM14_REG,
18905         XMM15_REG, SI_REG, DI_REG
18906       };
18907       unsigned int i;
18908       rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
18909       rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
18910                                    UNSPEC_MS_TO_SYSV_CALL);
18911 
18912       vec[0] = call;
18913       vec[1] = unspec;
18914       for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
18915         vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
18916                                       ? TImode : DImode,
18917                                       gen_rtx_REG
18918                                         (SSE_REGNO_P (clobbered_registers[i])
18919                                                       ? TImode : DImode,
18920                                          clobbered_registers[i]));
18921 
18922       call = gen_rtx_PARALLEL (VOIDmode,
18923                                gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
18924                                + 2, vec));
18925     }
18926 
18927   call = emit_call_insn (call);
18928   if (use)
18929     CALL_INSN_FUNCTION_USAGE (call) = use;
18930 }
18931 
18932 
18933 /* Clear stack slot assignments remembered from previous functions.
18934    This is called from INIT_EXPANDERS once before RTL is emitted for each
18935    function.  */
18936 
18937 static struct machine_function *
18938 ix86_init_machine_status (void)
18939 {
18940   struct machine_function *f;
18941 
18942   f = GGC_CNEW (struct machine_function);
18943   f->use_fast_prologue_epilogue_nregs = -1;
18944   f->tls_descriptor_call_expanded_p = 0;
18945   f->call_abi = DEFAULT_ABI;
18946 
18947   return f;
18948 }
18949 
18950 /* Return a MEM corresponding to a stack slot with mode MODE.
18951    Allocate a new slot if necessary.
18952 
18953    The RTL for a function can have several slots available: N is
18954    which slot to use.  */
18955 
18956 rtx
18957 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
18958 {
18959   struct stack_local_entry *s;
18960 
18961   gcc_assert (n < MAX_386_STACK_LOCALS);
18962 
18963   /* Virtual slot is valid only before vregs are instantiated.  */
18964   gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
18965 
18966   for (s = ix86_stack_locals; s; s = s->next)
18967     if (s->mode == mode && s->n == n)
18968       return copy_rtx (s->rtl);
18969 
18970   s = (struct stack_local_entry *)
18971     ggc_alloc (sizeof (struct stack_local_entry));
18972   s->n = n;
18973   s->mode = mode;
18974   s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
18975 
18976   s->next = ix86_stack_locals;
18977   ix86_stack_locals = s;
18978   return s->rtl;
18979 }
18980 
18981 /* Construct the SYMBOL_REF for the tls_get_addr function.  */
18982 
18983 static GTY(()) rtx ix86_tls_symbol;
18984 rtx
18985 ix86_tls_get_addr (void)
{
18988   if (!ix86_tls_symbol)
18989     {
18990       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
18991                                             (TARGET_ANY_GNU_TLS
18992                                              && !TARGET_64BIT)
18993                                             ? "___tls_get_addr"
18994                                             : "__tls_get_addr");
18995     }
18996 
18997   return ix86_tls_symbol;
18998 }
18999 
19000 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
19001 
19002 static GTY(()) rtx ix86_tls_module_base_symbol;
19003 rtx
19004 ix86_tls_module_base (void)
{
19007   if (!ix86_tls_module_base_symbol)
19008     {
19009       ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19010                                                         "_TLS_MODULE_BASE_");
19011       SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19012         |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19013     }
19014 
19015   return ix86_tls_module_base_symbol;
19016 }
19017 
19018 /* Calculate the length of the memory address in the instruction
19019    encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
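/* For example, a sketch of the lengths as accounted here (the modrm
   byte itself is excluded):

     (%eax)          -> 0
     4(%ebp)         -> 1   disp8
     (%eax,%ebx,4)   -> 1   SIB byte
     foo             -> 4   disp32
     128(%esp)       -> 5   disp32 plus SIB; 128 does not fit in disp8  */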
19020 
19021 int
19022 memory_address_length (rtx addr)
19023 {
19024   struct ix86_address parts;
19025   rtx base, index, disp;
19026   int len;
19027   int ok;
19028 
19029   if (GET_CODE (addr) == PRE_DEC
19030       || GET_CODE (addr) == POST_INC
19031       || GET_CODE (addr) == PRE_MODIFY
19032       || GET_CODE (addr) == POST_MODIFY)
19033     return 0;
19034 
19035   ok = ix86_decompose_address (addr, &parts);
19036   gcc_assert (ok);
19037 
19038   if (parts.base && GET_CODE (parts.base) == SUBREG)
19039     parts.base = SUBREG_REG (parts.base);
19040   if (parts.index && GET_CODE (parts.index) == SUBREG)
19041     parts.index = SUBREG_REG (parts.index);
19042 
19043   base = parts.base;
19044   index = parts.index;
19045   disp = parts.disp;
19046   len = 0;
19047 
19048   /* Rule of thumb:
19049        - esp as the base always wants an index,
19050        - ebp as the base always wants a displacement.  */
19051 
19052   /* Register Indirect.  */
19053   if (base && !index && !disp)
19054     {
19055       /* esp (for its index) and ebp (for its displacement) need
19056          the two-byte modrm form.  */
19057       if (addr == stack_pointer_rtx
19058           || addr == arg_pointer_rtx
19059           || addr == frame_pointer_rtx
19060           || addr == hard_frame_pointer_rtx)
19061         len = 1;
19062     }
19063 
19064   /* Direct Addressing.  */
19065   else if (disp && !base && !index)
19066     len = 4;
19067 
19068   else
19069     {
19070       /* Find the length of the displacement constant.  */
19071       if (disp)
19072         {
19073           if (base && satisfies_constraint_K (disp))
19074             len = 1;
19075           else
19076             len = 4;
19077         }
19078       /* ebp always wants a displacement.  */
19079       else if (base == hard_frame_pointer_rtx)
19080         len = 1;
19081 
19082       /* An index requires the two-byte modrm form....  */
19083       if (index
19084           /* ...like esp, which always wants an index.  */
19085           || base == stack_pointer_rtx
19086           || base == arg_pointer_rtx
19087           || base == frame_pointer_rtx)
19088         len += 1;
19089     }
19090 
19091   return len;
19092 }
19093 
/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate
   alternative.  */
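/* For example (a sketch): with SHORTFORM set, "add $1, %eax" can use the
   sign-extended imm8 form and contributes 1 byte, while "add $1000, %eax"
   needs a full imm32 and contributes 4.  */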
19096 int
19097 ix86_attr_length_immediate_default (rtx insn, int shortform)
19098 {
19099   int len = 0;
19100   int i;
19101   extract_insn_cached (insn);
19102   for (i = recog_data.n_operands - 1; i >= 0; --i)
19103     if (CONSTANT_P (recog_data.operand[i]))
19104       {
19105         gcc_assert (!len);
19106         if (shortform && satisfies_constraint_K (recog_data.operand[i]))
19107           len = 1;
19108         else
19109           {
            switch (get_attr_mode (insn))
              {
                case MODE_QI:
                  len += 1;
                  break;
                case MODE_HI:
                  len += 2;
                  break;
                case MODE_SI:
                  len += 4;
                  break;
                /* Immediates for DImode instructions are encoded as
                   32-bit sign-extended values.  */
                case MODE_DI:
                  len += 4;
                  break;
                default:
                  fatal_insn ("unknown insn mode", insn);
              }
19128           }
19129       }
19130   return len;
19131 }
/* Compute the default value for the "length_address" attribute.  */
19133 int
19134 ix86_attr_length_address_default (rtx insn)
19135 {
19136   int i;
19137 
19138   if (get_attr_type (insn) == TYPE_LEA)
19139     {
19140       rtx set = PATTERN (insn);
19141 
19142       if (GET_CODE (set) == PARALLEL)
19143         set = XVECEXP (set, 0, 0);
19144 
19145       gcc_assert (GET_CODE (set) == SET);
19146 
19147       return memory_address_length (SET_SRC (set));
19148     }
19149 
19150   extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      return memory_address_length (XEXP (recog_data.operand[i], 0));
19157   return 0;
19158 }
19159 
/* Compute the default value for the "length_vex" attribute.  It includes
   the 2- or 3-byte VEX prefix and the 1-byte opcode.  */
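/* For instance (a sketch): an insn taking the 2-byte prefix (0xc5 xx)
   yields 2 + 1 = 3, while one forced to the 3-byte prefix (0xc4 xx xx)
   by VEX.W, REX.X, or REX.B yields 3 + 1 = 4.  */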
19162 
19163 int
19164 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19165                               int has_vex_w)
19166 {
19167   int i;
19168 
  /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX.W bit
     requires the 3-byte VEX prefix.  */
19171   if (!has_0f_opcode || has_vex_w)
19172     return 3 + 1;
19173 
  /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
19175   if (!TARGET_64BIT)
19176     return 2 + 1;
19177 
19178   extract_insn_cached (insn);
19179 
19180   for (i = recog_data.n_operands - 1; i >= 0; --i)
19181     if (REG_P (recog_data.operand[i]))
19182       {
19183         /* REX.W bit uses 3 byte VEX prefix.  */
19184         if (GET_MODE (recog_data.operand[i]) == DImode)
19185           return 3 + 1;
19186       }
19187     else
19188       {
19189         /* REX.X or REX.B bits use 3 byte VEX prefix.  */
19190         if (MEM_P (recog_data.operand[i])
19191             && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19192           return 3 + 1;
19193       }
19194 
19195   return 2 + 1;
19196 }
19197 
19198 /* Return the maximum number of instructions a cpu can issue.  */
19199 
19200 static int
19201 ix86_issue_rate (void)
19202 {
19203   switch (ix86_tune)
19204     {
19205     case PROCESSOR_PENTIUM:
19206     case PROCESSOR_K6:
19207       return 2;
19208 
19209     case PROCESSOR_PENTIUMPRO:
19210     case PROCESSOR_PENTIUM4:
19211     case PROCESSOR_ATHLON:
19212     case PROCESSOR_K8:
19213     case PROCESSOR_AMDFAM10:
19214     case PROCESSOR_NOCONA:
19215     case PROCESSOR_GENERIC32:
19216     case PROCESSOR_GENERIC64:
19217       return 3;
19218 
19219     case PROCESSOR_CORE2:
19220       return 4;
19221 
19222     default:
19223       return 1;
19224     }
19225 }
19226 
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the
   flags set by DEP_INSN and no other register set by DEP_INSN.  */
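/* For example (a sketch):

     cmp %eax, %ebx     <- DEP_INSN sets only the flags
     jne .L2            <- INSN reads only those flags

   is such a dependence; on Pentium the compare/jump pair issues
   together, so ix86_adjust_cost makes it free.  */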
19229 
19230 static int
19231 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19232 {
19233   rtx set, set2;
19234 
19235   /* Simplify the test for uninteresting insns.  */
19236   if (insn_type != TYPE_SETCC
19237       && insn_type != TYPE_ICMOV
19238       && insn_type != TYPE_FCMOV
19239       && insn_type != TYPE_IBR)
19240     return 0;
19241 
19242   if ((set = single_set (dep_insn)) != 0)
19243     {
19244       set = SET_DEST (set);
19245       set2 = NULL_RTX;
19246     }
19247   else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19248            && XVECLEN (PATTERN (dep_insn), 0) == 2
19249            && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19250            && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19251     {
19252       set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
19254     }
19255   else
19256     return 0;
19257 
19258   if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19259     return 0;
19260 
19261   /* This test is true if the dependent insn reads the flags but
19262      not any other potentially set register.  */
19263   if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19264     return 0;
19265 
19266   if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19267     return 0;
19268 
19269   return 1;
19270 }
19271 
19272 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
19273    address with operands set by DEP_INSN.  */
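/* For example (a sketch):

     add $4, %ebx       <- DEP_INSN modifies %ebx
     mov (%ebx), %eax   <- INSN's address uses %ebx

   is such a dependence.  */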
19274 
19275 static int
19276 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19277 {
19278   rtx addr;
19279 
19280   if (insn_type == TYPE_LEA
19281       && TARGET_PENTIUM)
19282     {
19283       addr = PATTERN (insn);
19284 
19285       if (GET_CODE (addr) == PARALLEL)
19286         addr = XVECEXP (addr, 0, 0);
19287 
19288       gcc_assert (GET_CODE (addr) == SET);
19289 
19290       addr = SET_SRC (addr);
19291     }
19292   else
19293     {
19294       int i;
19295       extract_insn_cached (insn);
19296       for (i = recog_data.n_operands - 1; i >= 0; --i)
19297         if (MEM_P (recog_data.operand[i]))
19298           {
19299             addr = XEXP (recog_data.operand[i], 0);
19300             goto found;
19301           }
19302       return 0;
19303     found:;
19304     }
19305 
19306   return modified_in_p (addr, dep_insn);
19307 }
19308 
19309 static int
19310 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19311 {
19312   enum attr_type insn_type, dep_insn_type;
19313   enum attr_memory memory;
19314   rtx set, set2;
19315   int dep_insn_code_number;
19316 
19317   /* Anti and output dependencies have zero cost on all CPUs.  */
19318   if (REG_NOTE_KIND (link) != 0)
19319     return 0;
19320 
19321   dep_insn_code_number = recog_memoized (dep_insn);
19322 
19323   /* If we can't recognize the insns, we can't really do anything.  */
19324   if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19325     return cost;
19326 
19327   insn_type = get_attr_type (insn);
19328   dep_insn_type = get_attr_type (dep_insn);
19329 
19330   switch (ix86_tune)
19331     {
19332     case PROCESSOR_PENTIUM:
19333       /* Address Generation Interlock adds a cycle of latency.  */
19334       if (ix86_agi_dependent (insn, dep_insn, insn_type))
19335         cost += 1;
19336 
19337       /* ??? Compares pair with jump/setcc.  */
19338       if (ix86_flags_dependent (insn, dep_insn, insn_type))
19339         cost = 0;
19340 
      /* Floating point stores require the value to be ready one cycle
         earlier.  */
19342       if (insn_type == TYPE_FMOV
19343           && get_attr_memory (insn) == MEMORY_STORE
19344           && !ix86_agi_dependent (insn, dep_insn, insn_type))
19345         cost += 1;
19346       break;
19347 
19348     case PROCESSOR_PENTIUMPRO:
19349       memory = get_attr_memory (insn);
19350 
19351       /* INT->FP conversion is expensive.  */
19352       if (get_attr_fp_int_src (dep_insn))
19353         cost += 5;
19354 
19355       /* There is one cycle extra latency between an FP op and a store.  */
19356       if (insn_type == TYPE_FMOV
19357           && (set = single_set (dep_insn)) != NULL_RTX
19358           && (set2 = single_set (insn)) != NULL_RTX
19359           && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19360           && MEM_P (SET_DEST (set2)))
19361         cost += 1;
19362 
      /* Show the ability of the reorder buffer to hide the latency of a
         load by executing it in parallel with the previous instruction,
         provided the previous instruction is not needed to compute the
         address.  */
19366       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19367           && !ix86_agi_dependent (insn, dep_insn, insn_type))
19368         {
          /* Claim that moves take one cycle, as the core can issue one
             load at a time and the next load can start a cycle later.  */
19371           if (dep_insn_type == TYPE_IMOV
19372               || dep_insn_type == TYPE_FMOV)
19373             cost = 1;
19374           else if (cost > 1)
19375             cost--;
19376         }
19377       break;
19378 
19379     case PROCESSOR_K6:
19380       memory = get_attr_memory (insn);
19381 
19382       /* The esp dependency is resolved before the instruction is really
19383          finished.  */
19384       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19385           && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19386         return 1;
19387 
19388       /* INT->FP conversion is expensive.  */
19389       if (get_attr_fp_int_src (dep_insn))
19390         cost += 5;
19391 
      /* Show the ability of the reorder buffer to hide the latency of a
         load by executing it in parallel with the previous instruction,
         provided the previous instruction is not needed to compute the
         address.  */
19395       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19396           && !ix86_agi_dependent (insn, dep_insn, insn_type))
19397         {
          /* Claim that moves take one cycle, as the core can issue one
             load at a time and the next load can start a cycle later.  */
19400           if (dep_insn_type == TYPE_IMOV
19401               || dep_insn_type == TYPE_FMOV)
19402             cost = 1;
19403           else if (cost > 2)
19404             cost -= 2;
19405           else
19406             cost = 1;
19407         }
19408       break;
19409 
19410     case PROCESSOR_ATHLON:
19411     case PROCESSOR_K8:
19412     case PROCESSOR_AMDFAM10:
19413     case PROCESSOR_GENERIC32:
19414     case PROCESSOR_GENERIC64:
19415       memory = get_attr_memory (insn);
19416 
      /* Show the ability of the reorder buffer to hide the latency of a
         load by executing it in parallel with the previous instruction,
         provided the previous instruction is not needed to compute the
         address.  */
19420       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19421           && !ix86_agi_dependent (insn, dep_insn, insn_type))
19422         {
19423           enum attr_unit unit = get_attr_unit (insn);
19424           int loadcost = 3;
19425 
          /* Because the integer and floating point unit pipelines differ
             in the length of their preparation stages, memory operands
             are cheaper for floating point.

             ??? For Athlon the difference is most probably 2.  */
19431           if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19432             loadcost = 3;
19433           else
19434             loadcost = TARGET_ATHLON ? 2 : 0;
19435 
19436           if (cost >= loadcost)
19437             cost -= loadcost;
19438           else
19439             cost = 0;
        }
      break;

    default:
19443       break;
19444     }
19445 
19446   return cost;
19447 }
19448 
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */
19452 
19453 static int
19454 ia32_multipass_dfa_lookahead (void)
19455 {
19456   switch (ix86_tune)
19457     {
19458     case PROCESSOR_PENTIUM:
19459       return 2;
19460 
19461     case PROCESSOR_PENTIUMPRO:
19462     case PROCESSOR_K6:
19463       return 1;
19464 
19465     default:
19466       return 0;
19467     }
19468 }
19469 
19470 
19471 /* Compute the alignment given to a constant that is being placed in memory.
19472    EXP is the constant and ALIGN is the alignment that the object would
19473    ordinarily have.
19474    The value of this function is used instead of that alignment to align
19475    the object.  */
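/* For example (a sketch of the intent): a DFmode constant is raised to
   64-bit alignment so it can be loaded in one aligned access, and long
   string constants get word alignment to speed up block operations.  */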
19476 
19477 int
19478 ix86_constant_alignment (tree exp, int align)
19479 {
19480   if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19481       || TREE_CODE (exp) == INTEGER_CST)
19482     {
19483       if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19484         return 64;
19485       else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
19486         return 128;
19487     }
19488   else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19489            && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19490     return BITS_PER_WORD;
19491 
19492   return align;
19493 }
19494 
19495 /* Compute the alignment for a static variable.
19496    TYPE is the data type, and ALIGN is the alignment that
19497    the object would ordinarily have.  The value of this function is used
19498    instead of that alignment to align the object.  */
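/* For example (a sketch): a 256-byte global array reaches max_align,
   and under the x86-64 ABI rule below any aggregate of 16 bytes or
   more is raised to 128-bit alignment.  */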
19499 
19500 int
19501 ix86_data_alignment (tree type, int align)
19502 {
  int max_align
    = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
19504 
19505   if (AGGREGATE_TYPE_P (type)
19506       && TYPE_SIZE (type)
19507       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19508       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19509           || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19510       && align < max_align)
19511     align = max_align;
19512 
  /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
     to a 16-byte boundary.  */
19515   if (TARGET_64BIT)
19516     {
19517       if (AGGREGATE_TYPE_P (type)
19518            && TYPE_SIZE (type)
19519            && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19520            && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19521                || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19522         return 128;
19523     }
19524 
19525   if (TREE_CODE (type) == ARRAY_TYPE)
19526     {
19527       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19528         return 64;
19529       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19530         return 128;
19531     }
19532   else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
19535       if (TYPE_MODE (type) == DCmode && align < 64)
19536         return 64;
19537       if ((TYPE_MODE (type) == XCmode
19538            || TYPE_MODE (type) == TCmode) && align < 128)
19539         return 128;
19540     }
19541   else if ((TREE_CODE (type) == RECORD_TYPE
19542             || TREE_CODE (type) == UNION_TYPE
19543             || TREE_CODE (type) == QUAL_UNION_TYPE)
19544            && TYPE_FIELDS (type))
19545     {
19546       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19547         return 64;
19548       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19549         return 128;
19550     }
19551   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19552            || TREE_CODE (type) == INTEGER_TYPE)
19553     {
19554       if (TYPE_MODE (type) == DFmode && align < 64)
19555         return 64;
19556       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19557         return 128;
19558     }
19559 
19560   return align;
19561 }
19562 
19563 /* Compute the alignment for a local variable or a stack slot.  EXP is
19564    the data type or decl itself, MODE is the widest mode available and
19565    ALIGN is the alignment that the object would ordinarily have.  The
19566    value of this macro is used instead of that alignment to align the
19567    object.  */
19568 
19569 unsigned int
19570 ix86_local_alignment (tree exp, enum machine_mode mode,
19571                       unsigned int align)
19572 {
19573   tree type, decl;
19574 
19575   if (exp && DECL_P (exp))
19576     {
19577       type = TREE_TYPE (exp);
19578       decl = exp;
19579     }
19580   else
19581     {
19582       type = exp;
19583       decl = NULL;
19584     }
19585 
19586   /* Don't do dynamic stack realignment for long long objects with
19587      -mpreferred-stack-boundary=2.  */
19588   if (!TARGET_64BIT
19589       && align == 64
19590       && ix86_preferred_stack_boundary < 64
19591       && (mode == DImode || (type && TYPE_MODE (type) == DImode))
19592       && (!type || !TYPE_USER_ALIGN (type))
19593       && (!decl || !DECL_USER_ALIGN (decl)))
19594     align = 32;
19595 
  /* If TYPE is NULL, we are allocating a stack slot for a caller-save
     register in MODE.  Return the larger of the XFmode and DFmode
     alignments.  */
19599   if (!type)
19600     {
19601       if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19602         align = GET_MODE_ALIGNMENT (DFmode);
19603       return align;
19604     }
19605 
  /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
     to a 16-byte boundary.  */
19608   if (TARGET_64BIT)
19609     {
19610       if (AGGREGATE_TYPE_P (type)
19611            && TYPE_SIZE (type)
19612            && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
           && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19614                || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19615         return 128;
19616     }
19617   if (TREE_CODE (type) == ARRAY_TYPE)
19618     {
19619       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19620         return 64;
19621       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19622         return 128;
19623     }
19624   else if (TREE_CODE (type) == COMPLEX_TYPE)
19625     {
19626       if (TYPE_MODE (type) == DCmode && align < 64)
19627         return 64;
19628       if ((TYPE_MODE (type) == XCmode
19629            || TYPE_MODE (type) == TCmode) && align < 128)
19630         return 128;
19631     }
19632   else if ((TREE_CODE (type) == RECORD_TYPE
19633             || TREE_CODE (type) == UNION_TYPE
19634             || TREE_CODE (type) == QUAL_UNION_TYPE)
19635            && TYPE_FIELDS (type))
19636     {
19637       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19638         return 64;
19639       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19640         return 128;
19641     }
19642   else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19643            || TREE_CODE (type) == INTEGER_TYPE)
    {
19646       if (TYPE_MODE (type) == DFmode && align < 64)
19647         return 64;
19648       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19649         return 128;
19650     }
19651   return align;
19652 }
19653 
19654 /* Compute the minimum required alignment for dynamic stack realignment
19655    purposes for a local variable, parameter or a stack slot.  EXP is
19656    the data type or decl itself, MODE is its mode and ALIGN is the
19657    alignment that the object would ordinarily have.  */
19658 
19659 unsigned int
19660 ix86_minimum_alignment (tree exp, enum machine_mode mode,
19661                         unsigned int align)
19662 {
19663   tree type, decl;
19664 
19665   if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
19666     return align;
19667 
19668   if (exp && DECL_P (exp))
19669     {
19670       type = TREE_TYPE (exp);
19671       decl = exp;
19672     }
19673   else
19674     {
19675       type = exp;
19676       decl = NULL;
19677     }
19678 
19679   /* Don't do dynamic stack realignment for long long objects with
19680      -mpreferred-stack-boundary=2.  */
19681   if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
19682       && (!type || !TYPE_USER_ALIGN (type))
19683       && (!decl || !DECL_USER_ALIGN (decl)))
19684     return 32;
19685 
19686   return align;
19687 }
19688 
19689 /* Emit RTL insns to initialize the variable parts of a trampoline.
19690    FNADDR is an RTX for the address of the function's pure code.
19691    CXT is an RTX for the static chain value for the function.  */
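/* A sketch of the byte layout emitted below.  32-bit:

     0:  b9 <cxt32>     movl  $CXT, %ecx
     5:  e9 <disp32>    jmp   FNADDR    ; disp relative to TRAMP + 10

   64-bit: FNADDR is loaded into %r11 (movl for zero-extended 32-bit
   addresses, else movabs), CXT into %r10 via movabs, followed by
   jmp *%r11 (49 ff e3).  */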
19692 void
19693 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
19694 {
19695   if (!TARGET_64BIT)
19696     {
19697       /* Compute offset from the end of the jmp to the target function.  */
19698       rtx disp = expand_binop (SImode, sub_optab, fnaddr,
19699                                plus_constant (tramp, 10),
19700                                NULL_RTX, 1, OPTAB_DIRECT);
19701       emit_move_insn (gen_rtx_MEM (QImode, tramp),
19702                       gen_int_mode (0xb9, QImode));
19703       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
19704       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
19705                       gen_int_mode (0xe9, QImode));
19706       emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
19707     }
19708   else
19709     {
19710       int offset = 0;
      /* Try to load the address with the shorter movl instead of movabs.
         We may want to support movq for kernel mode, but the kernel does
         not use trampolines at the moment.  */
19714       if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
19715         {
19716           fnaddr = copy_to_mode_reg (DImode, fnaddr);
19717           emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19718                           gen_int_mode (0xbb41, HImode));
19719           emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
19720                           gen_lowpart (SImode, fnaddr));
19721           offset += 6;
19722         }
19723       else
19724         {
19725           emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19726                           gen_int_mode (0xbb49, HImode));
19727           emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19728                           fnaddr);
19729           offset += 10;
19730         }
19731       /* Load static chain using movabs to r10.  */
19732       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19733                       gen_int_mode (0xba49, HImode));
19734       emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19735                       cxt);
19736       offset += 10;
      /* Jump to r11.  */
19738       emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19739                       gen_int_mode (0xff49, HImode));
19740       emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
19741                       gen_int_mode (0xe3, QImode));
19742       offset += 3;
19743       gcc_assert (offset <= TRAMPOLINE_SIZE);
19744     }
19745 
19746 #ifdef ENABLE_EXECUTE_STACK
19747   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
19748                      LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
19749 #endif
19750 }
19751 
19752 /* Codes for all the SSE/MMX builtins.  */
19753 enum ix86_builtins
19754 {
19755   IX86_BUILTIN_ADDPS,
19756   IX86_BUILTIN_ADDSS,
19757   IX86_BUILTIN_DIVPS,
19758   IX86_BUILTIN_DIVSS,
19759   IX86_BUILTIN_MULPS,
19760   IX86_BUILTIN_MULSS,
19761   IX86_BUILTIN_SUBPS,
19762   IX86_BUILTIN_SUBSS,
19763 
19764   IX86_BUILTIN_CMPEQPS,
19765   IX86_BUILTIN_CMPLTPS,
19766   IX86_BUILTIN_CMPLEPS,
19767   IX86_BUILTIN_CMPGTPS,
19768   IX86_BUILTIN_CMPGEPS,
19769   IX86_BUILTIN_CMPNEQPS,
19770   IX86_BUILTIN_CMPNLTPS,
19771   IX86_BUILTIN_CMPNLEPS,
19772   IX86_BUILTIN_CMPNGTPS,
19773   IX86_BUILTIN_CMPNGEPS,
19774   IX86_BUILTIN_CMPORDPS,
19775   IX86_BUILTIN_CMPUNORDPS,
19776   IX86_BUILTIN_CMPEQSS,
19777   IX86_BUILTIN_CMPLTSS,
19778   IX86_BUILTIN_CMPLESS,
19779   IX86_BUILTIN_CMPNEQSS,
19780   IX86_BUILTIN_CMPNLTSS,
19781   IX86_BUILTIN_CMPNLESS,
19782   IX86_BUILTIN_CMPNGTSS,
19783   IX86_BUILTIN_CMPNGESS,
19784   IX86_BUILTIN_CMPORDSS,
19785   IX86_BUILTIN_CMPUNORDSS,
19786 
19787   IX86_BUILTIN_COMIEQSS,
19788   IX86_BUILTIN_COMILTSS,
19789   IX86_BUILTIN_COMILESS,
19790   IX86_BUILTIN_COMIGTSS,
19791   IX86_BUILTIN_COMIGESS,
19792   IX86_BUILTIN_COMINEQSS,
19793   IX86_BUILTIN_UCOMIEQSS,
19794   IX86_BUILTIN_UCOMILTSS,
19795   IX86_BUILTIN_UCOMILESS,
19796   IX86_BUILTIN_UCOMIGTSS,
19797   IX86_BUILTIN_UCOMIGESS,
19798   IX86_BUILTIN_UCOMINEQSS,
19799 
19800   IX86_BUILTIN_CVTPI2PS,
19801   IX86_BUILTIN_CVTPS2PI,
19802   IX86_BUILTIN_CVTSI2SS,
19803   IX86_BUILTIN_CVTSI642SS,
19804   IX86_BUILTIN_CVTSS2SI,
19805   IX86_BUILTIN_CVTSS2SI64,
19806   IX86_BUILTIN_CVTTPS2PI,
19807   IX86_BUILTIN_CVTTSS2SI,
19808   IX86_BUILTIN_CVTTSS2SI64,
19809 
19810   IX86_BUILTIN_MAXPS,
19811   IX86_BUILTIN_MAXSS,
19812   IX86_BUILTIN_MINPS,
19813   IX86_BUILTIN_MINSS,
19814 
19815   IX86_BUILTIN_LOADUPS,
19816   IX86_BUILTIN_STOREUPS,
19817   IX86_BUILTIN_MOVSS,
19818 
19819   IX86_BUILTIN_MOVHLPS,
19820   IX86_BUILTIN_MOVLHPS,
19821   IX86_BUILTIN_LOADHPS,
19822   IX86_BUILTIN_LOADLPS,
19823   IX86_BUILTIN_STOREHPS,
19824   IX86_BUILTIN_STORELPS,
19825 
19826   IX86_BUILTIN_MASKMOVQ,
19827   IX86_BUILTIN_MOVMSKPS,
19828   IX86_BUILTIN_PMOVMSKB,
19829 
19830   IX86_BUILTIN_MOVNTPS,
19831   IX86_BUILTIN_MOVNTQ,
19832 
19833   IX86_BUILTIN_LOADDQU,
19834   IX86_BUILTIN_STOREDQU,
19835 
19836   IX86_BUILTIN_PACKSSWB,
19837   IX86_BUILTIN_PACKSSDW,
19838   IX86_BUILTIN_PACKUSWB,
19839 
19840   IX86_BUILTIN_PADDB,
19841   IX86_BUILTIN_PADDW,
19842   IX86_BUILTIN_PADDD,
19843   IX86_BUILTIN_PADDQ,
19844   IX86_BUILTIN_PADDSB,
19845   IX86_BUILTIN_PADDSW,
19846   IX86_BUILTIN_PADDUSB,
19847   IX86_BUILTIN_PADDUSW,
19848   IX86_BUILTIN_PSUBB,
19849   IX86_BUILTIN_PSUBW,
19850   IX86_BUILTIN_PSUBD,
19851   IX86_BUILTIN_PSUBQ,
19852   IX86_BUILTIN_PSUBSB,
19853   IX86_BUILTIN_PSUBSW,
19854   IX86_BUILTIN_PSUBUSB,
19855   IX86_BUILTIN_PSUBUSW,
19856 
19857   IX86_BUILTIN_PAND,
19858   IX86_BUILTIN_PANDN,
19859   IX86_BUILTIN_POR,
19860   IX86_BUILTIN_PXOR,
19861 
19862   IX86_BUILTIN_PAVGB,
19863   IX86_BUILTIN_PAVGW,
19864 
19865   IX86_BUILTIN_PCMPEQB,
19866   IX86_BUILTIN_PCMPEQW,
19867   IX86_BUILTIN_PCMPEQD,
19868   IX86_BUILTIN_PCMPGTB,
19869   IX86_BUILTIN_PCMPGTW,
19870   IX86_BUILTIN_PCMPGTD,
19871 
19872   IX86_BUILTIN_PMADDWD,
19873 
19874   IX86_BUILTIN_PMAXSW,
19875   IX86_BUILTIN_PMAXUB,
19876   IX86_BUILTIN_PMINSW,
19877   IX86_BUILTIN_PMINUB,
19878 
19879   IX86_BUILTIN_PMULHUW,
19880   IX86_BUILTIN_PMULHW,
19881   IX86_BUILTIN_PMULLW,
19882 
19883   IX86_BUILTIN_PSADBW,
19884   IX86_BUILTIN_PSHUFW,
19885 
19886   IX86_BUILTIN_PSLLW,
19887   IX86_BUILTIN_PSLLD,
19888   IX86_BUILTIN_PSLLQ,
19889   IX86_BUILTIN_PSRAW,
19890   IX86_BUILTIN_PSRAD,
19891   IX86_BUILTIN_PSRLW,
19892   IX86_BUILTIN_PSRLD,
19893   IX86_BUILTIN_PSRLQ,
19894   IX86_BUILTIN_PSLLWI,
19895   IX86_BUILTIN_PSLLDI,
19896   IX86_BUILTIN_PSLLQI,
19897   IX86_BUILTIN_PSRAWI,
19898   IX86_BUILTIN_PSRADI,
19899   IX86_BUILTIN_PSRLWI,
19900   IX86_BUILTIN_PSRLDI,
19901   IX86_BUILTIN_PSRLQI,
19902 
19903   IX86_BUILTIN_PUNPCKHBW,
19904   IX86_BUILTIN_PUNPCKHWD,
19905   IX86_BUILTIN_PUNPCKHDQ,
19906   IX86_BUILTIN_PUNPCKLBW,
19907   IX86_BUILTIN_PUNPCKLWD,
19908   IX86_BUILTIN_PUNPCKLDQ,
19909 
19910   IX86_BUILTIN_SHUFPS,
19911 
19912   IX86_BUILTIN_RCPPS,
19913   IX86_BUILTIN_RCPSS,
19914   IX86_BUILTIN_RSQRTPS,
19915   IX86_BUILTIN_RSQRTPS_NR,
19916   IX86_BUILTIN_RSQRTSS,
19917   IX86_BUILTIN_RSQRTF,
19918   IX86_BUILTIN_SQRTPS,
19919   IX86_BUILTIN_SQRTPS_NR,
19920   IX86_BUILTIN_SQRTSS,
19921 
19922   IX86_BUILTIN_UNPCKHPS,
19923   IX86_BUILTIN_UNPCKLPS,
19924 
19925   IX86_BUILTIN_ANDPS,
19926   IX86_BUILTIN_ANDNPS,
19927   IX86_BUILTIN_ORPS,
19928   IX86_BUILTIN_XORPS,
19929 
19930   IX86_BUILTIN_EMMS,
19931   IX86_BUILTIN_LDMXCSR,
19932   IX86_BUILTIN_STMXCSR,
19933   IX86_BUILTIN_SFENCE,
19934 
19935   /* 3DNow! Original */
19936   IX86_BUILTIN_FEMMS,
19937   IX86_BUILTIN_PAVGUSB,
19938   IX86_BUILTIN_PF2ID,
19939   IX86_BUILTIN_PFACC,
19940   IX86_BUILTIN_PFADD,
19941   IX86_BUILTIN_PFCMPEQ,
19942   IX86_BUILTIN_PFCMPGE,
19943   IX86_BUILTIN_PFCMPGT,
19944   IX86_BUILTIN_PFMAX,
19945   IX86_BUILTIN_PFMIN,
19946   IX86_BUILTIN_PFMUL,
19947   IX86_BUILTIN_PFRCP,
19948   IX86_BUILTIN_PFRCPIT1,
19949   IX86_BUILTIN_PFRCPIT2,
19950   IX86_BUILTIN_PFRSQIT1,
19951   IX86_BUILTIN_PFRSQRT,
19952   IX86_BUILTIN_PFSUB,
19953   IX86_BUILTIN_PFSUBR,
19954   IX86_BUILTIN_PI2FD,
19955   IX86_BUILTIN_PMULHRW,
19956 
19957   /* 3DNow! Athlon Extensions */
19958   IX86_BUILTIN_PF2IW,
19959   IX86_BUILTIN_PFNACC,
19960   IX86_BUILTIN_PFPNACC,
19961   IX86_BUILTIN_PI2FW,
19962   IX86_BUILTIN_PSWAPDSI,
19963   IX86_BUILTIN_PSWAPDSF,
19964 
19965   /* SSE2 */
19966   IX86_BUILTIN_ADDPD,
19967   IX86_BUILTIN_ADDSD,
19968   IX86_BUILTIN_DIVPD,
19969   IX86_BUILTIN_DIVSD,
19970   IX86_BUILTIN_MULPD,
19971   IX86_BUILTIN_MULSD,
19972   IX86_BUILTIN_SUBPD,
19973   IX86_BUILTIN_SUBSD,
19974 
19975   IX86_BUILTIN_CMPEQPD,
19976   IX86_BUILTIN_CMPLTPD,
19977   IX86_BUILTIN_CMPLEPD,
19978   IX86_BUILTIN_CMPGTPD,
19979   IX86_BUILTIN_CMPGEPD,
19980   IX86_BUILTIN_CMPNEQPD,
19981   IX86_BUILTIN_CMPNLTPD,
19982   IX86_BUILTIN_CMPNLEPD,
19983   IX86_BUILTIN_CMPNGTPD,
19984   IX86_BUILTIN_CMPNGEPD,
19985   IX86_BUILTIN_CMPORDPD,
19986   IX86_BUILTIN_CMPUNORDPD,
19987   IX86_BUILTIN_CMPEQSD,
19988   IX86_BUILTIN_CMPLTSD,
19989   IX86_BUILTIN_CMPLESD,
19990   IX86_BUILTIN_CMPNEQSD,
19991   IX86_BUILTIN_CMPNLTSD,
19992   IX86_BUILTIN_CMPNLESD,
19993   IX86_BUILTIN_CMPORDSD,
19994   IX86_BUILTIN_CMPUNORDSD,
19995 
19996   IX86_BUILTIN_COMIEQSD,
19997   IX86_BUILTIN_COMILTSD,
19998   IX86_BUILTIN_COMILESD,
19999   IX86_BUILTIN_COMIGTSD,
20000   IX86_BUILTIN_COMIGESD,
20001   IX86_BUILTIN_COMINEQSD,
20002   IX86_BUILTIN_UCOMIEQSD,
20003   IX86_BUILTIN_UCOMILTSD,
20004   IX86_BUILTIN_UCOMILESD,
20005   IX86_BUILTIN_UCOMIGTSD,
20006   IX86_BUILTIN_UCOMIGESD,
20007   IX86_BUILTIN_UCOMINEQSD,
20008 
20009   IX86_BUILTIN_MAXPD,
20010   IX86_BUILTIN_MAXSD,
20011   IX86_BUILTIN_MINPD,
20012   IX86_BUILTIN_MINSD,
20013 
20014   IX86_BUILTIN_ANDPD,
20015   IX86_BUILTIN_ANDNPD,
20016   IX86_BUILTIN_ORPD,
20017   IX86_BUILTIN_XORPD,
20018 
20019   IX86_BUILTIN_SQRTPD,
20020   IX86_BUILTIN_SQRTSD,
20021 
20022   IX86_BUILTIN_UNPCKHPD,
20023   IX86_BUILTIN_UNPCKLPD,
20024 
20025   IX86_BUILTIN_SHUFPD,
20026 
20027   IX86_BUILTIN_LOADUPD,
20028   IX86_BUILTIN_STOREUPD,
20029   IX86_BUILTIN_MOVSD,
20030 
20031   IX86_BUILTIN_LOADHPD,
20032   IX86_BUILTIN_LOADLPD,
20033 
20034   IX86_BUILTIN_CVTDQ2PD,
20035   IX86_BUILTIN_CVTDQ2PS,
20036 
20037   IX86_BUILTIN_CVTPD2DQ,
20038   IX86_BUILTIN_CVTPD2PI,
20039   IX86_BUILTIN_CVTPD2PS,
20040   IX86_BUILTIN_CVTTPD2DQ,
20041   IX86_BUILTIN_CVTTPD2PI,
20042 
20043   IX86_BUILTIN_CVTPI2PD,
20044   IX86_BUILTIN_CVTSI2SD,
20045   IX86_BUILTIN_CVTSI642SD,
20046 
20047   IX86_BUILTIN_CVTSD2SI,
20048   IX86_BUILTIN_CVTSD2SI64,
20049   IX86_BUILTIN_CVTSD2SS,
20050   IX86_BUILTIN_CVTSS2SD,
20051   IX86_BUILTIN_CVTTSD2SI,
20052   IX86_BUILTIN_CVTTSD2SI64,
20053 
20054   IX86_BUILTIN_CVTPS2DQ,
20055   IX86_BUILTIN_CVTPS2PD,
20056   IX86_BUILTIN_CVTTPS2DQ,
20057 
20058   IX86_BUILTIN_MOVNTI,
20059   IX86_BUILTIN_MOVNTPD,
20060   IX86_BUILTIN_MOVNTDQ,
20061 
20062   IX86_BUILTIN_MOVQ128,
20063 
20064   /* SSE2 MMX */
20065   IX86_BUILTIN_MASKMOVDQU,
20066   IX86_BUILTIN_MOVMSKPD,
20067   IX86_BUILTIN_PMOVMSKB128,
20068 
20069   IX86_BUILTIN_PACKSSWB128,
20070   IX86_BUILTIN_PACKSSDW128,
20071   IX86_BUILTIN_PACKUSWB128,
20072 
20073   IX86_BUILTIN_PADDB128,
20074   IX86_BUILTIN_PADDW128,
20075   IX86_BUILTIN_PADDD128,
20076   IX86_BUILTIN_PADDQ128,
20077   IX86_BUILTIN_PADDSB128,
20078   IX86_BUILTIN_PADDSW128,
20079   IX86_BUILTIN_PADDUSB128,
20080   IX86_BUILTIN_PADDUSW128,
20081   IX86_BUILTIN_PSUBB128,
20082   IX86_BUILTIN_PSUBW128,
20083   IX86_BUILTIN_PSUBD128,
20084   IX86_BUILTIN_PSUBQ128,
20085   IX86_BUILTIN_PSUBSB128,
20086   IX86_BUILTIN_PSUBSW128,
20087   IX86_BUILTIN_PSUBUSB128,
20088   IX86_BUILTIN_PSUBUSW128,
20089 
20090   IX86_BUILTIN_PAND128,
20091   IX86_BUILTIN_PANDN128,
20092   IX86_BUILTIN_POR128,
20093   IX86_BUILTIN_PXOR128,
20094 
20095   IX86_BUILTIN_PAVGB128,
20096   IX86_BUILTIN_PAVGW128,
20097 
20098   IX86_BUILTIN_PCMPEQB128,
20099   IX86_BUILTIN_PCMPEQW128,
20100   IX86_BUILTIN_PCMPEQD128,
20101   IX86_BUILTIN_PCMPGTB128,
20102   IX86_BUILTIN_PCMPGTW128,
20103   IX86_BUILTIN_PCMPGTD128,
20104 
20105   IX86_BUILTIN_PMADDWD128,
20106 
20107   IX86_BUILTIN_PMAXSW128,
20108   IX86_BUILTIN_PMAXUB128,
20109   IX86_BUILTIN_PMINSW128,
20110   IX86_BUILTIN_PMINUB128,
20111 
20112   IX86_BUILTIN_PMULUDQ,
20113   IX86_BUILTIN_PMULUDQ128,
20114   IX86_BUILTIN_PMULHUW128,
20115   IX86_BUILTIN_PMULHW128,
20116   IX86_BUILTIN_PMULLW128,
20117 
20118   IX86_BUILTIN_PSADBW128,
20119   IX86_BUILTIN_PSHUFHW,
20120   IX86_BUILTIN_PSHUFLW,
20121   IX86_BUILTIN_PSHUFD,
20122 
20123   IX86_BUILTIN_PSLLDQI128,
20124   IX86_BUILTIN_PSLLWI128,
20125   IX86_BUILTIN_PSLLDI128,
20126   IX86_BUILTIN_PSLLQI128,
20127   IX86_BUILTIN_PSRAWI128,
20128   IX86_BUILTIN_PSRADI128,
20129   IX86_BUILTIN_PSRLDQI128,
20130   IX86_BUILTIN_PSRLWI128,
20131   IX86_BUILTIN_PSRLDI128,
20132   IX86_BUILTIN_PSRLQI128,
20133 
20134   IX86_BUILTIN_PSLLDQ128,
20135   IX86_BUILTIN_PSLLW128,
20136   IX86_BUILTIN_PSLLD128,
20137   IX86_BUILTIN_PSLLQ128,
20138   IX86_BUILTIN_PSRAW128,
20139   IX86_BUILTIN_PSRAD128,
20140   IX86_BUILTIN_PSRLW128,
20141   IX86_BUILTIN_PSRLD128,
20142   IX86_BUILTIN_PSRLQ128,
20143 
20144   IX86_BUILTIN_PUNPCKHBW128,
20145   IX86_BUILTIN_PUNPCKHWD128,
20146   IX86_BUILTIN_PUNPCKHDQ128,
20147   IX86_BUILTIN_PUNPCKHQDQ128,
20148   IX86_BUILTIN_PUNPCKLBW128,
20149   IX86_BUILTIN_PUNPCKLWD128,
20150   IX86_BUILTIN_PUNPCKLDQ128,
20151   IX86_BUILTIN_PUNPCKLQDQ128,
20152 
20153   IX86_BUILTIN_CLFLUSH,
20154   IX86_BUILTIN_MFENCE,
20155   IX86_BUILTIN_LFENCE,
20156 
20157   /* SSE3.  */
20158   IX86_BUILTIN_ADDSUBPS,
20159   IX86_BUILTIN_HADDPS,
20160   IX86_BUILTIN_HSUBPS,
20161   IX86_BUILTIN_MOVSHDUP,
20162   IX86_BUILTIN_MOVSLDUP,
20163   IX86_BUILTIN_ADDSUBPD,
20164   IX86_BUILTIN_HADDPD,
20165   IX86_BUILTIN_HSUBPD,
20166   IX86_BUILTIN_LDDQU,
20167 
20168   IX86_BUILTIN_MONITOR,
20169   IX86_BUILTIN_MWAIT,
20170 
20171   /* SSSE3.  */
20172   IX86_BUILTIN_PHADDW,
20173   IX86_BUILTIN_PHADDD,
20174   IX86_BUILTIN_PHADDSW,
20175   IX86_BUILTIN_PHSUBW,
20176   IX86_BUILTIN_PHSUBD,
20177   IX86_BUILTIN_PHSUBSW,
20178   IX86_BUILTIN_PMADDUBSW,
20179   IX86_BUILTIN_PMULHRSW,
20180   IX86_BUILTIN_PSHUFB,
20181   IX86_BUILTIN_PSIGNB,
20182   IX86_BUILTIN_PSIGNW,
20183   IX86_BUILTIN_PSIGND,
20184   IX86_BUILTIN_PALIGNR,
20185   IX86_BUILTIN_PABSB,
20186   IX86_BUILTIN_PABSW,
20187   IX86_BUILTIN_PABSD,
20188 
20189   IX86_BUILTIN_PHADDW128,
20190   IX86_BUILTIN_PHADDD128,
20191   IX86_BUILTIN_PHADDSW128,
20192   IX86_BUILTIN_PHSUBW128,
20193   IX86_BUILTIN_PHSUBD128,
20194   IX86_BUILTIN_PHSUBSW128,
20195   IX86_BUILTIN_PMADDUBSW128,
20196   IX86_BUILTIN_PMULHRSW128,
20197   IX86_BUILTIN_PSHUFB128,
20198   IX86_BUILTIN_PSIGNB128,
20199   IX86_BUILTIN_PSIGNW128,
20200   IX86_BUILTIN_PSIGND128,
20201   IX86_BUILTIN_PALIGNR128,
20202   IX86_BUILTIN_PABSB128,
20203   IX86_BUILTIN_PABSW128,
20204   IX86_BUILTIN_PABSD128,
20205 
20206   /* AMDFAM10 - SSE4A New Instructions.  */
20207   IX86_BUILTIN_MOVNTSD,
20208   IX86_BUILTIN_MOVNTSS,
20209   IX86_BUILTIN_EXTRQI,
20210   IX86_BUILTIN_EXTRQ,
20211   IX86_BUILTIN_INSERTQI,
20212   IX86_BUILTIN_INSERTQ,
20213 
20214   /* SSE4.1.  */
20215   IX86_BUILTIN_BLENDPD,
20216   IX86_BUILTIN_BLENDPS,
20217   IX86_BUILTIN_BLENDVPD,
20218   IX86_BUILTIN_BLENDVPS,
20219   IX86_BUILTIN_PBLENDVB128,
20220   IX86_BUILTIN_PBLENDW128,
20221 
20222   IX86_BUILTIN_DPPD,
20223   IX86_BUILTIN_DPPS,
20224 
20225   IX86_BUILTIN_INSERTPS128,
20226 
20227   IX86_BUILTIN_MOVNTDQA,
20228   IX86_BUILTIN_MPSADBW128,
20229   IX86_BUILTIN_PACKUSDW128,
20230   IX86_BUILTIN_PCMPEQQ,
20231   IX86_BUILTIN_PHMINPOSUW128,
20232 
20233   IX86_BUILTIN_PMAXSB128,
20234   IX86_BUILTIN_PMAXSD128,
20235   IX86_BUILTIN_PMAXUD128,
20236   IX86_BUILTIN_PMAXUW128,
20237 
20238   IX86_BUILTIN_PMINSB128,
20239   IX86_BUILTIN_PMINSD128,
20240   IX86_BUILTIN_PMINUD128,
20241   IX86_BUILTIN_PMINUW128,
20242 
20243   IX86_BUILTIN_PMOVSXBW128,
20244   IX86_BUILTIN_PMOVSXBD128,
20245   IX86_BUILTIN_PMOVSXBQ128,
20246   IX86_BUILTIN_PMOVSXWD128,
20247   IX86_BUILTIN_PMOVSXWQ128,
20248   IX86_BUILTIN_PMOVSXDQ128,
20249 
20250   IX86_BUILTIN_PMOVZXBW128,
20251   IX86_BUILTIN_PMOVZXBD128,
20252   IX86_BUILTIN_PMOVZXBQ128,
20253   IX86_BUILTIN_PMOVZXWD128,
20254   IX86_BUILTIN_PMOVZXWQ128,
20255   IX86_BUILTIN_PMOVZXDQ128,
20256 
20257   IX86_BUILTIN_PMULDQ128,
20258   IX86_BUILTIN_PMULLD128,
20259 
20260   IX86_BUILTIN_ROUNDPD,
20261   IX86_BUILTIN_ROUNDPS,
20262   IX86_BUILTIN_ROUNDSD,
20263   IX86_BUILTIN_ROUNDSS,
20264 
20265   IX86_BUILTIN_PTESTZ,
20266   IX86_BUILTIN_PTESTC,
20267   IX86_BUILTIN_PTESTNZC,
20268 
20269   IX86_BUILTIN_VEC_INIT_V2SI,
20270   IX86_BUILTIN_VEC_INIT_V4HI,
20271   IX86_BUILTIN_VEC_INIT_V8QI,
20272   IX86_BUILTIN_VEC_EXT_V2DF,
20273   IX86_BUILTIN_VEC_EXT_V2DI,
20274   IX86_BUILTIN_VEC_EXT_V4SF,
20275   IX86_BUILTIN_VEC_EXT_V4SI,
20276   IX86_BUILTIN_VEC_EXT_V8HI,
20277   IX86_BUILTIN_VEC_EXT_V2SI,
20278   IX86_BUILTIN_VEC_EXT_V4HI,
20279   IX86_BUILTIN_VEC_EXT_V16QI,
20280   IX86_BUILTIN_VEC_SET_V2DI,
20281   IX86_BUILTIN_VEC_SET_V4SF,
20282   IX86_BUILTIN_VEC_SET_V4SI,
20283   IX86_BUILTIN_VEC_SET_V8HI,
20284   IX86_BUILTIN_VEC_SET_V4HI,
20285   IX86_BUILTIN_VEC_SET_V16QI,
20286 
20287   IX86_BUILTIN_VEC_PACK_SFIX,
20288 
20289   /* SSE4.2.  */
20290   IX86_BUILTIN_CRC32QI,
20291   IX86_BUILTIN_CRC32HI,
20292   IX86_BUILTIN_CRC32SI,
20293   IX86_BUILTIN_CRC32DI,
20294 
20295   IX86_BUILTIN_PCMPESTRI128,
20296   IX86_BUILTIN_PCMPESTRM128,
20297   IX86_BUILTIN_PCMPESTRA128,
20298   IX86_BUILTIN_PCMPESTRC128,
20299   IX86_BUILTIN_PCMPESTRO128,
20300   IX86_BUILTIN_PCMPESTRS128,
20301   IX86_BUILTIN_PCMPESTRZ128,
20302   IX86_BUILTIN_PCMPISTRI128,
20303   IX86_BUILTIN_PCMPISTRM128,
20304   IX86_BUILTIN_PCMPISTRA128,
20305   IX86_BUILTIN_PCMPISTRC128,
20306   IX86_BUILTIN_PCMPISTRO128,
20307   IX86_BUILTIN_PCMPISTRS128,
20308   IX86_BUILTIN_PCMPISTRZ128,
20309 
20310   IX86_BUILTIN_PCMPGTQ,
20311 
20312   /* AES instructions */
20313   IX86_BUILTIN_AESENC128,
20314   IX86_BUILTIN_AESENCLAST128,
20315   IX86_BUILTIN_AESDEC128,
20316   IX86_BUILTIN_AESDECLAST128,
20317   IX86_BUILTIN_AESIMC128,
20318   IX86_BUILTIN_AESKEYGENASSIST128,
20319 
20320   /* PCLMUL instruction */
20321   IX86_BUILTIN_PCLMULQDQ128,
20322 
20323   /* AVX */
20324   IX86_BUILTIN_ADDPD256,
20325   IX86_BUILTIN_ADDPS256,
20326   IX86_BUILTIN_ADDSUBPD256,
20327   IX86_BUILTIN_ADDSUBPS256,
20328   IX86_BUILTIN_ANDPD256,
20329   IX86_BUILTIN_ANDPS256,
20330   IX86_BUILTIN_ANDNPD256,
20331   IX86_BUILTIN_ANDNPS256,
20332   IX86_BUILTIN_BLENDPD256,
20333   IX86_BUILTIN_BLENDPS256,
20334   IX86_BUILTIN_BLENDVPD256,
20335   IX86_BUILTIN_BLENDVPS256,
20336   IX86_BUILTIN_DIVPD256,
20337   IX86_BUILTIN_DIVPS256,
20338   IX86_BUILTIN_DPPS256,
20339   IX86_BUILTIN_HADDPD256,
20340   IX86_BUILTIN_HADDPS256,
20341   IX86_BUILTIN_HSUBPD256,
20342   IX86_BUILTIN_HSUBPS256,
20343   IX86_BUILTIN_MAXPD256,
20344   IX86_BUILTIN_MAXPS256,
20345   IX86_BUILTIN_MINPD256,
20346   IX86_BUILTIN_MINPS256,
20347   IX86_BUILTIN_MULPD256,
20348   IX86_BUILTIN_MULPS256,
20349   IX86_BUILTIN_ORPD256,
20350   IX86_BUILTIN_ORPS256,
20351   IX86_BUILTIN_SHUFPD256,
20352   IX86_BUILTIN_SHUFPS256,
20353   IX86_BUILTIN_SUBPD256,
20354   IX86_BUILTIN_SUBPS256,
20355   IX86_BUILTIN_XORPD256,
20356   IX86_BUILTIN_XORPS256,
20357   IX86_BUILTIN_CMPSD,
20358   IX86_BUILTIN_CMPSS,
20359   IX86_BUILTIN_CMPPD,
20360   IX86_BUILTIN_CMPPS,
20361   IX86_BUILTIN_CMPPD256,
20362   IX86_BUILTIN_CMPPS256,
20363   IX86_BUILTIN_CVTDQ2PD256,
20364   IX86_BUILTIN_CVTDQ2PS256,
20365   IX86_BUILTIN_CVTPD2PS256,
20366   IX86_BUILTIN_CVTPS2DQ256,
20367   IX86_BUILTIN_CVTPS2PD256,
20368   IX86_BUILTIN_CVTTPD2DQ256,
20369   IX86_BUILTIN_CVTPD2DQ256,
20370   IX86_BUILTIN_CVTTPS2DQ256,
20371   IX86_BUILTIN_EXTRACTF128PD256,
20372   IX86_BUILTIN_EXTRACTF128PS256,
20373   IX86_BUILTIN_EXTRACTF128SI256,
20374   IX86_BUILTIN_VZEROALL,
20375   IX86_BUILTIN_VZEROUPPER,
20376   IX86_BUILTIN_VZEROUPPER_REX64,
20377   IX86_BUILTIN_VPERMILVARPD,
20378   IX86_BUILTIN_VPERMILVARPS,
20379   IX86_BUILTIN_VPERMILVARPD256,
20380   IX86_BUILTIN_VPERMILVARPS256,
20381   IX86_BUILTIN_VPERMILPD,
20382   IX86_BUILTIN_VPERMILPS,
20383   IX86_BUILTIN_VPERMILPD256,
20384   IX86_BUILTIN_VPERMILPS256,
20385   IX86_BUILTIN_VPERM2F128PD256,
20386   IX86_BUILTIN_VPERM2F128PS256,
20387   IX86_BUILTIN_VPERM2F128SI256,
20388   IX86_BUILTIN_VBROADCASTSS,
20389   IX86_BUILTIN_VBROADCASTSD256,
20390   IX86_BUILTIN_VBROADCASTSS256,
20391   IX86_BUILTIN_VBROADCASTPD256,
20392   IX86_BUILTIN_VBROADCASTPS256,
20393   IX86_BUILTIN_VINSERTF128PD256,
20394   IX86_BUILTIN_VINSERTF128PS256,
20395   IX86_BUILTIN_VINSERTF128SI256,
20396   IX86_BUILTIN_LOADUPD256,
20397   IX86_BUILTIN_LOADUPS256,
20398   IX86_BUILTIN_STOREUPD256,
20399   IX86_BUILTIN_STOREUPS256,
20400   IX86_BUILTIN_LDDQU256,
20401   IX86_BUILTIN_MOVNTDQ256,
20402   IX86_BUILTIN_MOVNTPD256,
20403   IX86_BUILTIN_MOVNTPS256,
20404   IX86_BUILTIN_LOADDQU256,
20405   IX86_BUILTIN_STOREDQU256,
20406   IX86_BUILTIN_MASKLOADPD,
20407   IX86_BUILTIN_MASKLOADPS,
20408   IX86_BUILTIN_MASKSTOREPD,
20409   IX86_BUILTIN_MASKSTOREPS,
20410   IX86_BUILTIN_MASKLOADPD256,
20411   IX86_BUILTIN_MASKLOADPS256,
20412   IX86_BUILTIN_MASKSTOREPD256,
20413   IX86_BUILTIN_MASKSTOREPS256,
20414   IX86_BUILTIN_MOVSHDUP256,
20415   IX86_BUILTIN_MOVSLDUP256,
20416   IX86_BUILTIN_MOVDDUP256,
20417 
20418   IX86_BUILTIN_SQRTPD256,
20419   IX86_BUILTIN_SQRTPS256,
20420   IX86_BUILTIN_SQRTPS_NR256,
20421   IX86_BUILTIN_RSQRTPS256,
20422   IX86_BUILTIN_RSQRTPS_NR256,
20423 
20424   IX86_BUILTIN_RCPPS256,
20425 
20426   IX86_BUILTIN_ROUNDPD256,
20427   IX86_BUILTIN_ROUNDPS256,
20428 
20429   IX86_BUILTIN_UNPCKHPD256,
20430   IX86_BUILTIN_UNPCKLPD256,
20431   IX86_BUILTIN_UNPCKHPS256,
20432   IX86_BUILTIN_UNPCKLPS256,
20433 
20434   IX86_BUILTIN_SI256_SI,
20435   IX86_BUILTIN_PS256_PS,
20436   IX86_BUILTIN_PD256_PD,
20437   IX86_BUILTIN_SI_SI256,
20438   IX86_BUILTIN_PS_PS256,
20439   IX86_BUILTIN_PD_PD256,
20440 
20441   IX86_BUILTIN_VTESTZPD,
20442   IX86_BUILTIN_VTESTCPD,
20443   IX86_BUILTIN_VTESTNZCPD,
20444   IX86_BUILTIN_VTESTZPS,
20445   IX86_BUILTIN_VTESTCPS,
20446   IX86_BUILTIN_VTESTNZCPS,
20447   IX86_BUILTIN_VTESTZPD256,
20448   IX86_BUILTIN_VTESTCPD256,
20449   IX86_BUILTIN_VTESTNZCPD256,
20450   IX86_BUILTIN_VTESTZPS256,
20451   IX86_BUILTIN_VTESTCPS256,
20452   IX86_BUILTIN_VTESTNZCPS256,
20453   IX86_BUILTIN_PTESTZ256,
20454   IX86_BUILTIN_PTESTC256,
20455   IX86_BUILTIN_PTESTNZC256,
20456 
20457   IX86_BUILTIN_MOVMSKPD256,
20458   IX86_BUILTIN_MOVMSKPS256,
20459 
20460   /* TFmode support builtins.  */
20461   IX86_BUILTIN_INFQ,
20462   IX86_BUILTIN_FABSQ,
20463   IX86_BUILTIN_COPYSIGNQ,
20464 
20465   /* SSE5 instructions */
20466   IX86_BUILTIN_FMADDSS,
20467   IX86_BUILTIN_FMADDSD,
20468   IX86_BUILTIN_FMADDPS,
20469   IX86_BUILTIN_FMADDPD,
20470   IX86_BUILTIN_FMSUBSS,
20471   IX86_BUILTIN_FMSUBSD,
20472   IX86_BUILTIN_FMSUBPS,
20473   IX86_BUILTIN_FMSUBPD,
20474   IX86_BUILTIN_FNMADDSS,
20475   IX86_BUILTIN_FNMADDSD,
20476   IX86_BUILTIN_FNMADDPS,
20477   IX86_BUILTIN_FNMADDPD,
20478   IX86_BUILTIN_FNMSUBSS,
20479   IX86_BUILTIN_FNMSUBSD,
20480   IX86_BUILTIN_FNMSUBPS,
20481   IX86_BUILTIN_FNMSUBPD,
20482   IX86_BUILTIN_PCMOV,
20483   IX86_BUILTIN_PCMOV_V2DI,
20484   IX86_BUILTIN_PCMOV_V4SI,
20485   IX86_BUILTIN_PCMOV_V8HI,
20486   IX86_BUILTIN_PCMOV_V16QI,
20487   IX86_BUILTIN_PCMOV_V4SF,
20488   IX86_BUILTIN_PCMOV_V2DF,
20489   IX86_BUILTIN_PPERM,
20490   IX86_BUILTIN_PERMPS,
20491   IX86_BUILTIN_PERMPD,
20492   IX86_BUILTIN_PMACSSWW,
20493   IX86_BUILTIN_PMACSWW,
20494   IX86_BUILTIN_PMACSSWD,
20495   IX86_BUILTIN_PMACSWD,
20496   IX86_BUILTIN_PMACSSDD,
20497   IX86_BUILTIN_PMACSDD,
20498   IX86_BUILTIN_PMACSSDQL,
20499   IX86_BUILTIN_PMACSSDQH,
20500   IX86_BUILTIN_PMACSDQL,
20501   IX86_BUILTIN_PMACSDQH,
20502   IX86_BUILTIN_PMADCSSWD,
20503   IX86_BUILTIN_PMADCSWD,
20504   IX86_BUILTIN_PHADDBW,
20505   IX86_BUILTIN_PHADDBD,
20506   IX86_BUILTIN_PHADDBQ,
20507   IX86_BUILTIN_PHADDWD,
20508   IX86_BUILTIN_PHADDWQ,
20509   IX86_BUILTIN_PHADDDQ,
20510   IX86_BUILTIN_PHADDUBW,
20511   IX86_BUILTIN_PHADDUBD,
20512   IX86_BUILTIN_PHADDUBQ,
20513   IX86_BUILTIN_PHADDUWD,
20514   IX86_BUILTIN_PHADDUWQ,
20515   IX86_BUILTIN_PHADDUDQ,
20516   IX86_BUILTIN_PHSUBBW,
20517   IX86_BUILTIN_PHSUBWD,
20518   IX86_BUILTIN_PHSUBDQ,
20519   IX86_BUILTIN_PROTB,
20520   IX86_BUILTIN_PROTW,
20521   IX86_BUILTIN_PROTD,
20522   IX86_BUILTIN_PROTQ,
20523   IX86_BUILTIN_PROTB_IMM,
20524   IX86_BUILTIN_PROTW_IMM,
20525   IX86_BUILTIN_PROTD_IMM,
20526   IX86_BUILTIN_PROTQ_IMM,
20527   IX86_BUILTIN_PSHLB,
20528   IX86_BUILTIN_PSHLW,
20529   IX86_BUILTIN_PSHLD,
20530   IX86_BUILTIN_PSHLQ,
20531   IX86_BUILTIN_PSHAB,
20532   IX86_BUILTIN_PSHAW,
20533   IX86_BUILTIN_PSHAD,
20534   IX86_BUILTIN_PSHAQ,
20535   IX86_BUILTIN_FRCZSS,
20536   IX86_BUILTIN_FRCZSD,
20537   IX86_BUILTIN_FRCZPS,
20538   IX86_BUILTIN_FRCZPD,
20539   IX86_BUILTIN_CVTPH2PS,
20540   IX86_BUILTIN_CVTPS2PH,
20541 
20542   IX86_BUILTIN_COMEQSS,
20543   IX86_BUILTIN_COMNESS,
20544   IX86_BUILTIN_COMLTSS,
20545   IX86_BUILTIN_COMLESS,
20546   IX86_BUILTIN_COMGTSS,
20547   IX86_BUILTIN_COMGESS,
20548   IX86_BUILTIN_COMUEQSS,
20549   IX86_BUILTIN_COMUNESS,
20550   IX86_BUILTIN_COMULTSS,
20551   IX86_BUILTIN_COMULESS,
20552   IX86_BUILTIN_COMUGTSS,
20553   IX86_BUILTIN_COMUGESS,
20554   IX86_BUILTIN_COMORDSS,
20555   IX86_BUILTIN_COMUNORDSS,
20556   IX86_BUILTIN_COMFALSESS,
20557   IX86_BUILTIN_COMTRUESS,
20558 
20559   IX86_BUILTIN_COMEQSD,
20560   IX86_BUILTIN_COMNESD,
20561   IX86_BUILTIN_COMLTSD,
20562   IX86_BUILTIN_COMLESD,
20563   IX86_BUILTIN_COMGTSD,
20564   IX86_BUILTIN_COMGESD,
20565   IX86_BUILTIN_COMUEQSD,
20566   IX86_BUILTIN_COMUNESD,
20567   IX86_BUILTIN_COMULTSD,
20568   IX86_BUILTIN_COMULESD,
20569   IX86_BUILTIN_COMUGTSD,
20570   IX86_BUILTIN_COMUGESD,
20571   IX86_BUILTIN_COMORDSD,
20572   IX86_BUILTIN_COMUNORDSD,
20573   IX86_BUILTIN_COMFALSESD,
20574   IX86_BUILTIN_COMTRUESD,
20575 
20576   IX86_BUILTIN_COMEQPS,
20577   IX86_BUILTIN_COMNEPS,
20578   IX86_BUILTIN_COMLTPS,
20579   IX86_BUILTIN_COMLEPS,
20580   IX86_BUILTIN_COMGTPS,
20581   IX86_BUILTIN_COMGEPS,
20582   IX86_BUILTIN_COMUEQPS,
20583   IX86_BUILTIN_COMUNEPS,
20584   IX86_BUILTIN_COMULTPS,
20585   IX86_BUILTIN_COMULEPS,
20586   IX86_BUILTIN_COMUGTPS,
20587   IX86_BUILTIN_COMUGEPS,
20588   IX86_BUILTIN_COMORDPS,
20589   IX86_BUILTIN_COMUNORDPS,
20590   IX86_BUILTIN_COMFALSEPS,
20591   IX86_BUILTIN_COMTRUEPS,
20592 
20593   IX86_BUILTIN_COMEQPD,
20594   IX86_BUILTIN_COMNEPD,
20595   IX86_BUILTIN_COMLTPD,
20596   IX86_BUILTIN_COMLEPD,
20597   IX86_BUILTIN_COMGTPD,
20598   IX86_BUILTIN_COMGEPD,
20599   IX86_BUILTIN_COMUEQPD,
20600   IX86_BUILTIN_COMUNEPD,
20601   IX86_BUILTIN_COMULTPD,
20602   IX86_BUILTIN_COMULEPD,
20603   IX86_BUILTIN_COMUGTPD,
20604   IX86_BUILTIN_COMUGEPD,
20605   IX86_BUILTIN_COMORDPD,
20606   IX86_BUILTIN_COMUNORDPD,
20607   IX86_BUILTIN_COMFALSEPD,
20608   IX86_BUILTIN_COMTRUEPD,
20609 
20610   IX86_BUILTIN_PCOMEQUB,
20611   IX86_BUILTIN_PCOMNEUB,
20612   IX86_BUILTIN_PCOMLTUB,
20613   IX86_BUILTIN_PCOMLEUB,
20614   IX86_BUILTIN_PCOMGTUB,
20615   IX86_BUILTIN_PCOMGEUB,
20616   IX86_BUILTIN_PCOMFALSEUB,
20617   IX86_BUILTIN_PCOMTRUEUB,
20618   IX86_BUILTIN_PCOMEQUW,
20619   IX86_BUILTIN_PCOMNEUW,
20620   IX86_BUILTIN_PCOMLTUW,
20621   IX86_BUILTIN_PCOMLEUW,
20622   IX86_BUILTIN_PCOMGTUW,
20623   IX86_BUILTIN_PCOMGEUW,
20624   IX86_BUILTIN_PCOMFALSEUW,
20625   IX86_BUILTIN_PCOMTRUEUW,
20626   IX86_BUILTIN_PCOMEQUD,
20627   IX86_BUILTIN_PCOMNEUD,
20628   IX86_BUILTIN_PCOMLTUD,
20629   IX86_BUILTIN_PCOMLEUD,
20630   IX86_BUILTIN_PCOMGTUD,
20631   IX86_BUILTIN_PCOMGEUD,
20632   IX86_BUILTIN_PCOMFALSEUD,
20633   IX86_BUILTIN_PCOMTRUEUD,
20634   IX86_BUILTIN_PCOMEQUQ,
20635   IX86_BUILTIN_PCOMNEUQ,
20636   IX86_BUILTIN_PCOMLTUQ,
20637   IX86_BUILTIN_PCOMLEUQ,
20638   IX86_BUILTIN_PCOMGTUQ,
20639   IX86_BUILTIN_PCOMGEUQ,
20640   IX86_BUILTIN_PCOMFALSEUQ,
20641   IX86_BUILTIN_PCOMTRUEUQ,
20642 
20643   IX86_BUILTIN_PCOMEQB,
20644   IX86_BUILTIN_PCOMNEB,
20645   IX86_BUILTIN_PCOMLTB,
20646   IX86_BUILTIN_PCOMLEB,
20647   IX86_BUILTIN_PCOMGTB,
20648   IX86_BUILTIN_PCOMGEB,
20649   IX86_BUILTIN_PCOMFALSEB,
20650   IX86_BUILTIN_PCOMTRUEB,
20651   IX86_BUILTIN_PCOMEQW,
20652   IX86_BUILTIN_PCOMNEW,
20653   IX86_BUILTIN_PCOMLTW,
20654   IX86_BUILTIN_PCOMLEW,
20655   IX86_BUILTIN_PCOMGTW,
20656   IX86_BUILTIN_PCOMGEW,
20657   IX86_BUILTIN_PCOMFALSEW,
20658   IX86_BUILTIN_PCOMTRUEW,
20659   IX86_BUILTIN_PCOMEQD,
20660   IX86_BUILTIN_PCOMNED,
20661   IX86_BUILTIN_PCOMLTD,
20662   IX86_BUILTIN_PCOMLED,
20663   IX86_BUILTIN_PCOMGTD,
20664   IX86_BUILTIN_PCOMGED,
20665   IX86_BUILTIN_PCOMFALSED,
20666   IX86_BUILTIN_PCOMTRUED,
20667   IX86_BUILTIN_PCOMEQQ,
20668   IX86_BUILTIN_PCOMNEQ,
20669   IX86_BUILTIN_PCOMLTQ,
20670   IX86_BUILTIN_PCOMLEQ,
20671   IX86_BUILTIN_PCOMGTQ,
20672   IX86_BUILTIN_PCOMGEQ,
20673   IX86_BUILTIN_PCOMFALSEQ,
20674   IX86_BUILTIN_PCOMTRUEQ,
20675 
20676   IX86_BUILTIN_MAX
20677 };
20678 
20679 /* Table for the ix86 builtin decls.  */
20680 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
20681 
/* Table of all of the builtin functions that are possible with different ISAs
   but are waiting to be built until a function is declared to use that
   ISA.  */
20685 struct builtin_isa GTY(())
20686 {
20687   tree type;                    /* builtin type to use in the declaration */
20688   const char *name;             /* function name */
20689   int isa;                      /* isa_flags this builtin is defined for */
20690   bool const_p;                 /* true if the declaration is constant */
20691 };
20692 
20693 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
20694 
20695 
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Saves the
 * MASK of isa_flags bits the builtin requires in the ix86_builtins_isa
 * array.  Stores the function decl in the ix86_builtins array.  Returns the
 * function decl or NULL_TREE if the builtin was not added.
 *
 * If the front end has a special hook for builtin functions, delay adding
 * builtin functions that aren't in the current ISA until the ISA is changed
 * with function specific optimization.  Doing so can save about 300K for the
 * default compiler.  When the builtin is expanded, check at that time whether
 * it is valid.
 *
 * If the front end doesn't have a special hook, record all builtins, even
 * those not in the current ISA, in case the user uses function specific
 * options for a different ISA, so that we don't get scope errors if a
 * builtin is added in the middle of a function scope.  */
20711 
20712 static inline tree
20713 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
20714 {
20715   tree decl = NULL_TREE;
20716 
20717   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
20718     {
20719       ix86_builtins_isa[(int) code].isa = mask;
20720 
      if ((mask & ix86_isa_flags) != 0
          || (lang_hooks.builtin_function
              == lang_hooks.builtin_function_ext_scope))
        {
20726           decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
20727                                        NULL_TREE);
20728           ix86_builtins[(int) code] = decl;
20729           ix86_builtins_isa[(int) code].type = NULL_TREE;
20730         }
20731       else
20732         {
20733           ix86_builtins[(int) code] = NULL_TREE;
20734           ix86_builtins_isa[(int) code].const_p = false;
20735           ix86_builtins_isa[(int) code].type = type;
20736           ix86_builtins_isa[(int) code].name = name;
20737         }
20738     }
20739 
20740   return decl;
20741 }
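
/* As an illustration (a sketch, not code from this file's initialization
   routines; V4SF_type_node stands for the vector type node built during
   builtin setup elsewhere):

     tree ftype = build_function_type_list (V4SF_type_node,
                                            V4SF_type_node, NULL_TREE);
     def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps",
                  ftype, IX86_BUILTIN_SQRTPS);

   With SSE enabled on the command line (or with a front end that provides
   builtin_function_ext_scope), the decl is created immediately; otherwise
   it is parked in ix86_builtins_isa until ix86_add_new_builtins later sees
   the SSE bit.  */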
20742 
20743 /* Like def_builtin, but also marks the function decl "const".  */
20744 
20745 static inline tree
20746 def_builtin_const (int mask, const char *name, tree type,
20747                    enum ix86_builtins code)
20748 {
20749   tree decl = def_builtin (mask, name, type, code);
20750   if (decl)
20751     TREE_READONLY (decl) = 1;
20752   else
20753     ix86_builtins_isa[(int) code].const_p = true;
20754 
20755   return decl;
20756 }
20757 
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree up front, even when they are never used.  */
20761 
20762 static void
20763 ix86_add_new_builtins (int isa)
20764 {
20765   int i;
20766   tree decl;
20767 
  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
20769     {
20770       if ((ix86_builtins_isa[i].isa & isa) != 0
20771           && ix86_builtins_isa[i].type != NULL_TREE)
20772         {
20773           decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
20774                                                  ix86_builtins_isa[i].type,
20775                                                  i, BUILT_IN_MD, NULL,
20776                                                  NULL_TREE);
20777 
20778           ix86_builtins[i] = decl;
20779           ix86_builtins_isa[i].type = NULL_TREE;
20780           if (ix86_builtins_isa[i].const_p)
20781             TREE_READONLY (decl) = 1;
20782         }
20783     }
20784 }
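
/* For example (illustrative, not a transcript of the actual call chain):
   with a command line of plain -msse2, a function such as

     __attribute__ ((target ("sse4.2")))
     int f (void) { ... }

   switches the ISA on entry, and the SSE4.2 builtin declarations deferred
   by def_builtin are added here so they are in scope inside the
   function.  */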
20785 
20786 /* Bits for builtin_description.flag.  */
20787 
/* Set when we don't support the comparison natively, and should
   swap the comparison operands in order to support it.  */
20790 #define BUILTIN_DESC_SWAP_OPERANDS      1
20791 
20792 struct builtin_description
20793 {
20794   const unsigned int mask;
20795   const enum insn_code icode;
20796   const char *const name;
20797   const enum ix86_builtins code;
20798   const enum rtx_code comparison;
20799   const int flag;
20800 };
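
/* For example, the first bdesc_comi entry below,

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq",
       IX86_BUILTIN_COMIEQSS, UNEQ, 0 },

   reads: when SSE is enabled, register __builtin_ia32_comieq under the
   code IX86_BUILTIN_COMIEQSS and expand it through the sse_comi insn
   pattern using the UNEQ comparison; the zero flag field means the
   operands are used in the order written (no
   BUILTIN_DESC_SWAP_OPERANDS).  */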
20801 
20802 static const struct builtin_description bdesc_comi[] =
20803 {
20804   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
20805   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
20806   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
20807   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
20808   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
20809   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
20810   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
20811   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
20812   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
20813   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
20814   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
20815   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
20816   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
20817   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
20818   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
20819   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
20820   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
20821   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
20822   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
20823   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
20824   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
20825   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
20826   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
20827   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
20828 };
20829 
20830 static const struct builtin_description bdesc_pcmpestr[] =
20831 {
20832   /* SSE4.2 */
20833   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
20834   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
20835   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
20836   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
20837   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
20838   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
20839   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
20840 };
20841 
20842 static const struct builtin_description bdesc_pcmpistr[] =
20843 {
20844   /* SSE4.2 */
20845   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
20846   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
20847   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
20848   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
20849   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
20850   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
20851   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
20852 };
20853 
/* Special builtin types.  */
20855 enum ix86_special_builtin_type
20856 {
20857   SPECIAL_FTYPE_UNKNOWN,
20858   VOID_FTYPE_VOID,
20859   V32QI_FTYPE_PCCHAR,
20860   V16QI_FTYPE_PCCHAR,
20861   V8SF_FTYPE_PCV4SF,
20862   V8SF_FTYPE_PCFLOAT,
20863   V4DF_FTYPE_PCV2DF,
20864   V4DF_FTYPE_PCDOUBLE,
20865   V4SF_FTYPE_PCFLOAT,
20866   V2DF_FTYPE_PCDOUBLE,
20867   V8SF_FTYPE_PCV8SF_V8SF,
20868   V4DF_FTYPE_PCV4DF_V4DF,
20869   V4SF_FTYPE_V4SF_PCV2SF,
20870   V4SF_FTYPE_PCV4SF_V4SF,
20871   V2DF_FTYPE_V2DF_PCDOUBLE,
20872   V2DF_FTYPE_PCV2DF_V2DF,
20873   V2DI_FTYPE_PV2DI,
20874   VOID_FTYPE_PV2SF_V4SF,
20875   VOID_FTYPE_PV4DI_V4DI,
20876   VOID_FTYPE_PV2DI_V2DI,
20877   VOID_FTYPE_PCHAR_V32QI,
20878   VOID_FTYPE_PCHAR_V16QI,
20879   VOID_FTYPE_PFLOAT_V8SF,
20880   VOID_FTYPE_PFLOAT_V4SF,
20881   VOID_FTYPE_PDOUBLE_V4DF,
20882   VOID_FTYPE_PDOUBLE_V2DF,
20883   VOID_FTYPE_PDI_DI,
20884   VOID_FTYPE_PINT_INT,
20885   VOID_FTYPE_PV8SF_V8SF_V8SF,
20886   VOID_FTYPE_PV4DF_V4DF_V4DF,
20887   VOID_FTYPE_PV4SF_V4SF_V4SF,
20888   VOID_FTYPE_PV2DF_V2DF_V2DF
20889 };
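
/* The enumerator names in this enum and the next encode a signature as
   RET_FTYPE_ARG1_ARG2...: V4SF_FTYPE_PCFLOAT, for instance, denotes a
   function returning a V4SF vector and taking a pointer to const float
   ("P" for pointer, "PC" for pointer to const).  */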
20890 
/* Builtin types.  */
20892 enum ix86_builtin_type
20893 {
20894   FTYPE_UNKNOWN,
20895   FLOAT128_FTYPE_FLOAT128,
20896   FLOAT_FTYPE_FLOAT,
20897   FLOAT128_FTYPE_FLOAT128_FLOAT128,
20898   INT_FTYPE_V8SF_V8SF_PTEST,
20899   INT_FTYPE_V4DI_V4DI_PTEST,
20900   INT_FTYPE_V4DF_V4DF_PTEST,
20901   INT_FTYPE_V4SF_V4SF_PTEST,
20902   INT_FTYPE_V2DI_V2DI_PTEST,
20903   INT_FTYPE_V2DF_V2DF_PTEST,
20904   INT64_FTYPE_V4SF,
20905   INT64_FTYPE_V2DF,
20906   INT_FTYPE_V16QI,
20907   INT_FTYPE_V8QI,
20908   INT_FTYPE_V8SF,
20909   INT_FTYPE_V4DF,
20910   INT_FTYPE_V4SF,
20911   INT_FTYPE_V2DF,
20912   V16QI_FTYPE_V16QI,
20913   V8SI_FTYPE_V8SF,
20914   V8SI_FTYPE_V4SI,
20915   V8HI_FTYPE_V8HI,
20916   V8HI_FTYPE_V16QI,
20917   V8QI_FTYPE_V8QI,
20918   V8SF_FTYPE_V8SF,
20919   V8SF_FTYPE_V8SI,
20920   V8SF_FTYPE_V4SF,
20921   V4SI_FTYPE_V4SI,
20922   V4SI_FTYPE_V16QI,
20923   V4SI_FTYPE_V8SI,
20924   V4SI_FTYPE_V8HI,
20925   V4SI_FTYPE_V4DF,
20926   V4SI_FTYPE_V4SF,
20927   V4SI_FTYPE_V2DF,
20928   V4HI_FTYPE_V4HI,
20929   V4DF_FTYPE_V4DF,
20930   V4DF_FTYPE_V4SI,
20931   V4DF_FTYPE_V4SF,
20932   V4DF_FTYPE_V2DF,
20933   V4SF_FTYPE_V4DF,
20934   V4SF_FTYPE_V4SF,
20935   V4SF_FTYPE_V4SF_VEC_MERGE,
20936   V4SF_FTYPE_V8SF,
20937   V4SF_FTYPE_V4SI,
20938   V4SF_FTYPE_V2DF,
20939   V2DI_FTYPE_V2DI,
20940   V2DI_FTYPE_V16QI,
20941   V2DI_FTYPE_V8HI,
20942   V2DI_FTYPE_V4SI,
20943   V2DF_FTYPE_V2DF,
20944   V2DF_FTYPE_V2DF_VEC_MERGE,
20945   V2DF_FTYPE_V4SI,
20946   V2DF_FTYPE_V4DF,
20947   V2DF_FTYPE_V4SF,
20948   V2DF_FTYPE_V2SI,
20949   V2SI_FTYPE_V2SI,
20950   V2SI_FTYPE_V4SF,
20951   V2SI_FTYPE_V2SF,
20952   V2SI_FTYPE_V2DF,
20953   V2SF_FTYPE_V2SF,
20954   V2SF_FTYPE_V2SI,
20955   V16QI_FTYPE_V16QI_V16QI,
20956   V16QI_FTYPE_V8HI_V8HI,
20957   V8QI_FTYPE_V8QI_V8QI,
20958   V8QI_FTYPE_V4HI_V4HI,
20959   V8HI_FTYPE_V8HI_V8HI,
20960   V8HI_FTYPE_V8HI_V8HI_COUNT,
20961   V8HI_FTYPE_V16QI_V16QI,
20962   V8HI_FTYPE_V4SI_V4SI,
20963   V8HI_FTYPE_V8HI_SI_COUNT,
20964   V8SF_FTYPE_V8SF_V8SF,
20965   V8SF_FTYPE_V8SF_V8SI,
20966   V4SI_FTYPE_V4SI_V4SI,
20967   V4SI_FTYPE_V4SI_V4SI_COUNT,
20968   V4SI_FTYPE_V8HI_V8HI,
20969   V4SI_FTYPE_V4SF_V4SF,
20970   V4SI_FTYPE_V2DF_V2DF,
20971   V4SI_FTYPE_V4SI_SI_COUNT,
20972   V4HI_FTYPE_V4HI_V4HI,
20973   V4HI_FTYPE_V4HI_V4HI_COUNT,
20974   V4HI_FTYPE_V8QI_V8QI,
20975   V4HI_FTYPE_V2SI_V2SI,
20976   V4HI_FTYPE_V4HI_SI_COUNT,
20977   V4DF_FTYPE_V4DF_V4DF,
20978   V4DF_FTYPE_V4DF_V4DI,
20979   V4SF_FTYPE_V4SF_V4SF,
20980   V4SF_FTYPE_V4SF_V4SF_SWAP,
20981   V4SF_FTYPE_V4SF_V4SI,
20982   V4SF_FTYPE_V4SF_V2SI,
20983   V4SF_FTYPE_V4SF_V2DF,
20984   V4SF_FTYPE_V4SF_DI,
20985   V4SF_FTYPE_V4SF_SI,
20986   V2DI_FTYPE_V2DI_V2DI,
20987   V2DI_FTYPE_V2DI_V2DI_COUNT,
20988   V2DI_FTYPE_V16QI_V16QI,
20989   V2DI_FTYPE_V4SI_V4SI,
20990   V2DI_FTYPE_V2DI_V16QI,
20991   V2DI_FTYPE_V2DF_V2DF,
20992   V2DI_FTYPE_V2DI_SI_COUNT,
20993   V2SI_FTYPE_V2SI_V2SI,
20994   V2SI_FTYPE_V2SI_V2SI_COUNT,
20995   V2SI_FTYPE_V4HI_V4HI,
20996   V2SI_FTYPE_V2SF_V2SF,
20997   V2SI_FTYPE_V2SI_SI_COUNT,
20998   V2DF_FTYPE_V2DF_V2DF,
20999   V2DF_FTYPE_V2DF_V2DF_SWAP,
21000   V2DF_FTYPE_V2DF_V4SF,
21001   V2DF_FTYPE_V2DF_V2DI,
21002   V2DF_FTYPE_V2DF_DI,
21003   V2DF_FTYPE_V2DF_SI,
21004   V2SF_FTYPE_V2SF_V2SF,
21005   V1DI_FTYPE_V1DI_V1DI,
21006   V1DI_FTYPE_V1DI_V1DI_COUNT,
21007   V1DI_FTYPE_V8QI_V8QI,
21008   V1DI_FTYPE_V2SI_V2SI,
21009   V1DI_FTYPE_V1DI_SI_COUNT,
21010   UINT64_FTYPE_UINT64_UINT64,
21011   UINT_FTYPE_UINT_UINT,
21012   UINT_FTYPE_UINT_USHORT,
21013   UINT_FTYPE_UINT_UCHAR,
21014   V8HI_FTYPE_V8HI_INT,
21015   V4SI_FTYPE_V4SI_INT,
21016   V4HI_FTYPE_V4HI_INT,
21017   V8SF_FTYPE_V8SF_INT,
21018   V4SI_FTYPE_V8SI_INT,
21019   V4SF_FTYPE_V8SF_INT,
21020   V2DF_FTYPE_V4DF_INT,
21021   V4DF_FTYPE_V4DF_INT,
21022   V4SF_FTYPE_V4SF_INT,
21023   V2DI_FTYPE_V2DI_INT,
21024   V2DI2TI_FTYPE_V2DI_INT,
21025   V2DF_FTYPE_V2DF_INT,
21026   V16QI_FTYPE_V16QI_V16QI_V16QI,
21027   V8SF_FTYPE_V8SF_V8SF_V8SF,
21028   V4DF_FTYPE_V4DF_V4DF_V4DF,
21029   V4SF_FTYPE_V4SF_V4SF_V4SF,
21030   V2DF_FTYPE_V2DF_V2DF_V2DF,
21031   V16QI_FTYPE_V16QI_V16QI_INT,
21032   V8SI_FTYPE_V8SI_V8SI_INT,
21033   V8SI_FTYPE_V8SI_V4SI_INT,
21034   V8HI_FTYPE_V8HI_V8HI_INT,
21035   V8SF_FTYPE_V8SF_V8SF_INT,
21036   V8SF_FTYPE_V8SF_V4SF_INT,
21037   V4SI_FTYPE_V4SI_V4SI_INT,
21038   V4DF_FTYPE_V4DF_V4DF_INT,
21039   V4DF_FTYPE_V4DF_V2DF_INT,
21040   V4SF_FTYPE_V4SF_V4SF_INT,
21041   V2DI_FTYPE_V2DI_V2DI_INT,
21042   V2DI2TI_FTYPE_V2DI_V2DI_INT,
21043   V1DI2DI_FTYPE_V1DI_V1DI_INT,
21044   V2DF_FTYPE_V2DF_V2DF_INT,
21045   V2DI_FTYPE_V2DI_UINT_UINT,
21046   V2DI_FTYPE_V2DI_V2DI_UINT_UINT
21047 };
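
/* A hedged summary of the trailing tags on some names above (_SWAP,
   _COUNT, _VEC_MERGE, _PTEST): they are hints to the expanders later in
   this file rather than extra arguments.  _SWAP exchanges the two operands
   before emitting the comparison (e.g. __builtin_ia32_cmpgtps below is a
   swapped LT); _COUNT marks the last operand as a shift count; _VEC_MERGE
   marks a scalar operation whose result is merged back into the source
   vector; _PTEST marks ptest-style predicates that return an int from the
   flags.  */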
21048 
21049 /* Special builtins with variable number of arguments.  */
21050 static const struct builtin_description bdesc_special_args[] =
21051 {
21052   /* MMX */
21053   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21054 
21055   /* 3DNow! */
21056   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21057 
21058   /* SSE */
21059   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21060   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21061   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21062 
21063   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21064   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21065   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21066   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21067 
21068   /* SSE or 3DNow!A  */
21069   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21070   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
21071 
21072   /* SSE2 */
21073   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21074   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21075   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21076   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
21077   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21078   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
21079   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
21080   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
21081   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21082 
21083   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21084   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21085 
21086   /* SSE3 */
21087   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21088 
21089   /* SSE4.1 */
21090   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
21091 
21092   /* SSE4A */
21093   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21094   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21095 
21096   /* AVX */
21097   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
21098   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
21099   { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
21100 
21101   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21102   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21103   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21104   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
21105   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
21106 
21107   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21108   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21109   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21110   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21111   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21112   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
21113   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21114 
21115   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21116   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21117   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21118 
21119   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
21120   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
21121   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
21122   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
21123   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
21124   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
21125   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
21126   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
21127 };
21128 
21129 /* Builtins with variable number of arguments.  */
21130 static const struct builtin_description bdesc_args[] =
21131 {
21132   /* MMX */
21133   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21134   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21135   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21136   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21137   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21138   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21139 
21140   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21141   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21142   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21143   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21144   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21145   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21146   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21147   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21148 
21149   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21150   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21151 
21152   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21153   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21154   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21155   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21156 
21157   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21158   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21159   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21160   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21161   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21162   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21163 
21164   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21165   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21166   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21167   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21170 
21171   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21172   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
21173   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21174 
21175   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
21176 
21177   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21178   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21179   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21180   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21181   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21182   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21183 
21184   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21185   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21186   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21187   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21188   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21189   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21190 
21191   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21192   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21193   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21194   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21195 
21196   /* 3DNow! */
21197   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21198   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21199   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21200   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21201 
21202   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21203   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21204   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21205   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21206   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21207   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21208   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21209   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21210   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21211   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21212   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21213   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21214   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21215   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21216   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21217 
21218   /* 3DNow!A */
21219   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21220   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21221   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21222   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21223   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21224   { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21225 
21226   /* SSE */
21227   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21228   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21229   { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21230   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21231   { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21232   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21233   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21234   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21235   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21236   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21237   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21238   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21239 
21240   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21241 
21242   { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21243   { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21244   { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21245   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21246   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21247   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21248   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21249   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21250 
21251   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21252   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21253   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21254   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21255   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21256   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21257   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21258   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21259   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21260   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21262   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21263   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21264   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21265   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21266   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21267   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21268   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21269   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21270   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21271   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21272   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21273 
21274   { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21275   { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21276   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21277   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21278 
21279   { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21280   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21281   { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21282   { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21283 
21284   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21285   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21286   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21287   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21288   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21289 
21290   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21291   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
21293 
21294   { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21295 
21296   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21297   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21298   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21299 
  /* SSE or 3DNow!A */
21301   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21302   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21303   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21304 
21305   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21306   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21307   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21308   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21309 
21310   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21311   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21312 
21313   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21314 
21315   /* SSE2 */
21316   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21317 
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
21319   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21320   { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21321   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21322   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21323 
21324   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21325   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21326   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21327   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21328   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21329 
21330   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21331 
21332   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21333   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21334   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21335   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21336 
21337   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21338   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21339   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21340 
21341   { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21342   { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21343   { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21344   { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21345   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21346   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21347   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21348   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21349 
21350   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21351   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21352   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21353   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21355   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21356   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21357   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21358   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21359   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21360   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21361   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21362   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21363   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21364   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21365   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21366   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21367   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21368   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21369   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

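  /* A note on the shift entries below: the *_SI_COUNT types take the
     shift count as a scalar integer, while the *_V*_COUNT types take it
     from the low quadword of a vector operand.  pslldqi128 and
     psrldqi128 expand through full TImode shifts (sse2_ashlti3 and
     sse2_lshrti3), so their immediate operand is a bit count; the
     byte-to-bit scaling for _mm_slli_si128 and _mm_srli_si128 is done
     by the emmintrin.h wrappers.  */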
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
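  /* A zero name field (the two entries above, and the AES and PCLMUL
     entries further down) means the user-visible name is registered
     separately in ix86_init_mmx_sse_builtins, where the appropriate
     ISA mask is applied.  */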

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  /* SSSE3.  */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  /* SSE4.1 and SSE5 */
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },

  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* AES */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* PCLMUL */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256,  "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256,  "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256,  "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256,  "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
};
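
/* Each bdesc_args entry above ties one __builtin_ia32_* function to an
   insn pattern and a prototype; the shared expander
   (ix86_expand_args_builtin) does the rest.  As an illustrative sketch
   (not code that belongs in this file), the IX86_BUILTIN_PADDB128
   entry is what lets the following compile to a single paddb under
   -msse2:

       typedef char v16qi __attribute__ ((vector_size (16)));

       v16qi
       add_bytes (v16qi a, v16qi b)
       {
         return __builtin_ia32_paddb128 (a, b);
       }

   User code normally reaches these builtins through the intrinsic
   wrappers in <emmintrin.h> and friends (_mm_add_epi8 here) rather
   than calling them directly.  */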

/* SSE5 */
enum multi_arg_type {
  MULTI_ARG_UNKNOWN,
  MULTI_ARG_3_SF,
  MULTI_ARG_3_DF,
  MULTI_ARG_3_DI,
  MULTI_ARG_3_SI,
  MULTI_ARG_3_SI_DI,
  MULTI_ARG_3_HI,
  MULTI_ARG_3_HI_SI,
  MULTI_ARG_3_QI,
  MULTI_ARG_3_PERMPS,
  MULTI_ARG_3_PERMPD,
  MULTI_ARG_2_SF,
  MULTI_ARG_2_DF,
  MULTI_ARG_2_DI,
  MULTI_ARG_2_SI,
  MULTI_ARG_2_HI,
  MULTI_ARG_2_QI,
  MULTI_ARG_2_DI_IMM,
  MULTI_ARG_2_SI_IMM,
  MULTI_ARG_2_HI_IMM,
  MULTI_ARG_2_QI_IMM,
  MULTI_ARG_2_SF_CMP,
  MULTI_ARG_2_DF_CMP,
  MULTI_ARG_2_DI_CMP,
  MULTI_ARG_2_SI_CMP,
  MULTI_ARG_2_HI_CMP,
  MULTI_ARG_2_QI_CMP,
  MULTI_ARG_2_DI_TF,
  MULTI_ARG_2_SI_TF,
  MULTI_ARG_2_HI_TF,
  MULTI_ARG_2_QI_TF,
  MULTI_ARG_2_SF_TF,
  MULTI_ARG_2_DF_TF,
  MULTI_ARG_1_SF,
  MULTI_ARG_1_DF,
  MULTI_ARG_1_DI,
  MULTI_ARG_1_SI,
  MULTI_ARG_1_HI,
  MULTI_ARG_1_QI,
  MULTI_ARG_1_SI_DI,
  MULTI_ARG_1_HI_DI,
  MULTI_ARG_1_HI_SI,
  MULTI_ARG_1_QI_DI,
  MULTI_ARG_1_QI_SI,
  MULTI_ARG_1_QI_HI,
  MULTI_ARG_1_PH2PS,
  MULTI_ARG_1_PS2PH
};
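
/* The MULTI_ARG_<n>_<mode> names above encode the operand count and
   element mode of each SSE5 builtin; double-mode names such as
   MULTI_ARG_3_HI_SI take HImode sources and produce an SImode result.
   The _IMM forms take an immediate count, the _CMP forms carry an RTX
   comparison code (see the com* entries below), and the _TF forms back
   the always-true and always-false comparison variants (the pcomtrue
   and pcomfalse family).  As a usage sketch, a MULTI_ARG_3_SF builtin
   such as __builtin_ia32_fmaddps is a plain three-operand call under
   -msse5, returning a * b + c:

       typedef float v4sf __attribute__ ((vector_size (16)));

       v4sf
       fma_ps (v4sf a, v4sf b, v4sf c)
       {
         return __builtin_ia32_fmaddps (a, b, c);
       }
   */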

static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4,     "__builtin_ia32_fmaddss",    IX86_BUILTIN_FMADDSS,    0,            (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4,     "__builtin_ia32_fmaddsd",    IX86_BUILTIN_FMADDSD,    0,            (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4,       "__builtin_ia32_fmaddps",    IX86_BUILTIN_FMADDPS,    0,            (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4,       "__builtin_ia32_fmaddpd",    IX86_BUILTIN_FMADDPD,    0,            (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4,     "__builtin_ia32_fmsubss",    IX86_BUILTIN_FMSUBSS,    0,            (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4,     "__builtin_ia32_fmsubsd",    IX86_BUILTIN_FMSUBSD,    0,            (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4,       "__builtin_ia32_fmsubps",    IX86_BUILTIN_FMSUBPS,    0,            (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4,       "__builtin_ia32_fmsubpd",    IX86_BUILTIN_FMSUBPD,    0,            (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4,    "__builtin_ia32_fnmaddss",   IX86_BUILTIN_FNMADDSS,   0,            (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4,    "__builtin_ia32_fnmaddsd",   IX86_BUILTIN_FNMADDSD,   0,            (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4,      "__builtin_ia32_fnmaddps",   IX86_BUILTIN_FNMADDPS,   0,            (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4,      "__builtin_ia32_fnmaddpd",   IX86_BUILTIN_FNMADDPD,   0,            (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4,    "__builtin_ia32_fnmsubss",   IX86_BUILTIN_FNMSUBSS,   0,            (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4,    "__builtin_ia32_fnmsubsd",   IX86_BUILTIN_FNMSUBSD,   0,            (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4,      "__builtin_ia32_fnmsubps",   IX86_BUILTIN_FNMSUBPS,   0,            (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4,      "__builtin_ia32_fnmsubpd",   IX86_BUILTIN_FNMSUBPD,   0,            (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di,        "__builtin_ia32_pcmov",      IX86_BUILTIN_PCMOV,      0,            (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di,        "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0,            (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si,        "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0,            (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi,        "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0,            (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi,       "__builtin_ia32_pcmov_v16qi", IX86_BUILTIN_PCMOV_V16QI, 0,          (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df,        "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0,            (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf,        "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0,            (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm,             "__builtin_ia32_pperm",      IX86_BUILTIN_PPERM,      0,            (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf,          "__builtin_ia32_permps",     IX86_BUILTIN_PERMPS,     0,            (int)MULTI_ARG_3_PERMPS },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df,          "__builtin_ia32_permpd",     IX86_BUILTIN_PERMPD,     0,            (int)MULTI_ARG_3_PERMPD },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww,          "__builtin_ia32_pmacssww",   IX86_BUILTIN_PMACSSWW,   0,            (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww,           "__builtin_ia32_pmacsww",    IX86_BUILTIN_PMACSWW,    0,            (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd,          "__builtin_ia32_pmacsswd",   IX86_BUILTIN_PMACSSWD,   0,            (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd,           "__builtin_ia32_pmacswd",    IX86_BUILTIN_PMACSWD,    0,            (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd,          "__builtin_ia32_pmacssdd",   IX86_BUILTIN_PMACSSDD,   0,            (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd,           "__builtin_ia32_pmacsdd",    IX86_BUILTIN_PMACSDD,    0,            (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql,         "__builtin_ia32_pmacssdql",  IX86_BUILTIN_PMACSSDQL,  0,            (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh,         "__builtin_ia32_pmacssdqh",  IX86_BUILTIN_PMACSSDQH,  0,            (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql,          "__builtin_ia32_pmacsdql",   IX86_BUILTIN_PMACSDQL,   0,            (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh,          "__builtin_ia32_pmacsdqh",   IX86_BUILTIN_PMACSDQH,   0,            (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd,         "__builtin_ia32_pmadcsswd",  IX86_BUILTIN_PMADCSSWD,  0,            (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd,          "__builtin_ia32_pmadcswd",   IX86_BUILTIN_PMADCSWD,   0,            (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3,        "__builtin_ia32_protq",      IX86_BUILTIN_PROTQ,      0,            (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3,        "__builtin_ia32_protd",      IX86_BUILTIN_PROTD,      0,            (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3,        "__builtin_ia32_protw",      IX86_BUILTIN_PROTW,      0,            (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3,       "__builtin_ia32_protb",      IX86_BUILTIN_PROTB,      0,            (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3,         "__builtin_ia32_protqi",     IX86_BUILTIN_PROTQ_IMM,  0,            (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3,         "__builtin_ia32_protdi",     IX86_BUILTIN_PROTD_IMM,  0,            (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3,         "__builtin_ia32_protwi",     IX86_BUILTIN_PROTW_IMM,  0,            (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3,        "__builtin_ia32_protbi",     IX86_BUILTIN_PROTB_IMM,  0,            (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3,         "__builtin_ia32_pshaq",      IX86_BUILTIN_PSHAQ,      0,            (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3,         "__builtin_ia32_pshad",      IX86_BUILTIN_PSHAD,      0,            (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3,         "__builtin_ia32_pshaw",      IX86_BUILTIN_PSHAW,      0,            (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3,        "__builtin_ia32_pshab",      IX86_BUILTIN_PSHAB,      0,            (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3,         "__builtin_ia32_pshlq",      IX86_BUILTIN_PSHLQ,      0,            (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3,         "__builtin_ia32_pshld",      IX86_BUILTIN_PSHLD,      0,            (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3,         "__builtin_ia32_pshlw",      IX86_BUILTIN_PSHLW,      0,            (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3,        "__builtin_ia32_pshlb",      IX86_BUILTIN_PSHLB,      0,            (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2,       "__builtin_ia32_frczss",     IX86_BUILTIN_FRCZSS,     0,            (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2,       "__builtin_ia32_frczsd",     IX86_BUILTIN_FRCZSD,     0,            (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2,         "__builtin_ia32_frczps",     IX86_BUILTIN_FRCZPS,     0,            (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2,         "__builtin_ia32_frczpd",     IX86_BUILTIN_FRCZPD,     0,            (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps,          "__builtin_ia32_cvtph2ps",   IX86_BUILTIN_CVTPH2PS,   0,            (int)MULTI_ARG_1_PH2PS },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph,          "__builtin_ia32_cvtps2ph",   IX86_BUILTIN_CVTPS2PH,   0,            (int)MULTI_ARG_1_PS2PH },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw,           "__builtin_ia32_phaddbw",    IX86_BUILTIN_PHADDBW,    0,            (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd,           "__builtin_ia32_phaddbd",    IX86_BUILTIN_PHADDBD,    0,            (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq,           "__builtin_ia32_phaddbq",    IX86_BUILTIN_PHADDBQ,    0,            (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd,           "__builtin_ia32_phaddwd",    IX86_BUILTIN_PHADDWD,    0,            (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq,           "__builtin_ia32_phaddwq",    IX86_BUILTIN_PHADDWQ,    0,            (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq,           "__builtin_ia32_phadddq",    IX86_BUILTIN_PHADDDQ,    0,            (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw,          "__builtin_ia32_phaddubw",   IX86_BUILTIN_PHADDUBW,   0,            (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd,          "__builtin_ia32_phaddubd",   IX86_BUILTIN_PHADDUBD,   0,            (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq,          "__builtin_ia32_phaddubq",   IX86_BUILTIN_PHADDUBQ,   0,            (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd,          "__builtin_ia32_phadduwd",   IX86_BUILTIN_PHADDUWD,   0,            (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq,          "__builtin_ia32_phadduwq",   IX86_BUILTIN_PHADDUWQ,   0,            (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq,          "__builtin_ia32_phaddudq",   IX86_BUILTIN_PHADDUDQ,   0,            (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw,           "__builtin_ia32_phsubbw",    IX86_BUILTIN_PHSUBBW,    0,            (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd,           "__builtin_ia32_phsubwd",    IX86_BUILTIN_PHSUBWD,    0,            (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq,           "__builtin_ia32_phsubdq",    IX86_BUILTIN_PHSUBDQ,    0,            (int)MULTI_ARG_1_SI_DI },

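  /* In the scalar compare table below, each builtin name spells out the
     comparison and the comu* forms use the unordered UNEQ, UNLT, UNLE,
     UNGT and UNGE RTX codes.  The comneqss, comneqsd, comuneqss and
     comuneqsd names are deliberate aliases that reuse the COMNESS,
     COMNESD, COMUNESS and COMUNESD builtin enums.  */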
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comeqss",    IX86_BUILTIN_COMEQSS,    EQ,           (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comness",    IX86_BUILTIN_COMNESS,    NE,           (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comneqss",   IX86_BUILTIN_COMNESS,    NE,           (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comltss",    IX86_BUILTIN_COMLTSS,    LT,           (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comless",    IX86_BUILTIN_COMLESS,    LE,           (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comgtss",    IX86_BUILTIN_COMGTSS,    GT,           (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comgess",    IX86_BUILTIN_COMGESS,    GE,           (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comueqss",   IX86_BUILTIN_COMUEQSS,   UNEQ,         (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comuness",   IX86_BUILTIN_COMUNESS,   LTGT,         (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comuneqss",  IX86_BUILTIN_COMUNESS,   LTGT,         (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comunltss",  IX86_BUILTIN_COMULTSS,   UNLT,         (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comunless",  IX86_BUILTIN_COMULESS,   UNLE,         (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comungtss",  IX86_BUILTIN_COMUGTSS,   UNGT,         (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comungess",  IX86_BUILTIN_COMUGESS,   UNGE,         (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comordss",   IX86_BUILTIN_COMORDSS,   ORDERED,      (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3,    "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED,    (int)MULTI_ARG_2_SF_CMP },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comeqsd",    IX86_BUILTIN_COMEQSD,    EQ,           (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comnesd",    IX86_BUILTIN_COMNESD,    NE,           (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comneqsd",   IX86_BUILTIN_COMNESD,    NE,           (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comltsd",    IX86_BUILTIN_COMLTSD,    LT,           (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comlesd",    IX86_BUILTIN_COMLESD,    LE,           (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comgtsd",    IX86_BUILTIN_COMGTSD,    GT,           (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comgesd",    IX86_BUILTIN_COMGESD,    GE,           (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comueqsd",   IX86_BUILTIN_COMUEQSD,   UNEQ,         (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comunesd",   IX86_BUILTIN_COMUNESD,   LTGT,         (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comuneqsd",  IX86_BUILTIN_COMUNESD,   LTGT,         (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comunltsd",  IX86_BUILTIN_COMULTSD,   UNLT,         (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comunlesd",  IX86_BUILTIN_COMULESD,   UNLE,         (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comungtsd",  IX86_BUILTIN_COMUGTSD,   UNGT,         (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comungesd",  IX86_BUILTIN_COMUGESD,   UNGE,         (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comordsd",   IX86_BUILTIN_COMORDSD,   ORDERED,      (int)MULTI_ARG_2_DF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3,    "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED,    (int)MULTI_ARG_2_DF_CMP },

  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comeqps",    IX86_BUILTIN_COMEQPS,    EQ,           (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comneps",    IX86_BUILTIN_COMNEPS,    NE,           (int)MULTI_ARG_2_SF_CMP },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comneqps",   IX86_BUILTIN_COMNEPS,    NE,           (int)MULTI_ARG_2_SF_CMP },
21893   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comltps",    IX86_BUILTIN_COMLTPS,    LT,           (int)MULTI_ARG_2_SF_CMP },
21894   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comleps",    IX86_BUILTIN_COMLEPS,    LE,           (int)MULTI_ARG_2_SF_CMP },
21895   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comgtps",    IX86_BUILTIN_COMGTPS,    GT,           (int)MULTI_ARG_2_SF_CMP },
21896   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comgeps",    IX86_BUILTIN_COMGEPS,    GE,           (int)MULTI_ARG_2_SF_CMP },
21897   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comueqps",   IX86_BUILTIN_COMUEQPS,   UNEQ,         (int)MULTI_ARG_2_SF_CMP },
21898   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comuneps",   IX86_BUILTIN_COMUNEPS,   LTGT,         (int)MULTI_ARG_2_SF_CMP },
21899   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comuneqps",  IX86_BUILTIN_COMUNEPS,   LTGT,         (int)MULTI_ARG_2_SF_CMP },
21900   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comunltps",  IX86_BUILTIN_COMULTPS,   UNLT,         (int)MULTI_ARG_2_SF_CMP },
21901   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comunleps",  IX86_BUILTIN_COMULEPS,   UNLE,         (int)MULTI_ARG_2_SF_CMP },
21902   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comungtps",  IX86_BUILTIN_COMUGTPS,   UNGT,         (int)MULTI_ARG_2_SF_CMP },
21903   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comungeps",  IX86_BUILTIN_COMUGEPS,   UNGE,         (int)MULTI_ARG_2_SF_CMP },
21904   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comordps",   IX86_BUILTIN_COMORDPS,   ORDERED,      (int)MULTI_ARG_2_SF_CMP },
21905   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3,      "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED,    (int)MULTI_ARG_2_SF_CMP },
21906 
21907   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comeqpd",    IX86_BUILTIN_COMEQPD,    EQ,           (int)MULTI_ARG_2_DF_CMP },
21908   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comnepd",    IX86_BUILTIN_COMNEPD,    NE,           (int)MULTI_ARG_2_DF_CMP },
21909   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comneqpd",   IX86_BUILTIN_COMNEPD,    NE,           (int)MULTI_ARG_2_DF_CMP },
21910   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comltpd",    IX86_BUILTIN_COMLTPD,    LT,           (int)MULTI_ARG_2_DF_CMP },
21911   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comlepd",    IX86_BUILTIN_COMLEPD,    LE,           (int)MULTI_ARG_2_DF_CMP },
21912   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comgtpd",    IX86_BUILTIN_COMGTPD,    GT,           (int)MULTI_ARG_2_DF_CMP },
21913   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comgepd",    IX86_BUILTIN_COMGEPD,    GE,           (int)MULTI_ARG_2_DF_CMP },
21914   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comueqpd",   IX86_BUILTIN_COMUEQPD,   UNEQ,         (int)MULTI_ARG_2_DF_CMP },
21915   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comunepd",   IX86_BUILTIN_COMUNEPD,   LTGT,         (int)MULTI_ARG_2_DF_CMP },
21916   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comuneqpd",  IX86_BUILTIN_COMUNEPD,   LTGT,         (int)MULTI_ARG_2_DF_CMP },
21917   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comunltpd",  IX86_BUILTIN_COMULTPD,   UNLT,         (int)MULTI_ARG_2_DF_CMP },
21918   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comunlepd",  IX86_BUILTIN_COMULEPD,   UNLE,         (int)MULTI_ARG_2_DF_CMP },
21919   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comungtpd",  IX86_BUILTIN_COMUGTPD,   UNGT,         (int)MULTI_ARG_2_DF_CMP },
21920   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comungepd",  IX86_BUILTIN_COMUGEPD,   UNGE,         (int)MULTI_ARG_2_DF_CMP },
21921   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comordpd",   IX86_BUILTIN_COMORDPD,   ORDERED,      (int)MULTI_ARG_2_DF_CMP },
21922   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3,      "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED,    (int)MULTI_ARG_2_DF_CMP },
21923 
21924   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomeqb",    IX86_BUILTIN_PCOMEQB,    EQ,           (int)MULTI_ARG_2_QI_CMP },
21925   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomneb",    IX86_BUILTIN_PCOMNEB,    NE,           (int)MULTI_ARG_2_QI_CMP },
21926   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomneqb",   IX86_BUILTIN_PCOMNEB,    NE,           (int)MULTI_ARG_2_QI_CMP },
21927   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomltb",    IX86_BUILTIN_PCOMLTB,    LT,           (int)MULTI_ARG_2_QI_CMP },
21928   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomleb",    IX86_BUILTIN_PCOMLEB,    LE,           (int)MULTI_ARG_2_QI_CMP },
21929   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomgtb",    IX86_BUILTIN_PCOMGTB,    GT,           (int)MULTI_ARG_2_QI_CMP },
21930   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3,     "__builtin_ia32_pcomgeb",    IX86_BUILTIN_PCOMGEB,    GE,           (int)MULTI_ARG_2_QI_CMP },
21931 
21932   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomeqw",    IX86_BUILTIN_PCOMEQW,    EQ,           (int)MULTI_ARG_2_HI_CMP },
21933   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomnew",    IX86_BUILTIN_PCOMNEW,    NE,           (int)MULTI_ARG_2_HI_CMP },
21934   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomneqw",   IX86_BUILTIN_PCOMNEW,    NE,           (int)MULTI_ARG_2_HI_CMP },
21935   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomltw",    IX86_BUILTIN_PCOMLTW,    LT,           (int)MULTI_ARG_2_HI_CMP },
21936   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomlew",    IX86_BUILTIN_PCOMLEW,    LE,           (int)MULTI_ARG_2_HI_CMP },
21937   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomgtw",    IX86_BUILTIN_PCOMGTW,    GT,           (int)MULTI_ARG_2_HI_CMP },
21938   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3,      "__builtin_ia32_pcomgew",    IX86_BUILTIN_PCOMGEW,    GE,           (int)MULTI_ARG_2_HI_CMP },
21939 
21940   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomeqd",    IX86_BUILTIN_PCOMEQD,    EQ,           (int)MULTI_ARG_2_SI_CMP },
21941   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomned",    IX86_BUILTIN_PCOMNED,    NE,           (int)MULTI_ARG_2_SI_CMP },
21942   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomneqd",   IX86_BUILTIN_PCOMNED,    NE,           (int)MULTI_ARG_2_SI_CMP },
21943   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomltd",    IX86_BUILTIN_PCOMLTD,    LT,           (int)MULTI_ARG_2_SI_CMP },
21944   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomled",    IX86_BUILTIN_PCOMLED,    LE,           (int)MULTI_ARG_2_SI_CMP },
21945   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomgtd",    IX86_BUILTIN_PCOMGTD,    GT,           (int)MULTI_ARG_2_SI_CMP },
21946   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3,      "__builtin_ia32_pcomged",    IX86_BUILTIN_PCOMGED,    GE,           (int)MULTI_ARG_2_SI_CMP },
21947 
21948   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomeqq",    IX86_BUILTIN_PCOMEQQ,    EQ,           (int)MULTI_ARG_2_DI_CMP },
21949   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomneq",    IX86_BUILTIN_PCOMNEQ,    NE,           (int)MULTI_ARG_2_DI_CMP },
21950   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomneqq",   IX86_BUILTIN_PCOMNEQ,    NE,           (int)MULTI_ARG_2_DI_CMP },
21951   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomltq",    IX86_BUILTIN_PCOMLTQ,    LT,           (int)MULTI_ARG_2_DI_CMP },
21952   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomleq",    IX86_BUILTIN_PCOMLEQ,    LE,           (int)MULTI_ARG_2_DI_CMP },
21953   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomgtq",    IX86_BUILTIN_PCOMGTQ,    GT,           (int)MULTI_ARG_2_DI_CMP },
21954   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3,      "__builtin_ia32_pcomgeq",    IX86_BUILTIN_PCOMGEQ,    GE,           (int)MULTI_ARG_2_DI_CMP },
21955 
21956   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb",   IX86_BUILTIN_PCOMEQUB,   EQ,           (int)MULTI_ARG_2_QI_CMP },
21957   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub",   IX86_BUILTIN_PCOMNEUB,   NE,           (int)MULTI_ARG_2_QI_CMP },
21958   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb",  IX86_BUILTIN_PCOMNEUB,   NE,           (int)MULTI_ARG_2_QI_CMP },
21959   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub",   IX86_BUILTIN_PCOMLTUB,   LTU,          (int)MULTI_ARG_2_QI_CMP },
21960   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub",   IX86_BUILTIN_PCOMLEUB,   LEU,          (int)MULTI_ARG_2_QI_CMP },
21961   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub",   IX86_BUILTIN_PCOMGTUB,   GTU,          (int)MULTI_ARG_2_QI_CMP },
21962   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub",   IX86_BUILTIN_PCOMGEUB,   GEU,          (int)MULTI_ARG_2_QI_CMP },
21963 
21964   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw",   IX86_BUILTIN_PCOMEQUW,   EQ,           (int)MULTI_ARG_2_HI_CMP },
21965   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw",   IX86_BUILTIN_PCOMNEUW,   NE,           (int)MULTI_ARG_2_HI_CMP },
21966   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw",  IX86_BUILTIN_PCOMNEUW,   NE,           (int)MULTI_ARG_2_HI_CMP },
21967   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3,  "__builtin_ia32_pcomltuw",   IX86_BUILTIN_PCOMLTUW,   LTU,          (int)MULTI_ARG_2_HI_CMP },
21968   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3,  "__builtin_ia32_pcomleuw",   IX86_BUILTIN_PCOMLEUW,   LEU,          (int)MULTI_ARG_2_HI_CMP },
21969   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3,  "__builtin_ia32_pcomgtuw",   IX86_BUILTIN_PCOMGTUW,   GTU,          (int)MULTI_ARG_2_HI_CMP },
21970   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3,  "__builtin_ia32_pcomgeuw",   IX86_BUILTIN_PCOMGEUW,   GEU,          (int)MULTI_ARG_2_HI_CMP },
21971 
21972   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd",   IX86_BUILTIN_PCOMEQUD,   EQ,           (int)MULTI_ARG_2_SI_CMP },
21973   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud",   IX86_BUILTIN_PCOMNEUD,   NE,           (int)MULTI_ARG_2_SI_CMP },
21974   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd",  IX86_BUILTIN_PCOMNEUD,   NE,           (int)MULTI_ARG_2_SI_CMP },
21975   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3,  "__builtin_ia32_pcomltud",   IX86_BUILTIN_PCOMLTUD,   LTU,          (int)MULTI_ARG_2_SI_CMP },
21976   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3,  "__builtin_ia32_pcomleud",   IX86_BUILTIN_PCOMLEUD,   LEU,          (int)MULTI_ARG_2_SI_CMP },
21977   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3,  "__builtin_ia32_pcomgtud",   IX86_BUILTIN_PCOMGTUD,   GTU,          (int)MULTI_ARG_2_SI_CMP },
21978   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3,  "__builtin_ia32_pcomgeud",   IX86_BUILTIN_PCOMGEUD,   GEU,          (int)MULTI_ARG_2_SI_CMP },
21979 
21980   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq",   IX86_BUILTIN_PCOMEQUQ,   EQ,           (int)MULTI_ARG_2_DI_CMP },
21981   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq",   IX86_BUILTIN_PCOMNEUQ,   NE,           (int)MULTI_ARG_2_DI_CMP },
21982   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq",  IX86_BUILTIN_PCOMNEUQ,   NE,           (int)MULTI_ARG_2_DI_CMP },
21983   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3,  "__builtin_ia32_pcomltuq",   IX86_BUILTIN_PCOMLTUQ,   LTU,          (int)MULTI_ARG_2_DI_CMP },
21984   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3,  "__builtin_ia32_pcomleuq",   IX86_BUILTIN_PCOMLEUQ,   LEU,          (int)MULTI_ARG_2_DI_CMP },
21985   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3,  "__builtin_ia32_pcomgtuq",   IX86_BUILTIN_PCOMGTUQ,   GTU,          (int)MULTI_ARG_2_DI_CMP },
21986   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3,  "__builtin_ia32_pcomgeuq",   IX86_BUILTIN_PCOMGEUQ,   GEU,          (int)MULTI_ARG_2_DI_CMP },
21987 
21988   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3,       "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S,  (int)MULTI_ARG_2_SF_TF },
21989   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3,       "__builtin_ia32_comtruess",  IX86_BUILTIN_COMTRUESS,  COM_TRUE_S,   (int)MULTI_ARG_2_SF_TF },
21990   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3,       "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P,  (int)MULTI_ARG_2_SF_TF },
21991   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3,       "__builtin_ia32_comtrueps",  IX86_BUILTIN_COMTRUEPS,  COM_TRUE_P,   (int)MULTI_ARG_2_SF_TF },
21992   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3,       "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S,  (int)MULTI_ARG_2_DF_TF },
21993   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3,       "__builtin_ia32_comtruesd",  IX86_BUILTIN_COMTRUESD,  COM_TRUE_S,   (int)MULTI_ARG_2_DF_TF },
21994   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3,       "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P,  (int)MULTI_ARG_2_DF_TF },
21995   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3,       "__builtin_ia32_comtruepd",  IX86_BUILTIN_COMTRUEPD,  COM_TRUE_P,   (int)MULTI_ARG_2_DF_TF },
21996 
21997   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3,     "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE,   (int)MULTI_ARG_2_QI_TF },
21998   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3,      "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE,   (int)MULTI_ARG_2_HI_TF },
21999   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3,      "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE,   (int)MULTI_ARG_2_SI_TF },
22000   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3,      "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE,   (int)MULTI_ARG_2_DI_TF },
22001   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3,     "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE,   (int)MULTI_ARG_2_QI_TF },
22002   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3,      "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE,   (int)MULTI_ARG_2_HI_TF },
22003   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3,      "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE,   (int)MULTI_ARG_2_SI_TF },
22004   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3,      "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE,   (int)MULTI_ARG_2_DI_TF },
22005 
22006   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3,     "__builtin_ia32_pcomtrueb",  IX86_BUILTIN_PCOMTRUEB,  PCOM_TRUE,    (int)MULTI_ARG_2_QI_TF },
22007   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3,      "__builtin_ia32_pcomtruew",  IX86_BUILTIN_PCOMTRUEW,  PCOM_TRUE,    (int)MULTI_ARG_2_HI_TF },
22008   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3,      "__builtin_ia32_pcomtrued",  IX86_BUILTIN_PCOMTRUED,  PCOM_TRUE,    (int)MULTI_ARG_2_SI_TF },
22009   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3,      "__builtin_ia32_pcomtrueq",  IX86_BUILTIN_PCOMTRUEQ,  PCOM_TRUE,    (int)MULTI_ARG_2_DI_TF },
22010   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3,     "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE,    (int)MULTI_ARG_2_QI_TF },
22011   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3,      "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE,    (int)MULTI_ARG_2_HI_TF },
22012   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3,      "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE,    (int)MULTI_ARG_2_SI_TF },
22013   { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3,      "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE,    (int)MULTI_ARG_2_DI_TF },
22014 };
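
/* Each entry above gives the ISA mask required to use the builtin, the
   insn code used to expand it, the builtin's name and enum value, an
   optional comparison code or sub-opcode in the flag field, and the
   multi-argument classification of its operands.  Aliases such as
   "pcomneqb"/"pcomneb" intentionally share one enum value.  */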

/* Set up all the MMX/SSE builtins, including builtins for instructions
   that are not in the current target ISA, so that the user can compile
   particular modules with target specific options that differ from the
   command line options.  */
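/* For example (illustrative only; any target-gated intrinsic would do),
   a unit compiled with plain -msse2 may still contain

       __attribute__((target ("sse4.1")))
       __m128i f (__m128i x, __m128i y) { return _mm_max_epi32 (x, y); }

   so the builtin behind _mm_max_epi32 must already be registered even
   though SSE4.1 is not enabled on the command line.  */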
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  size_t i;

  tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V1DI_type_node
    = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
  tree V2DI_type_node
    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);

  tree pchar_type_node = build_pointer_type (char_type_node);
  tree pcchar_type_node
    = build_pointer_type (build_type_variant (char_type_node, 1, 0));
  tree pfloat_type_node = build_pointer_type (float_type_node);
  tree pcfloat_type_node
    = build_pointer_type (build_type_variant (float_type_node, 1, 0));
  tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
  tree pcv2sf_type_node
    = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);

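  /* The function type nodes below follow this file's naming convention
     RET_ftype_ARG1_ARG2...; e.g. v4si_ftype_v4sf_v4sf denotes
     "V4SI (*) (V4SF, V4SF)".  */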
  /* Comparisons.  */
  tree int_ftype_v4sf_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v4si_ftype_v4sf_v4sf
    = build_function_type_list (V4SI_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  /* MMX/SSE/integer conversions.  */
  tree int_ftype_v4sf
    = build_function_type_list (integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int64_ftype_v4sf
    = build_function_type_list (long_long_integer_type_node,
                                V4SF_type_node, NULL_TREE);
  tree int_ftype_v8qi
    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, integer_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_int64
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2si
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2SI_type_node, NULL_TREE);

  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type_list (V8QI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v2si_v2si
    = build_function_type_list (V4HI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_int
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node,
                                integer_type_node, NULL_TREE);
  tree v2si_ftype_v4hi_v4hi
    = build_function_type_list (V2SI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_int
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, integer_type_node, NULL_TREE);
  tree v2si_ftype_v2si_int
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, integer_type_node, NULL_TREE);
  tree v1di_ftype_v1di_int
    = build_function_type_list (V1DI_type_node,
                                V1DI_type_node, integer_type_node, NULL_TREE);

  tree void_ftype_void
    = build_function_type (void_type_node, void_list_node);
  tree void_ftype_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
  tree void_ftype_unsigned_unsigned
    = build_function_type_list (void_type_node, unsigned_type_node,
                                unsigned_type_node, NULL_TREE);
  tree void_ftype_pcvoid_unsigned_unsigned
    = build_function_type_list (void_type_node, const_ptr_type_node,
                                unsigned_type_node, unsigned_type_node,
                                NULL_TREE);
  tree unsigned_ftype_void
    = build_function_type (unsigned_type_node, void_list_node);
  tree v2si_ftype_v4sf
    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
  /* Loads/stores.  */
  tree void_ftype_v8qi_v8qi_pchar
    = build_function_type_list (void_type_node,
                                V8QI_type_node, V8QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v4sf_ftype_pcfloat
    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_pcv2sf
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, pcv2sf_type_node, NULL_TREE);
  tree void_ftype_pv2sf_v4sf
    = build_function_type_list (void_type_node,
                                pv2sf_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pfloat_v4sf
    = build_function_type_list (void_type_node,
                                pfloat_type_node, V4SF_type_node, NULL_TREE);
  tree void_ftype_pdi_di
    = build_function_type_list (void_type_node,
                                pdi_type_node, long_long_unsigned_type_node,
                                NULL_TREE);
  tree void_ftype_pv2di_v2di
    = build_function_type_list (void_type_node,
                                pv2di_type_node, V2DI_type_node, NULL_TREE);
  /* Normal vector unops.  */
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi
    = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi
    = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si
    = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi
    = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi
    = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);

  /* Normal vector binops.  */
  tree v4sf_ftype_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type_list (V8QI_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type_list (V4HI_type_node,
                                V4HI_type_node, V4HI_type_node, NULL_TREE);
  tree v2si_ftype_v2si_v2si
    = build_function_type_list (V2SI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v1di_ftype_v1di_v1di
    = build_function_type_list (V1DI_type_node,
                                V1DI_type_node, V1DI_type_node, NULL_TREE);
  tree v1di_ftype_v1di_v1di_int
    = build_function_type_list (V1DI_type_node,
                                V1DI_type_node, V1DI_type_node,
                                integer_type_node, NULL_TREE);
  tree v2si_ftype_v2sf
    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2si
    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
  tree v2si_ftype_v2si
    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf
    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2sf_ftype_v2sf_v2sf
    = build_function_type_list (V2SF_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree v2si_ftype_v2sf_v2sf
    = build_function_type_list (V2SI_type_node,
                                V2SF_type_node, V2SF_type_node, NULL_TREE);
  tree pint_type_node    = build_pointer_type (integer_type_node);
  tree pdouble_type_node = build_pointer_type (double_type_node);
  tree pcdouble_type_node
    = build_pointer_type (build_type_variant (double_type_node, 1, 0));
  tree int_ftype_v2df_v2df
    = build_function_type_list (integer_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);

  tree void_ftype_pcvoid
    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
  tree v4sf_ftype_v4si
    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v4sf
    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v4si
    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
  tree v4si_ftype_v2df
    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4si_ftype_v2df_v2df
    = build_function_type_list (V4SI_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2si_ftype_v2df
    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v2df
    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2si
    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
  tree v2df_ftype_v4sf
    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree int_ftype_v2df
    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
  tree int64_ftype_v2df
    = build_function_type_list (long_long_integer_type_node,
                                V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, integer_type_node, NULL_TREE);
  tree v2df_ftype_v2df_int64
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, long_long_integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4sf_v2df
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v4sf
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df_int
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree v2df_ftype_v2df_pcdouble
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree void_ftype_pdouble_v2df
    = build_function_type_list (void_type_node,
                                pdouble_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pint_int
    = build_function_type_list (void_type_node,
                                pint_type_node, integer_type_node, NULL_TREE);
  tree void_ftype_v16qi_v16qi_pchar
    = build_function_type_list (void_type_node,
                                V16QI_type_node, V16QI_type_node,
                                pchar_type_node, NULL_TREE);
  tree v2df_ftype_pcdouble
    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
  tree v2df_ftype_v2df_v2df
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi
    = build_function_type_list (V16QI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v4si_ftype_v4si_v4si
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v2di_ftype_v2df_v2df
    = build_function_type_list (V2DI_type_node,
                                V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree v2di_ftype_v2di_int
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node, integer_type_node, NULL_TREE);
  tree v2di_ftype_v2di_v2di_int
    = build_function_type_list (V2DI_type_node, V2DI_type_node,
                                V2DI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v4si_int
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node, integer_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi_int
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node, integer_type_node, NULL_TREE);
  tree v4si_ftype_v8hi_v8hi
    = build_function_type_list (V4SI_type_node,
                                V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v1di_ftype_v8qi_v8qi
    = build_function_type_list (V1DI_type_node,
                                V8QI_type_node, V8QI_type_node, NULL_TREE);
  tree v1di_ftype_v2si_v2si
    = build_function_type_list (V1DI_type_node,
                                V2SI_type_node, V2SI_type_node, NULL_TREE);
  tree v2di_ftype_v16qi_v16qi
    = build_function_type_list (V2DI_type_node,
                                V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v2di_ftype_v4si_v4si
    = build_function_type_list (V2DI_type_node,
                                V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree int_ftype_v16qi
    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
  tree v16qi_ftype_pcchar
    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
  tree void_ftype_pchar_v16qi
    = build_function_type_list (void_type_node,
                                pchar_type_node, V16QI_type_node, NULL_TREE);

  tree v2di_ftype_v2di_unsigned_unsigned
    = build_function_type_list (V2DI_type_node, V2DI_type_node,
                                unsigned_type_node, unsigned_type_node,
                                NULL_TREE);
  tree v2di_ftype_v2di_v2di_unsigned_unsigned
    = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
                                unsigned_type_node, unsigned_type_node,
                                NULL_TREE);
  tree v2di_ftype_v2di_v16qi
    = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
                                NULL_TREE);
  tree v2df_ftype_v2df_v2df_v2df
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node, V2DF_type_node,
                                V2DF_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf_v4sf_v4sf
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node, V4SF_type_node,
                                V4SF_type_node, NULL_TREE);
  tree v8hi_ftype_v16qi
    = build_function_type_list (V8HI_type_node, V16QI_type_node,
                                NULL_TREE);
  tree v4si_ftype_v16qi
    = build_function_type_list (V4SI_type_node, V16QI_type_node,
                                NULL_TREE);
  tree v2di_ftype_v16qi
    = build_function_type_list (V2DI_type_node, V16QI_type_node,
                                NULL_TREE);
  tree v4si_ftype_v8hi
    = build_function_type_list (V4SI_type_node, V8HI_type_node,
                                NULL_TREE);
  tree v2di_ftype_v8hi
    = build_function_type_list (V2DI_type_node, V8HI_type_node,
                                NULL_TREE);
  tree v2di_ftype_v4si
    = build_function_type_list (V2DI_type_node, V4SI_type_node,
                                NULL_TREE);
  tree v2di_ftype_pv2di
    = build_function_type_list (V2DI_type_node, pv2di_type_node,
                                NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi_int
    = build_function_type_list (V16QI_type_node, V16QI_type_node,
                                V16QI_type_node, integer_type_node,
                                NULL_TREE);
  tree v16qi_ftype_v16qi_v16qi_v16qi
    = build_function_type_list (V16QI_type_node, V16QI_type_node,
                                V16QI_type_node, V16QI_type_node,
                                NULL_TREE);
  tree v8hi_ftype_v8hi_v8hi_int
    = build_function_type_list (V8HI_type_node, V8HI_type_node,
                                V8HI_type_node, integer_type_node,
                                NULL_TREE);
  tree v4si_ftype_v4si_v4si_int
    = build_function_type_list (V4SI_type_node, V4SI_type_node,
                                V4SI_type_node, integer_type_node,
                                NULL_TREE);
  tree int_ftype_v2di_v2di
    = build_function_type_list (integer_type_node,
                                V2DI_type_node, V2DI_type_node,
                                NULL_TREE);
  tree int_ftype_v16qi_int_v16qi_int_int
    = build_function_type_list (integer_type_node,
                                V16QI_type_node,
                                integer_type_node,
                                V16QI_type_node,
                                integer_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree v16qi_ftype_v16qi_int_v16qi_int_int
    = build_function_type_list (V16QI_type_node,
                                V16QI_type_node,
                                integer_type_node,
                                V16QI_type_node,
                                integer_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree int_ftype_v16qi_v16qi_int
    = build_function_type_list (integer_type_node,
                                V16QI_type_node,
                                V16QI_type_node,
                                integer_type_node,
                                NULL_TREE);

  /* SSE5 instructions.  */
  tree v2di_ftype_v2di_v2di_v2di
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node,
                                V2DI_type_node,
                                V2DI_type_node,
                                NULL_TREE);

  tree v4si_ftype_v4si_v4si_v4si
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node,
                                V4SI_type_node,
                                V4SI_type_node,
                                NULL_TREE);

  tree v4si_ftype_v4si_v4si_v2di
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node,
                                V4SI_type_node,
                                V2DI_type_node,
                                NULL_TREE);

  tree v8hi_ftype_v8hi_v8hi_v8hi
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node,
                                V8HI_type_node,
                                V8HI_type_node,
                                NULL_TREE);

  tree v8hi_ftype_v8hi_v8hi_v4si
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node,
                                V8HI_type_node,
                                V4SI_type_node,
                                NULL_TREE);

  tree v2df_ftype_v2df_v2df_v16qi
    = build_function_type_list (V2DF_type_node,
                                V2DF_type_node,
                                V2DF_type_node,
                                V16QI_type_node,
                                NULL_TREE);

  tree v4sf_ftype_v4sf_v4sf_v16qi
    = build_function_type_list (V4SF_type_node,
                                V4SF_type_node,
                                V4SF_type_node,
                                V16QI_type_node,
                                NULL_TREE);

  tree v2di_ftype_v2di_si
    = build_function_type_list (V2DI_type_node,
                                V2DI_type_node,
                                integer_type_node,
                                NULL_TREE);

  tree v4si_ftype_v4si_si
    = build_function_type_list (V4SI_type_node,
                                V4SI_type_node,
                                integer_type_node,
                                NULL_TREE);

  tree v8hi_ftype_v8hi_si
    = build_function_type_list (V8HI_type_node,
                                V8HI_type_node,
                                integer_type_node,
                                NULL_TREE);

  tree v16qi_ftype_v16qi_si
    = build_function_type_list (V16QI_type_node,
                                V16QI_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4hi
    = build_function_type_list (V4SF_type_node,
                                V4HI_type_node,
                                NULL_TREE);

  tree v4hi_ftype_v4sf
    = build_function_type_list (V4HI_type_node,
                                V4SF_type_node,
                                NULL_TREE);

  tree v2di_ftype_v2di
    = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);

  tree v16qi_ftype_v8hi_v8hi
    = build_function_type_list (V16QI_type_node,
                                V8HI_type_node, V8HI_type_node,
                                NULL_TREE);
  tree v8hi_ftype_v4si_v4si
    = build_function_type_list (V8HI_type_node,
                                V4SI_type_node, V4SI_type_node,
                                NULL_TREE);
  tree v8hi_ftype_v16qi_v16qi
    = build_function_type_list (V8HI_type_node,
                                V16QI_type_node, V16QI_type_node,
                                NULL_TREE);
  tree v4hi_ftype_v8qi_v8qi
    = build_function_type_list (V4HI_type_node,
                                V8QI_type_node, V8QI_type_node,
                                NULL_TREE);
  tree unsigned_ftype_unsigned_uchar
    = build_function_type_list (unsigned_type_node,
                                unsigned_type_node,
                                unsigned_char_type_node,
                                NULL_TREE);
  tree unsigned_ftype_unsigned_ushort
    = build_function_type_list (unsigned_type_node,
                                unsigned_type_node,
                                short_unsigned_type_node,
                                NULL_TREE);
  tree unsigned_ftype_unsigned_unsigned
    = build_function_type_list (unsigned_type_node,
                                unsigned_type_node,
                                unsigned_type_node,
                                NULL_TREE);
  tree uint64_ftype_uint64_uint64
    = build_function_type_list (long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                long_long_unsigned_type_node,
                                NULL_TREE);
  tree float_ftype_float
    = build_function_type_list (float_type_node,
                                float_type_node,
                                NULL_TREE);

  /* AVX builtins.  */
  tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
                                                     V32QImode);
  tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
                                                    V8SImode);
  tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
                                                    V8SFmode);
  tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
                                                    V4DImode);
  tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
                                                    V4DFmode);
  tree v8sf_ftype_v8sf
    = build_function_type_list (V8SF_type_node,
                                V8SF_type_node,
                                NULL_TREE);
  tree v8si_ftype_v8sf
    = build_function_type_list (V8SI_type_node,
                                V8SF_type_node,
                                NULL_TREE);
  tree v8sf_ftype_v8si
    = build_function_type_list (V8SF_type_node,
                                V8SI_type_node,
                                NULL_TREE);
  tree v4si_ftype_v4df
    = build_function_type_list (V4SI_type_node,
                                V4DF_type_node,
                                NULL_TREE);
  tree v4df_ftype_v4df
    = build_function_type_list (V4DF_type_node,
                                V4DF_type_node,
                                NULL_TREE);
  tree v4df_ftype_v4si
    = build_function_type_list (V4DF_type_node,
                                V4SI_type_node,
                                NULL_TREE);
  tree v4df_ftype_v4sf
    = build_function_type_list (V4DF_type_node,
                                V4SF_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v4df
    = build_function_type_list (V4SF_type_node,
                                V4DF_type_node,
                                NULL_TREE);
  tree v8sf_ftype_v8sf_v8sf
    = build_function_type_list (V8SF_type_node,
                                V8SF_type_node, V8SF_type_node,
                                NULL_TREE);
  tree v4df_ftype_v4df_v4df
    = build_function_type_list (V4DF_type_node,
                                V4DF_type_node, V4DF_type_node,
                                NULL_TREE);
  tree v8sf_ftype_v8sf_int
    = build_function_type_list (V8SF_type_node,
                                V8SF_type_node, integer_type_node,
                                NULL_TREE);
  tree v4si_ftype_v8si_int
    = build_function_type_list (V4SI_type_node,
                                V8SI_type_node, integer_type_node,
                                NULL_TREE);
  tree v4df_ftype_v4df_int
    = build_function_type_list (V4DF_type_node,
                                V4DF_type_node, integer_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v8sf_int
    = build_function_type_list (V4SF_type_node,
                                V8SF_type_node, integer_type_node,
                                NULL_TREE);
  tree v2df_ftype_v4df_int
    = build_function_type_list (V2DF_type_node,
                                V4DF_type_node, integer_type_node,
                                NULL_TREE);
  tree v8sf_ftype_v8sf_v8sf_int
    = build_function_type_list (V8SF_type_node,
                                V8SF_type_node, V8SF_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree v8sf_ftype_v8sf_v8sf_v8sf
    = build_function_type_list (V8SF_type_node,
                                V8SF_type_node, V8SF_type_node,
                                V8SF_type_node,
                                NULL_TREE);
  tree v4df_ftype_v4df_v4df_v4df
    = build_function_type_list (V4DF_type_node,
                                V4DF_type_node, V4DF_type_node,
                                V4DF_type_node,
                                NULL_TREE);
  tree v8si_ftype_v8si_v8si_int
    = build_function_type_list (V8SI_type_node,
                                V8SI_type_node, V8SI_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree v4df_ftype_v4df_v4df_int
    = build_function_type_list (V4DF_type_node,
                                V4DF_type_node, V4DF_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree v8sf_ftype_pcfloat
    = build_function_type_list (V8SF_type_node,
                                pcfloat_type_node,
                                NULL_TREE);
  tree v4df_ftype_pcdouble
    = build_function_type_list (V4DF_type_node,
                                pcdouble_type_node,
                                NULL_TREE);
  tree pcv4sf_type_node
    = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
  tree pcv2df_type_node
    = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
  tree v8sf_ftype_pcv4sf
    = build_function_type_list (V8SF_type_node,
                                pcv4sf_type_node,
                                NULL_TREE);
  tree v4df_ftype_pcv2df
    = build_function_type_list (V4DF_type_node,
                                pcv2df_type_node,
                                NULL_TREE);
  tree v32qi_ftype_pcchar
    = build_function_type_list (V32QI_type_node,
                                pcchar_type_node,
                                NULL_TREE);
  tree void_ftype_pchar_v32qi
    = build_function_type_list (void_type_node,
                                pchar_type_node, V32QI_type_node,
                                NULL_TREE);
  tree v8si_ftype_v8si_v4si_int
    = build_function_type_list (V8SI_type_node,
                                V8SI_type_node, V4SI_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree pv4di_type_node = build_pointer_type (V4DI_type_node);
  tree void_ftype_pv4di_v4di
    = build_function_type_list (void_type_node,
                                pv4di_type_node, V4DI_type_node,
                                NULL_TREE);
  tree v8sf_ftype_v8sf_v4sf_int
    = build_function_type_list (V8SF_type_node,
                                V8SF_type_node, V4SF_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree v4df_ftype_v4df_v2df_int
    = build_function_type_list (V4DF_type_node,
                                V4DF_type_node, V2DF_type_node,
                                integer_type_node,
                                NULL_TREE);
  tree void_ftype_pfloat_v8sf
    = build_function_type_list (void_type_node,
                                pfloat_type_node, V8SF_type_node,
                                NULL_TREE);
  tree void_ftype_pdouble_v4df
    = build_function_type_list (void_type_node,
                                pdouble_type_node, V4DF_type_node,
                                NULL_TREE);
  tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
  tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
  tree pv4df_type_node = build_pointer_type (V4DF_type_node);
  tree pv2df_type_node = build_pointer_type (V2DF_type_node);
  tree pcv8sf_type_node
    = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
  tree pcv4df_type_node
    = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
  tree v8sf_ftype_pcv8sf_v8sf
    = build_function_type_list (V8SF_type_node,
                                pcv8sf_type_node, V8SF_type_node,
                                NULL_TREE);
  tree v4df_ftype_pcv4df_v4df
    = build_function_type_list (V4DF_type_node,
                                pcv4df_type_node, V4DF_type_node,
                                NULL_TREE);
  tree v4sf_ftype_pcv4sf_v4sf
    = build_function_type_list (V4SF_type_node,
                                pcv4sf_type_node, V4SF_type_node,
                                NULL_TREE);
  tree v2df_ftype_pcv2df_v2df
    = build_function_type_list (V2DF_type_node,
                                pcv2df_type_node, V2DF_type_node,
                                NULL_TREE);
  tree void_ftype_pv8sf_v8sf_v8sf
    = build_function_type_list (void_type_node,
                                pv8sf_type_node, V8SF_type_node,
                                V8SF_type_node,
                                NULL_TREE);
  tree void_ftype_pv4df_v4df_v4df
    = build_function_type_list (void_type_node,
                                pv4df_type_node, V4DF_type_node,
                                V4DF_type_node,
                                NULL_TREE);
  tree void_ftype_pv4sf_v4sf_v4sf
    = build_function_type_list (void_type_node,
                                pv4sf_type_node, V4SF_type_node,
                                V4SF_type_node,
                                NULL_TREE);
  tree void_ftype_pv2df_v2df_v2df
    = build_function_type_list (void_type_node,
                                pv2df_type_node, V2DF_type_node,
                                V2DF_type_node,
                                NULL_TREE);
  tree v4df_ftype_v2df
    = build_function_type_list (V4DF_type_node,
                                V2DF_type_node,
                                NULL_TREE);
  tree v8sf_ftype_v4sf
    = build_function_type_list (V8SF_type_node,
                                V4SF_type_node,
                                NULL_TREE);
  tree v8si_ftype_v4si
    = build_function_type_list (V8SI_type_node,
                                V4SI_type_node,
                                NULL_TREE);
  tree v2df_ftype_v4df
    = build_function_type_list (V2DF_type_node,
                                V4DF_type_node,
                                NULL_TREE);
  tree v4sf_ftype_v8sf
    = build_function_type_list (V4SF_type_node,
                                V8SF_type_node,
                                NULL_TREE);
  tree v4si_ftype_v8si
    = build_function_type_list (V4SI_type_node,
                                V8SI_type_node,
                                NULL_TREE);
  tree int_ftype_v4df
    = build_function_type_list (integer_type_node,
                                V4DF_type_node,
                                NULL_TREE);
  tree int_ftype_v8sf
    = build_function_type_list (integer_type_node,
                                V8SF_type_node,
                                NULL_TREE);
  tree int_ftype_v8sf_v8sf
    = build_function_type_list (integer_type_node,
                                V8SF_type_node, V8SF_type_node,
                                NULL_TREE);
  tree int_ftype_v4di_v4di
    = build_function_type_list (integer_type_node,
                                V4DI_type_node, V4DI_type_node,
                                NULL_TREE);
  tree int_ftype_v4df_v4df
    = build_function_type_list (integer_type_node,
22759                                 V4DF_type_node, V4DF_type_node,
22760                                 NULL_TREE);
22761   tree v8sf_ftype_v8sf_v8si
22762     = build_function_type_list (V8SF_type_node,
22763                                 V8SF_type_node, V8SI_type_node,
22764                                 NULL_TREE);
22765   tree v4df_ftype_v4df_v4di
22766     = build_function_type_list (V4DF_type_node,
22767                                 V4DF_type_node, V4DI_type_node,
22768                                 NULL_TREE);
22769   tree v4sf_ftype_v4sf_v4si
22770     = build_function_type_list (V4SF_type_node,
22771                                 V4SF_type_node, V4SI_type_node, NULL_TREE);
22772   tree v2df_ftype_v2df_v2di
22773     = build_function_type_list (V2DF_type_node,
22774                                 V2DF_type_node, V2DI_type_node, NULL_TREE);
22775 
22776   tree ftype;
22777 
22778   /* Add all special builtins with a variable number of operands.  */
22779   for (i = 0, d = bdesc_special_args;
22780        i < ARRAY_SIZE (bdesc_special_args);
22781        i++, d++)
22782     {
22783       tree type;
22784 
22785       if (d->name == 0)
22786         continue;
22787 
22788       switch ((enum ix86_special_builtin_type) d->flag)
22789         {
22790         case VOID_FTYPE_VOID:
22791           type = void_ftype_void;
22792           break;
22793         case V32QI_FTYPE_PCCHAR:
22794           type = v32qi_ftype_pcchar;
22795           break;
22796         case V16QI_FTYPE_PCCHAR:
22797           type = v16qi_ftype_pcchar;
22798           break;
22799         case V8SF_FTYPE_PCV4SF:
22800           type = v8sf_ftype_pcv4sf;
22801           break;
22802         case V8SF_FTYPE_PCFLOAT:
22803           type = v8sf_ftype_pcfloat;
22804           break;
22805         case V4DF_FTYPE_PCV2DF:
22806           type = v4df_ftype_pcv2df;
22807           break;
22808         case V4DF_FTYPE_PCDOUBLE:
22809           type = v4df_ftype_pcdouble;
22810           break;
22811         case V4SF_FTYPE_PCFLOAT:
22812           type = v4sf_ftype_pcfloat;
22813           break;
22814         case V2DI_FTYPE_PV2DI:
22815           type = v2di_ftype_pv2di;
22816           break;
22817         case V2DF_FTYPE_PCDOUBLE:
22818           type = v2df_ftype_pcdouble;
22819           break;
22820         case V8SF_FTYPE_PCV8SF_V8SF:
22821           type = v8sf_ftype_pcv8sf_v8sf;
22822           break;
22823         case V4DF_FTYPE_PCV4DF_V4DF:
22824           type = v4df_ftype_pcv4df_v4df;
22825           break;
22826         case V4SF_FTYPE_V4SF_PCV2SF:
22827           type = v4sf_ftype_v4sf_pcv2sf;
22828           break;
22829         case V4SF_FTYPE_PCV4SF_V4SF:
22830           type = v4sf_ftype_pcv4sf_v4sf;
22831           break;
22832         case V2DF_FTYPE_V2DF_PCDOUBLE:
22833           type = v2df_ftype_v2df_pcdouble;
22834           break;
22835         case V2DF_FTYPE_PCV2DF_V2DF:
22836           type = v2df_ftype_pcv2df_v2df;
22837           break;
22838         case VOID_FTYPE_PV2SF_V4SF:
22839           type = void_ftype_pv2sf_v4sf;
22840           break;
22841         case VOID_FTYPE_PV4DI_V4DI:
22842           type = void_ftype_pv4di_v4di;
22843           break;
22844         case VOID_FTYPE_PV2DI_V2DI:
22845           type = void_ftype_pv2di_v2di;
22846           break;
22847         case VOID_FTYPE_PCHAR_V32QI:
22848           type = void_ftype_pchar_v32qi;
22849           break;
22850         case VOID_FTYPE_PCHAR_V16QI:
22851           type = void_ftype_pchar_v16qi;
22852           break;
22853         case VOID_FTYPE_PFLOAT_V8SF:
22854           type = void_ftype_pfloat_v8sf;
22855           break;
22856         case VOID_FTYPE_PFLOAT_V4SF:
22857           type = void_ftype_pfloat_v4sf;
22858           break;
22859         case VOID_FTYPE_PDOUBLE_V4DF:
22860           type = void_ftype_pdouble_v4df;
22861           break;
22862         case VOID_FTYPE_PDOUBLE_V2DF:
22863           type = void_ftype_pdouble_v2df;
22864           break;
22865         case VOID_FTYPE_PDI_DI:
22866           type = void_ftype_pdi_di;
22867           break;
22868         case VOID_FTYPE_PINT_INT:
22869           type = void_ftype_pint_int;
22870           break;
22871         case VOID_FTYPE_PV8SF_V8SF_V8SF:
22872           type = void_ftype_pv8sf_v8sf_v8sf;
22873           break;
22874         case VOID_FTYPE_PV4DF_V4DF_V4DF:
22875           type = void_ftype_pv4df_v4df_v4df;
22876           break;
22877         case VOID_FTYPE_PV4SF_V4SF_V4SF:
22878           type = void_ftype_pv4sf_v4sf_v4sf;
22879           break;
22880         case VOID_FTYPE_PV2DF_V2DF_V2DF:
22881           type = void_ftype_pv2df_v2df_v2df;
22882           break;
22883         default:
22884           gcc_unreachable ();
22885         }
22886 
22887       def_builtin (d->mask, d->name, type, d->code);
22888     }
22889 
22890   /* Add all builtins with a variable number of operands.  */
22891   for (i = 0, d = bdesc_args;
22892        i < ARRAY_SIZE (bdesc_args);
22893        i++, d++)
22894     {
22895       tree type;
22896 
22897       if (d->name == 0)
22898         continue;
22899 
22900       switch ((enum ix86_builtin_type) d->flag)
22901         {
22902         case FLOAT_FTYPE_FLOAT:
22903           type = float_ftype_float;
22904           break;
22905         case INT_FTYPE_V8SF_V8SF_PTEST:
22906           type = int_ftype_v8sf_v8sf;
22907           break;
22908         case INT_FTYPE_V4DI_V4DI_PTEST:
22909           type = int_ftype_v4di_v4di;
22910           break;
22911         case INT_FTYPE_V4DF_V4DF_PTEST:
22912           type = int_ftype_v4df_v4df;
22913           break;
22914         case INT_FTYPE_V4SF_V4SF_PTEST:
22915           type = int_ftype_v4sf_v4sf;
22916           break;
22917         case INT_FTYPE_V2DI_V2DI_PTEST:
22918           type = int_ftype_v2di_v2di;
22919           break;
22920         case INT_FTYPE_V2DF_V2DF_PTEST:
22921           type = int_ftype_v2df_v2df;
22922           break;
22923         case INT64_FTYPE_V4SF:
22924           type = int64_ftype_v4sf;
22925           break;
22926         case INT64_FTYPE_V2DF:
22927           type = int64_ftype_v2df;
22928           break;
22929         case INT_FTYPE_V16QI:
22930           type = int_ftype_v16qi;
22931           break;
22932         case INT_FTYPE_V8QI:
22933           type = int_ftype_v8qi;
22934           break;
22935         case INT_FTYPE_V8SF:
22936           type = int_ftype_v8sf;
22937           break;
22938         case INT_FTYPE_V4DF:
22939           type = int_ftype_v4df;
22940           break;
22941         case INT_FTYPE_V4SF:
22942           type = int_ftype_v4sf;
22943           break;
22944         case INT_FTYPE_V2DF:
22945           type = int_ftype_v2df;
22946           break;
22947         case V16QI_FTYPE_V16QI:
22948           type = v16qi_ftype_v16qi;
22949           break;
22950         case V8SI_FTYPE_V8SF:
22951           type = v8si_ftype_v8sf;
22952           break;
22953         case V8SI_FTYPE_V4SI:
22954           type = v8si_ftype_v4si;
22955           break;
22956         case V8HI_FTYPE_V8HI:
22957           type = v8hi_ftype_v8hi;
22958           break;
22959         case V8HI_FTYPE_V16QI:
22960           type = v8hi_ftype_v16qi;
22961           break;
22962         case V8QI_FTYPE_V8QI:
22963           type = v8qi_ftype_v8qi;
22964           break;
22965         case V8SF_FTYPE_V8SF:
22966           type = v8sf_ftype_v8sf;
22967           break;
22968         case V8SF_FTYPE_V8SI:
22969           type = v8sf_ftype_v8si;
22970           break;
22971         case V8SF_FTYPE_V4SF:
22972           type = v8sf_ftype_v4sf;
22973           break;
22974         case V4SI_FTYPE_V4DF:
22975           type = v4si_ftype_v4df;
22976           break;
22977         case V4SI_FTYPE_V4SI:
22978           type = v4si_ftype_v4si;
22979           break;
22980         case V4SI_FTYPE_V16QI:
22981           type = v4si_ftype_v16qi;
22982           break;
22983         case V4SI_FTYPE_V8SI:
22984           type = v4si_ftype_v8si;
22985           break;
22986         case V4SI_FTYPE_V8HI:
22987           type = v4si_ftype_v8hi;
22988           break;
22989         case V4SI_FTYPE_V4SF:
22990           type = v4si_ftype_v4sf;
22991           break;
22992         case V4SI_FTYPE_V2DF:
22993           type = v4si_ftype_v2df;
22994           break;
22995         case V4HI_FTYPE_V4HI:
22996           type = v4hi_ftype_v4hi;
22997           break;
22998         case V4DF_FTYPE_V4DF:
22999           type = v4df_ftype_v4df;
23000           break;
23001         case V4DF_FTYPE_V4SI:
23002           type = v4df_ftype_v4si;
23003           break;
23004         case V4DF_FTYPE_V4SF:
23005           type = v4df_ftype_v4sf;
23006           break;
23007         case V4DF_FTYPE_V2DF:
23008           type = v4df_ftype_v2df;
23009           break;
23010         case V4SF_FTYPE_V4SF:
23011         case V4SF_FTYPE_V4SF_VEC_MERGE:
23012           type = v4sf_ftype_v4sf;
23013           break;
23014         case V4SF_FTYPE_V8SF:
23015           type = v4sf_ftype_v8sf;
23016           break;
23017         case V4SF_FTYPE_V4SI:
23018           type = v4sf_ftype_v4si;
23019           break;
23020         case V4SF_FTYPE_V4DF:
23021           type = v4sf_ftype_v4df;
23022           break;
23023         case V4SF_FTYPE_V2DF:
23024           type = v4sf_ftype_v2df;
23025           break;
23026         case V2DI_FTYPE_V2DI:
23027           type = v2di_ftype_v2di;
23028           break;
23029         case V2DI_FTYPE_V16QI:
23030           type = v2di_ftype_v16qi;
23031           break;
23032         case V2DI_FTYPE_V8HI:
23033           type = v2di_ftype_v8hi;
23034           break;
23035         case V2DI_FTYPE_V4SI:
23036           type = v2di_ftype_v4si;
23037           break;
23038         case V2SI_FTYPE_V2SI:
23039           type = v2si_ftype_v2si;
23040           break;
23041         case V2SI_FTYPE_V4SF:
23042           type = v2si_ftype_v4sf;
23043           break;
23044         case V2SI_FTYPE_V2DF:
23045           type = v2si_ftype_v2df;
23046           break;
23047         case V2SI_FTYPE_V2SF:
23048           type = v2si_ftype_v2sf;
23049           break;
23050         case V2DF_FTYPE_V4DF:
23051           type = v2df_ftype_v4df;
23052           break;
23053         case V2DF_FTYPE_V4SF:
23054           type = v2df_ftype_v4sf;
23055           break;
23056         case V2DF_FTYPE_V2DF:
23057         case V2DF_FTYPE_V2DF_VEC_MERGE:
23058           type = v2df_ftype_v2df;
23059           break;
23060         case V2DF_FTYPE_V2SI:
23061           type = v2df_ftype_v2si;
23062           break;
23063         case V2DF_FTYPE_V4SI:
23064           type = v2df_ftype_v4si;
23065           break;
23066         case V2SF_FTYPE_V2SF:
23067           type = v2sf_ftype_v2sf;
23068           break;
23069         case V2SF_FTYPE_V2SI:
23070           type = v2sf_ftype_v2si;
23071           break;
23072         case V16QI_FTYPE_V16QI_V16QI:
23073           type = v16qi_ftype_v16qi_v16qi;
23074           break;
23075         case V16QI_FTYPE_V8HI_V8HI:
23076           type = v16qi_ftype_v8hi_v8hi;
23077           break;
23078         case V8QI_FTYPE_V8QI_V8QI:
23079           type = v8qi_ftype_v8qi_v8qi;
23080           break;
23081         case V8QI_FTYPE_V4HI_V4HI:
23082           type = v8qi_ftype_v4hi_v4hi;
23083           break;
23084         case V8HI_FTYPE_V8HI_V8HI:
23085         case V8HI_FTYPE_V8HI_V8HI_COUNT:
23086           type = v8hi_ftype_v8hi_v8hi;
23087           break;
23088         case V8HI_FTYPE_V16QI_V16QI:
23089           type = v8hi_ftype_v16qi_v16qi;
23090           break;
23091         case V8HI_FTYPE_V4SI_V4SI:
23092           type = v8hi_ftype_v4si_v4si;
23093           break;
23094         case V8HI_FTYPE_V8HI_SI_COUNT:
23095           type = v8hi_ftype_v8hi_int;
23096           break;
23097         case V8SF_FTYPE_V8SF_V8SF:
23098           type = v8sf_ftype_v8sf_v8sf;
23099           break;
23100         case V8SF_FTYPE_V8SF_V8SI:
23101           type = v8sf_ftype_v8sf_v8si;
23102           break;
23103         case V4SI_FTYPE_V4SI_V4SI:
23104         case V4SI_FTYPE_V4SI_V4SI_COUNT:
23105           type = v4si_ftype_v4si_v4si;
23106           break;
23107         case V4SI_FTYPE_V8HI_V8HI:
23108           type = v4si_ftype_v8hi_v8hi;
23109           break;
23110         case V4SI_FTYPE_V4SF_V4SF:
23111           type = v4si_ftype_v4sf_v4sf;
23112           break;
23113         case V4SI_FTYPE_V2DF_V2DF:
23114           type = v4si_ftype_v2df_v2df;
23115           break;
23116         case V4SI_FTYPE_V4SI_SI_COUNT:
23117           type = v4si_ftype_v4si_int;
23118           break;
23119         case V4HI_FTYPE_V4HI_V4HI:
23120         case V4HI_FTYPE_V4HI_V4HI_COUNT:
23121           type = v4hi_ftype_v4hi_v4hi;
23122           break;
23123         case V4HI_FTYPE_V8QI_V8QI:
23124           type = v4hi_ftype_v8qi_v8qi;
23125           break;
23126         case V4HI_FTYPE_V2SI_V2SI:
23127           type = v4hi_ftype_v2si_v2si;
23128           break;
23129         case V4HI_FTYPE_V4HI_SI_COUNT:
23130           type = v4hi_ftype_v4hi_int;
23131           break;
23132         case V4DF_FTYPE_V4DF_V4DF:
23133           type = v4df_ftype_v4df_v4df;
23134           break;
23135         case V4DF_FTYPE_V4DF_V4DI:
23136           type = v4df_ftype_v4df_v4di;
23137           break;
23138         case V4SF_FTYPE_V4SF_V4SF:
23139         case V4SF_FTYPE_V4SF_V4SF_SWAP:
23140           type = v4sf_ftype_v4sf_v4sf;
23141           break;
23142         case V4SF_FTYPE_V4SF_V4SI:
23143           type = v4sf_ftype_v4sf_v4si;
23144           break;
23145         case V4SF_FTYPE_V4SF_V2SI:
23146           type = v4sf_ftype_v4sf_v2si;
23147           break;
23148         case V4SF_FTYPE_V4SF_V2DF:
23149           type = v4sf_ftype_v4sf_v2df;
23150           break;
23151         case V4SF_FTYPE_V4SF_DI:
23152           type = v4sf_ftype_v4sf_int64;
23153           break;
23154         case V4SF_FTYPE_V4SF_SI:
23155           type = v4sf_ftype_v4sf_int;
23156           break;
23157         case V2DI_FTYPE_V2DI_V2DI:
23158         case V2DI_FTYPE_V2DI_V2DI_COUNT:
23159           type = v2di_ftype_v2di_v2di;
23160           break;
23161         case V2DI_FTYPE_V16QI_V16QI:
23162           type = v2di_ftype_v16qi_v16qi;
23163           break;
23164         case V2DI_FTYPE_V4SI_V4SI:
23165           type = v2di_ftype_v4si_v4si;
23166           break;
23167         case V2DI_FTYPE_V2DI_V16QI:
23168           type = v2di_ftype_v2di_v16qi;
23169           break;
23170         case V2DI_FTYPE_V2DF_V2DF:
23171           type = v2di_ftype_v2df_v2df;
23172           break;
23173         case V2DI_FTYPE_V2DI_SI_COUNT:
23174           type = v2di_ftype_v2di_int;
23175           break;
23176         case V2SI_FTYPE_V2SI_V2SI:
23177         case V2SI_FTYPE_V2SI_V2SI_COUNT:
23178           type = v2si_ftype_v2si_v2si;
23179           break;
23180         case V2SI_FTYPE_V4HI_V4HI:
23181           type = v2si_ftype_v4hi_v4hi;
23182           break;
23183         case V2SI_FTYPE_V2SF_V2SF:
23184           type = v2si_ftype_v2sf_v2sf;
23185           break;
23186         case V2SI_FTYPE_V2SI_SI_COUNT:
23187           type = v2si_ftype_v2si_int;
23188           break;
23189         case V2DF_FTYPE_V2DF_V2DF:
23190         case V2DF_FTYPE_V2DF_V2DF_SWAP:
23191           type = v2df_ftype_v2df_v2df;
23192           break;
23193         case V2DF_FTYPE_V2DF_V4SF:
23194           type = v2df_ftype_v2df_v4sf;
23195           break;
23196         case V2DF_FTYPE_V2DF_V2DI:
23197           type = v2df_ftype_v2df_v2di;
23198           break;
23199         case V2DF_FTYPE_V2DF_DI:
23200           type = v2df_ftype_v2df_int64;
23201           break;
23202         case V2DF_FTYPE_V2DF_SI:
23203           type = v2df_ftype_v2df_int;
23204           break;
23205         case V2SF_FTYPE_V2SF_V2SF:
23206           type = v2sf_ftype_v2sf_v2sf;
23207           break;
23208         case V1DI_FTYPE_V1DI_V1DI:
23209         case V1DI_FTYPE_V1DI_V1DI_COUNT:
23210           type = v1di_ftype_v1di_v1di;
23211           break;
23212         case V1DI_FTYPE_V8QI_V8QI:
23213           type = v1di_ftype_v8qi_v8qi;
23214           break;
23215         case V1DI_FTYPE_V2SI_V2SI:
23216           type = v1di_ftype_v2si_v2si;
23217           break;
23218         case V1DI_FTYPE_V1DI_SI_COUNT:
23219           type = v1di_ftype_v1di_int;
23220           break;
23221         case UINT64_FTYPE_UINT64_UINT64:
23222           type = uint64_ftype_uint64_uint64;
23223           break;
23224         case UINT_FTYPE_UINT_UINT:
23225           type = unsigned_ftype_unsigned_unsigned;
23226           break;
23227         case UINT_FTYPE_UINT_USHORT:
23228           type = unsigned_ftype_unsigned_ushort;
23229           break;
23230         case UINT_FTYPE_UINT_UCHAR:
23231           type = unsigned_ftype_unsigned_uchar;
23232           break;
23233         case V8HI_FTYPE_V8HI_INT:
23234           type = v8hi_ftype_v8hi_int;
23235           break;
23236         case V8SF_FTYPE_V8SF_INT:
23237           type = v8sf_ftype_v8sf_int;
23238           break;
23239         case V4SI_FTYPE_V4SI_INT:
23240           type = v4si_ftype_v4si_int;
23241           break;
23242         case V4SI_FTYPE_V8SI_INT:
23243           type = v4si_ftype_v8si_int;
23244           break;
23245         case V4HI_FTYPE_V4HI_INT:
23246           type = v4hi_ftype_v4hi_int;
23247           break;
23248         case V4DF_FTYPE_V4DF_INT:
23249           type = v4df_ftype_v4df_int;
23250           break;
23251         case V4SF_FTYPE_V4SF_INT:
23252           type = v4sf_ftype_v4sf_int;
23253           break;
23254         case V4SF_FTYPE_V8SF_INT:
23255           type = v4sf_ftype_v8sf_int;
23256           break;
23257         case V2DI_FTYPE_V2DI_INT:
23258         case V2DI2TI_FTYPE_V2DI_INT:
23259           type = v2di_ftype_v2di_int;
23260           break;
23261         case V2DF_FTYPE_V2DF_INT:
23262           type = v2df_ftype_v2df_int;
23263           break;
23264         case V2DF_FTYPE_V4DF_INT:
23265           type = v2df_ftype_v4df_int;
23266           break;
23267         case V16QI_FTYPE_V16QI_V16QI_V16QI:
23268           type = v16qi_ftype_v16qi_v16qi_v16qi;
23269           break;
23270         case V8SF_FTYPE_V8SF_V8SF_V8SF:
23271           type = v8sf_ftype_v8sf_v8sf_v8sf;
23272           break;
23273         case V4DF_FTYPE_V4DF_V4DF_V4DF:
23274           type = v4df_ftype_v4df_v4df_v4df;
23275           break;
23276         case V4SF_FTYPE_V4SF_V4SF_V4SF:
23277           type = v4sf_ftype_v4sf_v4sf_v4sf;
23278           break;
23279         case V2DF_FTYPE_V2DF_V2DF_V2DF:
23280           type = v2df_ftype_v2df_v2df_v2df;
23281           break;
23282         case V16QI_FTYPE_V16QI_V16QI_INT:
23283           type = v16qi_ftype_v16qi_v16qi_int;
23284           break;
23285         case V8SI_FTYPE_V8SI_V8SI_INT:
23286           type = v8si_ftype_v8si_v8si_int;
23287           break;
23288         case V8SI_FTYPE_V8SI_V4SI_INT:
23289           type = v8si_ftype_v8si_v4si_int;
23290           break;
23291         case V8HI_FTYPE_V8HI_V8HI_INT:
23292           type = v8hi_ftype_v8hi_v8hi_int;
23293           break;
23294         case V8SF_FTYPE_V8SF_V8SF_INT:
23295           type = v8sf_ftype_v8sf_v8sf_int;
23296           break;
23297         case V8SF_FTYPE_V8SF_V4SF_INT:
23298           type = v8sf_ftype_v8sf_v4sf_int;
23299           break;
23300         case V4SI_FTYPE_V4SI_V4SI_INT:
23301           type = v4si_ftype_v4si_v4si_int;
23302           break;
23303         case V4DF_FTYPE_V4DF_V4DF_INT:
23304           type = v4df_ftype_v4df_v4df_int;
23305           break;
23306         case V4DF_FTYPE_V4DF_V2DF_INT:
23307           type = v4df_ftype_v4df_v2df_int;
23308           break;
23309         case V4SF_FTYPE_V4SF_V4SF_INT:
23310           type = v4sf_ftype_v4sf_v4sf_int;
23311           break;
23312         case V2DI_FTYPE_V2DI_V2DI_INT:
23313         case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23314           type = v2di_ftype_v2di_v2di_int;
23315           break;
23316         case V2DF_FTYPE_V2DF_V2DF_INT:
23317           type = v2df_ftype_v2df_v2df_int;
23318           break;
23319         case V2DI_FTYPE_V2DI_UINT_UINT:
23320           type = v2di_ftype_v2di_unsigned_unsigned;
23321           break;
23322         case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23323           type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23324           break;
23325         case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23326           type = v1di_ftype_v1di_v1di_int;
23327           break;
23328         default:
23329           gcc_unreachable ();
23330         }
23331 
23332       def_builtin_const (d->mask, d->name, type, d->code);
23333     }
23334 
23335   /* pcmpestr[im] insns.  */
23336   for (i = 0, d = bdesc_pcmpestr;
23337        i < ARRAY_SIZE (bdesc_pcmpestr);
23338        i++, d++)
23339     {
23340       if (d->code == IX86_BUILTIN_PCMPESTRM128)
23341         ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23342       else
23343         ftype = int_ftype_v16qi_int_v16qi_int_int;
23344       def_builtin_const (d->mask, d->name, ftype, d->code);
23345     }
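        /* Illustrative usage sketch (not part of this file); with -msse4.2
           the explicit-length form takes two data/length pairs followed by
           the 8-bit mode immediate:
             int idx = __builtin_ia32_pcmpestri128 (a, la, b, lb, 0x0c);  */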
23346 
23347   /* pcmpistr[im] insns.  */
23348   for (i = 0, d = bdesc_pcmpistr;
23349        i < ARRAY_SIZE (bdesc_pcmpistr);
23350        i++, d++)
23351     {
23352       if (d->code == IX86_BUILTIN_PCMPISTRM128)
23353         ftype = v16qi_ftype_v16qi_v16qi_int;
23354       else
23355         ftype = int_ftype_v16qi_v16qi_int;
23356       def_builtin_const (d->mask, d->name, ftype, d->code);
23357     }
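        /* Illustrative usage sketch (not part of this file): the
           implicit-length forms drop the two length operands:
             int idx = __builtin_ia32_pcmpistri128 (a, b, 0x0c);  */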
23358 
23359   /* comi/ucomi insns.  */
23360   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23361     if (d->mask == OPTION_MASK_ISA_SSE2)
23362       def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23363     else
23364       def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23365 
23366   /* SSE */
23367   def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23368   def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23369 
23370   /* SSE or 3DNow!A */
23371   def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23372 
23373   /* SSE2 */
23374   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23375 
23376   def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23377   x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23378 
23379   /* SSE3.  */
23380   def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23381   def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23382 
23383   /* AES */
23384   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23385   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23386   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23387   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23388   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23389   def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23390 
23391   /* PCLMUL */
23392   def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
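        /* Illustrative usage sketch (not part of this file): the immediate
           selects which 64-bit halves enter the carry-less multiply, e.g.
           the low halves of both v2di operands:
             v2di prod = __builtin_ia32_pclmulqdq128 (a, b, 0x00);  */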
23393 
23394   /* AVX */
23395   def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23396                TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23397 
23398   /* Access to the vec_init patterns.  */
23399   ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23400                                     integer_type_node, NULL_TREE);
23401   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23402 
23403   ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23404                                     short_integer_type_node,
23405                                     short_integer_type_node,
23406                                     short_integer_type_node, NULL_TREE);
23407   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23408 
23409   ftype = build_function_type_list (V8QI_type_node, char_type_node,
23410                                     char_type_node, char_type_node,
23411                                     char_type_node, char_type_node,
23412                                     char_type_node, char_type_node,
23413                                     char_type_node, NULL_TREE);
23414   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
23415 
23416   /* Access to the vec_extract patterns.  */
23417   ftype = build_function_type_list (double_type_node, V2DF_type_node,
23418                                     integer_type_node, NULL_TREE);
23419   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23420 
23421   ftype = build_function_type_list (long_long_integer_type_node,
23422                                     V2DI_type_node, integer_type_node,
23423                                     NULL_TREE);
23424   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23425 
23426   ftype = build_function_type_list (float_type_node, V4SF_type_node,
23427                                     integer_type_node, NULL_TREE);
23428   def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23429 
23430   ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23431                                     integer_type_node, NULL_TREE);
23432   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23433 
23434   ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23435                                     integer_type_node, NULL_TREE);
23436   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23437 
23438   ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23439                                     integer_type_node, NULL_TREE);
23440   def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23441 
23442   ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23443                                     integer_type_node, NULL_TREE);
23444   def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23445 
23446   ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23447                                     integer_type_node, NULL_TREE);
23448   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
23449 
23450   /* Access to the vec_set patterns.  */
23451   ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23452                                     intDI_type_node,
23453                                     integer_type_node, NULL_TREE);
23454   def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23455 
23456   ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23457                                     float_type_node,
23458                                     integer_type_node, NULL_TREE);
23459   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23460 
23461   ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23462                                     intSI_type_node,
23463                                     integer_type_node, NULL_TREE);
23464   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23465 
23466   ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23467                                     intHI_type_node,
23468                                     integer_type_node, NULL_TREE);
23469   def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23470 
23471   ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23472                                     intHI_type_node,
23473                                     integer_type_node, NULL_TREE);
23474   def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23475 
23476   ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23477                                     intQI_type_node,
23478                                     integer_type_node, NULL_TREE);
23479   def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
23480 
23481   /* Add the SSE5 multi-arg instructions.  */
23482   for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23483     {
23484       tree mtype = NULL_TREE;
23485 
23486       if (d->name == 0)
23487         continue;
23488 
23489       switch ((enum multi_arg_type)d->flag)
23490         {
23491         case MULTI_ARG_3_SF:     mtype = v4sf_ftype_v4sf_v4sf_v4sf;     break;
23492         case MULTI_ARG_3_DF:     mtype = v2df_ftype_v2df_v2df_v2df;     break;
23493         case MULTI_ARG_3_DI:     mtype = v2di_ftype_v2di_v2di_v2di;     break;
23494         case MULTI_ARG_3_SI:     mtype = v4si_ftype_v4si_v4si_v4si;     break;
23495         case MULTI_ARG_3_SI_DI:  mtype = v4si_ftype_v4si_v4si_v2di;     break;
23496         case MULTI_ARG_3_HI:     mtype = v8hi_ftype_v8hi_v8hi_v8hi;     break;
23497         case MULTI_ARG_3_HI_SI:  mtype = v8hi_ftype_v8hi_v8hi_v4si;     break;
23498         case MULTI_ARG_3_QI:     mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23499         case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi;    break;
23500         case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi;    break;
23501         case MULTI_ARG_2_SF:     mtype = v4sf_ftype_v4sf_v4sf;          break;
23502         case MULTI_ARG_2_DF:     mtype = v2df_ftype_v2df_v2df;          break;
23503         case MULTI_ARG_2_DI:     mtype = v2di_ftype_v2di_v2di;          break;
23504         case MULTI_ARG_2_SI:     mtype = v4si_ftype_v4si_v4si;          break;
23505         case MULTI_ARG_2_HI:     mtype = v8hi_ftype_v8hi_v8hi;          break;
23506         case MULTI_ARG_2_QI:     mtype = v16qi_ftype_v16qi_v16qi;       break;
23507         case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si;            break;
23508         case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si;            break;
23509         case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si;            break;
23510         case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si;          break;
23511         case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf;          break;
23512         case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df;          break;
23513         case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di;          break;
23514         case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si;          break;
23515         case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi;          break;
23516         case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi;       break;
23517         case MULTI_ARG_2_SF_TF:  mtype = v4sf_ftype_v4sf_v4sf;          break;
23518         case MULTI_ARG_2_DF_TF:  mtype = v2df_ftype_v2df_v2df;          break;
23519         case MULTI_ARG_2_DI_TF:  mtype = v2di_ftype_v2di_v2di;          break;
23520         case MULTI_ARG_2_SI_TF:  mtype = v4si_ftype_v4si_v4si;          break;
23521         case MULTI_ARG_2_HI_TF:  mtype = v8hi_ftype_v8hi_v8hi;          break;
23522         case MULTI_ARG_2_QI_TF:  mtype = v16qi_ftype_v16qi_v16qi;       break;
23523         case MULTI_ARG_1_SF:     mtype = v4sf_ftype_v4sf;               break;
23524         case MULTI_ARG_1_DF:     mtype = v2df_ftype_v2df;               break;
23525         case MULTI_ARG_1_DI:     mtype = v2di_ftype_v2di;               break;
23526         case MULTI_ARG_1_SI:     mtype = v4si_ftype_v4si;               break;
23527         case MULTI_ARG_1_HI:     mtype = v8hi_ftype_v8hi;               break;
23528         case MULTI_ARG_1_QI:     mtype = v16qi_ftype_v16qi;             break;
23529         case MULTI_ARG_1_SI_DI:  mtype = v2di_ftype_v4si;               break;
23530         case MULTI_ARG_1_HI_DI:  mtype = v2di_ftype_v8hi;               break;
23531         case MULTI_ARG_1_HI_SI:  mtype = v4si_ftype_v8hi;               break;
23532         case MULTI_ARG_1_QI_DI:  mtype = v2di_ftype_v16qi;              break;
23533         case MULTI_ARG_1_QI_SI:  mtype = v4si_ftype_v16qi;              break;
23534         case MULTI_ARG_1_QI_HI:  mtype = v8hi_ftype_v16qi;              break;
23535         case MULTI_ARG_1_PH2PS:  mtype = v4sf_ftype_v4hi;               break;
23536         case MULTI_ARG_1_PS2PH:  mtype = v4hi_ftype_v4sf;               break;
23537         case MULTI_ARG_UNKNOWN:
23538         default:
23539           gcc_unreachable ();
23540         }
23541 
23542       if (mtype)
23543         def_builtin_const (d->mask, d->name, mtype, d->code);
23544     }
23545 }
23546 
23547 /* Internal subroutine of ix86_init_builtins.  */
23548 
23549 static void
23550 ix86_init_builtins_va_builtins_abi (void)
23551 {
23552   tree ms_va_ref, sysv_va_ref;
23553   tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23554   tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23555   tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23556   tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
23557 
23558   if (!TARGET_64BIT)
23559     return;
23560   fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23561   fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
23562   ms_va_ref = build_reference_type (ms_va_list_type_node);
23563   sysv_va_ref =
23564     build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
23565 
23566   fnvoid_va_end_ms =
23567     build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23568   fnvoid_va_start_ms =
23569     build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23570   fnvoid_va_end_sysv =
23571     build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23572   fnvoid_va_start_sysv =
23573     build_varargs_function_type_list (void_type_node, sysv_va_ref,
23574                                        NULL_TREE);
23575   fnvoid_va_copy_ms =
23576     build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23577                               NULL_TREE);
23578   fnvoid_va_copy_sysv =
23579     build_function_type_list (void_type_node, sysv_va_ref,
23580                               sysv_va_ref, NULL_TREE);
23581 
23582   add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23583                         BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23584   add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23585                         BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23586   add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23587                         BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23588   add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23589                         BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23590   add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23591                         BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23592   add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23593                         BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23594 }
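      /* Illustrative usage sketch (not part of this file): on 64-bit
         targets these let a function take varargs in the other ABI,
         assuming __builtin_ms_va_list is the registered user-level name
         of ms_va_list_type_node:

           void __attribute__ ((ms_abi)) f (int n, ...)
           {
             __builtin_ms_va_list ap;
             __builtin_ms_va_start (ap, n);
             ...
             __builtin_ms_va_end (ap);
           }  */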
23595 
23596 static void
23597 ix86_init_builtins (void)
23598 {
23599   tree float128_type_node = make_node (REAL_TYPE);
23600   tree ftype, decl;
23601 
23602   /* The __float80 type.  */
23603   if (TYPE_MODE (long_double_type_node) == XFmode)
23604     (*lang_hooks.types.register_builtin_type) (long_double_type_node,
23605                                                "__float80");
23606   else
23607     {
23608       /* Otherwise, lay out the 80-bit type explicitly.  */
23609       tree float80_type_node = make_node (REAL_TYPE);
23610 
23611       TYPE_PRECISION (float80_type_node) = 80;
23612       layout_type (float80_type_node);
23613       (*lang_hooks.types.register_builtin_type) (float80_type_node,
23614                                                  "__float80");
23615     }
23616 
23617   /* The __float128 type.  */
23618   TYPE_PRECISION (float128_type_node) = 128;
23619   layout_type (float128_type_node);
23620   (*lang_hooks.types.register_builtin_type) (float128_type_node,
23621                                              "__float128");
23622 
23623   /* TFmode support builtins.  */
23624   ftype = build_function_type (float128_type_node, void_list_node);
23625   decl = add_builtin_function ("__builtin_infq", ftype,
23626                                IX86_BUILTIN_INFQ, BUILT_IN_MD,
23627                                NULL, NULL_TREE);
23628   ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
23629 
23630   /* We will expand them to normal calls if SSE2 isn't available, since
23631      they are used by libgcc.  */
23632   ftype = build_function_type_list (float128_type_node,
23633                                     float128_type_node,
23634                                     NULL_TREE);
23635   decl = add_builtin_function ("__builtin_fabsq", ftype,
23636                                IX86_BUILTIN_FABSQ, BUILT_IN_MD,
23637                                "__fabstf2", NULL_TREE);
23638   ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
23639   TREE_READONLY (decl) = 1;
23640 
23641   ftype = build_function_type_list (float128_type_node,
23642                                     float128_type_node,
23643                                     float128_type_node,
23644                                     NULL_TREE);
23645   decl = add_builtin_function ("__builtin_copysignq", ftype,
23646                                IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
23647                                "__copysigntf3", NULL_TREE);
23648   ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
23649   TREE_READONLY (decl) = 1;
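        /* Illustrative usage sketch (not part of this file): the three
           TFmode builtins above act on __float128 values:
             __float128 inf = __builtin_infq ();
             __float128 mag = __builtin_fabsq (x);
             __float128 s = __builtin_copysignq (mag, y);  */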
23650 
23651   ix86_init_mmx_sse_builtins ();
23652   if (TARGET_64BIT)
23653     ix86_init_builtins_va_builtins_abi ();
23654 }
23655 
23656 /* Errors in the source file can cause expand_expr to return const0_rtx
23657    where we expect a vector.  To avoid crashing, use one of the vector
23658    clear instructions.  */
23659 static rtx
23660 safe_vector_operand (rtx x, enum machine_mode mode)
23661 {
23662   if (x == const0_rtx)
23663     x = CONST0_RTX (mode);
23664   return x;
23665 }
23666 
23667 /* Subroutine of ix86_expand_builtin to take care of binop insns.  */
23668 
23669 static rtx
23670 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23671 {
23672   rtx pat;
23673   tree arg0 = CALL_EXPR_ARG (exp, 0);
23674   tree arg1 = CALL_EXPR_ARG (exp, 1);
23675   rtx op0 = expand_normal (arg0);
23676   rtx op1 = expand_normal (arg1);
23677   enum machine_mode tmode = insn_data[icode].operand[0].mode;
23678   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23679   enum machine_mode mode1 = insn_data[icode].operand[2].mode;
23680 
23681   if (VECTOR_MODE_P (mode0))
23682     op0 = safe_vector_operand (op0, mode0);
23683   if (VECTOR_MODE_P (mode1))
23684     op1 = safe_vector_operand (op1, mode1);
23685 
23686   if (optimize || !target
23687       || GET_MODE (target) != tmode
23688       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23689     target = gen_reg_rtx (tmode);
23690 
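        /* If the insn wants a TImode operand but the argument arrived as
           an SImode value (e.g. a shift count), load it into the low
           element of a V4SI register and view that register as TImode.  */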
23691   if (GET_MODE (op1) == SImode && mode1 == TImode)
23692     {
23693       rtx x = gen_reg_rtx (V4SImode);
23694       emit_insn (gen_sse2_loadd (x, op1));
23695       op1 = gen_lowpart (TImode, x);
23696     }
23697 
23698   if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
23699     op0 = copy_to_mode_reg (mode0, op0);
23700   if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
23701     op1 = copy_to_mode_reg (mode1, op1);
23702 
23703   pat = GEN_FCN (icode) (target, op0, op1);
23704   if (! pat)
23705     return 0;
23706 
23707   emit_insn (pat);
23708 
23709   return target;
23710 }
23711 
23712 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */
23713 
23714 static rtx
23715 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23716                                enum multi_arg_type m_type,
23717                                enum insn_code sub_code)
23718 {
23719   rtx pat;
23720   int i;
23721   int nargs;
23722   bool comparison_p = false;
23723   bool tf_p = false;
23724   bool last_arg_constant = false;
23725   int num_memory = 0;
23726   struct {
23727     rtx op;
23728     enum machine_mode mode;
23729   } args[4];
23730 
23731   enum machine_mode tmode = insn_data[icode].operand[0].mode;
23732 
23733   switch (m_type)
23734     {
23735     case MULTI_ARG_3_SF:
23736     case MULTI_ARG_3_DF:
23737     case MULTI_ARG_3_DI:
23738     case MULTI_ARG_3_SI:
23739     case MULTI_ARG_3_SI_DI:
23740     case MULTI_ARG_3_HI:
23741     case MULTI_ARG_3_HI_SI:
23742     case MULTI_ARG_3_QI:
23743     case MULTI_ARG_3_PERMPS:
23744     case MULTI_ARG_3_PERMPD:
23745       nargs = 3;
23746       break;
23747 
23748     case MULTI_ARG_2_SF:
23749     case MULTI_ARG_2_DF:
23750     case MULTI_ARG_2_DI:
23751     case MULTI_ARG_2_SI:
23752     case MULTI_ARG_2_HI:
23753     case MULTI_ARG_2_QI:
23754       nargs = 2;
23755       break;
23756 
23757     case MULTI_ARG_2_DI_IMM:
23758     case MULTI_ARG_2_SI_IMM:
23759     case MULTI_ARG_2_HI_IMM:
23760     case MULTI_ARG_2_QI_IMM:
23761       nargs = 2;
23762       last_arg_constant = true;
23763       break;
23764 
23765     case MULTI_ARG_1_SF:
23766     case MULTI_ARG_1_DF:
23767     case MULTI_ARG_1_DI:
23768     case MULTI_ARG_1_SI:
23769     case MULTI_ARG_1_HI:
23770     case MULTI_ARG_1_QI:
23771     case MULTI_ARG_1_SI_DI:
23772     case MULTI_ARG_1_HI_DI:
23773     case MULTI_ARG_1_HI_SI:
23774     case MULTI_ARG_1_QI_DI:
23775     case MULTI_ARG_1_QI_SI:
23776     case MULTI_ARG_1_QI_HI:
23777     case MULTI_ARG_1_PH2PS:
23778     case MULTI_ARG_1_PS2PH:
23779       nargs = 1;
23780       break;
23781 
23782     case MULTI_ARG_2_SF_CMP:
23783     case MULTI_ARG_2_DF_CMP:
23784     case MULTI_ARG_2_DI_CMP:
23785     case MULTI_ARG_2_SI_CMP:
23786     case MULTI_ARG_2_HI_CMP:
23787     case MULTI_ARG_2_QI_CMP:
23788       nargs = 2;
23789       comparison_p = true;
23790       break;
23791 
23792     case MULTI_ARG_2_SF_TF:
23793     case MULTI_ARG_2_DF_TF:
23794     case MULTI_ARG_2_DI_TF:
23795     case MULTI_ARG_2_SI_TF:
23796     case MULTI_ARG_2_HI_TF:
23797     case MULTI_ARG_2_QI_TF:
23798       nargs = 2;
23799       tf_p = true;
23800       break;
23801 
23802     case MULTI_ARG_UNKNOWN:
23803     default:
23804       gcc_unreachable ();
23805     }
23806 
23807   if (optimize || !target
23808       || GET_MODE (target) != tmode
23809       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23810     target = gen_reg_rtx (tmode);
23811 
23812   gcc_assert (nargs <= 4);
23813 
23814   for (i = 0; i < nargs; i++)
23815     {
23816       tree arg = CALL_EXPR_ARG (exp, i);
23817       rtx op = expand_normal (arg);
23818       int adjust = (comparison_p) ? 1 : 0;
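            /* Insn operand 0 is the destination and, for comparisons, the
               comparison rtx itself is operand 1, so argument I maps to
               insn operand I + ADJUST + 1.  */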
23819       enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
23820 
23821       if (last_arg_constant && i == nargs-1)
23822         {
23823           if (GET_CODE (op) != CONST_INT)
23824             {
23825               error ("last argument must be an immediate");
23826               return gen_reg_rtx (tmode);
23827             }
23828         }
23829       else
23830         {
23831           if (VECTOR_MODE_P (mode))
23832             op = safe_vector_operand (op, mode);
23833 
23834           /* If we aren't optimizing, only allow one memory operand to be
23835              generated.  */
23836           if (memory_operand (op, mode))
23837             num_memory++;
23838 
23839           gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
23840 
23841           if (optimize
23842               || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
23843               || num_memory > 1)
23844             op = force_reg (mode, op);
23845         }
23846 
23847       args[i].op = op;
23848       args[i].mode = mode;
23849     }
23850 
23851   switch (nargs)
23852     {
23853     case 1:
23854       pat = GEN_FCN (icode) (target, args[0].op);
23855       break;
23856 
23857     case 2:
23858       if (tf_p)
23859         pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
23860                                GEN_INT ((int)sub_code));
23861       else if (! comparison_p)
23862         pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
23863       else
23864         {
23865           rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
23866                                        args[0].op,
23867                                        args[1].op);
23868 
23869           pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
23870         }
23871       break;
23872 
23873     case 3:
23874       pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
23875       break;
23876 
23877     default:
23878       gcc_unreachable ();
23879     }
23880 
23881   if (! pat)
23882     return 0;
23883 
23884   emit_insn (pat);
23885   return target;
23886 }
23887 
23888 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
23889    insns with vec_merge.  */
23890 
23891 static rtx
23892 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
23893                                     rtx target)
23894 {
23895   rtx pat;
23896   tree arg0 = CALL_EXPR_ARG (exp, 0);
23897   rtx op1, op0 = expand_normal (arg0);
23898   enum machine_mode tmode = insn_data[icode].operand[0].mode;
23899   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23900 
23901   if (optimize || !target
23902       || GET_MODE (target) != tmode
23903       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23904     target = gen_reg_rtx (tmode);
23905 
23906   if (VECTOR_MODE_P (mode0))
23907     op0 = safe_vector_operand (op0, mode0);
23908 
23909   if ((optimize && !register_operand (op0, mode0))
23910       || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
23911     op0 = copy_to_mode_reg (mode0, op0);
23912 
23913   op1 = op0;
23914   if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
23915     op1 = copy_to_mode_reg (mode0, op1);
23916 
23917   pat = GEN_FCN (icode) (target, op0, op1);
23918   if (! pat)
23919     return 0;
23920   emit_insn (pat);
23921   return target;
23922 }
23923 
23924 /* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
23925 
23926 static rtx
23927 ix86_expand_sse_compare (const struct builtin_description *d,
23928                          tree exp, rtx target, bool swap)
23929 {
23930   rtx pat;
23931   tree arg0 = CALL_EXPR_ARG (exp, 0);
23932   tree arg1 = CALL_EXPR_ARG (exp, 1);
23933   rtx op0 = expand_normal (arg0);
23934   rtx op1 = expand_normal (arg1);
23935   rtx op2;
23936   enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
23937   enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
23938   enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
23939   enum rtx_code comparison = d->comparison;
23940 
23941   if (VECTOR_MODE_P (mode0))
23942     op0 = safe_vector_operand (op0, mode0);
23943   if (VECTOR_MODE_P (mode1))
23944     op1 = safe_vector_operand (op1, mode1);
23945 
23946   /* Swap operands if we have a comparison that isn't available in
23947      hardware.  */
23948   if (swap)
23949     {
23950       rtx tmp = gen_reg_rtx (mode1);
23951       emit_move_insn (tmp, op1);
23952       op1 = op0;
23953       op0 = tmp;
23954     }
23955 
23956   if (optimize || !target
23957       || GET_MODE (target) != tmode
23958       || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
23959     target = gen_reg_rtx (tmode);
23960 
23961   if ((optimize && !register_operand (op0, mode0))
23962       || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
23963     op0 = copy_to_mode_reg (mode0, op0);
23964   if ((optimize && !register_operand (op1, mode1))
23965       || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
23966     op1 = copy_to_mode_reg (mode1, op1);
23967 
23968   op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
23969   pat = GEN_FCN (d->icode) (target, op0, op1, op2);
23970   if (! pat)
23971     return 0;
23972   emit_insn (pat);
23973   return target;
23974 }
23975 
23976 /* Subroutine of ix86_expand_builtin to take care of comi insns.  */
23977 
23978 static rtx
23979 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
23980                       rtx target)
23981 {
23982   rtx pat;
23983   tree arg0 = CALL_EXPR_ARG (exp, 0);
23984   tree arg1 = CALL_EXPR_ARG (exp, 1);
23985   rtx op0 = expand_normal (arg0);
23986   rtx op1 = expand_normal (arg1);
23987   enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23988   enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23989   enum rtx_code comparison = d->comparison;
23990 
23991   if (VECTOR_MODE_P (mode0))
23992     op0 = safe_vector_operand (op0, mode0);
23993   if (VECTOR_MODE_P (mode1))
23994     op1 = safe_vector_operand (op1, mode1);
23995 
23996   /* Swap operands if we have a comparison that isn't available in
23997      hardware.  */
23998   if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
23999     {
24000       rtx tmp = op1;
24001       op1 = op0;
24002       op0 = tmp;
24003     }
24004 
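        /* Build a 0/1 result: clear an SImode register, then set only its
           low QImode part from the comparison of the flags emitted
           below.  */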
24005   target = gen_reg_rtx (SImode);
24006   emit_move_insn (target, const0_rtx);
24007   target = gen_rtx_SUBREG (QImode, target, 0);
24008 
24009   if ((optimize && !register_operand (op0, mode0))
24010       || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24011     op0 = copy_to_mode_reg (mode0, op0);
24012   if ((optimize && !register_operand (op1, mode1))
24013       || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24014     op1 = copy_to_mode_reg (mode1, op1);
24015 
24016   pat = GEN_FCN (d->icode) (op0, op1);
24017   if (! pat)
24018     return 0;
24019   emit_insn (pat);
24020   emit_insn (gen_rtx_SET (VOIDmode,
24021                           gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24022                           gen_rtx_fmt_ee (comparison, QImode,
24023                                           SET_DEST (pat),
24024                                           const0_rtx)));
24025 
24026   return SUBREG_REG (target);
24027 }
24028 
24029 /* Subroutine of ix86_expand_builtin to take care of ptest insns.  */
24030 
24031 static rtx
24032 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24033                        rtx target)
24034 {
24035   rtx pat;
24036   tree arg0 = CALL_EXPR_ARG (exp, 0);
24037   tree arg1 = CALL_EXPR_ARG (exp, 1);
24038   rtx op0 = expand_normal (arg0);
24039   rtx op1 = expand_normal (arg1);
24040   enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24041   enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24042   enum rtx_code comparison = d->comparison;
24043 
24044   if (VECTOR_MODE_P (mode0))
24045     op0 = safe_vector_operand (op0, mode0);
24046   if (VECTOR_MODE_P (mode1))
24047     op1 = safe_vector_operand (op1, mode1);
24048 
24049   target = gen_reg_rtx (SImode);
24050   emit_move_insn (target, const0_rtx);
24051   target = gen_rtx_SUBREG (QImode, target, 0);
24052 
24053   if ((optimize && !register_operand (op0, mode0))
24054       || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24055     op0 = copy_to_mode_reg (mode0, op0);
24056   if ((optimize && !register_operand (op1, mode1))
24057       || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24058     op1 = copy_to_mode_reg (mode1, op1);
24059 
24060   pat = GEN_FCN (d->icode) (op0, op1);
24061   if (! pat)
24062     return 0;
24063   emit_insn (pat);
24064   emit_insn (gen_rtx_SET (VOIDmode,
24065                           gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24066                           gen_rtx_fmt_ee (comparison, QImode,
24067                                           SET_DEST (pat),
24068                                           const0_rtx)));
24069 
24070   return SUBREG_REG (target);
24071 }
24072 
24073 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */
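
/* For illustration (intrinsic shape assumed from <smmintrin.h>), the
   explicit-length string compares take two vectors, two lengths and an
   8-bit immediate, e.g.

       int idx = __builtin_ia32_pcmpestri128 (a, la, b, lb, imm);

   The insn pattern produces both an index result and a mask result;
   depending on d->code, one of them is directed to TARGET and the
   other is discarded into a scratch register.  */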
24074 
24075 static rtx
24076 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24077                           tree exp, rtx target)
24078 {
24079   rtx pat;
24080   tree arg0 = CALL_EXPR_ARG (exp, 0);
24081   tree arg1 = CALL_EXPR_ARG (exp, 1);
24082   tree arg2 = CALL_EXPR_ARG (exp, 2);
24083   tree arg3 = CALL_EXPR_ARG (exp, 3);
24084   tree arg4 = CALL_EXPR_ARG (exp, 4);
24085   rtx scratch0, scratch1;
24086   rtx op0 = expand_normal (arg0);
24087   rtx op1 = expand_normal (arg1);
24088   rtx op2 = expand_normal (arg2);
24089   rtx op3 = expand_normal (arg3);
24090   rtx op4 = expand_normal (arg4);
24091   enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24092 
24093   tmode0 = insn_data[d->icode].operand[0].mode;
24094   tmode1 = insn_data[d->icode].operand[1].mode;
24095   modev2 = insn_data[d->icode].operand[2].mode;
24096   modei3 = insn_data[d->icode].operand[3].mode;
24097   modev4 = insn_data[d->icode].operand[4].mode;
24098   modei5 = insn_data[d->icode].operand[5].mode;
24099   modeimm = insn_data[d->icode].operand[6].mode;
24100 
24101   if (VECTOR_MODE_P (modev2))
24102     op0 = safe_vector_operand (op0, modev2);
24103   if (VECTOR_MODE_P (modev4))
24104     op2 = safe_vector_operand (op2, modev4);
24105 
24106   if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24107     op0 = copy_to_mode_reg (modev2, op0);
24108   if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24109     op1 = copy_to_mode_reg (modei3, op1);
24110   if ((optimize && !register_operand (op2, modev4))
24111       || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24112     op2 = copy_to_mode_reg (modev4, op2);
24113   if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24114     op3 = copy_to_mode_reg (modei5, op3);
24115 
24116   if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
24117     {
24118       error ("the fifth argument must be a 8-bit immediate");
24119       return const0_rtx;
24120     }
24121 
24122   if (d->code == IX86_BUILTIN_PCMPESTRI128)
24123     {
24124       if (optimize || !target
24125           || GET_MODE (target) != tmode0
24126           || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24127         target = gen_reg_rtx (tmode0);
24128 
24129       scratch1 = gen_reg_rtx (tmode1);
24130 
24131       pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24132     }
24133   else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24134     {
24135       if (optimize || !target
24136           || GET_MODE (target) != tmode1
24137           || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24138         target = gen_reg_rtx (tmode1);
24139 
24140       scratch0 = gen_reg_rtx (tmode0);
24141 
24142       pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
24143     }
24144   else
24145     {
24146       gcc_assert (d->flag);
24147 
24148       scratch0 = gen_reg_rtx (tmode0);
24149       scratch1 = gen_reg_rtx (tmode1);
24150 
24151       pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24152     }
24153 
24154   if (! pat)
24155     return 0;
24156 
24157   emit_insn (pat);
24158 
24159   if (d->flag)
24160     {
24161       target = gen_reg_rtx (SImode);
24162       emit_move_insn (target, const0_rtx);
24163       target = gen_rtx_SUBREG (QImode, target, 0);
24164 
24165       emit_insn
24166         (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24167                       gen_rtx_fmt_ee (EQ, QImode,
24168                                       gen_rtx_REG ((enum machine_mode) d->flag,
24169                                                    FLAGS_REG),
24170                                       const0_rtx)));
24171       return SUBREG_REG (target);
24172     }
24173   else
24174     return target;
24175 }
24176 
24177 
24178 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */
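
/* For illustration (intrinsic shape assumed from <smmintrin.h>), the
   implicit-length string compares take two vectors and an 8-bit
   immediate, e.g.

       int idx = __builtin_ia32_pcmpistri128 (a, b, imm);

   As with pcmpestr above, the pattern computes both an index and a
   mask; the unused result goes into a scratch register.  */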
24179 
24180 static rtx
24181 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24182                           tree exp, rtx target)
24183 {
24184   rtx pat;
24185   tree arg0 = CALL_EXPR_ARG (exp, 0);
24186   tree arg1 = CALL_EXPR_ARG (exp, 1);
24187   tree arg2 = CALL_EXPR_ARG (exp, 2);
24188   rtx scratch0, scratch1;
24189   rtx op0 = expand_normal (arg0);
24190   rtx op1 = expand_normal (arg1);
24191   rtx op2 = expand_normal (arg2);
24192   enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24193 
24194   tmode0 = insn_data[d->icode].operand[0].mode;
24195   tmode1 = insn_data[d->icode].operand[1].mode;
24196   modev2 = insn_data[d->icode].operand[2].mode;
24197   modev3 = insn_data[d->icode].operand[3].mode;
24198   modeimm = insn_data[d->icode].operand[4].mode;
24199 
24200   if (VECTOR_MODE_P (modev2))
24201     op0 = safe_vector_operand (op0, modev2);
24202   if (VECTOR_MODE_P (modev3))
24203     op1 = safe_vector_operand (op1, modev3);
24204 
24205   if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24206     op0 = copy_to_mode_reg (modev2, op0);
24207   if ((optimize && !register_operand (op1, modev3))
24208       || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24209     op1 = copy_to_mode_reg (modev3, op1);
24210 
24211   if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
24212     {
24213       error ("the third argument must be a 8-bit immediate");
24214       return const0_rtx;
24215     }
24216 
24217   if (d->code == IX86_BUILTIN_PCMPISTRI128)
24218     {
24219       if (optimize || !target
24220           || GET_MODE (target) != tmode0
24221           || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24222         target = gen_reg_rtx (tmode0);
24223 
24224       scratch1 = gen_reg_rtx (tmode1);
24225 
24226       pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24227     }
24228   else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24229     {
24230       if (optimize || !target
24231           || GET_MODE (target) != tmode1
24232           || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24233         target = gen_reg_rtx (tmode1);
24234 
24235       scratch0 = gen_reg_rtx (tmode0);
24236 
24237       pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
24238     }
24239   else
24240     {
24241       gcc_assert (d->flag);
24242 
24243       scratch0 = gen_reg_rtx (tmode0);
24244       scratch1 = gen_reg_rtx (tmode1);
24245 
24246       pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24247     }
24248 
24249   if (! pat)
24250     return 0;
24251 
24252   emit_insn (pat);
24253 
24254   if (d->flag)
24255     {
24256       target = gen_reg_rtx (SImode);
24257       emit_move_insn (target, const0_rtx);
24258       target = gen_rtx_SUBREG (QImode, target, 0);
24259 
24260       emit_insn
24261         (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24262                       gen_rtx_fmt_ee (EQ, QImode,
24263                                       gen_rtx_REG ((enum machine_mode) d->flag,
24264                                                    FLAGS_REG),
24265                                       const0_rtx)));
24266       return SUBREG_REG (target);
24267     }
24268   else
24269     return target;
24270 }
24271 
/* Subroutine of ix86_expand_builtin to take care of insns with
   a variable number of operands.  */
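
/* For illustration: the switch below keys off the function type encoded
   in d->flag.  E.g. a V16QI_FTYPE_V16QI entry is a one-operand insn
   (nargs = 1), while V4SF_FTYPE_V4SF_INT is a two-operand insn whose
   final operand must be an immediate (nargs = 2, nargs_constant = 1).  */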
24274 
24275 static rtx
24276 ix86_expand_args_builtin (const struct builtin_description *d,
24277                           tree exp, rtx target)
24278 {
24279   rtx pat, real_target;
24280   unsigned int i, nargs;
24281   unsigned int nargs_constant = 0;
24282   int num_memory = 0;
24283   struct
24284     {
24285       rtx op;
24286       enum machine_mode mode;
24287     } args[4];
24288   bool last_arg_count = false;
24289   enum insn_code icode = d->icode;
24290   const struct insn_data *insn_p = &insn_data[icode];
24291   enum machine_mode tmode = insn_p->operand[0].mode;
24292   enum machine_mode rmode = VOIDmode;
24293   bool swap = false;
24294   enum rtx_code comparison = d->comparison;
24295 
24296   switch ((enum ix86_builtin_type) d->flag)
24297     {
24298     case INT_FTYPE_V8SF_V8SF_PTEST:
24299     case INT_FTYPE_V4DI_V4DI_PTEST:
24300     case INT_FTYPE_V4DF_V4DF_PTEST:
24301     case INT_FTYPE_V4SF_V4SF_PTEST:
24302     case INT_FTYPE_V2DI_V2DI_PTEST:
24303     case INT_FTYPE_V2DF_V2DF_PTEST:
24304       return ix86_expand_sse_ptest (d, exp, target);
24305     case FLOAT128_FTYPE_FLOAT128:
24306     case FLOAT_FTYPE_FLOAT:
24307     case INT64_FTYPE_V4SF:
24308     case INT64_FTYPE_V2DF:
24309     case INT_FTYPE_V16QI:
24310     case INT_FTYPE_V8QI:
24311     case INT_FTYPE_V8SF:
24312     case INT_FTYPE_V4DF:
24313     case INT_FTYPE_V4SF:
24314     case INT_FTYPE_V2DF:
24315     case V16QI_FTYPE_V16QI:
24316     case V8SI_FTYPE_V8SF:
24317     case V8SI_FTYPE_V4SI:
24318     case V8HI_FTYPE_V8HI:
24319     case V8HI_FTYPE_V16QI:
24320     case V8QI_FTYPE_V8QI:
24321     case V8SF_FTYPE_V8SF:
24322     case V8SF_FTYPE_V8SI:
24323     case V8SF_FTYPE_V4SF:
24324     case V4SI_FTYPE_V4SI:
24325     case V4SI_FTYPE_V16QI:
24326     case V4SI_FTYPE_V4SF:
24327     case V4SI_FTYPE_V8SI:
24328     case V4SI_FTYPE_V8HI:
24329     case V4SI_FTYPE_V4DF:
24330     case V4SI_FTYPE_V2DF:
24331     case V4HI_FTYPE_V4HI:
24332     case V4DF_FTYPE_V4DF:
24333     case V4DF_FTYPE_V4SI:
24334     case V4DF_FTYPE_V4SF:
24335     case V4DF_FTYPE_V2DF:
24336     case V4SF_FTYPE_V4SF:
24337     case V4SF_FTYPE_V4SI:
24338     case V4SF_FTYPE_V8SF:
24339     case V4SF_FTYPE_V4DF:
24340     case V4SF_FTYPE_V2DF:
24341     case V2DI_FTYPE_V2DI:
24342     case V2DI_FTYPE_V16QI:
24343     case V2DI_FTYPE_V8HI:
24344     case V2DI_FTYPE_V4SI:
24345     case V2DF_FTYPE_V2DF:
24346     case V2DF_FTYPE_V4SI:
24347     case V2DF_FTYPE_V4DF:
24348     case V2DF_FTYPE_V4SF:
24349     case V2DF_FTYPE_V2SI:
24350     case V2SI_FTYPE_V2SI:
24351     case V2SI_FTYPE_V4SF:
24352     case V2SI_FTYPE_V2SF:
24353     case V2SI_FTYPE_V2DF:
24354     case V2SF_FTYPE_V2SF:
24355     case V2SF_FTYPE_V2SI:
24356       nargs = 1;
24357       break;
24358     case V4SF_FTYPE_V4SF_VEC_MERGE:
24359     case V2DF_FTYPE_V2DF_VEC_MERGE:
24360       return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24361     case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24362     case V16QI_FTYPE_V16QI_V16QI:
24363     case V16QI_FTYPE_V8HI_V8HI:
24364     case V8QI_FTYPE_V8QI_V8QI:
24365     case V8QI_FTYPE_V4HI_V4HI:
24366     case V8HI_FTYPE_V8HI_V8HI:
24367     case V8HI_FTYPE_V16QI_V16QI:
24368     case V8HI_FTYPE_V4SI_V4SI:
24369     case V8SF_FTYPE_V8SF_V8SF:
24370     case V8SF_FTYPE_V8SF_V8SI:
24371     case V4SI_FTYPE_V4SI_V4SI:
24372     case V4SI_FTYPE_V8HI_V8HI:
24373     case V4SI_FTYPE_V4SF_V4SF:
24374     case V4SI_FTYPE_V2DF_V2DF:
24375     case V4HI_FTYPE_V4HI_V4HI:
24376     case V4HI_FTYPE_V8QI_V8QI:
24377     case V4HI_FTYPE_V2SI_V2SI:
24378     case V4DF_FTYPE_V4DF_V4DF:
24379     case V4DF_FTYPE_V4DF_V4DI:
24380     case V4SF_FTYPE_V4SF_V4SF:
24381     case V4SF_FTYPE_V4SF_V4SI:
24382     case V4SF_FTYPE_V4SF_V2SI:
24383     case V4SF_FTYPE_V4SF_V2DF:
24384     case V4SF_FTYPE_V4SF_DI:
24385     case V4SF_FTYPE_V4SF_SI:
24386     case V2DI_FTYPE_V2DI_V2DI:
24387     case V2DI_FTYPE_V16QI_V16QI:
24388     case V2DI_FTYPE_V4SI_V4SI:
24389     case V2DI_FTYPE_V2DI_V16QI:
24390     case V2DI_FTYPE_V2DF_V2DF:
24391     case V2SI_FTYPE_V2SI_V2SI:
24392     case V2SI_FTYPE_V4HI_V4HI:
24393     case V2SI_FTYPE_V2SF_V2SF:
24394     case V2DF_FTYPE_V2DF_V2DF:
24395     case V2DF_FTYPE_V2DF_V4SF:
24396     case V2DF_FTYPE_V2DF_V2DI:
24397     case V2DF_FTYPE_V2DF_DI:
24398     case V2DF_FTYPE_V2DF_SI:
24399     case V2SF_FTYPE_V2SF_V2SF:
24400     case V1DI_FTYPE_V1DI_V1DI:
24401     case V1DI_FTYPE_V8QI_V8QI:
24402     case V1DI_FTYPE_V2SI_V2SI:
24403       if (comparison == UNKNOWN)
24404         return ix86_expand_binop_builtin (icode, exp, target);
24405       nargs = 2;
24406       break;
24407     case V4SF_FTYPE_V4SF_V4SF_SWAP:
24408     case V2DF_FTYPE_V2DF_V2DF_SWAP:
24409       gcc_assert (comparison != UNKNOWN);
24410       nargs = 2;
24411       swap = true;
24412       break;
24413     case V8HI_FTYPE_V8HI_V8HI_COUNT:
24414     case V8HI_FTYPE_V8HI_SI_COUNT:
24415     case V4SI_FTYPE_V4SI_V4SI_COUNT:
24416     case V4SI_FTYPE_V4SI_SI_COUNT:
24417     case V4HI_FTYPE_V4HI_V4HI_COUNT:
24418     case V4HI_FTYPE_V4HI_SI_COUNT:
24419     case V2DI_FTYPE_V2DI_V2DI_COUNT:
24420     case V2DI_FTYPE_V2DI_SI_COUNT:
24421     case V2SI_FTYPE_V2SI_V2SI_COUNT:
24422     case V2SI_FTYPE_V2SI_SI_COUNT:
24423     case V1DI_FTYPE_V1DI_V1DI_COUNT:
24424     case V1DI_FTYPE_V1DI_SI_COUNT:
24425       nargs = 2;
24426       last_arg_count = true;
24427       break;
24428     case UINT64_FTYPE_UINT64_UINT64:
24429     case UINT_FTYPE_UINT_UINT:
24430     case UINT_FTYPE_UINT_USHORT:
24431     case UINT_FTYPE_UINT_UCHAR:
24432       nargs = 2;
24433       break;
24434     case V2DI2TI_FTYPE_V2DI_INT:
24435       nargs = 2;
24436       rmode = V2DImode;
24437       nargs_constant = 1;
24438       break;
24439     case V8HI_FTYPE_V8HI_INT:
24440     case V8SF_FTYPE_V8SF_INT:
24441     case V4SI_FTYPE_V4SI_INT:
24442     case V4SI_FTYPE_V8SI_INT:
24443     case V4HI_FTYPE_V4HI_INT:
24444     case V4DF_FTYPE_V4DF_INT:
24445     case V4SF_FTYPE_V4SF_INT:
24446     case V4SF_FTYPE_V8SF_INT:
24447     case V2DI_FTYPE_V2DI_INT:
24448     case V2DF_FTYPE_V2DF_INT:
24449     case V2DF_FTYPE_V4DF_INT:
24450       nargs = 2;
24451       nargs_constant = 1;
24452       break;
24453     case V16QI_FTYPE_V16QI_V16QI_V16QI:
24454     case V8SF_FTYPE_V8SF_V8SF_V8SF:
24455     case V4DF_FTYPE_V4DF_V4DF_V4DF:
24456     case V4SF_FTYPE_V4SF_V4SF_V4SF:
24457     case V2DF_FTYPE_V2DF_V2DF_V2DF:
24458       nargs = 3;
24459       break;
24460     case V16QI_FTYPE_V16QI_V16QI_INT:
24461     case V8HI_FTYPE_V8HI_V8HI_INT:
24462     case V8SI_FTYPE_V8SI_V8SI_INT:
24463     case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
24466     case V4SI_FTYPE_V4SI_V4SI_INT:
24467     case V4DF_FTYPE_V4DF_V4DF_INT:
24468     case V4DF_FTYPE_V4DF_V2DF_INT:
24469     case V4SF_FTYPE_V4SF_V4SF_INT:
24470     case V2DI_FTYPE_V2DI_V2DI_INT:
24471     case V2DF_FTYPE_V2DF_V2DF_INT:
24472       nargs = 3;
24473       nargs_constant = 1;
24474       break;
24475     case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24476       nargs = 3;
24477       rmode = V2DImode;
24478       nargs_constant = 1;
24479       break;
24480     case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24481       nargs = 3;
24482       rmode = DImode;
24483       nargs_constant = 1;
24484       break;
24485     case V2DI_FTYPE_V2DI_UINT_UINT:
24486       nargs = 3;
24487       nargs_constant = 2;
24488       break;
24489     case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24490       nargs = 4;
24491       nargs_constant = 2;
24492       break;
24493     default:
24494       gcc_unreachable ();
24495     }
24496 
24497   gcc_assert (nargs <= ARRAY_SIZE (args));
24498 
24499   if (comparison != UNKNOWN)
24500     {
24501       gcc_assert (nargs == 2);
24502       return ix86_expand_sse_compare (d, exp, target, swap);
24503     }
24504 
24505   if (rmode == VOIDmode || rmode == tmode)
24506     {
24507       if (optimize
24508           || target == 0
24509           || GET_MODE (target) != tmode
24510           || ! (*insn_p->operand[0].predicate) (target, tmode))
24511         target = gen_reg_rtx (tmode);
24512       real_target = target;
24513     }
24514   else
24515     {
24516       target = gen_reg_rtx (rmode);
24517       real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24518     }
24519 
24520   for (i = 0; i < nargs; i++)
24521     {
24522       tree arg = CALL_EXPR_ARG (exp, i);
24523       rtx op = expand_normal (arg);
24524       enum machine_mode mode = insn_p->operand[i + 1].mode;
24525       bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
24526 
24527       if (last_arg_count && (i + 1) == nargs)
24528         {
          /* SIMD shift insns take either an 8-bit immediate or a
             register as the count operand.  But the builtin functions
             take an int as the count.  If the count doesn't match, we
             put it in a register.  */
24532           if (!match)
24533             {
24534               op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24535               if (!(*insn_p->operand[i + 1].predicate) (op, mode))
24536                 op = copy_to_reg (op);
24537             }
24538         }
24539       else if ((nargs - i) <= nargs_constant)
24540         {
24541           if (!match)
24542             switch (icode)
24543               {
24544               case CODE_FOR_sse4_1_roundpd:
24545               case CODE_FOR_sse4_1_roundps:
24546               case CODE_FOR_sse4_1_roundsd:
24547               case CODE_FOR_sse4_1_roundss:
24548               case CODE_FOR_sse4_1_blendps:
24549               case CODE_FOR_avx_blendpd256:
24550               case CODE_FOR_avx_vpermilv4df:
24551               case CODE_FOR_avx_roundpd256:
24552               case CODE_FOR_avx_roundps256:
24553                 error ("the last argument must be a 4-bit immediate");
24554                 return const0_rtx;
24555 
24556               case CODE_FOR_sse4_1_blendpd:
24557               case CODE_FOR_avx_vpermilv2df:
24558                 error ("the last argument must be a 2-bit immediate");
24559                 return const0_rtx;
24560 
24561               case CODE_FOR_avx_vextractf128v4df:
24562               case CODE_FOR_avx_vextractf128v8sf:
24563               case CODE_FOR_avx_vextractf128v8si:
24564               case CODE_FOR_avx_vinsertf128v4df:
24565               case CODE_FOR_avx_vinsertf128v8sf:
24566               case CODE_FOR_avx_vinsertf128v8si:
24567                 error ("the last argument must be a 1-bit immediate");
24568                 return const0_rtx;
24569 
24570               case CODE_FOR_avx_cmpsdv2df3:
24571               case CODE_FOR_avx_cmpssv4sf3:
24572               case CODE_FOR_avx_cmppdv2df3:
24573               case CODE_FOR_avx_cmppsv4sf3:
24574               case CODE_FOR_avx_cmppdv4df3:
24575               case CODE_FOR_avx_cmppsv8sf3:
24576                 error ("the last argument must be a 5-bit immediate");
24577                 return const0_rtx;
24578 
              default:
24580                 switch (nargs_constant)
24581                   {
                  case 2:
                    if ((nargs - i) == nargs_constant)
                      {
                        error ("the next to last argument must be an 8-bit immediate");
                        break;
                      }
                    /* FALLTHRU */
                  case 1:
24589                     error ("the last argument must be an 8-bit immediate");
24590                     break;
24591                   default:
24592                     gcc_unreachable ();
24593                   }
24594                 return const0_rtx;
24595               }
24596         }
24597       else
24598         {
24599           if (VECTOR_MODE_P (mode))
24600             op = safe_vector_operand (op, mode);
24601 
          /* Count memory operands.  If we aren't optimizing, we only
             allow one memory operand to survive; any further ones are
             copied into registers below.  */
          if (memory_operand (op, mode))
            num_memory++;
24606 
24607           if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24608             {
24609               if (optimize || !match || num_memory > 1)
24610                 op = copy_to_mode_reg (mode, op);
24611             }
24612           else
24613             {
24614               op = copy_to_reg (op);
24615               op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24616             }
24617         }
24618 
24619       args[i].op = op;
24620       args[i].mode = mode;
24621     }
24622 
24623   switch (nargs)
24624     {
24625     case 1:
24626       pat = GEN_FCN (icode) (real_target, args[0].op);
24627       break;
24628     case 2:
24629       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24630       break;
24631     case 3:
24632       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24633                              args[2].op);
24634       break;
24635     case 4:
24636       pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24637                              args[2].op, args[3].op);
24638       break;
24639     default:
24640       gcc_unreachable ();
24641     }
24642 
24643   if (! pat)
24644     return 0;
24645 
24646   emit_insn (pat);
24647   return target;
24648 }
24649 
/* Subroutine of ix86_expand_builtin to take care of special insns
   with a variable number of operands.  */
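
/* For illustration: "special" builtins are mostly loads and stores.
   E.g. a V4SF_FTYPE_PCFLOAT entry (such as the one assumed for
   __builtin_ia32_loadups) is a load whose single operand is a pointer,
   while a VOID_FTYPE_PFLOAT_V4SF entry is a store whose first operand
   is the destination address.  */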
24652 
24653 static rtx
24654 ix86_expand_special_args_builtin (const struct builtin_description *d,
                                  tree exp, rtx target)
24656 {
24657   tree arg;
24658   rtx pat, op;
24659   unsigned int i, nargs, arg_adjust, memory;
24660   struct
24661     {
24662       rtx op;
24663       enum machine_mode mode;
24664     } args[2];
24665   enum insn_code icode = d->icode;
24666   bool last_arg_constant = false;
24667   const struct insn_data *insn_p = &insn_data[icode];
24668   enum machine_mode tmode = insn_p->operand[0].mode;
24669   enum { load, store } klass;
24670 
24671   switch ((enum ix86_special_builtin_type) d->flag)
24672     {
24673     case VOID_FTYPE_VOID:
24674       emit_insn (GEN_FCN (icode) (target));
24675       return 0;
24676     case V2DI_FTYPE_PV2DI:
24677     case V32QI_FTYPE_PCCHAR:
24678     case V16QI_FTYPE_PCCHAR:
24679     case V8SF_FTYPE_PCV4SF:
24680     case V8SF_FTYPE_PCFLOAT:
24681     case V4SF_FTYPE_PCFLOAT:
24682     case V4DF_FTYPE_PCV2DF:
24683     case V4DF_FTYPE_PCDOUBLE:
24684     case V2DF_FTYPE_PCDOUBLE:
24685       nargs = 1;
24686       klass = load;
24687       memory = 0;
24688       break;
24689     case VOID_FTYPE_PV2SF_V4SF:
24690     case VOID_FTYPE_PV4DI_V4DI:
24691     case VOID_FTYPE_PV2DI_V2DI:
24692     case VOID_FTYPE_PCHAR_V32QI:
24693     case VOID_FTYPE_PCHAR_V16QI:
24694     case VOID_FTYPE_PFLOAT_V8SF:
24695     case VOID_FTYPE_PFLOAT_V4SF:
24696     case VOID_FTYPE_PDOUBLE_V4DF:
24697     case VOID_FTYPE_PDOUBLE_V2DF:
24698     case VOID_FTYPE_PDI_DI:
24699     case VOID_FTYPE_PINT_INT:
24700       nargs = 1;
24701       klass = store;
24702       /* Reserve memory operand for target.  */
24703       memory = ARRAY_SIZE (args);
24704       break;
24705     case V4SF_FTYPE_V4SF_PCV2SF:
24706     case V2DF_FTYPE_V2DF_PCDOUBLE:
24707       nargs = 2;
24708       klass = load;
24709       memory = 1;
24710       break;
24711     case V8SF_FTYPE_PCV8SF_V8SF:
24712     case V4DF_FTYPE_PCV4DF_V4DF:
24713     case V4SF_FTYPE_PCV4SF_V4SF:
24714     case V2DF_FTYPE_PCV2DF_V2DF:
24715       nargs = 2;
24716       klass = load;
24717       memory = 0;
24718       break;
24719     case VOID_FTYPE_PV8SF_V8SF_V8SF:
24720     case VOID_FTYPE_PV4DF_V4DF_V4DF:
24721     case VOID_FTYPE_PV4SF_V4SF_V4SF:
24722     case VOID_FTYPE_PV2DF_V2DF_V2DF:
24723       nargs = 2;
24724       klass = store;
24725       /* Reserve memory operand for target.  */
24726       memory = ARRAY_SIZE (args);
24727       break;
24728     default:
24729       gcc_unreachable ();
24730     }
24731 
24732   gcc_assert (nargs <= ARRAY_SIZE (args));
24733 
24734   if (klass == store)
24735     {
24736       arg = CALL_EXPR_ARG (exp, 0);
24737       op = expand_normal (arg);
24738       gcc_assert (target == 0);
24739       target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
24740       arg_adjust = 1;
24741     }
24742   else
24743     {
24744       arg_adjust = 0;
24745       if (optimize
24746           || target == 0
24747           || GET_MODE (target) != tmode
24748           || ! (*insn_p->operand[0].predicate) (target, tmode))
24749         target = gen_reg_rtx (tmode);
24750     }
24751 
24752   for (i = 0; i < nargs; i++)
24753     {
24754       enum machine_mode mode = insn_p->operand[i + 1].mode;
24755       bool match;
24756 
24757       arg = CALL_EXPR_ARG (exp, i + arg_adjust);
24758       op = expand_normal (arg);
24759       match = (*insn_p->operand[i + 1].predicate) (op, mode);
24760 
24761       if (last_arg_constant && (i + 1) == nargs)
24762         {
24763           if (!match)
24764             switch (icode)
24765               {
              default:
24767                 error ("the last argument must be an 8-bit immediate");
24768                 return const0_rtx;
24769               }
24770         }
24771       else
24772         {
24773           if (i == memory)
24774             {
24775               /* This must be the memory operand.  */
24776               op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
24777               gcc_assert (GET_MODE (op) == mode
24778                           || GET_MODE (op) == VOIDmode);
24779             }
24780           else
24781             {
              /* This must be a register.  */
24783               if (VECTOR_MODE_P (mode))
24784                 op = safe_vector_operand (op, mode);
24785 
24786               gcc_assert (GET_MODE (op) == mode
24787                           || GET_MODE (op) == VOIDmode);
24788               op = copy_to_mode_reg (mode, op);
24789             }
24790         }
24791 
24792       args[i].op = op;
24793       args[i].mode = mode;
24794     }
24795 
24796   switch (nargs)
24797     {
24798     case 1:
24799       pat = GEN_FCN (icode) (target, args[0].op);
24800       break;
24801     case 2:
24802       pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24803       break;
24804     default:
24805       gcc_unreachable ();
24806     }
24807 
24808   if (! pat)
24809     return 0;
24810   emit_insn (pat);
24811   return klass == store ? 0 : target;
24812 }
24813 
24814 /* Return the integer constant in ARG.  Constrain it to be in the range
24815    of the subparts of VEC_TYPE; issue an error if not.  */
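
/* For illustration: for a V4SF vector type, TYPE_VECTOR_SUBPARTS is 4,
   so valid selectors are the integer constants 0..3; anything else is
   diagnosed below and 0 is returned as a safe fallback.  */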
24816 
24817 static int
24818 get_element_number (tree vec_type, tree arg)
24819 {
24820   unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
24821 
24822   if (!host_integerp (arg, 1)
24823       || (elt = tree_low_cst (arg, 1), elt > max))
24824     {
24825       error ("selector must be an integer constant in the range 0..%wi", max);
24826       return 0;
24827     }
24828 
24829   return elt;
24830 }
24831 
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */
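
/* For illustration (builtin name assumed from <mmintrin.h>):

       __builtin_ia32_vec_init_v2si (x, y)

   expands each argument, collects the lowparts into an rtvec and hands
   the resulting PARALLEL to ix86_expand_vector_init.  */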
24840 
24841 static rtx
24842 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
24843 {
24844   enum machine_mode tmode = TYPE_MODE (type);
24845   enum machine_mode inner_mode = GET_MODE_INNER (tmode);
24846   int i, n_elt = GET_MODE_NUNITS (tmode);
24847   rtvec v = rtvec_alloc (n_elt);
24848 
24849   gcc_assert (VECTOR_MODE_P (tmode));
24850   gcc_assert (call_expr_nargs (exp) == n_elt);
24851 
24852   for (i = 0; i < n_elt; ++i)
24853     {
24854       rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
24855       RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
24856     }
24857 
24858   if (!target || !register_operand (target, tmode))
24859     target = gen_reg_rtx (tmode);
24860 
24861   ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
24862   return target;
24863 }
24864 
24865 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
24866    ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
24867    had a language-level syntax for referencing vector elements.  */
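
/* For illustration (builtin name assumed from the vec_ext family):

       float f = __builtin_ia32_vec_ext_v4sf (v, 2);

   forces V into a register and extracts element 2 via
   ix86_expand_vector_extract.  */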
24868 
24869 static rtx
24870 ix86_expand_vec_ext_builtin (tree exp, rtx target)
24871 {
24872   enum machine_mode tmode, mode0;
24873   tree arg0, arg1;
24874   int elt;
24875   rtx op0;
24876 
24877   arg0 = CALL_EXPR_ARG (exp, 0);
24878   arg1 = CALL_EXPR_ARG (exp, 1);
24879 
24880   op0 = expand_normal (arg0);
24881   elt = get_element_number (TREE_TYPE (arg0), arg1);
24882 
24883   tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24884   mode0 = TYPE_MODE (TREE_TYPE (arg0));
24885   gcc_assert (VECTOR_MODE_P (mode0));
24886 
24887   op0 = force_reg (mode0, op0);
24888 
24889   if (optimize || !target || !register_operand (target, tmode))
24890     target = gen_reg_rtx (tmode);
24891 
24892   ix86_expand_vector_extract (true, target, op0, elt);
24893 
24894   return target;
24895 }
24896 
24897 /* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
24898    ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
24899    a language-level syntax for referencing vector elements.  */
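
/* For illustration (builtin name assumed from the vec_set family):

       __builtin_ia32_vec_set_v8hi (v, x, 3)

   copies V into a fresh register, overwrites element 3 with X via
   ix86_expand_vector_set, and returns the copy, leaving V itself
   unmodified.  */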
24900 
24901 static rtx
24902 ix86_expand_vec_set_builtin (tree exp)
24903 {
24904   enum machine_mode tmode, mode1;
24905   tree arg0, arg1, arg2;
24906   int elt;
24907   rtx op0, op1, target;
24908 
24909   arg0 = CALL_EXPR_ARG (exp, 0);
24910   arg1 = CALL_EXPR_ARG (exp, 1);
24911   arg2 = CALL_EXPR_ARG (exp, 2);
24912 
24913   tmode = TYPE_MODE (TREE_TYPE (arg0));
24914   mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24915   gcc_assert (VECTOR_MODE_P (tmode));
24916 
24917   op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
24918   op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
24919   elt = get_element_number (TREE_TYPE (arg0), arg2);
24920 
24921   if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
24922     op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
24923 
24924   op0 = force_reg (tmode, op0);
24925   op1 = force_reg (mode1, op1);
24926 
24927   /* OP0 is the source of these builtin functions and shouldn't be
24928      modified.  Create a copy, use it and return it as target.  */
24929   target = gen_reg_rtx (tmode);
24930   emit_move_insn (target, op0);
24931   ix86_expand_vector_set (true, target, op1, elt);
24932 
24933   return target;
24934 }
24935 
24936 /* Expand an expression EXP that calls a built-in function,
24937    with result going to TARGET if that's convenient
24938    (and in mode MODE if that's convenient).
24939    SUBTARGET may be used as the target for computing one of EXP's operands.
24940    IGNORE is nonzero if the value is to be ignored.  */
24941 
24942 static rtx
24943 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
24944                      enum machine_mode mode ATTRIBUTE_UNUSED,
24945                      int ignore ATTRIBUTE_UNUSED)
24946 {
24947   const struct builtin_description *d;
24948   size_t i;
24949   enum insn_code icode;
24950   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
24951   tree arg0, arg1, arg2;
24952   rtx op0, op1, op2, pat;
24953   enum machine_mode mode0, mode1, mode2;
24954   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
24955 
24956   /* Determine whether the builtin function is available under the current ISA.
24957      Originally the builtin was not created if it wasn't applicable to the
24958      current ISA based on the command line switches.  With function specific
24959      options, we need to check in the context of the function making the call
24960      whether it is supported.  */
24961   if (ix86_builtins_isa[fcode].isa
24962       && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
24963     {
24964       char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
24965                                        NULL, NULL, false);
24966 
      if (!opts)
        error ("%qE needs unknown isa option", fndecl);
      else
        {
          error ("%qE needs isa option %s", fndecl, opts);
          free (opts);
        }
24975       return const0_rtx;
24976     }
24977 
24978   switch (fcode)
24979     {
24980     case IX86_BUILTIN_MASKMOVQ:
24981     case IX86_BUILTIN_MASKMOVDQU:
24982       icode = (fcode == IX86_BUILTIN_MASKMOVQ
24983                ? CODE_FOR_mmx_maskmovq
24984                : CODE_FOR_sse2_maskmovdqu);
24985       /* Note the arg order is different from the operand order.  */
24986       arg1 = CALL_EXPR_ARG (exp, 0);
24987       arg2 = CALL_EXPR_ARG (exp, 1);
24988       arg0 = CALL_EXPR_ARG (exp, 2);
24989       op0 = expand_normal (arg0);
24990       op1 = expand_normal (arg1);
24991       op2 = expand_normal (arg2);
24992       mode0 = insn_data[icode].operand[0].mode;
24993       mode1 = insn_data[icode].operand[1].mode;
24994       mode2 = insn_data[icode].operand[2].mode;
24995 
24996       op0 = force_reg (Pmode, op0);
24997       op0 = gen_rtx_MEM (mode1, op0);
24998 
24999       if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25000         op0 = copy_to_mode_reg (mode0, op0);
25001       if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25002         op1 = copy_to_mode_reg (mode1, op1);
25003       if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25004         op2 = copy_to_mode_reg (mode2, op2);
25005       pat = GEN_FCN (icode) (op0, op1, op2);
25006       if (! pat)
25007         return 0;
25008       emit_insn (pat);
25009       return 0;
25010 
25011     case IX86_BUILTIN_LDMXCSR:
25012       op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25013       target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25014       emit_move_insn (target, op0);
25015       emit_insn (gen_sse_ldmxcsr (target));
25016       return 0;
25017 
25018     case IX86_BUILTIN_STMXCSR:
25019       target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25020       emit_insn (gen_sse_stmxcsr (target));
25021       return copy_to_mode_reg (SImode, target);
25022 
    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
        op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;
25032 
25033     case IX86_BUILTIN_MONITOR:
25034       arg0 = CALL_EXPR_ARG (exp, 0);
25035       arg1 = CALL_EXPR_ARG (exp, 1);
25036       arg2 = CALL_EXPR_ARG (exp, 2);
25037       op0 = expand_normal (arg0);
25038       op1 = expand_normal (arg1);
25039       op2 = expand_normal (arg2);
25040       if (!REG_P (op0))
25041         op0 = copy_to_mode_reg (Pmode, op0);
25042       if (!REG_P (op1))
25043         op1 = copy_to_mode_reg (SImode, op1);
25044       if (!REG_P (op2))
25045         op2 = copy_to_mode_reg (SImode, op2);
25046       emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25047       return 0;
25048 
25049     case IX86_BUILTIN_MWAIT:
25050       arg0 = CALL_EXPR_ARG (exp, 0);
25051       arg1 = CALL_EXPR_ARG (exp, 1);
25052       op0 = expand_normal (arg0);
25053       op1 = expand_normal (arg1);
25054       if (!REG_P (op0))
25055         op0 = copy_to_mode_reg (SImode, op0);
25056       if (!REG_P (op1))
25057         op1 = copy_to_mode_reg (SImode, op1);
25058       emit_insn (gen_sse3_mwait (op0, op1));
25059       return 0;
25060 
25061     case IX86_BUILTIN_VEC_INIT_V2SI:
25062     case IX86_BUILTIN_VEC_INIT_V4HI:
25063     case IX86_BUILTIN_VEC_INIT_V8QI:
25064       return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25065 
25066     case IX86_BUILTIN_VEC_EXT_V2DF:
25067     case IX86_BUILTIN_VEC_EXT_V2DI:
25068     case IX86_BUILTIN_VEC_EXT_V4SF:
25069     case IX86_BUILTIN_VEC_EXT_V4SI:
25070     case IX86_BUILTIN_VEC_EXT_V8HI:
25071     case IX86_BUILTIN_VEC_EXT_V2SI:
25072     case IX86_BUILTIN_VEC_EXT_V4HI:
25073     case IX86_BUILTIN_VEC_EXT_V16QI:
25074       return ix86_expand_vec_ext_builtin (exp, target);
25075 
25076     case IX86_BUILTIN_VEC_SET_V2DI:
25077     case IX86_BUILTIN_VEC_SET_V4SF:
25078     case IX86_BUILTIN_VEC_SET_V4SI:
25079     case IX86_BUILTIN_VEC_SET_V8HI:
25080     case IX86_BUILTIN_VEC_SET_V4HI:
25081     case IX86_BUILTIN_VEC_SET_V16QI:
25082       return ix86_expand_vec_set_builtin (exp);
25083 
25084     case IX86_BUILTIN_INFQ:
25085       {
25086         REAL_VALUE_TYPE inf;
25087         rtx tmp;
25088 
25089         real_inf (&inf);
25090         tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25091 
25092         tmp = validize_mem (force_const_mem (mode, tmp));
25093 
25094         if (target == 0)
25095           target = gen_reg_rtx (mode);
25096 
25097         emit_move_insn (target, tmp);
25098         return target;
25099       }
25100 
25101     default:
25102       break;
25103     }
25104 
25105   for (i = 0, d = bdesc_special_args;
25106        i < ARRAY_SIZE (bdesc_special_args);
25107        i++, d++)
25108     if (d->code == fcode)
25109       return ix86_expand_special_args_builtin (d, exp, target);
25110 
25111   for (i = 0, d = bdesc_args;
25112        i < ARRAY_SIZE (bdesc_args);
25113        i++, d++)
25114     if (d->code == fcode)
25115       switch (fcode)
25116         {
        case IX86_BUILTIN_FABSQ:
        case IX86_BUILTIN_COPYSIGNQ:
          if (!TARGET_SSE2)
            /* Emit a normal call if SSE2 isn't available.  */
            return expand_call (exp, target, ignore);
          /* FALLTHRU */
        default:
          return ix86_expand_args_builtin (d, exp, target);
25124         }
25125 
25126   for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25127     if (d->code == fcode)
25128       return ix86_expand_sse_comi (d, exp, target);
25129 
25130   for (i = 0, d = bdesc_pcmpestr;
25131        i < ARRAY_SIZE (bdesc_pcmpestr);
25132        i++, d++)
25133     if (d->code == fcode)
25134       return ix86_expand_sse_pcmpestr (d, exp, target);
25135 
25136   for (i = 0, d = bdesc_pcmpistr;
25137        i < ARRAY_SIZE (bdesc_pcmpistr);
25138        i++, d++)
25139     if (d->code == fcode)
25140       return ix86_expand_sse_pcmpistr (d, exp, target);
25141 
25142   for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25143     if (d->code == fcode)
25144       return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25145                                             (enum multi_arg_type)d->flag,
25146                                             d->comparison);
25147 
25148   gcc_unreachable ();
25149 }
25150 
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN, result vector type TYPE_OUT and argument
   vector type TYPE_IN, or NULL_TREE if it is not available.  */
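
/* For illustration: when the vectorizer asks for BUILT_IN_SQRT with a
   V2DF result and a V2DF argument, the switch below returns the decl
   recorded for IX86_BUILTIN_SQRTPD, i.e. the sqrtpd builtin.  */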
25154 
25155 static tree
25156 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
25157                                   tree type_in)
25158 {
25159   enum machine_mode in_mode, out_mode;
25160   int in_n, out_n;
25161 
25162   if (TREE_CODE (type_out) != VECTOR_TYPE
25163       || TREE_CODE (type_in) != VECTOR_TYPE)
25164     return NULL_TREE;
25165 
25166   out_mode = TYPE_MODE (TREE_TYPE (type_out));
25167   out_n = TYPE_VECTOR_SUBPARTS (type_out);
25168   in_mode = TYPE_MODE (TREE_TYPE (type_in));
25169   in_n = TYPE_VECTOR_SUBPARTS (type_in);
25170 
25171   switch (fn)
25172     {
25173     case BUILT_IN_SQRT:
25174       if (out_mode == DFmode && out_n == 2
25175           && in_mode == DFmode && in_n == 2)
25176         return ix86_builtins[IX86_BUILTIN_SQRTPD];
25177       break;
25178 
25179     case BUILT_IN_SQRTF:
25180       if (out_mode == SFmode && out_n == 4
25181           && in_mode == SFmode && in_n == 4)
25182         return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25183       break;
25184 
25185     case BUILT_IN_LRINT:
25186       if (out_mode == SImode && out_n == 4
25187           && in_mode == DFmode && in_n == 2)
25188         return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25189       break;
25190 
25191     case BUILT_IN_LRINTF:
25192       if (out_mode == SImode && out_n == 4
25193           && in_mode == SFmode && in_n == 4)
25194         return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25195       break;
25196 
25197     default:
25198       ;
25199     }
25200 
25201   /* Dispatch to a handler for a vectorization library.  */
25202   if (ix86_veclib_handler)
25203     return (*ix86_veclib_handler)(fn, type_out, type_in);
25204 
25205   return NULL_TREE;
25206 }
25207 
25208 /* Handler for an SVML-style interface to
25209    a library with vectorized intrinsics.  */
25210 
25211 static tree
25212 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25213 {
25214   char name[20];
25215   tree fntype, new_fndecl, args;
25216   unsigned arity;
25217   const char *bname;
25218   enum machine_mode el_mode, in_mode;
25219   int n, in_n;
25220 
  /* The SVML library is suitable for unsafe math only.  */
25222   if (!flag_unsafe_math_optimizations)
25223     return NULL_TREE;
25224 
25225   el_mode = TYPE_MODE (TREE_TYPE (type_out));
25226   n = TYPE_VECTOR_SUBPARTS (type_out);
25227   in_mode = TYPE_MODE (TREE_TYPE (type_in));
25228   in_n = TYPE_VECTOR_SUBPARTS (type_in);
25229   if (el_mode != in_mode
25230       || n != in_n)
25231     return NULL_TREE;
25232 
25233   switch (fn)
25234     {
25235     case BUILT_IN_EXP:
25236     case BUILT_IN_LOG:
25237     case BUILT_IN_LOG10:
25238     case BUILT_IN_POW:
25239     case BUILT_IN_TANH:
25240     case BUILT_IN_TAN:
25241     case BUILT_IN_ATAN:
25242     case BUILT_IN_ATAN2:
25243     case BUILT_IN_ATANH:
25244     case BUILT_IN_CBRT:
25245     case BUILT_IN_SINH:
25246     case BUILT_IN_SIN:
25247     case BUILT_IN_ASINH:
25248     case BUILT_IN_ASIN:
25249     case BUILT_IN_COSH:
25250     case BUILT_IN_COS:
25251     case BUILT_IN_ACOSH:
25252     case BUILT_IN_ACOS:
25253       if (el_mode != DFmode || n != 2)
25254         return NULL_TREE;
25255       break;
25256 
25257     case BUILT_IN_EXPF:
25258     case BUILT_IN_LOGF:
25259     case BUILT_IN_LOG10F:
25260     case BUILT_IN_POWF:
25261     case BUILT_IN_TANHF:
25262     case BUILT_IN_TANF:
25263     case BUILT_IN_ATANF:
25264     case BUILT_IN_ATAN2F:
25265     case BUILT_IN_ATANHF:
25266     case BUILT_IN_CBRTF:
25267     case BUILT_IN_SINHF:
25268     case BUILT_IN_SINF:
25269     case BUILT_IN_ASINHF:
25270     case BUILT_IN_ASINF:
25271     case BUILT_IN_COSHF:
25272     case BUILT_IN_COSF:
25273     case BUILT_IN_ACOSHF:
25274     case BUILT_IN_ACOSF:
25275       if (el_mode != SFmode || n != 4)
25276         return NULL_TREE;
25277       break;
25278 
25279     default:
25280       return NULL_TREE;
25281     }
25282 
25283   bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25284 
25285   if (fn == BUILT_IN_LOGF)
25286     strcpy (name, "vmlsLn4");
25287   else if (fn == BUILT_IN_LOG)
25288     strcpy (name, "vmldLn2");
25289   else if (n == 4)
25290     {
25291       sprintf (name, "vmls%s", bname+10);
25292       name[strlen (name)-1] = '4';
25293     }
25294   else
25295     sprintf (name, "vmld%s2", bname+10);
25296 
  /* Capitalize the first letter of the math function name by clearing
     the ASCII lowercase bit.  */
  name[4] &= ~0x20;
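
  /* A worked example of the mangling above: for BUILT_IN_SINF, bname is
     "__builtin_sinf", so bname+10 is "sinf"; "vmls%s" gives "vmlssinf",
     overwriting the last character yields "vmlssin4", and uppercasing
     name[4] produces "vmlsSin4".  */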
25299 
25300   arity = 0;
25301   for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25302        args = TREE_CHAIN (args))
25303     arity++;
25304 
25305   if (arity == 1)
25306     fntype = build_function_type_list (type_out, type_in, NULL);
25307   else
25308     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25309 
25310   /* Build a function declaration for the vectorized function.  */
25311   new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25312   TREE_PUBLIC (new_fndecl) = 1;
25313   DECL_EXTERNAL (new_fndecl) = 1;
25314   DECL_IS_NOVOPS (new_fndecl) = 1;
25315   TREE_READONLY (new_fndecl) = 1;
25316 
25317   return new_fndecl;
25318 }
25319 
25320 /* Handler for an ACML-style interface to
25321    a library with vectorized intrinsics.  */
25322 
25323 static tree
25324 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25325 {
25326   char name[20] = "__vr.._";
25327   tree fntype, new_fndecl, args;
25328   unsigned arity;
25329   const char *bname;
25330   enum machine_mode el_mode, in_mode;
25331   int n, in_n;
25332 
  /* The ACML library is 64-bit only, and suitable for unsafe math only,
     as it does not correctly support parts of IEEE arithmetic with the
     required precision, such as denormals.  */
25336   if (!TARGET_64BIT
25337       || !flag_unsafe_math_optimizations)
25338     return NULL_TREE;
25339 
25340   el_mode = TYPE_MODE (TREE_TYPE (type_out));
25341   n = TYPE_VECTOR_SUBPARTS (type_out);
25342   in_mode = TYPE_MODE (TREE_TYPE (type_in));
25343   in_n = TYPE_VECTOR_SUBPARTS (type_in);
25344   if (el_mode != in_mode
25345       || n != in_n)
25346     return NULL_TREE;
25347 
25348   switch (fn)
25349     {
25350     case BUILT_IN_SIN:
25351     case BUILT_IN_COS:
25352     case BUILT_IN_EXP:
25353     case BUILT_IN_LOG:
25354     case BUILT_IN_LOG2:
25355     case BUILT_IN_LOG10:
25356       name[4] = 'd';
25357       name[5] = '2';
25358       if (el_mode != DFmode
25359           || n != 2)
25360         return NULL_TREE;
25361       break;
25362 
25363     case BUILT_IN_SINF:
25364     case BUILT_IN_COSF:
25365     case BUILT_IN_EXPF:
25366     case BUILT_IN_POWF:
25367     case BUILT_IN_LOGF:
25368     case BUILT_IN_LOG2F:
25369     case BUILT_IN_LOG10F:
25370       name[4] = 's';
25371       name[5] = '4';
25372       if (el_mode != SFmode
25373           || n != 4)
25374         return NULL_TREE;
25375       break;
25376 
25377     default:
25378       return NULL_TREE;
25379     }
25380 
25381   bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25382   sprintf (name + 7, "%s", bname+10);
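
  /* A worked example: for BUILT_IN_SIN, name has already become
     "__vrd2_" above, and bname+10 is "sin", yielding "__vrd2_sin".  */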
25383 
25384   arity = 0;
25385   for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25386        args = TREE_CHAIN (args))
25387     arity++;
25388 
25389   if (arity == 1)
25390     fntype = build_function_type_list (type_out, type_in, NULL);
25391   else
25392     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25393 
25394   /* Build a function declaration for the vectorized function.  */
25395   new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25396   TREE_PUBLIC (new_fndecl) = 1;
25397   DECL_EXTERNAL (new_fndecl) = 1;
25398   DECL_IS_NOVOPS (new_fndecl) = 1;
25399   TREE_READONLY (new_fndecl) = 1;
25400 
25401   return new_fndecl;
25402 }
25403 
25404 
/* Returns a decl of a function that implements the conversion of an integer
   vector into a floating-point vector, or vice versa.  TYPE is the type of
   the integer side of the conversion.
   Returns NULL_TREE if it is not available.  */
25409 
25410 static tree
25411 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25412 {
25413   if (!TARGET_SSE2 || TREE_CODE (type) != VECTOR_TYPE
25414       /* There are only conversions from/to signed integers.  */
25415       || TYPE_UNSIGNED (TREE_TYPE (type)))
25416     return NULL_TREE;
25417 
25418   switch (code)
25419     {
25420     case FLOAT_EXPR:
25421       switch (TYPE_MODE (type))
25422         {
25423         case V4SImode:
25424           return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
25425         default:
25426           return NULL_TREE;
25427         }
25428 
25429     case FIX_TRUNC_EXPR:
25430       switch (TYPE_MODE (type))
25431         {
25432         case V4SImode:
25433           return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25434         default:
25435           return NULL_TREE;
25436         }
25437     default:
25438       return NULL_TREE;
25439 
25440     }
25441 }
25442 
/* Returns a decl for a target-specific builtin that implements the
   reciprocal of the function FN, or NULL_TREE if not available.  */
25445 
25446 static tree
25447 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25448                          bool sqrt ATTRIBUTE_UNUSED)
25449 {
25450   if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25451          && flag_finite_math_only && !flag_trapping_math
25452          && flag_unsafe_math_optimizations))
25453     return NULL_TREE;
25454 
25455   if (md_fn)
25456     /* Machine dependent builtins.  */
25457     switch (fn)
25458       {
25459         /* Vectorized version of sqrt to rsqrt conversion.  */
25460       case IX86_BUILTIN_SQRTPS_NR:
25461         return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25462 
25463       default:
25464         return NULL_TREE;
25465       }
25466   else
25467     /* Normal builtins.  */
25468     switch (fn)
25469       {
25470         /* Sqrt to rsqrt conversion.  */
25471       case BUILT_IN_SQRTF:
25472         return ix86_builtins[IX86_BUILTIN_RSQRTF];
25473 
25474       default:
25475         return NULL_TREE;
25476       }
25477 }
25478 
/* Store OPERAND to memory after reload has completed.  This means
   that we can't easily use assign_stack_local.  */
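
/* An illustrative sketch of the 64-bit, no-red-zone path below: for a
   DImode OPERAND it emits

       (set (mem:DI (pre_dec:DI (reg:DI sp))) operand)

   i.e. a push, and returns (mem:DI (reg:DI sp)) referencing the fresh
   stack slot.  */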
25481 rtx
25482 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25483 {
25484   rtx result;
25485 
25486   gcc_assert (reload_completed);
25487   if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25488     {
25489       result = gen_rtx_MEM (mode,
25490                             gen_rtx_PLUS (Pmode,
25491                                           stack_pointer_rtx,
25492                                           GEN_INT (-RED_ZONE_SIZE)));
25493       emit_move_insn (result, operand);
25494     }
25495   else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25496     {
25497       switch (mode)
25498         {
25499         case HImode:
25500         case SImode:
25501           operand = gen_lowpart (DImode, operand);
25502           /* FALLTHRU */
25503         case DImode:
25504           emit_insn (
25505                       gen_rtx_SET (VOIDmode,
25506                                    gen_rtx_MEM (DImode,
25507                                                 gen_rtx_PRE_DEC (DImode,
25508                                                         stack_pointer_rtx)),
25509                                    operand));
25510           break;
25511         default:
25512           gcc_unreachable ();
25513         }
25514       result = gen_rtx_MEM (mode, stack_pointer_rtx);
25515     }
25516   else
25517     {
25518       switch (mode)
25519         {
25520         case DImode:
25521           {
25522             rtx operands[2];
25523             split_di (&operand, 1, operands, operands + 1);
25524             emit_insn (
25525                         gen_rtx_SET (VOIDmode,
25526                                      gen_rtx_MEM (SImode,
25527                                                   gen_rtx_PRE_DEC (Pmode,
25528                                                         stack_pointer_rtx)),
25529                                      operands[1]));
25530             emit_insn (
25531                         gen_rtx_SET (VOIDmode,
25532                                      gen_rtx_MEM (SImode,
25533                                                   gen_rtx_PRE_DEC (Pmode,
25534                                                         stack_pointer_rtx)),
25535                                      operands[0]));
25536           }
25537           break;
25538         case HImode:
25539           /* Store HImodes as SImodes.  */
25540           operand = gen_lowpart (SImode, operand);
25541           /* FALLTHRU */
25542         case SImode:
25543           emit_insn (
25544                       gen_rtx_SET (VOIDmode,
25545                                    gen_rtx_MEM (GET_MODE (operand),
25546                                                 gen_rtx_PRE_DEC (SImode,
25547                                                         stack_pointer_rtx)),
25548                                    operand));
25549           break;
25550         default:
25551           gcc_unreachable ();
25552         }
25553       result = gen_rtx_MEM (mode, stack_pointer_rtx);
25554     }
25555   return result;
25556 }
25557 
/* Free the stack slot used by ix86_force_to_memory.  */
25559 void
25560 ix86_free_from_memory (enum machine_mode mode)
25561 {
25562   if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25563     {
25564       int size;
25565 
25566       if (mode == DImode || TARGET_64BIT)
25567         size = 8;
25568       else
25569         size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be
         converted to a pop or add instruction if registers are
         available.  */
25572       emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25573                               gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25574                                             GEN_INT (size))));
25575     }
25576 }
25577 
25578 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25579    QImode must go into class Q_REGS.
25580    Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
25581    movdf to do mem-to-mem moves through integer regs.  */
25582 enum reg_class
25583 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25584 {
25585   enum machine_mode mode = GET_MODE (x);
25586 
  /* We're only allowed to return a subclass of REGCLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
25589   if (regclass == NO_REGS)
25590     return NO_REGS;
25591 
25592   /* All classes can load zeros.  */
25593   if (x == CONST0_RTX (mode))
25594     return regclass;
25595 
25596   /* Force constants into memory if we are loading a (nonzero) constant into
25597      an MMX or SSE register.  This is because there are no MMX/SSE instructions
25598      to load from a constant.  */
25599   if (CONSTANT_P (x)
25600       && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25601     return NO_REGS;
25602 
25603   /* Prefer SSE regs only, if we can use them for math.  */
25604   if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25605     return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25606 
25607   /* Floating-point constants need more complex checks.  */
25608   if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25609     {
25610       /* General regs can load everything.  */
25611       if (reg_class_subset_p (regclass, GENERAL_REGS))
25612         return regclass;
25613 
25614       /* Floats can load 0 and 1 plus some others.  Note that we eliminated
25615          zero above.  We only want to wind up preferring 80387 registers if
25616          we plan on doing computation with them.  */
25617       if (TARGET_80387
25618           && standard_80387_constant_p (x))
25619         {
          /* Limit class to non-SSE registers.  */
25621           if (regclass == FLOAT_SSE_REGS)
25622             return FLOAT_REGS;
25623           if (regclass == FP_TOP_SSE_REGS)
25624             return FP_TOP_REG;
25625           if (regclass == FP_SECOND_SSE_REGS)
25626             return FP_SECOND_REG;
25627           if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25628             return regclass;
25629         }
25630 
25631       return NO_REGS;
25632     }
25633 
  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int), which can only be computed into general
     regs.  */
25637   if (GET_CODE (x) == PLUS)
25638     return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25639 
25640   /* QImode constants are easy to load, but non-constant QImode data
25641      must go into Q_REGS.  */
25642   if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25643     {
25644       if (reg_class_subset_p (regclass, Q_REGS))
25645         return regclass;
25646       if (reg_class_subset_p (Q_REGS, regclass))
25647         return Q_REGS;
25648       return NO_REGS;
25649     }
25650 
25651   return regclass;
25652 }
25653 
25654 /* Discourage putting floating-point values in SSE registers unless
25655    SSE math is being used, and likewise for the 387 registers.  */
25656 enum reg_class
25657 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25658 {
25659   enum machine_mode mode = GET_MODE (x);
25660 
  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would otherwise not return a subset of REGCLASS, reject
     this alternative: if reload cannot do this, it will still use its
     choice.  */
25665   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25666     return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
25667 
25668   if (X87_FLOAT_MODE_P (mode))
25669     {
25670       if (regclass == FP_TOP_SSE_REGS)
25671         return FP_TOP_REG;
25672       else if (regclass == FP_SECOND_SSE_REGS)
25673         return FP_SECOND_REG;
25674       else
25675         return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
25676     }
25677 
25678   return regclass;
25679 }
25680 
25681 static enum reg_class
25682 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
25683                        enum machine_mode mode,
25684                        secondary_reload_info *sri ATTRIBUTE_UNUSED)
25685 {
  /* QImode spills from non-QI registers require an
     intermediate register on 32-bit targets.  */
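  /* For example, the low byte of %esi cannot be stored directly: %sil
     exists only with a REX prefix, i.e. only in 64-bit mode, so the value
     must first be copied into one of %eax..%edx (Q_REGS).  */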
25688   if (!in_p && mode == QImode && !TARGET_64BIT
25689       && (rclass == GENERAL_REGS
25690           || rclass == LEGACY_REGS
25691           || rclass == INDEX_REGS))
25692     {
25693       int regno;
25694 
25695       if (REG_P (x))
25696         regno = REGNO (x);
25697       else
25698         regno = -1;
25699 
25700       if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
25701         regno = true_regnum (x);
25702 
25703       /* Return Q_REGS if the operand is in memory.  */
25704       if (regno == -1)
25705         return Q_REGS;
25706     }
25707 
25708   return NO_REGS;
25709 }
25710 
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   To keep register_move_cost fast, an inline variant is provided.

   This check can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We avoid
   this by never combining those units in a single alternative in the
   machine description.  Ensure that this constraint holds to avoid
   surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do
   not enforce these sanity checks.  */
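/* For instance, a DFmode value moving between FLOAT_REGS and SSE_REGS has
   no direct copy instruction, so it is bounced through a stack slot: an
   fstpl followed by a movsd load, or vice versa.  */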
25723 
25724 static inline int
25725 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25726                               enum machine_mode mode, int strict)
25727 {
25728   if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
25729       || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
25730       || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
25731       || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
25732       || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
25733       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
25734     {
25735       gcc_assert (!strict);
25736       return true;
25737     }
25738 
25739   if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
25740     return true;
25741 
  /* ??? This is a lie.  We do have moves between mmx and general regs, and
     between mmx and sse2 regs.  But by saying we need secondary memory we
     discourage the register allocator from using the mmx registers unless
     needed.  */
25745   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
25746     return true;
25747 
25748   if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25749     {
25750       /* SSE1 doesn't have any direct moves from other classes.  */
25751       if (!TARGET_SSE2)
25752         return true;
25753 
25754       /* If the target says that inter-unit moves are more expensive
25755          than moving through memory, then don't generate them.  */
25756       if (!TARGET_INTER_UNIT_MOVES)
25757         return true;
25758 
25759       /* Between SSE and general, we have moves no larger than word size.  */
25760       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
25761         return true;
25762     }
25763 
25764   return false;
25765 }
25766 
25767 int
25768 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25769                               enum machine_mode mode, int strict)
25770 {
25771   return inline_secondary_memory_needed (class1, class2, mode, strict);
25772 }
25773 
25774 /* Return true if the registers in CLASS cannot represent the change from
25775    modes FROM to TO.  */
25776 
25777 bool
25778 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
25779                                enum reg_class regclass)
25780 {
25781   if (from == to)
25782     return false;
25783 
25784   /* x87 registers can't do subreg at all, as all values are reformatted
25785      to extended precision.  */
25786   if (MAYBE_FLOAT_CLASS_P (regclass))
25787     return true;
25788 
25789   if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
25790     {
25791       /* Vector registers do not support QI or HImode loads.  If we don't
25792          disallow a change to these modes, reload will assume it's ok to
25793          drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
25794          the vec_dupv4hi pattern.  */
25795       if (GET_MODE_SIZE (from) < 4)
25796         return true;
25797 
25798       /* Vector registers do not support subreg with nonzero offsets, which
25799          are otherwise valid for integer registers.  Since we can't see
25800          whether we have a nonzero offset from here, prohibit all
25801          nonparadoxical subregs changing size.  */
25802       if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
25803         return true;
25804     }
25805 
25806   return false;
25807 }
25808 
25809 /* Return the cost of moving data of mode M between a
25810    register and memory.  A value of 2 is the default; this cost is
25811    relative to those in `REGISTER_MOVE_COST'.
25812 
   This function is used extensively by register_move_cost, which is used to
   build tables at startup, so keep it inline.
   When IN is 2, return the maximum of the in and out move costs.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Also model the increased cost of moving QImode values in non-Q_REGS
   classes.  */
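/* Worked example: on a 32-bit target, moving a DImode value between
   GENERAL_REGS and memory takes two word-sized moves, so the default case
   below returns int_load[2] (or int_store[2]) scaled by
   GET_MODE_SIZE (DImode) / UNITS_PER_WORD == 2.  */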
25824 static inline int
25825 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
25826                          int in)
25827 {
25828   int cost;
25829   if (FLOAT_CLASS_P (regclass))
25830     {
25831       int index;
25832       switch (mode)
25833         {
25834           case SFmode:
25835             index = 0;
25836             break;
25837           case DFmode:
25838             index = 1;
25839             break;
25840           case XFmode:
25841             index = 2;
25842             break;
25843           default:
25844             return 100;
25845         }
25846       if (in == 2)
25847         return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
25848       return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
25849     }
25850   if (SSE_CLASS_P (regclass))
25851     {
25852       int index;
25853       switch (GET_MODE_SIZE (mode))
25854         {
25855           case 4:
25856             index = 0;
25857             break;
25858           case 8:
25859             index = 1;
25860             break;
25861           case 16:
25862             index = 2;
25863             break;
25864           default:
25865             return 100;
25866         }
25867       if (in == 2)
25868         return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
25869       return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
25870     }
25871   if (MMX_CLASS_P (regclass))
25872     {
25873       int index;
25874       switch (GET_MODE_SIZE (mode))
25875         {
25876           case 4:
25877             index = 0;
25878             break;
25879           case 8:
25880             index = 1;
25881             break;
25882           default:
25883             return 100;
25884         }
      if (in == 2)
25886         return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
25887       return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
25888     }
25889   switch (GET_MODE_SIZE (mode))
25890     {
25891       case 1:
25892         if (Q_CLASS_P (regclass) || TARGET_64BIT)
25893           {
25894             if (!in)
25895               return ix86_cost->int_store[0];
25896             if (TARGET_PARTIAL_REG_DEPENDENCY
25897                 && optimize_function_for_speed_p (cfun))
25898               cost = ix86_cost->movzbl_load;
25899             else
25900               cost = ix86_cost->int_load[0];
25901             if (in == 2)
25902               return MAX (cost, ix86_cost->int_store[0]);
25903             return cost;
25904           }
25905         else
25906           {
25907            if (in == 2)
25908              return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
25909            if (in)
25910              return ix86_cost->movzbl_load;
25911            else
25912              return ix86_cost->int_store[0] + 4;
25913           }
25914         break;
25915       case 2:
25916         if (in == 2)
25917           return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
25918         return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
25919       default:
25920         /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
25921         if (mode == TFmode)
25922           mode = XFmode;
25923         if (in == 2)
25924           cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
25925         else if (in)
25926           cost = ix86_cost->int_load[2];
25927         else
25928           cost = ix86_cost->int_store[2];
25929         return (cost * (((int) GET_MODE_SIZE (mode)
25930                         + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
25931     }
25932 }
25933 
25934 int
25935 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
25936 {
25937   return inline_memory_move_cost (mode, regclass, in);
25938 }
25939 
25940 
25941 /* Return the cost of moving data from a register in class CLASS1 to
25942    one in class CLASS2.
25943 
25944    It is not required that the cost always equal 2 when FROM is the same as TO;
25945    on some machines it is expensive to move between registers if they are not
25946    general registers.  */
25947 
25948 int
25949 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
25950                          enum reg_class class2)
25951 {
  /* In case we require secondary memory, compute the cost of the store
     followed by the load.  To avoid bad register allocation choices, we need
     this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
25955 
25956   if (inline_secondary_memory_needed (class1, class2, mode, 0))
25957     {
25958       int cost = 1;
25959 
25960       cost += inline_memory_move_cost (mode, class1, 2);
25961       cost += inline_memory_move_cost (mode, class2, 2);
25962 
      /* When copying from a general purpose register we may emit multiple
         stores followed by a single load, causing a memory-size-mismatch
         stall.  Count this as an arbitrarily high cost of 20.  */
25966       if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
25967         cost += 20;
25968 
25969       /* In the case of FP/MMX moves, the registers actually overlap, and we
25970          have to switch modes in order to treat them differently.  */
25971       if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
25972           || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
25973         cost += 20;
25974 
25975       return cost;
25976     }
25977 
25978   /* Moves between SSE/MMX and integer unit are expensive.  */
25979   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
25980       || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25981 
    /* ??? By keeping the returned value relatively high, we limit the number
       of moves between integer and MMX/SSE registers for all targets.
       Additionally, the high value prevents a problem with
       x86_modes_tieable_p (), where integer modes in MMX/SSE registers are
       not tieable because of missing QImode and HImode moves to, from or
       between MMX/SSE registers.  */
25988     return MAX (8, ix86_cost->mmxsse_to_integer);
25989 
25990   if (MAYBE_FLOAT_CLASS_P (class1))
25991     return ix86_cost->fp_move;
25992   if (MAYBE_SSE_CLASS_P (class1))
25993     return ix86_cost->sse_move;
25994   if (MAYBE_MMX_CLASS_P (class1))
25995     return ix86_cost->mmx_move;
25996   return 2;
25997 }
25998 
25999 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
26000 
26001 bool
26002 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26003 {
  /* The flags register is the only register that can hold CCmode values,
     and it can hold nothing else.  */
26005   if (CC_REGNO_P (regno))
26006     return GET_MODE_CLASS (mode) == MODE_CC;
26007   if (GET_MODE_CLASS (mode) == MODE_CC
26008       || GET_MODE_CLASS (mode) == MODE_RANDOM
26009       || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26010     return 0;
26011   if (FP_REGNO_P (regno))
26012     return VALID_FP_MODE_P (mode);
26013   if (SSE_REGNO_P (regno))
26014     {
26015       /* We implement the move patterns for all vector modes into and
26016          out of SSE registers, even when no operation instructions
26017          are available.  OImode move is available only when AVX is
26018          enabled.  */
26019       return ((TARGET_AVX && mode == OImode)
26020               || VALID_AVX256_REG_MODE (mode)
26021               || VALID_SSE_REG_MODE (mode)
26022               || VALID_SSE2_REG_MODE (mode)
26023               || VALID_MMX_REG_MODE (mode)
26024               || VALID_MMX_REG_MODE_3DNOW (mode));
26025     }
26026   if (MMX_REGNO_P (regno))
26027     {
26028       /* We implement the move patterns for 3DNOW modes even in MMX mode,
26029          so if the register is available at all, then we can move data of
26030          the given mode into or out of it.  */
26031       return (VALID_MMX_REG_MODE (mode)
26032               || VALID_MMX_REG_MODE_3DNOW (mode));
26033     }
26034 
26035   if (mode == QImode)
26036     {
      /* Take care with QImode values - they can live in non-QI regs, but
         doing so causes partial register stalls (e.g. writing %al and later
         reading %eax on ppro-class cores).  */
26039       if (regno <= BX_REG || TARGET_64BIT)
26040         return 1;
26041       if (!TARGET_PARTIAL_REG_STALL)
26042         return 1;
26043       return reload_in_progress || reload_completed;
26044     }
26045   /* We handle both integer and floats in the general purpose registers.  */
26046   else if (VALID_INT_MODE_P (mode))
26047     return 1;
26048   else if (VALID_FP_MODE_P (mode))
26049     return 1;
26050   else if (VALID_DFP_MODE_P (mode))
26051     return 1;
26052   /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
26053      on to use that value in smaller contexts, this can easily force a
26054      pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
26055      supporting DImode, allow it.  */
26056   else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26057     return 1;
26058 
26059   return 0;
26060 }
26061 
26062 /* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
26063    tieable integer mode.  */
26064 
26065 static bool
26066 ix86_tieable_integer_mode_p (enum machine_mode mode)
26067 {
26068   switch (mode)
26069     {
26070     case HImode:
26071     case SImode:
26072       return true;
26073 
26074     case QImode:
26075       return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26076 
26077     case DImode:
26078       return TARGET_64BIT;
26079 
26080     default:
26081       return false;
26082     }
26083 }
26084 
26085 /* Return true if MODE1 is accessible in a register that can hold MODE2
26086    without copying.  That is, all register classes that can hold MODE2
26087    can also hold MODE1.  */
26088 
26089 bool
26090 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26091 {
26092   if (mode1 == mode2)
26093     return true;
26094 
26095   if (ix86_tieable_integer_mode_p (mode1)
26096       && ix86_tieable_integer_mode_p (mode2))
26097     return true;
26098 
26099   /* MODE2 being XFmode implies fp stack or general regs, which means we
26100      can tie any smaller floating point modes to it.  Note that we do not
26101      tie this with TFmode.  */
26102   if (mode2 == XFmode)
26103     return mode1 == SFmode || mode1 == DFmode;
26104 
26105   /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26106      that we can tie it with SFmode.  */
26107   if (mode2 == DFmode)
26108     return mode1 == SFmode;
26109 
26110   /* If MODE2 is only appropriate for an SSE register, then tie with
26111      any other mode acceptable to SSE registers.  */
26112   if (GET_MODE_SIZE (mode2) == 16
26113       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26114     return (GET_MODE_SIZE (mode1) == 16
26115             && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26116 
26117   /* If MODE2 is appropriate for an MMX register, then tie
26118      with any other mode acceptable to MMX registers.  */
26119   if (GET_MODE_SIZE (mode2) == 8
26120       && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26121     return (GET_MODE_SIZE (mode1) == 8
26122             && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26123 
26124   return false;
26125 }
26126 
26127 /* Compute a (partial) cost for rtx X.  Return true if the complete
26128    cost has been computed, and false if subexpressions should be
26129    scanned.  In either case, *TOTAL contains the cost result.  */
26130 
26131 static bool
26132 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26133 {
26134   enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26135   enum machine_mode mode = GET_MODE (x);
26136   const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26137 
26138   switch (code)
26139     {
26140     case CONST_INT:
26141     case CONST:
26142     case LABEL_REF:
26143     case SYMBOL_REF:
26144       if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26145         *total = 3;
26146       else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26147         *total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
               && (!TARGET_64BIT
                   || (GET_CODE (x) != LABEL_REF
                       && (GET_CODE (x) != SYMBOL_REF
                           || !SYMBOL_REF_LOCAL_P (x)))))
26153         *total = 1;
26154       else
26155         *total = 0;
26156       return true;
26157 
26158     case CONST_DOUBLE:
26159       if (mode == VOIDmode)
26160         *total = 0;
26161       else
26162         switch (standard_80387_constant_p (x))
26163           {
26164           case 1: /* 0.0 */
26165             *total = 1;
26166             break;
26167           default: /* Other constants */
26168             *total = 2;
26169             break;
26170           case 0:
26171           case -1:
26172             /* Start with (MEM (SYMBOL_REF)), since that's where
26173                it'll probably end up.  Add a penalty for size.  */
26174             *total = (COSTS_N_INSNS (1)
26175                       + (flag_pic != 0 && !TARGET_64BIT)
26176                       + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26177             break;
26178           }
26179       return true;
26180 
26181     case ZERO_EXTEND:
      /* The zero extension is often completely free on x86_64, since
         writing a 32-bit register implicitly zero-extends it to the full
         64-bit register; make it as cheap as possible.  */
26184       if (TARGET_64BIT && mode == DImode
26185           && GET_MODE (XEXP (x, 0)) == SImode)
26186         *total = 1;
26187       else if (TARGET_ZERO_EXTEND_WITH_AND)
26188         *total = cost->add;
26189       else
26190         *total = cost->movzx;
26191       return false;
26192 
26193     case SIGN_EXTEND:
26194       *total = cost->movsx;
26195       return false;
26196 
26197     case ASHIFT:
26198       if (CONST_INT_P (XEXP (x, 1))
26199           && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26200         {
26201           HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26202           if (value == 1)
26203             {
26204               *total = cost->add;
26205               return false;
26206             }
26207           if ((value == 2 || value == 3)
26208               && cost->lea <= cost->shift_const)
26209             {
26210               *total = cost->lea;
26211               return false;
26212             }
26213         }
26214       /* FALLTHRU */
26215 
26216     case ROTATE:
26217     case ASHIFTRT:
26218     case LSHIFTRT:
26219     case ROTATERT:
26220       if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26221         {
26222           if (CONST_INT_P (XEXP (x, 1)))
26223             {
26224               if (INTVAL (XEXP (x, 1)) > 32)
26225                 *total = cost->shift_const + COSTS_N_INSNS (2);
26226               else
26227                 *total = cost->shift_const * 2;
26228             }
26229           else
26230             {
26231               if (GET_CODE (XEXP (x, 1)) == AND)
26232                 *total = cost->shift_var * 2;
26233               else
26234                 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26235             }
26236         }
26237       else
26238         {
26239           if (CONST_INT_P (XEXP (x, 1)))
26240             *total = cost->shift_const;
26241           else
26242             *total = cost->shift_var;
26243         }
26244       return false;
26245 
26246     case MULT:
26247       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26248         {
26249           /* ??? SSE scalar cost should be used here.  */
26250           *total = cost->fmul;
26251           return false;
26252         }
26253       else if (X87_FLOAT_MODE_P (mode))
26254         {
26255           *total = cost->fmul;
26256           return false;
26257         }
26258       else if (FLOAT_MODE_P (mode))
26259         {
26260           /* ??? SSE vector cost should be used here.  */
26261           *total = cost->fmul;
26262           return false;
26263         }
26264       else
26265         {
26266           rtx op0 = XEXP (x, 0);
26267           rtx op1 = XEXP (x, 1);
26268           int nbits;
26269           if (CONST_INT_P (XEXP (x, 1)))
26270             {
26271               unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
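              /* Count the set bits: each "value &= value - 1" clears the
                 lowest set bit (Kernighan's method), so NBITS models the
                 per-set-bit cost of the multiply.  */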
26272               for (nbits = 0; value != 0; value &= value - 1)
26273                 nbits++;
26274             }
26275           else
26276             /* This is arbitrary.  */
26277             nbits = 7;
26278 
26279           /* Compute costs correctly for widening multiplication.  */
26280           if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26281               && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26282                  == GET_MODE_SIZE (mode))
26283             {
26284               int is_mulwiden = 0;
26285               enum machine_mode inner_mode = GET_MODE (op0);
26286 
26287               if (GET_CODE (op0) == GET_CODE (op1))
26288                 is_mulwiden = 1, op1 = XEXP (op1, 0);
26289               else if (CONST_INT_P (op1))
26290                 {
26291                   if (GET_CODE (op0) == SIGN_EXTEND)
26292                     is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26293                                   == INTVAL (op1);
26294                   else
26295                     is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26296                 }
26297 
26298               if (is_mulwiden)
26299                 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26300             }
26301 
26302           *total = (cost->mult_init[MODE_INDEX (mode)]
26303                     + nbits * cost->mult_bit
26304                     + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26305 
26306           return true;
26307         }
26308 
26309     case DIV:
26310     case UDIV:
26311     case MOD:
26312     case UMOD:
26313       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26314         /* ??? SSE cost should be used here.  */
26315         *total = cost->fdiv;
26316       else if (X87_FLOAT_MODE_P (mode))
26317         *total = cost->fdiv;
26318       else if (FLOAT_MODE_P (mode))
26319         /* ??? SSE vector cost should be used here.  */
26320         *total = cost->fdiv;
26321       else
26322         *total = cost->divide[MODE_INDEX (mode)];
26323       return false;
26324 
26325     case PLUS:
26326       if (GET_MODE_CLASS (mode) == MODE_INT
26327                && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26328         {
26329           if (GET_CODE (XEXP (x, 0)) == PLUS
26330               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26331               && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26332               && CONSTANT_P (XEXP (x, 1)))
26333             {
26334               HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26335               if (val == 2 || val == 4 || val == 8)
26336                 {
26337                   *total = cost->lea;
26338                   *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26339                   *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26340                                       outer_code, speed);
26341                   *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26342                   return true;
26343                 }
26344             }
26345           else if (GET_CODE (XEXP (x, 0)) == MULT
26346                    && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26347             {
26348               HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26349               if (val == 2 || val == 4 || val == 8)
26350                 {
26351                   *total = cost->lea;
26352                   *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26353                   *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26354                   return true;
26355                 }
26356             }
26357           else if (GET_CODE (XEXP (x, 0)) == PLUS)
26358             {
26359               *total = cost->lea;
26360               *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26361               *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26362               *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26363               return true;
26364             }
26365         }
26366       /* FALLTHRU */
26367 
26368     case MINUS:
26369       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26370         {
26371           /* ??? SSE cost should be used here.  */
26372           *total = cost->fadd;
26373           return false;
26374         }
26375       else if (X87_FLOAT_MODE_P (mode))
26376         {
26377           *total = cost->fadd;
26378           return false;
26379         }
26380       else if (FLOAT_MODE_P (mode))
26381         {
26382           /* ??? SSE vector cost should be used here.  */
26383           *total = cost->fadd;
26384           return false;
26385         }
26386       /* FALLTHRU */
26387 
26388     case AND:
26389     case IOR:
26390     case XOR:
26391       if (!TARGET_64BIT && mode == DImode)
26392         {
26393           *total = (cost->add * 2
26394                     + (rtx_cost (XEXP (x, 0), outer_code, speed)
26395                        << (GET_MODE (XEXP (x, 0)) != DImode))
26396                     + (rtx_cost (XEXP (x, 1), outer_code, speed)
26397                        << (GET_MODE (XEXP (x, 1)) != DImode)));
26398           return true;
26399         }
26400       /* FALLTHRU */
26401 
26402     case NEG:
26403       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26404         {
26405           /* ??? SSE cost should be used here.  */
26406           *total = cost->fchs;
26407           return false;
26408         }
26409       else if (X87_FLOAT_MODE_P (mode))
26410         {
26411           *total = cost->fchs;
26412           return false;
26413         }
26414       else if (FLOAT_MODE_P (mode))
26415         {
26416           /* ??? SSE vector cost should be used here.  */
26417           *total = cost->fchs;
26418           return false;
26419         }
26420       /* FALLTHRU */
26421 
26422     case NOT:
26423       if (!TARGET_64BIT && mode == DImode)
26424         *total = cost->add * 2;
26425       else
26426         *total = cost->add;
26427       return false;
26428 
26429     case COMPARE:
26430       if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26431           && XEXP (XEXP (x, 0), 1) == const1_rtx
26432           && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26433           && XEXP (x, 1) == const0_rtx)
26434         {
26435           /* This kind of construct is implemented using test[bwl].
26436              Treat it as if we had an AND.  */
26437           *total = (cost->add
26438                     + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26439                     + rtx_cost (const1_rtx, outer_code, speed));
26440           return true;
26441         }
26442       return false;
26443 
26444     case FLOAT_EXTEND:
26445       if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26446         *total = 0;
26447       return false;
26448 
26449     case ABS:
26450       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26451         /* ??? SSE cost should be used here.  */
26452         *total = cost->fabs;
26453       else if (X87_FLOAT_MODE_P (mode))
26454         *total = cost->fabs;
26455       else if (FLOAT_MODE_P (mode))
26456         /* ??? SSE vector cost should be used here.  */
26457         *total = cost->fabs;
26458       return false;
26459 
26460     case SQRT:
26461       if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26462         /* ??? SSE cost should be used here.  */
26463         *total = cost->fsqrt;
26464       else if (X87_FLOAT_MODE_P (mode))
26465         *total = cost->fsqrt;
26466       else if (FLOAT_MODE_P (mode))
26467         /* ??? SSE vector cost should be used here.  */
26468         *total = cost->fsqrt;
26469       return false;
26470 
26471     case UNSPEC:
26472       if (XINT (x, 1) == UNSPEC_TP)
26473         *total = 0;
26474       return false;
26475 
26476     default:
26477       return false;
26478     }
26479 }
26480 
26481 #if TARGET_MACHO
26482 
26483 static int current_machopic_label_num;
26484 
26485 /* Given a symbol name and its associated stub, write out the
26486    definition of the stub.  */
26487 
26488 void
26489 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26490 {
26491   unsigned int length;
26492   char *binder_name, *symbol_name, lazy_ptr_name[32];
26493   int label = ++current_machopic_label_num;
26494 
26495   /* For 64-bit we shouldn't get here.  */
26496   gcc_assert (!TARGET_64BIT);
26497 
26498   /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
26499   symb = (*targetm.strip_name_encoding) (symb);
26500 
26501   length = strlen (stub);
26502   binder_name = XALLOCAVEC (char, length + 32);
26503   GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26504 
26505   length = strlen (symb);
26506   symbol_name = XALLOCAVEC (char, length + 32);
26507   GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26508 
26509   sprintf (lazy_ptr_name, "L%d$lz", label);
26510 
26511   if (MACHOPIC_PURE)
26512     switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26513   else
26514     switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26515 
26516   fprintf (file, "%s:\n", stub);
26517   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26518 
26519   if (MACHOPIC_PURE)
26520     {
26521       fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26522       fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26523       fprintf (file, "\tjmp\t*%%edx\n");
26524     }
26525   else
26526     fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
26527 
26528   fprintf (file, "%s:\n", binder_name);
26529 
26530   if (MACHOPIC_PURE)
26531     {
26532       fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26533       fprintf (file, "\tpushl\t%%eax\n");
26534     }
26535   else
26536     fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26537 
26538   fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
26539 
26540   switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26541   fprintf (file, "%s:\n", lazy_ptr_name);
26542   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26543   fprintf (file, "\t.long %s\n", binder_name);
26544 }
26545 
26546 void
26547 darwin_x86_file_end (void)
26548 {
26549   darwin_file_end ();
26550   ix86_file_end ();
26551 }
26552 #endif /* TARGET_MACHO */
26553 
26554 /* Order the registers for register allocator.  */
26555 
26556 void
26557 x86_order_regs_for_local_alloc (void)
26558 {
26559    int pos = 0;
26560    int i;
26561 
26562    /* First allocate the local general purpose registers.  */
26563    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26564      if (GENERAL_REGNO_P (i) && call_used_regs[i])
26565         reg_alloc_order [pos++] = i;
26566 
26567    /* Global general purpose registers.  */
26568    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26569      if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26570         reg_alloc_order [pos++] = i;
26571 
26572    /* x87 registers come first in case we are doing FP math
26573       using them.  */
26574    if (!TARGET_SSE_MATH)
26575      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26576        reg_alloc_order [pos++] = i;
26577 
26578    /* SSE registers.  */
26579    for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26580      reg_alloc_order [pos++] = i;
26581    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26582      reg_alloc_order [pos++] = i;
26583 
26584    /* x87 registers.  */
26585    if (TARGET_SSE_MATH)
26586      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26587        reg_alloc_order [pos++] = i;
26588 
26589    for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26590      reg_alloc_order [pos++] = i;
26591 
   /* Initialize the rest of the array, as we do not allocate some registers
      at all.  */
26594    while (pos < FIRST_PSEUDO_REGISTER)
26595      reg_alloc_order [pos++] = 0;
26596 }
26597 
/* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
   struct attribute_spec.handler.  */
26600 static tree
26601 ix86_handle_abi_attribute (tree *node, tree name,
26602                               tree args ATTRIBUTE_UNUSED,
26603                               int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26604 {
26605   if (TREE_CODE (*node) != FUNCTION_TYPE
26606       && TREE_CODE (*node) != METHOD_TYPE
26607       && TREE_CODE (*node) != FIELD_DECL
26608       && TREE_CODE (*node) != TYPE_DECL)
26609     {
26610       warning (OPT_Wattributes, "%qs attribute only applies to functions",
26611                IDENTIFIER_POINTER (name));
26612       *no_add_attrs = true;
26613       return NULL_TREE;
26614     }
26615   if (!TARGET_64BIT)
26616     {
26617       warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
26618                IDENTIFIER_POINTER (name));
26619       *no_add_attrs = true;
26620       return NULL_TREE;
26621     }
26622 
  /* The ms_abi and sysv_abi attributes are mutually exclusive.  */
26624   if (is_attribute_p ("ms_abi", name))
26625     {
26626       if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26627         {
26628           error ("ms_abi and sysv_abi attributes are not compatible");
26629         }
26630 
26631       return NULL_TREE;
26632     }
26633   else if (is_attribute_p ("sysv_abi", name))
26634     {
26635       if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26636         {
26637           error ("ms_abi and sysv_abi attributes are not compatible");
26638         }
26639 
26640       return NULL_TREE;
26641     }
26642 
26643   return NULL_TREE;
26644 }
26645 
26646 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26647    struct attribute_spec.handler.  */
26648 static tree
26649 ix86_handle_struct_attribute (tree *node, tree name,
26650                               tree args ATTRIBUTE_UNUSED,
26651                               int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26652 {
26653   tree *type = NULL;
26654   if (DECL_P (*node))
26655     {
26656       if (TREE_CODE (*node) == TYPE_DECL)
26657         type = &TREE_TYPE (*node);
26658     }
26659   else
26660     type = node;
26661 
26662   if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26663                  || TREE_CODE (*type) == UNION_TYPE)))
26664     {
26665       warning (OPT_Wattributes, "%qs attribute ignored",
26666                IDENTIFIER_POINTER (name));
26667       *no_add_attrs = true;
26668     }
26669 
26670   else if ((is_attribute_p ("ms_struct", name)
26671             && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26672            || ((is_attribute_p ("gcc_struct", name)
26673                 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26674     {
26675       warning (OPT_Wattributes, "%qs incompatible attribute ignored",
26676                IDENTIFIER_POINTER (name));
26677       *no_add_attrs = true;
26678     }
26679 
26680   return NULL_TREE;
26681 }
26682 
26683 static bool
26684 ix86_ms_bitfield_layout_p (const_tree record_type)
26685 {
  return ((TARGET_MS_BITFIELD_LAYOUT
           && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
          || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
26689 }
26690 
26691 /* Returns an expression indicating where the this parameter is
26692    located on entry to the FUNCTION.  */
26693 
26694 static rtx
26695 x86_this_parameter (tree function)
26696 {
26697   tree type = TREE_TYPE (function);
26698   bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
26699   int nregs;
26700 
26701   if (TARGET_64BIT)
26702     {
26703       const int *parm_regs;
26704 
26705       if (ix86_function_type_abi (type) == MS_ABI)
26706         parm_regs = x86_64_ms_abi_int_parameter_registers;
26707       else
26708         parm_regs = x86_64_int_parameter_registers;
26709       return gen_rtx_REG (DImode, parm_regs[aggr]);
26710     }
26711 
26712   nregs = ix86_function_regparm (type, function);
26713 
26714   if (nregs > 0 && !stdarg_p (type))
26715     {
26716       int regno;
26717 
26718       if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
26719         regno = aggr ? DX_REG : CX_REG;
26720       else
26721         {
26722           regno = AX_REG;
26723           if (aggr)
26724             {
26725               regno = DX_REG;
26726               if (nregs == 1)
26727                 return gen_rtx_MEM (SImode,
26728                                     plus_constant (stack_pointer_rtx, 4));
26729             }
26730         }
26731       return gen_rtx_REG (SImode, regno);
26732     }
26733 
26734   return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
26735 }
26736 
26737 /* Determine whether x86_output_mi_thunk can succeed.  */
26738 
26739 static bool
26740 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
26741                          HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
26742                          HOST_WIDE_INT vcall_offset, const_tree function)
26743 {
26744   /* 64-bit can handle anything.  */
26745   if (TARGET_64BIT)
26746     return true;
26747 
26748   /* For 32-bit, everything's fine if we have one free register.  */
26749   if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
26750     return true;
26751 
26752   /* Need a free register for vcall_offset.  */
26753   if (vcall_offset)
26754     return false;
26755 
26756   /* Need a free register for GOT references.  */
26757   if (flag_pic && !(*targetm.binds_local_p) (function))
26758     return false;
26759 
26760   /* Otherwise ok.  */
26761   return true;
26762 }
26763 
26764 /* Output the assembler code for a thunk function.  THUNK_DECL is the
26765    declaration for the thunk function itself, FUNCTION is the decl for
26766    the target function.  DELTA is an immediate constant offset to be
26767    added to THIS.  If VCALL_OFFSET is nonzero, the word at
26768    *(*this + vcall_offset) should be added to THIS.  */
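/* E.g. for a 32-bit fastcall thunk with THIS in %ecx, DELTA == 8 and
   VCALL_OFFSET == 12, the code below emits roughly:

	addl	$8, %ecx		(apply DELTA)
	movl	(%ecx), %eax		(load the vtable pointer)
	addl	12(%eax), %ecx		(add *(vtable + VCALL_OFFSET))
	jmp	real_function

   The registers chosen vary with the calling convention.  */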
26769 
26770 static void
26771 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
26772                      tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
26773                      HOST_WIDE_INT vcall_offset, tree function)
26774 {
26775   rtx xops[3];
26776   rtx this_param = x86_this_parameter (function);
26777   rtx this_reg, tmp;
26778 
26779   /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
26780      pull it in now and let DELTA benefit.  */
26781   if (REG_P (this_param))
26782     this_reg = this_param;
26783   else if (vcall_offset)
26784     {
26785       /* Put the this parameter into %eax.  */
26786       xops[0] = this_param;
26787       xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
26788       output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26789     }
26790   else
26791     this_reg = NULL_RTX;
26792 
26793   /* Adjust the this parameter by a fixed constant.  */
26794   if (delta)
26795     {
26796       xops[0] = GEN_INT (delta);
26797       xops[1] = this_reg ? this_reg : this_param;
26798       if (TARGET_64BIT)
26799         {
26800           if (!x86_64_general_operand (xops[0], DImode))
26801             {
26802               tmp = gen_rtx_REG (DImode, R10_REG);
26803               xops[1] = tmp;
26804               output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
26805               xops[0] = tmp;
26806               xops[1] = this_param;
26807             }
26808           output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
26809         }
26810       else
26811         output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
26812     }
26813 
26814   /* Adjust the this parameter by a value stored in the vtable.  */
26815   if (vcall_offset)
26816     {
26817       if (TARGET_64BIT)
26818         tmp = gen_rtx_REG (DImode, R10_REG);
26819       else
26820         {
26821           int tmp_regno = CX_REG;
26822           if (lookup_attribute ("fastcall",
26823                                 TYPE_ATTRIBUTES (TREE_TYPE (function))))
26824             tmp_regno = AX_REG;
26825           tmp = gen_rtx_REG (SImode, tmp_regno);
26826         }
26827 
26828       xops[0] = gen_rtx_MEM (Pmode, this_reg);
26829       xops[1] = tmp;
26830       output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26831 
26832       /* Adjust the this parameter.  */
26833       xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
26834       if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
26835         {
26836           rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
26837           xops[0] = GEN_INT (vcall_offset);
26838           xops[1] = tmp2;
26839           output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
26840           xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
26841         }
26842       xops[1] = this_reg;
26843       output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
26844     }
26845 
26846   /* If necessary, drop THIS back to its stack slot.  */
26847   if (this_reg && this_reg != this_param)
26848     {
26849       xops[0] = this_reg;
26850       xops[1] = this_param;
26851       output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26852     }
26853 
26854   xops[0] = XEXP (DECL_RTL (function), 0);
26855   if (TARGET_64BIT)
26856     {
26857       if (!flag_pic || (*targetm.binds_local_p) (function))
26858         output_asm_insn ("jmp\t%P0", xops);
26859       /* All thunks should be in the same object as their target,
26860          and thus binds_local_p should be true.  */
      else if (cfun->machine->call_abi == MS_ABI)
26862         gcc_unreachable ();
26863       else
26864         {
26865           tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
26866           tmp = gen_rtx_CONST (Pmode, tmp);
26867           tmp = gen_rtx_MEM (QImode, tmp);
26868           xops[0] = tmp;
26869           output_asm_insn ("jmp\t%A0", xops);
26870         }
26871     }
26872   else
26873     {
26874       if (!flag_pic || (*targetm.binds_local_p) (function))
26875         output_asm_insn ("jmp\t%P0", xops);
26876       else
26877 #if TARGET_MACHO
26878         if (TARGET_MACHO)
26879           {
26880             rtx sym_ref = XEXP (DECL_RTL (function), 0);
26881             tmp = (gen_rtx_SYMBOL_REF
26882                    (Pmode,
26883                     machopic_indirection_name (sym_ref, /*stub_p=*/true)));
26884             tmp = gen_rtx_MEM (QImode, tmp);
26885             xops[0] = tmp;
26886             output_asm_insn ("jmp\t%0", xops);
26887           }
26888         else
26889 #endif /* TARGET_MACHO */
26890         {
26891           tmp = gen_rtx_REG (SImode, CX_REG);
26892           output_set_got (tmp, NULL_RTX);
26893 
26894           xops[1] = tmp;
26895           output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
26896           output_asm_insn ("jmp\t{*}%1", xops);
26897         }
26898     }
26899 }
26900 
26901 static void
26902 x86_file_start (void)
26903 {
26904   default_file_start ();
26905 #if TARGET_MACHO
26906   darwin_file_start ();
26907 #endif
26908   if (X86_FILE_START_VERSION_DIRECTIVE)
26909     fputs ("\t.version\t\"01.01\"\n", asm_out_file);
26910   if (X86_FILE_START_FLTUSED)
26911     fputs ("\t.global\t__fltused\n", asm_out_file);
26912   if (ix86_asm_dialect == ASM_INTEL)
26913     fputs ("\t.intel_syntax noprefix\n", asm_out_file);
26914 }
26915 
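/* Return the alignment to use for FIELD, whose natural alignment is
   COMPUTED bits.  On 32-bit targets without -malign-double this caps
   double, long long and other integer fields at 32-bit alignment,
   matching the traditional System V i386 ABI.  */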
26916 int
26917 x86_field_alignment (tree field, int computed)
26918 {
26919   enum machine_mode mode;
26920   tree type = TREE_TYPE (field);
26921 
26922   if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
26923     return computed;
26924   mode = TYPE_MODE (strip_array_types (type));
26925   if (mode == DFmode || mode == DCmode
26926       || GET_MODE_CLASS (mode) == MODE_INT
26927       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
26928     return MIN (32, computed);
26929   return computed;
26930 }
26931 
26932 /* Output assembler code to FILE to increment profiler label # LABELNO
26933    for profiling a function entry.  */
26934 void
26935 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
26936 {
26937   if (TARGET_64BIT)
26938     {
26939 #ifndef NO_PROFILE_COUNTERS
26940       fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
26941 #endif
26942 
26943       if (DEFAULT_ABI == SYSV_ABI && flag_pic)
26944         fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
26945       else
26946         fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
26947     }
26948   else if (flag_pic)
26949     {
26950 #ifndef NO_PROFILE_COUNTERS
26951       fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
26952                LPREFIX, labelno, PROFILE_COUNT_REGISTER);
26953 #endif
26954       fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
26955     }
26956   else
26957     {
26958 #ifndef NO_PROFILE_COUNTERS
26959       fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
26960                PROFILE_COUNT_REGISTER);
26961 #endif
26962       fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
26963     }
26964 }
26965 
/* We don't have exact information about insn sizes, but we may quite safely
   assume that we know about all 1-byte insns and about memory address sizes.
   This is enough to eliminate unnecessary padding in 99% of cases.  */
26970 
26971 static int
26972 min_insn_size (rtx insn)
26973 {
26974   int l = 0;
26975 
26976   if (!INSN_P (insn) || !active_insn_p (insn))
26977     return 0;
26978 
  /* Discard alignments we've emitted, and jump tables.  */
26980   if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
26981       && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
26982     return 0;
26983   if (JUMP_P (insn)
26984       && (GET_CODE (PATTERN (insn)) == ADDR_VEC
26985           || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
26986     return 0;
26987 
  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
26990   if (CALL_P (insn)
26991       && symbolic_reference_mentioned_p (PATTERN (insn))
26992       && !SIBLING_CALL_P (insn))
26993     return 5;
26994   if (get_attr_length (insn) <= 1)
26995     return 1;
26996 
  /* For normal instructions we may rely on the sizes of addresses
     and the presence of a symbol to require 4 bytes of encoding.
     This is not the case for jumps, where references are PC relative.  */
27000   if (!JUMP_P (insn))
27001     {
27002       l = get_attr_length_address (insn);
27003       if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27004         l = 4;
27005     }
27006   if (l)
    return 1 + l;
27008   else
27009     return 2;
27010 }
27011 
27012 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
27013    window.  */
27014 
27015 static void
27016 ix86_avoid_jump_misspredicts (void)
27017 {
27018   rtx insn, start = get_insns ();
27019   int nbytes = 0, njumps = 0;
27020   int isjump = 0;
27021 
27022   /* Look for all minimal intervals of instructions containing 4 jumps.
27023      The intervals are bounded by START and INSN.  NBYTES is the total
27024      size of instructions in the interval including INSN and not including
27025      START.  When the NBYTES is smaller than 16 bytes, it is possible
27026      that the end of START and INSN ends up in the same 16byte page.
27027 
27028      The smallest offset in the page INSN can start is the case where START
27029      ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
27031      */
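  /* Worked example: if the interval holding the previous three jumps plus
     INSN spans NBYTES == 12 bytes, the four jumps may share one 16-byte
     window, so we pad INSN by up to 15 - 12 + sizeof (INSN) bytes to push
     it out of that window.  */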
27032   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
27033     {
27034 
27035       nbytes += min_insn_size (insn);
27036       if (dump_file)
        fprintf (dump_file, "Insn %i estimated to %i bytes\n",
                 INSN_UID (insn), min_insn_size (insn));
27039       if ((JUMP_P (insn)
27040            && GET_CODE (PATTERN (insn)) != ADDR_VEC
27041            && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27042           || CALL_P (insn))
27043         njumps++;
27044       else
27045         continue;
27046 
27047       while (njumps > 3)
27048         {
27049           start = NEXT_INSN (start);
27050           if ((JUMP_P (start)
27051                && GET_CODE (PATTERN (start)) != ADDR_VEC
27052                && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27053               || CALL_P (start))
27054             njumps--, isjump = 1;
27055           else
27056             isjump = 0;
27057           nbytes -= min_insn_size (start);
27058         }
27059       gcc_assert (njumps >= 0);
27060       if (dump_file)
27061         fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27062                 INSN_UID (start), INSN_UID (insn), nbytes);
27063 
27064       if (njumps == 3 && isjump && nbytes < 16)
27065         {
27066           int padsize = 15 - nbytes + min_insn_size (insn);
27067 
27068           if (dump_file)
27069             fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27070                      INSN_UID (insn), padsize);
27071           emit_insn_before (gen_align (GEN_INT (padsize)), insn);
27072         }
27073     }
27074 }
27075 
/* AMD Athlon works faster when RET is not the destination of a conditional
   jump or directly preceded by another jump instruction.  We avoid the
   penalty by inserting a NOP just before the RET instructions in such
   cases.  */
27080 static void
27081 ix86_pad_returns (void)
27082 {
27083   edge e;
27084   edge_iterator ei;
27085 
27086   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27087     {
27088       basic_block bb = e->src;
27089       rtx ret = BB_END (bb);
27090       rtx prev;
27091       bool replace = false;
27092 
27093       if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27094           || optimize_bb_for_size_p (bb))
27095         continue;
27096       for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27097         if (active_insn_p (prev) || LABEL_P (prev))
27098           break;
27099       if (prev && LABEL_P (prev))
27100         {
27101           edge e;
27102           edge_iterator ei;
27103 
27104           FOR_EACH_EDGE (e, ei, bb->preds)
27105             if (EDGE_FREQUENCY (e) && e->src->index >= 0
27106                 && !(e->flags & EDGE_FALLTHRU))
27107               replace = true;
27108         }
27109       if (!replace)
27110         {
27111           prev = prev_active_insn (ret);
27112           if (prev
27113               && ((JUMP_P (prev) && any_condjump_p (prev))
27114                   || CALL_P (prev)))
27115             replace = true;
          /* Empty functions get a branch mispredict even when the jump
             destination is not visible to us.  */
27118           if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
27119             replace = true;
27120         }
27121       if (replace)
27122         {
27123           emit_insn_before (gen_return_internal_long (), ret);
27124           delete_insn (ret);
27125         }
27126     }
27127 }
27128 
27129 /* Implement machine specific optimizations.  We implement padding of returns
27130    for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
27131 static void
27132 ix86_reorg (void)
27133 {
27134   if (TARGET_PAD_RETURNS && optimize
27135       && optimize_function_for_speed_p (cfun))
27136     ix86_pad_returns ();
27137   if (TARGET_FOUR_JUMP_LIMIT && optimize
27138       && optimize_function_for_speed_p (cfun))
27139     ix86_avoid_jump_misspredicts ();
27140 }
27141 
/* Return nonzero when the insn mentions a QImode register whose low byte
   must be encoded using a REX prefix (any hard register above %ebx).  */
27144 bool
27145 x86_extended_QIreg_mentioned_p (rtx insn)
27146 {
27147   int i;
27148   extract_insn_cached (insn);
27149   for (i = 0; i < recog_data.n_operands; i++)
27150     if (REG_P (recog_data.operand[i])
27151         && REGNO (recog_data.operand[i]) > BX_REG)
27152        return true;
27153   return false;
27154 }
27155 
/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx.  */
27158 static int
27159 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27160 {
27161    unsigned int regno;
27162    if (!REG_P (*p))
27163      return 0;
27164    regno = REGNO (*p);
27165    return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27166 }
27167 
27168 /* Return true when INSN mentions register that must be encoded using REX
27169    prefix.  */
27170 bool
27171 x86_extended_reg_mentioned_p (rtx insn)
27172 {
27173   return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27174                        extended_reg_mentioned_1, NULL);
27175 }
27176 
27177 /* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
27178    optabs would emit if we didn't have TFmode patterns.  */
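/* Conceptually, for an unsigned value IN:

     if ((signed) in >= 0)
       out = (fp) in;                          -- signed conversion is exact
     else
       out = 2 * (fp) ((in >> 1) | (in & 1));  -- halve with round-to-odd

   ORing in the low bit makes the halved value round to odd, so the final
   doubling cannot introduce a second rounding error.  */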
27179 
27180 void
27181 x86_emit_floatuns (rtx operands[2])
27182 {
27183   rtx neglab, donelab, i0, i1, f0, in, out;
27184   enum machine_mode mode, inmode;
27185 
27186   inmode = GET_MODE (operands[1]);
27187   gcc_assert (inmode == SImode || inmode == DImode);
27188 
27189   out = operands[0];
27190   in = force_reg (inmode, operands[1]);
27191   mode = GET_MODE (out);
27192   neglab = gen_label_rtx ();
27193   donelab = gen_label_rtx ();
27194   f0 = gen_reg_rtx (mode);
27195 
27196   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27197 
27198   expand_float (out, in, 0);
27199 
27200   emit_jump_insn (gen_jump (donelab));
27201   emit_barrier ();
27202 
27203   emit_label (neglab);
27204 
27205   i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27206                             1, OPTAB_DIRECT);
27207   i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27208                             1, OPTAB_DIRECT);
27209   i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27210 
27211   expand_float (f0, i0, 0);
27212 
27213   emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27214 
27215   emit_label (donelab);
27216 }
27217 
27218 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
27219    with all elements equal to VAR.  Return true if successful.  */
27220 
27221 static bool
27222 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27223                                    rtx target, rtx val)
27224 {
27225   enum machine_mode hmode, smode, wsmode, wvmode;
27226   rtx x;
27227 
27228   switch (mode)
27229     {
27230     case V2SImode:
27231     case V2SFmode:
27232       if (!mmx_ok)
27233         return false;
27234       /* FALLTHRU */
27235 
27236     case V2DFmode:
27237     case V2DImode:
27238     case V4SFmode:
27239     case V4SImode:
27240       val = force_reg (GET_MODE_INNER (mode), val);
27241       x = gen_rtx_VEC_DUPLICATE (mode, val);
27242       emit_insn (gen_rtx_SET (VOIDmode, target, x));
27243       return true;
27244 
27245     case V4HImode:
27246       if (!mmx_ok)
27247         return false;
27248       if (TARGET_SSE || TARGET_3DNOW_A)
27249         {
27250           val = gen_lowpart (SImode, val);
27251           x = gen_rtx_TRUNCATE (HImode, val);
27252           x = gen_rtx_VEC_DUPLICATE (mode, x);
27253           emit_insn (gen_rtx_SET (VOIDmode, target, x));
27254           return true;
27255         }
27256       else
27257         {
27258           smode = HImode;
27259           wsmode = SImode;
27260           wvmode = V2SImode;
27261           goto widen;
27262         }
27263 
27264     case V8QImode:
27265       if (!mmx_ok)
27266         return false;
27267       smode = QImode;
27268       wsmode = HImode;
27269       wvmode = V4HImode;
27270       goto widen;
27271     case V8HImode:
27272       if (TARGET_SSE2)
27273         {
27274           rtx tmp1, tmp2;
27275           /* Extend HImode to SImode using a paradoxical SUBREG.  */
27276           tmp1 = gen_reg_rtx (SImode);
27277           emit_move_insn (tmp1, gen_lowpart (SImode, val));
27278           /* Insert the SImode value as low element of V4SImode vector. */
27279           tmp2 = gen_reg_rtx (V4SImode);
27280           tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27281                                     gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27282                                     CONST0_RTX (V4SImode),
27283                                     const1_rtx);
27284           emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27285           /* Cast the V4SImode vector back to a V8HImode vector.  */
27286           tmp1 = gen_reg_rtx (V8HImode);
27287           emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27288           /* Duplicate the low short through the whole low SImode word.  */
27289           emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27290           /* Cast the V8HImode vector back to a V4SImode vector.  */
27291           tmp2 = gen_reg_rtx (V4SImode);
27292           emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27293           /* Replicate the low element of the V4SImode vector.  */
27294           emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
          /* Cast the V4SImode vector back to V8HImode, and store in target.  */
27296           emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
27297           return true;
27298         }
27299       smode = HImode;
27300       wsmode = SImode;
27301       wvmode = V4SImode;
27302       goto widen;
27303     case V16QImode:
27304       if (TARGET_SSE2)
27305         {
27306           rtx tmp1, tmp2;
27307           /* Extend QImode to SImode using a paradoxical SUBREG.  */
27308           tmp1 = gen_reg_rtx (SImode);
27309           emit_move_insn (tmp1, gen_lowpart (SImode, val));
27310           /* Insert the SImode value as low element of V4SImode vector. */
27311           tmp2 = gen_reg_rtx (V4SImode);
27312           tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27313                                     gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27314                                     CONST0_RTX (V4SImode),
27315                                     const1_rtx);
27316           emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27317           /* Cast the V4SImode vector back to a V16QImode vector.  */
27318           tmp1 = gen_reg_rtx (V16QImode);
27319           emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27320           /* Duplicate the low byte through the whole low SImode word.  */
27321           emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27322           emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27323           /* Cast the V16QImode vector back to a V4SImode vector.  */
27324           tmp2 = gen_reg_rtx (V4SImode);
27325           emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27326           /* Replicate the low element of the V4SImode vector.  */
27327           emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
          /* Cast the V4SImode vector back to V16QImode, and store in target.  */
27329           emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
27330           return true;
27331         }
27332       smode = QImode;
27333       wsmode = HImode;
27334       wvmode = V8HImode;
27335       goto widen;
27336     widen:
27337       /* Replicate the value once into the next wider mode and recurse.  */
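      /* E.g. for V8QImode the QImode VAL is widened to HImode, doubled
         up as val = (val << 8) | val, and the broadcast is finished by
         recursing with V4HImode.  */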
27338       val = convert_modes (wsmode, smode, val, true);
27339       x = expand_simple_binop (wsmode, ASHIFT, val,
27340                                GEN_INT (GET_MODE_BITSIZE (smode)),
27341                                NULL_RTX, 1, OPTAB_LIB_WIDEN);
27342       val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27343 
27344       x = gen_reg_rtx (wvmode);
27345       if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27346         gcc_unreachable ();
27347       emit_move_insn (target, gen_lowpart (mode, x));
27348       return true;
27349 
27350     case V4DFmode:
27351       hmode = V2DFmode;
27352       goto half;
27353     case V4DImode:
27354       hmode = V2DImode;
27355       goto half;
27356     case V8SFmode:
27357       hmode = V4SFmode;
27358       goto half;
27359     case V8SImode:
27360       hmode = V4SImode;
27361       goto half;
27362     case V16HImode:
27363       hmode = V8HImode;
27364       goto half;
27365     case V32QImode:
27366       hmode = V16QImode;
27367       goto half;
27368 half:
27369       {
27370         rtx tmp = gen_reg_rtx (hmode);
27371         ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27372         emit_insn (gen_rtx_SET (VOIDmode, target,
27373                                 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
27374       }
27375       return true;
27376 
27377     default:
27378       return false;
27379     }
27380 }
27381 
27382 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
27383    whose ONE_VAR element is VAR, and other elements are zero.  Return true
27384    if successful.  */
27385 
27386 static bool
27387 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27388                                      rtx target, rtx var, int one_var)
27389 {
27390   enum machine_mode vsimode;
27391   rtx new_target;
27392   rtx x, tmp;
27393   bool use_vector_set = false;
27394 
27395   switch (mode)
27396     {
27397     case V2DImode:
27398       /* For SSE4.1, we normally use vector set.  But if the second
27399          element is zero and inter-unit moves are OK, we use movq
27400          instead.  */
27401       use_vector_set = (TARGET_64BIT
27402                         && TARGET_SSE4_1
27403                         && !(TARGET_INTER_UNIT_MOVES
27404                              && one_var == 0));
27405       break;
27406     case V16QImode:
27407     case V4SImode:
27408     case V4SFmode:
27409       use_vector_set = TARGET_SSE4_1;
27410       break;
27411     case V8HImode:
27412       use_vector_set = TARGET_SSE2;
27413       break;
27414     case V4HImode:
27415       use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27416       break;
27417     case V32QImode:
27418     case V16HImode:
27419     case V8SImode:
27420     case V8SFmode:
27421     case V4DFmode:
27422       use_vector_set = TARGET_AVX;
27423       break;
27424     case V4DImode:
27425       /* Use ix86_expand_vector_set in 64bit mode only.  */
27426       use_vector_set = TARGET_AVX && TARGET_64BIT;
27427       break;
27428     default:
27429       break;
27430     }
27431 
27432   if (use_vector_set)
27433     {
27434       emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27435       var = force_reg (GET_MODE_INNER (mode), var);
27436       ix86_expand_vector_set (mmx_ok, target, var, one_var);
27437       return true; 
27438     }
27439 
27440   switch (mode)
27441     {
27442     case V2SFmode:
27443     case V2SImode:
27444       if (!mmx_ok)
27445         return false;
27446       /* FALLTHRU */
27447 
27448     case V2DFmode:
27449     case V2DImode:
27450       if (one_var != 0)
27451         return false;
27452       var = force_reg (GET_MODE_INNER (mode), var);
27453       x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27454       emit_insn (gen_rtx_SET (VOIDmode, target, x));
27455       return true;
27456 
27457     case V4SFmode:
27458     case V4SImode:
27459       if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27460         new_target = gen_reg_rtx (mode);
27461       else
27462         new_target = target;
27463       var = force_reg (GET_MODE_INNER (mode), var);
27464       x = gen_rtx_VEC_DUPLICATE (mode, var);
27465       x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27466       emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27467       if (one_var != 0)
27468         {
27469           /* We need to shuffle the value to the correct position, so
27470              create a new pseudo to store the intermediate result.  */
27471 
27472           /* With SSE2, we can use the integer shuffle insns.  */
27473           if (mode != V4SFmode && TARGET_SSE2)
27474             {
27475               emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27476                                             GEN_INT (1),
27477                                             GEN_INT (one_var == 1 ? 0 : 1),
27478                                             GEN_INT (one_var == 2 ? 0 : 1),
27479                                             GEN_INT (one_var == 3 ? 0 : 1)));
27480               if (target != new_target)
27481                 emit_move_insn (target, new_target);
27482               return true;
27483             }
27484 
27485           /* Otherwise convert the intermediate result to V4SFmode and
27486              use the SSE1 shuffle instructions.  */
27487           if (mode != V4SFmode)
27488             {
27489               tmp = gen_reg_rtx (V4SFmode);
27490               emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27491             }
27492           else
27493             tmp = new_target;
27494 
27495           emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27496                                        GEN_INT (1),
27497                                        GEN_INT (one_var == 1 ? 0 : 1),
27498                                        GEN_INT (one_var == 2 ? 0+4 : 1+4),
27499                                        GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27500 
27501           if (mode != V4SFmode)
27502             emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27503           else if (tmp != target)
27504             emit_move_insn (target, tmp);
27505         }
27506       else if (target != new_target)
27507         emit_move_insn (target, new_target);
27508       return true;
27509 
27510     case V8HImode:
27511     case V16QImode:
27512       vsimode = V4SImode;
27513       goto widen;
27514     case V4HImode:
27515     case V8QImode:
27516       if (!mmx_ok)
27517         return false;
27518       vsimode = V2SImode;
27519       goto widen;
27520     widen:
27521       if (one_var != 0)
27522         return false;
27523 
27524       /* Zero extend the variable element to SImode and recurse.  */
27525       var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27526 
27527       x = gen_reg_rtx (vsimode);
27528       if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27529                                                 var, one_var))
27530         gcc_unreachable ();
27531 
27532       emit_move_insn (target, gen_lowpart (mode, x));
27533       return true;
27534 
27535     default:
27536       return false;
27537     }
27538 }
27539 
27540 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
27541    consisting of the values in VALS.  It is known that all elements
27542    except ONE_VAR are constants.  Return true if successful.  */
27543 
27544 static bool
27545 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
27546                                  rtx target, rtx vals, int one_var)
27547 {
27548   rtx var = XVECEXP (vals, 0, one_var);
27549   enum machine_mode wmode;
27550   rtx const_vec, x;
27551 
27552   const_vec = copy_rtx (vals);
27553   XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
27554   const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
27555 
27556   switch (mode)
27557     {
27558     case V2DFmode:
27559     case V2DImode:
27560     case V2SFmode:
27561     case V2SImode:
27562       /* For the two element vectors, it's just as easy to use
27563          the general case.  */
27564       return false;
27565 
27566     case V4DImode:
27567       /* Use ix86_expand_vector_set in 64bit mode only.  */
27568       if (!TARGET_64BIT)
        return false;
      /* FALLTHRU */
27570     case V4DFmode:
27571     case V8SFmode:
27572     case V8SImode:
27573     case V16HImode:
27574     case V32QImode:
27575     case V4SFmode:
27576     case V4SImode:
27577     case V8HImode:
27578     case V4HImode:
27579       break;
27580 
27581     case V16QImode:
27582       if (TARGET_SSE4_1)
27583         break;
27584       wmode = V8HImode;
27585       goto widen;
27586     case V8QImode:
27587       wmode = V4HImode;
27588       goto widen;
27589     widen:
27590       /* There's no way to set one QImode entry easily.  Combine
27591          the variable value with its adjacent constant value, and
27592          promote to an HImode set.  */
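      /* E.g. for ONE_VAR == 3 in a V8QImode vector this forms the HImode
         value (var << 8) | (vals[2] & 0xff) and stores it as element 1 of
         the corresponding V4HImode vector.  */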
27593       x = XVECEXP (vals, 0, one_var ^ 1);
27594       if (one_var & 1)
27595         {
27596           var = convert_modes (HImode, QImode, var, true);
27597           var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
27598                                      NULL_RTX, 1, OPTAB_LIB_WIDEN);
27599           x = GEN_INT (INTVAL (x) & 0xff);
27600         }
27601       else
27602         {
27603           var = convert_modes (HImode, QImode, var, true);
27604           x = gen_int_mode (INTVAL (x) << 8, HImode);
27605         }
27606       if (x != const0_rtx)
27607         var = expand_simple_binop (HImode, IOR, var, x, var,
27608                                    1, OPTAB_LIB_WIDEN);
27609 
27610       x = gen_reg_rtx (wmode);
27611       emit_move_insn (x, gen_lowpart (wmode, const_vec));
27612       ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
27613 
27614       emit_move_insn (target, gen_lowpart (mode, x));
27615       return true;
27616 
27617     default:
27618       return false;
27619     }
27620 
27621   emit_move_insn (target, const_vec);
27622   ix86_expand_vector_set (mmx_ok, target, var, one_var);
27623   return true;
27624 }
27625 
27626 /* A subroutine of ix86_expand_vector_init_general.  Use vector
27627    concatenate to handle the most general case: all values variable,
27628    and none identical.  */
27629 
27630 static void
27631 ix86_expand_vector_init_concat (enum machine_mode mode,
27632                                 rtx target, rtx *ops, int n)
27633 {
27634   enum machine_mode cmode, hmode = VOIDmode;
27635   rtx first[8], second[4];
27636   rtvec v;
27637   int i, j;
27638 
27639   switch (n)
27640     {
27641     case 2:
27642       switch (mode)
27643         {
27644         case V8SImode:
27645           cmode = V4SImode;
27646           break;
27647         case V8SFmode:
27648           cmode = V4SFmode;
27649           break;
27650         case V4DImode:
27651           cmode = V2DImode;
27652           break;
27653         case V4DFmode:
27654           cmode = V2DFmode;
27655           break;
27656         case V4SImode:
27657           cmode = V2SImode;
27658           break;
27659         case V4SFmode:
27660           cmode = V2SFmode;
27661           break;
27662         case V2DImode:
27663           cmode = DImode;
27664           break;
27665         case V2SImode:
27666           cmode = SImode;
27667           break;
27668         case V2DFmode:
27669           cmode = DFmode;
27670           break;
27671         case V2SFmode:
27672           cmode = SFmode;
27673           break;
27674         default:
27675           gcc_unreachable ();
27676         }
27677 
27678       if (!register_operand (ops[1], cmode))
27679         ops[1] = force_reg (cmode, ops[1]);
27680       if (!register_operand (ops[0], cmode))
27681         ops[0] = force_reg (cmode, ops[0]);
27682       emit_insn (gen_rtx_SET (VOIDmode, target,
27683                               gen_rtx_VEC_CONCAT (mode, ops[0],
27684                                                   ops[1])));
27685       break;
27686 
27687     case 4:
27688       switch (mode)
27689         {
27690         case V4DImode:
27691           cmode = V2DImode;
27692           break;
27693         case V4DFmode:
27694           cmode = V2DFmode;
27695           break;
27696         case V4SImode:
27697           cmode = V2SImode;
27698           break;
27699         case V4SFmode:
27700           cmode = V2SFmode;
27701           break;
27702         default:
27703           gcc_unreachable ();
27704         }
27705       goto half;
27706 
27707     case 8:
27708       switch (mode)
27709         {
27710         case V8SImode:
27711           cmode = V2SImode;
27712           hmode = V4SImode;
27713           break;
27714         case V8SFmode:
27715           cmode = V2SFmode;
27716           hmode = V4SFmode;
27717           break;
27718         default:
27719           gcc_unreachable ();
27720         }
27721       goto half;
27722 
27723 half:
27724       /* FIXME: We process inputs backward to help RA.  PR 36222.  */
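      /* E.g. for V8SFmode (n == 8) the eight scalar inputs are paired
         into four V2SFmode registers, those into two V4SFmode halves,
         and a final VEC_CONCAT yields the V8SFmode result.  */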
27725       i = n - 1;
27726       j = (n >> 1) - 1;
27727       for (; i > 0; i -= 2, j--)
27728         {
27729           first[j] = gen_reg_rtx (cmode);
27730           v = gen_rtvec (2, ops[i - 1], ops[i]);
27731           ix86_expand_vector_init (false, first[j],
27732                                    gen_rtx_PARALLEL (cmode, v));
27733         }
27734 
27735       n >>= 1;
27736       if (n > 2)
27737         {
27738           gcc_assert (hmode != VOIDmode);
27739           for (i = j = 0; i < n; i += 2, j++)
27740             {
27741               second[j] = gen_reg_rtx (hmode);
27742               ix86_expand_vector_init_concat (hmode, second [j],
27743                                               &first [i], 2);
27744             }
27745           n >>= 1;
27746           ix86_expand_vector_init_concat (mode, target, second, n);
27747         }
27748       else
27749         ix86_expand_vector_init_concat (mode, target, first, n);
27750       break;
27751 
27752     default:
27753       gcc_unreachable ();
27754     }
27755 }
27756 
27757 /* A subroutine of ix86_expand_vector_init_general.  Use vector
27758    interleave to handle the most general case: all values variable,
27759    and none identical.  */
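
/* Schematically, for V16QImode (N == 8 pairs of scalar bytes), the
   data flow is roughly:

     8 vectors of  2 bytes each  (movd plus a vec_set of the odd byte)
     4 vectors of  4 bytes each  after punpcklwd, viewed as V4SImode
     2 vectors of  8 bytes each  after punpckldq, viewed as V2DImode
     1 vector  of 16 bytes       after punpcklqdq

   The actual insns are produced through the gen_* function pointers
   selected below.  */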
27760 
27761 static void
27762 ix86_expand_vector_init_interleave (enum machine_mode mode,
27763                                     rtx target, rtx *ops, int n)
27764 {
27765   enum machine_mode first_imode, second_imode, third_imode, inner_mode;
27766   int i, j;
27767   rtx op0, op1;
27768   rtx (*gen_load_even) (rtx, rtx, rtx);
27769   rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
27770   rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
27771   
27772   switch (mode)
27773     {
27774     case V8HImode:
27775       gen_load_even = gen_vec_setv8hi;
27776       gen_interleave_first_low = gen_vec_interleave_lowv4si;
27777       gen_interleave_second_low = gen_vec_interleave_lowv2di;
27778       inner_mode = HImode;
27779       first_imode = V4SImode;
27780       second_imode = V2DImode;
27781       third_imode = VOIDmode;
27782       break;
27783     case V16QImode:
27784       gen_load_even = gen_vec_setv16qi;
27785       gen_interleave_first_low = gen_vec_interleave_lowv8hi;
27786       gen_interleave_second_low = gen_vec_interleave_lowv4si;
27787       inner_mode = QImode;
27788       first_imode = V8HImode;
27789       second_imode = V4SImode;
27790       third_imode = V2DImode;
27791       break;
27792     default:
27793       gcc_unreachable ();
27794     }
27795      
27796   for (i = 0; i < n; i++)
27797     {
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
27799       op0 = gen_reg_rtx (SImode);
27800       emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
27801 
27802       /* Insert the SImode value as low element of V4SImode vector. */
27803       op1 = gen_reg_rtx (V4SImode);
27804       op0 = gen_rtx_VEC_MERGE (V4SImode,
27805                                gen_rtx_VEC_DUPLICATE (V4SImode,
27806                                                       op0),
27807                                CONST0_RTX (V4SImode),
27808                                const1_rtx);
27809       emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
27810 
      /* Cast the V4SImode vector back to a vector in the original mode.  */
27812       op0 = gen_reg_rtx (mode);
27813       emit_move_insn (op0, gen_lowpart (mode, op1));
27814       
      /* Load even elements into the second position.  */
27816       emit_insn ((*gen_load_even) (op0,
27817                                    force_reg (inner_mode,
27818                                               ops [i + i + 1]),
27819                                    const1_rtx));
27820 
27821       /* Cast vector to FIRST_IMODE vector.  */
27822       ops[i] = gen_reg_rtx (first_imode);
27823       emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
27824     }
27825 
27826   /* Interleave low FIRST_IMODE vectors.  */
27827   for (i = j = 0; i < n; i += 2, j++)
27828     {
27829       op0 = gen_reg_rtx (first_imode);
27830       emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
27831 
27832       /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
27833       ops[j] = gen_reg_rtx (second_imode);
27834       emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
27835     }
27836 
27837   /* Interleave low SECOND_IMODE vectors.  */
27838   switch (second_imode)
27839     {
27840     case V4SImode:
27841       for (i = j = 0; i < n / 2; i += 2, j++)
27842         {
27843           op0 = gen_reg_rtx (second_imode);
27844           emit_insn ((*gen_interleave_second_low) (op0, ops[i],
27845                                                    ops[i + 1]));
27846 
27847           /* Cast the SECOND_IMODE vector to the THIRD_IMODE
27848              vector.  */
27849           ops[j] = gen_reg_rtx (third_imode);
27850           emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
27851         }
27852       second_imode = V2DImode;
27853       gen_interleave_second_low = gen_vec_interleave_lowv2di;
27854       /* FALLTHRU */
27855 
27856     case V2DImode:
27857       op0 = gen_reg_rtx (second_imode);
27858       emit_insn ((*gen_interleave_second_low) (op0, ops[0],
27859                                                ops[1]));
27860 
      /* Cast the SECOND_IMODE vector back to a vector in the original
         mode.  */
27863       emit_insn (gen_rtx_SET (VOIDmode, target,
27864                               gen_lowpart (mode, op0)));
27865       break;
27866 
27867     default:
27868       gcc_unreachable ();
27869     }
27870 }
27871 
27872 /* A subroutine of ix86_expand_vector_init.  Handle the most general case:
27873    all values variable, and none identical.  */
27874 
27875 static void
27876 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
27877                                  rtx target, rtx vals)
27878 {
27879   rtx ops[32], op0, op1;
27880   enum machine_mode half_mode = VOIDmode;
27881   int n, i;
27882 
27883   switch (mode)
27884     {
27885     case V2SFmode:
27886     case V2SImode:
27887       if (!mmx_ok && !TARGET_SSE)
27888         break;
27889       /* FALLTHRU */
27890 
27891     case V8SFmode:
27892     case V8SImode:
27893     case V4DFmode:
27894     case V4DImode:
27895     case V4SFmode:
27896     case V4SImode:
27897     case V2DFmode:
27898     case V2DImode:
27899       n = GET_MODE_NUNITS (mode);
27900       for (i = 0; i < n; i++)
27901         ops[i] = XVECEXP (vals, 0, i);
27902       ix86_expand_vector_init_concat (mode, target, ops, n);
27903       return;
27904 
27905     case V32QImode:
27906       half_mode = V16QImode;
27907       goto half;
27908 
27909     case V16HImode:
27910       half_mode = V8HImode;
27911       goto half;
27912 
27913 half:
27914       n = GET_MODE_NUNITS (mode);
27915       for (i = 0; i < n; i++)
27916         ops[i] = XVECEXP (vals, 0, i);
27917       op0 = gen_reg_rtx (half_mode);
27918       op1 = gen_reg_rtx (half_mode);
27919       ix86_expand_vector_init_interleave (half_mode, op0, ops,
27920                                           n >> 2);
27921       ix86_expand_vector_init_interleave (half_mode, op1,
27922                                           &ops [n >> 1], n >> 2);
27923       emit_insn (gen_rtx_SET (VOIDmode, target,
27924                               gen_rtx_VEC_CONCAT (mode, op0, op1)));
27925       return;
27926 
27927     case V16QImode:
27928       if (!TARGET_SSE4_1)
27929         break;
27930       /* FALLTHRU */
27931 
27932     case V8HImode:
27933       if (!TARGET_SSE2)
27934         break;
27935 
27936       /* Don't use ix86_expand_vector_init_interleave if we can't
27937          move from GPR to SSE register directly.  */ 
27938       if (!TARGET_INTER_UNIT_MOVES)
27939         break;
27940 
27941       n = GET_MODE_NUNITS (mode);
27942       for (i = 0; i < n; i++)
27943         ops[i] = XVECEXP (vals, 0, i);
27944       ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
27945       return;
27946 
27947     case V4HImode:
27948     case V8QImode:
27949       break;
27950 
27951     default:
27952       gcc_unreachable ();
27953     }
27954 
27955     {
27956       int i, j, n_elts, n_words, n_elt_per_word;
27957       enum machine_mode inner_mode;
27958       rtx words[4], shift;
27959 
27960       inner_mode = GET_MODE_INNER (mode);
27961       n_elts = GET_MODE_NUNITS (mode);
27962       n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
27963       n_elt_per_word = n_elts / n_words;
27964       shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
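      /* Each word is assembled from its elements high to low, e.g. for
         V4HImode with a 32-bit word_mode (n_words == 2, two elements
         per word):  words[i] = (elt[2*i+1] << 16) | elt[2*i].  */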
27965 
27966       for (i = 0; i < n_words; ++i)
27967         {
27968           rtx word = NULL_RTX;
27969 
27970           for (j = 0; j < n_elt_per_word; ++j)
27971             {
27972               rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
27973               elt = convert_modes (word_mode, inner_mode, elt, true);
27974 
27975               if (j == 0)
27976                 word = elt;
27977               else
27978                 {
27979                   word = expand_simple_binop (word_mode, ASHIFT, word, shift,
27980                                               word, 1, OPTAB_LIB_WIDEN);
27981                   word = expand_simple_binop (word_mode, IOR, word, elt,
27982                                               word, 1, OPTAB_LIB_WIDEN);
27983                 }
27984             }
27985 
27986           words[i] = word;
27987         }
27988 
27989       if (n_words == 1)
27990         emit_move_insn (target, gen_lowpart (mode, words[0]));
27991       else if (n_words == 2)
27992         {
27993           rtx tmp = gen_reg_rtx (mode);
27994           emit_clobber (tmp);
27995           emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
27996           emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
27997           emit_move_insn (target, tmp);
27998         }
27999       else if (n_words == 4)
28000         {
28001           rtx tmp = gen_reg_rtx (V4SImode);
28002           gcc_assert (word_mode == SImode);
28003           vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28004           ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28005           emit_move_insn (target, gen_lowpart (mode, tmp));
28006         }
28007       else
28008         gcc_unreachable ();
28009     }
28010 }
28011 
28012 /* Initialize vector TARGET via VALS.  Suppress the use of MMX
28013    instructions unless MMX_OK is true.  */
28014 
28015 void
28016 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28017 {
28018   enum machine_mode mode = GET_MODE (target);
28019   enum machine_mode inner_mode = GET_MODE_INNER (mode);
28020   int n_elts = GET_MODE_NUNITS (mode);
28021   int n_var = 0, one_var = -1;
28022   bool all_same = true, all_const_zero = true;
28023   int i;
28024   rtx x;
28025 
28026   for (i = 0; i < n_elts; ++i)
28027     {
28028       x = XVECEXP (vals, 0, i);
28029       if (!(CONST_INT_P (x)
28030             || GET_CODE (x) == CONST_DOUBLE
28031             || GET_CODE (x) == CONST_FIXED))
28032         n_var++, one_var = i;
28033       else if (x != CONST0_RTX (inner_mode))
28034         all_const_zero = false;
28035       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28036         all_same = false;
28037     }
28038 
28039   /* Constants are best loaded from the constant pool.  */
28040   if (n_var == 0)
28041     {
28042       emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28043       return;
28044     }
28045 
28046   /* If all values are identical, broadcast the value.  */
28047   if (all_same
28048       && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28049                                             XVECEXP (vals, 0, 0)))
28050     return;
28051 
28052   /* Values where only one field is non-constant are best loaded from
28053      the pool and overwritten via move later.  */
28054   if (n_var == 1)
28055     {
28056       if (all_const_zero
28057           && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28058                                                   XVECEXP (vals, 0, one_var),
28059                                                   one_var))
28060         return;
28061 
28062       if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
28063         return;
28064     }
28065 
28066   ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
28067 }
28068 
28069 void
28070 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28071 {
28072   enum machine_mode mode = GET_MODE (target);
28073   enum machine_mode inner_mode = GET_MODE_INNER (mode);
28074   enum machine_mode half_mode;
28075   bool use_vec_merge = false;
28076   rtx tmp;
28077   static rtx (*gen_extract[6][2]) (rtx, rtx)
28078     = {
28079         { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28080         { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28081         { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28082         { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28083         { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28084         { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28085       };
28086   static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28087     = {
28088         { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28089         { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28090         { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28091         { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28092         { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28093         { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
28094       };
28095   int i, j, n;
28096 
28097   switch (mode)
28098     {
28099     case V2SFmode:
28100     case V2SImode:
28101       if (mmx_ok)
28102         {
28103           tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28104           ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28105           if (elt == 0)
28106             tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28107           else
28108             tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28109           emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28110           return;
28111         }
28112       break;
28113 
28114     case V2DImode:
28115       use_vec_merge = TARGET_SSE4_1;
28116       if (use_vec_merge)
        break;
      /* FALLTHRU */

28119     case V2DFmode:
28120       {
28121         rtx op0, op1;
28122 
28123         /* For the two element vectors, we implement a VEC_CONCAT with
28124            the extraction of the other element.  */
28125 
28126         tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28127         tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28128 
28129         if (elt == 0)
28130           op0 = val, op1 = tmp;
28131         else
28132           op0 = tmp, op1 = val;
28133 
28134         tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28135         emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28136       }
28137       return;
28138 
28139     case V4SFmode:
28140       use_vec_merge = TARGET_SSE4_1;
28141       if (use_vec_merge)
28142         break;
28143 
28144       switch (elt)
28145         {
28146         case 0:
28147           use_vec_merge = true;
28148           break;
28149 
28150         case 1:
28151           /* tmp = target = A B C D */
28152           tmp = copy_to_reg (target);
28153           /* target = A A B B */
28154           emit_insn (gen_sse_unpcklps (target, target, target));
28155           /* target = X A B B */
28156           ix86_expand_vector_set (false, target, val, 0);
28157           /* target = A X C D  */
28158           emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28159                                        GEN_INT (1), GEN_INT (0),
28160                                        GEN_INT (2+4), GEN_INT (3+4)));
28161           return;
28162 
28163         case 2:
28164           /* tmp = target = A B C D */
28165           tmp = copy_to_reg (target);
28166           /* tmp = X B C D */
28167           ix86_expand_vector_set (false, tmp, val, 0);
28168           /* target = A B X D */
28169           emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28170                                        GEN_INT (0), GEN_INT (1),
28171                                        GEN_INT (0+4), GEN_INT (3+4)));
28172           return;
28173 
28174         case 3:
28175           /* tmp = target = A B C D */
28176           tmp = copy_to_reg (target);
28177           /* tmp = X B C D */
28178           ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B C X */
28180           emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28181                                        GEN_INT (0), GEN_INT (1),
28182                                        GEN_INT (2+4), GEN_INT (0+4)));
28183           return;
28184 
28185         default:
28186           gcc_unreachable ();
28187         }
28188       break;
28189 
28190     case V4SImode:
28191       use_vec_merge = TARGET_SSE4_1;
28192       if (use_vec_merge)
28193         break;
28194 
28195       /* Element 0 handled by vec_merge below.  */
28196       if (elt == 0)
28197         {
28198           use_vec_merge = true;
28199           break;
28200         }
28201 
28202       if (TARGET_SSE2)
28203         {
28204           /* With SSE2, use integer shuffles to swap element 0 and ELT,
28205              store into element 0, then shuffle them back.  */
28206 
28207           rtx order[4];
28208 
28209           order[0] = GEN_INT (elt);
28210           order[1] = const1_rtx;
28211           order[2] = const2_rtx;
28212           order[3] = GEN_INT (3);
28213           order[elt] = const0_rtx;
28214 
28215           emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28216                                         order[1], order[2], order[3]));
28217 
28218           ix86_expand_vector_set (false, target, val, 0);
28219 
28220           emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28221                                         order[1], order[2], order[3]));
28222         }
28223       else
28224         {
28225           /* For SSE1, we have to reuse the V4SF code.  */
28226           ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28227                                   gen_lowpart (SFmode, val), elt);
28228         }
28229       return;
28230 
28231     case V8HImode:
28232       use_vec_merge = TARGET_SSE2;
28233       break;
28234     case V4HImode:
28235       use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28236       break;
28237 
28238     case V16QImode:
28239       use_vec_merge = TARGET_SSE4_1;
28240       break;
28241 
28242     case V8QImode:
28243       break;
28244 
28245     case V32QImode:
28246       half_mode = V16QImode;
28247       j = 0;
28248       n = 16;
28249       goto half;
28250 
28251     case V16HImode:
28252       half_mode = V8HImode;
28253       j = 1;
28254       n = 8;
28255       goto half;
28256 
28257     case V8SImode:
28258       half_mode = V4SImode;
28259       j = 2;
28260       n = 4;
28261       goto half;
28262 
28263     case V4DImode:
28264       half_mode = V2DImode;
28265       j = 3;
28266       n = 2;
28267       goto half;
28268 
28269     case V8SFmode:
28270       half_mode = V4SFmode;
28271       j = 4;
28272       n = 4;
28273       goto half;
28274 
28275     case V4DFmode:
28276       half_mode = V2DFmode;
28277       j = 5;
28278       n = 2;
28279       goto half;
28280 
28281 half:
28282       /* Compute offset.  */
28283       i = elt / n;
28284       elt %= n;
28285 
28286       gcc_assert (i <= 1);
28287 
28288       /* Extract the half.  */
28289       tmp = gen_reg_rtx (half_mode);
28290       emit_insn ((*gen_extract[j][i]) (tmp, target));
28291 
28292       /* Put val in tmp at elt.  */
28293       ix86_expand_vector_set (false, tmp, val, elt);
28294 
28295       /* Put it back.  */
28296       emit_insn ((*gen_insert[j][i]) (target, target, tmp));
28297       return;
28298 
28299     default:
28300       break;
28301     }
28302 
28303   if (use_vec_merge)
28304     {
28305       tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28306       tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28307       emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28308     }
28309   else
28310     {
28311       rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28312 
28313       emit_move_insn (mem, target);
28314 
28315       tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28316       emit_move_insn (tmp, val);
28317 
28318       emit_move_insn (target, mem);
28319     }
28320 }
28321 
28322 void
28323 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28324 {
28325   enum machine_mode mode = GET_MODE (vec);
28326   enum machine_mode inner_mode = GET_MODE_INNER (mode);
28327   bool use_vec_extr = false;
28328   rtx tmp;
28329 
28330   switch (mode)
28331     {
28332     case V2SImode:
28333     case V2SFmode:
28334       if (!mmx_ok)
28335         break;
28336       /* FALLTHRU */
28337 
28338     case V2DFmode:
28339     case V2DImode:
28340       use_vec_extr = true;
28341       break;
28342 
28343     case V4SFmode:
28344       use_vec_extr = TARGET_SSE4_1;
28345       if (use_vec_extr)
28346         break;
28347 
28348       switch (elt)
28349         {
28350         case 0:
28351           tmp = vec;
28352           break;
28353 
28354         case 1:
28355         case 3:
28356           tmp = gen_reg_rtx (mode);
28357           emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28358                                        GEN_INT (elt), GEN_INT (elt),
28359                                        GEN_INT (elt+4), GEN_INT (elt+4)));
28360           break;
28361 
28362         case 2:
28363           tmp = gen_reg_rtx (mode);
28364           emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28365           break;
28366 
28367         default:
28368           gcc_unreachable ();
28369         }
28370       vec = tmp;
28371       use_vec_extr = true;
28372       elt = 0;
28373       break;
28374 
28375     case V4SImode:
28376       use_vec_extr = TARGET_SSE4_1;
28377       if (use_vec_extr)
28378         break;
28379 
28380       if (TARGET_SSE2)
28381         {
28382           switch (elt)
28383             {
28384             case 0:
28385               tmp = vec;
28386               break;
28387 
28388             case 1:
28389             case 3:
28390               tmp = gen_reg_rtx (mode);
28391               emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28392                                             GEN_INT (elt), GEN_INT (elt),
28393                                             GEN_INT (elt), GEN_INT (elt)));
28394               break;
28395 
28396             case 2:
28397               tmp = gen_reg_rtx (mode);
28398               emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28399               break;
28400 
28401             default:
28402               gcc_unreachable ();
28403             }
28404           vec = tmp;
28405           use_vec_extr = true;
28406           elt = 0;
28407         }
28408       else
28409         {
28410           /* For SSE1, we have to reuse the V4SF code.  */
28411           ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28412                                       gen_lowpart (V4SFmode, vec), elt);
28413           return;
28414         }
28415       break;
28416 
28417     case V8HImode:
28418       use_vec_extr = TARGET_SSE2;
28419       break;
28420     case V4HImode:
28421       use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28422       break;
28423 
28424     case V16QImode:
28425       use_vec_extr = TARGET_SSE4_1;
28426       break;
28427 
28428     case V8QImode:
28429       /* ??? Could extract the appropriate HImode element and shift.  */
28430     default:
28431       break;
28432     }
28433 
28434   if (use_vec_extr)
28435     {
28436       tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28437       tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28438 
28439       /* Let the rtl optimizers know about the zero extension performed.  */
28440       if (inner_mode == QImode || inner_mode == HImode)
28441         {
28442           tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28443           target = gen_lowpart (SImode, target);
28444         }
28445 
28446       emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28447     }
28448   else
28449     {
28450       rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28451 
28452       emit_move_insn (mem, vec);
28453 
28454       tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28455       emit_move_insn (target, tmp);
28456     }
28457 }
28458 
28459 /* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
28460    pattern to reduce; DEST is the destination; IN is the input vector.  */
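
/* With IN = { a, b, c, d }, the sequence below computes, step by step:

     tmp1 = movhlps (in, in)            lanes 0..1 become { c, d }
     tmp2 = fn (tmp1, in)               lane 0 = f(c,a), lane 1 = f(d,b)
     tmp3 = shufps (tmp2, tmp2, 1,...)  lane 1 of tmp2 in every lane
     dest = fn (tmp2, tmp3)             lane 0 = f(f(c,a), f(d,b))

   Only element 0 of DEST is meaningful afterwards.  */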
28461 
28462 void
28463 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28464 {
28465   rtx tmp1, tmp2, tmp3;
28466 
28467   tmp1 = gen_reg_rtx (V4SFmode);
28468   tmp2 = gen_reg_rtx (V4SFmode);
28469   tmp3 = gen_reg_rtx (V4SFmode);
28470 
28471   emit_insn (gen_sse_movhlps (tmp1, in, in));
28472   emit_insn (fn (tmp2, tmp1, in));
28473 
28474   emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28475                                GEN_INT (1), GEN_INT (1),
28476                                GEN_INT (1+4), GEN_INT (1+4)));
28477   emit_insn (fn (dest, tmp2, tmp3));
28478 }
28479 
28480 /* Target hook for scalar_mode_supported_p.  */
28481 static bool
28482 ix86_scalar_mode_supported_p (enum machine_mode mode)
28483 {
28484   if (DECIMAL_FLOAT_MODE_P (mode))
28485     return true;
28486   else if (mode == TFmode)
28487     return true;
28488   else
28489     return default_scalar_mode_supported_p (mode);
28490 }
28491 
28492 /* Implements target hook vector_mode_supported_p.  */
28493 static bool
28494 ix86_vector_mode_supported_p (enum machine_mode mode)
28495 {
28496   if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28497     return true;
28498   if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28499     return true;
28500   if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28501     return true;
28502   if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28503     return true;
28504   if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28505     return true;
28506   return false;
28507 }
28508 
28509 /* Target hook for c_mode_for_suffix.  */
28510 static enum machine_mode
28511 ix86_c_mode_for_suffix (char suffix)
28512 {
28513   if (suffix == 'q')
28514     return TFmode;
28515   if (suffix == 'w')
28516     return XFmode;
28517 
28518   return VOIDmode;
28519 }
28520 
28521 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28522 
28523    We do this in the new i386 backend to maintain source compatibility
28524    with the old cc0-based compiler.  */
28525 
28526 static tree
28527 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28528                       tree inputs ATTRIBUTE_UNUSED,
28529                       tree clobbers)
28530 {
28531   clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28532                         clobbers);
28533   clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28534                         clobbers);
28535   return clobbers;
28536 }
28537 
/* Implements the target hook targetm.encode_section_info.  This
   is not used by NetWare.  */
28540 
28541 static void ATTRIBUTE_UNUSED
28542 ix86_encode_section_info (tree decl, rtx rtl, int first)
28543 {
28544   default_encode_section_info (decl, rtl, first);
28545 
28546   if (TREE_CODE (decl) == VAR_DECL
28547       && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
28548       && ix86_in_large_data_p (decl))
28549     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
28550 }
28551 
28552 /* Worker function for REVERSE_CONDITION.  */
28553 
28554 enum rtx_code
28555 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
28556 {
28557   return (mode != CCFPmode && mode != CCFPUmode
28558           ? reverse_condition (code)
28559           : reverse_condition_maybe_unordered (code));
28560 }
28561 
28562 /* Output code to perform an x87 FP register move, from OPERANDS[1]
28563    to OPERANDS[0].  */
28564 
28565 const char *
28566 output_387_reg_move (rtx insn, rtx *operands)
28567 {
28568   if (REG_P (operands[0]))
28569     {
28570       if (REG_P (operands[1])
28571           && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28572         {
28573           if (REGNO (operands[0]) == FIRST_STACK_REG)
28574             return output_387_ffreep (operands, 0);
28575           return "fstp\t%y0";
28576         }
28577       if (STACK_TOP_P (operands[0]))
28578         return "fld%z1\t%y1";
28579       return "fst\t%y0";
28580     }
28581   else if (MEM_P (operands[0]))
28582     {
28583       gcc_assert (REG_P (operands[1]));
28584       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28585         return "fstp%z0\t%y0";
28586       else
28587         {
28588           /* There is no non-popping store to memory for XFmode.
28589              So if we need one, follow the store with a load.  */
28590           if (GET_MODE (operands[0]) == XFmode)
28591             return "fstp%z0\t%y0\n\tfld%z0\t%y0";
28592           else
28593             return "fst%z0\t%y0";
28594         }
28595     }
28596   else
28597     gcc_unreachable();
28598 }
28599 
/* Output code to perform a conditional jump to LABEL, if the C2 flag in
   the FP status register is set.  */
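
/* After fnstsw, C2 is bit 2 of %ah (mask 0x04).  With SAHF, %ah is
   copied into EFLAGS where C2 lands in PF, so the UNORDERED test below
   becomes a jp; without SAHF we test the 0x04 bit of %ah directly.  */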
28602 
28603 void
28604 ix86_emit_fp_unordered_jump (rtx label)
28605 {
28606   rtx reg = gen_reg_rtx (HImode);
28607   rtx temp;
28608 
28609   emit_insn (gen_x86_fnstsw_1 (reg));
28610 
28611   if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
28612     {
28613       emit_insn (gen_x86_sahf_1 (reg));
28614 
28615       temp = gen_rtx_REG (CCmode, FLAGS_REG);
28616       temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
28617     }
28618   else
28619     {
28620       emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
28621 
28622       temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28623       temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
28624     }
28625 
28626   temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
28627                               gen_rtx_LABEL_REF (VOIDmode, label),
28628                               pc_rtx);
28629   temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
28630 
28631   emit_jump_insn (temp);
28632   predict_jump (REG_BR_PROB_BASE * 10 / 100);
28633 }
28634 
28635 /* Output code to perform a log1p XFmode calculation.  */
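
/* The i387 fyl2xp1 instruction is only specified for arguments close
   to zero (|x| below about 1 - sqrt(2)/2, which is the 0.2928...
   constant compared against below), where it is more accurate than
   fyl2x.  For larger |x| we compute log1p (x) as
   fldln2 * log2 (1 + x) via fyl2x.  */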
28636 
void
ix86_emit_i387_log1p (rtx op0, rtx op1)
28638 {
28639   rtx label1 = gen_label_rtx ();
28640   rtx label2 = gen_label_rtx ();
28641 
28642   rtx tmp = gen_reg_rtx (XFmode);
28643   rtx tmp2 = gen_reg_rtx (XFmode);
28644 
28645   emit_insn (gen_absxf2 (tmp, op1));
28646   emit_insn (gen_cmpxf (tmp,
28647     CONST_DOUBLE_FROM_REAL_VALUE (
28648        REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
28649        XFmode)));
28650   emit_jump_insn (gen_bge (label1));
28651 
28652   emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28653   emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
28654   emit_jump (label2);
28655 
28656   emit_label (label1);
28657   emit_move_insn (tmp, CONST1_RTX (XFmode));
28658   emit_insn (gen_addxf3 (tmp, op1, tmp));
28659   emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28660   emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
28661 
28662   emit_label (label2);
28663 }
28664 
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */
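
/* rcpss/rcpps deliver only about 12 bits of precision; the single
   Newton-Raphson step below roughly doubles that, which is still short
   of a correctly rounded division, so this expansion is only suitable
   when unsafe math optimizations are acceptable.  */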
28667 
void
ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
28669 {
28670   rtx x0, x1, e0, e1, two;
28671 
28672   x0 = gen_reg_rtx (mode);
28673   e0 = gen_reg_rtx (mode);
28674   e1 = gen_reg_rtx (mode);
28675   x1 = gen_reg_rtx (mode);
28676 
28677   two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
28678 
28679   if (VECTOR_MODE_P (mode))
28680     two = ix86_build_const_vector (SFmode, true, two);
28681 
28682   two = force_reg (mode, two);
28683 
28684   /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
28685 
28686   /* x0 = rcp(b) estimate */
28687   emit_insn (gen_rtx_SET (VOIDmode, x0,
28688                           gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
28689                                           UNSPEC_RCP)));
28690   /* e0 = x0 * b */
28691   emit_insn (gen_rtx_SET (VOIDmode, e0,
28692                           gen_rtx_MULT (mode, x0, b)));
28693   /* e1 = 2. - e0 */
28694   emit_insn (gen_rtx_SET (VOIDmode, e1,
28695                           gen_rtx_MINUS (mode, two, e0)));
28696   /* x1 = x0 * e1 */
28697   emit_insn (gen_rtx_SET (VOIDmode, x1,
28698                           gen_rtx_MULT (mode, x0, e1)));
28699   /* res = a * x1 */
28700   emit_insn (gen_rtx_SET (VOIDmode, res,
28701                           gen_rtx_MULT (mode, a, x1)));
28702 }
28703 
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */
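
/* The classic Newton-Raphson step for 1/sqrt(a) is
   x1 = x0 * (3 - a*x0*x0) / 2.  The expansion below folds the negations
   into the -3 and -0.5 constants, and for the !RECIP case multiplies by
   e0 == a*x0 instead of x0, since a/sqrt(a) == sqrt(a).  */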
28706 
void
ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode, bool recip)
28709 {
28710   rtx x0, e0, e1, e2, e3, mthree, mhalf;
28711   REAL_VALUE_TYPE r;
28712 
28713   x0 = gen_reg_rtx (mode);
28714   e0 = gen_reg_rtx (mode);
28715   e1 = gen_reg_rtx (mode);
28716   e2 = gen_reg_rtx (mode);
28717   e3 = gen_reg_rtx (mode);
28718 
28719   real_from_integer (&r, VOIDmode, -3, -1, 0);
28720   mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28721 
28722   real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
28723   mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28724 
28725   if (VECTOR_MODE_P (mode))
28726     {
28727       mthree = ix86_build_const_vector (SFmode, true, mthree);
28728       mhalf = ix86_build_const_vector (SFmode, true, mhalf);
28729     }
28730 
28731   /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
28732      rsqrt(a) = -0.5     * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
28733 
28734   /* x0 = rsqrt(a) estimate */
28735   emit_insn (gen_rtx_SET (VOIDmode, x0,
28736                           gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
28737                                           UNSPEC_RSQRT)));
28738 
  /* If a == 0.0, mask out the infinite rsqrt estimate so that sqrt (0.0)
     yields 0.0 rather than NaN.  */
28740   if (!recip)
28741     {
      rtx zero, mask;

      mask = gen_reg_rtx (mode);

      zero = force_reg (mode, CONST0_RTX (mode));
28748       emit_insn (gen_rtx_SET (VOIDmode, mask,
28749                               gen_rtx_NE (mode, zero, a)));
28750 
28751       emit_insn (gen_rtx_SET (VOIDmode, x0,
28752                               gen_rtx_AND (mode, x0, mask)));
28753     }
28754 
28755   /* e0 = x0 * a */
28756   emit_insn (gen_rtx_SET (VOIDmode, e0,
28757                           gen_rtx_MULT (mode, x0, a)));
28758   /* e1 = e0 * x0 */
28759   emit_insn (gen_rtx_SET (VOIDmode, e1,
28760                           gen_rtx_MULT (mode, e0, x0)));
28761 
28762   /* e2 = e1 - 3. */
28763   mthree = force_reg (mode, mthree);
28764   emit_insn (gen_rtx_SET (VOIDmode, e2,
28765                           gen_rtx_PLUS (mode, e1, mthree)));
28766 
28767   mhalf = force_reg (mode, mhalf);
28768   if (recip)
28769     /* e3 = -.5 * x0 */
28770     emit_insn (gen_rtx_SET (VOIDmode, e3,
28771                             gen_rtx_MULT (mode, x0, mhalf)));
28772   else
28773     /* e3 = -.5 * e0 */
28774     emit_insn (gen_rtx_SET (VOIDmode, e3,
28775                             gen_rtx_MULT (mode, e0, mhalf)));
28776   /* ret = e2 * e3 */
28777   emit_insn (gen_rtx_SET (VOIDmode, res,
28778                           gen_rtx_MULT (mode, e2, e3)));
28779 }
28780 
28781 /* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
28782 
28783 static void ATTRIBUTE_UNUSED
28784 i386_solaris_elf_named_section (const char *name, unsigned int flags,
28785                                 tree decl)
28786 {
28787   /* With Binutils 2.15, the "@unwind" marker must be specified on
28788      every occurrence of the ".eh_frame" section, not just the first
28789      one.  */
28790   if (TARGET_64BIT
28791       && strcmp (name, ".eh_frame") == 0)
28792     {
28793       fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
28794                flags & SECTION_WRITE ? "aw" : "a");
28795       return;
28796     }
28797   default_elf_asm_named_section (name, flags, decl);
28798 }
28799 
28800 /* Return the mangling of TYPE if it is an extended fundamental type.  */
28801 
28802 static const char *
28803 ix86_mangle_type (const_tree type)
28804 {
28805   type = TYPE_MAIN_VARIANT (type);
28806 
28807   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28808       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28809     return NULL;
28810 
28811   switch (TYPE_MODE (type))
28812     {
28813     case TFmode:
28814       /* __float128 is "g".  */
28815       return "g";
28816     case XFmode:
28817       /* "long double" or __float80 is "e".  */
28818       return "e";
28819     default:
28820       return NULL;
28821     }
28822 }
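
/* For example (illustrative; assumes long double is XFmode and
   Itanium C++ ABI mangling):

     void f (long double);   // XFmode -> "e", mangled as _Z1fe
     void g (__float128);    // TFmode -> "g", mangled as _Z1fg

   are the mangled names a conforming compiler produces.  */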
28823 
28824 /* For 32-bit code we can save PIC register setup by using
28825    __stack_chk_fail_local hidden function instead of calling
28826    __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
28827    register, so it is better to call __stack_chk_fail directly.  */
28828 
28829 static tree
28830 ix86_stack_protect_fail (void)
28831 {
28832   return TARGET_64BIT
28833          ? default_external_stack_protect_fail ()
28834          : default_hidden_stack_protect_fail ();
28835 }
28836 
28837 /* Select a format to encode pointers in exception handling data.  CODE
28838    is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
28839    true if the symbol may be affected by dynamic relocations.
28840 
28841    ??? All x86 object file formats are capable of representing this.
28842    After all, the relocation needed is the same as for the call insn.
28843    Whether or not a particular assembler allows us to enter such, I
28844    guess we'll have to see.  */
28845 int
28846 asm_preferred_eh_data_format (int code, int global)
28847 {
28848   if (flag_pic)
28849     {
28850       int type = DW_EH_PE_sdata8;
28851       if (!TARGET_64BIT
28852           || ix86_cmodel == CM_SMALL_PIC
28853           || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
28854         type = DW_EH_PE_sdata4;
28855       return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
28856     }
28857   if (ix86_cmodel == CM_SMALL
28858       || (ix86_cmodel == CM_MEDIUM && code))
28859     return DW_EH_PE_udata4;
28860   return DW_EH_PE_absptr;
28861 }
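
/* Worked example using the DW_EH_PE_* values from dwarf2.h: for PIC
   code referencing a global symbol on a 32-bit target the result is

     DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4
       == 0x80 | 0x10 | 0x0b == 0x9b

   the familiar indirect pc-relative sdata4 encoding seen in .eh_frame
   augmentation data.  */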
28862 
/* Expand copysign: combine the sign of SIGN with the non-negative value
   ABS_VALUE, storing the result in RESULT.  If MASK is non-null, it is
   a mask that clears the sign bit (its complement selects the sign
   bit).  */
28866 static void
28867 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
28868 {
28869   enum machine_mode mode = GET_MODE (sign);
28870   rtx sgn = gen_reg_rtx (mode);
28871   if (mask == NULL_RTX)
28872     {
28873       mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
28874       if (!VECTOR_MODE_P (mode))
28875         {
28876           /* We need to generate a scalar mode mask in this case.  */
28877           rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28878           tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28879           mask = gen_reg_rtx (mode);
28880           emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
28881         }
28882     }
28883   else
28884     mask = gen_rtx_NOT (mode, mask);
28885   emit_insn (gen_rtx_SET (VOIDmode, sgn,
28886                           gen_rtx_AND (mode, mask, sign)));
28887   emit_insn (gen_rtx_SET (VOIDmode, result,
28888                           gen_rtx_IOR (mode, abs_value, sgn)));
28889 }
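
/* Bit-level sketch of what the emitted AND/IOR pair computes, written
   as illustrative scalar C (assuming <stdint.h> and <string.h>):

     double copysign_pos (double abs_value, double sign)
     {
       uint64_t a, s, m = UINT64_C (0x8000000000000000);
       memcpy (&a, &abs_value, sizeof a);
       memcpy (&s, &sign, sizeof s);
       a |= s & m;                       // sgn = mask & sign; abs | sgn
       memcpy (&abs_value, &a, sizeof a);
       return abs_value;
     }

   ABS_VALUE is assumed non-negative, so its sign bit is already clear
   and needs no masking.  */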
28890 
28891 /* Expand fabs (OP0) and return a new rtx that holds the result.  The
28892    mask for masking out the sign-bit is stored in *SMASK, if that is
28893    non-null.  */
28894 static rtx
28895 ix86_expand_sse_fabs (rtx op0, rtx *smask)
28896 {
28897   enum machine_mode mode = GET_MODE (op0);
28898   rtx xa, mask;
28899 
28900   xa = gen_reg_rtx (mode);
28901   mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
28902   if (!VECTOR_MODE_P (mode))
28903     {
28904       /* We need to generate a scalar mode mask in this case.  */
28905       rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28906       tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28907       mask = gen_reg_rtx (mode);
28908       emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
28909     }
28910   emit_insn (gen_rtx_SET (VOIDmode, xa,
28911                           gen_rtx_AND (mode, op0, mask)));
28912 
28913   if (smask)
28914     *smask = mask;
28915 
28916   return xa;
28917 }
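
/* The scalar picture (illustrative): clearing the sign bit is fabs.
   For a double X,

     uint64_t bits;
     memcpy (&bits, &x, sizeof bits);
     bits &= ~UINT64_C (0x8000000000000000);   // AND with the mask
     memcpy (&x, &bits, sizeof bits);          // x == fabs (x)

   The same mask is handed back through *SMASK so callers can reuse
   it, e.g. for a later copysign.  */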
28918 
28919 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
28920    swapping the operands if SWAP_OPERANDS is true.  The expanded
28921    code is a forward jump to a newly created label in case the
28922    comparison is true.  The generated label rtx is returned.  */
28923 static rtx
28924 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
28925                                   bool swap_operands)
28926 {
28927   rtx label, tmp;
28928 
28929   if (swap_operands)
28930     {
28931       tmp = op0;
28932       op0 = op1;
28933       op1 = tmp;
28934     }
28935 
28936   label = gen_label_rtx ();
28937   tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
28938   emit_insn (gen_rtx_SET (VOIDmode, tmp,
28939                           gen_rtx_COMPARE (CCFPUmode, op0, op1)));
28940   tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
28941   tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
28942                               gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
28943   tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
28944   JUMP_LABEL (tmp) = label;
28945 
28946   return label;
28947 }
28948 
28949 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
28950    using comparison code CODE.  Operands are swapped for the comparison if
28951    SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
28952 static rtx
28953 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
28954                               bool swap_operands)
28955 {
28956   enum machine_mode mode = GET_MODE (op0);
28957   rtx mask = gen_reg_rtx (mode);
28958 
28959   if (swap_operands)
28960     {
28961       rtx tmp = op0;
28962       op0 = op1;
28963       op1 = tmp;
28964     }
28965 
28966   if (mode == DFmode)
28967     emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
28968                                     gen_rtx_fmt_ee (code, mode, op0, op1)));
28969   else
28970     emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
28971                                    gen_rtx_fmt_ee (code, mode, op0, op1)));
28972 
28973   return mask;
28974 }
28975 
/* Generate and return an rtx of mode MODE holding 2**n, where n is the
   number of bits in the mantissa of MODE, which must be DFmode or
   SFmode.  */
28978 static rtx
28979 ix86_gen_TWO52 (enum machine_mode mode)
28980 {
28981   REAL_VALUE_TYPE TWO52r;
28982   rtx TWO52;
28983 
28984   real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
28985   TWO52 = const_double_from_real_value (TWO52r, mode);
28986   TWO52 = force_reg (mode, TWO52);
28987 
28988   return TWO52;
28989 }
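
/* TWO52 (2**52 for DFmode, 2**23 for SFmode) powers the add/subtract
   rounding trick used by the expanders below: once |x| < 2**52, the
   sum x + 2**52 leaves no fraction bits in the significand, so the
   addition itself rounds x to an integer and subtracting 2**52
   recovers it.  Illustrative values under round-to-nearest-even:

     3.7 + 0x1p52 - 0x1p52  ==  4.0
     2.5 + 0x1p52 - 0x1p52  ==  2.0   // the tie goes to even

   which is why each expander first branches away when
   !isless (xa, TWO52).  */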
28990 
28991 /* Expand SSE sequence for computing lround from OP1 storing
28992    into OP0.  */
28993 void
28994 ix86_expand_lround (rtx op0, rtx op1)
28995 {
28996   /* C code for the stuff we're doing below:
28997        tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
28998        return (long)tmp;
28999    */
29000   enum machine_mode mode = GET_MODE (op1);
29001   const struct real_format *fmt;
29002   REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29003   rtx adj;
29004 
29005   /* load nextafter (0.5, 0.0) */
29006   fmt = REAL_MODE_FORMAT (mode);
29007   real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29008   REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29009 
  /* adj = copysign (nextafter (0.5, 0.0), op1) */
29011   adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29012   ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29013 
29014   /* adj = op1 + adj */
29015   adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29016 
29017   /* op0 = (imode)adj */
29018   expand_fix (op0, adj, 0);
29019 }
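
/* Why nextafter (0.5, 0.0) rather than 0.5: it avoids a double
   rounding at the halfway point.  Illustrative case:

     x = 0x1.fffffffffffffp-2;   // largest double below 0.5
     x + 0.5                     // ties to even: exactly 1.0
     (long) (x + 0.5)            // == 1, but lround (x) must be 0

   Adding nextafter (0.5, 0.0) instead keeps the sum just below 1.0,
   which truncates to the correct 0.  */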
29020 
/* Expand SSE2 sequence for computing lfloor or lceil (depending on
   DO_FLOOR) from OP1, storing into OP0.  */
29023 void
29024 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29025 {
29026   /* C code for the stuff we're doing below (for do_floor):
29027         xi = (long)op1;
29028         xi -= (double)xi > op1 ? 1 : 0;
29029         return xi;
29030    */
29031   enum machine_mode fmode = GET_MODE (op1);
29032   enum machine_mode imode = GET_MODE (op0);
29033   rtx ireg, freg, label, tmp;
29034 
  /* ireg = (long)op1 */
29036   ireg = gen_reg_rtx (imode);
29037   expand_fix (ireg, op1, 0);
29038 
  /* freg = (double)ireg */
29040   freg = gen_reg_rtx (fmode);
29041   expand_float (freg, ireg, 0);
29042 
29043   /* ireg = (freg > op1) ? ireg - 1 : ireg */
29044   label = ix86_expand_sse_compare_and_jump (UNLE,
29045                                             freg, op1, !do_floor);
29046   tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29047                              ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29048   emit_move_insn (ireg, tmp);
29049 
29050   emit_label (label);
29051   LABEL_NUSES (label) = 1;
29052 
29053   emit_move_insn (op0, ireg);
29054 }
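
/* Worked example for do_floor with op1 == -2.5 (illustrative):

     xi = (long) -2.5;      // truncates toward zero: -2
     (double) xi > op1      // -2.0 > -2.5, so compensate
     xi -= 1;               // -3 == lfloor (-2.5)

   For lceil the comparison is performed with swapped operands and 1
   is added instead of subtracted.  */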
29055 
29056 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29057    result in OPERAND0.  */
29058 void
29059 ix86_expand_rint (rtx operand0, rtx operand1)
29060 {
29061   /* C code for the stuff we're doing below:
29062         xa = fabs (operand1);
29063         if (!isless (xa, 2**52))
29064           return operand1;
29065         xa = xa + 2**52 - 2**52;
29066         return copysign (xa, operand1);
29067    */
29068   enum machine_mode mode = GET_MODE (operand0);
29069   rtx res, xa, label, TWO52, mask;
29070 
29071   res = gen_reg_rtx (mode);
29072   emit_move_insn (res, operand1);
29073 
29074   /* xa = abs (operand1) */
29075   xa = ix86_expand_sse_fabs (res, &mask);
29076 
29077   /* if (!isless (xa, TWO52)) goto label; */
29078   TWO52 = ix86_gen_TWO52 (mode);
29079   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29080 
29081   xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29082   xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29083 
29084   ix86_sse_copysign_to_positive (res, xa, res, mask);
29085 
29086   emit_label (label);
29087   LABEL_NUSES (label) = 1;
29088 
29089   emit_move_insn (operand0, res);
29090 }
29091 
29092 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29093    into OPERAND0.  */
29094 void
29095 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29096 {
29097   /* C code for the stuff we expand below.
29098         double xa = fabs (x), x2;
29099         if (!isless (xa, TWO52))
29100           return x;
29101         xa = xa + TWO52 - TWO52;
29102         x2 = copysign (xa, x);
29103      Compensate.  Floor:
29104         if (x2 > x)
29105           x2 -= 1;
29106      Compensate.  Ceil:
29107         if (x2 < x)
29108           x2 -= -1;
29109         return x2;
29110    */
29111   enum machine_mode mode = GET_MODE (operand0);
29112   rtx xa, TWO52, tmp, label, one, res, mask;
29113 
29114   TWO52 = ix86_gen_TWO52 (mode);
29115 
29116   /* Temporary for holding the result, initialized to the input
29117      operand to ease control flow.  */
29118   res = gen_reg_rtx (mode);
29119   emit_move_insn (res, operand1);
29120 
29121   /* xa = abs (operand1) */
29122   xa = ix86_expand_sse_fabs (res, &mask);
29123 
29124   /* if (!isless (xa, TWO52)) goto label; */
29125   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29126 
29127   /* xa = xa + TWO52 - TWO52; */
29128   xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29129   xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29130 
29131   /* xa = copysign (xa, operand1) */
29132   ix86_sse_copysign_to_positive (xa, xa, res, mask);
29133 
29134   /* generate 1.0 or -1.0 */
29135   one = force_reg (mode,
29136                    const_double_from_real_value (do_floor
29137                                                  ? dconst1 : dconstm1, mode));
29138 
29139   /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29140   tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29141   emit_insn (gen_rtx_SET (VOIDmode, tmp,
29142                           gen_rtx_AND (mode, one, tmp)));
29143   /* We always need to subtract here to preserve signed zero.  */
29144   tmp = expand_simple_binop (mode, MINUS,
29145                              xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29146   emit_move_insn (res, tmp);
29147 
29148   emit_label (label);
29149   LABEL_NUSES (label) = 1;
29150 
29151   emit_move_insn (operand0, res);
29152 }
29153 
29154 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29155    into OPERAND0.  */
29156 void
29157 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29158 {
29159   /* C code for the stuff we expand below.
29160         double xa = fabs (x), x2;
29161         if (!isless (xa, TWO52))
29162           return x;
29163         x2 = (double)(long)x;
29164      Compensate.  Floor:
29165         if (x2 > x)
29166           x2 -= 1;
29167      Compensate.  Ceil:
29168         if (x2 < x)
29169           x2 += 1;
29170         if (HONOR_SIGNED_ZEROS (mode))
29171           return copysign (x2, x);
29172         return x2;
29173    */
29174   enum machine_mode mode = GET_MODE (operand0);
29175   rtx xa, xi, TWO52, tmp, label, one, res, mask;
29176 
29177   TWO52 = ix86_gen_TWO52 (mode);
29178 
29179   /* Temporary for holding the result, initialized to the input
29180      operand to ease control flow.  */
29181   res = gen_reg_rtx (mode);
29182   emit_move_insn (res, operand1);
29183 
29184   /* xa = abs (operand1) */
29185   xa = ix86_expand_sse_fabs (res, &mask);
29186 
29187   /* if (!isless (xa, TWO52)) goto label; */
29188   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29189 
29190   /* xa = (double)(long)x */
29191   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29192   expand_fix (xi, res, 0);
29193   expand_float (xa, xi, 0);
29194 
29195   /* generate 1.0 */
29196   one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29197 
29198   /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29199   tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29200   emit_insn (gen_rtx_SET (VOIDmode, tmp,
29201                           gen_rtx_AND (mode, one, tmp)));
29202   tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29203                              xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29204   emit_move_insn (res, tmp);
29205 
29206   if (HONOR_SIGNED_ZEROS (mode))
29207     ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29208 
29209   emit_label (label);
29210   LABEL_NUSES (label) = 1;
29211 
29212   emit_move_insn (operand0, res);
29213 }
29214 
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  The sequence works without relying on DImode truncation
   via cvttsd2siq, which is only available on 64-bit targets.  */
29218 void
29219 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29220 {
29221   /* C code for the stuff we expand below.
29222         double xa = fabs (x), xa2, x2;
29223         if (!isless (xa, TWO52))
29224           return x;
29225      Using the absolute value and copying back sign makes
29226      -0.0 -> -0.0 correct.
29227         xa2 = xa + TWO52 - TWO52;
29228      Compensate.
29229         dxa = xa2 - xa;
29230         if (dxa <= -0.5)
29231           xa2 += 1;
29232         else if (dxa > 0.5)
29233           xa2 -= 1;
29234         x2 = copysign (xa2, x);
29235         return x2;
29236    */
29237   enum machine_mode mode = GET_MODE (operand0);
29238   rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29239 
29240   TWO52 = ix86_gen_TWO52 (mode);
29241 
29242   /* Temporary for holding the result, initialized to the input
29243      operand to ease control flow.  */
29244   res = gen_reg_rtx (mode);
29245   emit_move_insn (res, operand1);
29246 
29247   /* xa = abs (operand1) */
29248   xa = ix86_expand_sse_fabs (res, &mask);
29249 
29250   /* if (!isless (xa, TWO52)) goto label; */
29251   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29252 
29253   /* xa2 = xa + TWO52 - TWO52; */
29254   xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29255   xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29256 
29257   /* dxa = xa2 - xa; */
29258   dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29259 
29260   /* generate 0.5, 1.0 and -0.5 */
29261   half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29262   one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29263   mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29264                                0, OPTAB_DIRECT);
29265 
  /* Compensate.  */
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29270   emit_insn (gen_rtx_SET (VOIDmode, tmp,
29271                           gen_rtx_AND (mode, one, tmp)));
29272   xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29273   /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29274   tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29275   emit_insn (gen_rtx_SET (VOIDmode, tmp,
29276                           gen_rtx_AND (mode, one, tmp)));
29277   xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29278 
29279   /* res = copysign (xa2, operand1) */
29280   ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29281 
29282   emit_label (label);
29283   LABEL_NUSES (label) = 1;
29284 
29285   emit_move_insn (operand0, res);
29286 }
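
/* Worked example for x == 2.5 (illustrative):

     xa  = 2.5;
     xa2 = xa + 0x1p52 - 0x1p52;   // 2.0: the tie rounds to even
     dxa = xa2 - xa;               // -0.5
     dxa <= -0.5                   // true, so xa2 += 1  ->  3.0
     copysign (3.0, 2.5)           // round (2.5) == 3.0

   The compensation thus restores round-half-away-from-zero on top of
   the hardware's round-to-nearest-even addition.  */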
29287 
29288 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29289    into OPERAND0.  */
29290 void
29291 ix86_expand_trunc (rtx operand0, rtx operand1)
29292 {
29293   /* C code for SSE variant we expand below.
29294         double xa = fabs (x), x2;
29295         if (!isless (xa, TWO52))
29296           return x;
29297         x2 = (double)(long)x;
29298         if (HONOR_SIGNED_ZEROS (mode))
29299           return copysign (x2, x);
29300         return x2;
29301    */
29302   enum machine_mode mode = GET_MODE (operand0);
29303   rtx xa, xi, TWO52, label, res, mask;
29304 
29305   TWO52 = ix86_gen_TWO52 (mode);
29306 
29307   /* Temporary for holding the result, initialized to the input
29308      operand to ease control flow.  */
29309   res = gen_reg_rtx (mode);
29310   emit_move_insn (res, operand1);
29311 
29312   /* xa = abs (operand1) */
29313   xa = ix86_expand_sse_fabs (res, &mask);
29314 
29315   /* if (!isless (xa, TWO52)) goto label; */
29316   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29317 
29318   /* x = (double)(long)x */
29319   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29320   expand_fix (xi, res, 0);
29321   expand_float (res, xi, 0);
29322 
29323   if (HONOR_SIGNED_ZEROS (mode))
29324     ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29325 
29326   emit_label (label);
29327   LABEL_NUSES (label) = 1;
29328 
29329   emit_move_insn (operand0, res);
29330 }
29331 
29332 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29333    into OPERAND0.  */
29334 void
29335 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29336 {
29337   enum machine_mode mode = GET_MODE (operand0);
29338   rtx xa, mask, TWO52, label, one, res, smask, tmp;
29339 
29340   /* C code for SSE variant we expand below.
29341         double xa = fabs (x), x2;
29342         if (!isless (xa, TWO52))
29343           return x;
29344         xa2 = xa + TWO52 - TWO52;
29345      Compensate:
29346         if (xa2 > xa)
29347           xa2 -= 1.0;
29348         x2 = copysign (xa2, x);
29349         return x2;
29350    */
29351 
29352   TWO52 = ix86_gen_TWO52 (mode);
29353 
29354   /* Temporary for holding the result, initialized to the input
29355      operand to ease control flow.  */
29356   res = gen_reg_rtx (mode);
29357   emit_move_insn (res, operand1);
29358 
29359   /* xa = abs (operand1) */
29360   xa = ix86_expand_sse_fabs (res, &smask);
29361 
29362   /* if (!isless (xa, TWO52)) goto label; */
29363   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29364 
29365   /* res = xa + TWO52 - TWO52; */
29366   tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29367   tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29368   emit_move_insn (res, tmp);
29369 
29370   /* generate 1.0 */
29371   one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29372 
29373   /* Compensate: res = xa2 - (res > xa ? 1 : 0)  */
29374   mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29375   emit_insn (gen_rtx_SET (VOIDmode, mask,
29376                           gen_rtx_AND (mode, mask, one)));
29377   tmp = expand_simple_binop (mode, MINUS,
29378                              res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29379   emit_move_insn (res, tmp);
29380 
29381   /* res = copysign (res, operand1) */
29382   ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29383 
29384   emit_label (label);
29385   LABEL_NUSES (label) = 1;
29386 
29387   emit_move_insn (operand0, res);
29388 }
29389 
29390 /* Expand SSE sequence for computing round from OPERAND1 storing
29391    into OPERAND0.  */
29392 void
29393 ix86_expand_round (rtx operand0, rtx operand1)
29394 {
29395   /* C code for the stuff we're doing below:
29396         double xa = fabs (x);
29397         if (!isless (xa, TWO52))
29398           return x;
29399         xa = (double)(long)(xa + nextafter (0.5, 0.0));
29400         return copysign (xa, x);
29401    */
29402   enum machine_mode mode = GET_MODE (operand0);
29403   rtx res, TWO52, xa, label, xi, half, mask;
29404   const struct real_format *fmt;
29405   REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29406 
29407   /* Temporary for holding the result, initialized to the input
29408      operand to ease control flow.  */
29409   res = gen_reg_rtx (mode);
29410   emit_move_insn (res, operand1);
29411 
29412   TWO52 = ix86_gen_TWO52 (mode);
29413   xa = ix86_expand_sse_fabs (res, &mask);
29414   label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29415 
29416   /* load nextafter (0.5, 0.0) */
29417   fmt = REAL_MODE_FORMAT (mode);
29418   real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29419   REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29420 
  /* xa = xa + nextafter (0.5, 0.0) */
29422   half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29423   xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29424 
29425   /* xa = (double)(int64_t)xa */
29426   xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29427   expand_fix (xi, xa, 0);
29428   expand_float (xa, xi, 0);
29429 
29430   /* res = copysign (xa, operand1) */
29431   ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29432 
29433   emit_label (label);
29434   LABEL_NUSES (label) = 1;
29435 
29436   emit_move_insn (operand0, res);
29437 }
29438 
29439 
/* Check whether an SSE5 instruction is valid.
   OPERANDS is the array of operands.
   NUM is the number of operands.
   USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
   NUM_MEMORY is the maximum number of memory operands to accept.
   When COMMUTATIVE is set, operands 1 and 2 can be swapped.  */
29446 
29447 bool
29448 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29449                       bool uses_oc0, int num_memory, bool commutative)
29450 {
29451   int mem_mask;
29452   int mem_count;
29453   int i;
29454 
29455   /* Count the number of memory arguments */
29456   mem_mask = 0;
29457   mem_count = 0;
29458   for (i = 0; i < num; i++)
29459     {
29460       enum machine_mode mode = GET_MODE (operands[i]);
29461       if (register_operand (operands[i], mode))
29462         ;
29463 
29464       else if (memory_operand (operands[i], mode))
29465         {
29466           mem_mask |= (1 << i);
29467           mem_count++;
29468         }
29469 
29470       else
29471         {
29472           rtx pattern = PATTERN (insn);
29473 
29474           /* allow 0 for pcmov */
29475           if (GET_CODE (pattern) != SET
29476               || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29477               || i < 2
29478               || operands[i] != CONST0_RTX (mode))
29479             return false;
29480         }
29481     }
29482 
29483   /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29484      a memory operation.  */
29485   if (num_memory < 0)
29486     {
29487       num_memory = -num_memory;
29488       if ((mem_mask & (1 << (num-1))) != 0)
29489         {
29490           mem_mask &= ~(1 << (num-1));
29491           mem_count--;
29492         }
29493     }
29494 
29495   /* If there were no memory operations, allow the insn */
29496   if (mem_mask == 0)
29497     return true;
29498 
29499   /* Do not allow the destination register to be a memory operand.  */
29500   else if (mem_mask & (1 << 0))
29501     return false;
29502 
  /* If there are too many memory operations, disallow the instruction.  While
     the hardware allows only one memory reference, for some insns we permit
     two memory operations before register allocation, so that code like the
     following can be optimized:

        float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }

     or similar cases that are vectorized into using the fmaddss
     instruction.  */
29512   else if (mem_count > num_memory)
29513     return false;
29514 
29515   /* Don't allow more than one memory operation if not optimizing.  */
29516   else if (mem_count > 1 && !optimize)
29517     return false;
29518 
29519   else if (num == 4 && mem_count == 1)
29520     {
29521       /* formats (destination is the first argument), example fmaddss:
29522          xmm1, xmm1, xmm2, xmm3/mem
29523          xmm1, xmm1, xmm2/mem, xmm3
29524          xmm1, xmm2, xmm3/mem, xmm1
29525          xmm1, xmm2/mem, xmm3, xmm1 */
29526       if (uses_oc0)
29527         return ((mem_mask == (1 << 1))
29528                 || (mem_mask == (1 << 2))
29529                 || (mem_mask == (1 << 3)));
29530 
29531       /* format, example pmacsdd:
29532          xmm1, xmm2, xmm3/mem, xmm1 */
29533       if (commutative)
29534         return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
29535       else
29536         return (mem_mask == (1 << 2));
29537     }
29538 
29539   else if (num == 4 && num_memory == 2)
29540     {
29541       /* If there are two memory operations, we can load one of the memory ops
29542          into the destination register.  This is for optimizing the
29543          multiply/add ops, which the combiner has optimized both the multiply
29544          and the add insns to have a memory operation.  We have to be careful
29545          that the destination doesn't overlap with the inputs.  */
29546       rtx op0 = operands[0];
29547 
29548       if (reg_mentioned_p (op0, operands[1])
29549           || reg_mentioned_p (op0, operands[2])
29550           || reg_mentioned_p (op0, operands[3]))
29551         return false;
29552 
29553       /* formats (destination is the first argument), example fmaddss:
29554          xmm1, xmm1, xmm2, xmm3/mem
29555          xmm1, xmm1, xmm2/mem, xmm3
29556          xmm1, xmm2, xmm3/mem, xmm1
29557          xmm1, xmm2/mem, xmm3, xmm1
29558 
29559          For the oc0 case, we will load either operands[1] or operands[3] into
29560          operands[0], so any combination of 2 memory operands is ok.  */
29561       if (uses_oc0)
29562         return true;
29563 
29564       /* format, example pmacsdd:
29565          xmm1, xmm2, xmm3/mem, xmm1
29566 
29567          For the integer multiply/add instructions be more restrictive and
29568          require operands[2] and operands[3] to be the memory operands.  */
29569       if (commutative)
        return (mem_mask == ((1 << 1) | (1 << 3))
                || mem_mask == ((1 << 2) | (1 << 3)));
29571       else
29572         return (mem_mask == ((1 << 2) | (1 << 3)));
29573     }
29574 
29575   else if (num == 3 && num_memory == 1)
29576     {
29577       /* formats, example protb:
29578          xmm1, xmm2, xmm3/mem
29579          xmm1, xmm2/mem, xmm3 */
29580       if (uses_oc0)
29581         return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
29582 
29583       /* format, example comeq:
29584          xmm1, xmm2, xmm3/mem */
29585       else
29586         return (mem_mask == (1 << 2));
29587     }
29588 
29589   else
29590     gcc_unreachable ();
29591 
29592   return false;
29593 }
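
/* Illustrative mem_mask values for a 4-operand fmaddss, with
   operand 0 as the destination:

     fmaddss xmm1, xmm1, xmm2, mem   ->  mem_mask == (1 << 3)  ok
     fmaddss xmm1, xmm1, mem,  xmm3  ->  mem_mask == (1 << 2)  ok
     fmaddss mem,  ...               ->  bit 0 set, rejected

   With uses_oc0 any single bit among positions 1..3 is accepted, as
   coded above.  */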
29594 
29595 
/* Fix up an SSE5 instruction that has two memory input references into a
   form the hardware will allow, by using the destination register to load
   one of the memory operands.  Presently this is used by the multiply/add
   routines to allow two memory references.  */
29600 
29601 void
29602 ix86_expand_sse5_multiple_memory (rtx operands[],
29603                                   int num,
29604                                   enum machine_mode mode)
29605 {
29606   rtx op0 = operands[0];
29607   if (num != 4
29608       || memory_operand (op0, mode)
29609       || reg_mentioned_p (op0, operands[1])
29610       || reg_mentioned_p (op0, operands[2])
29611       || reg_mentioned_p (op0, operands[3]))
29612     gcc_unreachable ();
29613 
29614   /* For 2 memory operands, pick either operands[1] or operands[3] to move into
29615      the destination register.  */
29616   if (memory_operand (operands[1], mode))
29617     {
29618       emit_move_insn (op0, operands[1]);
29619       operands[1] = op0;
29620     }
29621   else if (memory_operand (operands[3], mode))
29622     {
29623       emit_move_insn (op0, operands[3]);
29624       operands[3] = op0;
29625     }
29626   else
29627     gcc_unreachable ();
29630 }
29631 
29632 
29633 /* Table of valid machine attributes.  */
29634 static const struct attribute_spec ix86_attribute_table[] =
29635 {
29636   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29637   /* Stdcall attribute says callee is responsible for popping arguments
29638      if they are not variable.  */
29639   { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
29640   /* Fastcall attribute says callee is responsible for popping arguments
29641      if they are not variable.  */
29642   { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
29643   /* Cdecl attribute says the callee is a normal C declaration */
29644   { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
29645   /* Regparm attribute specifies how many integer arguments are to be
29646      passed in registers.  */
29647   { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
29648   /* Sseregparm attribute says we are using x86_64 calling conventions
29649      for FP arguments.  */
29650   { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29651   /* force_align_arg_pointer says this function realigns the stack at entry.  */
29652   { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29653     false, true,  true, ix86_handle_cconv_attribute },
29654 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29655   { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29656   { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29657   { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
29658 #endif
29659   { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
29660   { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
29661 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29662   SUBTARGET_ATTRIBUTE_TABLE,
29663 #endif
29664   /* ms_abi and sysv_abi calling convention function attributes.  */
29665   { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29666   { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29667   /* End element.  */
29668   { NULL,        0, 0, false, false, false, NULL }
29669 };
29670 
29671 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
29672 static int
29673 x86_builtin_vectorization_cost (bool runtime_test)
29674 {
  /* If the branch of the runtime test is taken, i.e. the vectorized
     version is skipped, this incurs a misprediction cost (because the
     vectorized version is expected to be the fall-through).  So we subtract
     the latency of a mispredicted branch from the costs that are incurred
     when the vectorized version is executed.

     TODO: The values in individual target tables have to be tuned or new
     fields may be needed.  For example, on K8 the default branch path is the
     not-taken path.  If the taken path is predicted correctly, the minimum
     penalty of going down the taken-path is 1 cycle.  If the taken-path is
     not predicted correctly, then the minimum penalty is 10 cycles.  */
29686 
  if (runtime_test)
    return -(ix86_cost->cond_taken_branch_cost);
  else
    return 0;
29693 }
29694 
/* Return the va_list type node specific to the calling ABI of FNDECL.  */
29697 
29698 tree
29699 ix86_fn_abi_va_list (tree fndecl)
29700 {
29701   int abi;
29702 
29703   if (!TARGET_64BIT)
29704     return va_list_type_node;
29705   gcc_assert (fndecl != NULL_TREE);
29706   abi = ix86_function_abi ((const_tree) fndecl);
29707 
29708   if (abi == MS_ABI)
29709     return ms_va_list_type_node;
29710   else
29711     return sysv_va_list_type_node;
29712 }
29713 
/* Return the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, return NULL_TREE.  */
29716 
29717 tree
29718 ix86_canonical_va_list_type (tree type)
29719 {
29720   tree wtype, htype;
29721 
29722   /* Resolve references and pointers to va_list type.  */
29723   if (INDIRECT_REF_P (type))
29724     type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
29726     type = TREE_TYPE (type);
29727 
29728   if (TARGET_64BIT)
29729     {
      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
29732       htype = type;
29733       if (TREE_CODE (wtype) == ARRAY_TYPE)
29734         {
29735           /* If va_list is an array type, the argument may have decayed
29736              to a pointer type, e.g. by being passed to another function.
29737              In that case, unwrap both types so that we can compare the
29738              underlying records.  */
29739           if (TREE_CODE (htype) == ARRAY_TYPE
29740               || POINTER_TYPE_P (htype))
29741             {
29742               wtype = TREE_TYPE (wtype);
29743               htype = TREE_TYPE (htype);
29744             }
29745         }
29746       if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29747         return va_list_type_node;
      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
29750       htype = type;
29751       if (TREE_CODE (wtype) == ARRAY_TYPE)
29752         {
29753           /* If va_list is an array type, the argument may have decayed
29754              to a pointer type, e.g. by being passed to another function.
29755              In that case, unwrap both types so that we can compare the
29756              underlying records.  */
29757           if (TREE_CODE (htype) == ARRAY_TYPE
29758               || POINTER_TYPE_P (htype))
29759             {
29760               wtype = TREE_TYPE (wtype);
29761               htype = TREE_TYPE (htype);
29762             }
29763         }
29764       if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29765         return sysv_va_list_type_node;
      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
29768       htype = type;
29769       if (TREE_CODE (wtype) == ARRAY_TYPE)
29770         {
29771           /* If va_list is an array type, the argument may have decayed
29772              to a pointer type, e.g. by being passed to another function.
29773              In that case, unwrap both types so that we can compare the
29774              underlying records.  */
29775           if (TREE_CODE (htype) == ARRAY_TYPE
29776               || POINTER_TYPE_P (htype))
29777             {
29778               wtype = TREE_TYPE (wtype);
29779               htype = TREE_TYPE (htype);
29780             }
29781         }
29782       if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29783         return ms_va_list_type_node;
29784       return NULL_TREE;
29785     }
29786   return std_canonical_va_list_type (type);
29787 }
29788 
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal type.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */
29796 
29797 int
29798 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
29799 {
29800   if (!TARGET_64BIT)
29801     return 0;
  switch (idx)
    {
    case 0:
      *ptree = ms_va_list_type_node;
      *pname = "__builtin_ms_va_list";
      break;
    case 1:
      *ptree = sysv_va_list_type_node;
      *pname = "__builtin_sysv_va_list";
      break;
    default:
      return 0;
    }
29814   return 1;
29815 }
29816 
29817 /* Initialize the GCC target structure.  */
29818 #undef TARGET_RETURN_IN_MEMORY
29819 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
29820 
29821 #undef TARGET_ATTRIBUTE_TABLE
29822 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
29823 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29824 #  undef TARGET_MERGE_DECL_ATTRIBUTES
29825 #  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
29826 #endif
29827 
29828 #undef TARGET_COMP_TYPE_ATTRIBUTES
29829 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
29830 
29831 #undef TARGET_INIT_BUILTINS
29832 #define TARGET_INIT_BUILTINS ix86_init_builtins
29833 #undef TARGET_EXPAND_BUILTIN
29834 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
29835 
29836 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
29837 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
29838   ix86_builtin_vectorized_function
29839 
29840 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
29841 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
29842 
29843 #undef TARGET_BUILTIN_RECIPROCAL
29844 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
29845 
29846 #undef TARGET_ASM_FUNCTION_EPILOGUE
29847 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
29848 
29849 #undef TARGET_ENCODE_SECTION_INFO
29850 #ifndef SUBTARGET_ENCODE_SECTION_INFO
29851 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
29852 #else
29853 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
29854 #endif
29855 
29856 #undef TARGET_ASM_OPEN_PAREN
29857 #define TARGET_ASM_OPEN_PAREN ""
29858 #undef TARGET_ASM_CLOSE_PAREN
29859 #define TARGET_ASM_CLOSE_PAREN ""
29860 
29861 #undef TARGET_ASM_ALIGNED_HI_OP
29862 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
29863 #undef TARGET_ASM_ALIGNED_SI_OP
29864 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
29865 #ifdef ASM_QUAD
29866 #undef TARGET_ASM_ALIGNED_DI_OP
29867 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
29868 #endif
29869 
29870 #undef TARGET_ASM_UNALIGNED_HI_OP
29871 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
29872 #undef TARGET_ASM_UNALIGNED_SI_OP
29873 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
29874 #undef TARGET_ASM_UNALIGNED_DI_OP
29875 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
29876 
29877 #undef TARGET_SCHED_ADJUST_COST
29878 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
29879 #undef TARGET_SCHED_ISSUE_RATE
29880 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
29881 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
29882 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
29883   ia32_multipass_dfa_lookahead
29884 
29885 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
29886 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
29887 
29888 #ifdef HAVE_AS_TLS
29889 #undef TARGET_HAVE_TLS
29890 #define TARGET_HAVE_TLS true
29891 #endif
29892 #undef TARGET_CANNOT_FORCE_CONST_MEM
29893 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
29894 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
29895 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
29896 
29897 #undef TARGET_DELEGITIMIZE_ADDRESS
29898 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
29899 
29900 #undef TARGET_MS_BITFIELD_LAYOUT_P
29901 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
29902 
29903 #if TARGET_MACHO
29904 #undef TARGET_BINDS_LOCAL_P
29905 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
29906 #endif
29907 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29908 #undef TARGET_BINDS_LOCAL_P
29909 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
29910 #endif
29911 
29912 #undef TARGET_ASM_OUTPUT_MI_THUNK
29913 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
29914 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
29915 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
29916 
29917 #undef TARGET_ASM_FILE_START
29918 #define TARGET_ASM_FILE_START x86_file_start
29919 
29920 #undef TARGET_DEFAULT_TARGET_FLAGS
29921 #define TARGET_DEFAULT_TARGET_FLAGS     \
29922   (TARGET_DEFAULT                       \
29923    | TARGET_SUBTARGET_DEFAULT           \
29924    | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
29925 
29926 #undef TARGET_HANDLE_OPTION
29927 #define TARGET_HANDLE_OPTION ix86_handle_option
29928 
29929 #undef TARGET_RTX_COSTS
29930 #define TARGET_RTX_COSTS ix86_rtx_costs
29931 #undef TARGET_ADDRESS_COST
29932 #define TARGET_ADDRESS_COST ix86_address_cost
29933 
29934 #undef TARGET_FIXED_CONDITION_CODE_REGS
29935 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
29936 #undef TARGET_CC_MODES_COMPATIBLE
29937 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
29938 
29939 #undef TARGET_MACHINE_DEPENDENT_REORG
29940 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
29941 
29942 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
29943 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
29944 
29945 #undef TARGET_BUILD_BUILTIN_VA_LIST
29946 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
29947 
29948 #undef TARGET_FN_ABI_VA_LIST
29949 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
29950 
29951 #undef TARGET_CANONICAL_VA_LIST_TYPE
29952 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
29953 
29954 #undef TARGET_EXPAND_BUILTIN_VA_START
29955 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
29956 
29957 #undef TARGET_MD_ASM_CLOBBERS
29958 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
29959 
29960 #undef TARGET_PROMOTE_PROTOTYPES
29961 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
29962 #undef TARGET_STRUCT_VALUE_RTX
29963 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
29964 #undef TARGET_SETUP_INCOMING_VARARGS
29965 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
29966 #undef TARGET_MUST_PASS_IN_STACK
29967 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
29968 #undef TARGET_PASS_BY_REFERENCE
29969 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
29970 #undef TARGET_INTERNAL_ARG_POINTER
29971 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
29972 #undef TARGET_UPDATE_STACK_BOUNDARY
29973 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
29974 #undef TARGET_GET_DRAP_RTX
29975 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
29976 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
29977 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
29978 #undef TARGET_STRICT_ARGUMENT_NAMING
29979 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
29980 
29981 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
29982 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
29983 
29984 #undef TARGET_SCALAR_MODE_SUPPORTED_P
29985 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
29986 
29987 #undef TARGET_VECTOR_MODE_SUPPORTED_P
29988 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
29989 
29990 #undef TARGET_C_MODE_FOR_SUFFIX
29991 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
29992 
29993 #ifdef HAVE_AS_TLS
29994 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
29995 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
29996 #endif
29997 
29998 #ifdef SUBTARGET_INSERT_ATTRIBUTES
29999 #undef TARGET_INSERT_ATTRIBUTES
30000 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
30001 #endif
30002 
30003 #undef TARGET_MANGLE_TYPE
30004 #define TARGET_MANGLE_TYPE ix86_mangle_type
30005 
30006 #undef TARGET_STACK_PROTECT_FAIL
30007 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
30008 
30009 #undef TARGET_FUNCTION_VALUE
30010 #define TARGET_FUNCTION_VALUE ix86_function_value
30011 
30012 #undef TARGET_SECONDARY_RELOAD
30013 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
30014 
30015 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
30016 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
30017 
30018 #undef TARGET_SET_CURRENT_FUNCTION
30019 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
30020 
30021 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
30022 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
30023 
30024 #undef TARGET_OPTION_SAVE
30025 #define TARGET_OPTION_SAVE ix86_function_specific_save
30026 
30027 #undef TARGET_OPTION_RESTORE
30028 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
30029 
30030 #undef TARGET_OPTION_PRINT
30031 #define TARGET_OPTION_PRINT ix86_function_specific_print
30032 
30033 #undef TARGET_OPTION_CAN_INLINE_P
30034 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
30035 
30036 #undef TARGET_EXPAND_TO_RTL_HOOK
30037 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
30038 
30039 struct gcc_target targetm = TARGET_INITIALIZER;
30040 
30041 #include "gt-i386.h"