Ticket #4204: intel-atom-optimization.diff

File intel-atom-optimization.diff, 124.5 KB (added by jprostko, 15 years ago)

Optional patch to allow Intel Atom optimizations in GCC 4.4.1

  • gcc/doc/invoke.texi

     
    574574-mno-wide-multiply  -mrtd  -malign-double @gol
    575575-mpreferred-stack-boundary=@var{num}
    576576-mincoming-stack-boundary=@var{num}
    577 -mcld -mcx16 -msahf -mrecip @gol
     577-mcld -mcx16 -msahf -mmovbe -mrecip @gol
    578578-mmmx  -msse  -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
    579579-maes -mpclmul @gol
    580580-msse4a -m3dnow -mpopcnt -mabm -msse5 @gol
     
    584584-m96bit-long-double  -mregparm=@var{num}  -msseregparm @gol
    585585-mveclibabi=@var{type} -mpc32 -mpc64 -mpc80 -mstackrealign @gol
    586586-momit-leaf-frame-pointer  -mno-red-zone -mno-tls-direct-seg-refs @gol
    587 -mcmodel=@var{code-model} @gol
     587-mcmodel=@var{code-model} -mabi=@var{name} @gol
    588588-m32  -m64 -mlarge-data-threshold=@var{num} @gol
    589589-mfused-madd -mno-fused-madd -msse2avx}
    590590
     
    1095910959@item core2
    1096010960Intel Core2 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3 and SSSE3
    1096110961instruction set support.
     10962@item atom
     10963Intel Atom CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3 and SSSE3
     10964instruction set support.
    1096210965@item k6
    1096310966AMD K6 CPU with MMX instruction set support.
    1096410967@item k6-2, k6-3
     
    1135811361In 64-bit mode, SAHF instruction is used to optimize @code{fmod}, @code{drem}
    1135911362or @code{remainder} built-in functions: see @ref{Other Builtins} for details.
    1136011363
     11364@item -mmovbe
     11365@opindex mmovbe
     11366This option will enable GCC to use movbe instruction to implement
     11367@code{__builtin_bswap32} and @code{__builtin_bswap64}.
     11368
    1136111369@item -mrecip
    1136211370@opindex mrecip
    1136311371This option will enable GCC to use RCPSS and RSQRTSS instructions (and their
     
    1139411402@option{-funsafe-math-optimizations} have to be enabled. A SVML or ACML ABI
    1139511403compatible library will have to be specified at link time.
    1139611404
     11405@item -mabi=@var{name}
     11406@opindex mabi
     11407Generate code for the specified calling convention.  Permissible values
     11408are: @samp{sysv} for the ABI used on GNU/Linux and other systems and
     11409@samp{ms} for the Microsoft ABI.  The default is to use the Microsoft
     11410ABI when targeting Windows.  On all other systems, the default is the
     11411SYSV ABI.  You can control this behavior for a specific function by
     11412using the function attribute @samp{ms_abi}/@samp{sysv_abi}.
     11413@xref{Function Attributes}.
     11414
    1139711415@item -mpush-args
    1139811416@itemx -mno-push-args
    1139911417@opindex mpush-args
  • gcc/doc/md.texi

     
    75047504recognize complicated bypasses, e.g.@: when the consumer is only an address
    75057505of insn @samp{store} (not a stored value).
    75067506
     7507If there is more than one bypass with the same output and input insns, the
     7508chosen bypass is the first bypass with a guard in description whose
     7509guard function returns nonzero.  If there is no such bypass, then
     7510bypass without the guard function is chosen.
     7511
    75077512@findex exclusion_set
    75087513@findex presence_set
    75097514@findex final_presence_set
  • gcc/genautomata.c

     
    11/* Pipeline hazard description translator.
    2    Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008
     2   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
    33   Free Software Foundation, Inc.
    44
    55   Written by Vladimir Makarov <vmakarov@redhat.com>
     
    2222
    2323/* References:
    2424
    25    1. Detecting pipeline structural hazards quickly. T. Proebsting,
     25   1. The finite state automaton based pipeline hazard recognizer and
     26      instruction scheduler in GCC.  V. Makarov.  Proceedings of GCC
     27      summit, 2003.
     28
     29   2. Detecting pipeline structural hazards quickly. T. Proebsting,
    2630      C. Fraser. Proceedings of ACM SIGPLAN-SIGACT Symposium on
    2731      Principles of Programming Languages, pages 280--286, 1994.
    2832
    2933      This article is a good start point to understand usage of finite
    3034      state automata for pipeline hazard recognizers.  But I'd
    31       recommend the 2nd article for more deep understanding.
     35      recommend the 1st and 3rd articles for a deeper understanding.
    3236
    33    2. Efficient Instruction Scheduling Using Finite State Automata:
     37   3. Efficient Instruction Scheduling Using Finite State Automata:
    3438      V. Bala and N. Rubin, Proceedings of MICRO-28.  This is the best
    3539      article about usage of finite state automata for pipeline hazard
    3640      recognizers.
    3741
    38    The current implementation is different from the 2nd article in the
    39    following:
     42   The current implementation is described in the 1st article and it
     43   is different from the 3rd article in the following:
    4044
    4145   1. New operator `|' (alternative) is permitted in functional unit
    4246      reservation which can be treated deterministically and
     
    463467     insn.  */
    464468  int insn_num;
    465469  /* The following field value is list of bypasses in which given insn
    466      is output insn.  */
     470     is output insn.  Bypasses with the same input insn stay one after
     471     another in the list in the same order as their occurrences in the
     472     description but the bypass without a guard stays always the last
     473     in a row of bypasses with the same input insn.  */
    467474  struct bypass_decl *bypass_list;
    468475
    469476  /* The following fields are defined by automaton generator.  */
     
    23672374}
    23682375
    23692376
    2370 /* The function searches for bypass with given IN_INSN_RESERV in given
    2371    BYPASS_LIST.  */
    2372 static struct bypass_decl *
    2373 find_bypass (struct bypass_decl *bypass_list,
    2374          struct insn_reserv_decl *in_insn_reserv)
     2377/* The function inserts BYPASS in the list of bypasses of the
     2378   corresponding output insn.  The order of bypasses in the list is
     2379   described in a comment for member `bypass_list' (see above).  If
     2380   there is already the same bypass in the list the function reports
     2381   this and does nothing.  */
     2382static void
     2383insert_bypass (struct bypass_decl *bypass)
    23752384{
    2376   struct bypass_decl *bypass;
    2377 
    2378   for (bypass = bypass_list; bypass != NULL; bypass = bypass->next)
    2379     if (bypass->in_insn_reserv == in_insn_reserv)
    2380       break;
    2381   return bypass;
     2385  struct bypass_decl *curr, *last;
     2386  struct insn_reserv_decl *out_insn_reserv = bypass->out_insn_reserv;
     2387  struct insn_reserv_decl *in_insn_reserv = bypass->in_insn_reserv;
     2388 
     2389  for (curr = out_insn_reserv->bypass_list, last = NULL;
     2390       curr != NULL;
     2391       last = curr, curr = curr->next)
     2392    if (curr->in_insn_reserv == in_insn_reserv)
     2393      {
     2394    if ((bypass->bypass_guard_name != NULL
     2395         && curr->bypass_guard_name != NULL
     2396         && ! strcmp (bypass->bypass_guard_name, curr->bypass_guard_name))
     2397        || bypass->bypass_guard_name == curr->bypass_guard_name)
     2398      {
     2399        if (bypass->bypass_guard_name == NULL)
     2400          {
     2401        if (!w_flag)
     2402          error ("the same bypass `%s - %s' is already defined",
     2403             bypass->out_insn_name, bypass->in_insn_name);
     2404        else
     2405          warning (0, "the same bypass `%s - %s' is already defined",
     2406               bypass->out_insn_name, bypass->in_insn_name);
     2407          }
     2408        else if (!w_flag)
     2409          error ("the same bypass `%s - %s' (guard %s) is already defined",
     2410             bypass->out_insn_name, bypass->in_insn_name,
     2411             bypass->bypass_guard_name);
     2412        else
     2413          warning
     2414        (0, "the same bypass `%s - %s' (guard %s) is already defined",
     2415         bypass->out_insn_name, bypass->in_insn_name,
     2416         bypass->bypass_guard_name);
     2417        return;
     2418      }
     2419    if (curr->bypass_guard_name == NULL)
     2420      break;
     2421    if (curr->next == NULL || curr->next->in_insn_reserv != in_insn_reserv)
     2422      {
     2423        last = curr;
     2424        break;
     2425      }
     2426     
     2427      }
     2428  if (last == NULL)
     2429    {
     2430      bypass->next = out_insn_reserv->bypass_list;
     2431      out_insn_reserv->bypass_list = bypass;
     2432    }
     2433  else
     2434    {
     2435      bypass->next = last->next;
     2436      last->next = bypass;
     2437    }
    23822438}
    23832439
    23842440/* The function processes pipeline description declarations, checks
     
    23912447  decl_t decl_in_table;
    23922448  decl_t out_insn_reserv;
    23932449  decl_t in_insn_reserv;
    2394   struct bypass_decl *bypass;
    23952450  int automaton_presence;
    23962451  int i;
    23972452
     
    25142569        = DECL_INSN_RESERV (out_insn_reserv);
    25152570          DECL_BYPASS (decl)->in_insn_reserv
    25162571        = DECL_INSN_RESERV (in_insn_reserv);
    2517           bypass
    2518         = find_bypass (DECL_INSN_RESERV (out_insn_reserv)->bypass_list,
    2519                    DECL_BYPASS (decl)->in_insn_reserv);
    2520           if (bypass != NULL)
    2521         {
    2522           if (DECL_BYPASS (decl)->latency == bypass->latency)
    2523             {
    2524               if (!w_flag)
    2525             error
    2526               ("the same bypass `%s - %s' is already defined",
    2527                DECL_BYPASS (decl)->out_insn_name,
    2528                DECL_BYPASS (decl)->in_insn_name);
    2529               else
    2530             warning
    2531               (0, "the same bypass `%s - %s' is already defined",
    2532                DECL_BYPASS (decl)->out_insn_name,
    2533                DECL_BYPASS (decl)->in_insn_name);
    2534             }
    2535           else
    2536             error ("bypass `%s - %s' is already defined",
    2537                DECL_BYPASS (decl)->out_insn_name,
    2538                DECL_BYPASS (decl)->in_insn_name);
    2539         }
    2540           else
    2541         {
    2542           DECL_BYPASS (decl)->next
    2543             = DECL_INSN_RESERV (out_insn_reserv)->bypass_list;
    2544           DECL_INSN_RESERV (out_insn_reserv)->bypass_list
    2545             = DECL_BYPASS (decl);
    2546         }
     2572          insert_bypass (DECL_BYPASS (decl));
    25472573        }
    25482574    }
    25492575    }
     
    81598185                (advance_cycle_insn_decl)->insn_num));
    81608186        fprintf (output_file, "        case %d:\n",
    81618187             bypass->in_insn_reserv->insn_num);
    8162         if (bypass->bypass_guard_name == NULL)
    8163           fprintf (output_file, "          return %d;\n",
    8164                bypass->latency);
    8165         else
     8188        for (;;)
    81668189          {
    8167         fprintf (output_file,
    8168              "          if (%s (%s, %s))\n",
    8169              bypass->bypass_guard_name, INSN_PARAMETER_NAME,
    8170              INSN2_PARAMETER_NAME);
    8171         fprintf (output_file,
    8172              "            return %d;\n          break;\n",
    8173              bypass->latency);
     8190        if (bypass->bypass_guard_name == NULL)
     8191          {
     8192            gcc_assert (bypass->next == NULL
     8193                || (bypass->in_insn_reserv
     8194                    != bypass->next->in_insn_reserv));
     8195            fprintf (output_file, "          return %d;\n",
     8196                 bypass->latency);
     8197          }
     8198        else
     8199          {
     8200            fprintf (output_file,
     8201                 "          if (%s (%s, %s))\n",
     8202                 bypass->bypass_guard_name, INSN_PARAMETER_NAME,
     8203                 INSN2_PARAMETER_NAME);
     8204            fprintf (output_file, "            return %d;\n",
     8205                 bypass->latency);
     8206          }
     8207        if (bypass->next == NULL
     8208            || bypass->in_insn_reserv != bypass->next->in_insn_reserv)
     8209          break;
     8210        bypass = bypass->next;
    81748211          }
     8212        if (bypass->bypass_guard_name != NULL)
     8213          fprintf (output_file, "          break;\n");
    81758214      }
    81768215    fputs ("        }\n      break;\n", output_file);
    81778216      }
  • gcc/testsuite/gcc.target/i386/movbe-1.c

     
     1/* { dg-do compile } */
     2/* { dg-options "-O2 -mmovbe" } */
     3
     4extern int x;
     5
     6void
     7foo (int i)
     8{
     9  x = __builtin_bswap32 (i);
     10}
     11
     12int
     13bar ()
     14{
     15  return __builtin_bswap32 (x);
     16}
     17
     18/* { dg-final { scan-assembler-times "movbe\[ \t\]" 2 } } */
  • gcc/testsuite/gcc.target/i386/movbe-2.c

     
     1/* { dg-do compile } */
     2/* { dg-options "-O2 -mmovbe" } */
     3
     4extern long long x;
     5
     6void
     7foo (long long i)
     8{
     9  x = __builtin_bswap64 (i);
     10}
     11
     12long long
     13bar ()
     14{
     15  return __builtin_bswap64 (x);
     16}
     17
     18/* { dg-final { scan-assembler-times "movbe\[ \t\]" 4 { target ilp32 } } } */
     19/* { dg-final { scan-assembler-times "movbe\[ \t\]" 2 { target lp64 } } } */
  • gcc/testsuite/gcc.target/x86_64/abi/callabi/vaarg-4a.c

     
     1/* Test for cross x86_64<->w64 abi va_list calls.  */
     2/* { dg-do run { target i?86-*-linux* x86_64-*-linux* } } */
     3/* { dg-options "-O2 -mabi=ms -std=gnu99 -fno-builtin" } */
     4/* { dg-additional-sources "vaarg-4b.c" } */
     5
     6extern __SIZE_TYPE__ __attribute__ ((sysv_abi)) strlen (const char *);
     7extern int __attribute__ ((sysv_abi)) sprintf (char *,const char *, ...);
     8extern void __attribute__ ((sysv_abi)) abort (void);
     9
     10extern void do_cpy (char *, ...);
     11
     12int __attribute__ ((sysv_abi))
     13main ()
     14{
     15  char s[256];
     16
     17  do_cpy (s, "1","2","3","4", "5", "6", "7", "");
     18
     19  if (s[0] != '1' || s[1] !='2' || s[2] != '3' || s[3] != '4'
     20      || s[4] != '5' || s[5] != '6' || s[6] != '7' || s[7] != 0)
     21    abort ();
     22
     23  return 0;
     24}
  • gcc/testsuite/gcc.target/x86_64/abi/callabi/vaarg-4b.c

     
     1/* Test for cross x86_64<->w64 abi va_list calls.  */
     2/* { dg-options "-O2 -mabi=ms -std=gnu99 -fno-builtin" } */
     3
     4#include <stdarg.h>
     5
     6extern __SIZE_TYPE__ __attribute__ ((sysv_abi)) strlen (const char *);
     7extern int __attribute__ ((sysv_abi)) sprintf (char *, const char *, ...);
     8
     9static void
     10vdo_cpy (char *s, va_list argp)
     11{
     12  __SIZE_TYPE__ len;
     13  char *r = s;
     14  char *e;
     15  *r = 0;
     16  for (;;) {
     17    e = va_arg (argp, char *);
     18    if (*e == 0) break;
     19    sprintf (r,"%s", e);
     20    r += strlen (r);
     21  }
     22}
     23
     24void
     25do_cpy (char *s, ...)
     26{
     27  va_list argp;
     28  va_start (argp, s);
     29  vdo_cpy (s, argp);
     30  va_end (argp);
     31}
  • gcc/testsuite/gcc.target/x86_64/abi/callabi/vaarg-5a.c

     
     1/* Test for cross x86_64<->w64 abi va_list calls.  */
     2/* { dg-do run { target i?86-*-linux* x86_64-*-linux* } } */
     3/* { dg-options "-O2 -mabi=ms -std=gnu99 -fno-builtin" } */
     4/* { dg-additional-sources "vaarg-5b.c" } */
     5
     6extern void __attribute__ ((sysv_abi)) abort (void);
     7extern int fct2 (int, ...);
     8
     9#define SZ_ARGS 1ll,2ll,3ll,4ll,5ll,6ll,7ll,0ll
     10
     11int __attribute__ ((sysv_abi))
     12main()
     13{
     14  if (fct2 (-1, SZ_ARGS) != 0)
     15    abort ();
     16  return 0;
     17}
  • gcc/testsuite/gcc.target/x86_64/abi/callabi/vaarg-5b.c

     
     1/* Test for cross x86_64<->w64 abi va_list calls.  */
     2/* { dg-options "-O2 -mabi=ms -std=gnu99 -fno-builtin" } */
     3
     4#include <stdarg.h>
     5
     6#define SZ_ARGS 1ll,2ll,3ll,4ll,5ll,6ll,7ll,0ll
     7
     8static int __attribute__ ((sysv_abi))
     9fct1 (va_list argp, ...)
     10{
     11  long long p1,p2;
     12  int ret = 1;
     13  __builtin_sysv_va_list argp_2;
     14
     15  __builtin_sysv_va_start (argp_2, argp);
     16  do {
     17    p1 = va_arg (argp_2, long long);
     18    p2 = va_arg (argp, long long);
     19    if (p1 != p2)
     20      ret = 0;
     21  } while (ret && p1 != 0);
     22  __builtin_sysv_va_end (argp_2);
     23
     24  return ret;
     25}
     26
     27int
     28fct2 (int dummy, ...)
     29{
     30  va_list argp;
     31  int ret = dummy;
     32
     33  va_start (argp, dummy);
     34  ret += fct1 (argp, SZ_ARGS);
     35  va_end (argp);
     36  return ret;
     37}
  • gcc/testsuite/gcc.target/x86_64/abi/callabi/func-indirect-2a.c

     
     1/* Test for cross x86_64<->w64 abi standard calls via variable.  */
     2/* { dg-do run { target i?86-*-linux* x86_64-*-linux* } } */
     3/* { dg-options "-O2 -mabi=ms -std=gnu99 -ffast-math -fno-builtin" } */
     4/* { dg-additional-sources "func-indirect-2b.c" } */
     5
     6extern void __attribute__ ((sysv_abi)) abort (void);
     7typedef int (*func)(void *, char *, char *, short, long long);
     8extern func get_callback (void);
     9
     10int __attribute__ ((sysv_abi))
     11main ()
     12{
     13  func callme = get_callback ();
     14  if (callme (0, 0, 0, 0x1234, 0x1234567890abcdefLL))
     15    abort ();
     16  return 0;
     17}
  • gcc/testsuite/gcc.target/x86_64/abi/callabi/func-2a.c

     
     1/* Test for cross x86_64<->w64 abi standard calls.  */
     2/* { dg-do run { target i?86-*-linux* x86_64-*-linux* } } */
     3/* { dg-options "-O2 -mabi=ms -std=gnu99 -ffast-math -fno-builtin" } */
     4/* { dg-additional-sources "func-2b.c" } */
     5
     6extern void __attribute__ ((sysv_abi)) abort (void);
     7long double func_cross (long double, double, float, long, int, char);
     8
     9long double __attribute__ ((sysv_abi))
     10func_native (long double a, double b, float c, long d, int e, char f)
     11{
     12  long double ret;
     13  ret = a + (long double) b + (long double) c;
     14  ret *= (long double) (d + (long) e);
     15  if (f>0)
     16    ret += func_native (a,b,c,d,e,-f);
     17  return ret;
     18}
     19
     20int __attribute__ ((sysv_abi))
     21main ()
     22{
     23  if (func_cross (1.0,2.0,3.0,1,2,3)
     24      != func_native (1.0,2.0,3.0,1,2,3))
     25    abort ();
     26  return 0;
     27}
  • gcc/testsuite/gcc.target/x86_64/abi/callabi/func-indirect-2b.c

     
     1/* Test for cross x86_64<->w64 abi standard calls via variable.  */
     2/* { dg-options "-O2 -mabi=ms -std=gnu99 -ffast-math -fno-builtin" } */
     3
     4typedef int (*func)(void *, char *, char *, short, long long);
     5
     6static int
     7callback (void *ptr, char *string1, char *string2, short number,
     8      long long rand)
     9{
     10  if (ptr != 0
     11      || string1 != 0
     12      || string2 != 0
     13      || number != 0x1234
     14      || rand != 0x1234567890abcdefLL)
     15    return 1;
     16  else
     17    return 0;
     18}
     19
     20func
     21get_callback (void)
     22{
     23  return callback;
     24}
  • gcc/testsuite/gcc.target/x86_64/abi/callabi/func-2b.c

     
     1/* Test for cross x86_64<->w64 abi standard calls.  */
     2/* { dg-options "-mabi=ms -std=gnu99 -ffast-math -fno-builtin" } */
     3
     4long double func_cross (long double a, double b, float c, long d, int e,
     5            char f)
     6{
     7  long double ret;
     8  ret = a + (long double) b + (long double) c;
     9  ret *= (long double) (d + (long) e);
     10  if (f>0)
     11    ret += func_cross (a,b,c,d,e,-f);
     12  return ret;
     13}
  • gcc/testsuite/ChangeLog.ix86

     
     12009-05-21  H.J. Lu  <hongjiu.lu@intel.com>
     2
     3    Backport from mainline:
     4    2009-05-21  H.J. Lu  <hongjiu.lu@intel.com>
     5
     6    * gcc.target/i386/movbe-1.c: New.
     7    * gcc.target/i386/movbe-2.c: Likewise.
     8
     92009-03-27  H.J. Lu  <hongjiu.lu@intel.com>
     10
     11    Backport from mainline:
     12    2009-03-27  H.J. Lu  <hongjiu.lu@intel.com>
     13
     14    PR target/39472
     15    * gcc.target/x86_64/abi/callabi/func-2a.c: New.
     16    * gcc.target/x86_64/abi/callabi/func-2b.c: Likewise.
     17    * gcc.target/x86_64/abi/callabi/func-indirect-2a.c: Likewise.
     18    * gcc.target/x86_64/abi/callabi/func-indirect-2b.c: Likewise.
     19    * gcc.target/x86_64/abi/callabi/vaarg-4a.c: Likewise.
     20    * gcc.target/x86_64/abi/callabi/vaarg-4b.c: Likewise.
     21    * gcc.target/x86_64/abi/callabi/vaarg-5a.c: Likewise.
     22    * gcc.target/x86_64/abi/callabi/vaarg-5b.c: Likewise.
  • gcc/rtl.def

     
    10881088   guard for the bypass.  The function will get the two insns as
    10891089   parameters.  If the function returns zero the bypass will be
    10901090   ignored for this case.  Additional guard is necessary to recognize
    1091    complicated bypasses, e.g. when consumer is load address.  */
     1091   complicated bypasses, e.g. when consumer is load address.  If there
     1092   is more than one bypass with the same output and input insns, the
     1093   chosen bypass is the first bypass with a guard in description whose
     1094   guard function returns nonzero.  If there is no such bypass, then
     1095   bypass without the guard function is chosen.  */
    10921096DEF_RTL_EXPR(DEFINE_BYPASS, "define_bypass", "issS", RTX_EXTRA)
    10931097
    10941098/* (define_automaton string) describes names of automata generated and
  • gcc/ChangeLog.ix86

     
     12009-05-21  H.J. Lu  <hongjiu.lu@intel.com>
     2
     3    Backport from mainline:
     4    2009-05-21  H.J. Lu  <hongjiu.lu@intel.com>
     5            Uros Bizjak  <ubizjak@gmail.com>
     6
     7    * config/i386/cpuid.h (bit_MOVBE): New.
     8
     9    * config/i386/driver-i386.c (host_detect_local_cpu): Check movbe.
     10
     11    * config/i386/i386.c (OPTION_MASK_ISA_MOVBE_SET): New.
     12    (OPTION_MASK_ISA_MOVBE_UNSET): Likewise.
     13    (ix86_handle_option): Handle OPT_mmovbe.
     14    (ix86_target_string): Add -mmovbe.
     15    (pta_flags): Add PTA_MOVBE.
     16    (processor_alias_table): Add PTA_MOVBE to "atom".
     17    (override_options): Handle PTA_MOVBE.
     18
     19    * config/i386/i386.h (TARGET_MOVBE): New.
     20
     21    * config/i386/i386.md (bswapsi2): Check TARGET_MOVBE.
     22    (*bswapsi_movbe): New.
     23    (*bswapdi_movbe): Likewise.
     24    (bswapdi2): Renamed to ...
     25    (*bswapdi_1): This.
     26    (bswapdi2): New expander.
     27
     28    * config/i386/i386.opt (mmovbe): New.
     29
     30    * doc/invoke.texi: Document -mmovbe.
     31
     322009-05-20  H.J. Lu  <hongjiu.lu@intel.com>
     33
     34    Backport from mainline:
     35    2009-05-20  H.J. Lu  <hongjiu.lu@intel.com>
     36
     37    * config/i386/driver-i386.c (host_detect_local_cpu): Check
     38    extended family and model for Intel processors.  Support Intel
     39    Atom.
     40
     412009-04-20  H.J. Lu  <hongjiu.lu@intel.com>
     42
     43    Backport from mainline:
     44    2009-04-20  Joey Ye  <joey.ye@intel.com>
     45            Xuepeng Guo <xuepeng.guo@intel.com>
     46            H.J. Lu  <hongjiu.lu@intel.com>
     47
     48    * config/i386/atom.md: Add bypasses with ix86_dep_by_shift_count.
     49
     50    * config/i386/i386.c (LEA_SEARCH_THRESHOLD): New macro.
     51    (IX86_LEA_PRIORITY): Likewise.
     52    (distance_non_agu_define): New function.
     53    (distance_agu_use): Likewise.
     54    (ix86_lea_for_add_ok): Likewise.
     55    (ix86_dep_by_shift_count): Likewise.
     56
     57    * config/i386/i386.md: Call ix86_lea_for_add_ok to decide we
     58    should split for LEA.
     59
     60    * config/i386/i386-protos.h (ix86_lea_for_add_ok): Declare new
     61    function.
     62    (ix86_dep_by_shift_count): Likewise.
     63
     642009-04-07  H.J. Lu  <hongjiu.lu@intel.com>
     65
     66    Backport from mainline:
     67    2009-04-07  H.J. Lu  <hongjiu.lu@intel.com>
     68
     69    * doc/invoke.texi: Document Atom support.
     70
     712009-04-06  H.J. Lu  <hongjiu.lu@intel.com>
     72
     73    * config/i386/i386.md: Revert 2 accidental checkins.
     74
     752009-04-06  H.J. Lu  <hongjiu.lu@intel.com>
     76
     77    Backport from mainline:
     78    2009-04-06  Joey Ye  <joey.ye@intel.com>
     79            Xuepeng Guo <xuepeng.guo@intel.com>
     80            H.J. Lu  <hongjiu.lu@intel.com>
     81
     82    Atom pipeline model, tuning and insn selection.
     83    * config.gcc (atom): Add atom config options and target.
     84
     85    * config/i386/atom.md: New.
     86
     87    * config/i386/i386.c (atom_cost): New cost.
     88    (m_ATOM): New macro flag.
     89    (initial_ix86_tune_features): Set m_ATOM.
     90    (x86_accumulate_outgoing_args): Likewise.
     91    (x86_arch_always_fancy_math_387): Likewise.
     92    (processor_target): Add Atom cost.
     93    (cpu_names): Add Atom cpu name.
     94    (override_options): Set Atom ISA.
     95    (ix86_issue_rate): New case PROCESSOR_ATOM.
     96    (ix86_adjust_cost): Likewise.
     97
     98    * config/i386/i386.h (TARGET_ATOM): New target macro.
     99    (ix86_tune_indices): Add X86_TUNE_OPT_AGU.
     100    (TARGET_OPT_AGU): New target option.
     101    (target_cpu_default): Add TARGET_CPU_DEFAULT_atom.
     102    (processor_type): Add PROCESSOR_ATOM.
     103
     104    * config/i386/i386.md (cpu): Add new value "atom".
     105    (use_carry, movu): New attr.
     106    (atom.md): Include atom.md.
     107    (adddi3_carry_rex64): Set attr "use_carry".
     108    (addqi3_carry): Likewise.
     109    (addhi3_carry): Likewise.
     110    (addsi3_carry): Likewise.
     111    (*addsi3_carry_zext): Likewise.
     112    (subdi3_carry_rex64): Likewise.
     113    (subqi3_carry): Likewise.
     114    (subhi3_carry): Likewise.
     115    (subsi3_carry): Likewise.
     116    (x86_movdicc_0_m1_rex64): Likewise.
     117    (*x86_movdicc_0_m1_se): Likewise.
     118    (x86_movsicc_0_m1): Likewise.
     119    (*x86_movsicc_0_m1_se): Likewise.
     120    (*adddi_1_rex64): Emit add insn as much as possible.
     121    (*addsi_1): Likewise.
     122    (return_internal): Set atom_unit.
     123    (return_internal_long): Likewise.
     124    (return_pop_internal): Likewise.
     125    (*rcpsf2_sse): Set atom_sse_attr attr.
     126    (*qrt<mode>2_sse): Likewise.
     127
     1282009-04-02  H.J. Lu  <hongjiu.lu@intel.com>
     129
     130    Backport from mainline:
     131    2009-04-02  H.J. Lu  <hongjiu.lu@intel.com>
     132
     133    * config/i386/i386.c (ix86_abi): Move initialization to ...
     134    (override_options): Here.
     135
     1362009-03-29  H.J. Lu  <hongjiu.lu@intel.com>
     137
     138    Backport from mainline:
     139    2009-03-29  H.J. Lu  <hongjiu.lu@intel.com>
     140
     141    * config/i386/i386-protos.h (ix86_agi_dependent): New.
     142
     143    * config/i386/i386.c (ix86_agi_dependent): Rewrite.
     144    (ix86_adjust_cost): Updated.
     145
     1462009-03-27  H.J. Lu  <hongjiu.lu@intel.com>
     147
     148    Backport from mainline:
     149    2009-03-27  H.J. Lu  <hongjiu.lu@intel.com>
     150
     151    PR target/39472
     152    * config/i386/i386.c (ix86_abi): New.
     153    (override_options): Handle -mabi=.
     154    (ix86_function_arg_regno_p): Replace DEFAULT_ABI with
     155    ix86_abi.
     156    (ix86_call_abi_override): Likewise.
     157    (init_cumulative_args): Likewise.
     158    (function_arg_advance): Likewise.
     159    (function_arg_64): Likewise.
     160    (function_arg): Likewise.
     161    (ix86_pass_by_reference): Likewise.
     162    (ix86_function_value_regno_p): Likewise.
     163    (ix86_build_builtin_va_list_abi): Likewise.
     164    (setup_incoming_varargs_64): Likewise.
     165    (is_va_list_char_pointer): Likewise.
     166    (ix86_init_machine_status): Likewise.
     167    (ix86_reg_parm_stack_space): Use enum calling_abi on
     168    call_abi.
     169    (ix86_function_type_abi): Return enum calling_abi.  Rewrite
     170    for 64bit.  Replace DEFAULT_ABI with ix86_abi.
     171    (ix86_function_abi): Make it static and return enum
     172    calling_abi.
     173    (ix86_cfun_abi): Return enum calling_abi.  Replace DEFAULT_ABI
     174    with ix86_abi.
     175    (ix86_fn_abi_va_list): Updated.
     176
     177    * config/i386/i386.h (ix86_abi): New.
     178    (STACK_BOUNDARY): Replace DEFAULT_ABI with ix86_abi.
     179    (CONDITIONAL_REGISTER_USAGE): Likewise.
     180    (CUMULATIVE_ARGS): Change call_abi type to enum calling_abi.
     181    (machine_function): Likewise.
     182
     183    * config/i386/i386.md (untyped_call): Replace DEFAULT_ABI
     184    with ix86_abi.
     185    * config/i386/cygming.h (TARGET_64BIT_MS_ABI): Likewise.
     186    (STACK_BOUNDARY): Likewise.
     187    * config/i386/mingw32.h (EXTRA_OS_CPP_BUILTINS): Likewise.
     188
     189    * config/i386/i386.opt (mabi=): New.
     190
     191    * config/i386/i386-protos.h (ix86_cfun_abi): Changed to
     192    return enum calling_abi.
     193    (ix86_function_type_abi): Likewise.
     194    (ix86_function_abi): Removed.
     195
     1962009-03-27  H.J. Lu  <hongjiu.lu@intel.com>
     197
     198    Backport from mainline:
     199    2009-03-27  Vladimir Makarov  <vmakarov@redhat.com>
     200
     201    * genautomata.c: Add a new year to the copyright.  Add a new
     202    reference.
     203    (struct insn_reserv_decl): Add comments for member bypass_list.
     204    (find_bypass): Remove.
     205    (insert_bypass): New.
     206    (process_decls): Use insert_bypass.
     207    (output_internal_insn_latency_func): Output all bypasses with the
     208    same input insn in one switch case.
     209
     210    * rtl.def (define_bypass): Describe bypass choice.
     211    * doc/md.texi (define_bypass): Ditto.
  • gcc/config.gcc

     
    10881088            tmake_file="${tmake_file} i386/t-linux64"
    10891089            need_64bit_hwint=yes
    10901090            case X"${with_cpu}" in
    1091             Xgeneric|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
     1091            Xgeneric|Xatom|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
    10921092                ;;
    10931093            X)
    10941094                if test x$with_cpu_64 = x; then
     
    10971097                ;;
    10981098            *)
    10991099                echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2
    1100                 echo "generic core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
     1100                echo "generic atom core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
    11011101                exit 1
    11021102                ;;
    11031103            esac
     
    12021202        # libgcc/configure.ac instead.
    12031203        need_64bit_hwint=yes
    12041204        case X"${with_cpu}" in
    1205         Xgeneric|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
     1205        Xgeneric|Xatom|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)
    12061206            ;;
    12071207        X)
    12081208            if test x$with_cpu_64 = x; then
     
    12111211            ;;
    12121212        *)
    12131213            echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2
    1214             echo "generic core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
     1214            echo "generic atom core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2
    12151215            exit 1
    12161216            ;;
    12171217        esac
     
    28052805                esac
    28062806                # OK
    28072807                ;;
    2808             "" | amdfam10 | barcelona | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | generic)
     2808            "" | amdfam10 | barcelona | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | atom | generic)
    28092809                # OK
    28102810                ;;
    28112811            *)
  • gcc/config/i386/i386.h

     
    5959#define TARGET_ABM  OPTION_ISA_ABM
    6060#define TARGET_POPCNT   OPTION_ISA_POPCNT
    6161#define TARGET_SAHF OPTION_ISA_SAHF
     62#define TARGET_MOVBE    OPTION_ISA_MOVBE
    6263#define TARGET_AES  OPTION_ISA_AES
    6364#define TARGET_PCLMUL   OPTION_ISA_PCLMUL
    6465#define TARGET_CMPXCHG16B OPTION_ISA_CX16
     
    236237#define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64)
    237238#define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
    238239#define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10)
     240#define TARGET_ATOM (ix86_tune == PROCESSOR_ATOM)
    239241
    240242/* Feature tests against the various tunings.  */
    241243enum ix86_tune_indices {
     
    300302  X86_TUNE_USE_VECTOR_FP_CONVERTS,
    301303  X86_TUNE_USE_VECTOR_CONVERTS,
    302304  X86_TUNE_FUSE_CMP_AND_BRANCH,
     305  X86_TUNE_OPT_AGU,
    303306
    304307  X86_TUNE_LAST
    305308};
     
    387390    ix86_tune_features[X86_TUNE_USE_VECTOR_CONVERTS]
    388391#define TARGET_FUSE_CMP_AND_BRANCH \
    389392    ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH]
     393#define TARGET_OPT_AGU ix86_tune_features[X86_TUNE_OPT_AGU]
    390394
    391395/* Feature tests against the various architecture variations.  */
    392396enum ix86_arch_indices {
     
    470474  MS_ABI = 1
    471475};
    472476
    473 /* The default abi form used by target.  */
     477/* The abi used by target.  */
     478extern enum calling_abi ix86_abi;
     479
     480/* The default abi used by target.  */
    474481#define DEFAULT_ABI SYSV_ABI
    475482
    476483/* Subtargets may reset this to 1 in order to enable 96-bit long double
     
    569576  TARGET_CPU_DEFAULT_prescott,
    570577  TARGET_CPU_DEFAULT_nocona,
    571578  TARGET_CPU_DEFAULT_core2,
     579  TARGET_CPU_DEFAULT_atom,
    572580
    573581  TARGET_CPU_DEFAULT_geode,
    574582  TARGET_CPU_DEFAULT_k6,
     
    658666
    659667/* Boundary (in *bits*) on which stack pointer should be aligned.  */
    660668#define STACK_BOUNDARY \
    661  (TARGET_64BIT && DEFAULT_ABI == MS_ABI ? 128 : BITS_PER_WORD)
     669 (TARGET_64BIT && ix86_abi == MS_ABI ? 128 : BITS_PER_WORD)
    662670
    663671/* Stack boundary of the main function guaranteed by OS.  */
    664672#define MAIN_STACK_BOUNDARY (TARGET_64BIT ? 128 : 32)
     
    15841592  int maybe_vaarg;      /* true for calls to possibly vardic fncts.  */
    15851593  int float_in_sse;     /* 1 if in 32-bit mode SFmode (2 for DFmode) should
    15861594                   be passed in SSE registers.  Otherwise 0.  */
    1587   int call_abi;         /* Set to SYSV_ABI for sysv abi. Otherwise
     1595  enum calling_abi call_abi;    /* Set to SYSV_ABI for sysv abi. Otherwise
    15881596                   MS_ABI for ms abi.  */
    15891597} CUMULATIVE_ARGS;
    15901598
     
    22302238  PROCESSOR_GENERIC32,
    22312239  PROCESSOR_GENERIC64,
    22322240  PROCESSOR_AMDFAM10,
     2241  PROCESSOR_ATOM,
    22332242  PROCESSOR_max
    22342243};
    22352244
     
    24032412  int tls_descriptor_call_expanded_p;
    24042413  /* This value is used for amd64 targets and specifies the current abi
    24052414     to be used. MS_ABI means ms abi. Otherwise SYSV_ABI means sysv abi.  */
    2406   int call_abi;
     2415   enum calling_abi call_abi;
    24072416};
    24082417
    24092418#define ix86_stack_locals (cfun->machine->stack_locals)
  • gcc/config/i386/cygming.h

     
    3434#endif
    3535
    3636#undef TARGET_64BIT_MS_ABI
    37 #define TARGET_64BIT_MS_ABI (!cfun ? DEFAULT_ABI == MS_ABI : TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
     37#define TARGET_64BIT_MS_ABI (!cfun ? ix86_abi == MS_ABI : TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
    3838
    3939#undef DEFAULT_ABI
    4040#define DEFAULT_ABI (TARGET_64BIT ? MS_ABI : SYSV_ABI)
     
    203203#define CHECK_STACK_LIMIT 4000
    204204
    205205#undef STACK_BOUNDARY
    206 #define STACK_BOUNDARY  (DEFAULT_ABI == MS_ABI ? 128 : BITS_PER_WORD)
     206#define STACK_BOUNDARY  (ix86_abi == MS_ABI ? 128 : BITS_PER_WORD)
    207207
    208208/* By default, target has a 80387, uses IEEE compatible arithmetic,
    209209   returns float values in the 387 and needs stack probes.
  • gcc/config/i386/i386.md

     
    316316
    317317
    318318
    319319;; Processor type.
    320 (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,
     320(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,atom,
    321321            generic64,amdfam10"
    322322  (const (symbol_ref "ix86_schedule")))
  • gcc/config/i386/atom.md

     
    @@ -612,6 +612,12 @@
     (define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any"
       (const_string "any"))
     
    +;; Define attribute to classify add/sub insns that consume the carry flag (CF)
    +(define_attr "use_carry" "0,1" (const_string "0"))
    +
    +;; Define attribute to indicate unaligned ssemov insns
    +(define_attr "movu" "0,1" (const_string "0"))
    +
     ;; Describe a user's asm statement.
     (define_asm_attributes
       [(set_attr "length" "128")
    @@ -727,6 +733,7 @@
     (include "k6.md")
     (include "athlon.md")
     (include "geode.md")
    +(include "atom.md")
     
     
    
     ;; Operand and operator predicates and constraints
    @@ -5790,6 +5797,7 @@
       "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
       "adc{q}\t{%2, %0|%0, %2}"
       [(set_attr "type" "alu")
    +   (set_attr "use_carry" "1")
        (set_attr "pent_pair" "pu")
        (set_attr "mode" "DI")])
     
    @@ -5864,6 +5872,7 @@
       "ix86_binary_operator_ok (PLUS, QImode, operands)"
       "adc{b}\t{%2, %0|%0, %2}"
       [(set_attr "type" "alu")
    +   (set_attr "use_carry" "1")
        (set_attr "pent_pair" "pu")
        (set_attr "mode" "QI")])
     
    @@ -5876,6 +5885,7 @@
       "ix86_binary_operator_ok (PLUS, HImode, operands)"
       "adc{w}\t{%2, %0|%0, %2}"
       [(set_attr "type" "alu")
    +   (set_attr "use_carry" "1")
        (set_attr "pent_pair" "pu")
        (set_attr "mode" "HI")])
     
    @@ -5888,6 +5898,7 @@
       "ix86_binary_operator_ok (PLUS, SImode, operands)"
       "adc{l}\t{%2, %0|%0, %2}"
       [(set_attr "type" "alu")
    +   (set_attr "use_carry" "1")
        (set_attr "pent_pair" "pu")
        (set_attr "mode" "SI")])
     
    @@ -5901,6 +5912,7 @@
       "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
       "adc{l}\t{%2, %k0|%k0, %2}"
       [(set_attr "type" "alu")
    +   (set_attr "use_carry" "1")
        (set_attr "pent_pair" "pu")
        (set_attr "mode" "SI")])
     
    @@ -6130,9 +6142,9 @@
        (set_attr "mode" "SI")])
     
     (define_insn "*adddi_1_rex64"
    -  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r")
    -	(plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r")
    -		 (match_operand:DI 2 "x86_64_general_operand" "rme,re,le")))
    +  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r")
    +	(plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r,r")
    +		 (match_operand:DI 2 "x86_64_general_operand" "rme,re,0,le")))
        (clobber (reg:CC FLAGS_REG))]
       "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
     {
    @@ -6153,6 +6165,10 @@
     	}
     
         default:
    +      /* Use add as much as possible to replace lea for AGU optimization. */
    +      if (which_alternative == 2 && TARGET_OPT_AGU)
    +        return "add{q}\t{%1, %0|%0, %1}";
    +        
           gcc_assert (rtx_equal_p (operands[0], operands[1]));
     
           /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
    @@ -6171,8 +6187,11 @@
         }
     }
       [(set (attr "type")
    -     (cond [(eq_attr "alternative" "2")
    +     (cond [(and (eq_attr "alternative" "2") 
    +                 (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
     	      (const_string "lea")
    +            (eq_attr "alternative" "3")
    +              (const_string "lea")
     	    ; Current assemblers are broken and do not allow @GOTOFF in
     	    ; ought but a memory context.
     	    (match_operand:DI 2 "pic_symbolic_operand" "")
    @@ -6189,8 +6208,8 @@
     	(plus:DI (match_operand:DI 1 "register_operand" "")
     		 (match_operand:DI 2 "x86_64_nonmemory_operand" "")))
        (clobber (reg:CC FLAGS_REG))]
    -  "TARGET_64BIT && reload_completed
    -   && true_regnum (operands[0]) != true_regnum (operands[1])"
    +  "TARGET_64BIT && reload_completed 
    +   && ix86_lea_for_add_ok (PLUS, insn, operands)"
       [(set (match_dup 0)
     	(plus:DI (match_dup 1)
     		 (match_dup 2)))]
    @@ -6394,9 +6413,9 @@
     
     
     (define_insn "*addsi_1"
    -  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r")
    -	(plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r")
    -		 (match_operand:SI 2 "general_operand" "g,ri,li")))
    +  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r,r")
    +	(plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r,r")
    +		 (match_operand:SI 2 "general_operand" "g,ri,0,li")))
        (clobber (reg:CC FLAGS_REG))]
       "ix86_binary_operator_ok (PLUS, SImode, operands)"
     {
    @@ -6417,6 +6436,10 @@
     	}
     
         default:
    +      /* Use add as much as possible to replace lea for AGU optimization. */
    +      if (which_alternative == 2 && TARGET_OPT_AGU)
    +        return "add{l}\t{%1, %0|%0, %1}";
    +
           gcc_assert (rtx_equal_p (operands[0], operands[1]));
     
           /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
    @@ -6433,7 +6456,10 @@
         }
     }
       [(set (attr "type")
    -     (cond [(eq_attr "alternative" "2")
    +     (cond [(and (eq_attr "alternative" "2") 
    +                 (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
    +               (const_string "lea")
    +            (eq_attr "alternative" "3")
     	      (const_string "lea")
     	    ; Current assemblers are broken and do not allow @GOTOFF in
     	    ; ought but a memory context.
    @@ -6451,8 +6477,7 @@
     	(plus (match_operand 1 "register_operand" "")
                   (match_operand 2 "nonmemory_operand" "")))
        (clobber (reg:CC FLAGS_REG))]
    -  "reload_completed
    -   && true_regnum (operands[0]) != true_regnum (operands[1])"
    +  "reload_completed && ix86_lea_for_add_ok (PLUS, insn, operands)" 
       [(const_int 0)]
     {
       rtx pat;
    @@ -7553,6 +7578,7 @@
       "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)"
       "sbb{q}\t{%2, %0|%0, %2}"
       [(set_attr "type" "alu")
    +   (set_attr "use_carry" "1")
        (set_attr "pent_pair" "pu")
        (set_attr "mode" "DI")])
     
    @@ -7601,6 +7627,7 @@
       "ix86_binary_operator_ok (MINUS, QImode, operands)"
       "sbb{b}\t{%2, %0|%0, %2}"
       [(set_attr "type" "alu")
    +   (set_attr "use_carry" "1")
        (set_attr "pent_pair" "pu")
        (set_attr "mode" "QI")])
     
    @@ -7613,6 +7640,7 @@
       "ix86_binary_operator_ok (MINUS, HImode, operands)"
       "sbb{w}\t{%2, %0|%0, %2}"
       [(set_attr "type" "alu")
    +   (set_attr "use_carry" "1")
        (set_attr "pent_pair" "pu")
        (set_attr "mode" "HI")])
     
    @@ -7625,6 +7653,7 @@
       "ix86_binary_operator_ok (MINUS, SImode, operands)"
       "sbb{l}\t{%2, %0|%0, %2}"
       [(set_attr "type" "alu")
    +   (set_attr "use_carry" "1")
        (set_attr "pent_pair" "pu")
        (set_attr "mode" "SI")])
     
    @@ -15155,7 +15184,7 @@
     		     ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL),
     		    operands[0], const0_rtx,
     		    GEN_INT ((TARGET_64BIT
    -			      ? (DEFAULT_ABI == SYSV_ABI
    +			      ? (ix86_abi == SYSV_ABI
     				 ? X86_64_SSE_REGPARM_MAX
     				 : X64_SSE_REGPARM_MAX)
     			      : X86_32_SSE_REGPARM_MAX)
    @@ -15235,6 +15264,7 @@
       "reload_completed"
       "ret"
       [(set_attr "length" "1")
    +   (set_attr "atom_unit" "jeu")
        (set_attr "length_immediate" "0")
        (set_attr "modrm" "0")])
     
    @@ -15247,6 +15277,7 @@
       "reload_completed"
       "rep\;ret"
       [(set_attr "length" "1")
    +   (set_attr "atom_unit" "jeu")
        (set_attr "length_immediate" "0")
        (set_attr "prefix_rep" "1")
        (set_attr "modrm" "0")])
    @@ -15257,6 +15288,7 @@
       "reload_completed"
       "ret\t%0"
       [(set_attr "length" "3")
    +   (set_attr "atom_unit" "jeu")
        (set_attr "length_immediate" "2")
        (set_attr "modrm" "0")])
     
    @@ -15610,7 +15642,7 @@
     	(bswap:SI (match_operand:SI 1 "register_operand" "")))]
       ""
     {
    -  if (!TARGET_BSWAP)
    +  if (!(TARGET_BSWAP || TARGET_MOVBE))
         {
           rtx x = operands[0];
     
    @@ -15622,6 +15654,21 @@
         }
     })
     
    +(define_insn "*bswapsi_movbe"
    +  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,m")
    +	(bswap:SI (match_operand:SI 1 "nonimmediate_operand" "0,m,r")))]
    +  "TARGET_MOVBE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
    +  "@
    +    bswap\t%0
    +    movbe\t{%1, %0|%0, %1}
    +    movbe\t{%1, %0|%0, %1}"
    +  [(set_attr "type" "*,imov,imov")
    +   (set_attr "modrm" "*,1,1")
    +   (set_attr "prefix_0f" "1")
    +   (set_attr "prefix_extra" "*,1,1")
    +   (set_attr "length" "2,*,*")
    +   (set_attr "mode" "SI")])
    +
     (define_insn "*bswapsi_1"
       [(set (match_operand:SI 0 "register_operand" "=r")
     	(bswap:SI (match_operand:SI 1 "register_operand" "0")))]
    @@ -15650,7 +15697,29 @@
       [(set_attr "length" "4")
        (set_attr "mode" "HI")])
     
    -(define_insn "bswapdi2"
    +(define_expand "bswapdi2"
    +  [(set (match_operand:DI 0 "register_operand" "")
    +	(bswap:DI (match_operand:DI 1 "register_operand" "")))]
    +  "TARGET_64BIT"
    +  "")
    +
    +(define_insn "*bswapdi_movbe"
    +  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,m")
    +	(bswap:DI (match_operand:DI 1 "nonimmediate_operand" "0,m,r")))]
    +  "TARGET_64BIT && TARGET_MOVBE
    +   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
    +  "@
    +    bswap\t%0
    +    movbe\t{%1, %0|%0, %1}
    +    movbe\t{%1, %0|%0, %1}"
    +  [(set_attr "type" "*,imov,imov")
    +   (set_attr "modrm" "*,1,1")
    +   (set_attr "prefix_0f" "1")
    +   (set_attr "prefix_extra" "*,1,1")
    +   (set_attr "length" "3,*,*")
    +   (set_attr "mode" "DI")])
    +
    +(define_insn "*bswapdi_1"
       [(set (match_operand:DI 0 "register_operand" "=r")
     	(bswap:DI (match_operand:DI 1 "register_operand" "0")))]
       "TARGET_64BIT"
    @@ -16378,6 +16447,7 @@
       "TARGET_SSE_MATH"
       "%vrcpss\t{%1, %d0|%d0, %1}"
       [(set_attr "type" "sse")
    +   (set_attr "atom_sse_attr" "rcp")
        (set_attr "prefix" "maybe_vex")
        (set_attr "mode" "SF")])
     
    @@ -16729,6 +16799,7 @@
       "TARGET_SSE_MATH"
       "%vrsqrtss\t{%1, %d0|%d0, %1}"
       [(set_attr "type" "sse")
    +   (set_attr "atom_sse_attr" "rcp")
        (set_attr "prefix" "maybe_vex")
        (set_attr "mode" "SF")])
     
    @@ -16749,6 +16820,7 @@
       "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
       "%vsqrts<ssemodefsuffix>\t{%1, %d0|%d0, %1}"
       [(set_attr "type" "sse")
    +   (set_attr "atom_sse_attr" "sqrt")
        (set_attr "prefix" "maybe_vex")
        (set_attr "mode" "<MODE>")
        (set_attr "athlon_decode" "*")
    @@ -19802,6 +19874,7 @@
       ; Since we don't have the proper number of operands for an alu insn,
       ; fill in all the blanks.
       [(set_attr "type" "alu")
    +   (set_attr "use_carry" "1")
        (set_attr "pent_pair" "pu")
        (set_attr "memory" "none")
        (set_attr "imm_disp" "false")
    @@ -19817,6 +19890,7 @@
       ""
       "sbb{q}\t%0, %0"
       [(set_attr "type" "alu")
    +   (set_attr "use_carry" "1")
        (set_attr "pent_pair" "pu")
        (set_attr "memory" "none")
        (set_attr "imm_disp" "false")
    @@ -19860,6 +19934,7 @@
       ; Since we don't have the proper number of operands for an alu insn,
       ; fill in all the blanks.
       [(set_attr "type" "alu")
    +   (set_attr "use_carry" "1")
        (set_attr "pent_pair" "pu")
        (set_attr "memory" "none")
        (set_attr "imm_disp" "false")
    @@ -19875,6 +19950,7 @@
       ""
       "sbb{l}\t%0, %0"
       [(set_attr "type" "alu")
    +   (set_attr "use_carry" "1")
        (set_attr "pent_pair" "pu")
        (set_attr "memory" "none")
        (set_attr "imm_disp" "false")
    @@ -20207,7 +20283,8 @@
         }
     }
       [(set (attr "type")
    -	(cond [(eq_attr "alternative" "0")
    +	(cond [(and (eq_attr "alternative" "0") 
    +	            (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
     		 (const_string "alu")
     	       (match_operand:SI 2 "const0_operand" "")
     		 (const_string "imov")
    @@ -20250,7 +20327,8 @@
         }
     }
       [(set (attr "type")
    -	(cond [(eq_attr "alternative" "0")
    +	(cond [(and (eq_attr "alternative" "0")
    +	            (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0)))
     		 (const_string "alu")
     	       (match_operand:DI 2 "const0_operand" "")
     		 (const_string "imov")
    @@ -21734,6 +21812,7 @@
       return patterns[locality];
     }
       [(set_attr "type" "sse")
    +   (set_attr "atom_sse_attr" "prefetch")
        (set_attr "memory" "none")])
     
     (define_insn "*prefetch_sse_rex"
    @@ -21752,6 +21831,7 @@
       return patterns[locality];
     }
       [(set_attr "type" "sse")
    +   (set_attr "atom_sse_attr" "prefetch")
        (set_attr "memory" "none")])
     
     (define_insn "*prefetch_3dnow"
     
     1;; Atom Scheduling
     2;; Copyright (C) 2009 Free Software Foundation, Inc.
     3;;
     4;; This file is part of GCC.
     5;;
     6;; GCC is free software; you can redistribute it and/or modify
     7;; it under the terms of the GNU General Public License as published by
     8;; the Free Software Foundation; either version 3, or (at your option)
     9;; any later version.
     10;;
     11;; GCC is distributed in the hope that it will be useful,
     12;; but WITHOUT ANY WARRANTY; without even the implied warranty of
     13;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14;; GNU General Public License for more details.
     15;;
     16;; You should have received a copy of the GNU General Public License
     17;; along with GCC; see the file COPYING3.  If not see
     18;; <http://www.gnu.org/licenses/>.
     19;;
     20;; Atom is an in-order core with two integer pipelines.
     21
     22
     23(define_attr "atom_unit" "sishuf,simul,jeu,complex,other"
     24  (const_string "other"))
     25
     26(define_attr "atom_sse_attr" "rcp,movdup,lfence,fence,prefetch,sqrt,mxcsr,other"
     27  (const_string "other"))
     28
     29(define_automaton "atom")
     30
     31;;  Atom has two ports: port 0 and port 1 connecting to all execution units
     32(define_cpu_unit "atom-port-0,atom-port-1" "atom")
     33
     34;;  EU: Execution Unit
     35;;  Atom EUs are connected by port 0 or port 1.
     36
     37(define_cpu_unit "atom-eu-0, atom-eu-1,
     38                  atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4"
     39                  "atom")
     40
     41;; Some EUs have duplicated copies and can be accessed via either
     42;; port 0 or port 1
     43;; (define_reservation "atom-port-either" "(atom-port-0 | atom-port-1)")
     44
     45;;; Some instructions are dual-pipe execution and need both ports
     46;;; Complex multi-op macro-instructions need both ports and all EUs
     47(define_reservation "atom-port-dual" "(atom-port-0 + atom-port-1)")
     48(define_reservation "atom-all-eu" "(atom-eu-0 + atom-eu-1 +
     49                                    atom-imul-1 + atom-imul-2 + atom-imul-3 +
     50                                    atom-imul-4)")
     51
     52;;; Most simple instructions have 1 cycle latency. Some of them
     53;;; issue in port 0, some in port 1 and some in either port.
     54(define_reservation "atom-simple-0" "(atom-port-0 + atom-eu-0)")
     55(define_reservation "atom-simple-1" "(atom-port-1 + atom-eu-1)")
     56(define_reservation "atom-simple-either" "(atom-simple-0 | atom-simple-1)")
     57
     58;;; Some insns issue in port 0 with 3 cycle latency and 1 cycle throughput
     59(define_reservation "atom-eu-0-3-1" "(atom-port-0 + atom-eu-0, nothing*2)")
     60
     61;;; fmul insn can have 4 or 5 cycles latency
     62(define_reservation "atom-fmul-5c" "(atom-port-0 + atom-eu-0), nothing*4")
     63(define_reservation "atom-fmul-4c" "(atom-port-0 + atom-eu-0), nothing*3")
     64
     65;;; fadd can have 5 cycles latency, depending on the instruction form
     66(define_reservation "atom-fadd-5c" "(atom-port-1 + atom-eu-1), nothing*5")
     67
     68;;; imul insn has 5 cycles latency
     69(define_reservation "atom-imul-32"
     70                    "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4,
     71                     atom-port-0")
     72;;; imul instruction excludes other non-FP instructions.
     73(exclusion_set "atom-eu-0, atom-eu-1"
     74               "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4")
     75
     76;;; dual-execution instructions can have 1,2,4,5 cycles latency, depending
     77;;; on the instruction form
     78(define_reservation "atom-dual-1c" "(atom-port-dual + atom-eu-0 + atom-eu-1)")
     79(define_reservation "atom-dual-2c"
     80                    "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing)")
     81(define_reservation "atom-dual-5c"
     82                    "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing*4)")
     83
     84;;; Complex macro-instructions have varying latencies and use both ports.
     85(define_reservation "atom-complex" "(atom-port-dual + atom-all-eu)")
     86
     87(define_insn_reservation  "atom_other" 9
     88  (and (eq_attr "cpu" "atom")
     89       (and (eq_attr "type" "other")
     90            (eq_attr "atom_unit" "!jeu")))
     91  "atom-complex, atom-all-eu*8")
     92
     93;; return has type "other" with atom_unit "jeu"
     94(define_insn_reservation  "atom_other_2" 1
     95  (and (eq_attr "cpu" "atom")
     96       (and (eq_attr "type" "other")
     97            (eq_attr "atom_unit" "jeu")))
     98  "atom-dual-1c")
     99
     100(define_insn_reservation  "atom_multi" 9
     101  (and (eq_attr "cpu" "atom")
     102       (eq_attr "type" "multi"))
     103  "atom-complex, atom-all-eu*8")
     104
     105;; Normal alu insns without carry
     106(define_insn_reservation  "atom_alu" 1
     107  (and (eq_attr "cpu" "atom")
     108       (and (eq_attr "type" "alu")
     109            (and (eq_attr "memory" "none")
     110                 (eq_attr "use_carry" "0"))))
     111  "atom-simple-either")
     112
     113;; Normal alu insns without carry, with a memory operand
     114(define_insn_reservation  "atom_alu_mem" 1
     115  (and (eq_attr "cpu" "atom")
     116       (and (eq_attr "type" "alu")
     117            (and (eq_attr "memory" "!none")
     118                 (eq_attr "use_carry" "0"))))
     119  "atom-simple-either")
     120
     121;; Alu insn consuming CF, such as adc/sbb
     122(define_insn_reservation  "atom_alu_carry" 1
     123  (and (eq_attr "cpu" "atom")
     124       (and (eq_attr "type" "alu")
     125            (and (eq_attr "memory" "none")
     126                 (eq_attr "use_carry" "1"))))
     127  "atom-simple-either")
     128
     129;; Alu insn consuming CF, such as adc/sbb, with a memory operand
     130(define_insn_reservation  "atom_alu_carry_mem" 1
     131  (and (eq_attr "cpu" "atom")
     132       (and (eq_attr "type" "alu")
     133            (and (eq_attr "memory" "!none")
     134                (eq_attr "use_carry" "1"))))
     135  "atom-simple-either")
     136
     137(define_insn_reservation  "atom_alu1" 1
     138  (and (eq_attr "cpu" "atom")
     139       (and (eq_attr "type" "alu1")
     140            (eq_attr "memory" "none")))
     141  "atom-simple-either")
     142
     143(define_insn_reservation  "atom_alu1_mem" 1
     144  (and (eq_attr "cpu" "atom")
     145       (and (eq_attr "type" "alu1")
     146            (eq_attr "memory" "!none")))
     147  "atom-simple-either")
     148
     149(define_insn_reservation  "atom_negnot" 1
     150  (and (eq_attr "cpu" "atom")
     151       (and (eq_attr "type" "negnot")
     152            (eq_attr "memory" "none")))
     153  "atom-simple-either")
     154
     155(define_insn_reservation  "atom_negnot_mem" 1
     156  (and (eq_attr "cpu" "atom")
     157       (and (eq_attr "type" "negnot")
     158            (eq_attr "memory" "!none")))
     159  "atom-simple-either")
     160
     161(define_insn_reservation  "atom_imov" 1
     162  (and (eq_attr "cpu" "atom")
     163       (and (eq_attr "type" "imov")
     164            (eq_attr "memory" "none")))
     165  "atom-simple-either")
     166
     167(define_insn_reservation  "atom_imov_mem" 1
     168  (and (eq_attr "cpu" "atom")
     169       (and (eq_attr "type" "imov")
     170            (eq_attr "memory" "!none")))
     171  "atom-simple-either")
     172
     173;; 16<-16, 32<-32
     174(define_insn_reservation  "atom_imovx" 1
     175  (and (eq_attr "cpu" "atom")
     176       (and (eq_attr "type" "imovx")
     177            (and (eq_attr "memory" "none")
     178                 (ior (and (match_operand:HI 0 "register_operand")
     179                           (match_operand:HI 1 "general_operand"))
     180                      (and (match_operand:SI 0 "register_operand")
     181                           (match_operand:SI 1 "general_operand"))))))
     182  "atom-simple-either")
     183
     184;; 16<-16, 32<-32, mem
     185(define_insn_reservation  "atom_imovx_mem" 1
     186  (and (eq_attr "cpu" "atom")
     187       (and (eq_attr "type" "imovx")
     188            (and (eq_attr "memory" "!none")
     189                 (ior (and (match_operand:HI 0 "register_operand")
     190                           (match_operand:HI 1 "general_operand"))
     191                      (and (match_operand:SI 0 "register_operand")
     192                           (match_operand:SI 1 "general_operand"))))))
     193  "atom-simple-either")
     194
     195;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8
     196(define_insn_reservation  "atom_imovx_2" 1
     197  (and (eq_attr "cpu" "atom")
     198       (and (eq_attr "type" "imovx")
     199            (and (eq_attr "memory" "none")
     200                 (ior (match_operand:QI 0 "register_operand")
     201                      (ior (and (match_operand:SI 0 "register_operand")
     202                                (not (match_operand:SI 1 "general_operand")))
     203                           (match_operand:DI 0 "register_operand"))))))
     204  "atom-simple-0")
     205
     206;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8, mem
     207(define_insn_reservation  "atom_imovx_2_mem" 1
     208  (and (eq_attr "cpu" "atom")
     209       (and (eq_attr "type" "imovx")
     210            (and (eq_attr "memory" "!none")
     211                 (ior (match_operand:QI 0 "register_operand")
     212                      (ior (and (match_operand:SI 0 "register_operand")
     213                                (not (match_operand:SI 1 "general_operand")))
     214                           (match_operand:DI 0 "register_operand"))))))
     215  "atom-simple-0")
     216
     217;; 16<-8
     218(define_insn_reservation  "atom_imovx_3" 3
     219  (and (eq_attr "cpu" "atom")
     220       (and (eq_attr "type" "imovx")
     221            (and (match_operand:HI 0 "register_operand")
     222                 (match_operand:QI 1 "general_operand"))))
     223  "atom-complex, atom-all-eu*2")
     224
     225(define_insn_reservation  "atom_lea" 1
     226  (and (eq_attr "cpu" "atom")
     227       (and (eq_attr "type" "lea")
     228            (eq_attr "mode" "!HI")))
     229  "atom-simple-either")
     230
     231;; lea 16bit address is complex insn
     232(define_insn_reservation  "atom_lea_2" 2
     233  (and (eq_attr "cpu" "atom")
     234       (and (eq_attr "type" "lea")
     235            (eq_attr "mode" "HI")))
     236  "atom-complex, atom-all-eu")
     237
     238(define_insn_reservation  "atom_incdec" 1
     239  (and (eq_attr "cpu" "atom")
     240       (and (eq_attr "type" "incdec")
     241            (eq_attr "memory" "none")))
     242  "atom-simple-either")
     243
     244(define_insn_reservation  "atom_incdec_mem" 1
     245  (and (eq_attr "cpu" "atom")
     246       (and (eq_attr "type" "incdec")
     247            (eq_attr "memory" "!none")))
     248  "atom-simple-either")
     249
     250;; Simple shift instructions use the SHIFT EU; no memory operand
     251(define_insn_reservation  "atom_ishift" 1
     252  (and (eq_attr "cpu" "atom")
     253       (and (eq_attr "type" "ishift")
     254            (and (eq_attr "memory" "none") (eq_attr "prefix_0f" "0"))))
     255  "atom-simple-0")
     256
     257;; Simple shift instructions use the SHIFT EU; with a memory operand
     258(define_insn_reservation  "atom_ishift_mem" 1
     259  (and (eq_attr "cpu" "atom")
     260       (and (eq_attr "type" "ishift")
     261            (and (eq_attr "memory" "!none") (eq_attr "prefix_0f" "0"))))
     262  "atom-simple-0")
     263
     264;; DF shift (prefixed with 0f) is complex insn with latency of 7 cycles
     265(define_insn_reservation  "atom_ishift_3" 7
     266  (and (eq_attr "cpu" "atom")
     267       (and (eq_attr "type" "ishift")
     268            (eq_attr "prefix_0f" "1")))
     269  "atom-complex, atom-all-eu*6")
     270
     271(define_insn_reservation  "atom_ishift1" 1
     272  (and (eq_attr "cpu" "atom")
     273       (and (eq_attr "type" "ishift1")
     274            (eq_attr "memory" "none")))
     275  "atom-simple-0")
     276
     277(define_insn_reservation  "atom_ishift1_mem" 1
     278  (and (eq_attr "cpu" "atom")
     279       (and (eq_attr "type" "ishift1")
     280            (eq_attr "memory" "!none")))
     281  "atom-simple-0")
     282
     283(define_insn_reservation  "atom_rotate" 1
     284  (and (eq_attr "cpu" "atom")
     285       (and (eq_attr "type" "rotate")
     286            (eq_attr "memory" "none")))
     287  "atom-simple-0")
     288
     289(define_insn_reservation  "atom_rotate_mem" 1
     290  (and (eq_attr "cpu" "atom")
     291       (and (eq_attr "type" "rotate")
     292            (eq_attr "memory" "!none")))
     293  "atom-simple-0")
     294
     295(define_insn_reservation  "atom_rotate1" 1
     296  (and (eq_attr "cpu" "atom")
     297       (and (eq_attr "type" "rotate1")
     298            (eq_attr "memory" "none")))
     299  "atom-simple-0")
     300
     301(define_insn_reservation  "atom_rotate1_mem" 1
     302  (and (eq_attr "cpu" "atom")
     303       (and (eq_attr "type" "rotate1")
     304            (eq_attr "memory" "!none")))
     305  "atom-simple-0")
     306
     307(define_insn_reservation  "atom_imul" 5
     308  (and (eq_attr "cpu" "atom")
     309       (and (eq_attr "type" "imul")
     310            (and (eq_attr "memory" "none") (eq_attr "mode" "SI"))))
     311  "atom-imul-32")
     312
     313(define_insn_reservation  "atom_imul_mem" 5
     314  (and (eq_attr "cpu" "atom")
     315       (and (eq_attr "type" "imul")
     316            (and (eq_attr "memory" "!none") (eq_attr "mode" "SI"))))
     317  "atom-imul-32")
     318
     319;; latency set to 10 as common 64x64 imul
     320(define_insn_reservation  "atom_imul_3" 10
     321  (and (eq_attr "cpu" "atom")
     322       (and (eq_attr "type" "imul")
     323            (eq_attr "mode" "!SI")))
     324  "atom-complex, atom-all-eu*9")
     325
     326(define_insn_reservation  "atom_idiv" 65
     327  (and (eq_attr "cpu" "atom")
     328       (eq_attr "type" "idiv"))
     329  "atom-complex, atom-all-eu*32, nothing*32")
     330
     331(define_insn_reservation  "atom_icmp" 1
     332  (and (eq_attr "cpu" "atom")
     333       (and (eq_attr "type" "icmp")
     334            (eq_attr "memory" "none")))
     335  "atom-simple-either")
     336
     337(define_insn_reservation  "atom_icmp_mem" 1
     338  (and (eq_attr "cpu" "atom")
     339       (and (eq_attr "type" "icmp")
     340            (eq_attr "memory" "!none")))
     341  "atom-simple-either")
     342
     343(define_insn_reservation  "atom_test" 1
     344  (and (eq_attr "cpu" "atom")
     345       (and (eq_attr "type" "test")
     346            (eq_attr "memory" "none")))
     347  "atom-simple-either")
     348
     349(define_insn_reservation  "atom_test_mem" 1
     350  (and (eq_attr "cpu" "atom")
     351       (and (eq_attr "type" "test")
     352            (eq_attr "memory" "!none")))
     353  "atom-simple-either")
     354
     355(define_insn_reservation  "atom_ibr" 1
     356  (and (eq_attr "cpu" "atom")
     357       (and (eq_attr "type" "ibr")
     358            (eq_attr "memory" "!load")))
     359  "atom-simple-1")
     360
     361;; complex if the jump target is loaded from memory
     362(define_insn_reservation  "atom_ibr_2" 2
     363  (and (eq_attr "cpu" "atom")
     364       (and (eq_attr "type" "ibr")
     365            (eq_attr "memory" "load")))
     366  "atom-complex, atom-all-eu")
     367
     368(define_insn_reservation  "atom_setcc" 1
     369  (and (eq_attr "cpu" "atom")
     370       (and (eq_attr "type" "setcc")
     371            (eq_attr "memory" "!store")))
     372  "atom-simple-either")
     373
     374;; 2 cycles complex if target is in memory
     375(define_insn_reservation  "atom_setcc_2" 2
     376  (and (eq_attr "cpu" "atom")
     377       (and (eq_attr "type" "setcc")
     378            (eq_attr "memory" "store")))
     379  "atom-complex, atom-all-eu")
     380
     381(define_insn_reservation  "atom_icmov" 1
     382  (and (eq_attr "cpu" "atom")
     383       (and (eq_attr "type" "icmov")
     384            (eq_attr "memory" "none")))
     385  "atom-simple-either")
     386
     387(define_insn_reservation  "atom_icmov_mem" 1
     388  (and (eq_attr "cpu" "atom")
     389       (and (eq_attr "type" "icmov")
     390            (eq_attr "memory" "!none")))
     391  "atom-simple-either")
     392
     393;; UCODE if segreg, ignored
     394(define_insn_reservation  "atom_push" 2
     395  (and (eq_attr "cpu" "atom")
     396       (eq_attr "type" "push"))
     397  "atom-dual-2c")
     398
     399;; pop r64 is 1 cycle. UCODE if segreg, ignored
     400(define_insn_reservation  "atom_pop" 1
     401  (and (eq_attr "cpu" "atom")
     402       (and (eq_attr "type" "pop")
     403            (eq_attr "mode" "DI")))
     404  "atom-dual-1c")
     405
     406;; pop non-r64 is 2 cycles. UCODE if segreg, ignored
     407(define_insn_reservation  "atom_pop_2" 2
     408  (and (eq_attr "cpu" "atom")
     409       (and (eq_attr "type" "pop")
     410            (eq_attr "mode" "!DI")))
     411  "atom-dual-2c")
     412
     413;; UCODE if segreg, ignored
     414(define_insn_reservation  "atom_call" 1
     415  (and (eq_attr "cpu" "atom")
     416       (eq_attr "type" "call"))
     417  "atom-dual-1c")
     418
     419(define_insn_reservation  "atom_callv" 1
     420  (and (eq_attr "cpu" "atom")
     421       (eq_attr "type" "callv"))
     422  "atom-dual-1c")
     423
     424(define_insn_reservation  "atom_leave" 3
     425  (and (eq_attr "cpu" "atom")
     426       (eq_attr "type" "leave"))
     427  "atom-complex, atom-all-eu*2")
     428
     429(define_insn_reservation  "atom_str" 3
     430  (and (eq_attr "cpu" "atom")
     431       (eq_attr "type" "str"))
     432  "atom-complex, atom-all-eu*2")
     433
     434(define_insn_reservation  "atom_sselog" 1
     435  (and (eq_attr "cpu" "atom")
     436       (and (eq_attr "type" "sselog")
     437            (eq_attr "memory" "none")))
     438  "atom-simple-either")
     439
     440(define_insn_reservation  "atom_sselog_mem" 1
     441  (and (eq_attr "cpu" "atom")
     442       (and (eq_attr "type" "sselog")
     443            (eq_attr "memory" "!none")))
     444  "atom-simple-either")
     445
     446(define_insn_reservation  "atom_sselog1" 1
     447  (and (eq_attr "cpu" "atom")
     448       (and (eq_attr "type" "sselog1")
     449            (eq_attr "memory" "none")))
     450  "atom-simple-0")
     451
     452(define_insn_reservation  "atom_sselog1_mem" 1
     453  (and (eq_attr "cpu" "atom")
     454       (and (eq_attr "type" "sselog1")
     455            (eq_attr "memory" "!none")))
     456  "atom-simple-0")
     457
     458;; not pmad, not psad
     459(define_insn_reservation  "atom_sseiadd" 1
     460  (and (eq_attr "cpu" "atom")
     461       (and (eq_attr "type" "sseiadd")
     462            (and (not (match_operand:V2DI 0 "register_operand"))
     463                 (and (eq_attr "atom_unit" "!simul")
     464                      (eq_attr "atom_unit" "!complex")))))
     465  "atom-simple-either")
     466
     467;; pmad, psad and 64
     468(define_insn_reservation  "atom_sseiadd_2" 4
     469  (and (eq_attr "cpu" "atom")
     470       (and (eq_attr "type" "sseiadd")
     471            (and (not (match_operand:V2DI 0 "register_operand"))
     472                 (and (eq_attr "atom_unit" "simul" )
     473                      (eq_attr "mode" "DI")))))
     474  "atom-fmul-4c")
     475
     476;; pmad, psad and 128
     477(define_insn_reservation  "atom_sseiadd_3" 5
     478  (and (eq_attr "cpu" "atom")
     479       (and (eq_attr "type" "sseiadd")
     480            (and (not (match_operand:V2DI 0 "register_operand"))
     481                 (and (eq_attr "atom_unit" "simul" )
     482                      (eq_attr "mode" "TI")))))
     483  "atom-fmul-5c")
     484
     485;; if paddq(64 bit op), phadd/phsub
     486(define_insn_reservation  "atom_sseiadd_4" 6
     487  (and (eq_attr "cpu" "atom")
     488       (and (eq_attr "type" "sseiadd")
     489            (ior (match_operand:V2DI 0 "register_operand")
     490                 (eq_attr "atom_unit" "complex"))))
     491  "atom-complex, atom-all-eu*5")
     492
     493;; if immediate op.
     494(define_insn_reservation  "atom_sseishft" 1
     495  (and (eq_attr "cpu" "atom")
     496       (and (eq_attr "type" "sseishft")
     497            (and (eq_attr "atom_unit" "!sishuf")
     498                 (match_operand 2 "immediate_operand"))))
     499  "atom-simple-either")
     500
     501;; if palignr or psrldq
     502(define_insn_reservation  "atom_sseishft_2" 1
     503  (and (eq_attr "cpu" "atom")
     504       (and (eq_attr "type" "sseishft")
     505            (and (eq_attr "atom_unit" "sishuf")
     506                 (match_operand 2 "immediate_operand"))))
     507  "atom-simple-0")
     508
     509;; if reg/mem op
     510(define_insn_reservation  "atom_sseishft_3" 2
     511  (and (eq_attr "cpu" "atom")
     512       (and (eq_attr "type" "sseishft")
     513            (not (match_operand 2 "immediate_operand"))))
     514  "atom-complex, atom-all-eu")
     515
     516(define_insn_reservation  "atom_sseimul" 1
     517  (and (eq_attr "cpu" "atom")
     518       (eq_attr "type" "sseimul"))
     519  "atom-simple-0")
     520
     521;; rcpss or rsqrtss
     522(define_insn_reservation  "atom_sse" 4
     523  (and (eq_attr "cpu" "atom")
     524       (and (eq_attr "type" "sse")
     525            (and (eq_attr "atom_sse_attr" "rcp") (eq_attr "mode" "SF"))))
     526  "atom-fmul-4c")
     527
     528;; movshdup, movsldup. Suggest to type sseishft
     529(define_insn_reservation  "atom_sse_2" 1
     530  (and (eq_attr "cpu" "atom")
     531       (and (eq_attr "type" "sse")
     532            (eq_attr "atom_sse_attr" "movdup")))
     533  "atom-simple-0")
     534
     535;; lfence
     536(define_insn_reservation  "atom_sse_3" 1
     537  (and (eq_attr "cpu" "atom")
     538       (and (eq_attr "type" "sse")
     539            (eq_attr "atom_sse_attr" "lfence")))
     540  "atom-simple-either")
     541
     542;; sfence,clflush,mfence, prefetch
     543(define_insn_reservation  "atom_sse_4" 1
     544  (and (eq_attr "cpu" "atom")
     545       (and (eq_attr "type" "sse")
     546            (ior (eq_attr "atom_sse_attr" "fence")
     547                 (eq_attr "atom_sse_attr" "prefetch"))))
     548  "atom-simple-0")
     549
     550;; rcpps, rsqrtps, sqrt, ldmxcsr, stmxcsr
     551(define_insn_reservation  "atom_sse_5" 7
     552  (and (eq_attr "cpu" "atom")
     553       (and (eq_attr "type" "sse")
     554            (ior (ior (eq_attr "atom_sse_attr" "sqrt")
     555                      (eq_attr "atom_sse_attr" "mxcsr"))
     556                 (and (eq_attr "atom_sse_attr" "rcp")
     557                      (eq_attr "mode" "V4SF")))))
     558  "atom-complex, atom-all-eu*6")
     559
     560;; xmm->xmm: latency 1, either port.  NOTE(review): attribute tests ignore the match_operand constraint string ("xy"), so this condition may also cover the reg->xmm and xmm->reg cases below -- confirm
     561(define_insn_reservation  "atom_ssemov" 1
     562  (and (eq_attr "cpu" "atom")
     563       (and (eq_attr "type" "ssemov")
     564            (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "xy"))))
     565  "atom-simple-either")
     566
     567;; reg->xmm: latency 1, reserves atom-simple-0.  NOTE(review): attribute tests ignore the match_operand constraint string ("xy"/"r"), so this condition may be indistinguishable from atom_ssemov -- confirm
     568(define_insn_reservation  "atom_ssemov_2" 1
     569  (and (eq_attr "cpu" "atom")
     570       (and (eq_attr "type" "ssemov")
     571            (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "r"))))
     572  "atom-simple-0")
     573
     574;; xmm->reg: latency 3, reserves atom-eu-0-3-1.  NOTE(review): attribute tests ignore the match_operand constraint string ("r"/"xy"), so this condition may be indistinguishable from atom_ssemov -- confirm
     575(define_insn_reservation  "atom_ssemov_3" 3
     576  (and (eq_attr "cpu" "atom")
     577       (and (eq_attr "type" "ssemov")
     578            (and (match_operand 0 "register_operand" "r") (match_operand 1 "register_operand" "xy"))))
     579  "atom-eu-0-3-1")
     580
     581;; mov mem
     582(define_insn_reservation  "atom_ssemov_4" 1
     583  (and (eq_attr "cpu" "atom")
     584       (and (eq_attr "type" "ssemov")
     585            (and (eq_attr "movu" "0") (eq_attr "memory" "!none"))))
     586  "atom-simple-0")
     587
     588;; movu mem
     589(define_insn_reservation  "atom_ssemov_5" 2
     590  (and (eq_attr "cpu" "atom")
     591       (and (eq_attr "type" "ssemov")
     592            (ior (eq_attr "movu" "1") (eq_attr "memory" "!none"))))
     593  "atom-complex, atom-all-eu")
     594
     595;; no memory simple
     596(define_insn_reservation  "atom_sseadd" 5
     597  (and (eq_attr "cpu" "atom")
     598       (and (eq_attr "type" "sseadd")
     599            (and (eq_attr "memory" "none")
     600                 (and (eq_attr "mode" "!V2DF")
     601                      (eq_attr "atom_unit" "!complex")))))
     602  "atom-fadd-5c")
     603
     604;; memory simple
     605(define_insn_reservation  "atom_sseadd_mem" 5
     606  (and (eq_attr "cpu" "atom")
     607       (and (eq_attr "type" "sseadd")
     608            (and (eq_attr "memory" "!none")
     609                 (and (eq_attr "mode" "!V2DF")
     610                      (eq_attr "atom_unit" "!complex")))))
     611  "atom-dual-5c")
     612
     613;; maxps, minps, *pd, hadd, hsub
     614(define_insn_reservation  "atom_sseadd_3" 8
     615  (and (eq_attr "cpu" "atom")
     616       (and (eq_attr "type" "sseadd")
     617            (ior (eq_attr "mode" "V2DF") (eq_attr "atom_unit" "complex"))))
     618  "atom-complex, atom-all-eu*7")
     619
     620;; Except dppd/dpps
     621(define_insn_reservation  "atom_ssemul" 5
     622  (and (eq_attr "cpu" "atom")
     623       (and (eq_attr "type" "ssemul")
     624            (eq_attr "mode" "!SF")))
     625  "atom-fmul-5c")
     626
     627;; Except dppd/dpps, 4 cycle if mulss
     628(define_insn_reservation  "atom_ssemul_2" 4
     629  (and (eq_attr "cpu" "atom")
     630       (and (eq_attr "type" "ssemul")
     631            (eq_attr "mode" "SF")))
     632  "atom-fmul-4c")
     633
     634(define_insn_reservation  "atom_ssecmp" 1
     635  (and (eq_attr "cpu" "atom")
     636       (eq_attr "type" "ssecmp"))
     637  "atom-simple-either")
     638
     639(define_insn_reservation  "atom_ssecomi" 10
     640  (and (eq_attr "cpu" "atom")
     641       (eq_attr "type" "ssecomi"))
     642  "atom-complex, atom-all-eu*9")
     643
     644;; no memory and cvtpi2ps, cvtps2pi, cvttps2pi
     645(define_insn_reservation  "atom_ssecvt" 5
     646  (and (eq_attr "cpu" "atom")
     647       (and (eq_attr "type" "ssecvt")
     648            (ior (and (match_operand:V2SI 0 "register_operand")
     649                      (match_operand:V4SF 1 "register_operand"))
     650                 (and (match_operand:V4SF 0 "register_operand")
     651                      (match_operand:V2SI 1 "register_operand")))))
     652  "atom-fadd-5c")
     653
     654;; memory and cvtpi2ps, cvtps2pi, cvttps2pi
     655(define_insn_reservation  "atom_ssecvt_2" 5
     656  (and (eq_attr "cpu" "atom")
     657       (and (eq_attr "type" "ssecvt")
     658            (ior (and (match_operand:V2SI 0 "register_operand")
     659                      (match_operand:V4SF 1 "memory_operand"))
     660                 (and (match_operand:V4SF 0 "register_operand")
     661                      (match_operand:V2SI 1 "memory_operand")))))
     662  "atom-dual-5c")
     663
     664;; otherwise. 7 cycles average for cvtss2sd
     665(define_insn_reservation  "atom_ssecvt_3" 7
     666  (and (eq_attr "cpu" "atom")
     667       (and (eq_attr "type" "ssecvt")
     668            (not (ior (and (match_operand:V2SI 0 "register_operand")
     669                           (match_operand:V4SF 1 "nonimmediate_operand"))
     670                      (and (match_operand:V4SF 0 "register_operand")
     671                           (match_operand:V2SI 1 "nonimmediate_operand"))))))
     672  "atom-complex, atom-all-eu*6")
     673
     674;; memory and cvtsi2sd
     675(define_insn_reservation  "atom_sseicvt" 5
     676  (and (eq_attr "cpu" "atom")
     677       (and (eq_attr "type" "sseicvt")
     678            (and (match_operand:V2DF 0 "register_operand")
     679                 (match_operand:SI 1 "memory_operand"))))
     680  "atom-dual-5c")
     681
     682;; otherwise. 8 cycles average for cvtsd2si
     683(define_insn_reservation  "atom_sseicvt_2" 8
     684  (and (eq_attr "cpu" "atom")
     685       (and (eq_attr "type" "sseicvt")
     686            (not (and (match_operand:V2DF 0 "register_operand")
     687                      (match_operand:SI 1 "memory_operand")))))
     688  "atom-complex, atom-all-eu*7")
     689
     690(define_insn_reservation  "atom_ssediv" 62
     691  (and (eq_attr "cpu" "atom")
     692       (eq_attr "type" "ssediv"))
     693  "atom-complex, atom-all-eu*12, nothing*49")
     694
     695;; simple for fmov
     696(define_insn_reservation  "atom_fmov" 1
     697  (and (eq_attr "cpu" "atom")
     698       (and (eq_attr "type" "fmov")
     699            (eq_attr "memory" "none")))
     700  "atom-simple-either")
     701
     702;; simple for fmov
     703(define_insn_reservation  "atom_fmov_mem" 1
     704  (and (eq_attr "cpu" "atom")
     705       (and (eq_attr "type" "fmov")
     706            (eq_attr "memory" "!none")))
     707  "atom-simple-either")
     708
     709;; Define bypass here
     710
     711;; There will be no stall from lea to non-mem EX insns
     712(define_bypass 0 "atom_lea"
     713                 "atom_alu_carry,
     714                  atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
     715                  atom_incdec, atom_setcc, atom_icmov, atom_pop")
     716
     717(define_bypass 0 "atom_lea"
     718                 "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
     719                  atom_imovx_mem, atom_imovx_2_mem,
     720                  atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
     721                 "!ix86_agi_dependent")
     722
     723;; There will be a 3-cycle stall from EX insns to the AGEN insn LEA
     724(define_bypass 4 "atom_alu_carry,
     725                  atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
     726                  atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
     727                  atom_rotate1, atom_setcc, atom_icmov, atom_pop,
     728                  atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
     729                  atom_imovx_mem, atom_imovx_2_mem,
     730                  atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
     731                 "atom_lea")
     732
     733;; There will be a 3-cycle stall from EX insns to insns that need address calculation
     734(define_bypass 4 "atom_alu_carry,
     735                  atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
     736                  atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
     737                  atom_rotate1, atom_setcc, atom_icmov, atom_pop,
     738                  atom_imovx_mem, atom_imovx_2_mem,
     739                  atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
     740                  atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
     741                 "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
     742                  atom_negnot_mem, atom_imov_mem, atom_incdec_mem,
     743                  atom_imovx_mem, atom_imovx_2_mem,
     744                  atom_imul_mem, atom_icmp_mem,
     745                  atom_test_mem, atom_icmov_mem, atom_sselog_mem,
     746                  atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem,
     747                  atom_ishift_mem, atom_ishift1_mem,
     748                  atom_rotate_mem, atom_rotate1_mem"
     749                  "ix86_agi_dependent")
     750
     751;; Stall from imul to lea is 8 cycles.
     752(define_bypass 9 "atom_imul, atom_imul_mem" "atom_lea")
     753
     754;; Stall from imul to memory address is 8 cycles.
     755(define_bypass 9 "atom_imul, atom_imul_mem"
     756                 "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
     757                  atom_negnot_mem, atom_imov_mem, atom_incdec_mem,
     758                  atom_ishift_mem, atom_ishift1_mem, atom_rotate_mem,
     759                  atom_rotate1_mem, atom_imul_mem, atom_icmp_mem,
     760                  atom_test_mem, atom_icmov_mem, atom_sselog_mem,
     761                  atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem"
     762                  "ix86_agi_dependent")
     763
     764;; There will be 0 cycle stall from cmp/test to jcc
     765
     766;; There will be 1 cycle stall from flag producer to cmov and adc/sbb
     767(define_bypass 2 "atom_icmp, atom_test, atom_alu, atom_alu_carry,
     768                  atom_alu1, atom_negnot, atom_incdec, atom_ishift,
     769                  atom_ishift1, atom_rotate, atom_rotate1"
     770                 "atom_icmov, atom_alu_carry")
     771
     772;; lea to shift count stall is 2 cycles
     773(define_bypass 3 "atom_lea"
     774                 "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1,
     775                  atom_ishift_mem, atom_ishift1_mem,
     776                  atom_rotate_mem, atom_rotate1_mem"
     777                 "ix86_dep_by_shift_count")
     778
     779;; lea to shift source stall is 1 cycle
     780(define_bypass 2 "atom_lea"
     781                 "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1"
     782                 "!ix86_dep_by_shift_count")
     783
     784;; non-lea to shift count stall is 1 cycle
     785(define_bypass 2 "atom_alu_carry,
     786                  atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
     787                  atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
     788                  atom_rotate1, atom_setcc, atom_icmov, atom_pop,
     789                  atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
     790                  atom_imovx_mem, atom_imovx_2_mem,
     791                  atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
     792                 "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1,
     793                  atom_ishift_mem, atom_ishift1_mem,
     794                  atom_rotate_mem, atom_rotate1_mem"
     795                 "ix86_dep_by_shift_count")
  • gcc/config/i386/cpuid.h

     
    2929#define bit_CMPXCHG16B  (1 << 13)
    3030#define bit_SSE4_1  (1 << 19)
    3131#define bit_SSE4_2  (1 << 20)
     32#define bit_MOVBE   (1 << 22)
    3233#define bit_POPCNT  (1 << 23)
    3334#define bit_AES     (1 << 25)
    3435#define bit_XSAVE   (1 << 26)
  • gcc/config/i386/sse.md

     
    338338   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
    339339  "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
    340340  [(set_attr "type" "ssemov")
     341   (set_attr "movu" "1")
    341342   (set_attr "prefix" "vex")
    342343   (set_attr "mode" "<MODE>")])
    343344
     
    363364   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
    364365  "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
    365366  [(set_attr "type" "ssemov")
     367   (set_attr "movu" "1")
    366368   (set_attr "mode" "<MODE>")])
    367369
    368370(define_insn "avx_movdqu<avxmodesuffix>"
     
    373375  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
    374376  "vmovdqu\t{%1, %0|%0, %1}"
    375377  [(set_attr "type" "ssemov")
     378   (set_attr "movu" "1")
    376379   (set_attr "prefix" "vex")
    377380   (set_attr "mode" "<avxvecmode>")])
    378381
     
    383386  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
    384387  "movdqu\t{%1, %0|%0, %1}"
    385388  [(set_attr "type" "ssemov")
     389   (set_attr "movu" "1")
    386390   (set_attr "prefix_data16" "1")
    387391   (set_attr "mode" "TI")])
    388392
     
    424428             UNSPEC_MOVNT))]
    425429  "TARGET_SSE2"
    426430  "movntdq\t{%1, %0|%0, %1}"
    427   [(set_attr "type" "ssecvt")
     431  [(set_attr "type" "ssemov")
    428432   (set_attr "prefix_data16" "1")
    429433   (set_attr "mode" "TI")])
    430434
     
    434438           UNSPEC_MOVNT))]
    435439  "TARGET_SSE2"
    436440  "movnti\t{%1, %0|%0, %1}"
    437   [(set_attr "type" "ssecvt")
     441  [(set_attr "type" "ssemov")
    438442   (set_attr "mode" "V2DF")])
    439443
    440444(define_insn "avx_lddqu<avxmodesuffix>"
     
    445449  "TARGET_AVX"
    446450  "vlddqu\t{%1, %0|%0, %1}"
    447451  [(set_attr "type" "ssecvt")
     452   (set_attr "movu" "1")
    448453   (set_attr "prefix" "vex")
    449454   (set_attr "mode" "<avxvecmode>")])
    450455
     
    454459              UNSPEC_LDDQU))]
    455460  "TARGET_SSE3"
    456461  "lddqu\t{%1, %0|%0, %1}"
    457   [(set_attr "type" "ssecvt")
     462  [(set_attr "type" "ssemov")
     463   (set_attr "movu" "1")
    458464   (set_attr "prefix_rep" "1")
    459465   (set_attr "mode" "TI")])
    460466
     
    761767  "TARGET_SSE"
    762768  "%vrcpps\t{%1, %0|%0, %1}"
    763769  [(set_attr "type" "sse")
     770   (set_attr "atom_sse_attr" "rcp")
    764771   (set_attr "prefix" "maybe_vex")
    765772   (set_attr "mode" "V4SF")])
    766773
     
    787794  "TARGET_SSE"
    788795  "rcpss\t{%1, %0|%0, %1}"
    789796  [(set_attr "type" "sse")
     797   (set_attr "atom_sse_attr" "rcp")
    790798   (set_attr "mode" "SF")])
    791799
    792800(define_expand "sqrtv8sf2"
     
    832840  "TARGET_SSE"
    833841  "%vsqrtps\t{%1, %0|%0, %1}"
    834842  [(set_attr "type" "sse")
     843   (set_attr "atom_sse_attr" "sqrt")
    835844   (set_attr "prefix" "maybe_vex")
    836845   (set_attr "mode" "V4SF")])
    837846
     
    876885  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
    877886  "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
    878887  [(set_attr "type" "sse")
     888   (set_attr "atom_sse_attr" "sqrt")
    879889   (set_attr "mode" "<ssescalarmode>")])
    880890
    881891(define_expand "rsqrtv8sf2"
     
    10391049     (const_int 1)))]
    10401050  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
    10411051  "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
    1042   [(set_attr "type" "sse")
     1052  [(set_attr "type" "sseadd")
    10431053   (set_attr "mode" "<ssescalarmode>")])
    10441054
    10451055;; These versions of the min/max patterns implement exactly the operations
     
    11751185  "TARGET_SSE3"
    11761186  "addsubpd\t{%2, %0|%0, %2}"
    11771187  [(set_attr "type" "sseadd")
     1188   (set_attr "atom_unit" "complex")
    11781189   (set_attr "mode" "V2DF")])
    11791190
    11801191(define_insn "avx_h<plusminus_insn>v4df3"
     
    12981309  "TARGET_SSE3"
    12991310  "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
    13001311  [(set_attr "type" "sseadd")
     1312   (set_attr "atom_unit" "complex")
    13011313   (set_attr "prefix_rep" "1")
    13021314   (set_attr "mode" "V4SF")])
    13031315
     
    50665078  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
    50675079  "pmaddwd\t{%2, %0|%0, %2}"
    50685080  [(set_attr "type" "sseiadd")
     5081   (set_attr "atom_unit" "simul")
    50695082   (set_attr "prefix_data16" "1")
    50705083   (set_attr "mode" "TI")])
    50715084
     
    70257038   movq\t{%H1, %0|%0, %H1}
    70267039   mov{q}\t{%H1, %0|%0, %H1}"
    70277040  [(set_attr "type" "ssemov,sseishft,ssemov,imov")
     7041   (set_attr "atom_unit" "*,sishuf,*,*")
    70287042   (set_attr "memory" "*,none,*,*")
    70297043   (set_attr "mode" "V2SF,TI,TI,DI")])
    70307044
     
    70577071   psrldq\t{$8, %0|%0, 8}
    70587072   movq\t{%H1, %0|%0, %H1}"
    70597073  [(set_attr "type" "ssemov,sseishft,ssemov")
     7074   (set_attr "atom_unit" "*,sishuf,*")
    70607075   (set_attr "memory" "*,none,*")
    70617076   (set_attr "mode" "V2SF,TI,TI")])
    70627077
     
    76147629  "TARGET_SSE2"
    76157630  "psadbw\t{%2, %0|%0, %2}"
    76167631  [(set_attr "type" "sseiadd")
     7632   (set_attr "atom_unit" "simul")
    76177633   (set_attr "prefix_data16" "1")
    76187634   (set_attr "mode" "TI")])
    76197635
     
    76357651      UNSPEC_MOVMSK))]
    76367652  "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
    76377653  "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
    7638   [(set_attr "type" "ssecvt")
     7654  [(set_attr "type" "ssemov")
    76397655   (set_attr "prefix" "maybe_vex")
    76407656   (set_attr "mode" "<MODE>")])
    76417657
     
    76457661           UNSPEC_MOVMSK))]
    76467662  "TARGET_SSE2"
    76477663  "%vpmovmskb\t{%1, %0|%0, %1}"
    7648   [(set_attr "type" "ssecvt")
     7664  [(set_attr "type" "ssemov")
    76497665   (set_attr "prefix_data16" "1")
    76507666   (set_attr "prefix" "maybe_vex")
    76517667   (set_attr "mode" "SI")])
     
    76687684  "TARGET_SSE2 && !TARGET_64BIT"
    76697685  ;; @@@ check ordering of operands in intel/nonintel syntax
    76707686  "%vmaskmovdqu\t{%2, %1|%1, %2}"
    7671   [(set_attr "type" "ssecvt")
     7687  [(set_attr "type" "ssemov")
    76727688   (set_attr "prefix_data16" "1")
    76737689   (set_attr "prefix" "maybe_vex")
    76747690   (set_attr "mode" "TI")])
     
    76827698  "TARGET_SSE2 && TARGET_64BIT"
    76837699  ;; @@@ check ordering of operands in intel/nonintel syntax
    76847700  "%vmaskmovdqu\t{%2, %1|%1, %2}"
    7685   [(set_attr "type" "ssecvt")
     7701  [(set_attr "type" "ssemov")
    76867702   (set_attr "prefix_data16" "1")
    76877703   (set_attr "prefix" "maybe_vex")
    76887704   (set_attr "mode" "TI")])
     
    76937709  "TARGET_SSE"
    76947710  "%vldmxcsr\t%0"
    76957711  [(set_attr "type" "sse")
     7712   (set_attr "atom_sse_attr" "mxcsr")
    76967713   (set_attr "prefix" "maybe_vex")
    76977714   (set_attr "memory" "load")])
    76987715
     
    77027719  "TARGET_SSE"
    77037720  "%vstmxcsr\t%0"
    77047721  [(set_attr "type" "sse")
     7722   (set_attr "atom_sse_attr" "mxcsr")
    77057723   (set_attr "prefix" "maybe_vex")
    77067724   (set_attr "memory" "store")])
    77077725
     
    77207738  "TARGET_SSE || TARGET_3DNOW_A"
    77217739  "sfence"
    77227740  [(set_attr "type" "sse")
     7741   (set_attr "atom_sse_attr" "fence")
    77237742   (set_attr "memory" "unknown")])
    77247743
    77257744(define_insn "sse2_clflush"
     
    77287747  "TARGET_SSE2"
    77297748  "clflush\t%a0"
    77307749  [(set_attr "type" "sse")
     7750   (set_attr "atom_sse_attr" "fence")
    77317751   (set_attr "memory" "unknown")])
    77327752
    77337753(define_expand "sse2_mfence"
     
    77457765  "TARGET_64BIT || TARGET_SSE2"
    77467766  "mfence"
    77477767  [(set_attr "type" "sse")
     7768   (set_attr "atom_sse_attr" "fence")
    77487769   (set_attr "memory" "unknown")])
    77497770
    77507771(define_expand "sse2_lfence"
     
    77627783  "TARGET_SSE2"
    77637784  "lfence"
    77647785  [(set_attr "type" "sse")
     7786   (set_attr "atom_sse_attr" "lfence")
    77657787   (set_attr "memory" "unknown")])
    77667788
    77677789(define_insn "sse3_mwait"
     
    78857907  "TARGET_SSSE3"
    78867908  "phaddw\t{%2, %0|%0, %2}"
    78877909  [(set_attr "type" "sseiadd")
     7910   (set_attr "atom_unit" "complex")
    78887911   (set_attr "prefix_data16" "1")
    78897912   (set_attr "prefix_extra" "1")
    78907913   (set_attr "mode" "TI")])
     
    79137936  "TARGET_SSSE3"
    79147937  "phaddw\t{%2, %0|%0, %2}"
    79157938  [(set_attr "type" "sseiadd")
     7939   (set_attr "atom_unit" "complex")
    79167940   (set_attr "prefix_extra" "1")
    79177941   (set_attr "mode" "DI")])
    79187942
     
    79677991  "TARGET_SSSE3"
    79687992  "phaddd\t{%2, %0|%0, %2}"
    79697993  [(set_attr "type" "sseiadd")
     7994   (set_attr "atom_unit" "complex")
    79707995   (set_attr "prefix_data16" "1")
    79717996   (set_attr "prefix_extra" "1")
    79727997   (set_attr "mode" "TI")])
     
    79878012  "TARGET_SSSE3"
    79888013  "phaddd\t{%2, %0|%0, %2}"
    79898014  [(set_attr "type" "sseiadd")
     8015   (set_attr "atom_unit" "complex")
    79908016   (set_attr "prefix_extra" "1")
    79918017   (set_attr "mode" "DI")])
    79928018
     
    80738099  "TARGET_SSSE3"
    80748100  "phaddsw\t{%2, %0|%0, %2}"
    80758101  [(set_attr "type" "sseiadd")
     8102   (set_attr "atom_unit" "complex")
    80768103   (set_attr "prefix_data16" "1")
    80778104   (set_attr "prefix_extra" "1")
    80788105   (set_attr "mode" "TI")])
     
    81018128  "TARGET_SSSE3"
    81028129  "phaddsw\t{%2, %0|%0, %2}"
    81038130  [(set_attr "type" "sseiadd")
     8131   (set_attr "atom_unit" "complex")
    81048132   (set_attr "prefix_extra" "1")
    81058133   (set_attr "mode" "DI")])
    81068134
     
    81878215  "TARGET_SSSE3"
    81888216  "phsubw\t{%2, %0|%0, %2}"
    81898217  [(set_attr "type" "sseiadd")
     8218   (set_attr "atom_unit" "complex")
    81908219   (set_attr "prefix_data16" "1")
    81918220   (set_attr "prefix_extra" "1")
    81928221   (set_attr "mode" "TI")])
     
    82158244  "TARGET_SSSE3"
    82168245  "phsubw\t{%2, %0|%0, %2}"
    82178246  [(set_attr "type" "sseiadd")
     8247   (set_attr "atom_unit" "complex")
    82188248   (set_attr "prefix_extra" "1")
    82198249   (set_attr "mode" "DI")])
    82208250
     
    82698299  "TARGET_SSSE3"
    82708300  "phsubd\t{%2, %0|%0, %2}"
    82718301  [(set_attr "type" "sseiadd")
     8302   (set_attr "atom_unit" "complex")
    82728303   (set_attr "prefix_data16" "1")
    82738304   (set_attr "prefix_extra" "1")
    82748305   (set_attr "mode" "TI")])
     
    82898320  "TARGET_SSSE3"
    82908321  "phsubd\t{%2, %0|%0, %2}"
    82918322  [(set_attr "type" "sseiadd")
     8323   (set_attr "atom_unit" "complex")
    82928324   (set_attr "prefix_extra" "1")
    82938325   (set_attr "mode" "DI")])
    82948326
     
    83758407  "TARGET_SSSE3"
    83768408  "phsubsw\t{%2, %0|%0, %2}"
    83778409  [(set_attr "type" "sseiadd")
     8410   (set_attr "atom_unit" "complex")
    83788411   (set_attr "prefix_data16" "1")
    83798412   (set_attr "prefix_extra" "1")
    83808413   (set_attr "mode" "TI")])
     
    84038436  "TARGET_SSSE3"
    84048437  "phsubsw\t{%2, %0|%0, %2}"
    84058438  [(set_attr "type" "sseiadd")
     8439   (set_attr "atom_unit" "complex")
    84068440   (set_attr "prefix_extra" "1")
    84078441   (set_attr "mode" "DI")])
    84088442
     
    85098543  "TARGET_SSSE3"
    85108544  "pmaddubsw\t{%2, %0|%0, %2}"
    85118545  [(set_attr "type" "sseiadd")
     8546   (set_attr "atom_unit" "simul")
    85128547   (set_attr "prefix_data16" "1")
    85138548   (set_attr "prefix_extra" "1")
    85148549   (set_attr "mode" "TI")])
     
    85478582  "TARGET_SSSE3"
    85488583  "pmaddubsw\t{%2, %0|%0, %2}"
    85498584  [(set_attr "type" "sseiadd")
     8585   (set_attr "atom_unit" "simul")
    85508586   (set_attr "prefix_extra" "1")
    85518587   (set_attr "mode" "DI")])
    85528588
     
    87548790  return "palignr\t{%3, %2, %0|%0, %2, %3}";
    87558791}
    87568792  [(set_attr "type" "sseishft")
     8793   (set_attr "atom_unit" "sishuf")
    87578794   (set_attr "prefix_data16" "1")
    87588795   (set_attr "prefix_extra" "1")
    87598796   (set_attr "mode" "TI")])
     
    87708807  return "palignr\t{%3, %2, %0|%0, %2, %3}";
    87718808}
    87728809  [(set_attr "type" "sseishft")
     8810   (set_attr "atom_unit" "sishuf")
    87738811   (set_attr "prefix_extra" "1")
    87748812   (set_attr "mode" "DI")])
    87758813
     
    89568994             UNSPEC_MOVNTDQA))]
    89578995  "TARGET_SSE4_1"
    89588996  "%vmovntdqa\t{%1, %0|%0, %1}"
    8959   [(set_attr "type" "ssecvt")
     8997  [(set_attr "type" "ssemov")
    89608998   (set_attr "prefix_extra" "1")
    89618999   (set_attr "prefix" "maybe_vex")
    89629000   (set_attr "mode" "TI")])
  • gcc/config/i386/i386.opt

     
    228228Target RejectNegative Joined Var(ix86_tune_string)
    229229Schedule code for given CPU
    230230
     231mabi=
     232Target RejectNegative Joined Var(ix86_abi_string)
     233Generate code that conforms to the given ABI
     234
    231235mveclibabi=
    232236Target RejectNegative Joined Var(ix86_veclibabi_string)
    233237Vector library ABI to use
     
    335339Target Report Mask(ISA_SAHF) Var(ix86_isa_flags) VarExists Save
    336340Support code generation of sahf instruction in 64bit x86-64 code.
    337341
     342mmovbe
     343Target Report Mask(ISA_MOVBE) Var(ix86_isa_flags) VarExists Save
     344Support code generation of movbe instruction.
     345
    338346maes
    339347Target Report Mask(ISA_AES) Var(ix86_isa_flags) VarExists Save
    340348Support AES built-in functions and code generation
  • gcc/config/i386/i386-c.c

     
    119119      def_or_undef (parse_in, "__core2");
    120120      def_or_undef (parse_in, "__core2__");
    121121      break;
     122    case PROCESSOR_ATOM:
     123      def_or_undef (parse_in, "__atom");
     124      def_or_undef (parse_in, "__atom__");
     125      break;
    122126    /* use PROCESSOR_max to not set/unset the arch macro.  */
    123127    case PROCESSOR_max:
    124128      break;
     
    187191    case PROCESSOR_CORE2:
    188192      def_or_undef (parse_in, "__tune_core2__");
    189193      break;
     194    case PROCESSOR_ATOM:
     195      def_or_undef (parse_in, "__tune_atom__");
     196      break;
    190197    case PROCESSOR_GENERIC32:
    191198    case PROCESSOR_GENERIC64:
    192199      break;
  • gcc/config/i386/mingw32.h

     
    3838      builtin_define_std ("WINNT");             \
    3939      builtin_define_with_int_value ("_INTEGRAL_MAX_BITS",  \
    4040                     TYPE_PRECISION (intmax_type_node));\
    41       if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)            \
     41      if (TARGET_64BIT && ix86_abi == MS_ABI)           \
    4242    {                           \
    4343      builtin_define ("__MINGW64__");           \
    4444      builtin_define_std ("WIN64");             \
  • gcc/config/i386/i386-protos.h

     
    8686extern void ix86_expand_binary_operator (enum rtx_code,
    8787                     enum machine_mode, rtx[]);
    8888extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
     89extern bool ix86_lea_for_add_ok (enum rtx_code, rtx, rtx[]);
     90extern bool ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn);
     91extern bool ix86_agi_dependent (rtx set_insn, rtx use_insn);
    8992extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
    9093                    rtx[]);
    9194extern rtx ix86_build_const_vector (enum machine_mode, bool, rtx);
     
    140143extern bool ix86_sol10_return_in_memory (const_tree,const_tree);
    141144extern rtx ix86_force_to_memory (enum machine_mode, rtx);
    142145extern void ix86_free_from_memory (enum machine_mode);
    143 extern int ix86_cfun_abi (void);
    144 extern int ix86_function_abi (const_tree);
    145 extern int ix86_function_type_abi (const_tree);
     146extern enum calling_abi ix86_cfun_abi (void);
     147extern enum calling_abi ix86_function_type_abi (const_tree);
    146148extern void ix86_call_abi_override (const_tree);
    147149extern tree ix86_fn_abi_va_list (tree);
    148150extern tree ix86_canonical_va_list_type (tree);
  • gcc/config/i386/driver-i386.c

     
    378378  /* Extended features */
    379379  unsigned int has_lahf_lm = 0, has_sse4a = 0;
    380380  unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
    381   unsigned int has_sse4_1 = 0, has_sse4_2 = 0;
     381  unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
    382382  unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0;
    383383  unsigned int has_pclmul = 0;
    384384
     
    398398
    399399  __cpuid (1, eax, ebx, ecx, edx);
    400400
    401   /* We don't care for extended family.  */
    402401  model = (eax >> 4) & 0x0f;
    403402  family = (eax >> 8) & 0x0f;
     403  if (vendor == SIG_INTEL)
     404    {
     405      unsigned int extended_model, extended_family;
    404406
     407      extended_model = (eax >> 12) & 0xf0;
     408      extended_family = (eax >> 20) & 0xff;
     409      if (family == 0x0f)
     410    {
     411      family += extended_family;
     412      model += extended_model;
     413    }
     414      else if (family == 0x06)
     415    model += extended_model;
     416    }
     417
    405418  has_sse3 = ecx & bit_SSE3;
    406419  has_ssse3 = ecx & bit_SSSE3;
    407420  has_sse4_1 = ecx & bit_SSE4_1;
    408421  has_sse4_2 = ecx & bit_SSE4_2;
    409422  has_avx = ecx & bit_AVX;
    410423  has_cmpxchg16b = ecx & bit_CMPXCHG16B;
     424  has_movbe = ecx & bit_MOVBE;
    411425  has_popcnt = ecx & bit_POPCNT;
    412426  has_aes = ecx & bit_AES;
    413427  has_pclmul = ecx & bit_PCLMUL;
     
    505519      break;
    506520    case PROCESSOR_PENTIUMPRO:
    507521      if (has_longmode)
    508     /* It is Core 2 Duo.  */
    509     cpu = "core2";
     522    /* It is Core 2 or Atom.  */
     523    cpu = (model == 28) ? "atom" : "core2";
    510524      else if (arch)
    511525    {
    512526      if (has_sse3)
     
    597611    options = concat (options, "-mcx16 ", NULL);
    598612      if (has_lahf_lm)
    599613    options = concat (options, "-msahf ", NULL);
     614      if (has_movbe)
     615    options = concat (options, "-mmovbe ", NULL);
    600616      if (has_aes)
    601617    options = concat (options, "-maes ", NULL);
    602618      if (has_pclmul)
  • gcc/config/i386/i386.c

     
    10361036  1,                                    /* cond_not_taken_branch_cost.  */
    10371037};
    10381038
     1039static const
     1040struct processor_costs atom_cost = {
     1041  COSTS_N_INSNS (1),            /* cost of an add instruction */
     1042  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
     1043  COSTS_N_INSNS (1),            /* variable shift costs */
     1044  COSTS_N_INSNS (1),            /* constant shift costs */
     1045  {COSTS_N_INSNS (3),           /* cost of starting multiply for QI */
     1046   COSTS_N_INSNS (4),           /*                               HI */
     1047   COSTS_N_INSNS (3),           /*                               SI */
     1048   COSTS_N_INSNS (4),           /*                               DI */
     1049   COSTS_N_INSNS (2)},          /*                               other */
     1050  0,                    /* cost of multiply per each bit set */
     1051  {COSTS_N_INSNS (18),          /* cost of a divide/mod for QI */
     1052   COSTS_N_INSNS (26),          /*                          HI */
     1053   COSTS_N_INSNS (42),          /*                          SI */
     1054   COSTS_N_INSNS (74),          /*                          DI */
     1055   COSTS_N_INSNS (74)},         /*                          other */
     1056  COSTS_N_INSNS (1),            /* cost of movsx */
     1057  COSTS_N_INSNS (1),            /* cost of movzx */
     1058  8,                    /* "large" insn */
     1059  17,                   /* MOVE_RATIO */
     1060  2,                    /* cost for loading QImode using movzbl */
     1061  {4, 4, 4},                /* cost of loading integer registers
     1062                       in QImode, HImode and SImode.
     1063                       Relative to reg-reg move (2).  */
     1064  {4, 4, 4},                /* cost of storing integer registers */
     1065  4,                    /* cost of reg,reg fld/fst */
     1066  {12, 12, 12},             /* cost of loading fp registers
     1067                       in SFmode, DFmode and XFmode */
     1068  {6, 6, 8},                /* cost of storing fp registers
     1069                       in SFmode, DFmode and XFmode */
     1070  2,                    /* cost of moving MMX register */
     1071  {8, 8},               /* cost of loading MMX registers
     1072                       in SImode and DImode */
     1073  {8, 8},               /* cost of storing MMX registers
     1074                       in SImode and DImode */
     1075  2,                    /* cost of moving SSE register */
     1076  {8, 8, 8},                /* cost of loading SSE registers
     1077                       in SImode, DImode and TImode */
     1078  {8, 8, 8},                /* cost of storing SSE registers
     1079                       in SImode, DImode and TImode */
     1080  5,                    /* MMX or SSE register to integer */
     1081  32,                   /* size of l1 cache.  */
     1082  256,                  /* size of l2 cache.  */
     1083  64,                   /* size of prefetch block */
     1084  6,                    /* number of parallel prefetches */
     1085  3,                    /* Branch cost */
     1086  COSTS_N_INSNS (8),            /* cost of FADD and FSUB insns.  */
     1087  COSTS_N_INSNS (8),            /* cost of FMUL instruction.  */
     1088  COSTS_N_INSNS (20),           /* cost of FDIV instruction.  */
     1089  COSTS_N_INSNS (8),            /* cost of FABS instruction.  */
     1090  COSTS_N_INSNS (8),            /* cost of FCHS instruction.  */
     1091  COSTS_N_INSNS (40),           /* cost of FSQRT instruction.  */
     1092  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
     1093   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
     1094          {8192, rep_prefix_8_byte}, {-1, libcall}}}},
     1095  {{libcall, {{8, loop}, {15, unrolled_loop},
     1096          {2048, rep_prefix_4_byte}, {-1, libcall}}},
     1097   {libcall, {{24, loop}, {32, unrolled_loop},
     1098          {8192, rep_prefix_8_byte}, {-1, libcall}}}},
     1099  1,                                    /* scalar_stmt_cost.  */
     1100  1,                                    /* scalar load_cost.  */
     1101  1,                                    /* scalar_store_cost.  */
     1102  1,                                    /* vec_stmt_cost.  */
     1103  1,                                    /* vec_to_scalar_cost.  */
     1104  1,                                    /* scalar_to_vec_cost.  */
     1105  1,                                    /* vec_align_load_cost.  */
     1106  2,                                    /* vec_unalign_load_cost.  */
     1107  1,                                    /* vec_store_cost.  */
     1108  3,                                    /* cond_taken_branch_cost.  */
     1109  1,                                    /* cond_not_taken_branch_cost.  */
     1110};
     1111
    10391112/* Generic64 should produce code tuned for Nocona and K8.  */
    10401113static const
    10411114struct processor_costs generic64_cost = {
     
    11941267#define m_PENT4  (1<<PROCESSOR_PENTIUM4)
    11951268#define m_NOCONA  (1<<PROCESSOR_NOCONA)
    11961269#define m_CORE2  (1<<PROCESSOR_CORE2)
     1270#define m_ATOM  (1<<PROCESSOR_ATOM)
    11971271
    11981272#define m_GEODE  (1<<PROCESSOR_GEODE)
    11991273#define m_K6  (1<<PROCESSOR_K6)
     
    12311305  m_486 | m_PENT,
    12321306
    12331307  /* X86_TUNE_UNROLL_STRLEN */
    1234   m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
     1308  m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
     1309  | m_CORE2 | m_GENERIC,
    12351310
    12361311  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
    1237   m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
     1312  m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
    12381313
    12391314  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
    12401315     on simulation result. But after P4 was made, no performance benefit
     
    12461321  ~m_386,
    12471322
    12481323  /* X86_TUNE_USE_SAHF */
    1249   m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
     1324  m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
    12501325  | m_NOCONA | m_CORE2 | m_GENERIC,
    12511326
    12521327  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
    12531328     partial dependencies.  */
    1254   m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
     1329  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
    12551330  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
    12561331
    12571332  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     
    12711346  m_386 | m_486 | m_K6_GEODE,
    12721347
    12731348  /* X86_TUNE_USE_SIMODE_FIOP */
    1274   ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
     1349  ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
    12751350
    12761351  /* X86_TUNE_USE_MOV0 */
    12771352  m_K6,
    12781353
    12791354  /* X86_TUNE_USE_CLTD */
    1280   ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
     1355  ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
    12811356
    12821357  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
    12831358  m_PENT4,
     
    12921367  ~(m_PENT | m_PPRO),
    12931368
    12941369  /* X86_TUNE_PROMOTE_QIMODE */
    1295   m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
    1296   | m_GENERIC /* | m_PENT4 ? */,
     1370  m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
     1371  | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
    12971372
    12981373  /* X86_TUNE_FAST_PREFIX */
    12991374  ~(m_PENT | m_486 | m_386),
     
    13171392  m_PPRO,
    13181393
    13191394  /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
    1320   m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
     1395  m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
     1396  | m_CORE2 | m_GENERIC,
    13211397
    13221398  /* X86_TUNE_ADD_ESP_8 */
    1323   m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
     1399  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
    13241400  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
    13251401
    13261402  /* X86_TUNE_SUB_ESP_4 */
    1327   m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
     1403  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
     1404  | m_GENERIC,
    13281405
    13291406  /* X86_TUNE_SUB_ESP_8 */
    1330   m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
     1407  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
    13311408  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
    13321409
    13331410  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
    13341411     for DFmode copies */
    1335   ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
     1412  ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
    13361413    | m_GENERIC | m_GEODE),
    13371414
    13381415  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
    1339   m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
     1416  m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
    13401417
    13411418  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
    13421419     conflict here in between PPro/Pentium4 based chips that treat 128bit
     
    13471424     shows that disabling this option on P4 brings over 20% SPECfp regression,
    13481425     while enabling it on K8 brings roughly 2.4% regression that can be partly
    13491426     masked by careful scheduling of moves.  */
    1350   m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
     1427  m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
     1428  | m_AMDFAM10,
    13511429
    13521430  /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
    13531431  m_AMDFAM10,
     
    13651443  m_PPRO | m_PENT4 | m_NOCONA,
    13661444
    13671445  /* X86_TUNE_MEMORY_MISMATCH_STALL */
    1368   m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
     1446  m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
    13691447
    13701448  /* X86_TUNE_PROLOGUE_USING_MOVE */
    1371   m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
     1449  m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
    13721450
    13731451  /* X86_TUNE_EPILOGUE_USING_MOVE */
    1374   m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
     1452  m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
    13751453
    13761454  /* X86_TUNE_SHIFT1 */
    13771455  ~m_486,
     
    13801458  m_AMD_MULTIPLE,
    13811459
    13821460  /* X86_TUNE_INTER_UNIT_MOVES */
    1383   ~(m_AMD_MULTIPLE | m_GENERIC),
     1461  ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
    13841462
    13851463  /* X86_TUNE_INTER_UNIT_CONVERSIONS */
    13861464  ~(m_AMDFAM10),
    13871465
    13881466  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
    13891467     than 4 branch instructions in the 16 byte window.  */
    1390   m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
     1468  m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
     1469  | m_GENERIC,
    13911470
    13921471  /* X86_TUNE_SCHEDULE */
    1393   m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
     1472  m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
     1473  | m_GENERIC,
    13941474
    13951475  /* X86_TUNE_USE_BT */
    1396   m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
     1476  m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
    13971477
    13981478  /* X86_TUNE_USE_INCDEC */
    1399   ~(m_PENT4 | m_NOCONA | m_GENERIC),
     1479  ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
    14001480
    14011481  /* X86_TUNE_PAD_RETURNS */
    14021482  m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
    14031483
    14041484  /* X86_TUNE_EXT_80387_CONSTANTS */
    1405   m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
     1485  m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
     1486  | m_CORE2 | m_GENERIC,
    14061487
    14071488  /* X86_TUNE_SHORTEN_X87_SSE */
    14081489  ~m_K8,
     
    14471528     with a subsequent conditional jump instruction into a single
    14481529     compare-and-branch uop.  */
    14491530  m_CORE2,
     1531
     1532  /* X86_TUNE_OPT_AGU: Optimize for the Address Generation Unit.  This flag
     1533     affects LEA instruction selection.  */
     1534  m_ATOM,
    14501535};
    14511536
    14521537/* Feature tests against the various architecture variations.  */
     
    14721557};
    14731558
    14741559static const unsigned int x86_accumulate_outgoing_args
    1475   = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
     1560  = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
     1561    | m_GENERIC;
    14761562
    14771563static const unsigned int x86_arch_always_fancy_math_387
    1478   = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
     1564  = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
    14791565    | m_NOCONA | m_CORE2 | m_GENERIC;
    14801566
    14811567static enum stringop_alg stringop_alg = no_stringop;
     
    17431829/* Alignment for incoming stack boundary in bits.  */
    17441830unsigned int ix86_incoming_stack_boundary;
    17451831
     1832/* The ABI used by the target.  */
     1833enum calling_abi ix86_abi;
     1834
    17461835/* Values 1-5: see jump.c */
    17471836int ix86_branch_cost;
    17481837
     
    18191908static bool ix86_can_inline_p (tree, tree);
    18201909static void ix86_set_current_function (tree);
    18211910
     1911static enum calling_abi ix86_function_abi (const_tree);
     1912
    18221913
    18231914
    18241915/* The svr4 ABI for the i386 says that records and unions are returned