Ticket #4204: intel-atom-optimization.diff
File intel-atom-optimization.diff, 124.5 KB (added by , 15 years ago) |
---|
-
gcc/doc/invoke.texi
574 574 -mno-wide-multiply -mrtd -malign-double @gol 575 575 -mpreferred-stack-boundary=@var{num} 576 576 -mincoming-stack-boundary=@var{num} 577 -mcld -mcx16 -msahf -m recip @gol577 -mcld -mcx16 -msahf -mmovbe -mrecip @gol 578 578 -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol 579 579 -maes -mpclmul @gol 580 580 -msse4a -m3dnow -mpopcnt -mabm -msse5 @gol … … 584 584 -m96bit-long-double -mregparm=@var{num} -msseregparm @gol 585 585 -mveclibabi=@var{type} -mpc32 -mpc64 -mpc80 -mstackrealign @gol 586 586 -momit-leaf-frame-pointer -mno-red-zone -mno-tls-direct-seg-refs @gol 587 -mcmodel=@var{code-model} @gol587 -mcmodel=@var{code-model} -mabi=@var{name} @gol 588 588 -m32 -m64 -mlarge-data-threshold=@var{num} @gol 589 589 -mfused-madd -mno-fused-madd -msse2avx} 590 590 … … 10959 10959 @item core2 10960 10960 Intel Core2 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3 and SSSE3 10961 10961 instruction set support. 10962 @item atom 10963 Intel Atom CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3 and SSSE3 10964 instruction set support. 10962 10965 @item k6 10963 10966 AMD K6 CPU with MMX instruction set support. 10964 10967 @item k6-2, k6-3 … … 11358 11361 In 64-bit mode, SAHF instruction is used to optimize @code{fmod}, @code{drem} 11359 11362 or @code{remainder} built-in functions: see @ref{Other Builtins} for details. 11360 11363 11364 @item -mmovbe 11365 @opindex mmovbe 11366 This option will enable GCC to use movbe instruction to implement 11367 @code{__builtin_bswap32} and @code{__builtin_bswap64}. 11368 11361 11369 @item -mrecip 11362 11370 @opindex mrecip 11363 11371 This option will enable GCC to use RCPSS and RSQRTSS instructions (and their … … 11394 11402 @option{-funsafe-math-optimizations} have to be enabled. A SVML or ACML ABI 11395 11403 compatible library will have to be specified at link time. 11396 11404 11405 @item -mabi=@var{name} 11406 @opindex mabi 11407 Generate code for the specified calling convention. 
Permissible values 11408 are: @samp{sysv} for the ABI used on GNU/Linux and other systems and 11409 @samp{ms} for the Microsoft ABI. The default is to use the Microsoft 11410 ABI when targeting Windows. On all other systems, the default is the 11411 SYSV ABI. You can control this behavior for a specific function by 11412 using the function attribute @samp{ms_abi}/@samp{sysv_abi}. 11413 @xref{Function Attributes}. 11414 11397 11415 @item -mpush-args 11398 11416 @itemx -mno-push-args 11399 11417 @opindex mpush-args -
gcc/doc/md.texi
7504 7504 recognize complicated bypasses, e.g.@: when the consumer is only an address 7505 7505 of insn @samp{store} (not a stored value). 7506 7506 7507 If there is more than one bypass with the same output and input insns, the 7508 chosen bypass is the first bypass with a guard in description whose 7509 guard function returns nonzero. If there is no such bypass, then 7510 the bypass without the guard function is chosen. 7511 7507 7512 @findex exclusion_set 7508 7513 @findex presence_set 7509 7514 @findex final_presence_set -
gcc/genautomata.c
1 1 /* Pipeline hazard description translator. 2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008 2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009 3 3 Free Software Foundation, Inc. 4 4 5 5 Written by Vladimir Makarov <vmakarov@redhat.com> … … 22 22 23 23 /* References: 24 24 25 1. Detecting pipeline structural hazards quickly. T. Proebsting, 25 1. The finite state automaton based pipeline hazard recognizer and 26 instruction scheduler in GCC. V. Makarov. Proceedings of GCC 27 summit, 2003. 28 29 2. Detecting pipeline structural hazards quickly. T. Proebsting, 26 30 C. Fraser. Proceedings of ACM SIGPLAN-SIGACT Symposium on 27 31 Principles of Programming Languages, pages 280--286, 1994. 28 32 29 33 This article is a good start point to understand usage of finite 30 34 state automata for pipeline hazard recognizers. But I'd 31 recommend the 2nd article for more deep understanding.35 recommend the 1st and 3rd article for more deep understanding. 32 36 33 2. Efficient Instruction Scheduling Using Finite State Automata:37 3. Efficient Instruction Scheduling Using Finite State Automata: 34 38 V. Bala and N. Rubin, Proceedings of MICRO-28. This is the best 35 39 article about usage of finite state automata for pipeline hazard 36 40 recognizers. 37 41 38 The current implementation is d ifferent from the 2nd article in the39 following:42 The current implementation is described in the 1st article and it 43 is different from the 3rd article in the following: 40 44 41 45 1. New operator `|' (alternative) is permitted in functional unit 42 46 reservation which can be treated deterministically and … … 463 467 insn. */ 464 468 int insn_num; 465 469 /* The following field value is list of bypasses in which given insn 466 is output insn. */ 470 is output insn. 
Bypasses with the same input insn stay one after 471 another in the list in the same order as their occurrences in the 472 description but the bypass without a guard stays always the last 473 in a row of bypasses with the same input insn. */ 467 474 struct bypass_decl *bypass_list; 468 475 469 476 /* The following fields are defined by automaton generator. */ … … 2367 2374 } 2368 2375 2369 2376 2370 /* The function searches for bypass with given IN_INSN_RESERV in given 2371 BYPASS_LIST. */ 2372 static struct bypass_decl * 2373 find_bypass (struct bypass_decl *bypass_list, 2374 struct insn_reserv_decl *in_insn_reserv) 2377 /* The function inserts BYPASS in the list of bypasses of the 2378 corresponding output insn. The order of bypasses in the list is 2379 decribed in a comment for member `bypass_list' (see above). If 2380 there is already the same bypass in the list the function reports 2381 this and does nothing. */ 2382 static void 2383 insert_bypass (struct bypass_decl *bypass) 2375 2384 { 2376 struct bypass_decl *bypass; 2377 2378 for (bypass = bypass_list; bypass != NULL; bypass = bypass->next) 2379 if (bypass->in_insn_reserv == in_insn_reserv) 2380 break; 2381 return bypass; 2385 struct bypass_decl *curr, *last; 2386 struct insn_reserv_decl *out_insn_reserv = bypass->out_insn_reserv; 2387 struct insn_reserv_decl *in_insn_reserv = bypass->in_insn_reserv; 2388 2389 for (curr = out_insn_reserv->bypass_list, last = NULL; 2390 curr != NULL; 2391 last = curr, curr = curr->next) 2392 if (curr->in_insn_reserv == in_insn_reserv) 2393 { 2394 if ((bypass->bypass_guard_name != NULL 2395 && curr->bypass_guard_name != NULL 2396 && ! 
strcmp (bypass->bypass_guard_name, curr->bypass_guard_name)) 2397 || bypass->bypass_guard_name == curr->bypass_guard_name) 2398 { 2399 if (bypass->bypass_guard_name == NULL) 2400 { 2401 if (!w_flag) 2402 error ("the same bypass `%s - %s' is already defined", 2403 bypass->out_insn_name, bypass->in_insn_name); 2404 else 2405 warning (0, "the same bypass `%s - %s' is already defined", 2406 bypass->out_insn_name, bypass->in_insn_name); 2407 } 2408 else if (!w_flag) 2409 error ("the same bypass `%s - %s' (guard %s) is already defined", 2410 bypass->out_insn_name, bypass->in_insn_name, 2411 bypass->bypass_guard_name); 2412 else 2413 warning 2414 (0, "the same bypass `%s - %s' (guard %s) is already defined", 2415 bypass->out_insn_name, bypass->in_insn_name, 2416 bypass->bypass_guard_name); 2417 return; 2418 } 2419 if (curr->bypass_guard_name == NULL) 2420 break; 2421 if (curr->next == NULL || curr->next->in_insn_reserv != in_insn_reserv) 2422 { 2423 last = curr; 2424 break; 2425 } 2426 2427 } 2428 if (last == NULL) 2429 { 2430 bypass->next = out_insn_reserv->bypass_list; 2431 out_insn_reserv->bypass_list = bypass; 2432 } 2433 else 2434 { 2435 bypass->next = last->next; 2436 last->next = bypass; 2437 } 2382 2438 } 2383 2439 2384 2440 /* The function processes pipeline description declarations, checks … … 2391 2447 decl_t decl_in_table; 2392 2448 decl_t out_insn_reserv; 2393 2449 decl_t in_insn_reserv; 2394 struct bypass_decl *bypass;2395 2450 int automaton_presence; 2396 2451 int i; 2397 2452 … … 2514 2569 = DECL_INSN_RESERV (out_insn_reserv); 2515 2570 DECL_BYPASS (decl)->in_insn_reserv 2516 2571 = DECL_INSN_RESERV (in_insn_reserv); 2517 bypass 2518 = find_bypass (DECL_INSN_RESERV (out_insn_reserv)->bypass_list, 2519 DECL_BYPASS (decl)->in_insn_reserv); 2520 if (bypass != NULL) 2521 { 2522 if (DECL_BYPASS (decl)->latency == bypass->latency) 2523 { 2524 if (!w_flag) 2525 error 2526 ("the same bypass `%s - %s' is already defined", 2527 DECL_BYPASS (decl)->out_insn_name, 
2528 DECL_BYPASS (decl)->in_insn_name); 2529 else 2530 warning 2531 (0, "the same bypass `%s - %s' is already defined", 2532 DECL_BYPASS (decl)->out_insn_name, 2533 DECL_BYPASS (decl)->in_insn_name); 2534 } 2535 else 2536 error ("bypass `%s - %s' is already defined", 2537 DECL_BYPASS (decl)->out_insn_name, 2538 DECL_BYPASS (decl)->in_insn_name); 2539 } 2540 else 2541 { 2542 DECL_BYPASS (decl)->next 2543 = DECL_INSN_RESERV (out_insn_reserv)->bypass_list; 2544 DECL_INSN_RESERV (out_insn_reserv)->bypass_list 2545 = DECL_BYPASS (decl); 2546 } 2572 insert_bypass (DECL_BYPASS (decl)); 2547 2573 } 2548 2574 } 2549 2575 } … … 8159 8185 (advance_cycle_insn_decl)->insn_num)); 8160 8186 fprintf (output_file, " case %d:\n", 8161 8187 bypass->in_insn_reserv->insn_num); 8162 if (bypass->bypass_guard_name == NULL) 8163 fprintf (output_file, " return %d;\n", 8164 bypass->latency); 8165 else 8188 for (;;) 8166 8189 { 8167 fprintf (output_file, 8168 " if (%s (%s, %s))\n", 8169 bypass->bypass_guard_name, INSN_PARAMETER_NAME, 8170 INSN2_PARAMETER_NAME); 8171 fprintf (output_file, 8172 " return %d;\n break;\n", 8173 bypass->latency); 8190 if (bypass->bypass_guard_name == NULL) 8191 { 8192 gcc_assert (bypass->next == NULL 8193 || (bypass->in_insn_reserv 8194 != bypass->next->in_insn_reserv)); 8195 fprintf (output_file, " return %d;\n", 8196 bypass->latency); 8197 } 8198 else 8199 { 8200 fprintf (output_file, 8201 " if (%s (%s, %s))\n", 8202 bypass->bypass_guard_name, INSN_PARAMETER_NAME, 8203 INSN2_PARAMETER_NAME); 8204 fprintf (output_file, " return %d;\n", 8205 bypass->latency); 8206 } 8207 if (bypass->next == NULL 8208 || bypass->in_insn_reserv != bypass->next->in_insn_reserv) 8209 break; 8210 bypass = bypass->next; 8174 8211 } 8212 if (bypass->bypass_guard_name != NULL) 8213 fprintf (output_file, " break;\n"); 8175 8214 } 8176 8215 fputs (" }\n break;\n", output_file); 8177 8216 } -
gcc/testsuite/gcc.target/i386/movbe-1.c
/* { dg-do compile } */
/* { dg-options "-O2 -mmovbe" } */

/* Compile-only test: a 32-bit byte swap whose operand or result lives in
   memory is compiled with -mmovbe, and the assembly is scanned for the
   movbe instruction.  */

extern int x;

/* Byte-swap I and store the result in the global X.  */
void
foo (int i)
{
  x = __builtin_bswap32 (i);
}

/* Read the global X and return it byte-swapped.  */
int
bar ()
{
  return __builtin_bswap32 (x);
}

/* The assembly must contain exactly two movbe instructions (presumably
   one for the store in foo and one for the load in bar).  */
/* { dg-final { scan-assembler-times "movbe\[ \t\]" 2 } } */
gcc/testsuite/gcc.target/i386/movbe-2.c
/* { dg-do compile } */
/* { dg-options "-O2 -mmovbe" } */

/* Compile-only test: a 64-bit byte swap whose operand or result lives in
   memory is compiled with -mmovbe, and the assembly is scanned for the
   movbe instruction.  */

extern long long x;

/* Byte-swap I and store the result in the global X.  */
void
foo (long long i)
{
  x = __builtin_bswap64 (i);
}

/* Read the global X and return it byte-swapped.  */
long long
bar ()
{
  return __builtin_bswap64 (x);
}

/* Expected movbe count depends on the data model: four on ilp32 targets
   (presumably two 32-bit halves per function -- confirm against the
   bswapdi2 expander) and two on lp64 targets.  */
/* { dg-final { scan-assembler-times "movbe\[ \t\]" 4 { target ilp32 } } } */
/* { dg-final { scan-assembler-times "movbe\[ \t\]" 2 { target lp64 } } } */
gcc/testsuite/gcc.target/x86_64/abi/callabi/vaarg-4a.c
/* Test for cross x86_64<->w64 abi va_list calls.  */
/* { dg-do run { target i?86-*-linux* x86_64-*-linux* } } */
/* { dg-options "-O2 -mabi=ms -std=gnu99 -fno-builtin" } */
/* { dg-additional-sources "vaarg-4b.c" } */

extern __SIZE_TYPE__ __attribute__ ((sysv_abi)) strlen (const char *);
extern int __attribute__ ((sysv_abi)) sprintf (char *,const char *, ...);
extern void __attribute__ ((sysv_abi)) abort (void);

extern void do_cpy (char *, ...);

/* Driver: asks do_cpy (built from the additional source) to concatenate
   seven one-character strings and aborts unless the buffer holds
   "1234567" followed by the terminating NUL.  */
int __attribute__ ((sysv_abi))
main ()
{
  /* The eight bytes that must be found at the start of the buffer,
     including the trailing NUL.  */
  static const char expected[] = "1234567";
  char s[256];
  unsigned int pos;

  do_cpy (s, "1","2","3","4", "5", "6", "7", "");

  for (pos = 0; pos < sizeof expected; pos++)
    if (s[pos] != expected[pos])
      abort ();

  return 0;
}
gcc/testsuite/gcc.target/x86_64/abi/callabi/vaarg-4b.c
/* Test for cross x86_64<->w64 abi va_list calls.  */
/* { dg-options "-O2 -mabi=ms -std=gnu99 -fno-builtin" } */

#include <stdarg.h>

extern __SIZE_TYPE__ __attribute__ ((sysv_abi)) strlen (const char *);
extern int __attribute__ ((sysv_abi)) sprintf (char *, const char *, ...);

/* Concatenate into S the strings read from ARGP, one `char *' at a time,
   stopping at the first empty string.  S is always NUL-terminated, even
   when the very first argument is empty.  The caller must provide a
   buffer large enough for the result; no bound is checked.  */
static void
vdo_cpy (char *s, va_list argp)
{
  char *r = s;	/* Current write position inside S.  */
  char *e;	/* String fetched from the argument list.  */

  *r = 0;
  for (;;)
    {
      e = va_arg (argp, char *);
      if (*e == 0)
        break;
      sprintf (r, "%s", e);
      /* Advance past what was just written.  */
      r += strlen (r);
    }
}

/* Variadic front end for vdo_cpy: every argument after S must be a
   `char *', and the list must end with an empty string.  */
void
do_cpy (char *s, ...)
{
  va_list argp;

  va_start (argp, s);
  vdo_cpy (s, argp);
  va_end (argp);
}
gcc/testsuite/gcc.target/x86_64/abi/callabi/vaarg-5a.c
/* Test for cross x86_64<->w64 abi va_list calls.  */
/* { dg-do run { target i?86-*-linux* x86_64-*-linux* } } */
/* { dg-options "-O2 -mabi=ms -std=gnu99 -fno-builtin" } */
/* { dg-additional-sources "vaarg-5b.c" } */

extern void __attribute__ ((sysv_abi)) abort (void);
extern int fct2 (int, ...);

#define SZ_ARGS 1ll,2ll,3ll,4ll,5ll,6ll,7ll,0ll

/* Driver: fct2 (from the additional source) must return zero when given
   -1 followed by the SZ_ARGS list; anything else aborts the test.  */
int __attribute__ ((sysv_abi))
main()
{
  int status = fct2 (-1, SZ_ARGS);

  if (status)
    abort ();
  return 0;
}
gcc/testsuite/gcc.target/x86_64/abi/callabi/vaarg-5b.c
/* Test for cross x86_64<->w64 abi va_list calls.  */
/* { dg-options "-O2 -mabi=ms -std=gnu99 -fno-builtin" } */

#include <stdarg.h>

/* Reference argument list: seven non-zero values and a terminating 0.  */
#define SZ_ARGS 1ll,2ll,3ll,4ll,5ll,6ll,7ll,0ll

/* Declared with the sysv_abi attribute.  Walks two argument lists in
   lock step: ARGP, handed in by the caller, and ARGP_2, started here over
   this function's own variable arguments with the __builtin_sysv_va_*
   builtins.  Each iteration reads one `long long' from each list.
   Returns 0 as soon as a pair differs, otherwise 1 once a zero value has
   been read.  */
static int __attribute__ ((sysv_abi))
fct1 (va_list argp, ...)
{
  long long p1,p2;
  int ret = 1;
  __builtin_sysv_va_list argp_2;

  __builtin_sysv_va_start (argp_2, argp);
  do {
    p1 = va_arg (argp_2, long long);
    p2 = va_arg (argp, long long);
    if (p1 != p2)
      ret = 0;
  } while (ret && p1 != 0);
  __builtin_sysv_va_end (argp_2);

  return ret;
}

/* Starts ARGP over its own variable arguments and returns DUMMY plus the
   result of fct1 (argp, SZ_ARGS).  Nothing else in this file sets the
   calling convention of this function, so it presumably follows the
   -mabi=ms default from dg-options -- confirm.  */
int
fct2 (int dummy, ...)
{
  va_list argp;
  int ret = dummy;

  va_start (argp, dummy);
  ret += fct1 (argp, SZ_ARGS);
  va_end (argp);
  return ret;
}
gcc/testsuite/gcc.target/x86_64/abi/callabi/func-indirect-2a.c
/* Test for cross x86_64<->w64 abi standard calls via variable.  */
/* { dg-do run { target i?86-*-linux* x86_64-*-linux* } } */
/* { dg-options "-O2 -mabi=ms -std=gnu99 -ffast-math -fno-builtin" } */
/* { dg-additional-sources "func-indirect-2b.c" } */

extern void __attribute__ ((sysv_abi)) abort (void);
typedef int (*func)(void *, char *, char *, short, long long);
extern func get_callback (void);

/* Driver: fetches a function pointer from the additional source, calls
   through it with a fixed argument set and aborts on a nonzero result.  */
int __attribute__ ((sysv_abi))
main ()
{
  int failed = get_callback () (0, 0, 0, 0x1234, 0x1234567890abcdefLL);

  if (failed)
    abort ();
  return 0;
}
gcc/testsuite/gcc.target/x86_64/abi/callabi/func-2a.c
/* Test for cross x86_64<->w64 abi standard calls.  */
/* { dg-do run { target i?86-*-linux* x86_64-*-linux* } } */
/* { dg-options "-O2 -mabi=ms -std=gnu99 -ffast-math -fno-builtin" } */
/* { dg-additional-sources "func-2b.c" } */

extern void __attribute__ ((sysv_abi)) abort (void);
long double func_cross (long double, double, float, long, int, char);

/* sysv_abi reference computation: (A + B + C) * (D + E), plus the same
   value computed again with F negated whenever F is positive.  */
long double __attribute__ ((sysv_abi))
func_native (long double a, double b, float c, long d, int e, char f)
{
  long double total = a + (long double) b + (long double) c;

  total *= (long double) (d + (long) e);
  if (f <= 0)
    return total;
  return total + func_native (a,b,c,d,e,-f);
}

/* Driver: func_cross (from the additional source) and func_native must
   agree on the same arguments, otherwise the test aborts.  */
int __attribute__ ((sysv_abi))
main ()
{
  long double cross = func_cross (1.0,2.0,3.0,1,2,3);
  long double native = func_native (1.0,2.0,3.0,1,2,3);

  if (cross != native)
    abort ();
  return 0;
}
gcc/testsuite/gcc.target/x86_64/abi/callabi/func-indirect-2b.c
/* Test for cross x86_64<->w64 abi standard calls via variable.  */
/* { dg-options "-O2 -mabi=ms -std=gnu99 -ffast-math -fno-builtin" } */

typedef int (*func)(void *, char *, char *, short, long long);

/* Returns 0 when the three pointers are null, NUMBER is 0x1234 and RAND
   is 0x1234567890abcdef; returns 1 for any other argument set.  */
static int
callback (void *ptr, char *string1, char *string2, short number,
          long long rand)
{
  int pointers_null = ptr == 0 && string1 == 0 && string2 == 0;
  int values_match = number == 0x1234 && rand == 0x1234567890abcdefLL;

  return !(pointers_null && values_match);
}

/* Hands the file-local callback out to other translation units.  */
func
get_callback (void)
{
  return &callback;
}
gcc/testsuite/gcc.target/x86_64/abi/callabi/func-2b.c
/* Test for cross x86_64<->w64 abi standard calls.  */
/* { dg-options "-mabi=ms -std=gnu99 -ffast-math -fno-builtin" } */

/* Computes (A + B + C) * (D + E) in long double and, whenever F is
   positive, adds the same computation performed with F negated.  */
long double func_cross (long double a, double b, float c, long d, int e,
                        char f)
{
  long double total = a + (long double) b + (long double) c;

  total *= (long double) (d + (long) e);
  if (f <= 0)
    return total;
  return total + func_cross (a,b,c,d,e,-f);
}
gcc/testsuite/ChangeLog.ix86
1 2009-05-21 H.J. Lu <hongjiu.lu@intel.com> 2 3 Backport from mainline: 4 2009-05-21 H.J. Lu <hongjiu.lu@intel.com> 5 6 * gcc.target/i386/movbe-1.c: New. 7 * gcc.target/i386/movbe-2.c: Likewise. 8 9 2009-03-27 H.J. Lu <hongjiu.lu@intel.com> 10 11 Backport from mainline: 12 2009-03-27 H.J. Lu <hongjiu.lu@intel.com> 13 14 PR target/39472 15 * gcc.target/x86_64/abi/callabi/func-2a.c: New. 16 * gcc.target/x86_64/abi/callabi/func-2b.c: Likewise. 17 * gcc.target/x86_64/abi/callabi/func-indirect-2a.c: Likewise. 18 * gcc.target/x86_64/abi/callabi/func-indirect-2b.c: Likewise. 19 * gcc.target/x86_64/abi/callabi/vaarg-4a.c: Likewise. 20 * gcc.target/x86_64/abi/callabi/vaarg-4b.c: Likewise. 21 * gcc.target/x86_64/abi/callabi/vaarg-5a.c: Likewise. 22 * gcc.target/x86_64/abi/callabi/vaarg-5b.c: Likewise. -
gcc/rtl.def
1088 1088 guard for the bypass. The function will get the two insns as 1089 1089 parameters. If the function returns zero the bypass will be 1090 1090 ignored for this case. Additional guard is necessary to recognize 1091 complicated bypasses, e.g. when consumer is load address. */ 1091 complicated bypasses, e.g. when consumer is load address. If there 1092 is more than one bypass with the same output and input insns, the 1093 chosen bypass is the first bypass with a guard in description whose 1094 guard function returns nonzero. If there is no such bypass, then 1095 the bypass without the guard function is chosen. */ 1092 1096 DEF_RTL_EXPR(DEFINE_BYPASS, "define_bypass", "issS", RTX_EXTRA) 1093 1097 1094 1098 /* (define_automaton string) describes names of automata generated and -
gcc/ChangeLog.ix86
1 2009-05-21 H.J. Lu <hongjiu.lu@intel.com> 2 3 Backport from mainline: 4 2009-05-21 H.J. Lu <hongjiu.lu@intel.com> 5 Uros Bizjak <ubizjak@gmail.com> 6 7 * config/i386/cpuid.h (bit_MOVBE): New. 8 9 * config/i386/driver-i386.c (host_detect_local_cpu): Check movbe. 10 11 * config/i386/i386.c (OPTION_MASK_ISA_MOVBE_SET): New. 12 (OPTION_MASK_ISA_MOVBE_UNSET): Likewise. 13 (ix86_handle_option): Handle OPT_mmovbe. 14 (ix86_target_string): Add -mmovbe. 15 (pta_flags): Add PTA_MOVBE. 16 (processor_alias_table): Add PTA_MOVBE to "atom". 17 (override_options): Handle PTA_MOVBE. 18 19 * config/i386/i386.h (TARGET_MOVBE): New. 20 21 * config/i386/i386.md (bswapsi2): Check TARGET_MOVBE. 22 (*bswapsi_movbe): New. 23 (*bswapdi_movbe): Likewise. 24 (bswapdi2): Renamed to ... 25 (*bswapdi_1): This. 26 (bswapdi2): New expander. 27 28 * config/i386/i386.opt (mmovbe): New. 29 30 * doc/invoke.texi: Document -mmovbe. 31 32 2009-05-20 H.J. Lu <hongjiu.lu@intel.com> 33 34 Backport from mainline: 35 2009-05-20 H.J. Lu <hongjiu.lu@intel.com> 36 37 * config/i386/driver-i386.c (host_detect_local_cpu): Check 38 extended family and model for Intel processors. Support Intel 39 Atom. 40 41 2009-04-20 H.J. Lu <hongjiu.lu@intel.com> 42 43 Backport from mainline: 44 2009-04-20 Joey Ye <joey.ye@intel.com> 45 Xuepeng Guo <xuepeng.guo@intel.com> 46 H.J. Lu <hongjiu.lu@intel.com> 47 48 * config/i386/atom.md: Add bypasses with ix86_dep_by_shift_count. 49 50 * config/i386/i386.c (LEA_SEARCH_THRESHOLD): New macro. 51 (IX86_LEA_PRIORITY): Likewise. 52 (distance_non_agu_define): New function. 53 (distance_agu_use): Likewise. 54 (ix86_lea_for_add_ok): Likewise. 55 (ix86_dep_by_shift_count): Likewise. 56 57 * config/i386/i386.md: Call ix86_lea_for_add_ok to decide we 58 should split for LEA. 59 60 * config/i386/i386-protos.h (ix86_lea_for_add_ok): Declare new 61 function. 62 (ix86_dep_by_shift_count): Likewise. 63 64 2009-04-07 H.J. Lu <hongjiu.lu@intel.com> 65 66 Backport from mainline: 67 2009-04-07 H.J. 
Lu <hongjiu.lu@intel.com> 68 69 * doc/invoke.texi: Document Atom support. 70 71 2009-04-06 H.J. Lu <hongjiu.lu@intel.com> 72 73 * config/i386/i386.md: Revert 2 accidental checkins. 74 75 2009-04-06 H.J. Lu <hongjiu.lu@intel.com> 76 77 Backport from mainline: 78 2009-04-06 Joey Ye <joey.ye@intel.com> 79 Xuepeng Guo <xuepeng.guo@intel.com> 80 H.J. Lu <hongjiu.lu@intel.com> 81 82 Atom pipeline model, tuning and insn selection. 83 * config.gcc (atom): Add atom config options and target. 84 85 * config/i386/atom.md: New. 86 87 * config/i386/i386.c (atom_cost): New cost. 88 (m_ATOM): New macro flag. 89 (initial_ix86_tune_features): Set m_ATOM. 90 (x86_accumulate_outgoing_args): Likewise. 91 (x86_arch_always_fancy_math_387): Likewise. 92 (processor_target): Add Atom cost. 93 (cpu_names): Add Atom cpu name. 94 (override_options): Set Atom ISA. 95 (ix86_issue_rate): New case PROCESSOR_ATOM. 96 (ix86_adjust_cost): Likewise. 97 98 * config/i386/i386.h (TARGET_ATOM): New target macro. 99 (ix86_tune_indices): Add X86_TUNE_OPT_AGU. 100 (TARGET_OPT_AGU): New target option. 101 (target_cpu_default): Add TARGET_CPU_DEFAULT_atom. 102 (processor_type): Add PROCESSOR_ATOM. 103 104 * config/i386/i386.md (cpu): Add new value "atom". 105 (use_carry, movu): New attr. 106 (atom.md): Include atom.md. 107 (adddi3_carry_rex64): Set attr "use_carry". 108 (addqi3_carry): Likewise. 109 (addhi3_carry): Likewise. 110 (addsi3_carry): Likewise. 111 (*addsi3_carry_zext): Likewise. 112 (subdi3_carry_rex64): Likewise. 113 (subqi3_carry): Likewise. 114 (subhi3_carry): Likewise. 115 (subsi3_carry): Likewise. 116 (x86_movdicc_0_m1_rex64): Likewise. 117 (*x86_movdicc_0_m1_se): Likewise. 118 (x86_movsicc_0_m1): Likewise. 119 (*x86_movsicc_0_m1_se): Likewise. 120 (*adddi_1_rex64): Emit add insn as much as possible. 121 (*addsi_1): Likewise. 122 (return_internal): Set atom_unit. 123 (return_internal_long): Likewise. 124 (return_pop_internal): Likewise. 125 (*rcpsf2_sse): Set atom_sse_attr attr. 
126 (*qrt<mode>2_sse): Likewise. 127 128 2009-04-02 H.J. Lu <hongjiu.lu@intel.com> 129 130 Backport from mainline: 131 2009-04-02 H.J. Lu <hongjiu.lu@intel.com> 132 133 * config/i386/i386.c (ix86_abi): Move initialization to ... 134 (override_options): Here. 135 136 2009-03-29 H.J. Lu <hongjiu.lu@intel.com> 137 138 Backport from mainline: 139 2009-03-29 H.J. Lu <hongjiu.lu@intel.com> 140 141 * config/i386/i386-protos.h (ix86_agi_dependent): New. 142 143 * config/i386/i386.c (ix86_agi_dependent): Rewrite. 144 (ix86_adjust_cost): Updated. 145 146 2009-03-27 H.J. Lu <hongjiu.lu@intel.com> 147 148 Backport from mainline: 149 2009-03-27 H.J. Lu <hongjiu.lu@intel.com> 150 151 PR target/39472 152 * config/i386/i386.c (ix86_abi): New. 153 (override_options): Handle -mabi=. 154 (ix86_function_arg_regno_p): Replace DEFAULT_ABI with 155 ix86_abi. 156 (ix86_call_abi_override): Likewise. 157 (init_cumulative_args): Likewise. 158 (function_arg_advance): Likewise. 159 (function_arg_64): Likewise. 160 (function_arg): Likewise. 161 (ix86_pass_by_reference): Likewise. 162 (ix86_function_value_regno_p): Likewise. 163 (ix86_build_builtin_va_list_abi): Likewise. 164 (setup_incoming_varargs_64): Likewise. 165 (is_va_list_char_pointer): Likewise. 166 (ix86_init_machine_status): Likewise. 167 (ix86_reg_parm_stack_space): Use enum calling_abi on 168 call_abi. 169 (ix86_function_type_abi): Return enum calling_abi. Rewrite 170 for 64bit. Replace DEFAULT_ABI with ix86_abi. 171 (ix86_function_abi): Make it static and return enum 172 calling_abi. 173 (ix86_cfun_abi): Return enum calling_abi. Replace DEFAULT_ABI 174 with ix86_abi. 175 (ix86_fn_abi_va_list): Updated. 176 177 * config/i386/i386.h (ix86_abi): New. 178 (STACK_BOUNDARY): Replace DEFAULT_ABI with ix86_abi. 179 (CONDITIONAL_REGISTER_USAGE): Likewise. 180 (CUMULATIVE_ARGS): Change call_abi type to enum calling_abi. 181 (machine_function): Likewise. 182 183 * config/i386/i386.md (untyped_call): Replace DEFAULT_ABI 184 with ix86_abi. 
185 * config/i386/cygming.h (TARGET_64BIT_MS_ABI): Likewise. 186 (STACK_BOUNDARY): Likewise. 187 * config/i386/mingw32.h (EXTRA_OS_CPP_BUILTINS): Likewise. 188 189 * config/i386/i386.opt (mabi=): New. 190 191 * config/i386/i386-protos.h (ix86_cfun_abi): Changed to 192 return enum calling_abi. 193 (ix86_function_type_abi): Likewise. 194 (ix86_function_abi): Removed. 195 196 2009-03-27 H.J. Lu <hongjiu.lu@intel.com> 197 198 Backport from mainline: 199 2009-03-27 Vladimir Makarov <vmakarov@redhat.com> 200 201 * genautomata.c: Add a new year to the copyright. Add a new 202 reference. 203 (struct insn_reserv_decl): Add comments for member bypass_list. 204 (find_bypass): Remove. 205 (insert_bypass): New. 206 (process_decls): Use insert_bypass. 207 (output_internal_insn_latency_func): Output all bypasses with the 208 same input insn in one switch case. 209 210 * rtl.def (define_bypass): Describe bypass choice. 211 * doc/md.texi (define_bypass): Ditto. -
gcc/config.gcc
1088 1088 tmake_file="${tmake_file} i386/t-linux64" 1089 1089 need_64bit_hwint=yes 1090 1090 case X"${with_cpu}" in 1091 Xgeneric|X core2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)1091 Xgeneric|Xatom|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx) 1092 1092 ;; 1093 1093 X) 1094 1094 if test x$with_cpu_64 = x; then … … 1097 1097 ;; 1098 1098 *) 1099 1099 echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2 1100 echo "generic core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&21100 echo "generic atom core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2 1101 1101 exit 1 1102 1102 ;; 1103 1103 esac … … 1202 1202 # libgcc/configure.ac instead. 1203 1203 need_64bit_hwint=yes 1204 1204 case X"${with_cpu}" in 1205 Xgeneric|X core2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx)1205 Xgeneric|Xatom|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx) 1206 1206 ;; 1207 1207 X) 1208 1208 if test x$with_cpu_64 = x; then … … 1211 1211 ;; 1212 1212 *) 1213 1213 echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2 1214 echo "generic core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&21214 echo "generic atom core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2 1215 1215 exit 1 1216 1216 ;; 1217 1217 esac … … 2805 2805 esac 2806 2806 # OK 2807 2807 ;; 2808 "" | amdfam10 | barcelona | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | generic)2808 "" | amdfam10 | barcelona | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | atom | generic) 2809 2809 # OK 2810 2810 ;; 2811 2811 *) -
gcc/config/i386/i386.h
59 59 #define TARGET_ABM OPTION_ISA_ABM 60 60 #define TARGET_POPCNT OPTION_ISA_POPCNT 61 61 #define TARGET_SAHF OPTION_ISA_SAHF 62 #define TARGET_MOVBE OPTION_ISA_MOVBE 62 63 #define TARGET_AES OPTION_ISA_AES 63 64 #define TARGET_PCLMUL OPTION_ISA_PCLMUL 64 65 #define TARGET_CMPXCHG16B OPTION_ISA_CX16 … … 236 237 #define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64) 237 238 #define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64) 238 239 #define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10) 240 #define TARGET_ATOM (ix86_tune == PROCESSOR_ATOM) 239 241 240 242 /* Feature tests against the various tunings. */ 241 243 enum ix86_tune_indices { … … 300 302 X86_TUNE_USE_VECTOR_FP_CONVERTS, 301 303 X86_TUNE_USE_VECTOR_CONVERTS, 302 304 X86_TUNE_FUSE_CMP_AND_BRANCH, 305 X86_TUNE_OPT_AGU, 303 306 304 307 X86_TUNE_LAST 305 308 }; … … 387 390 ix86_tune_features[X86_TUNE_USE_VECTOR_CONVERTS] 388 391 #define TARGET_FUSE_CMP_AND_BRANCH \ 389 392 ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH] 393 #define TARGET_OPT_AGU ix86_tune_features[X86_TUNE_OPT_AGU] 390 394 391 395 /* Feature tests against the various architecture variations. */ 392 396 enum ix86_arch_indices { … … 470 474 MS_ABI = 1 471 475 }; 472 476 473 /* The default abi form used by target. */ 477 /* The abi used by target. */ 478 extern enum calling_abi ix86_abi; 479 480 /* The default abi used by target. */ 474 481 #define DEFAULT_ABI SYSV_ABI 475 482 476 483 /* Subtargets may reset this to 1 in order to enable 96-bit long double … … 569 576 TARGET_CPU_DEFAULT_prescott, 570 577 TARGET_CPU_DEFAULT_nocona, 571 578 TARGET_CPU_DEFAULT_core2, 579 TARGET_CPU_DEFAULT_atom, 572 580 573 581 TARGET_CPU_DEFAULT_geode, 574 582 TARGET_CPU_DEFAULT_k6, … … 658 666 659 667 /* Boundary (in *bits*) on which stack pointer should be aligned. */ 660 668 #define STACK_BOUNDARY \ 661 (TARGET_64BIT && DEFAULT_ABI== MS_ABI ? 128 : BITS_PER_WORD)669 (TARGET_64BIT && ix86_abi == MS_ABI ? 
128 : BITS_PER_WORD) 662 670 663 671 /* Stack boundary of the main function guaranteed by OS. */ 664 672 #define MAIN_STACK_BOUNDARY (TARGET_64BIT ? 128 : 32) … … 1584 1592 int maybe_vaarg; /* true for calls to possibly vardic fncts. */ 1585 1593 int float_in_sse; /* 1 if in 32-bit mode SFmode (2 for DFmode) should 1586 1594 be passed in SSE registers. Otherwise 0. */ 1587 int call_abi;/* Set to SYSV_ABI for sysv abi. Otherwise1595 enum calling_abi call_abi; /* Set to SYSV_ABI for sysv abi. Otherwise 1588 1596 MS_ABI for ms abi. */ 1589 1597 } CUMULATIVE_ARGS; 1590 1598 … … 2230 2238 PROCESSOR_GENERIC32, 2231 2239 PROCESSOR_GENERIC64, 2232 2240 PROCESSOR_AMDFAM10, 2241 PROCESSOR_ATOM, 2233 2242 PROCESSOR_max 2234 2243 }; 2235 2244 … … 2403 2412 int tls_descriptor_call_expanded_p; 2404 2413 /* This value is used for amd64 targets and specifies the current abi 2405 2414 to be used. MS_ABI means ms abi. Otherwise SYSV_ABI means sysv abi. */ 2406 intcall_abi;2415 enum calling_abi call_abi; 2407 2416 }; 2408 2417 2409 2418 #define ix86_stack_locals (cfun->machine->stack_locals) -
gcc/config/i386/cygming.h
34 34 #endif 35 35 36 36 #undef TARGET_64BIT_MS_ABI 37 #define TARGET_64BIT_MS_ABI (!cfun ? DEFAULT_ABI== MS_ABI : TARGET_64BIT && cfun->machine->call_abi == MS_ABI)37 #define TARGET_64BIT_MS_ABI (!cfun ? ix86_abi == MS_ABI : TARGET_64BIT && cfun->machine->call_abi == MS_ABI) 38 38 39 39 #undef DEFAULT_ABI 40 40 #define DEFAULT_ABI (TARGET_64BIT ? MS_ABI : SYSV_ABI) … … 203 203 #define CHECK_STACK_LIMIT 4000 204 204 205 205 #undef STACK_BOUNDARY 206 #define STACK_BOUNDARY ( DEFAULT_ABI== MS_ABI ? 128 : BITS_PER_WORD)206 #define STACK_BOUNDARY (ix86_abi == MS_ABI ? 128 : BITS_PER_WORD) 207 207 208 208 /* By default, target has a 80387, uses IEEE compatible arithmetic, 209 209 returns float values in the 387 and needs stack probes. -
gcc/config/i386/i386.md
316 316 317 317 318 318 319 319 ;; Processor type. 320 (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2, 320 (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,atom, 321 321 generic64,amdfam10" 322 322 (const (symbol_ref "ix86_schedule"))) -
gcc/config/i386/atom.md
@@ -612,6 +612,12 @@ (define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any" (const_string "any")) +;; Define attribute to classify add/sub insns that consumes carry flag (CF) +(define_attr "use_carry" "0,1" (const_string "0")) + +;; Define attribute to indicate unaligned ssemov insns +(define_attr "movu" "0,1" (const_string "0")) + ;; Describe a user's asm statement. (define_asm_attributes [(set_attr "length" "128") @@ -727,6 +733,7 @@ (include "k6.md") (include "athlon.md") (include "geode.md") +(include "atom.md") ;; Operand and operator predicates and constraints @@ -5790,6 +5797,7 @@ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" "adc{q}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "DI")]) @@ -5864,6 +5872,7 @@ "ix86_binary_operator_ok (PLUS, QImode, operands)" "adc{b}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "QI")]) @@ -5876,6 +5885,7 @@ "ix86_binary_operator_ok (PLUS, HImode, operands)" "adc{w}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "HI")]) @@ -5888,6 +5898,7 @@ "ix86_binary_operator_ok (PLUS, SImode, operands)" "adc{l}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) @@ -5901,6 +5912,7 @@ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" "adc{l}\t{%2, %k0|%k0, %2}" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) @@ -6130,9 +6142,9 @@ (set_attr "mode" "SI")]) (define_insn "*adddi_1_rex64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r") - (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r") - (match_operand:DI 2 "x86_64_general_operand" "rme,re,le"))) + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r") + (plus:DI 
(match_operand:DI 1 "nonimmediate_operand" "%0,0,r,r") + (match_operand:DI 2 "x86_64_general_operand" "rme,re,0,le"))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" { @@ -6153,6 +6165,10 @@ } default: + /* Use add as much as possible to replace lea for AGU optimization. */ + if (which_alternative == 2 && TARGET_OPT_AGU) + return "add{q}\t{%1, %0|%0, %1}"; + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. @@ -6171,8 +6187,11 @@ } } [(set (attr "type") - (cond [(eq_attr "alternative" "2") + (cond [(and (eq_attr "alternative" "2") + (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0))) (const_string "lea") + (eq_attr "alternative" "3") + (const_string "lea") ; Current assemblers are broken and do not allow @GOTOFF in ; ought but a memory context. (match_operand:DI 2 "pic_symbolic_operand" "") @@ -6189,8 +6208,8 @@ (plus:DI (match_operand:DI 1 "register_operand" "") (match_operand:DI 2 "x86_64_nonmemory_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && reload_completed - && true_regnum (operands[0]) != true_regnum (operands[1])" + "TARGET_64BIT && reload_completed + && ix86_lea_for_add_ok (PLUS, insn, operands)" [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))] @@ -6394,9 +6413,9 @@ (define_insn "*addsi_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r") - (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r") - (match_operand:SI 2 "general_operand" "g,ri,li"))) + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r,r") + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r,r") + (match_operand:SI 2 "general_operand" "g,ri,0,li"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (PLUS, SImode, operands)" { @@ -6417,6 +6436,10 @@ } default: + /* Use add as much as possible to replace lea for AGU optimization. 
*/ + if (which_alternative == 2 && TARGET_OPT_AGU) + return "add{l}\t{%1, %0|%0, %1}"; + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. @@ -6433,7 +6456,10 @@ } } [(set (attr "type") - (cond [(eq_attr "alternative" "2") + (cond [(and (eq_attr "alternative" "2") + (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0))) + (const_string "lea") + (eq_attr "alternative" "3") (const_string "lea") ; Current assemblers are broken and do not allow @GOTOFF in ; ought but a memory context. @@ -6451,8 +6477,7 @@ (plus (match_operand 1 "register_operand" "") (match_operand 2 "nonmemory_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "reload_completed - && true_regnum (operands[0]) != true_regnum (operands[1])" + "reload_completed && ix86_lea_for_add_ok (PLUS, insn, operands)" [(const_int 0)] { rtx pat; @@ -7553,6 +7578,7 @@ "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)" "sbb{q}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "DI")]) @@ -7601,6 +7627,7 @@ "ix86_binary_operator_ok (MINUS, QImode, operands)" "sbb{b}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "QI")]) @@ -7613,6 +7640,7 @@ "ix86_binary_operator_ok (MINUS, HImode, operands)" "sbb{w}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "HI")]) @@ -7625,6 +7653,7 @@ "ix86_binary_operator_ok (MINUS, SImode, operands)" "sbb{l}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) @@ -15155,7 +15184,7 @@ ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL), operands[0], const0_rtx, GEN_INT ((TARGET_64BIT - ? (DEFAULT_ABI == SYSV_ABI + ? (ix86_abi == SYSV_ABI ? 
X86_64_SSE_REGPARM_MAX : X64_SSE_REGPARM_MAX) : X86_32_SSE_REGPARM_MAX) @@ -15235,6 +15264,7 @@ "reload_completed" "ret" [(set_attr "length" "1") + (set_attr "atom_unit" "jeu") (set_attr "length_immediate" "0") (set_attr "modrm" "0")]) @@ -15247,6 +15277,7 @@ "reload_completed" "rep\;ret" [(set_attr "length" "1") + (set_attr "atom_unit" "jeu") (set_attr "length_immediate" "0") (set_attr "prefix_rep" "1") (set_attr "modrm" "0")]) @@ -15257,6 +15288,7 @@ "reload_completed" "ret\t%0" [(set_attr "length" "3") + (set_attr "atom_unit" "jeu") (set_attr "length_immediate" "2") (set_attr "modrm" "0")]) @@ -15610,7 +15642,7 @@ (bswap:SI (match_operand:SI 1 "register_operand" "")))] "" { - if (!TARGET_BSWAP) + if (!(TARGET_BSWAP || TARGET_MOVBE)) { rtx x = operands[0]; @@ -15622,6 +15654,21 @@ } }) +(define_insn "*bswapsi_movbe" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,m") + (bswap:SI (match_operand:SI 1 "nonimmediate_operand" "0,m,r")))] + "TARGET_MOVBE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + bswap\t%0 + movbe\t{%1, %0|%0, %1} + movbe\t{%1, %0|%0, %1}" + [(set_attr "type" "*,imov,imov") + (set_attr "modrm" "*,1,1") + (set_attr "prefix_0f" "1") + (set_attr "prefix_extra" "*,1,1") + (set_attr "length" "2,*,*") + (set_attr "mode" "SI")]) + (define_insn "*bswapsi_1" [(set (match_operand:SI 0 "register_operand" "=r") (bswap:SI (match_operand:SI 1 "register_operand" "0")))] @@ -15650,7 +15697,29 @@ [(set_attr "length" "4") (set_attr "mode" "HI")]) -(define_insn "bswapdi2" +(define_expand "bswapdi2" + [(set (match_operand:DI 0 "register_operand" "") + (bswap:DI (match_operand:DI 1 "register_operand" "")))] + "TARGET_64BIT" + "") + +(define_insn "*bswapdi_movbe" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,m") + (bswap:DI (match_operand:DI 1 "nonimmediate_operand" "0,m,r")))] + "TARGET_64BIT && TARGET_MOVBE + && !(MEM_P (operands[0]) && MEM_P (operands[1]))" + "@ + bswap\t%0 + movbe\t{%1, %0|%0, %1} + movbe\t{%1, %0|%0, %1}" + [(set_attr 
"type" "*,imov,imov") + (set_attr "modrm" "*,1,1") + (set_attr "prefix_0f" "1") + (set_attr "prefix_extra" "*,1,1") + (set_attr "length" "3,*,*") + (set_attr "mode" "DI")]) + +(define_insn "*bswapdi_1" [(set (match_operand:DI 0 "register_operand" "=r") (bswap:DI (match_operand:DI 1 "register_operand" "0")))] "TARGET_64BIT" @@ -16378,6 +16447,7 @@ "TARGET_SSE_MATH" "%vrcpss\t{%1, %d0|%d0, %1}" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "rcp") (set_attr "prefix" "maybe_vex") (set_attr "mode" "SF")]) @@ -16729,6 +16799,7 @@ "TARGET_SSE_MATH" "%vrsqrtss\t{%1, %d0|%d0, %1}" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "rcp") (set_attr "prefix" "maybe_vex") (set_attr "mode" "SF")]) @@ -16749,6 +16820,7 @@ "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH" "%vsqrts<ssemodefsuffix>\t{%1, %d0|%d0, %1}" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "sqrt") (set_attr "prefix" "maybe_vex") (set_attr "mode" "<MODE>") (set_attr "athlon_decode" "*") @@ -19802,6 +19874,7 @@ ; Since we don't have the proper number of operands for an alu insn, ; fill in all the blanks. [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "memory" "none") (set_attr "imm_disp" "false") @@ -19817,6 +19890,7 @@ "" "sbb{q}\t%0, %0" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "memory" "none") (set_attr "imm_disp" "false") @@ -19860,6 +19934,7 @@ ; Since we don't have the proper number of operands for an alu insn, ; fill in all the blanks. 
[(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "memory" "none") (set_attr "imm_disp" "false") @@ -19875,6 +19950,7 @@ "" "sbb{l}\t%0, %0" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "memory" "none") (set_attr "imm_disp" "false") @@ -20207,7 +20283,8 @@ } } [(set (attr "type") - (cond [(eq_attr "alternative" "0") + (cond [(and (eq_attr "alternative" "0") + (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0))) (const_string "alu") (match_operand:SI 2 "const0_operand" "") (const_string "imov") @@ -20250,7 +20327,8 @@ } } [(set (attr "type") - (cond [(eq_attr "alternative" "0") + (cond [(and (eq_attr "alternative" "0") + (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0))) (const_string "alu") (match_operand:DI 2 "const0_operand" "") (const_string "imov") @@ -21734,6 +21812,7 @@ return patterns[locality]; } [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "prefetch") (set_attr "memory" "none")]) (define_insn "*prefetch_sse_rex" @@ -21752,6 +21831,7 @@ return patterns[locality]; } [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "prefetch") (set_attr "memory" "none")]) (define_insn "*prefetch_3dnow"
1 ;; Atom Scheduling 2 ;; Copyright (C) 2009 Free Software Foundation, Inc. 3 ;; 4 ;; This file is part of GCC. 5 ;; 6 ;; GCC is free software; you can redistribute it and/or modify 7 ;; it under the terms of the GNU General Public License as published by 8 ;; the Free Software Foundation; either version 3, or (at your option) 9 ;; any later version. 10 ;; 11 ;; GCC is distributed in the hope that it will be useful, 12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of 13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 ;; GNU General Public License for more details. 15 ;; 16 ;; You should have received a copy of the GNU General Public License 17 ;; along with GCC; see the file COPYING3. If not see 18 ;; <http://www.gnu.org/licenses/>. 19 ;; 20 ;; Atom is an in-order core with two integer pipelines. 21 22 23 (define_attr "atom_unit" "sishuf,simul,jeu,complex,other" 24 (const_string "other")) 25 26 (define_attr "atom_sse_attr" "rcp,movdup,lfence,fence,prefetch,sqrt,mxcsr,other" 27 (const_string "other")) 28 29 (define_automaton "atom") 30 31 ;; Atom has two ports: port 0 and port 1 connecting to all execution units 32 (define_cpu_unit "atom-port-0,atom-port-1" "atom") 33 34 ;; EU: Execution Unit 35 ;; Atom EUs are connected by port 0 or port 1. 36 37 (define_cpu_unit "atom-eu-0, atom-eu-1, 38 atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4" 39 "atom") 40 41 ;; Some EUs have duplicated copies and can be accessed via either 42 ;; port 0 or port 1 43 ;; (define_reservation "atom-port-either" "(atom-port-0 | atom-port-1)") 44 45 ;;; Some instructions are dual-pipe execution and need both ports 46 ;;; Complex multi-op macro-instructions need both ports and all EUs 47 (define_reservation "atom-port-dual" "(atom-port-0 + atom-port-1)") 48 (define_reservation "atom-all-eu" "(atom-eu-0 + atom-eu-1 + 49 atom-imul-1 + atom-imul-2 + atom-imul-3 + 50 atom-imul-4)") 51 52 ;;; Most of simple instructions have 1 cycle latency.
Some of them 53 ;;; issue in port 0, some in port 1 and some in either port. 54 (define_reservation "atom-simple-0" "(atom-port-0 + atom-eu-0)") 55 (define_reservation "atom-simple-1" "(atom-port-1 + atom-eu-1)") 56 (define_reservation "atom-simple-either" "(atom-simple-0 | atom-simple-1)") 57 58 ;;; Some insns issue in port 0 with 3 cycle latency and 1 cycle tput 59 (define_reservation "atom-eu-0-3-1" "(atom-port-0 + atom-eu-0, nothing*2)") 60 61 ;;; fmul insn can have 4 or 5 cycles latency 62 (define_reservation "atom-fmul-5c" "(atom-port-0 + atom-eu-0), nothing*4") 63 (define_reservation "atom-fmul-4c" "(atom-port-0 + atom-eu-0), nothing*3") 64 65 ;;; fadd can have 5 cycles latency depending on instruction form 66 (define_reservation "atom-fadd-5c" "(atom-port-1 + atom-eu-1), nothing*5") 67 68 ;;; imul insn has 5 cycles latency 69 (define_reservation "atom-imul-32" 70 "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4, 71 atom-port-0") 72 ;;; imul instruction excludes other non-FP instructions. 73 (exclusion_set "atom-eu-0, atom-eu-1" 74 "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4") 75 76 ;;; dual-execution instructions can have 1,2,4,5 cycles latency depending on 77 ;;; instruction form 78 (define_reservation "atom-dual-1c" "(atom-port-dual + atom-eu-0 + atom-eu-1)") 79 (define_reservation "atom-dual-2c" 80 "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing)") 81 (define_reservation "atom-dual-5c" 82 "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing*4)") 83 84 ;;; Complex macro-instructions have varying latencies and use both ports.
85 (define_reservation "atom-complex" "(atom-port-dual + atom-all-eu)") 86 87 (define_insn_reservation "atom_other" 9 88 (and (eq_attr "cpu" "atom") 89 (and (eq_attr "type" "other") 90 (eq_attr "atom_unit" "!jeu"))) 91 "atom-complex, atom-all-eu*8") 92 93 ;; return has type "other" with atom_unit "jeu" 94 (define_insn_reservation "atom_other_2" 1 95 (and (eq_attr "cpu" "atom") 96 (and (eq_attr "type" "other") 97 (eq_attr "atom_unit" "jeu"))) 98 "atom-dual-1c") 99 100 (define_insn_reservation "atom_multi" 9 101 (and (eq_attr "cpu" "atom") 102 (eq_attr "type" "multi")) 103 "atom-complex, atom-all-eu*8") 104 105 ;; Normal alu insns without carry, no memory operand 106 (define_insn_reservation "atom_alu" 1 107 (and (eq_attr "cpu" "atom") 108 (and (eq_attr "type" "alu") 109 (and (eq_attr "memory" "none") 110 (eq_attr "use_carry" "0")))) 111 "atom-simple-either") 112 113 ;; Normal alu insns without carry, with memory operand 114 (define_insn_reservation "atom_alu_mem" 1 115 (and (eq_attr "cpu" "atom") 116 (and (eq_attr "type" "alu") 117 (and (eq_attr "memory" "!none") 118 (eq_attr "use_carry" "0")))) 119 "atom-simple-either") 120 121 ;; Alu insns consuming CF, such as adc/sbb, no memory operand 122 (define_insn_reservation "atom_alu_carry" 1 123 (and (eq_attr "cpu" "atom") 124 (and (eq_attr "type" "alu") 125 (and (eq_attr "memory" "none") 126 (eq_attr "use_carry" "1")))) 127 "atom-simple-either") 128 129 ;; Alu insns consuming CF, such as adc/sbb, with memory operand 130 (define_insn_reservation "atom_alu_carry_mem" 1 131 (and (eq_attr "cpu" "atom") 132 (and (eq_attr "type" "alu") 133 (and (eq_attr "memory" "!none") 134 (eq_attr "use_carry" "1")))) 135 "atom-simple-either") 136 137 (define_insn_reservation "atom_alu1" 1 138 (and (eq_attr "cpu" "atom") 139 (and (eq_attr "type" "alu1") 140 (eq_attr "memory" "none"))) 141 "atom-simple-either") 142 143 (define_insn_reservation "atom_alu1_mem" 1 144 (and (eq_attr "cpu" "atom") 145 (and (eq_attr "type" "alu1") 146 (eq_attr "memory" "!none"))) 147 "atom-simple-either") 148 149 (define_insn_reservation
"atom_negnot" 1 150 (and (eq_attr "cpu" "atom") 151 (and (eq_attr "type" "negnot") 152 (eq_attr "memory" "none"))) 153 "atom-simple-either") 154 155 (define_insn_reservation "atom_negnot_mem" 1 156 (and (eq_attr "cpu" "atom") 157 (and (eq_attr "type" "negnot") 158 (eq_attr "memory" "!none"))) 159 "atom-simple-either") 160 161 (define_insn_reservation "atom_imov" 1 162 (and (eq_attr "cpu" "atom") 163 (and (eq_attr "type" "imov") 164 (eq_attr "memory" "none"))) 165 "atom-simple-either") 166 167 (define_insn_reservation "atom_imov_mem" 1 168 (and (eq_attr "cpu" "atom") 169 (and (eq_attr "type" "imov") 170 (eq_attr "memory" "!none"))) 171 "atom-simple-either") 172 173 ;; 16<-16, 32<-32 174 (define_insn_reservation "atom_imovx" 1 175 (and (eq_attr "cpu" "atom") 176 (and (eq_attr "type" "imovx") 177 (and (eq_attr "memory" "none") 178 (ior (and (match_operand:HI 0 "register_operand") 179 (match_operand:HI 1 "general_operand")) 180 (and (match_operand:SI 0 "register_operand") 181 (match_operand:SI 1 "general_operand")))))) 182 "atom-simple-either") 183 184 ;; 16<-16, 32<-32, mem 185 (define_insn_reservation "atom_imovx_mem" 1 186 (and (eq_attr "cpu" "atom") 187 (and (eq_attr "type" "imovx") 188 (and (eq_attr "memory" "!none") 189 (ior (and (match_operand:HI 0 "register_operand") 190 (match_operand:HI 1 "general_operand")) 191 (and (match_operand:SI 0 "register_operand") 192 (match_operand:SI 1 "general_operand")))))) 193 "atom-simple-either") 194 195 ;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8 196 (define_insn_reservation "atom_imovx_2" 1 197 (and (eq_attr "cpu" "atom") 198 (and (eq_attr "type" "imovx") 199 (and (eq_attr "memory" "none") 200 (ior (match_operand:QI 0 "register_operand") 201 (ior (and (match_operand:SI 0 "register_operand") 202 (not (match_operand:SI 1 "general_operand"))) 203 (match_operand:DI 0 "register_operand")))))) 204 "atom-simple-0") 205 206 ;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8, mem 207 (define_insn_reservation "atom_imovx_2_mem" 1 208 
(and (eq_attr "cpu" "atom") 209 (and (eq_attr "type" "imovx") 210 (and (eq_attr "memory" "!none") 211 (ior (match_operand:QI 0 "register_operand") 212 (ior (and (match_operand:SI 0 "register_operand") 213 (not (match_operand:SI 1 "general_operand"))) 214 (match_operand:DI 0 "register_operand")))))) 215 "atom-simple-0") 216 217 ;; 16<-8 218 (define_insn_reservation "atom_imovx_3" 3 219 (and (eq_attr "cpu" "atom") 220 (and (eq_attr "type" "imovx") 221 (and (match_operand:HI 0 "register_operand") 222 (match_operand:QI 1 "general_operand")))) 223 "atom-complex, atom-all-eu*2") 224 225 (define_insn_reservation "atom_lea" 1 226 (and (eq_attr "cpu" "atom") 227 (and (eq_attr "type" "lea") 228 (eq_attr "mode" "!HI"))) 229 "atom-simple-either") 230 231 ;; lea 16bit address is complex insn 232 (define_insn_reservation "atom_lea_2" 2 233 (and (eq_attr "cpu" "atom") 234 (and (eq_attr "type" "lea") 235 (eq_attr "mode" "HI"))) 236 "atom-complex, atom-all-eu") 237 238 (define_insn_reservation "atom_incdec" 1 239 (and (eq_attr "cpu" "atom") 240 (and (eq_attr "type" "incdec") 241 (eq_attr "memory" "none"))) 242 "atom-simple-either") 243 244 (define_insn_reservation "atom_incdec_mem" 1 245 (and (eq_attr "cpu" "atom") 246 (and (eq_attr "type" "incdec") 247 (eq_attr "memory" "!none"))) 248 "atom-simple-either") 249 250 ;; simple shift instruction use SHIFT eu, none memory 251 (define_insn_reservation "atom_ishift" 1 252 (and (eq_attr "cpu" "atom") 253 (and (eq_attr "type" "ishift") 254 (and (eq_attr "memory" "none") (eq_attr "prefix_0f" "0")))) 255 "atom-simple-0") 256 257 ;; simple shift instruction use SHIFT eu, memory 258 (define_insn_reservation "atom_ishift_mem" 1 259 (and (eq_attr "cpu" "atom") 260 (and (eq_attr "type" "ishift") 261 (and (eq_attr "memory" "!none") (eq_attr "prefix_0f" "0")))) 262 "atom-simple-0") 263 264 ;; DF shift (prefixed with 0f) is complex insn with latency of 7 cycles 265 (define_insn_reservation "atom_ishift_3" 7 266 (and (eq_attr "cpu" "atom") 267 (and 
(eq_attr "type" "ishift") 268 (eq_attr "prefix_0f" "1"))) 269 "atom-complex, atom-all-eu*6") 270 271 (define_insn_reservation "atom_ishift1" 1 272 (and (eq_attr "cpu" "atom") 273 (and (eq_attr "type" "ishift1") 274 (eq_attr "memory" "none"))) 275 "atom-simple-0") 276 277 (define_insn_reservation "atom_ishift1_mem" 1 278 (and (eq_attr "cpu" "atom") 279 (and (eq_attr "type" "ishift1") 280 (eq_attr "memory" "!none"))) 281 "atom-simple-0") 282 283 (define_insn_reservation "atom_rotate" 1 284 (and (eq_attr "cpu" "atom") 285 (and (eq_attr "type" "rotate") 286 (eq_attr "memory" "none"))) 287 "atom-simple-0") 288 289 (define_insn_reservation "atom_rotate_mem" 1 290 (and (eq_attr "cpu" "atom") 291 (and (eq_attr "type" "rotate") 292 (eq_attr "memory" "!none"))) 293 "atom-simple-0") 294 295 (define_insn_reservation "atom_rotate1" 1 296 (and (eq_attr "cpu" "atom") 297 (and (eq_attr "type" "rotate1") 298 (eq_attr "memory" "none"))) 299 "atom-simple-0") 300 301 (define_insn_reservation "atom_rotate1_mem" 1 302 (and (eq_attr "cpu" "atom") 303 (and (eq_attr "type" "rotate1") 304 (eq_attr "memory" "!none"))) 305 "atom-simple-0") 306 307 (define_insn_reservation "atom_imul" 5 308 (and (eq_attr "cpu" "atom") 309 (and (eq_attr "type" "imul") 310 (and (eq_attr "memory" "none") (eq_attr "mode" "SI")))) 311 "atom-imul-32") 312 313 (define_insn_reservation "atom_imul_mem" 5 314 (and (eq_attr "cpu" "atom") 315 (and (eq_attr "type" "imul") 316 (and (eq_attr "memory" "!none") (eq_attr "mode" "SI")))) 317 "atom-imul-32") 318 319 ;; latency set to 10 as common 64x64 imul 320 (define_insn_reservation "atom_imul_3" 10 321 (and (eq_attr "cpu" "atom") 322 (and (eq_attr "type" "imul") 323 (eq_attr "mode" "!SI"))) 324 "atom-complex, atom-all-eu*9") 325 326 (define_insn_reservation "atom_idiv" 65 327 (and (eq_attr "cpu" "atom") 328 (eq_attr "type" "idiv")) 329 "atom-complex, atom-all-eu*32, nothing*32") 330 331 (define_insn_reservation "atom_icmp" 1 332 (and (eq_attr "cpu" "atom") 333 (and (eq_attr 
"type" "icmp") 334 (eq_attr "memory" "none"))) 335 "atom-simple-either") 336 337 (define_insn_reservation "atom_icmp_mem" 1 338 (and (eq_attr "cpu" "atom") 339 (and (eq_attr "type" "icmp") 340 (eq_attr "memory" "!none"))) 341 "atom-simple-either") 342 343 (define_insn_reservation "atom_test" 1 344 (and (eq_attr "cpu" "atom") 345 (and (eq_attr "type" "test") 346 (eq_attr "memory" "none"))) 347 "atom-simple-either") 348 349 (define_insn_reservation "atom_test_mem" 1 350 (and (eq_attr "cpu" "atom") 351 (and (eq_attr "type" "test") 352 (eq_attr "memory" "!none"))) 353 "atom-simple-either") 354 355 (define_insn_reservation "atom_ibr" 1 356 (and (eq_attr "cpu" "atom") 357 (and (eq_attr "type" "ibr") 358 (eq_attr "memory" "!load"))) 359 "atom-simple-1") 360 361 ;; complex if jump target is from address 362 (define_insn_reservation "atom_ibr_2" 2 363 (and (eq_attr "cpu" "atom") 364 (and (eq_attr "type" "ibr") 365 (eq_attr "memory" "load"))) 366 "atom-complex, atom-all-eu") 367 368 (define_insn_reservation "atom_setcc" 1 369 (and (eq_attr "cpu" "atom") 370 (and (eq_attr "type" "setcc") 371 (eq_attr "memory" "!store"))) 372 "atom-simple-either") 373 374 ;; 2 cycles complex if target is in memory 375 (define_insn_reservation "atom_setcc_2" 2 376 (and (eq_attr "cpu" "atom") 377 (and (eq_attr "type" "setcc") 378 (eq_attr "memory" "store"))) 379 "atom-complex, atom-all-eu") 380 381 (define_insn_reservation "atom_icmov" 1 382 (and (eq_attr "cpu" "atom") 383 (and (eq_attr "type" "icmov") 384 (eq_attr "memory" "none"))) 385 "atom-simple-either") 386 387 (define_insn_reservation "atom_icmov_mem" 1 388 (and (eq_attr "cpu" "atom") 389 (and (eq_attr "type" "icmov") 390 (eq_attr "memory" "!none"))) 391 "atom-simple-either") 392 393 ;; UCODE if segreg, ignored 394 (define_insn_reservation "atom_push" 2 395 (and (eq_attr "cpu" "atom") 396 (eq_attr "type" "push")) 397 "atom-dual-2c") 398 399 ;; pop r64 is 1 cycle. 
UCODE if segreg, ignored 400 (define_insn_reservation "atom_pop" 1 401 (and (eq_attr "cpu" "atom") 402 (and (eq_attr "type" "pop") 403 (eq_attr "mode" "DI"))) 404 "atom-dual-1c") 405 406 ;; pop non-r64 is 2 cycles. UCODE if segreg, ignored 407 (define_insn_reservation "atom_pop_2" 2 408 (and (eq_attr "cpu" "atom") 409 (and (eq_attr "type" "pop") 410 (eq_attr "mode" "!DI"))) 411 "atom-dual-2c") 412 413 ;; UCODE if segreg, ignored 414 (define_insn_reservation "atom_call" 1 415 (and (eq_attr "cpu" "atom") 416 (eq_attr "type" "call")) 417 "atom-dual-1c") 418 419 (define_insn_reservation "atom_callv" 1 420 (and (eq_attr "cpu" "atom") 421 (eq_attr "type" "callv")) 422 "atom-dual-1c") 423 424 (define_insn_reservation "atom_leave" 3 425 (and (eq_attr "cpu" "atom") 426 (eq_attr "type" "leave")) 427 "atom-complex, atom-all-eu*2") 428 429 (define_insn_reservation "atom_str" 3 430 (and (eq_attr "cpu" "atom") 431 (eq_attr "type" "str")) 432 "atom-complex, atom-all-eu*2") 433 434 (define_insn_reservation "atom_sselog" 1 435 (and (eq_attr "cpu" "atom") 436 (and (eq_attr "type" "sselog") 437 (eq_attr "memory" "none"))) 438 "atom-simple-either") 439 440 (define_insn_reservation "atom_sselog_mem" 1 441 (and (eq_attr "cpu" "atom") 442 (and (eq_attr "type" "sselog") 443 (eq_attr "memory" "!none"))) 444 "atom-simple-either") 445 446 (define_insn_reservation "atom_sselog1" 1 447 (and (eq_attr "cpu" "atom") 448 (and (eq_attr "type" "sselog1") 449 (eq_attr "memory" "none"))) 450 "atom-simple-0") 451 452 (define_insn_reservation "atom_sselog1_mem" 1 453 (and (eq_attr "cpu" "atom") 454 (and (eq_attr "type" "sselog1") 455 (eq_attr "memory" "!none"))) 456 "atom-simple-0") 457 458 ;; not pmad, not psad 459 (define_insn_reservation "atom_sseiadd" 1 460 (and (eq_attr "cpu" "atom") 461 (and (eq_attr "type" "sseiadd") 462 (and (not (match_operand:V2DI 0 "register_operand")) 463 (and (eq_attr "atom_unit" "!simul") 464 (eq_attr "atom_unit" "!complex"))))) 465 "atom-simple-either") 466 467 ;; pmad, 
psad and 64 468 (define_insn_reservation "atom_sseiadd_2" 4 469 (and (eq_attr "cpu" "atom") 470 (and (eq_attr "type" "sseiadd") 471 (and (not (match_operand:V2DI 0 "register_operand")) 472 (and (eq_attr "atom_unit" "simul" ) 473 (eq_attr "mode" "DI"))))) 474 "atom-fmul-4c") 475 476 ;; pmad, psad and 128 477 (define_insn_reservation "atom_sseiadd_3" 5 478 (and (eq_attr "cpu" "atom") 479 (and (eq_attr "type" "sseiadd") 480 (and (not (match_operand:V2DI 0 "register_operand")) 481 (and (eq_attr "atom_unit" "simul" ) 482 (eq_attr "mode" "TI"))))) 483 "atom-fmul-5c") 484 485 ;; if paddq(64 bit op), phadd/phsub 486 (define_insn_reservation "atom_sseiadd_4" 6 487 (and (eq_attr "cpu" "atom") 488 (and (eq_attr "type" "sseiadd") 489 (ior (match_operand:V2DI 0 "register_operand") 490 (eq_attr "atom_unit" "complex")))) 491 "atom-complex, atom-all-eu*5") 492 493 ;; if immediate op. 494 (define_insn_reservation "atom_sseishft" 1 495 (and (eq_attr "cpu" "atom") 496 (and (eq_attr "type" "sseishft") 497 (and (eq_attr "atom_unit" "!sishuf") 498 (match_operand 2 "immediate_operand")))) 499 "atom-simple-either") 500 501 ;; if palignr or psrldq 502 (define_insn_reservation "atom_sseishft_2" 1 503 (and (eq_attr "cpu" "atom") 504 (and (eq_attr "type" "sseishft") 505 (and (eq_attr "atom_unit" "sishuf") 506 (match_operand 2 "immediate_operand")))) 507 "atom-simple-0") 508 509 ;; if reg/mem op 510 (define_insn_reservation "atom_sseishft_3" 2 511 (and (eq_attr "cpu" "atom") 512 (and (eq_attr "type" "sseishft") 513 (not (match_operand 2 "immediate_operand")))) 514 "atom-complex, atom-all-eu") 515 516 (define_insn_reservation "atom_sseimul" 1 517 (and (eq_attr "cpu" "atom") 518 (eq_attr "type" "sseimul")) 519 "atom-simple-0") 520 521 ;; rcpss or rsqrtss 522 (define_insn_reservation "atom_sse" 4 523 (and (eq_attr "cpu" "atom") 524 (and (eq_attr "type" "sse") 525 (and (eq_attr "atom_sse_attr" "rcp") (eq_attr "mode" "SF")))) 526 "atom-fmul-4c") 527 528 ;; movshdup, movsldup. 
Suggest to type sseishft 529 (define_insn_reservation "atom_sse_2" 1 530 (and (eq_attr "cpu" "atom") 531 (and (eq_attr "type" "sse") 532 (eq_attr "atom_sse_attr" "movdup"))) 533 "atom-simple-0") 534 535 ;; lfence 536 (define_insn_reservation "atom_sse_3" 1 537 (and (eq_attr "cpu" "atom") 538 (and (eq_attr "type" "sse") 539 (eq_attr "atom_sse_attr" "lfence"))) 540 "atom-simple-either") 541 542 ;; sfence,clflush,mfence, prefetch 543 (define_insn_reservation "atom_sse_4" 1 544 (and (eq_attr "cpu" "atom") 545 (and (eq_attr "type" "sse") 546 (ior (eq_attr "atom_sse_attr" "fence") 547 (eq_attr "atom_sse_attr" "prefetch")))) 548 "atom-simple-0") 549 550 ;; rcpps, rsqrtps, sqrt, ldmxcsr 551 (define_insn_reservation "atom_sse_5" 7 552 (and (eq_attr "cpu" "atom") 553 (and (eq_attr "type" "sse") 554 (ior (ior (eq_attr "atom_sse_attr" "sqrt") 555 (eq_attr "atom_sse_attr" "mxcsr")) 556 (and (eq_attr "atom_sse_attr" "rcp") 557 (eq_attr "mode" "V4SF"))))) 558 "atom-complex, atom-all-eu*6") 559 560 ;; xmm->xmm 561 (define_insn_reservation "atom_ssemov" 1 562 (and (eq_attr "cpu" "atom") 563 (and (eq_attr "type" "ssemov") 564 (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "xy")))) 565 "atom-simple-either") 566 567 ;; reg->xmm 568 (define_insn_reservation "atom_ssemov_2" 1 569 (and (eq_attr "cpu" "atom") 570 (and (eq_attr "type" "ssemov") 571 (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "r")))) 572 "atom-simple-0") 573 574 ;; xmm->reg 575 (define_insn_reservation "atom_ssemov_3" 3 576 (and (eq_attr "cpu" "atom") 577 (and (eq_attr "type" "ssemov") 578 (and (match_operand 0 "register_operand" "r") (match_operand 1 "register_operand" "xy")))) 579 "atom-eu-0-3-1") 580 581 ;; mov mem 582 (define_insn_reservation "atom_ssemov_4" 1 583 (and (eq_attr "cpu" "atom") 584 (and (eq_attr "type" "ssemov") 585 (and (eq_attr "movu" "0") (eq_attr "memory" "!none")))) 586 "atom-simple-0") 587 588 ;; movu mem 589 
;; SSE and x87-move reservations for the "atom" cpu attribute, followed by
;; the bypasses between Atom reservations.
;;
;; In each define_insn_reservation the number after the name is the
;; default latency, the condition selects the insns, and the final string
;; is the unit reservation.  The units named here (atom-complex,
;; atom-all-eu, atom-fadd-5c, ...) are defined elsewhere in this file.

;; SSE move that is unaligned (movu attribute set) or has a memory operand.
(define_insn_reservation "atom_ssemov_5" 2
  (and (eq_attr "cpu" "atom")
       (and (eq_attr "type" "ssemov")
            (ior (eq_attr "movu" "1") (eq_attr "memory" "!none"))))
  "atom-complex, atom-all-eu")

;; sseadd, no memory operand, simple case
;; (mode is not V2DF and atom_unit is not complex).
(define_insn_reservation "atom_sseadd" 5
  (and (eq_attr "cpu" "atom")
       (and (eq_attr "type" "sseadd")
            (and (eq_attr "memory" "none")
                 (and (eq_attr "mode" "!V2DF")
                      (eq_attr "atom_unit" "!complex")))))
  "atom-fadd-5c")

;; sseadd, memory operand, simple case
;; (mode is not V2DF and atom_unit is not complex).
(define_insn_reservation "atom_sseadd_mem" 5
  (and (eq_attr "cpu" "atom")
       (and (eq_attr "type" "sseadd")
            (and (eq_attr "memory" "!none")
                 (and (eq_attr "mode" "!V2DF")
                      (eq_attr "atom_unit" "!complex")))))
  "atom-dual-5c")

;; sseadd, complex case: V2DF mode or atom_unit "complex"
;; (maxps, minps, *pd, hadd, hsub).
(define_insn_reservation "atom_sseadd_3" 8
  (and (eq_attr "cpu" "atom")
       (and (eq_attr "type" "sseadd")
            (ior (eq_attr "mode" "V2DF") (eq_attr "atom_unit" "complex"))))
  "atom-complex, atom-all-eu*7")

;; ssemul in any mode other than SF.  Except dppd/dpps.
(define_insn_reservation "atom_ssemul" 5
  (and (eq_attr "cpu" "atom")
       (and (eq_attr "type" "ssemul")
            (eq_attr "mode" "!SF")))
  "atom-fmul-5c")

;; ssemul in SF mode.  Except dppd/dpps; 4 cycles if mulss.
(define_insn_reservation "atom_ssemul_2" 4
  (and (eq_attr "cpu" "atom")
       (and (eq_attr "type" "ssemul")
            (eq_attr "mode" "SF")))
  "atom-fmul-4c")

;; All ssecmp insns.
(define_insn_reservation "atom_ssecmp" 1
  (and (eq_attr "cpu" "atom")
       (eq_attr "type" "ssecmp"))
  "atom-simple-either")

;; All ssecomi insns.
(define_insn_reservation "atom_ssecomi" 10
  (and (eq_attr "cpu" "atom")
       (eq_attr "type" "ssecomi"))
  "atom-complex, atom-all-eu*9")

;; cvtpi2ps, cvtps2pi, cvttps2pi without memory:
;; V2SI <- V4SF or V4SF <- V2SI with a register source.
(define_insn_reservation "atom_ssecvt" 5
  (and (eq_attr "cpu" "atom")
       (and (eq_attr "type" "ssecvt")
            (ior (and (match_operand:V2SI 0 "register_operand")
                      (match_operand:V4SF 1 "register_operand"))
                 (and (match_operand:V4SF 0 "register_operand")
                      (match_operand:V2SI 1 "register_operand")))))
  "atom-fadd-5c")

;; cvtpi2ps, cvtps2pi, cvttps2pi with memory:
;; same operand modes as above, but with a memory source.
(define_insn_reservation "atom_ssecvt_2" 5
  (and (eq_attr "cpu" "atom")
       (and (eq_attr "type" "ssecvt")
            (ior (and (match_operand:V2SI 0 "register_operand")
                      (match_operand:V4SF 1 "memory_operand"))
                 (and (match_operand:V4SF 0 "register_operand")
                      (match_operand:V2SI 1 "memory_operand")))))
  "atom-dual-5c")

;; Every other ssecvt insn.  7 cycles on average for cvtss2sd.
(define_insn_reservation "atom_ssecvt_3" 7
  (and (eq_attr "cpu" "atom")
       (and (eq_attr "type" "ssecvt")
            (not (ior (and (match_operand:V2SI 0 "register_operand")
                           (match_operand:V4SF 1 "nonimmediate_operand"))
                      (and (match_operand:V4SF 0 "register_operand")
                           (match_operand:V2SI 1 "nonimmediate_operand"))))))
  "atom-complex, atom-all-eu*6")

;; cvtsi2sd from memory: V2DF register destination, SI memory source.
(define_insn_reservation "atom_sseicvt" 5
  (and (eq_attr "cpu" "atom")
       (and (eq_attr "type" "sseicvt")
            (and (match_operand:V2DF 0 "register_operand")
                 (match_operand:SI 1 "memory_operand"))))
  "atom-dual-5c")

;; Every other sseicvt insn.  8 cycles on average for cvtsd2si.
(define_insn_reservation "atom_sseicvt_2" 8
  (and (eq_attr "cpu" "atom")
       (and (eq_attr "type" "sseicvt")
            (not (and (match_operand:V2DF 0 "register_operand")
                      (match_operand:SI 1 "memory_operand")))))
  "atom-complex, atom-all-eu*7")

;; All ssediv insns.
(define_insn_reservation "atom_ssediv" 62
  (and (eq_attr "cpu" "atom")
       (eq_attr "type" "ssediv"))
  "atom-complex, atom-all-eu*12, nothing*49")

;; fmov without a memory operand: simple.
(define_insn_reservation "atom_fmov" 1
  (and (eq_attr "cpu" "atom")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "none")))
  "atom-simple-either")

;; fmov with a memory operand: reserved the same way as the form above.
(define_insn_reservation "atom_fmov_mem" 1
  (and (eq_attr "cpu" "atom")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "!none")))
  "atom-simple-either")

;; Bypasses.
;;
;; Each define_bypass gives a latency, the producer reservations, the
;; consumer reservations and, optionally, the name of a guard function.

;; There is no stall from lea to non-memory EX insns.
(define_bypass 0 "atom_lea"
               "atom_alu_carry,
                atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
                atom_incdec, atom_setcc, atom_icmov, atom_pop")

;; Likewise for these memory-form consumers, guarded by
;; !ix86_agi_dependent (presumably: the consumer's address does not
;; depend on the lea result -- see ix86_agi_dependent in i386.c).
(define_bypass 0 "atom_lea"
               "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
                atom_imovx_mem, atom_imovx_2_mem,
                atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
               "!ix86_agi_dependent")

;; There is a 3-cycle stall from EX insns to the address-generating
;; insn lea.
(define_bypass 4 "atom_alu_carry,
                  atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
                  atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
                  atom_rotate1, atom_setcc, atom_icmov, atom_pop,
                  atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
                  atom_imovx_mem, atom_imovx_2_mem,
                  atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
                 "atom_lea")

;; There is a 3-cycle stall from EX insns to insns that need an address
;; calculation (guarded by ix86_agi_dependent).
(define_bypass 4 "atom_alu_carry,
                  atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
                  atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
                  atom_rotate1, atom_setcc, atom_icmov, atom_pop,
                  atom_imovx_mem, atom_imovx_2_mem,
                  atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
                  atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
                 "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
                  atom_negnot_mem, atom_imov_mem, atom_incdec_mem,
                  atom_imovx_mem, atom_imovx_2_mem,
                  atom_imul_mem, atom_icmp_mem,
                  atom_test_mem, atom_icmov_mem, atom_sselog_mem,
                  atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem,
                  atom_ishift_mem, atom_ishift1_mem,
                  atom_rotate_mem, atom_rotate1_mem"
                 "ix86_agi_dependent")

;; The stall from imul to lea is 8 cycles.
(define_bypass 9 "atom_imul, atom_imul_mem" "atom_lea")

;; The stall from imul to a memory address is 8 cycles
;; (guarded by ix86_agi_dependent).
(define_bypass 9 "atom_imul, atom_imul_mem"
                 "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
                  atom_negnot_mem, atom_imov_mem, atom_incdec_mem,
                  atom_ishift_mem, atom_ishift1_mem, atom_rotate_mem,
                  atom_rotate1_mem, atom_imul_mem, atom_icmp_mem,
                  atom_test_mem, atom_icmov_mem, atom_sselog_mem,
                  atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem"
                 "ix86_agi_dependent")

;; There is a 0-cycle stall from cmp/test to jcc, so no bypass is
;; defined for that pair.

;; There is a 1-cycle stall from a flag producer to cmov and adc/sbb.
(define_bypass 2 "atom_icmp, atom_test, atom_alu, atom_alu_carry,
                  atom_alu1, atom_negnot, atom_incdec, atom_ishift,
                  atom_ishift1, atom_rotate, atom_rotate1"
                 "atom_icmov, atom_alu_carry")

;; The stall from lea to a shift count is 2 cycles
;; (guarded by ix86_dep_by_shift_count).
(define_bypass 3 "atom_lea"
                 "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1,
                  atom_ishift_mem, atom_ishift1_mem,
                  atom_rotate_mem, atom_rotate1_mem"
                 "ix86_dep_by_shift_count")

;; The stall from lea to a shift source is 1 cycle
;; (guarded by !ix86_dep_by_shift_count).
(define_bypass 2 "atom_lea"
                 "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1"
                 "!ix86_dep_by_shift_count")

;; The stall from a non-lea insn to a shift count is 1 cycle
;; (guarded by ix86_dep_by_shift_count).
(define_bypass 2 "atom_alu_carry,
                  atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx,
                  atom_incdec,atom_ishift,atom_ishift1,atom_rotate,
                  atom_rotate1, atom_setcc, atom_icmov, atom_pop,
                  atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem,
                  atom_imovx_mem, atom_imovx_2_mem,
                  atom_imov_mem, atom_icmov_mem, atom_fmov_mem"
                 "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1,
                  atom_ishift_mem, atom_ishift1_mem,
                  atom_rotate_mem, atom_rotate1_mem"
                 "ix86_dep_by_shift_count")
gcc/config/i386/cpuid.h
29 29 #define bit_CMPXCHG16B (1 << 13) 30 30 #define bit_SSE4_1 (1 << 19) 31 31 #define bit_SSE4_2 (1 << 20) 32 #define bit_MOVBE (1 << 22) 32 33 #define bit_POPCNT (1 << 23) 33 34 #define bit_AES (1 << 25) 34 35 #define bit_XSAVE (1 << 26) -
gcc/config/i386/sse.md
338 338 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 339 339 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}" 340 340 [(set_attr "type" "ssemov") 341 (set_attr "movu" "1") 341 342 (set_attr "prefix" "vex") 342 343 (set_attr "mode" "<MODE>")]) 343 344 … … 363 364 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 364 365 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}" 365 366 [(set_attr "type" "ssemov") 367 (set_attr "movu" "1") 366 368 (set_attr "mode" "<MODE>")]) 367 369 368 370 (define_insn "avx_movdqu<avxmodesuffix>" … … 373 375 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 374 376 "vmovdqu\t{%1, %0|%0, %1}" 375 377 [(set_attr "type" "ssemov") 378 (set_attr "movu" "1") 376 379 (set_attr "prefix" "vex") 377 380 (set_attr "mode" "<avxvecmode>")]) 378 381 … … 383 386 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 384 387 "movdqu\t{%1, %0|%0, %1}" 385 388 [(set_attr "type" "ssemov") 389 (set_attr "movu" "1") 386 390 (set_attr "prefix_data16" "1") 387 391 (set_attr "mode" "TI")]) 388 392 … … 424 428 UNSPEC_MOVNT))] 425 429 "TARGET_SSE2" 426 430 "movntdq\t{%1, %0|%0, %1}" 427 [(set_attr "type" "sse cvt")431 [(set_attr "type" "ssemov") 428 432 (set_attr "prefix_data16" "1") 429 433 (set_attr "mode" "TI")]) 430 434 … … 434 438 UNSPEC_MOVNT))] 435 439 "TARGET_SSE2" 436 440 "movnti\t{%1, %0|%0, %1}" 437 [(set_attr "type" "sse cvt")441 [(set_attr "type" "ssemov") 438 442 (set_attr "mode" "V2DF")]) 439 443 440 444 (define_insn "avx_lddqu<avxmodesuffix>" … … 445 449 "TARGET_AVX" 446 450 "vlddqu\t{%1, %0|%0, %1}" 447 451 [(set_attr "type" "ssecvt") 452 (set_attr "movu" "1") 448 453 (set_attr "prefix" "vex") 449 454 (set_attr "mode" "<avxvecmode>")]) 450 455 … … 454 459 UNSPEC_LDDQU))] 455 460 "TARGET_SSE3" 456 461 "lddqu\t{%1, %0|%0, %1}" 457 [(set_attr "type" "ssecvt") 462 [(set_attr "type" "ssemov") 463 (set_attr "movu" "1") 458 464 (set_attr "prefix_rep" "1") 459 465 (set_attr "mode" "TI")]) 460 466 … … 761 767 "TARGET_SSE" 762 768 "%vrcpps\t{%1, 
%0|%0, %1}" 763 769 [(set_attr "type" "sse") 770 (set_attr "atom_sse_attr" "rcp") 764 771 (set_attr "prefix" "maybe_vex") 765 772 (set_attr "mode" "V4SF")]) 766 773 … … 787 794 "TARGET_SSE" 788 795 "rcpss\t{%1, %0|%0, %1}" 789 796 [(set_attr "type" "sse") 797 (set_attr "atom_sse_attr" "rcp") 790 798 (set_attr "mode" "SF")]) 791 799 792 800 (define_expand "sqrtv8sf2" … … 832 840 "TARGET_SSE" 833 841 "%vsqrtps\t{%1, %0|%0, %1}" 834 842 [(set_attr "type" "sse") 843 (set_attr "atom_sse_attr" "sqrt") 835 844 (set_attr "prefix" "maybe_vex") 836 845 (set_attr "mode" "V4SF")]) 837 846 … … 876 885 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" 877 886 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}" 878 887 [(set_attr "type" "sse") 888 (set_attr "atom_sse_attr" "sqrt") 879 889 (set_attr "mode" "<ssescalarmode>")]) 880 890 881 891 (define_expand "rsqrtv8sf2" … … 1039 1049 (const_int 1)))] 1040 1050 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" 1041 1051 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}" 1042 [(set_attr "type" "sse ")1052 [(set_attr "type" "sseadd") 1043 1053 (set_attr "mode" "<ssescalarmode>")]) 1044 1054 1045 1055 ;; These versions of the min/max patterns implement exactly the operations … … 1175 1185 "TARGET_SSE3" 1176 1186 "addsubpd\t{%2, %0|%0, %2}" 1177 1187 [(set_attr "type" "sseadd") 1188 (set_attr "atom_unit" "complex") 1178 1189 (set_attr "mode" "V2DF")]) 1179 1190 1180 1191 (define_insn "avx_h<plusminus_insn>v4df3" … … 1298 1309 "TARGET_SSE3" 1299 1310 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}" 1300 1311 [(set_attr "type" "sseadd") 1312 (set_attr "atom_unit" "complex") 1301 1313 (set_attr "prefix_rep" "1") 1302 1314 (set_attr "mode" "V4SF")]) 1303 1315 … … 5066 5078 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" 5067 5079 "pmaddwd\t{%2, %0|%0, %2}" 5068 5080 [(set_attr "type" "sseiadd") 5081 (set_attr "atom_unit" "simul") 5069 5082 (set_attr "prefix_data16" "1") 5070 5083 (set_attr "mode" "TI")]) 5071 5084 … … 7025 7038 movq\t{%H1, %0|%0, %H1} 7026 
7039 mov{q}\t{%H1, %0|%0, %H1}" 7027 7040 [(set_attr "type" "ssemov,sseishft,ssemov,imov") 7041 (set_attr "atom_unit" "*,sishuf,*,*") 7028 7042 (set_attr "memory" "*,none,*,*") 7029 7043 (set_attr "mode" "V2SF,TI,TI,DI")]) 7030 7044 … … 7057 7071 psrldq\t{$8, %0|%0, 8} 7058 7072 movq\t{%H1, %0|%0, %H1}" 7059 7073 [(set_attr "type" "ssemov,sseishft,ssemov") 7074 (set_attr "atom_unit" "*,sishuf,*") 7060 7075 (set_attr "memory" "*,none,*") 7061 7076 (set_attr "mode" "V2SF,TI,TI")]) 7062 7077 … … 7614 7629 "TARGET_SSE2" 7615 7630 "psadbw\t{%2, %0|%0, %2}" 7616 7631 [(set_attr "type" "sseiadd") 7632 (set_attr "atom_unit" "simul") 7617 7633 (set_attr "prefix_data16" "1") 7618 7634 (set_attr "mode" "TI")]) 7619 7635 … … 7635 7651 UNSPEC_MOVMSK))] 7636 7652 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" 7637 7653 "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}" 7638 [(set_attr "type" "sse cvt")7654 [(set_attr "type" "ssemov") 7639 7655 (set_attr "prefix" "maybe_vex") 7640 7656 (set_attr "mode" "<MODE>")]) 7641 7657 … … 7645 7661 UNSPEC_MOVMSK))] 7646 7662 "TARGET_SSE2" 7647 7663 "%vpmovmskb\t{%1, %0|%0, %1}" 7648 [(set_attr "type" "sse cvt")7664 [(set_attr "type" "ssemov") 7649 7665 (set_attr "prefix_data16" "1") 7650 7666 (set_attr "prefix" "maybe_vex") 7651 7667 (set_attr "mode" "SI")]) … … 7668 7684 "TARGET_SSE2 && !TARGET_64BIT" 7669 7685 ;; @@@ check ordering of operands in intel/nonintel syntax 7670 7686 "%vmaskmovdqu\t{%2, %1|%1, %2}" 7671 [(set_attr "type" "sse cvt")7687 [(set_attr "type" "ssemov") 7672 7688 (set_attr "prefix_data16" "1") 7673 7689 (set_attr "prefix" "maybe_vex") 7674 7690 (set_attr "mode" "TI")]) … … 7682 7698 "TARGET_SSE2 && TARGET_64BIT" 7683 7699 ;; @@@ check ordering of operands in intel/nonintel syntax 7684 7700 "%vmaskmovdqu\t{%2, %1|%1, %2}" 7685 [(set_attr "type" "sse cvt")7701 [(set_attr "type" "ssemov") 7686 7702 (set_attr "prefix_data16" "1") 7687 7703 (set_attr "prefix" "maybe_vex") 7688 7704 (set_attr "mode" "TI")]) … … 7693 7709 "TARGET_SSE" 7694 
7710 "%vldmxcsr\t%0" 7695 7711 [(set_attr "type" "sse") 7712 (set_attr "atom_sse_attr" "mxcsr") 7696 7713 (set_attr "prefix" "maybe_vex") 7697 7714 (set_attr "memory" "load")]) 7698 7715 … … 7702 7719 "TARGET_SSE" 7703 7720 "%vstmxcsr\t%0" 7704 7721 [(set_attr "type" "sse") 7722 (set_attr "atom_sse_attr" "mxcsr") 7705 7723 (set_attr "prefix" "maybe_vex") 7706 7724 (set_attr "memory" "store")]) 7707 7725 … … 7720 7738 "TARGET_SSE || TARGET_3DNOW_A" 7721 7739 "sfence" 7722 7740 [(set_attr "type" "sse") 7741 (set_attr "atom_sse_attr" "fence") 7723 7742 (set_attr "memory" "unknown")]) 7724 7743 7725 7744 (define_insn "sse2_clflush" … … 7728 7747 "TARGET_SSE2" 7729 7748 "clflush\t%a0" 7730 7749 [(set_attr "type" "sse") 7750 (set_attr "atom_sse_attr" "fence") 7731 7751 (set_attr "memory" "unknown")]) 7732 7752 7733 7753 (define_expand "sse2_mfence" … … 7745 7765 "TARGET_64BIT || TARGET_SSE2" 7746 7766 "mfence" 7747 7767 [(set_attr "type" "sse") 7768 (set_attr "atom_sse_attr" "fence") 7748 7769 (set_attr "memory" "unknown")]) 7749 7770 7750 7771 (define_expand "sse2_lfence" … … 7762 7783 "TARGET_SSE2" 7763 7784 "lfence" 7764 7785 [(set_attr "type" "sse") 7786 (set_attr "atom_sse_attr" "lfence") 7765 7787 (set_attr "memory" "unknown")]) 7766 7788 7767 7789 (define_insn "sse3_mwait" … … 7885 7907 "TARGET_SSSE3" 7886 7908 "phaddw\t{%2, %0|%0, %2}" 7887 7909 [(set_attr "type" "sseiadd") 7910 (set_attr "atom_unit" "complex") 7888 7911 (set_attr "prefix_data16" "1") 7889 7912 (set_attr "prefix_extra" "1") 7890 7913 (set_attr "mode" "TI")]) … … 7913 7936 "TARGET_SSSE3" 7914 7937 "phaddw\t{%2, %0|%0, %2}" 7915 7938 [(set_attr "type" "sseiadd") 7939 (set_attr "atom_unit" "complex") 7916 7940 (set_attr "prefix_extra" "1") 7917 7941 (set_attr "mode" "DI")]) 7918 7942 … … 7967 7991 "TARGET_SSSE3" 7968 7992 "phaddd\t{%2, %0|%0, %2}" 7969 7993 [(set_attr "type" "sseiadd") 7994 (set_attr "atom_unit" "complex") 7970 7995 (set_attr "prefix_data16" "1") 7971 7996 (set_attr "prefix_extra" 
"1") 7972 7997 (set_attr "mode" "TI")]) … … 7987 8012 "TARGET_SSSE3" 7988 8013 "phaddd\t{%2, %0|%0, %2}" 7989 8014 [(set_attr "type" "sseiadd") 8015 (set_attr "atom_unit" "complex") 7990 8016 (set_attr "prefix_extra" "1") 7991 8017 (set_attr "mode" "DI")]) 7992 8018 … … 8073 8099 "TARGET_SSSE3" 8074 8100 "phaddsw\t{%2, %0|%0, %2}" 8075 8101 [(set_attr "type" "sseiadd") 8102 (set_attr "atom_unit" "complex") 8076 8103 (set_attr "prefix_data16" "1") 8077 8104 (set_attr "prefix_extra" "1") 8078 8105 (set_attr "mode" "TI")]) … … 8101 8128 "TARGET_SSSE3" 8102 8129 "phaddsw\t{%2, %0|%0, %2}" 8103 8130 [(set_attr "type" "sseiadd") 8131 (set_attr "atom_unit" "complex") 8104 8132 (set_attr "prefix_extra" "1") 8105 8133 (set_attr "mode" "DI")]) 8106 8134 … … 8187 8215 "TARGET_SSSE3" 8188 8216 "phsubw\t{%2, %0|%0, %2}" 8189 8217 [(set_attr "type" "sseiadd") 8218 (set_attr "atom_unit" "complex") 8190 8219 (set_attr "prefix_data16" "1") 8191 8220 (set_attr "prefix_extra" "1") 8192 8221 (set_attr "mode" "TI")]) … … 8215 8244 "TARGET_SSSE3" 8216 8245 "phsubw\t{%2, %0|%0, %2}" 8217 8246 [(set_attr "type" "sseiadd") 8247 (set_attr "atom_unit" "complex") 8218 8248 (set_attr "prefix_extra" "1") 8219 8249 (set_attr "mode" "DI")]) 8220 8250 … … 8269 8299 "TARGET_SSSE3" 8270 8300 "phsubd\t{%2, %0|%0, %2}" 8271 8301 [(set_attr "type" "sseiadd") 8302 (set_attr "atom_unit" "complex") 8272 8303 (set_attr "prefix_data16" "1") 8273 8304 (set_attr "prefix_extra" "1") 8274 8305 (set_attr "mode" "TI")]) … … 8289 8320 "TARGET_SSSE3" 8290 8321 "phsubd\t{%2, %0|%0, %2}" 8291 8322 [(set_attr "type" "sseiadd") 8323 (set_attr "atom_unit" "complex") 8292 8324 (set_attr "prefix_extra" "1") 8293 8325 (set_attr "mode" "DI")]) 8294 8326 … … 8375 8407 "TARGET_SSSE3" 8376 8408 "phsubsw\t{%2, %0|%0, %2}" 8377 8409 [(set_attr "type" "sseiadd") 8410 (set_attr "atom_unit" "complex") 8378 8411 (set_attr "prefix_data16" "1") 8379 8412 (set_attr "prefix_extra" "1") 8380 8413 (set_attr "mode" "TI")]) … … 8403 8436 
"TARGET_SSSE3" 8404 8437 "phsubsw\t{%2, %0|%0, %2}" 8405 8438 [(set_attr "type" "sseiadd") 8439 (set_attr "atom_unit" "complex") 8406 8440 (set_attr "prefix_extra" "1") 8407 8441 (set_attr "mode" "DI")]) 8408 8442 … … 8509 8543 "TARGET_SSSE3" 8510 8544 "pmaddubsw\t{%2, %0|%0, %2}" 8511 8545 [(set_attr "type" "sseiadd") 8546 (set_attr "atom_unit" "simul") 8512 8547 (set_attr "prefix_data16" "1") 8513 8548 (set_attr "prefix_extra" "1") 8514 8549 (set_attr "mode" "TI")]) … … 8547 8582 "TARGET_SSSE3" 8548 8583 "pmaddubsw\t{%2, %0|%0, %2}" 8549 8584 [(set_attr "type" "sseiadd") 8585 (set_attr "atom_unit" "simul") 8550 8586 (set_attr "prefix_extra" "1") 8551 8587 (set_attr "mode" "DI")]) 8552 8588 … … 8754 8790 return "palignr\t{%3, %2, %0|%0, %2, %3}"; 8755 8791 } 8756 8792 [(set_attr "type" "sseishft") 8793 (set_attr "atom_unit" "sishuf") 8757 8794 (set_attr "prefix_data16" "1") 8758 8795 (set_attr "prefix_extra" "1") 8759 8796 (set_attr "mode" "TI")]) … … 8770 8807 return "palignr\t{%3, %2, %0|%0, %2, %3}"; 8771 8808 } 8772 8809 [(set_attr "type" "sseishft") 8810 (set_attr "atom_unit" "sishuf") 8773 8811 (set_attr "prefix_extra" "1") 8774 8812 (set_attr "mode" "DI")]) 8775 8813 … … 8956 8994 UNSPEC_MOVNTDQA))] 8957 8995 "TARGET_SSE4_1" 8958 8996 "%vmovntdqa\t{%1, %0|%0, %1}" 8959 [(set_attr "type" "sse cvt")8997 [(set_attr "type" "ssemov") 8960 8998 (set_attr "prefix_extra" "1") 8961 8999 (set_attr "prefix" "maybe_vex") 8962 9000 (set_attr "mode" "TI")]) -
gcc/config/i386/i386.opt
228 228 Target RejectNegative Joined Var(ix86_tune_string) 229 229 Schedule code for given CPU 230 230 231 mabi= 232 Target RejectNegative Joined Var(ix86_abi_string) 233 Generate code that conforms to the given ABI 234 231 235 mveclibabi= 232 236 Target RejectNegative Joined Var(ix86_veclibabi_string) 233 237 Vector library ABI to use … … 335 339 Target Report Mask(ISA_SAHF) Var(ix86_isa_flags) VarExists Save 336 340 Support code generation of sahf instruction in 64bit x86-64 code. 337 341 342 mmovbe 343 Target Report Mask(ISA_MOVBE) Var(ix86_isa_flags) VarExists Save 344 Support code generation of movbe instruction. 345 338 346 maes 339 347 Target Report Mask(ISA_AES) Var(ix86_isa_flags) VarExists Save 340 348 Support AES built-in functions and code generation -
gcc/config/i386/i386-c.c
119 119 def_or_undef (parse_in, "__core2"); 120 120 def_or_undef (parse_in, "__core2__"); 121 121 break; 122 case PROCESSOR_ATOM: 123 def_or_undef (parse_in, "__atom"); 124 def_or_undef (parse_in, "__atom__"); 125 break; 122 126 /* use PROCESSOR_max to not set/unset the arch macro. */ 123 127 case PROCESSOR_max: 124 128 break; … … 187 191 case PROCESSOR_CORE2: 188 192 def_or_undef (parse_in, "__tune_core2__"); 189 193 break; 194 case PROCESSOR_ATOM: 195 def_or_undef (parse_in, "__tune_atom__"); 196 break; 190 197 case PROCESSOR_GENERIC32: 191 198 case PROCESSOR_GENERIC64: 192 199 break; -
gcc/config/i386/mingw32.h
38 38 builtin_define_std ("WINNT"); \ 39 39 builtin_define_with_int_value ("_INTEGRAL_MAX_BITS", \ 40 40 TYPE_PRECISION (intmax_type_node));\ 41 if (TARGET_64BIT && DEFAULT_ABI== MS_ABI) \41 if (TARGET_64BIT && ix86_abi == MS_ABI) \ 42 42 { \ 43 43 builtin_define ("__MINGW64__"); \ 44 44 builtin_define_std ("WIN64"); \ -
gcc/config/i386/i386-protos.h
86 86 extern void ix86_expand_binary_operator (enum rtx_code, 87 87 enum machine_mode, rtx[]); 88 88 extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]); 89 extern bool ix86_lea_for_add_ok (enum rtx_code, rtx, rtx[]); 90 extern bool ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn); 91 extern bool ix86_agi_dependent (rtx set_insn, rtx use_insn); 89 92 extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode, 90 93 rtx[]); 91 94 extern rtx ix86_build_const_vector (enum machine_mode, bool, rtx); … … 140 143 extern bool ix86_sol10_return_in_memory (const_tree,const_tree); 141 144 extern rtx ix86_force_to_memory (enum machine_mode, rtx); 142 145 extern void ix86_free_from_memory (enum machine_mode); 143 extern int ix86_cfun_abi (void); 144 extern int ix86_function_abi (const_tree); 145 extern int ix86_function_type_abi (const_tree); 146 extern enum calling_abi ix86_cfun_abi (void); 147 extern enum calling_abi ix86_function_type_abi (const_tree); 146 148 extern void ix86_call_abi_override (const_tree); 147 149 extern tree ix86_fn_abi_va_list (tree); 148 150 extern tree ix86_canonical_va_list_type (tree); -
gcc/config/i386/driver-i386.c
378 378 /* Extended features */ 379 379 unsigned int has_lahf_lm = 0, has_sse4a = 0; 380 380 unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0; 381 unsigned int has_ sse4_1 = 0, has_sse4_2 = 0;381 unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0; 382 382 unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0; 383 383 unsigned int has_pclmul = 0; 384 384 … … 398 398 399 399 __cpuid (1, eax, ebx, ecx, edx); 400 400 401 /* We don't care for extended family. */402 401 model = (eax >> 4) & 0x0f; 403 402 family = (eax >> 8) & 0x0f; 403 if (vendor == SIG_INTEL) 404 { 405 unsigned int extended_model, extended_family; 404 406 407 extended_model = (eax >> 12) & 0xf0; 408 extended_family = (eax >> 20) & 0xff; 409 if (family == 0x0f) 410 { 411 family += extended_family; 412 model += extended_model; 413 } 414 else if (family == 0x06) 415 model += extended_model; 416 } 417 405 418 has_sse3 = ecx & bit_SSE3; 406 419 has_ssse3 = ecx & bit_SSSE3; 407 420 has_sse4_1 = ecx & bit_SSE4_1; 408 421 has_sse4_2 = ecx & bit_SSE4_2; 409 422 has_avx = ecx & bit_AVX; 410 423 has_cmpxchg16b = ecx & bit_CMPXCHG16B; 424 has_movbe = ecx & bit_MOVBE; 411 425 has_popcnt = ecx & bit_POPCNT; 412 426 has_aes = ecx & bit_AES; 413 427 has_pclmul = ecx & bit_PCLMUL; … … 505 519 break; 506 520 case PROCESSOR_PENTIUMPRO: 507 521 if (has_longmode) 508 /* It is Core 2 Duo. */509 cpu = "core2";522 /* It is Core 2 or Atom. */ 523 cpu = (model == 28) ? "atom" : "core2"; 510 524 else if (arch) 511 525 { 512 526 if (has_sse3) … … 597 611 options = concat (options, "-mcx16 ", NULL); 598 612 if (has_lahf_lm) 599 613 options = concat (options, "-msahf ", NULL); 614 if (has_movbe) 615 options = concat (options, "-mmovbe ", NULL); 600 616 if (has_aes) 601 617 options = concat (options, "-maes ", NULL); 602 618 if (has_pclmul) -
gcc/config/i386/i386.c
1036 1036 1, /* cond_not_taken_branch_cost. */ 1037 1037 }; 1038 1038 /* Cost table (struct processor_costs) for the Atom processor. Entries are positional; the comment after each value names the cost it sets. */ 1039 static const 1040 struct processor_costs atom_cost = { 1041 COSTS_N_INSNS (1), /* cost of an add instruction */ 1042 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction: slightly above the cost of an add */ 1043 COSTS_N_INSNS (1), /* variable shift costs */ 1044 COSTS_N_INSNS (1), /* constant shift costs */ 1045 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ 1046 COSTS_N_INSNS (4), /* HI */ 1047 COSTS_N_INSNS (3), /* SI */ 1048 COSTS_N_INSNS (4), /* DI */ 1049 COSTS_N_INSNS (2)}, /* other */ 1050 0, /* cost of multiply per each bit set */ 1051 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ 1052 COSTS_N_INSNS (26), /* HI */ 1053 COSTS_N_INSNS (42), /* SI */ 1054 COSTS_N_INSNS (74), /* DI */ 1055 COSTS_N_INSNS (74)}, /* other */ 1056 COSTS_N_INSNS (1), /* cost of movsx */ 1057 COSTS_N_INSNS (1), /* cost of movzx */ 1058 8, /* "large" insn */ 1059 17, /* MOVE_RATIO */ 1060 2, /* cost for loading QImode using movzbl */ 1061 {4, 4, 4}, /* cost of loading integer registers 1062 in QImode, HImode and SImode. 1063 Relative to reg-reg move (2). */ 1064 {4, 4, 4}, /* cost of storing integer registers */ 1065 4, /* cost of reg,reg fld/fst */ 1066 {12, 12, 12}, /* cost of loading fp registers 1067 in SFmode, DFmode and XFmode */ 1068 {6, 6, 8}, /* cost of storing fp registers 1069 in SFmode, DFmode and XFmode */ 1070 2, /* cost of moving MMX register */ 1071 {8, 8}, /* cost of loading MMX registers 1072 in SImode and DImode */ 1073 {8, 8}, /* cost of storing MMX registers 1074 in SImode and DImode */ 1075 2, /* cost of moving SSE register */ 1076 {8, 8, 8}, /* cost of loading SSE registers 1077 in SImode, DImode and TImode */ 1078 {8, 8, 8}, /* cost of storing SSE registers 1079 in SImode, DImode and TImode */ 1080 5, /* MMX or SSE register to integer */ 1081 32, /* size of l1 cache. */ 1082 256, /* size of l2 cache. 
*/ 1083 64, /* size of prefetch block */ 1084 6, /* number of parallel prefetches */ 1085 3, /* Branch cost */ 1086 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ 1087 COSTS_N_INSNS (8), /* cost of FMUL instruction. */ 1088 COSTS_N_INSNS (20), /* cost of FDIV instruction. */ 1089 COSTS_N_INSNS (8), /* cost of FABS instruction. */ 1090 COSTS_N_INSNS (8), /* cost of FCHS instruction. */ 1091 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ /* NOTE(review): the two initializer groups below are presumably the memcpy and then memset expansion strategy tables -- confirm against struct processor_costs. */ 1092 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}}, 1093 {libcall, {{32, loop}, {64, rep_prefix_4_byte}, 1094 {8192, rep_prefix_8_byte}, {-1, libcall}}}}, 1095 {{libcall, {{8, loop}, {15, unrolled_loop}, 1096 {2048, rep_prefix_4_byte}, {-1, libcall}}}, 1097 {libcall, {{24, loop}, {32, unrolled_loop}, 1098 {8192, rep_prefix_8_byte}, {-1, libcall}}}}, 1099 1, /* scalar_stmt_cost. */ 1100 1, /* scalar_load_cost. */ 1101 1, /* scalar_store_cost. */ 1102 1, /* vec_stmt_cost. */ 1103 1, /* vec_to_scalar_cost. */ 1104 1, /* scalar_to_vec_cost. */ 1105 1, /* vec_align_load_cost. */ 1106 2, /* vec_unalign_load_cost. */ 1107 1, /* vec_store_cost. */ 1108 3, /* cond_taken_branch_cost. */ 1109 1, /* cond_not_taken_branch_cost. */ 1110 }; 1111 1039 1112 /* Generic64 should produce code tuned for Nocona and K8. 
*/ 1040 1113 static const 1041 1114 struct processor_costs generic64_cost = { … … 1194 1267 #define m_PENT4 (1<<PROCESSOR_PENTIUM4) 1195 1268 #define m_NOCONA (1<<PROCESSOR_NOCONA) 1196 1269 #define m_CORE2 (1<<PROCESSOR_CORE2) 1270 #define m_ATOM (1<<PROCESSOR_ATOM) 1197 1271 1198 1272 #define m_GEODE (1<<PROCESSOR_GEODE) 1199 1273 #define m_K6 (1<<PROCESSOR_K6) … … 1231 1305 m_486 | m_PENT, 1232 1306 1233 1307 /* X86_TUNE_UNROLL_STRLEN */ 1234 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC, 1308 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6 1309 | m_CORE2 | m_GENERIC, 1235 1310 1236 1311 /* X86_TUNE_DEEP_BRANCH_PREDICTION */ 1237 m_ PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,1312 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC, 1238 1313 1239 1314 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based 1240 1315 on simulation result. But after P4 was made, no performance benefit … … 1246 1321 ~m_386, 1247 1322 1248 1323 /* X86_TUNE_USE_SAHF */ 1249 m_ PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT41324 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4 1250 1325 | m_NOCONA | m_CORE2 | m_GENERIC, 1251 1326 1252 1327 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid 1253 1328 partial dependencies. 
*/ 1254 m_AMD_MULTIPLE | m_ PPRO | m_PENT4 | m_NOCONA1329 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA 1255 1330 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */, 1256 1331 1257 1332 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial … … 1271 1346 m_386 | m_486 | m_K6_GEODE, 1272 1347 1273 1348 /* X86_TUNE_USE_SIMODE_FIOP */ 1274 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ CORE2 | m_GENERIC),1349 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC), 1275 1350 1276 1351 /* X86_TUNE_USE_MOV0 */ 1277 1352 m_K6, 1278 1353 1279 1354 /* X86_TUNE_USE_CLTD */ 1280 ~(m_PENT | m_ K6 | m_CORE2 | m_GENERIC),1355 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC), 1281 1356 1282 1357 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */ 1283 1358 m_PENT4, … … 1292 1367 ~(m_PENT | m_PPRO), 1293 1368 1294 1369 /* X86_TUNE_PROMOTE_QIMODE */ 1295 m_K6_GEODE | m_PENT | m_ 386 | m_486 | m_AMD_MULTIPLE | m_CORE21296 | m_ GENERIC /* | m_PENT4 ? */,1370 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE 1371 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */, 1297 1372 1298 1373 /* X86_TUNE_FAST_PREFIX */ 1299 1374 ~(m_PENT | m_486 | m_386), … … 1317 1392 m_PPRO, 1318 1393 1319 1394 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. 
*/ 1320 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC, 1395 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA 1396 | m_CORE2 | m_GENERIC, 1321 1397 1322 1398 /* X86_TUNE_ADD_ESP_8 */ 1323 m_AMD_MULTIPLE | m_ PPRO | m_K6_GEODE | m_3861399 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386 1324 1400 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC, 1325 1401 1326 1402 /* X86_TUNE_SUB_ESP_4 */ 1327 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC, 1403 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 1404 | m_GENERIC, 1328 1405 1329 1406 /* X86_TUNE_SUB_ESP_8 */ 1330 m_AMD_MULTIPLE | m_ PPRO | m_386 | m_4861407 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486 1331 1408 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC, 1332 1409 1333 1410 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred 1334 1411 for DFmode copies */ 1335 ~(m_AMD_MULTIPLE | m_ PENT4 | m_NOCONA | m_PPRO | m_CORE21412 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 1336 1413 | m_GENERIC | m_GEODE), 1337 1414 1338 1415 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */ 1339 m_AMD_MULTIPLE | m_ PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,1416 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC, 1340 1417 1341 1418 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a 1342 1419 conflict here in between PPro/Pentium4 based chips that thread 128bit … … 1347 1424 shows that disabling this option on P4 brings over 20% SPECfp regression, 1348 1425 while enabling it on K8 brings roughly 2.4% regression that can be partly 1349 1426 masked by careful scheduling of moves. 
*/ 1350 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10, 1427 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC 1428 | m_AMDFAM10, 1351 1429 1352 1430 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */ 1353 1431 m_AMDFAM10, … … 1365 1443 m_PPRO | m_PENT4 | m_NOCONA, 1366 1444 1367 1445 /* X86_TUNE_MEMORY_MISMATCH_STALL */ 1368 m_AMD_MULTIPLE | m_ PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,1446 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC, 1369 1447 1370 1448 /* X86_TUNE_PROLOGUE_USING_MOVE */ 1371 m_ATHLON_K8 | m_ PPRO | m_CORE2 | m_GENERIC,1449 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC, 1372 1450 1373 1451 /* X86_TUNE_EPILOGUE_USING_MOVE */ 1374 m_ATHLON_K8 | m_ PPRO | m_CORE2 | m_GENERIC,1452 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC, 1375 1453 1376 1454 /* X86_TUNE_SHIFT1 */ 1377 1455 ~m_486, … … 1380 1458 m_AMD_MULTIPLE, 1381 1459 1382 1460 /* X86_TUNE_INTER_UNIT_MOVES */ 1383 ~(m_AMD_MULTIPLE | m_ GENERIC),1461 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC), 1384 1462 1385 1463 /* X86_TUNE_INTER_UNIT_CONVERSIONS */ 1386 1464 ~(m_AMDFAM10), 1387 1465 1388 1466 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more 1389 1467 than 4 branch instructions in the 16 byte window. 
*/ 1390 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC, 1468 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 1469 | m_GENERIC, 1391 1470 1392 1471 /* X86_TUNE_SCHEDULE */ 1393 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC, 1472 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2 1473 | m_GENERIC, 1394 1474 1395 1475 /* X86_TUNE_USE_BT */ 1396 m_AMD_MULTIPLE | m_ CORE2 | m_GENERIC,1476 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC, 1397 1477 1398 1478 /* X86_TUNE_USE_INCDEC */ 1399 ~(m_PENT4 | m_NOCONA | m_GENERIC ),1479 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM), 1400 1480 1401 1481 /* X86_TUNE_PAD_RETURNS */ 1402 1482 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC, 1403 1483 1404 1484 /* X86_TUNE_EXT_80387_CONSTANTS */ 1405 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC, 1485 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO 1486 | m_CORE2 | m_GENERIC, 1406 1487 1407 1488 /* X86_TUNE_SHORTEN_X87_SSE */ 1408 1489 ~m_K8, … … 1447 1528 with a subsequent conditional jump instruction into a single 1448 1529 compare-and-branch uop. */ 1449 1530 m_CORE2, 1531 1532 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag 1533 will impact LEA instruction selection. */ 1534 m_ATOM, 1450 1535 }; 1451 1536 1452 1537 /* Feature tests against the various architecture variations. 
*/ … … 1472 1557 }; 1473 1558 1474 1559 static const unsigned int x86_accumulate_outgoing_args 1475 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC; 1560 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 1561 | m_GENERIC; 1476 1562 1477 1563 static const unsigned int x86_arch_always_fancy_math_387 1478 = m_PENT | m_ PPRO | m_AMD_MULTIPLE | m_PENT41564 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 1479 1565 | m_NOCONA | m_CORE2 | m_GENERIC; 1480 1566 1481 1567 static enum stringop_alg stringop_alg = no_stringop; … … 1743 1829 /* Alignment for incoming stack boundary in bits. */ 1744 1830 unsigned int ix86_incoming_stack_boundary; 1745 1831 1832 /* The abi used by target. */ 1833 enum calling_abi ix86_abi; 1834 1746 1835 /* Values 1-5: see jump.c */ 1747 1836 int ix86_branch_cost; 1748 1837 … … 1819 1908 static bool ix86_can_inline_p (tree, tree); 1820 1909 static void ix86_set_current_function (tree); 1821 1910 1911 static enum calling_abi ix86_function_abi (const_tree); 1912 1822 1913 1823 1914 1824 1915 /* The svr4 ABI for the i386 says that records and unions are returned