diff options
author | Teresa Johnson <tejohnson@google.com> | 2012-04-06 05:03:49 +0000 |
---|---|---|
committer | Teresa Johnson <tejohnson@gcc.gnu.org> | 2012-04-06 05:03:49 +0000 |
commit | 7b38ee83df3686a974d84c17da96101239de47bb (patch) | |
tree | a12fd9a169d4afde5d4da2344818e26d9caa4a08 | |
parent | 939c8f05b8ebcb9d34f2b58055899c3f71912a2c (diff) | |
download | gcc-7b38ee83df3686a974d84c17da96101239de47bb.zip gcc-7b38ee83df3686a974d84c17da96101239de47bb.tar.gz gcc-7b38ee83df3686a974d84c17da96101239de47bb.tar.bz2 |
Avoid instructions that incur expensive length-changing prefix (LCP) stalls on some x86-64 implementations...
Avoid instructions that incur expensive length-changing prefix (LCP) stalls
on some x86-64 implementations, notably Core2 and Corei7. Specifically, a move of
a 16-bit constant into memory requires a length-changing prefix and can incur significant
penalties. Modified an old patch written by H.J. Lu to split such instructions
during peephole2.
2012-04-05 Teresa Johnson <tejohnson@google.com>
H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386.h (ix86_tune_indices): Add
X86_TUNE_LCP_STALL.
* config/i386/i386.md (move immediate to memory peephole2):
Add cases for HImode move when LCP stall avoidance is needed.
* config/i386/i386.c (initial_ix86_tune_features): Initialize
X86_TUNE_LCP_STALL entry.
Co-Authored-By: H.J. Lu <hongjiu.lu@intel.com>
From-SVN: r186176
-rw-r--r-- | gcc/ChangeLog | 10 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 4 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 3 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 16 |
4 files changed, 27 insertions, 6 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 0df25cf..8118ed5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2012-04-05 Teresa Johnson <tejohnson@google.com> + H.J. Lu <hongjiu.lu@intel.com> + + * config/i386/i386.h (ix86_tune_indices): Add + X86_TUNE_LCP_STALL. + * config/i386/i386.md (move immediate to memory peephole2): + Add cases for HImode move when LCP stall avoidance is needed. + * config/i386/i386.c (initial_ix86_tune_features): Initialize + X86_TUNE_LCP_STALL entry. + 2012-04-05 Uros Bizjak <ubizjak@gmail.com> PR target/52882 diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index c959113..8974ddc 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1964,6 +1964,10 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_PARTIAL_FLAG_REG_STALL */ m_CORE2I7 | m_GENERIC, + /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall + * on 16-bit immediate moves into memory on Core2 and Corei7. */ + m_CORE2I7 | m_GENERIC, + /* X86_TUNE_USE_HIMODE_FIOP */ m_386 | m_486 | m_K6_GEODE, diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 7ba90c7..8942ea8 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -262,6 +262,7 @@ enum ix86_tune_indices { X86_TUNE_MOVX, X86_TUNE_PARTIAL_REG_STALL, X86_TUNE_PARTIAL_FLAG_REG_STALL, + X86_TUNE_LCP_STALL, X86_TUNE_USE_HIMODE_FIOP, X86_TUNE_USE_SIMODE_FIOP, X86_TUNE_USE_MOV0, @@ -340,6 +341,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; #define TARGET_PARTIAL_REG_STALL ix86_tune_features[X86_TUNE_PARTIAL_REG_STALL] #define TARGET_PARTIAL_FLAG_REG_STALL \ ix86_tune_features[X86_TUNE_PARTIAL_FLAG_REG_STALL] +#define TARGET_LCP_STALL \ + ix86_tune_features[X86_TUNE_LCP_STALL] #define TARGET_USE_HIMODE_FIOP ix86_tune_features[X86_TUNE_USE_HIMODE_FIOP] #define TARGET_USE_SIMODE_FIOP ix86_tune_features[X86_TUNE_USE_SIMODE_FIOP] #define TARGET_USE_MOV0 ix86_tune_features[X86_TUNE_USE_MOV0] diff --git 
a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 2d20a52..dd1f9be 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -16971,15 +16971,17 @@ (set (match_dup 0) (match_dup 2))]) ;; Don't move an immediate directly to memory when the instruction -;; gets too big. +;; gets too big, or if LCP stalls are a problem for 16-bit moves. (define_peephole2 [(match_scratch:SWI124 1 "<r>") (set (match_operand:SWI124 0 "memory_operand") (const_int 0))] "optimize_insn_for_speed_p () - && !TARGET_USE_MOV0 - && TARGET_SPLIT_LONG_MOVES - && get_attr_length (insn) >= ix86_cur_cost ()->large_insn + && ((<MODE>mode == HImode + && TARGET_LCP_STALL) + || (!TARGET_USE_MOV0 + && TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn)) && peep2_regno_dead_p (0, FLAGS_REG)" [(parallel [(set (match_dup 2) (const_int 0)) (clobber (reg:CC FLAGS_REG))]) @@ -16991,8 +16993,10 @@ (set (match_operand:SWI124 0 "memory_operand") (match_operand:SWI124 1 "immediate_operand"))] "optimize_insn_for_speed_p () - && TARGET_SPLIT_LONG_MOVES - && get_attr_length (insn) >= ix86_cur_cost ()->large_insn" + && ((<MODE>mode == HImode + && TARGET_LCP_STALL) + || (TARGET_SPLIT_LONG_MOVES + && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (match_dup 2))]) |