aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Teresa Johnson <tejohnson@google.com> 2012-04-06 05:03:49 +0000
committer: Teresa Johnson <tejohnson@gcc.gnu.org> 2012-04-06 05:03:49 +0000
commit: 7b38ee83df3686a974d84c17da96101239de47bb (patch)
tree: a12fd9a169d4afde5d4da2344818e26d9caa4a08
parent: 939c8f05b8ebcb9d34f2b58055899c3f71912a2c (diff)
downloadgcc-7b38ee83df3686a974d84c17da96101239de47bb.zip
gcc-7b38ee83df3686a974d84c17da96101239de47bb.tar.gz
gcc-7b38ee83df3686a974d84c17da96101239de47bb.tar.bz2
Avoid instructions that incur expensive length-changing prefix (LCP) stalls on some x86-64 implementations...
Avoid instructions that incur expensive length-changing prefix (LCP) stalls on some x86-64 implementations, notably Core2 and Corei7. Specifically, a move of a 16-bit constant into memory requires a length-changing prefix and can incur significant penalties. Modified an old patch written by H.J. Lu to split such instructions during peephole2.

2012-04-05  Teresa Johnson  <tejohnson@google.com>
	    H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/i386.h (ix86_tune_indices): Add X86_TUNE_LCP_STALL.
	* config/i386/i386.md (move immediate to memory peephole2): Add cases for HImode move when LCP stall avoidance is needed.
	* config/i386/i386.c (initial_ix86_tune_features): Initialize X86_TUNE_LCP_STALL entry.

Co-Authored-By: H.J. Lu <hongjiu.lu@intel.com>
From-SVN: r186176
-rw-r--r-- gcc/ChangeLog 10
-rw-r--r-- gcc/config/i386/i386.c 4
-rw-r--r-- gcc/config/i386/i386.h 3
-rw-r--r-- gcc/config/i386/i386.md 16
4 files changed, 27 insertions, 6 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0df25cf..8118ed5 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2012-04-05 Teresa Johnson <tejohnson@google.com>
+ H.J. Lu <hongjiu.lu@intel.com>
+
+ * config/i386/i386.h (ix86_tune_indices): Add
+ X86_TUNE_LCP_STALL.
+ * config/i386/i386.md (move immediate to memory peephole2):
+ Add cases for HImode move when LCP stall avoidance is needed.
+ * config/i386/i386.c (initial_ix86_tune_features): Initialize
+ X86_TUNE_LCP_STALL entry.
+
2012-04-05 Uros Bizjak <ubizjak@gmail.com>
PR target/52882
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index c959113..8974ddc 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1964,6 +1964,10 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
/* X86_TUNE_PARTIAL_FLAG_REG_STALL */
m_CORE2I7 | m_GENERIC,
+ /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
+ * on 16-bit immediate moves into memory on Core2 and Corei7. */
+ m_CORE2I7 | m_GENERIC,
+
/* X86_TUNE_USE_HIMODE_FIOP */
m_386 | m_486 | m_K6_GEODE,
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 7ba90c7..8942ea8 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -262,6 +262,7 @@ enum ix86_tune_indices {
X86_TUNE_MOVX,
X86_TUNE_PARTIAL_REG_STALL,
X86_TUNE_PARTIAL_FLAG_REG_STALL,
+ X86_TUNE_LCP_STALL,
X86_TUNE_USE_HIMODE_FIOP,
X86_TUNE_USE_SIMODE_FIOP,
X86_TUNE_USE_MOV0,
@@ -340,6 +341,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
#define TARGET_PARTIAL_REG_STALL ix86_tune_features[X86_TUNE_PARTIAL_REG_STALL]
#define TARGET_PARTIAL_FLAG_REG_STALL \
ix86_tune_features[X86_TUNE_PARTIAL_FLAG_REG_STALL]
+#define TARGET_LCP_STALL \
+ ix86_tune_features[X86_TUNE_LCP_STALL]
#define TARGET_USE_HIMODE_FIOP ix86_tune_features[X86_TUNE_USE_HIMODE_FIOP]
#define TARGET_USE_SIMODE_FIOP ix86_tune_features[X86_TUNE_USE_SIMODE_FIOP]
#define TARGET_USE_MOV0 ix86_tune_features[X86_TUNE_USE_MOV0]
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 2d20a52..dd1f9be 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -16971,15 +16971,17 @@
(set (match_dup 0) (match_dup 2))])
;; Don't move an immediate directly to memory when the instruction
-;; gets too big.
+;; gets too big, or if LCP stalls are a problem for 16-bit moves.
(define_peephole2
[(match_scratch:SWI124 1 "<r>")
(set (match_operand:SWI124 0 "memory_operand")
(const_int 0))]
"optimize_insn_for_speed_p ()
- && !TARGET_USE_MOV0
- && TARGET_SPLIT_LONG_MOVES
- && get_attr_length (insn) >= ix86_cur_cost ()->large_insn
+ && ((<MODE>mode == HImode
+ && TARGET_LCP_STALL)
+ || (!TARGET_USE_MOV0
+ && TARGET_SPLIT_LONG_MOVES
+ && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))
&& peep2_regno_dead_p (0, FLAGS_REG)"
[(parallel [(set (match_dup 2) (const_int 0))
(clobber (reg:CC FLAGS_REG))])
@@ -16991,8 +16993,10 @@
(set (match_operand:SWI124 0 "memory_operand")
(match_operand:SWI124 1 "immediate_operand"))]
"optimize_insn_for_speed_p ()
- && TARGET_SPLIT_LONG_MOVES
- && get_attr_length (insn) >= ix86_cur_cost ()->large_insn"
+ && ((<MODE>mode == HImode
+ && TARGET_LCP_STALL)
+ || (TARGET_SPLIT_LONG_MOVES
+ && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))"
[(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (match_dup 2))])