aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorH.J. Lu <hongjiu.lu@intel.com>2018-05-31 15:37:22 +0000
committerH.J. Lu <hjl@gcc.gnu.org>2018-05-31 08:37:22 -0700
commit3217e694854fe50be13091f071283f47aad9b348 (patch)
tree711df1b4cb3a1740a15ade8d61067cd35139f4f8 /gcc
parentbd1cab35c5eabf51d9392751036fa3a71758d263 (diff)
downloadgcc-3217e694854fe50be13091f071283f47aad9b348.zip
gcc-3217e694854fe50be13091f071283f47aad9b348.tar.gz
gcc-3217e694854fe50be13091f071283f47aad9b348.tar.bz2
x86: Re-enable partial_reg_dependency and movx for Haswell
r254152 disabled partial_reg_dependency and movx for Haswell and newer Intel processors. r258972 restored them for skylake-avx512. For Haswell, movx improves performance. But partial_reg_stall may be better than partial_reg_dependency in theory. We will investigate performance impact of partial_reg_stall vs partial_reg_dependency on Haswell for GCC 9. In the meantime, this patch restores both partial_reg_dependency and movx for Haswell in GCC 8. On Haswell, improvements for EEMBC benchmarks with -mtune-ctrl=movx,partial_reg_dependency -Ofast -march=haswell vs -Ofast -mtune=haswell are automotive ========= aifftr01 (default) - goodperf: Runtime improvement of 2.6% (time). aiifft01 (default) - goodperf: Runtime improvement of 2.2% (time). networking ========= ip_pktcheckb1m (default) - goodperf: Runtime improvement of 3.8% (time). ip_pktcheckb2m (default) - goodperf: Runtime improvement of 5.2% (time). ip_pktcheckb4m (default) - goodperf: Runtime improvement of 4.4% (time). ip_pktcheckb512k (default) - goodperf: Runtime improvement of 4.2% (time). telecom ========= fft00data_1 (default) - goodperf: Runtime improvement of 8.4% (time). fft00data_2 (default) - goodperf: Runtime improvement of 8.6% (time). fft00data_3 (default) - goodperf: Runtime improvement of 9.0% (time). PR target/85829 * config/i386/x86-tune.def: Re-enable partial_reg_dependency and movx for Haswell. From-SVN: r261028
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog6
-rw-r--r--gcc/config/i386/x86-tune.def4
2 files changed, 8 insertions, 2 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c9cf570..36c7a86 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2018-05-31 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/85829
+ * config/i386/x86-tune.def: Re-enable partial_reg_dependency
+ and movx for Haswell.
+
2018-05-31 Chung-Lin Tang <cltang@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 77d9934..f95c070 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -49,7 +49,7 @@ DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
over partial stores. For example preffer MOVZBL or MOVQ to load 8bit
value over movb. */
DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency",
- m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE
+ m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL
| m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL
| m_KNL | m_KNM | m_AMD_MULTIPLE | m_SKYLAKE_AVX512 | m_GENERIC)
@@ -87,7 +87,7 @@ DEF_TUNE (X86_TUNE_MOVX, "movx",
m_PPRO | m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE
| m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_KNL | m_KNM | m_INTEL
| m_GOLDMONT_PLUS | m_GEODE | m_AMD_MULTIPLE | m_SKYLAKE_AVX512
- | m_GENERIC)
+ | m_HASWELL | m_GENERIC)
/* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by
full sized loads. */