diff options
-rw-r--r-- | ChangeLog | 11 | ||||
-rw-r--r-- | gcc/config/i386/i386.c | 5 | ||||
-rw-r--r-- | gcc/config/i386/x86-tune.def | 26 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 8 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr84413-1.c | 17 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr84413-2.c | 17 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr84413-3.c | 17 |
7 files changed, 87 insertions, 14 deletions
@@ -1,3 +1,14 @@ +2018-07-13 H.J. Lu <hongjiu.lu@intel.com> + Sunil K Pandey <sunil.k.pandey@intel.com> + + PR target/84413 + * config/i386/i386.c (m_CORE_AVX512): New. + (m_CORE_AVX2): Likewise. + (m_CORE_ALL): Add m_CORE_AVX2. + * config/i386/x86-tune.def: Replace m_HASWELL with m_CORE_AVX2. + Replace m_SKYLAKE_AVX512 with m_CORE_AVX512 on avx256_optimal + and remove the rest of m_SKYLAKE_AVX512. + 2018-07-06 Sebastian Huber <sebastian.huber@embedded-brains.de> * config.sub: Sync with upstream version 2018-07-03. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 9e46b7b..ccc24e3 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -138,7 +138,6 @@ const struct processor_costs *ix86_cost = NULL; #define m_NEHALEM (HOST_WIDE_INT_1U<<PROCESSOR_NEHALEM) #define m_SANDYBRIDGE (HOST_WIDE_INT_1U<<PROCESSOR_SANDYBRIDGE) #define m_HASWELL (HOST_WIDE_INT_1U<<PROCESSOR_HASWELL) -#define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL) #define m_BONNELL (HOST_WIDE_INT_1U<<PROCESSOR_BONNELL) #define m_SILVERMONT (HOST_WIDE_INT_1U<<PROCESSOR_SILVERMONT) #define m_KNL (HOST_WIDE_INT_1U<<PROCESSOR_KNL) @@ -148,6 +147,10 @@ const struct processor_costs *ix86_cost = NULL; #define m_CANNONLAKE (HOST_WIDE_INT_1U<<PROCESSOR_CANNONLAKE) #define m_ICELAKE_CLIENT (HOST_WIDE_INT_1U<<PROCESSOR_ICELAKE_CLIENT) #define m_ICELAKE_SERVER (HOST_WIDE_INT_1U<<PROCESSOR_ICELAKE_SERVER) +#define m_CORE_AVX512 (m_SKYLAKE_AVX512 | m_CANNONLAKE \ + | m_ICELAKE_CLIENT | m_ICELAKE_SERVER) +#define m_CORE_AVX2 (m_HASWELL | m_SKYLAKE | m_CORE_AVX512) +#define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2) #define m_GOLDMONT (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT) #define m_GOLDMONT_PLUS (HOST_WIDE_INT_1U<<PROCESSOR_GOLDMONT_PLUS) #define m_TREMONT (HOST_WIDE_INT_1U<<PROCESSOR_TREMONT) diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index 8a8d5ab..a46450a 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -49,9 +49,9 @@ DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", over partial stores. For example preffer MOVZBL or MOVQ to load 8bit value over movb. */ DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency", - m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL + m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL - | m_KNL | m_KNM | m_AMD_MULTIPLE | m_SKYLAKE_AVX512 | m_TREMONT + | m_KNL | m_KNM | m_AMD_MULTIPLE | m_TREMONT | m_GENERIC) /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store @@ -87,8 +87,8 @@ DEF_TUNE (X86_TUNE_PARTIAL_FLAG_REG_STALL, "partial_flag_reg_stall", DEF_TUNE (X86_TUNE_MOVX, "movx", m_PPRO | m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_KNL | m_KNM | m_INTEL - | m_GOLDMONT_PLUS | m_GEODE | m_AMD_MULTIPLE | m_SKYLAKE_AVX512 - | m_HASWELL | m_TREMONT | m_GENERIC) + | m_GOLDMONT_PLUS | m_GEODE | m_AMD_MULTIPLE + | m_CORE_AVX2 | m_TREMONT | m_GENERIC) /* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by full sized loads. */ @@ -105,19 +105,19 @@ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_32, "fuse_cmp_and_branch_32", /* X86_TUNE_FUSE_CMP_AND_BRANCH_64: Fuse compare with a subsequent conditional jump instruction for TARGET_64BIT. */ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_64, "fuse_cmp_and_branch_64", - m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER | m_ZNVER1 | m_GENERIC) + m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_BDVER | m_ZNVER1 | m_GENERIC) /* X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS: Fuse compare with a subsequent conditional jump instruction when the condition jump check sign flag (SF) or overflow flag (OF). */ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags", - m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER | m_ZNVER1 | m_GENERIC) + m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_BDVER | m_ZNVER1 | m_GENERIC) /* X86_TUNE_FUSE_ALU_AND_BRANCH: Fuse alu with a subsequent conditional jump instruction when the alu instruction produces the CCFLAG consumed by the conditional jump instruction. */ DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch", - m_SANDYBRIDGE | m_HASWELL | m_GENERIC) + m_SANDYBRIDGE | m_CORE_AVX2 | m_GENERIC) /*****************************************************************************/ @@ -297,7 +297,7 @@ DEF_TUNE (X86_TUNE_USE_BT, "use_bt", /* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency for bit-manipulation instructions. */ DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi", - m_SANDYBRIDGE | m_HASWELL | m_GENERIC) + m_SANDYBRIDGE | m_CORE_AVX2 | m_GENERIC) /* X86_TUNE_ADJUST_UNROLL: This enables adjusting the unroll factor based on hardware capabilities. Bdver3 hardware has a loop buffer which makes @@ -349,15 +349,15 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill", /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL: Use movups for misaligned loads instead of a sequence loading registers by parts. */ DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal", - m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL | m_KNM - | m_INTEL | m_SKYLAKE_AVX512 | m_GOLDMONT | m_GOLDMONT_PLUS + m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM + | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER1 | m_GENERIC) /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores instead of a sequence loading registers by parts. */ DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal", - m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_SILVERMONT | m_KNL | m_KNM - | m_INTEL | m_SKYLAKE_AVX512 | m_GOLDMONT | m_GOLDMONT_PLUS + m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM + | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_BDVER | m_ZNVER1 | m_GENERIC) /* Use packed single precision instructions where posisble. I.e. movups instead @@ -446,7 +446,7 @@ DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2 /* X86_TUNE_AVX256_OPTIMAL: Use 256-bit AVX instructions instead of 512-bit AVX instructions in the auto-vectorizer. */ -DEF_TUNE (X86_TUNE_AVX256_OPTIMAL, "avx256_optimal", m_SKYLAKE_AVX512) +DEF_TUNE (X86_TUNE_AVX256_OPTIMAL, "avx256_optimal", m_CORE_AVX512) /*****************************************************************************/ /* Historical relics: tuning flags that helps a specific old CPU designs */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3bedd12..b197642 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2018-07-13 H.J. Lu <hongjiu.lu@intel.com> + Sunil K Pandey <sunil.k.pandey@intel.com> + + PR target/84413 + * gcc.target/i386/pr84413-1.c: New test. + * gcc.target/i386/pr84413-2.c: Likewise. + * gcc.target/i386/pr84413-3.c: Likewise. + 2018-07-13 Bill Schmidt <wschmidt@linux.ibm.com> Steve Munroe <munroesj52@gmail.com> diff --git a/gcc/testsuite/gcc.target/i386/pr84413-1.c b/gcc/testsuite/gcc.target/i386/pr84413-1.c new file mode 100644 index 0000000..1c94d77 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr84413-1.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=skylake-avx512" } */ +/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */ +/* { dg-final { scan-assembler "vmulpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]+" } } */ + +#define N 1024 + +double a[N], b[N], c[N]; + +void +avx512f_test (void) +{ + int i; + + for (i = 0; i < N; i++) + c[i] = a[i] * b[i]; +} diff --git a/gcc/testsuite/gcc.target/i386/pr84413-2.c b/gcc/testsuite/gcc.target/i386/pr84413-2.c new file mode 100644 index 0000000..adf9b52 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr84413-2.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=cannonlake" } */ +/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */ +/* { dg-final { scan-assembler "vmulpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]+" } } */ + +#define N 1024 + +double a[N], b[N], c[N]; + +void +avx512f_test (void) +{ + int i; + + for (i = 0; i < N; i++) + c[i] = a[i] * b[i]; +} diff --git a/gcc/testsuite/gcc.target/i386/pr84413-3.c b/gcc/testsuite/gcc.target/i386/pr84413-3.c new file mode 100644 index 0000000..76bf25f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr84413-3.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=icelake-server" } */ +/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */ +/* { dg-final { scan-assembler "vmulpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]+" } } */ + +#define N 1024 + +double a[N], b[N], c[N]; + +void +avx512f_test (void) +{ + int i; + + for (i = 0; i < N; i++) + c[i] = a[i] * b[i]; +} |