aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/i386/i386-features.c 6
-rw-r--r-- gcc/config/i386/i386.h 4
-rw-r--r-- gcc/config/i386/i386.md 9
-rw-r--r-- gcc/config/i386/x86-tune.def 15
-rw-r--r-- gcc/testsuite/gcc.target/i386/avx-covert-1.c 19
-rw-r--r-- gcc/testsuite/gcc.target/i386/avx-fp-covert-1.c 15
-rw-r--r-- gcc/testsuite/gcc.target/i386/avx-int-covert-1.c 14
-rw-r--r-- gcc/testsuite/gcc.target/i386/sse-covert-1.c 19
-rw-r--r-- gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c 15
-rw-r--r-- gcc/testsuite/gcc.target/i386/sse-int-covert-1.c 14
10 files changed, 125 insertions, 5 deletions
diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c
index a65f601..a525a83 100644
--- a/gcc/config/i386/i386-features.c
+++ b/gcc/config/i386/i386-features.c
@@ -2222,12 +2222,14 @@ remove_partial_avx_dependency (void)
{
case E_SFmode:
case E_DFmode:
- if (TARGET_USE_VECTOR_FP_CONVERTS)
+ if (TARGET_USE_VECTOR_FP_CONVERTS
+ || !TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY)
continue;
break;
case E_SImode:
case E_DImode:
- if (TARGET_USE_VECTOR_CONVERTS)
+ if (TARGET_USE_VECTOR_CONVERTS
+ || !TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY)
continue;
break;
default:
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index e76bb55..ec60b89 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -334,6 +334,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_PARTIAL_REG_DEPENDENCY]
#define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY]
+#define TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY \
+ ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY]
+#define TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY \
+ ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY]
#define TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \
ix86_tune_features[X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL]
#define TARGET_SSE_UNALIGNED_STORE_OPTIMAL \
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 13f6f57..c82a9dc 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4535,7 +4535,8 @@
(float_extend:DF
(match_operand:SF 1 "nonimmediate_operand")))]
"!TARGET_AVX
- && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
+ && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
+ && epilogue_completed
&& optimize_function_for_speed_p (cfun)
&& (!REG_P (operands[1])
|| (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
@@ -4708,7 +4709,8 @@
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand")))]
"!TARGET_AVX
- && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
+ && TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
+ && epilogue_completed
&& optimize_function_for_speed_p (cfun)
&& (!REG_P (operands[1])
|| (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
@@ -5243,7 +5245,8 @@
[(set (match_operand:MODEF 0 "sse_reg_operand")
(float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
"!TARGET_AVX
- && TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
+ && TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY
+ && epilogue_completed
&& optimize_function_for_speed_p (cfun)
&& (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)"
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 088edb6..58e8ead 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -64,6 +64,21 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency",
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
| m_BDVER | m_ZNVER | m_TREMONT | m_GENERIC)
+/* X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY: This knob avoids a
+ partial write to the destination in scalar SSE conversions between
+ FP modes (SF <-> DF). */
+DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY,
+ "sse_partial_reg_fp_converts_dependency",
+ m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
+ | m_BDVER | m_ZNVER | m_GENERIC)
+
+/* X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY: This knob avoids a partial
+ write to the destination in scalar SSE conversions from integer to FP. */
+DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY,
+ "sse_partial_reg_converts_dependency",
+ m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
+ | m_BDVER | m_ZNVER | m_GENERIC)
+
/* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
are resolved on SSE register parts instead of whole registers, so we may
maintain just lower part of scalar values in proper format leaving the
diff --git a/gcc/testsuite/gcc.target/i386/avx-covert-1.c b/gcc/testsuite/gcc.target/i386/avx-covert-1.c
new file mode 100644
index 0000000..b6c794e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-covert-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=^sse_partial_reg_fp_converts_dependency,^sse_partial_reg_converts_dependency" } */
+
+extern float f;
+extern double d;
+extern int i;
+
+void
+foo (void)
+{
+ d = f;
+ f = i;
+}
+
+/* { dg-final { scan-assembler "vcvtss2sd" } } */
+/* { dg-final { scan-assembler "vcvtsi2ssl" } } */
+/* { dg-final { scan-assembler-not "vcvtps2pd" } } */
+/* { dg-final { scan-assembler-not "vcvtdq2ps" } } */
+/* { dg-final { scan-assembler-not "vxorps" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx-fp-covert-1.c b/gcc/testsuite/gcc.target/i386/avx-fp-covert-1.c
new file mode 100644
index 0000000..c40c48b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-fp-covert-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=^sse_partial_reg_fp_converts_dependency" } */
+
+extern float f;
+extern double d;
+
+void
+foo (void)
+{
+ d = f;
+}
+
+/* { dg-final { scan-assembler "vcvtss2sd" } } */
+/* { dg-final { scan-assembler-not "vcvtps2pd" } } */
+/* { dg-final { scan-assembler-not "vxorps" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx-int-covert-1.c b/gcc/testsuite/gcc.target/i386/avx-int-covert-1.c
new file mode 100644
index 0000000..01bb64e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-int-covert-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=skylake -mfpmath=sse -mtune-ctrl=^sse_partial_reg_converts_dependency" } */
+
+extern float f;
+extern int i;
+
+void
+foo (void)
+{
+ f = i;
+}
+
+/* { dg-final { scan-assembler "vcvtsi2ssl" } } */
+/* { dg-final { scan-assembler-not "vxorps" } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse-covert-1.c b/gcc/testsuite/gcc.target/i386/sse-covert-1.c
new file mode 100644
index 0000000..c30af69
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse-covert-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -mfpmath=sse -mtune-ctrl=^sse_partial_reg_fp_converts_dependency,^sse_partial_reg_converts_dependency" } */
+
+extern float f;
+extern double d;
+extern int i;
+
+void
+foo (void)
+{
+ d = f;
+ f = i;
+}
+
+/* { dg-final { scan-assembler "cvtss2sd" } } */
+/* { dg-final { scan-assembler "cvtsi2ssl" } } */
+/* { dg-final { scan-assembler-not "cvtps2pd" } } */
+/* { dg-final { scan-assembler-not "cvtdq2ps" } } */
+/* { dg-final { scan-assembler-not "pxor" } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c b/gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c
new file mode 100644
index 0000000..b6567e6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse-fp-covert-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -mfpmath=sse -mtune-ctrl=^sse_partial_reg_fp_converts_dependency" } */
+
+extern float f;
+extern double d;
+
+void
+foo (void)
+{
+ d = f;
+}
+
+/* { dg-final { scan-assembler "cvtss2sd" } } */
+/* { dg-final { scan-assembler-not "cvtps2pd" } } */
+/* { dg-final { scan-assembler-not "pxor" } } */
diff --git a/gcc/testsuite/gcc.target/i386/sse-int-covert-1.c b/gcc/testsuite/gcc.target/i386/sse-int-covert-1.c
new file mode 100644
index 0000000..107f724
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse-int-covert-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -mfpmath=sse -mtune-ctrl=^sse_partial_reg_converts_dependency" } */
+
+extern float f;
+extern int i;
+
+void
+foo (void)
+{
+ f = i;
+}
+
+/* { dg-final { scan-assembler "cvtsi2ssl" } } */
+/* { dg-final { scan-assembler-not "pxor" } } */