aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
author wwwhhhyyy <hongyu.wang@intel.com> 2021-08-30 16:41:41 +0800
committer Hongyu Wang <hongyu.wang@intel.com> 2022-01-16 12:43:02 +0800
commit 1c2575586c47f56a2e75f734af42371579516f0c (patch)
tree 00cfab3267531feb8987b3cec759731030662d9d /gcc/config
parent 9248ee41478754b46b70f2409b85d9743ece9e72 (diff)
download gcc-1c2575586c47f56a2e75f734af42371579516f0c.zip
gcc-1c2575586c47f56a2e75f734af42371579516f0c.tar.gz
gcc-1c2575586c47f56a2e75f734af42371579516f0c.tar.bz2
[i386] GLC tuning: Break false dependency for dest register.
For the GoldenCove micro-architecture, force insertion of a zero-idiom in the
asm template to break the false dependency on the dest register for several
insns.

The related insns are:

VPERM/D/Q/PS/PD
VRANGEPD/PS/SD/SS
VGETMANTSS/SD/SH
VGETMANTPS/PD - mem version only
VPMULLQ
VFMULCSH/PH
VFCMULCSH/PH

gcc/ChangeLog:

* config/i386/i386.h (TARGET_DEST_FALSE_DEP_FOR_GLC): New macro.
* config/i386/sse.md (<avx512>_<complexopname>_<mode><maskc_name><round_name>):
  Insert zero-idiom in output template when attr enabled, set new attribute
  to true for non-mask/maskz insn.
  (avx512fp16_<complexopname>sh_v8hf<mask_scalarc_name><round_scalarcz_name>): Likewise.
  (avx512dq_mul<mode>3<mask_name>): Likewise.
  (<avx2_avx512>_permvar<mode><mask_name>): Likewise.
  (avx2_perm<mode>_1<mask_name>): Likewise.
  (avx512f_perm<mode>_1<mask_name>): Likewise.
  (avx512dq_rangep<mode><mask_name><round_saeonly_name>): Likewise.
  (avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>): Likewise.
  (<avx512>_getmant<mode><mask_name><round_saeonly_name>): Likewise.
  (avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>): Likewise.
* config/i386/subst.md (mask3_dest_false_dep_for_glc_cond): New subst_attr.
  (mask4_dest_false_dep_for_glc_cond): Likewise.
  (mask6_dest_false_dep_for_glc_cond): Likewise.
  (mask10_dest_false_dep_for_glc_cond): Likewise.
  (maskc_dest_false_dep_for_glc_cond): Likewise.
  (mask_scalar4_dest_false_dep_for_glc_cond): Likewise.
  (mask_scalarc_dest_false_dep_for_glc_cond): Likewise.
* config/i386/x86-tune.def (X86_TUNE_DEST_FALSE_DEP_FOR_GLC): New DEF_TUNE
  enabled for m_SAPPHIRERAPIDS and m_ALDERLAKE.

gcc/testsuite/ChangeLog:

* gcc.target/i386/avx2-dest-false-dep-for-glc.c: New test.
* gcc.target/i386/avx512dq-dest-false-dep-for-glc.c: Ditto.
* gcc.target/i386/avx512f-dest-false-dep-for-glc.c: Ditto.
* gcc.target/i386/avx512fp16-dest-false-dep-for-glc.c: Ditto.
* gcc.target/i386/avx512fp16vl-dest-false-dep-for-glc.c: Ditto.
* gcc.target/i386/avx512vl-dest-false-dep-for-glc.c: Ditto.
Diffstat (limited to 'gcc/config')
-rw-r--r-- gcc/config/i386/i386.h 2
-rw-r--r-- gcc/config/i386/sse.md 75
-rw-r--r-- gcc/config/i386/subst.md 7
-rw-r--r-- gcc/config/i386/x86-tune.def 6
4 files changed, 82 insertions, 8 deletions
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 3ac0f69..f1bb8a8 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -429,6 +429,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_EXPAND_ABS]
#define TARGET_V2DF_REDUCTION_PREFER_HADDPD \
ix86_tune_features[X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD]
+#define TARGET_DEST_FALSE_DEP_FOR_GLC \
+ ix86_tune_features[X86_TUNE_DEST_FALSE_DEP_FOR_GLC]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 50dc5da..ea72aa5 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -6536,7 +6536,12 @@
(match_operand:VF_AVX512FP16VL 2 "nonimmediate_operand" "<round_constraint>")]
UNSPEC_COMPLEX_F_C_MUL))]
"TARGET_AVX512FP16 && <round_mode512bit_condition>"
- "v<complexopname><ssemodesuffix>\t{<round_maskc_op3>%2, %1, %0<maskc_operand3>|%0<maskc_operand3>, %1, %2<round_maskc_op3>}"
+{
+ if (TARGET_DEST_FALSE_DEP_FOR_GLC
+ && <maskc_dest_false_dep_for_glc_cond>)
+ output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+ return "v<complexopname><ssemodesuffix>\t{<round_maskc_op3>%2, %1, %0<maskc_operand3>|%0<maskc_operand3>, %1, %2<round_maskc_op3>}";
+}
[(set_attr "type" "ssemul")
(set_attr "mode" "<MODE>")])
@@ -6742,7 +6747,12 @@
(match_dup 1)
(const_int 3)))]
"TARGET_AVX512FP16"
- "v<complexopname>sh\t{<round_scalarc_mask_op3>%2, %1, %0<mask_scalarc_operand3>|%0<mask_scalarc_operand3>, %1, %2<round_scalarc_mask_op3>}"
+{
+ if (TARGET_DEST_FALSE_DEP_FOR_GLC
+ && <mask_scalarc_dest_false_dep_for_glc_cond>)
+ output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+ return "v<complexopname>sh\t{<round_scalarc_mask_op3>%2, %1, %0<mask_scalarc_operand3>|%0<mask_scalarc_operand3>, %1, %2<round_scalarc_mask_op3>}";
+}
[(set_attr "type" "ssemul")
(set_attr "mode" "V8HF")])
@@ -15207,7 +15217,14 @@
(match_operand:VI8_AVX512VL 2 "bcst_vector_operand" "vmBr")))]
"TARGET_AVX512DQ && <mask_mode512bit_condition>
&& ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
- "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+{
+ if (TARGET_DEST_FALSE_DEP_FOR_GLC
+ && <mask3_dest_false_dep_for_glc_cond>
+ && !reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2]))
+ output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+ return "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}";
+}
[(set_attr "type" "sseimul")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
@@ -24636,7 +24653,14 @@
(match_operand:<sseintvecmode> 2 "register_operand" "v")]
UNSPEC_VPERMVAR))]
"TARGET_AVX2 && <mask_mode512bit_condition>"
- "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
+{
+ if (TARGET_DEST_FALSE_DEP_FOR_GLC
+ && <mask3_dest_false_dep_for_glc_cond>
+ && !reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2]))
+ output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+ return "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}";
+}
[(set_attr "type" "sselog")
(set_attr "prefix" "<mask_prefix2>")
(set_attr "mode" "<sseinsnmode>")])
@@ -24873,6 +24897,10 @@
mask |= INTVAL (operands[4]) << 4;
mask |= INTVAL (operands[5]) << 6;
operands[2] = GEN_INT (mask);
+ if (TARGET_DEST_FALSE_DEP_FOR_GLC
+ && <mask6_dest_false_dep_for_glc_cond>
+ && !reg_mentioned_p (operands[0], operands[1]))
+ output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
[(set_attr "type" "sselog")
@@ -24944,6 +24972,10 @@
mask |= INTVAL (operands[4]) << 4;
mask |= INTVAL (operands[5]) << 6;
operands[2] = GEN_INT (mask);
+ if (TARGET_DEST_FALSE_DEP_FOR_GLC
+ && <mask10_dest_false_dep_for_glc_cond>
+ && !reg_mentioned_p (operands[0], operands[1]))
+ output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
}
[(set_attr "type" "sselog")
@@ -26843,7 +26875,14 @@
(match_operand:SI 3 "const_0_to_15_operand")]
UNSPEC_RANGE))]
"TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
- "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
+{
+ if (TARGET_DEST_FALSE_DEP_FOR_GLC
+ && <mask4_dest_false_dep_for_glc_cond>
+ && !reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2]))
+ output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+ return "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}";
+}
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
@@ -26859,7 +26898,14 @@
(match_dup 1)
(const_int 1)))]
"TARGET_AVX512DQ"
- "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
+{
+ if (TARGET_DEST_FALSE_DEP_FOR_GLC
+ && <mask_scalar4_dest_false_dep_for_glc_cond>
+ && !reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2]))
+ output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+ return "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
+}
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
@@ -26899,7 +26945,13 @@
(match_operand:SI 2 "const_0_to_15_operand")]
UNSPEC_GETMANT))]
"TARGET_AVX512F"
- "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
+{
+ if (TARGET_DEST_FALSE_DEP_FOR_GLC
+ && <mask3_dest_false_dep_for_glc_cond>
+ && MEM_P (operands[1]))
+ output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+ return "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}";
+}
[(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
@@ -26914,7 +26966,14 @@
(match_dup 1)
(const_int 1)))]
"TARGET_AVX512F"
- "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
+{
+ if (TARGET_DEST_FALSE_DEP_FOR_GLC
+ && <mask_scalar4_dest_false_dep_for_glc_cond>
+ && !reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2]))
+ output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
+ return "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}";
+}
[(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
index 21d445c..bb86f82 100644
--- a/gcc/config/i386/subst.md
+++ b/gcc/config/i386/subst.md
@@ -71,6 +71,11 @@
(define_subst_attr "mask_prefix4" "mask" "orig,orig,vex" "evex,evex,evex")
(define_subst_attr "bcst_mask_prefix4" "mask" "orig,orig,maybe_evex" "evex,evex,evex")
(define_subst_attr "mask_expand_op3" "mask" "3" "5")
+(define_subst_attr "mask3_dest_false_dep_for_glc_cond" "mask" "1" "operands[3] == CONST0_RTX(<MODE>mode)")
+(define_subst_attr "mask4_dest_false_dep_for_glc_cond" "mask" "1" "operands[4] == CONST0_RTX(<MODE>mode)")
+(define_subst_attr "mask6_dest_false_dep_for_glc_cond" "mask" "1" "operands[6] == CONST0_RTX(<MODE>mode)")
+(define_subst_attr "mask10_dest_false_dep_for_glc_cond" "mask" "1" "operands[10] == CONST0_RTX(<MODE>mode)")
+(define_subst_attr "maskc_dest_false_dep_for_glc_cond" "maskc" "1" "operands[3] == CONST0_RTX(<MODE>mode)")
(define_subst "mask"
[(set (match_operand:SUBST_V 0)
@@ -337,6 +342,8 @@
(define_subst_attr "mask_scalar_operand3" "mask_scalar" "" "%{%4%}%N3")
(define_subst_attr "mask_scalar_operand4" "mask_scalar" "" "%{%5%}%N4")
(define_subst_attr "mask_scalarcz_operand4" "mask_scalarcz" "" "%{%5%}%N4")
+(define_subst_attr "mask_scalar4_dest_false_dep_for_glc_cond" "mask_scalar" "1" "operands[4] == CONST0_RTX(<MODE>mode)")
+(define_subst_attr "mask_scalarc_dest_false_dep_for_glc_cond" "mask_scalarc" "1" "operands[3] == CONST0_RTX(V8HFmode)")
(define_subst "mask_scalar"
[(set (match_operand:SUBST_V 0)
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 0d3fd07..f9eb3c2 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -79,6 +79,12 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY,
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
| m_BDVER | m_ZNVER | m_ALDERLAKE | m_GENERIC)
+/* X86_TUNE_DEST_FALSE_DEP_FOR_GLC: This knob inserts a zero-idiom before
+ several insns to break the false dependency on the dest register for the
+ GLC micro-architecture. */
+DEF_TUNE (X86_TUNE_DEST_FALSE_DEP_FOR_GLC,
+ "dest_false_dep_for_glc", m_SAPPHIRERAPIDS | m_ALDERLAKE)
+
/* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
are resolved on SSE register parts instead of whole registers, so we may
maintain just lower part of scalar values in proper format leaving the