diff options
author | Jakub Jelinek <jakub@redhat.com> | 2022-01-19 02:24:06 +0100 |
---|---|---|
committer | Jakub Jelinek <jakub@redhat.com> | 2022-01-19 02:24:06 +0100 |
commit | ddce00dba244d889d688490517fb106169a72f01 (patch) | |
tree | 4fce2d3cf3d19fcd7510a24d62d4d4154590f9e6 /gcc | |
parent | 6325041c2b68af096195e0eef92091b2e293e950 (diff) | |
download | gcc-ddce00dba244d889d688490517fb106169a72f01.zip gcc-ddce00dba244d889d688490517fb106169a72f01.tar.gz gcc-ddce00dba244d889d688490517fb106169a72f01.tar.bz2 |
i386: Fix GLC tuning with -masm=intel [PR104104]
> > On Sat, Jan 15, 2022 at 5:39 PM Hongyu Wang <wwwhhhyyy333@gmail.com> wrote:
> > > Thanks for the suggestion, here is the updated patch that survived
> > > bootstrap/regtest.
Unfortunately the patch results in assembler failures with -masm=intel.
> > > > + if (TARGET_DEST_FALSE_DEPENDENCY
> > > > + && get_attr_dest_false_dep (insn) ==
> > > > + DEST_FALSE_DEP_TRUE)
> > > > + output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands);
All the vxorps insns were emitted like the above, which means for -masm=att
it looks like
vxorps %xmm3, %xmm3, %xmm3
but for -masm=intel like:
vxorps
What we obviously want is
vxorps xmm3, xmm3, xmm3
so the following patch just drops the erroneous {}s.
2022-01-19 Jakub Jelinek <jakub@redhat.com>
PR target/104104
* config/i386/sse.md
(<avx512>_<complexopname>_<mode><maskc_name><round_name>,
avx512fp16_<complexopname>sh_v8hf<mask_scalarc_name><round_scalarcz_name>,
avx512dq_mul<mode>3<mask_name>, <avx2_avx512>_permvar<mode><mask_name>,
avx2_perm<mode>_1<mask_name>, avx512f_perm<mode>_1<mask_name>,
avx512dq_rangep<mode><mask_name><round_saeonly_name>,
avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>,
<avx512>_getmant<mode><mask_name><round_saeonly_name>,
avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>):
Use vxorps\t%x0, %x0, %x0 instead of vxorps\t{%x0, %x0, %x0}.
* gcc.target/i386/pr104104.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/sse.md | 20 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr104104.c | 10 |
2 files changed, 20 insertions, 10 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 34175fd..829107e 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -6539,7 +6539,7 @@ { if (TARGET_DEST_FALSE_DEP_FOR_GLC && <maskc_dest_false_dep_for_glc_cond>) - output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands); + output_asm_insn ("vxorps\t%x0, %x0, %x0", operands); return "v<complexopname><ssemodesuffix>\t{<round_maskc_op3>%2, %1, %0<maskc_operand3>|%0<maskc_operand3>, %1, %2<round_maskc_op3>}"; } [(set_attr "type" "ssemul") @@ -6750,7 +6750,7 @@ { if (TARGET_DEST_FALSE_DEP_FOR_GLC && <mask_scalarc_dest_false_dep_for_glc_cond>) - output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands); + output_asm_insn ("vxorps\t%x0, %x0, %x0", operands); return "v<complexopname>sh\t{<round_scalarc_mask_op3>%2, %1, %0<mask_scalarc_operand3>|%0<mask_scalarc_operand3>, %1, %2<round_scalarc_mask_op3>}"; } [(set_attr "type" "ssemul") @@ -15222,7 +15222,7 @@ && <mask3_dest_false_dep_for_glc_cond> && !reg_mentioned_p (operands[0], operands[1]) && !reg_mentioned_p (operands[0], operands[2])) - output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands); + output_asm_insn ("vxorps\t%x0, %x0, %x0", operands); return "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"; } [(set_attr "type" "sseimul") @@ -24658,7 +24658,7 @@ && <mask3_dest_false_dep_for_glc_cond> && !reg_mentioned_p (operands[0], operands[1]) && !reg_mentioned_p (operands[0], operands[2])) - output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands); + output_asm_insn ("vxorps\t%x0, %x0, %x0", operands); return "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"; } [(set_attr "type" "sselog") @@ -24900,7 +24900,7 @@ if (TARGET_DEST_FALSE_DEP_FOR_GLC && <mask6_dest_false_dep_for_glc_cond> && !reg_mentioned_p (operands[0], operands[1])) - output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands); + output_asm_insn ("vxorps\t%x0, %x0, %x0", operands); return "vperm<ssemodesuffix>\t{%2, %1, 
%0<mask_operand6>|%0<mask_operand6>, %1, %2}"; } [(set_attr "type" "sselog") @@ -24975,7 +24975,7 @@ if (TARGET_DEST_FALSE_DEP_FOR_GLC && <mask10_dest_false_dep_for_glc_cond> && !reg_mentioned_p (operands[0], operands[1])) - output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands); + output_asm_insn ("vxorps\t%x0, %x0, %x0", operands); return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}"; } [(set_attr "type" "sselog") @@ -26880,7 +26880,7 @@ && <mask4_dest_false_dep_for_glc_cond> && !reg_mentioned_p (operands[0], operands[1]) && !reg_mentioned_p (operands[0], operands[2])) - output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands); + output_asm_insn ("vxorps\t%x0, %x0, %x0", operands); return "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"; } [(set_attr "type" "sse") @@ -26903,7 +26903,7 @@ && <mask_scalar4_dest_false_dep_for_glc_cond> && !reg_mentioned_p (operands[0], operands[1]) && !reg_mentioned_p (operands[0], operands[2])) - output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands); + output_asm_insn ("vxorps\t%x0, %x0, %x0", operands); return "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"; } [(set_attr "type" "sse") @@ -26949,7 +26949,7 @@ if (TARGET_DEST_FALSE_DEP_FOR_GLC && <mask3_dest_false_dep_for_glc_cond> && MEM_P (operands[1])) - output_asm_insn ("vxorps\t{%x0, %x0, %x0}", operands); + output_asm_insn ("vxorps\t%x0, %x0, %x0", operands); return "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"; } [(set_attr "prefix" "evex") @@ -26971,7 +26971,7 @@ && <mask_scalar4_dest_false_dep_for_glc_cond> && !reg_mentioned_p (operands[0], operands[1]) && !reg_mentioned_p (operands[0], operands[2])) - output_asm_insn ("vxorps\t{%x0, %x0, %x0}", 
operands); + output_asm_insn ("vxorps\t%x0, %x0, %x0", operands); return "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"; } [(set_attr "prefix" "evex") diff --git a/gcc/testsuite/gcc.target/i386/pr104104.c b/gcc/testsuite/gcc.target/i386/pr104104.c new file mode 100644 index 0000000..1653cae --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr104104.c @@ -0,0 +1,10 @@ +/* PR target/104104 */ +/* { dg-do assemble { target vect_simd_clones } } */ +/* { dg-require-effective-target masm_intel } */ +/* { dg-options "-march=alderlake -masm=intel -O1 -fallow-store-data-races -funroll-all-loops" } */ + +__attribute__ ((simd)) short int +foo (void) +{ + return 0; +} |