AVX512BF16: Do not allow permutation with vcvtne2ps2bf16 [PR115889]

According to the AVX512BF16 instruction specification, the conversion from float to BF16 is not a simple truncation. It has special handling for denormals and NaNs, and even for normal floats it adds an extra bias according to the least significant bit of the BF16 number. This means we cannot use vcvtne2ps2bf16 for any BF16 vector shuffle. The optimization introduced in r15-1368 added a specific split that converts an HImode permutation into this instruction, so remove it and treat BFmode permutations the same as HFmode ones. gcc/ChangeLog: PR target/115889 * config/i386/predicates.md (vcvtne2ps2bf_parallel): Remove. * config/i386/sse.md (hi_cvt_bf): Remove. (HI_CVT_BF): Likewise. (vpermt2_sepcial_bf16_shuffle_<mode>): Likewise. gcc/testsuite/ChangeLog: PR target/115889 * gcc.target/i386/vpermt2-special-bf16-shufflue.c: Adjust output scan.
author: Hongyu Wang <hongyu.wang@intel.com> 2024-07-13 11:45:31 +0800
committer: Hongyu Wang <hongyu.wang@intel.com> 2024-07-15 10:34:07 +0800
commit: 02a3bf5e2f0c18078bf67fc0002219edba1d76ff (patch)
tree: 604905433761b75991c49260ce1e1a582660fde5
parent: 666f167bec09d1234e6496c86b566fe1a71f61f0 (diff)
download: gcc-02a3bf5e2f0c18078bf67fc0002219edba1d76ff.zip
gcc-02a3bf5e2f0c18078bf67fc0002219edba1d76ff.tar.gz
gcc-02a3bf5e2f0c18078bf67fc0002219edba1d76ff.tar.bz2
3 files changed, 1 insertions, 48 deletions
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index a894847..5d0bb1e 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -2327,14 +2327,3 @@
 
   return true;
 })
-
-;; Check that each element is odd and incrementally increasing from 1
-(define_predicate "vcvtne2ps2bf_parallel"
-  (and (match_code "const_vector")
-       (match_code "const_int" "a"))
-{
-  for (int i = 0; i < XVECLEN (op, 0); ++i)
-    if (INTVAL (XVECEXP (op, 0, i)) != (2 * i + 1))
-      return false;
-  return true;
-})
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b3b4697..c134494 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -31460,38 +31460,3 @@
   "TARGET_AVXVNNIINT16"
   "vpdp<vpdpwprodtype>\t{%3, %2, %0|%0, %2, %3}"
    [(set_attr "prefix" "vex")])
-
-(define_mode_attr hi_cvt_bf
-  [(V8HI "v8bf") (V16HI "v16bf") (V32HI "v32bf")])
-
-(define_mode_attr HI_CVT_BF
-  [(V8HI "V8BF") (V16HI "V16BF") (V32HI "V32BF")])
-
-(define_insn_and_split "vpermt2_sepcial_bf16_shuffle_<mode>"
-  [(set (match_operand:VI2_AVX512F 0 "register_operand")
-	(unspec:VI2_AVX512F
-	  [(match_operand:VI2_AVX512F 1 "vcvtne2ps2bf_parallel")
-	   (match_operand:VI2_AVX512F 2 "register_operand")
-	   (match_operand:VI2_AVX512F 3 "nonimmediate_operand")]
-	   UNSPEC_VPERMT2))]
-  "TARGET_AVX512VL && TARGET_AVX512BF16 && ix86_pre_reload_split ()"
-  "#"
-  "&& 1"
-  [(const_int 0)]
-{
-  rtx op0 = gen_reg_rtx (<HI_CVT_BF>mode);
-  operands[2] = lowpart_subreg (<ssePSmode>mode,
-				force_reg (<MODE>mode, operands[2]),
-				<MODE>mode);
-  operands[3] = lowpart_subreg (<ssePSmode>mode,
-				force_reg (<MODE>mode, operands[3]),
-				<MODE>mode);
-
-  emit_insn (gen_avx512f_cvtne2ps2bf16_<hi_cvt_bf>(op0,
-						   operands[3],
-						   operands[2]));
-  emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0,
-					       <HI_CVT_BF>mode));
-  DONE;
-}
-[(set_attr "mode" "<sseinsnmode>")])
diff --git a/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c
index 5c65f2a..e504f3f 100755
--- a/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c
+++ b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c
@@ -1,7 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mavx512bf16 -mavx512vl" } */
-/* { dg-final { scan-assembler-not "vpermi2b" } } */
-/* { dg-final { scan-assembler-times "vcvtne2ps2bf16" 3 } } */
+/* { dg-final { scan-assembler-times "vpermi2w" 3 } } */
 
 typedef __bf16 v8bf __attribute__((vector_size(16)));
 typedef __bf16 v16bf __attribute__((vector_size(32)));
author	Hongyu Wang <hongyu.wang@intel.com>	2024-07-13 11:45:31 +0800
committer	Hongyu Wang <hongyu.wang@intel.com>	2024-07-15 10:34:07 +0800
commit	02a3bf5e2f0c18078bf67fc0002219edba1d76ff (patch)
tree	604905433761b75991c49260ce1e1a582660fde5
parent	666f167bec09d1234e6496c86b566fe1a71f61f0 (diff)
download	gcc-02a3bf5e2f0c18078bf67fc0002219edba1d76ff.zip gcc-02a3bf5e2f0c18078bf67fc0002219edba1d76ff.tar.gz gcc-02a3bf5e2f0c18078bf67fc0002219edba1d76ff.tar.bz2