aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLevy Hsu <admin@levyhsu.com>2024-06-13 15:20:04 +0930
committerLevy Hsu <admin@levyhsu.com>2024-06-17 06:57:17 +0000
commit6d0b7b69d143025f271d0041cfa29cf26e6c343b (patch)
tree1573a73cca19a0ca7080a987e248b3df4466a60f
parente86d4e4ac7d7438f2f1b2437508cfd394a0a34d9 (diff)
downloadgcc-6d0b7b69d143025f271d0041cfa29cf26e6c343b.zip
gcc-6d0b7b69d143025f271d0041cfa29cf26e6c343b.tar.gz
gcc-6d0b7b69d143025f271d0041cfa29cf26e6c343b.tar.bz2
x86: Emit cvtne2ps2bf16 for odd increasing perm in __builtin_shufflevector
This patch updates the GCC x86 backend to efficiently handle odd, incrementally increasing permutations of BF16 vectors using the cvtne2ps2bf16 instruction. It modifies ix86_vectorize_vec_perm_const to support these operations and adds a specific predicate to ensure proper sequence handling. gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_vectorize_vec_perm_const): Convert BF to HI using subreg. * config/i386/predicates.md (vcvtne2ps2bf_parallel): New predicate that matches an odd increasing perm. * config/i386/sse.md (vpermt2_sepcial_bf16_shuffle_<mode>): New define_insn_and_split. gcc/testsuite/ChangeLog: * gcc.target/i386/vpermt2-special-bf16-shufflue.c: New test.
-rw-r--r--gcc/config/i386/i386-expand.cc4
-rw-r--r--gcc/config/i386/predicates.md11
-rw-r--r--gcc/config/i386/sse.md35
-rwxr-xr-xgcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c27
4 files changed, 75 insertions, 2 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a4379b8..7c6a82e 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -23657,8 +23657,8 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
if (GET_MODE_SIZE (vmode) == 64 && !TARGET_EVEX512)
return false;
- /* For HF mode vector, convert it to HI using subreg. */
- if (GET_MODE_INNER (vmode) == HFmode)
+ /* For HF and BF mode vector, convert it to HI using subreg. */
+ if (GET_MODE_INNER (vmode) == HFmode || GET_MODE_INNER (vmode) == BFmode)
{
machine_mode orig_mode = vmode;
vmode = mode_for_vector (HImode,
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 7afe310..1676c50 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -2322,3 +2322,14 @@
return true;
})
+
+;; Return true if OP is a const_vector whose I-th element equals 2 * I + 1,
+;; i.e. the odd, incrementally increasing sequence 1, 3, 5, ...
+;; NOTE(review): per the GCC internals manual, a match_code path of "a"
+;; tests only XVECEXP (op, 0, 0), while the loop below applies INTVAL to
+;; every element; confirm all elements are const_int for the modes this
+;; predicate is used with.
+(define_predicate "vcvtne2ps2bf_parallel"
+ (and (match_code "const_vector")
+ (match_code "const_int" "a"))
+{
+ for (int i = 0; i < XVECLEN (op, 0); ++i)
+ if (INTVAL (XVECEXP (op, 0, i)) != (2 * i + 1))
+ return false;
+ return true;
+})
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 680a46a..5ddd1c0 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -30698,3 +30698,38 @@
"TARGET_AVXVNNIINT16"
"vpdp<vpdpwprodtype>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "prefix" "vex")])
+
+;; Map each HI vector mode to the BF vector mode with the same element
+;; count, in lower case (used to build a generator function name).
+(define_mode_attr hi_cvt_bf
+ [(V8HI "v8bf") (V16HI "v16bf") (V32HI "v32bf")])
+
+;; Same mapping in upper case (used to name the machine mode itself).
+(define_mode_attr HI_CVT_BF
+ [(V8HI "V8BF") (V16HI "V16BF") (V32HI "V32BF")])
+
+;; Pre-reload splitter for an UNSPEC_VPERMT2 whose operand 1 satisfies
+;; vcvtne2ps2bf_parallel.  The insn is never output directly ("#" template)
+;; and is always split ("&& 1") into an avx512f_cvtne2ps2bf16_<hi_cvt_bf>
+;; insn followed by a move of its result into operand 0.
+;; NOTE(review): the replacement insn's name suggests a rounding
+;; (nearest-even) float->bf16 conversion, whereas the matched permutation
+;; only selects 16-bit elements; confirm both produce identical bits for
+;; all inputs.
+;; NOTE(review): TARGET_AVX512VL is required for every mode of the
+;; iterator; confirm that is intended for the widest mode as well.
+(define_insn_and_split "vpermt2_sepcial_bf16_shuffle_<mode>"
+ [(set (match_operand:VI2_AVX512F 0 "register_operand")
+ (unspec:VI2_AVX512F
+ [(match_operand:VI2_AVX512F 1 "vcvtne2ps2bf_parallel")
+ (match_operand:VI2_AVX512F 2 "register_operand")
+ (match_operand:VI2_AVX512F 3 "nonimmediate_operand")]
+ UNSPEC_VPERMT2))]
+ "TARGET_AVX512VL && TARGET_AVX512BF16 && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ /* Fresh pseudo in the BF vector mode that HI_CVT_BF maps <MODE> to.  */
+ rtx op0 = gen_reg_rtx (<HI_CVT_BF>mode);
+ /* Force both data operands into registers and view them as
+ <ssePSmode> values.  */
+ operands[2] = lowpart_subreg (<ssePSmode>mode,
+ force_reg (<MODE>mode, operands[2]),
+ <MODE>mode);
+ operands[3] = lowpart_subreg (<ssePSmode>mode,
+ force_reg (<MODE>mode, operands[3]),
+ <MODE>mode);
+
+ /* Operand 3 is passed before operand 2.
+ NOTE(review): presumably this matches which source the conversion
+ insn places in the low half of the result -- confirm.  */
+ emit_insn (gen_avx512f_cvtne2ps2bf16_<hi_cvt_bf>(op0,
+ operands[3],
+ operands[2]));
+ /* Copy the result into operand 0, viewed in the original <MODE>.  */
+ emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0,
+ <HI_CVT_BF>mode));
+ DONE;
+}
+[(set_attr "mode" "<sseinsnmode>")])
diff --git a/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c
new file mode 100755
index 0000000..5c65f2a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vpermt2-special-bf16-shufflue.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bf16 -mavx512vl" } */
+/* { dg-final { scan-assembler-not "vpermi2b" } } */
+/* { dg-final { scan-assembler-times "vcvtne2ps2bf16" 3 } } */
+
+/* The directives above require exactly three "vcvtne2ps2bf16" matches and
+ no "vpermi2b" match in the assembly generated for the three shuffles
+ below.  */
+
+/* __bf16 vector types of 16, 32 and 64 bytes.  */
+typedef __bf16 v8bf __attribute__((vector_size(16)));
+typedef __bf16 v16bf __attribute__((vector_size(32)));
+typedef __bf16 v32bf __attribute__((vector_size(64)));
+
+/* 16-byte case: shuffle a and b with the odd indices 1, 3, ..., 15.  */
+v8bf foo0(v8bf a, v8bf b)
+{
+ return __builtin_shufflevector(a, b, 1, 3, 5, 7, 9, 11, 13, 15);
+}
+
+/* 32-byte case: shuffle a and b with the odd indices 1, 3, ..., 31.  */
+v16bf foo1(v16bf a, v16bf b)
+{
+ return __builtin_shufflevector(a, b, 1, 3, 5, 7, 9, 11, 13, 15,
+ 17, 19, 21, 23, 25, 27, 29, 31);
+}
+
+/* 64-byte case: shuffle a and b with the odd indices 1, 3, ..., 63.  */
+v32bf foo2(v32bf a, v32bf b)
+{
+ return __builtin_shufflevector(a, b, 1, 3, 5, 7, 9, 11, 13, 15,
+ 17, 19, 21, 23, 25, 27, 29, 31,
+ 33, 35, 37, 39, 41, 43, 45, 47,
+ 49, 51, 53, 55, 57, 59, 61, 63);
+}