aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLevy Hsu <admin@levyhsu.com>2024-08-27 14:22:20 +0930
committerLevy Hsu <admin@levyhsu.com>2024-09-03 02:54:51 +0000
commit62df24e50039ae04aa3b940e680cffd9041ef5bf (patch)
tree4c6eeab4f72e40816f1307fd318083e252670dd7
parent8e16f26ca9fad685b9b723da7112ffcc99e81593 (diff)
downloadgcc-62df24e50039ae04aa3b940e680cffd9041ef5bf.zip
gcc-62df24e50039ae04aa3b940e680cffd9041ef5bf.tar.gz
gcc-62df24e50039ae04aa3b940e680cffd9041ef5bf.tar.bz2
i386: Support partial vectorized V2BF/V4BF smaxmin
This patch supports smaxmin for partial vectorized V2BF/V4BF. gcc/ChangeLog: * config/i386/mmx.md (<code><mode>3): New define_expand for V2BF/V4BF smaxmin. gcc/testsuite/ChangeLog: * gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c: New test.
-rw-r--r--gcc/config/i386/mmx.md19
-rw-r--r--gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c36
2 files changed, 55 insertions, 0 deletions
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 076ea2e..fac90cf 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2098,6 +2098,25 @@
DONE;
})
+(define_expand "<code><mode>3"
+ [(set (match_operand:VBF_32_64 0 "register_operand")
+ (smaxmin:VBF_32_64
+ (match_operand:VBF_32_64 1 "nonimmediate_operand")
+ (match_operand:VBF_32_64 2 "nonimmediate_operand")))]
+ "TARGET_AVX10_2_256"
+{ /* Expand smaxmin on a VBF_32_64 mode by widening to V8BF, emitting the V8BF pattern, and keeping only the low part of its result.  */
+ rtx op0 = gen_reg_rtx (V8BFmode); /* New V8BF register that receives the wide result.  */
+ rtx op1 = lowpart_subreg (V8BFmode, /* Operand 1 is forced into a register, then viewed as V8BF.  */
+ force_reg (<MODE>mode, operands[1]), <MODE>mode);
+ rtx op2 = lowpart_subreg (V8BFmode, /* Same treatment for operand 2.  */
+ force_reg (<MODE>mode, operands[2]), <MODE>mode);
+
+ emit_insn (gen_<code>v8bf3 (op0, op1, op2)); /* NOTE(review): elements above the <MODE> part are not set up here; presumably harmless because only the low part is read back below -- confirm.  */
+
+ emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8BFmode)); /* Copy the low <MODE>-sized part of the V8BF result into operand 0.  */
+ DONE;
+})
+
(define_expand "sqrt<mode>2"
[(set (match_operand:VHF_32_64 0 "register_operand")
(sqrt:VHF_32_64
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c
new file mode 100644
index 0000000..0a7cc58
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-partial-bf-vector-smaxmin-1.c
@@ -0,0 +1,36 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx10.2 -Ofast" } */
+/* { dg-final { scan-assembler-times "vmaxpbf16" 2 } } */
+/* { dg-final { scan-assembler-times "vminpbf16" 2 } } */
+
+void
+maxpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{ /* Store into dest[0..3] the larger of src1[i] and src2[i].  */
+ int i;
+ for (i = 0; i < 4; i++) /* 4 elements; presumably the V4BF case named in the commit message.  */
+ dest[i] = src1[i] > src2[i] ? src1[i] : src2[i]; /* src2[i] is chosen whenever the > compare is false, ties included.  */
+}
+
+void
+maxpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{ /* Store into dest[0..1] the larger of src1[i] and src2[i].  */
+ int i;
+ for (i = 0; i < 2; i++) /* 2 elements; presumably the V2BF case named in the commit message.  */
+ dest[i] = src1[i] > src2[i] ? src1[i] : src2[i]; /* src2[i] is chosen whenever the > compare is false, ties included.  */
+}
+
+void
+minpbf16_64 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{ /* Store into dest[0..3] the smaller of src1[i] and src2[i].  */
+ int i;
+ for (i = 0; i < 4; i++) /* 4 elements; presumably the V4BF case named in the commit message.  */
+ dest[i] = src1[i] < src2[i] ? src1[i] : src2[i]; /* src2[i] is chosen whenever the < compare is false, ties included.  */
+}
+
+void
+minpbf16_32 (__bf16* restrict dest, __bf16* restrict src1, __bf16* restrict src2)
+{ /* Store into dest[0..1] the smaller of src1[i] and src2[i].  */
+ int i;
+ for (i = 0; i < 2; i++) /* 2 elements; presumably the V2BF case named in the commit message.  */
+ dest[i] = src1[i] < src2[i] ? src1[i] : src2[i]; /* src2[i] is chosen whenever the < compare is false, ties included.  */
+}