diff options
author | Levy Hsu <admin@levyhsu.com> | 2024-09-02 10:24:47 +0800 |
---|---|---|
committer | Haochen Jiang <haochen.jiang@intel.com> | 2024-09-02 10:24:47 +0800 |
commit | 29ef601973d7b79338694e59581d4c24bcd07f69 (patch) | |
tree | 929150ae63e7fb0b5b1992819cdc0a33e072d095 | |
parent | 6d294fb8ac9baf2624446deaa4c995b7a7719823 (diff) | |
download | gcc-29ef601973d7b79338694e59581d4c24bcd07f69.zip gcc-29ef601973d7b79338694e59581d4c24bcd07f69.tar.gz gcc-29ef601973d7b79338694e59581d4c24bcd07f69.tar.bz2 |
i386: Support vectorized BF16 smaxmin with AVX10.2 instructions
gcc/ChangeLog:
* config/i386/sse.md
(<code><mode>3): New define expand pattern for BF smaxmin.
gcc/testsuite/ChangeLog:
* gcc.target/i386/avx10_2-512-bf-vector-smaxmin-1.c: New test.
* gcc.target/i386/avx10_2-bf-vector-smaxmin-1.c: New test.
-rw-r--r-- | gcc/config/i386/sse.md | 7 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx10_2-512-bf-vector-smaxmin-1.c | 20 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx10_2-bf-vector-smaxmin-1.c | 36 |
3 files changed, 63 insertions, 0 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 85fbef3..b374783 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -31901,6 +31901,13 @@
   "vscalefpbf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
   [(set_attr "prefix" "evex")])
 
+(define_expand "<code><mode>3" ; smax/smin expanders for the BF16 vector modes.
+  [(set (match_operand:VBF_AVX10_2 0 "register_operand")
+        (smaxmin:VBF_AVX10_2
+          (match_operand:VBF_AVX10_2 1 "register_operand")
+          (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")))]
+  "TARGET_AVX10_2_256")
+
 (define_insn "avx10_2_<code>pbf16_<mode><mask_name>"
   [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
         (smaxmin:VBF_AVX10_2
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-bf-vector-smaxmin-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf-vector-smaxmin-1.c
new file mode 100644
index 0000000..e33c325
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-bf-vector-smaxmin-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx10.2-512 -mprefer-vector-width=512 -Ofast" } */
+/* { dg-final { scan-assembler-times "vmaxpbf16" 1 } } */
+/* { dg-final { scan-assembler-times "vminpbf16" 1 } } */
+
+void
+maxpbf16_512 (__bf16* dest, __bf16* src1, __bf16* src2)
+{
+  int i;
+  for (i = 0; i < 32; i++) /* 32 x bf16 fill one 512-bit vector.  */
+    dest[i] = src1[i] > src2[i] ? src1[i] : src2[i];
+}
+
+void
+minpbf16_512 (__bf16* dest, __bf16* src1, __bf16* src2)
+{
+  int i;
+  for (i = 0; i < 32; i++) /* 32 x bf16 fill one 512-bit vector.  */
+    dest[i] = src1[i] < src2[i] ? src1[i] : src2[i];
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-bf-vector-smaxmin-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-bf-vector-smaxmin-1.c
new file mode 100644
index 0000000..9bae073
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-bf-vector-smaxmin-1.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx10.2 -Ofast" } */
+/* { dg-final { scan-assembler-times "vmaxpbf16" 2 } } */
+/* { dg-final { scan-assembler-times "vminpbf16" 2 } } */
+
+void
+maxpbf16_256 (__bf16* dest, __bf16* src1, __bf16* src2)
+{
+  int i;
+  for (i = 0; i < 16; i++) /* 16 x bf16 fill one 256-bit vector.  */
+    dest[i] = src1[i] > src2[i] ? src1[i] : src2[i];
+}
+
+void
+minpbf16_256 (__bf16* dest, __bf16* src1, __bf16* src2)
+{
+  int i;
+  for (i = 0; i < 16; i++) /* 16 x bf16 fill one 256-bit vector.  */
+    dest[i] = src1[i] < src2[i] ? src1[i] : src2[i];
+}
+
+void
+maxpbf16_128 (__bf16* dest, __bf16* src1, __bf16* src2)
+{
+  int i;
+  for (i = 0; i < 8; i++) /* 8 x bf16 fill one 128-bit vector.  */
+    dest[i] = src1[i] > src2[i] ? src1[i] : src2[i];
+}
+
+void
+minpbf16_128 (__bf16* dest, __bf16* src1, __bf16* src2)
+{
+  int i;
+  for (i = 0; i < 8; i++) /* 8 x bf16 fill one 128-bit vector.  */
+    dest[i] = src1[i] < src2[i] ? src1[i] : src2[i];
+}