aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorliuhongt <hongtao.liu@intel.com>2024-10-21 02:22:08 -0700
committerliuhongt <hongtao.liu@intel.com>2024-10-22 19:28:26 -0700
commitee7e77e9c121f5a6f27c92b6b24b2abf9cd66a4d (patch)
tree9f4a8c53f9fdbddec5c6944dd9a45e01ac050f36 /gcc
parent01ed5c62bf8a9759442d1f4c80ea6dc3d71f3719 (diff)
downloadgcc-ee7e77e9c121f5a6f27c92b6b24b2abf9cd66a4d.zip
gcc-ee7e77e9c121f5a6f27c92b6b24b2abf9cd66a4d.tar.gz
gcc-ee7e77e9c121f5a6f27c92b6b24b2abf9cd66a4d.tar.bz2
i386: Optimize EQ/NE comparison between avx512 kmask and -1.
r15-974-gbf7745f887c765e06f2e75508f263debb60aeb2e has optimized for jcc/setcc, but missed movcc. The patch supports movcc. gcc/ChangeLog: PR target/117232 * config/i386/sse.md (*kortest_cmp<SWI1248_AVX512BWDQ_64:mode>_movqicc): New define_insn_and_split. (*kortest_cmp<SWI1248_AVX512BWDQ_64:mode>_mov<SWI248:mode>cc): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr117232-1.c: New test. * gcc.target/i386/pr117232-apx-1.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/i386/sse.md85
-rw-r--r--gcc/testsuite/gcc.target/i386/pr117232-1.c47
-rw-r--r--gcc/testsuite/gcc.target/i386/pr117232-apx-1.c48
3 files changed, 180 insertions, 0 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 6c28b74..2345015 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2423,6 +2423,91 @@
DONE;
})
+;; Optimize cmp + movcc with mask register by kortest + movcc.
+(define_insn_and_split "*kortest_cmp<SWI1248_AVX512BWDQ_64:mode>_movqicc"
+ [(set (match_operand:QI 0 "register_operand" "=r,r,r,r,r,r")
+ (if_then_else:QI
+ (match_operator 1 "bt_comparison_operator"
+ [(match_operand:SWI1248_AVX512BWDQ_64 4 "register_operand"
+ "?k,<SWI1248_AVX512BWDQ_64:r>,?k, <SWI1248_AVX512BWDQ_64:r>,?k,r")
+ (const_int -1)])
+ (match_operand:QI 2 "register_operand" "r,r,0,0,r,r")
+ (match_operand:QI 3 "register_operand" " 0,0,r,r,r,r")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_AVX512BW && TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (if_then_else:SI
+ (match_dup 5)
+ (match_dup 2)
+ (match_dup 3)))]
+{
+ rtx flag_reg;
+ if (MASK_REGNO_P (REGNO (operands[4])))
+ {
+ emit_insn (gen_kortest<SWI1248_AVX512BWDQ_64:mode>_ccc (operands[4], operands[4]));
+ flag_reg = gen_rtx_REG (CCCmode, FLAGS_REG);
+ }
+ else
+ {
+ flag_reg = gen_rtx_REG (CCZmode, FLAGS_REG);
+ emit_insn (gen_rtx_SET (flag_reg,
+ gen_rtx_COMPARE (CCZmode,
+ operands[4],
+ constm1_rtx)));
+ }
+ operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[1]), VOIDmode,
+ flag_reg,const0_rtx);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[3] = gen_lowpart (SImode, operands[3]);
+}
+ [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd")
+ (set_attr "type" "icmov")
+ (set_attr "mode" "QI")])
+
+(define_insn_and_split "*kortest_cmp<SWI1248_AVX512BWDQ_64:mode>_mov<SWI248:mode>cc"
+ [(set (match_operand:SWI248 0 "register_operand" "=r,r,r,r,r,r,r,r")
+ (if_then_else:SWI248
+ (match_operator 1 "bt_comparison_operator"
+ [(match_operand:SWI1248_AVX512BWDQ_64 4 "register_operand"
+ "?k,<SWI1248_AVX512BWDQ_64:r>,?k, <SWI1248_AVX512BWDQ_64:r>,?k,r,?k, r")
+ (const_int -1)])
+ (match_operand:SWI248 2 "nonimmediate_operand" "rm,rm, 0, 0,rm,rm, r, r")
+ (match_operand:SWI248 3 "nonimmediate_operand" " 0, 0,rm,rm, r, r,rm,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_AVX512BW && TARGET_CMOVE
+ && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (if_then_else:SWI248
+ (match_dup 5)
+ (match_dup 2)
+ (match_dup 3)))]
+{
+ rtx flag_reg;
+ if (MASK_REGNO_P (REGNO (operands[4])))
+ {
+ emit_insn (gen_kortest<SWI1248_AVX512BWDQ_64:mode>_ccc (operands[4], operands[4]));
+ flag_reg = gen_rtx_REG (CCCmode, FLAGS_REG);
+ }
+ else
+ {
+ flag_reg = gen_rtx_REG (CCZmode, FLAGS_REG);
+ emit_insn (gen_rtx_SET (flag_reg,
+ gen_rtx_COMPARE (CCZmode,
+ operands[4],
+ constm1_rtx)));
+ }
+ operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[1]), VOIDmode,
+ flag_reg,const0_rtx);
+}
+ [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd")
+ (set_attr "type" "icmov")
+ (set_attr "mode" "<SWI248:MODE>")])
+
(define_insn "kunpckhi"
[(set (match_operand:HI 0 "register_operand" "=k")
(ior:HI
diff --git a/gcc/testsuite/gcc.target/i386/pr117232-1.c b/gcc/testsuite/gcc.target/i386/pr117232-1.c
new file mode 100644
index 0000000..cd7f5d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117232-1.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vl -mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times {(?n)kortest[bwqd]} 7 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times {(?n)cmovn?c} 7 { target { ! ia32 } } } } */
+
+#include <immintrin.h>
+int
+foo (__m512i a, __m512i b, int c, int d) {
+ __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+ return k == (__mmask64) -1 ? c : d;
+}
+
+int
+foo1 (__m512i a, __m512i b, int c, int d) {
+ __mmask32 k = _mm512_cmpeq_epi16_mask (a, b);
+ return k == (__mmask32) -1 ? c : d;
+}
+
+int
+foo2 (__m512i a, __m512i b, int c, int d) {
+ __mmask16 k = _mm512_cmpeq_epi32_mask (a, b);
+ return k == (__mmask16) -1 ? c : d;
+}
+
+int
+foo3 (__m512i a, __m512i b, int c, int d) {
+ __mmask8 k = _mm512_cmpeq_epi64_mask (a, b);
+ return k == (__mmask8) -1 ? c : d;
+}
+
+short
+foo4 (__m512i a, __m512i b, short c, short d) {
+ __mmask8 k = _mm512_cmpeq_epi64_mask (a, b);
+ return k == (__mmask8) -1 ? c : d;
+}
+
+char
+foo5 (__m512i a, __m512i b, char c, char d) {
+ __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+ return k == (__mmask64) -1 ? c : d;
+}
+
+long long
+foo6 (__m512i a, __m512i b, long long c, long long d) {
+ __mmask16 k = _mm512_cmpeq_epi32_mask (a, b);
+ return k == (__mmask16) -1 ? c : d;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr117232-apx-1.c b/gcc/testsuite/gcc.target/i386/pr117232-apx-1.c
new file mode 100644
index 0000000..e3571ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117232-apx-1.c
@@ -0,0 +1,48 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx512bw -mavx512vl -mavx512dq -mapxf -O2" } */
+/* { dg-final { scan-assembler-times {(?n)kortest[bwqd]} 7 } } */
+/* { dg-final { scan-assembler-times {(?n)cmovn?c} 7 } } */
+
+#include <immintrin.h>
+
+int
+foo (__m512i a, __m512i b, int c, int d) {
+ __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+ return k == (__mmask64) -1 ? c : d;
+}
+
+int
+foo1 (__m512i a, __m512i b, int c, int d) {
+ __mmask32 k = _mm512_cmpeq_epi16_mask (a, b);
+ return k == (__mmask32) -1 ? c : d;
+}
+
+int
+foo2 (__m512i a, __m512i b, int c, int d) {
+ __mmask16 k = _mm512_cmpeq_epi32_mask (a, b);
+ return k == (__mmask16) -1 ? c : d;
+}
+
+int
+foo3 (__m512i a, __m512i b, int c, int d) {
+ __mmask8 k = _mm512_cmpeq_epi64_mask (a, b);
+ return k == (__mmask8) -1 ? c : d;
+}
+
+short
+foo4 (__m512i a, __m512i b, short c, short d) {
+ __mmask8 k = _mm512_cmpeq_epi64_mask (a, b);
+ return k == (__mmask8) -1 ? c : d;
+}
+
+char
+foo5 (__m512i a, __m512i b, char c, char d) {
+ __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+ return k == (__mmask64) -1 ? c : d;
+}
+
+long long
+foo6 (__m512i a, __m512i b, long long c, long long d) {
+ __mmask16 k = _mm512_cmpeq_epi32_mask (a, b);
+ return k == (__mmask16) -1 ? c : d;
+}