aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHu, Lin1 <lin1.hu@intel.com>2024-05-09 09:29:07 +0800
committerHu, Lin1 <lin1.hu@intel.com>2024-06-03 13:44:16 +0800
commitbf7745f887c765e06f2e75508f263debb60aeb2e (patch)
tree6bba51857d7112d4aedb319e8c2ec531a0c4d02e
parentcbf2ed4b309d54039d74be5d730299012e7681b3 (diff)
downloadgcc-bf7745f887c765e06f2e75508f263debb60aeb2e.zip
gcc-bf7745f887c765e06f2e75508f263debb60aeb2e.tar.gz
gcc-bf7745f887c765e06f2e75508f263debb60aeb2e.tar.bz2
i386: Optimize EQ/NE comparison between avx512 kmask and -1.
Acheive EQ/NE comparison between avx512 kmask and -1 by using kxortest with checking CF. gcc/ChangeLog: PR target/113609 * config/i386/sse.md (*kortest_cmp<mode>_setcc): New define_insn_and_split. (*kortest_cmp<mode>_jcc): Ditto. gcc/testsuite/ChangeLog: PR target/113609 * gcc.target/i386/pr113609-1.c: New test. * gcc.target/i386/pr113609-2.c: Ditto.
-rw-r--r--gcc/config/i386/sse.md67
-rw-r--r--gcc/testsuite/gcc.target/i386/pr113609-1.c194
-rw-r--r--gcc/testsuite/gcc.target/i386/pr113609-2.c161
3 files changed, 422 insertions, 0 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 7cd912e..a5a7347f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -2201,6 +2201,73 @@
UNSPEC_KORTEST))]
"TARGET_AVX512F")
+;; Optimize cmp + setcc with mask register by kortest + setcc.
+(define_insn_and_split "*kortest_cmp<mode>_setcc"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm, qm")
+ (match_operator:QI 1 "bt_comparison_operator"
+ [(match_operand:SWI1248_AVX512BWDQ_64 2 "register_operand" "?k, <r>")
+ (const_int -1)]))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_AVX512BW"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ if (MASK_REGNO_P (REGNO (operands[2])))
+ {
+ emit_insn (gen_kortest<mode>_ccc (operands[2], operands[2]));
+ operands[4] = gen_rtx_REG (CCCmode, FLAGS_REG);
+ }
+ else
+ {
+ operands[4] = gen_rtx_REG (CCZmode, FLAGS_REG);
+ emit_insn (gen_rtx_SET (operands[4],
+ gen_rtx_COMPARE (CCZmode,
+ operands[2],
+ constm1_rtx)));
+ }
+ ix86_expand_setcc (operands[0],
+ GET_CODE (operands[1]),
+ operands[4],
+ const0_rtx);
+ DONE;
+})
+
+;; Optimize cmp + jcc with mask register by kortest + jcc.
+(define_insn_and_split "*kortest_cmp<mode>_jcc"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "bt_comparison_operator"
+ [(match_operand:SWI1248_AVX512BWDQ_64 1 "register_operand" "?k, <r>")
+ (const_int -1)])
+ (label_ref (match_operand 2))
+ (pc)))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_AVX512BW"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ if (MASK_REGNO_P (REGNO (operands[1])))
+ {
+ emit_insn (gen_kortest<mode>_ccc (operands[1], operands[1]));
+ operands[4] = gen_rtx_REG (CCCmode, FLAGS_REG);
+ }
+ else
+ {
+ operands[4] = gen_rtx_REG (CCZmode, FLAGS_REG);
+ emit_insn (gen_rtx_SET (operands[4],
+ gen_rtx_COMPARE (CCZmode,
+ operands[1],
+ constm1_rtx)));
+ }
+ ix86_expand_branch (GET_CODE (operands[0]),
+ operands[4],
+ const0_rtx,
+ operands[2]);
+ DONE;
+})
+
(define_insn "kunpckhi"
[(set (match_operand:HI 0 "register_operand" "=k")
(ior:HI
diff --git a/gcc/testsuite/gcc.target/i386/pr113609-1.c b/gcc/testsuite/gcc.target/i386/pr113609-1.c
new file mode 100644
index 0000000..f0639b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113609-1.c
@@ -0,0 +1,194 @@
+/* PR target/113609 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4" } */
+/* { dg-final { scan-assembler-not "^cmp" } } */
+/* { dg-final { scan-assembler-not "\[ \\t\]+sete" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "\[ \\t\]+setne" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "\[ \\t\]+je" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "\[ \\t\]+jne" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+sete" 1 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+setne" 1 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+je" 1 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+jne" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "kortest" 12 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "kortest" 17 { target { ! ia32 } } } } */
+
+#include <immintrin.h>
+
+unsigned int
+cmp_vector_sete_mask8(__m128i a, __m128i b)
+{
+ __mmask8 k = _mm_cmpeq_epi16_mask (a, b);
+ if (k == (__mmask8) -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_vector_sete_mask16(__m128i a, __m128i b)
+{
+ __mmask16 k = _mm_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask16) -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_vector_sete_mask32(__m256i a, __m256i b)
+{
+ __mmask32 k = _mm256_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask32) -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_vector_sete_mask64(__m512i a, __m512i b)
+{
+ __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask64) -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_vector_setne_mask8(__m128i a, __m128i b)
+{
+ __mmask8 k = _mm_cmpeq_epi16_mask (a, b);
+ if (k != (__mmask8) -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_vector_setne_mask16(__m128i a, __m128i b)
+{
+ __mmask16 k = _mm_cmpeq_epi8_mask (a, b);
+ if (k != (__mmask16) -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_vector_setne_mask32(__m256i a, __m256i b)
+{
+ __mmask32 k = _mm256_cmpeq_epi8_mask (a, b);
+ if (k != (__mmask32) -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_vector_setne_mask64(__m512i a, __m512i b)
+{
+ __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+ if (k != (__mmask64) -1)
+ return 1;
+ else
+ return 0;
+}
+
+__m128i
+cmp_vector_je_mask8(__m128i a, __m128i b) {
+ __mmask8 k = _mm_cmpeq_epi16_mask (a, b);
+ if (k == (__mmask8) -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 1;
+ }
+ return a;
+}
+
+__m128i
+cmp_vector_je_mask16(__m128i a, __m128i b) {
+ __mmask16 k = _mm_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask16) -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 1;
+ }
+ return a;
+}
+
+__m256i
+cmp_vector_je_mask32(__m256i a, __m256i b) {
+ __mmask32 k = _mm256_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask32) -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 1;
+ }
+ return a;
+}
+
+__m512i
+cmp_vector_je_mask64(__m512i a, __m512i b) {
+ __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask64) -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 5;
+ }
+ return a;
+}
+
+__m128i
+cmp_vector_jne_mask8(__m128i a, __m128i b) {
+ __mmask8 k = _mm_cmpeq_epi16_mask (a, b);
+ if (k == (__mmask8) -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
+
+__m128i
+cmp_vector_jne_mask16(__m128i a, __m128i b) {
+ __mmask16 k = _mm_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask16) -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
+
+__m256i
+cmp_vector_jne_mask32(__m256i a, __m256i b) {
+ __mmask32 k = _mm256_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask32) -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
+
+__m512i
+cmp_vector_jne_mask64(__m512i a, __m512i b) {
+ __mmask64 k = _mm512_cmpeq_epi8_mask (a, b);
+ if (k == (__mmask64) -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
+
+__m512i
+mask_cmp_vector_jne_mask64(__m512i a, __m512i b) {
+ __mmask64 k = _mm512_mask_cmpeq_epi8_mask ((__mmask64)0xffffffefffffffff, a, b);
+ if (k == (__mmask64) -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr113609-2.c b/gcc/testsuite/gcc.target/i386/pr113609-2.c
new file mode 100644
index 0000000..e9503f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr113609-2.c
@@ -0,0 +1,161 @@
+/* PR target/113609 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v4" } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+sete" 4 } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+setne" 4 } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+je" 4 } } */
+/* { dg-final { scan-assembler-times "\[ \\t\]+jne" 4 } } */
+
+#include <immintrin.h>
+
+unsigned int
+cmp_pi8_setcc(char a)
+{
+ if (a == -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_pi16_setcc(short a)
+{
+ if (a == -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_pi32_setcc(int a)
+{
+ if (a == -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_pi64_setcc(long long a)
+{
+ if (a == -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_pi8_setne(char a)
+{
+ if (a != -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_pi16_setne(short a)
+{
+ if (a != -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_pi32_setne(int a)
+{
+ if (a != -1)
+ return 1;
+ else
+ return 0;
+}
+
+unsigned int
+cmp_pi64_setne(long long a)
+{
+ if (a != -1)
+ return 1;
+ else
+ return 0;
+}
+
+__m128i
+cmp_pi8_je(__m128i a, char b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 1;
+ }
+ return a;
+}
+
+__m128i
+cmp_pi16_je(__m128i a, short b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 1;
+ }
+ return a;
+}
+
+__m128i
+cmp_pi32_je(__m128i a, int b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 1;
+ }
+ return a;
+}
+
+__m128i
+cmp_pi64_je(__m128i a, long long b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ else {
+ a[0] = a[0] - 1;
+ }
+ return a;
+}
+
+__m128i
+cmp_pi8_jne(__m128i a, char b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
+
+__m128i
+cmp_pi16_jne(__m128i a, short b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
+
+__m128i
+cmp_pi32_jne(__m128i a, int b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}
+
+__m128i
+cmp_pi64_jne(__m128i a, long long b) {
+ if (b == -1) {
+ a[0] = a[0] + 1;
+ }
+ a[0] = a[0] - 4;
+ return a;
+}