aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorliuhongt <hongtao.liu@intel.com>2020-11-30 13:27:16 +0800
committerliuhongt <hongtao.liu@intel.com>2020-12-03 13:42:39 +0800
commit70310982492071f98eacdac0747521769b0f0328 (patch)
tree1b8f4e168b25ff13331f63c9d8592966cd4c9cc7 /gcc
parent35c4c67e6c534ef3d6ba7a7752ab7e0fbc91755b (diff)
downloadgcc-70310982492071f98eacdac0747521769b0f0328.zip
gcc-70310982492071f98eacdac0747521769b0f0328.tar.gz
gcc-70310982492071f98eacdac0747521769b0f0328.tar.bz2
Optimize vpsubusw compared to 0 into vpcmpleuw or vpcmpnleuw [PR96906]
For signed comparisons, it handles cases that are eq or neq to 0. For unsigned comparisons, it additionally handles cases that are le or gt to 0 (equivalent to eq or neq to 0). Transform case eq to leu and case neq to gtu. For example, for -mavx512bw -mavx512vl, transform the eq case code from vpsubusw %xmm1, %xmm0, %xmm0; vpxor %xmm1, %xmm1, %xmm1; vpcmpeqw %xmm1, %xmm0, %k0 to vpcmpleuw %xmm1, %xmm0, %k0. Likewise, for -mavx512bw -mavx512vl, transform the neq case code from vpsubusw %xmm1, %xmm0, %xmm0; vpxor %xmm1, %xmm1, %xmm1; vpcmpneqw %xmm1, %xmm0, %k0 to vpcmpnleuw %xmm1, %xmm0, %k0. gcc/ChangeLog: PR target/96906 * config/i386/sse.md (<avx512>_ucmp<mode>3<mask_scalar_merge_name>): Add a new define_split after this insn. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512bw-pr96906-1.c: New test. * gcc.target/i386/pr96906-1.c: Add -mno-avx512f.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/i386/sse.md38
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512bw-pr96906-1.c68
-rw-r--r--gcc/testsuite/gcc.target/i386/pr96906-1.c2
3 files changed, 107 insertions, 1 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 78f7367..94bb445 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -3098,6 +3098,44 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_int_iterator UNSPEC_PCMP_ITER
+ [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP])
+
+(define_int_attr pcmp_signed_mask
+ [(UNSPEC_PCMP "3") (UNSPEC_UNSIGNED_PCMP "1")])
+
+;; PR96906 - optimize vpsubusw compared to 0 into vpcmpleuw or vpcmpnleuw.
+;; For signed comparison, handle EQ: 0 and NEQ: 4; for unsigned comparison,
+;; additionally handle LE: 2 and NLE: 6, which are equivalent to EQ and NEQ.
+
+(define_split
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
+ (unspec:<avx512fmaskmode>
+ [(us_minus:VI12_AVX512VL
+ (match_operand:VI12_AVX512VL 1 "vector_operand")
+ (match_operand:VI12_AVX512VL 2 "vector_operand"))
+ (match_operand:VI12_AVX512VL 3 "const0_operand")
+ (match_operand:SI 4 "const_0_to_7_operand")]
+ UNSPEC_PCMP_ITER))]
+ "TARGET_AVX512BW
+ && ix86_binary_operator_ok (US_MINUS, <MODE>mode, operands)
+ && (INTVAL (operands[4]) & <pcmp_signed_mask>) == 0"
+ [(const_int 0)]
+ {
+ /* Unsigned compare predicates used here: LT: 1, LE: 2, NLT: 5, NLE: 6.  */
+ int cmp_predicate = 2; /* LE: the saturating difference is 0 iff op1 <= op2.  */
+ if (MEM_P (operands[1]))
+ {
+ std::swap (operands[1], operands[2]);
+ cmp_predicate = 5; /* NLT (GE): LE mirrored for the swapped operands.  */
+ }
+ if ((INTVAL (operands[4]) & 4) != 0)
+ cmp_predicate ^= 4; /* NEQ/NLE requested: invert LE to NLE (GT), or NLT to LT.  */
+ emit_insn (gen_<avx512>_ucmp<mode>3 (operands[0], operands[1],operands[2],
+ GEN_INT (cmp_predicate)));
+ DONE;
+ })
+
(define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(and:<avx512fmaskmode>
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr96906-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr96906-1.c
new file mode 100644
index 0000000..81d7e06
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr96906-1.c
@@ -0,0 +1,68 @@
+/* PR target/96906 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw -mavx512vl -masm=att" } */
+/* { dg-final { scan-assembler-times {(?n)vpcmpub[ \t]*\$2} 9 } } */
+/* { dg-final { scan-assembler-times {(?n)vpcmpub[ \t]*\$6} 9 } } */
+/* { dg-final { scan-assembler-times {(?n)vpcmpuw[ \t]*\$2} 9 } } */
+/* { dg-final { scan-assembler-times {(?n)vpcmpuw[ \t]*\$6} 9 } } */
+
+
+#include<immintrin.h>
+
+#define FOO(LENGTH,SUFFIX,TYPE,UTYPE,RTYPE,PRED) \
+ __mmask##RTYPE \
+ foo_##LENGTH##_##TYPE##_##PRED (__m##LENGTH##i x, __m##LENGTH##i y) \
+ { \
+ return \
+ _mm##SUFFIX##_cmp_##TYPE##_mask (_mm##SUFFIX##_subs_##UTYPE (x, y), \
+ _mm##SUFFIX##_setzero_si##LENGTH (), \
+ PRED); \
+ } \
+
+FOO (128,, epi16, epu16, 8, 0);
+FOO (128,, epi16, epu16, 8, 4);
+
+FOO (128,, epu16, epu16, 8, 0);
+FOO (128,, epu16, epu16, 8, 2);
+FOO (128,, epu16, epu16, 8, 4);
+FOO (128,, epu16, epu16, 8, 6);
+
+FOO (256, 256, epi16, epu16, 16, 0);
+FOO (256, 256, epi16, epu16, 16, 4);
+
+FOO (256, 256, epu16, epu16, 16, 0);
+FOO (256, 256, epu16, epu16, 16, 2);
+FOO (256, 256, epu16, epu16, 16, 4);
+FOO (256, 256, epu16, epu16, 16, 6);
+
+FOO (512, 512, epi16, epu16, 32, 0);
+FOO (512, 512, epi16, epu16, 32, 4);
+
+FOO (512, 512, epu16, epu16, 32, 0);
+FOO (512, 512, epu16, epu16, 32, 2);
+FOO (512, 512, epu16, epu16, 32, 4);
+FOO (512, 512, epu16, epu16, 32, 6);
+
+FOO (128,, epi8, epu8, 16, 0);
+FOO (128,, epi8, epu8, 16, 4);
+
+FOO (128,, epu8, epu8, 16, 0);
+FOO (128,, epu8, epu8, 16, 2);
+FOO (128,, epu8, epu8, 16, 4);
+FOO (128,, epu8, epu8, 16, 6);
+
+FOO (256, 256, epi8, epu8, 32, 0);
+FOO (256, 256, epi8, epu8, 32, 4);
+
+FOO (256, 256, epu8, epu8, 32, 0);
+FOO (256, 256, epu8, epu8, 32, 2);
+FOO (256, 256, epu8, epu8, 32, 4);
+FOO (256, 256, epu8, epu8, 32, 6);
+
+FOO (512, 512, epi8, epu8, 64, 0);
+FOO (512, 512, epi8, epu8, 64, 4);
+
+FOO (512, 512, epu8, epu8, 64, 0);
+FOO (512, 512, epu8, epu8, 64, 2);
+FOO (512, 512, epu8, epu8, 64, 4);
+FOO (512, 512, epu8, epu8, 64, 6);
diff --git a/gcc/testsuite/gcc.target/i386/pr96906-1.c b/gcc/testsuite/gcc.target/i386/pr96906-1.c
index 9d836eb..b1b41bf 100644
--- a/gcc/testsuite/gcc.target/i386/pr96906-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr96906-1.c
@@ -1,6 +1,6 @@
/* PR target/96906 */
/* { dg-do compile } */
-/* { dg-options "-O2 -mavx2" } */
+/* { dg-options "-O2 -mavx2 -mno-avx512f" } */
/* { dg-final { scan-assembler-times "\tvpminub\[^\n\r]*xmm" 2 } } */
/* { dg-final { scan-assembler-times "\tvpminuw\[^\n\r]*xmm" 2 } } */
/* { dg-final { scan-assembler-times "\tvpminub\[^\n\r]*ymm" 2 } } */