diff options
author | liuhongt <hongtao.liu@intel.com> | 2020-11-30 13:27:16 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2020-12-03 13:42:39 +0800 |
commit | 70310982492071f98eacdac0747521769b0f0328 (patch) | |
tree | 1b8f4e168b25ff13331f63c9d8592966cd4c9cc7 /gcc | |
parent | 35c4c67e6c534ef3d6ba7a7752ab7e0fbc91755b (diff) | |
download | gcc-70310982492071f98eacdac0747521769b0f0328.zip gcc-70310982492071f98eacdac0747521769b0f0328.tar.gz gcc-70310982492071f98eacdac0747521769b0f0328.tar.bz2 |
Optimize vpsubusw compared to 0 into vpcmpleuw or vpcmpnleuw [PR96906]
For signed comparisons, it handles cases that are eq or neq to 0.
For unsigned comparisons, it additionally handles cases that are le or
gt to 0 (equivalent to eq or neq to 0). Transform case eq to leu,
case neq to gtu.
i.e. for -mavx512bw -mavx512vl transform eq case code from
vpsubusw %xmm1, %xmm0, %xmm0
vpxor %xmm1, %xmm1, %xmm1
vpcmpeqw %xmm1, %xmm0, %k0
to
vpcmpleuw %xmm1, %xmm0, %k0
i.e. for -mavx512bw -mavx512vl transform neq case code from
vpsubusw %xmm1, %xmm0, %xmm0
vpxor %xmm1, %xmm1, %xmm1
vpcmpneqw %xmm1, %xmm0, %k0
to
vpcmpnleuw %xmm1, %xmm0, %k0
gcc/ChangeLog
PR target/96906
* config/i386/sse.md
(<avx512>_ucmp<mode>3<mask_scalar_merge_name>): Add a new
define_split after this insn.
gcc/testsuite/ChangeLog
* gcc.target/i386/avx512bw-pr96906-1.c: New test.
* gcc.target/i386/pr96906-1.c: Add -mno-avx512f.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/sse.md | 38 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/avx512bw-pr96906-1.c | 68 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr96906-1.c | 2 |
3 files changed, 107 insertions, 1 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 78f7367..94bb445 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3098,6 +3098,44 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) +(define_int_iterator UNSPEC_PCMP_ITER + [UNSPEC_PCMP UNSPEC_UNSIGNED_PCMP]) + +(define_int_attr pcmp_signed_mask + [(UNSPEC_PCMP "3") (UNSPEC_UNSIGNED_PCMP "1")]) + +;; PR96906 - optimize vpsubusw compared to 0 into vpcmpleuw or vpcmpnltuw. +;; For signed comparison, handle EQ 0: NEQ 4, +;; for unsigned comparison extra handle LE:2, NLE:6, equivalent to EQ and NEQ. + +(define_split + [(set (match_operand:<avx512fmaskmode> 0 "register_operand") + (unspec:<avx512fmaskmode> + [(us_minus:VI12_AVX512VL + (match_operand:VI12_AVX512VL 1 "vector_operand") + (match_operand:VI12_AVX512VL 2 "vector_operand")) + (match_operand:VI12_AVX512VL 3 "const0_operand") + (match_operand:SI 4 "const_0_to_7_operand")] + UNSPEC_PCMP_ITER))] + "TARGET_AVX512BW + && ix86_binary_operator_ok (US_MINUS, <MODE>mode, operands) + && (INTVAL (operands[4]) & <pcmp_signed_mask>) == 0" + [(const_int 0)] + { + /* LE: 2, NLT: 5, NLE: 6, LT: 1 */ + int cmp_predicate = 2; /* LE */ + if (MEM_P (operands[1])) + { + std::swap (operands[1], operands[2]); + cmp_predicate = 5; /* NLT (GE) */ + } + if ((INTVAL (operands[4]) & 4) != 0) + cmp_predicate ^= 4; /* Invert the comparison to NLE (GT) or LT. 
*/ + emit_insn (gen_<avx512>_ucmp<mode>3 (operands[0], operands[1],operands[2], + GEN_INT (cmp_predicate))); + DONE; + }) + (define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") (and:<avx512fmaskmode> diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr96906-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr96906-1.c new file mode 100644 index 0000000..81d7e06 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr96906-1.c @@ -0,0 +1,68 @@ +/* PR target/96906 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512bw -mavx512vl -masm=att" } */ +/* { dg-final { scan-assembler-times {(?n)vpcmpub[ \t]*\$2} 9 } } */ +/* { dg-final { scan-assembler-times {(?n)vpcmpub[ \t]*\$6} 9 } } */ +/* { dg-final { scan-assembler-times {(?n)vpcmpuw[ \t]*\$2} 9 } } */ +/* { dg-final { scan-assembler-times {(?n)vpcmpuw[ \t]*\$6} 9 } } */ + + +#include<immintrin.h> + +#define FOO(LENGTH,SUFFIX,TYPE,UTYPE,RTYPE,PRED) \ + __mmask##RTYPE \ + foo_##LENGTH##_##TYPE##_##PRED (__m##LENGTH##i x, __m##LENGTH##i y) \ + { \ + return \ + _mm##SUFFIX##_cmp_##TYPE##_mask (_mm##SUFFIX##_subs_##UTYPE (x, y), \ + _mm##SUFFIX##_setzero_si##LENGTH (), \ + PRED); \ + } \ + +FOO (128,, epi16, epu16, 8, 0); +FOO (128,, epi16, epu16, 8, 4); + +FOO (128,, epu16, epu16, 8, 0); +FOO (128,, epu16, epu16, 8, 2); +FOO (128,, epu16, epu16, 8, 4); +FOO (128,, epu16, epu16, 8, 6); + +FOO (256, 256, epi16, epu16, 16, 0); +FOO (256, 256, epi16, epu16, 16, 4); + +FOO (256, 256, epu16, epu16, 16, 0); +FOO (256, 256, epu16, epu16, 16, 2); +FOO (256, 256, epu16, epu16, 16, 4); +FOO (256, 256, epu16, epu16, 16, 6); + +FOO (512, 512, epi16, epu16, 32, 0); +FOO (512, 512, epi16, epu16, 32, 4); + +FOO (512, 512, epu16, epu16, 32, 0); +FOO (512, 512, epu16, epu16, 32, 2); +FOO (512, 512, epu16, epu16, 32, 4); +FOO (512, 512, epu16, epu16, 32, 6); + +FOO (128,, epi8, epu8, 16, 0); +FOO (128,, epi8, epu8, 16, 4); + +FOO (128,, epu8, epu8, 16, 0); +FOO 
(128,, epu8, epu8, 16, 2); +FOO (128,, epu8, epu8, 16, 4); +FOO (128,, epu8, epu8, 16, 6); + +FOO (256, 256, epi8, epu8, 32, 0); +FOO (256, 256, epi8, epu8, 32, 4); + +FOO (256, 256, epu8, epu8, 32, 0); +FOO (256, 256, epu8, epu8, 32, 2); +FOO (256, 256, epu8, epu8, 32, 4); +FOO (256, 256, epu8, epu8, 32, 6); + +FOO (512, 512, epi8, epu8, 64, 0); +FOO (512, 512, epi8, epu8, 64, 4); + +FOO (512, 512, epu8, epu8, 64, 0); +FOO (512, 512, epu8, epu8, 64, 2); +FOO (512, 512, epu8, epu8, 64, 4); +FOO (512, 512, epu8, epu8, 64, 6); diff --git a/gcc/testsuite/gcc.target/i386/pr96906-1.c b/gcc/testsuite/gcc.target/i386/pr96906-1.c index 9d836eb..b1b41bf 100644 --- a/gcc/testsuite/gcc.target/i386/pr96906-1.c +++ b/gcc/testsuite/gcc.target/i386/pr96906-1.c @@ -1,6 +1,6 @@ /* PR target/96906 */ /* { dg-do compile } */ -/* { dg-options "-O2 -mavx2" } */ +/* { dg-options "-O2 -mavx2 -mno-avx512f" } */ /* { dg-final { scan-assembler-times "\tvpminub\[^\n\r]*xmm" 2 } } */ /* { dg-final { scan-assembler-times "\tvpminuw\[^\n\r]*xmm" 2 } } */ /* { dg-final { scan-assembler-times "\tvpminub\[^\n\r]*ymm" 2 } } */ |