diff options
author | Jan Beulich <jbeulich@suse.com> | 2023-07-05 09:48:47 +0200 |
---|---|---|
committer | Jan Beulich <jbeulich@suse.com> | 2023-07-05 09:48:47 +0200 |
commit | fa58c2871a1235cb5ba475303a2bd11ae90416d5 (patch) | |
tree | d6bd11996e98cefcfc7caac5164f91511d51879e /gcc | |
parent | 3186ef0cb9e2d25e8455f9990e50187e3d1eee19 (diff) | |
download | gcc-fa58c2871a1235cb5ba475303a2bd11ae90416d5.zip gcc-fa58c2871a1235cb5ba475303a2bd11ae90416d5.tar.gz gcc-fa58c2871a1235cb5ba475303a2bd11ae90416d5.tar.bz2 |
x86: further PR target/100711-like splitting
With respective two-operand bitwise operations now expressible by a
single VPTERNLOG, add splitters to also deal with ior and xor
counterparts of the original and-only case. Note that the splitters need
to be separate, as the placement of "not" differs in the final insns
(*iornot<mode>3, *xnor<mode>3) which are intended to pick up one half of
the result.
gcc/
PR target/100711
* config/i386/sse.md: New splitters to simplify
not;vec_duplicate;{ior,xor} as vec_duplicate;{iornot,xnor}.
gcc/testsuite/
PR target/100711
* gcc.target/i386/pr100711-4.c: New test.
* gcc.target/i386/pr100711-5.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/sse.md | 30 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr100711-4.c | 42 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr100711-5.c | 40 |
3 files changed, 112 insertions, 0 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ab9fd67..d828143 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -17366,6 +17366,36 @@ (match_dup 2)))] "operands[3] = gen_reg_rtx (<MODE>mode);") +(define_split + [(set (match_operand:VI 0 "register_operand") + (ior:VI + (vec_duplicate:VI + (not:<ssescalarmode> + (match_operand:<ssescalarmode> 1 "nonimmediate_operand"))) + (match_operand:VI 2 "vector_operand")))] + "<MODE_SIZE> == 64 || TARGET_AVX512VL + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)" + [(set (match_dup 3) + (vec_duplicate:VI (match_dup 1))) + (set (match_dup 0) + (ior:VI (not:VI (match_dup 3)) (match_dup 2)))] + "operands[3] = gen_reg_rtx (<MODE>mode);") + +(define_split + [(set (match_operand:VI 0 "register_operand") + (xor:VI + (vec_duplicate:VI + (not:<ssescalarmode> + (match_operand:<ssescalarmode> 1 "nonimmediate_operand"))) + (match_operand:VI 2 "vector_operand")))] + "<MODE_SIZE> == 64 || TARGET_AVX512VL + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)" + [(set (match_dup 3) + (vec_duplicate:VI (match_dup 1))) + (set (match_dup 0) + (not:VI (xor:VI (match_dup 3) (match_dup 2))))] + "operands[3] = gen_reg_rtx (<MODE>mode);") + (define_insn "*andnot<mode>3_mask" [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") (vec_merge:VI48_AVX512VL diff --git a/gcc/testsuite/gcc.target/i386/pr100711-4.c b/gcc/testsuite/gcc.target/i386/pr100711-4.c new file mode 100644 index 0000000..3ca524f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr100711-4.c @@ -0,0 +1,42 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -mno-avx512vl -mprefer-vector-width=512 -O2" } */ + +typedef char v64qi __attribute__ ((vector_size (64))); +typedef short v32hi __attribute__ ((vector_size (64))); +typedef int v16si __attribute__ ((vector_size (64))); +typedef long long v8di __attribute__((vector_size (64))); + +v64qi foo_v64qi (char a, v64qi b) +{ + return (__extension__ (v64qi) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, 
~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) | b; +} + +v32hi foo_v32hi (short a, v32hi b) +{ + return (__extension__ (v32hi) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) | b; +} + +v16si foo_v16si (int a, v16si b) +{ + return (__extension__ (v16si) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) | b; +} + +v8di foo_v8di (long long a, v8di b) +{ + return (__extension__ (v8di) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) | b; +} + +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0xbb" 4 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0xbb" 2 { target { ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0xdd" 2 { target { ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr100711-5.c b/gcc/testsuite/gcc.target/i386/pr100711-5.c new file mode 100644 index 0000000..161fbfc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr100711-5.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -mno-avx512vl -mprefer-vector-width=512 -O2" } */ + +typedef char v64qi __attribute__ ((vector_size (64))); +typedef short v32hi __attribute__ ((vector_size (64))); +typedef int v16si __attribute__ ((vector_size (64))); +typedef long long v8di __attribute__((vector_size (64))); + +v64qi foo_v64qi (char a, v64qi b) +{ + return (__extension__ (v64qi) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) ^ b; +} + +v32hi foo_v32hi (short a, v32hi b) +{ + return 
(__extension__ (v32hi) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) ^ b; +} + +v16si foo_v16si (int a, v16si b) +{ + return (__extension__ (v16si) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a, + ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) ^ b; +} + +v8di foo_v8di (long long a, v8di b) +{ + return (__extension__ (v8di) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) ^ b; +} + +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0x99" 4 } } */ |