aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Jan Beulich <jbeulich@suse.com> 2023-07-05 09:48:47 +0200
committer Jan Beulich <jbeulich@suse.com> 2023-07-05 09:48:47 +0200
commit fa58c2871a1235cb5ba475303a2bd11ae90416d5 (patch)
tree d6bd11996e98cefcfc7caac5164f91511d51879e /gcc
parent 3186ef0cb9e2d25e8455f9990e50187e3d1eee19 (diff)
download gcc-fa58c2871a1235cb5ba475303a2bd11ae90416d5.zip
gcc-fa58c2871a1235cb5ba475303a2bd11ae90416d5.tar.gz
gcc-fa58c2871a1235cb5ba475303a2bd11ae90416d5.tar.bz2
x86: further PR target/100711-like splitting
With respective two-operand bitwise operations now expressible by a single VPTERNLOG, add splitters to also deal with ior and xor counterparts of the original and-only case. Note that the splitters need to be separate, as the placement of "not" differs in the final insns (*iornot<mode>3, *xnor<mode>3) which are intended to pick up one half of the result. gcc/ PR target/100711 * config/i386/sse.md: New splitters to simplify not;vec_duplicate;{ior,xor} as vec_duplicate;{iornot,xnor}. gcc/testsuite/ PR target/100711 * gcc.target/i386/pr100711-4.c: New test. * gcc.target/i386/pr100711-5.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/i386/sse.md 30
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr100711-4.c 42
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr100711-5.c 40
3 files changed, 112 insertions, 0 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ab9fd67..d828143 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17366,6 +17366,36 @@
(match_dup 2)))]
"operands[3] = gen_reg_rtx (<MODE>mode);")
+(define_split ; ior (vec_duplicate (not x)) y  ->  t = vec_duplicate x; ior (not t) y
+ [(set (match_operand:VI 0 "register_operand")
+ (ior:VI
+ (vec_duplicate:VI
+ (not:<ssescalarmode>
+ (match_operand:<ssescalarmode> 1 "nonimmediate_operand")))
+ (match_operand:VI 2 "vector_operand")))]
+ "<MODE_SIZE> == 64 || TARGET_AVX512VL
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)"
+ [(set (match_dup 3) ; first insn: duplicate the un-negated scalar (operand 1) into operand 3
+ (vec_duplicate:VI (match_dup 1)))
+ (set (match_dup 0) ; second insn: the "not" now applies to the whole duplicated vector
+ (ior:VI (not:VI (match_dup 3)) (match_dup 2)))]
+ "operands[3] = gen_reg_rtx (<MODE>mode);") ; operand 3 is obtained from gen_reg_rtx in the vector mode
+
+(define_split ; xor (vec_duplicate (not x)) y  ->  t = vec_duplicate x; not (xor t y)
+ [(set (match_operand:VI 0 "register_operand")
+ (xor:VI
+ (vec_duplicate:VI
+ (not:<ssescalarmode>
+ (match_operand:<ssescalarmode> 1 "nonimmediate_operand")))
+ (match_operand:VI 2 "vector_operand")))]
+ "<MODE_SIZE> == 64 || TARGET_AVX512VL
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)"
+ [(set (match_dup 3) ; first insn: duplicate the un-negated scalar (operand 1) into operand 3
+ (vec_duplicate:VI (match_dup 1)))
+ (set (match_dup 0) ; second insn: here the "not" wraps the whole xor, unlike the ior splitter
+ (not:VI (xor:VI (match_dup 3) (match_dup 2))))]
+ "operands[3] = gen_reg_rtx (<MODE>mode);") ; operand 3 is obtained from gen_reg_rtx in the vector mode
+
(define_insn "*andnot<mode>3_mask"
[(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
(vec_merge:VI48_AVX512VL
diff --git a/gcc/testsuite/gcc.target/i386/pr100711-4.c b/gcc/testsuite/gcc.target/i386/pr100711-4.c
new file mode 100644
index 0000000..3ca524f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr100711-4.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mno-avx512vl -mprefer-vector-width=512 -O2" } */
+
+typedef char v64qi __attribute__ ((vector_size (64)));
+typedef short v32hi __attribute__ ((vector_size (64)));
+typedef int v16si __attribute__ ((vector_size (64)));
+typedef long long v8di __attribute__((vector_size (64)));
+
+v64qi foo_v64qi (char a, v64qi b) /* All 64 char elements are initialized from ~a; result is that vector | b.  */
+{
+ return (__extension__ (v64qi) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) | b;
+}
+
+v32hi foo_v32hi (short a, v32hi b) /* All 32 short elements are initialized from ~a; result is that vector | b.  */
+{
+ return (__extension__ (v32hi) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) | b;
+}
+
+v16si foo_v16si (int a, v16si b) /* All 16 int elements are ~a; result is that vector | b.  */
+{
+ return (__extension__ (v16si) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) | b;
+}
+
+v8di foo_v8di (long long a, v8di b) /* All 8 long long elements are ~a; result is that vector | b.  */
+{
+ return (__extension__ (v8di) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) | b;
+}
+
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0xbb" 4 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0xbb" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0xdd" 2 { target { ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100711-5.c b/gcc/testsuite/gcc.target/i386/pr100711-5.c
new file mode 100644
index 0000000..161fbfc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr100711-5.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mno-avx512vl -mprefer-vector-width=512 -O2" } */
+
+typedef char v64qi __attribute__ ((vector_size (64)));
+typedef short v32hi __attribute__ ((vector_size (64)));
+typedef int v16si __attribute__ ((vector_size (64)));
+typedef long long v8di __attribute__((vector_size (64)));
+
+v64qi foo_v64qi (char a, v64qi b) /* All 64 char elements are initialized from ~a; result is that vector ^ b.  */
+{
+ return (__extension__ (v64qi) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) ^ b;
+}
+
+v32hi foo_v32hi (short a, v32hi b) /* All 32 short elements are initialized from ~a; result is that vector ^ b.  */
+{
+ return (__extension__ (v32hi) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) ^ b;
+}
+
+v16si foo_v16si (int a, v16si b) /* All 16 int elements are ~a; result is that vector ^ b.  */
+{
+ return (__extension__ (v16si) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a,
+ ~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) ^ b;
+}
+
+v8di foo_v8di (long long a, v8di b) /* All 8 long long elements are ~a; result is that vector ^ b.  */
+{
+ return (__extension__ (v8di) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) ^ b;
+}
+
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0x99" 4 } } */