aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2021-01-07 14:02:02 +0000
committer Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2021-01-08 10:29:25 +0000
commit c9d25aa7489fd478098e0ef098438e797d597d3b (patch)
tree 1528daf7cbf593bba5f6952c23e0fad2511495aa /gcc
parent 21c1a30fc73105af50c5e717cb99dc3becabf8fa (diff)
download gcc-c9d25aa7489fd478098e0ef098438e797d597d3b.zip
gcc-c9d25aa7489fd478098e0ef098438e797d597d3b.tar.gz
gcc-c9d25aa7489fd478098e0ef098438e797d597d3b.tar.bz2
aarch64: Fix RTL patterns for UABA/SABA
Some time ago we changed the RTL representation of the (SU)ABD instructions to a (MINUS (MAX) (MIN)) rather than a (MINUS (ABS) (ABS)) as it more correctly models the semantics. We should do the same for the accumulation forms of these instructions: UABA/SABA. This patch does that and allows the new pattern to generate the unsigned UABA form as well. The new form also allows it to more easily be re-used to implement the relevant arm_neon.h intrinsics in the future. The testcase uses -fno-tree-reassoc to work around a side-effect of PR98581. gcc/ * config/aarch64/aarch64-simd.md (aba<mode>_3): Rename to... (aarch64_<su>aba<mode>): ... This. Handle uaba as well. Change RTL pattern to match. gcc/testsuite/ * gcc.target/aarch64/usaba_1.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/aarch64/aarch64-simd.md | 18
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/usaba_1.c | 29
2 files changed, 40 insertions, 7 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 85770c8..d23398e 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -859,14 +859,18 @@
}
)
-(define_insn "aba<mode>_3"
+(define_insn "aarch64_<su>aba<mode>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
- (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
- (match_operand:VDQ_BHSI 1 "register_operand" "w")
- (match_operand:VDQ_BHSI 2 "register_operand" "w")))
- (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
- "TARGET_SIMD"
- "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+ (plus:VDQ_BHSI (minus:VDQ_BHSI
+ (USMAX:VDQ_BHSI
+ (match_operand:VDQ_BHSI 2 "register_operand" "w")
+ (match_operand:VDQ_BHSI 3 "register_operand" "w"))
+ (<max_opp>:VDQ_BHSI
+ (match_dup 2)
+ (match_dup 3)))
+ (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
+ "TARGET_SIMD"
+ "<su>aba\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
[(set_attr "type" "neon_arith_acc<q>")]
)
diff --git a/gcc/testsuite/gcc.target/aarch64/usaba_1.c b/gcc/testsuite/gcc.target/aarch64/usaba_1.c
new file mode 100644
index 0000000..58b5beb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/usaba_1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-reassoc" } */
+
+#pragma GCC target "+nosve"
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+#define FUNC(T, N, S) \
+void saba_##S (T * __restrict__ a, T * __restrict__ b, T * __restrict__ c) \
+{ \
+ int i; \
+ for (i = 0; i < N; i++) \
+ c[i] += (MAX (a[i], b[i]) - MIN (a[i], b[i])); \
+}
+
+FUNC (signed char, 16, qi)
+/* { dg-final { scan-assembler-times {saba\tv[0-9]+\.16b, v[0-9]+\.16b, v[0-9]+\.16b} 1 } } */
+FUNC (short, 8, hi)
+/* { dg-final { scan-assembler-times {saba\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h} 1 } } */
+FUNC (int, 4, si)
+/* { dg-final { scan-assembler-times {saba\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s} 1 } } */
+FUNC (unsigned char, 16, uqi)
+/* { dg-final { scan-assembler-times {uaba\tv[0-9]+\.16b, v[0-9]+\.16b, v[0-9]+\.16b} 1 } } */
+FUNC (unsigned short, 8, uhi)
+/* { dg-final { scan-assembler-times {uaba\tv[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h} 1 } } */
+FUNC (unsigned int, 4, usi)
+/* { dg-final { scan-assembler-times {uaba\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s} 1 } } */
+