author     Tamar Christina <tamar.christina@arm.com>  2021-10-14 15:07:14 +0100
committer  Tamar Christina <tamar.christina@arm.com>  2021-10-14 15:07:14 +0100
commit     62b505a4d5fc8916867e25ed86dfb865fca81511 (patch)
tree       73d53f204ac4b558d67a262b640182238904692d /gcc
parent     fecd145359fc981beb2802f746190227c5cc010a (diff)
download   gcc-62b505a4d5fc8916867e25ed86dfb865fca81511.zip
           gcc-62b505a4d5fc8916867e25ed86dfb865fca81511.tar.gz
           gcc-62b505a4d5fc8916867e25ed86dfb865fca81511.tar.bz2
sve: optimize add reduction patterns
The following loop does a conditional reduction using an add:

#include <stdint.h>

int32_t f (int32_t *restrict array, int len, int min)
{
  int32_t iSum = 0;

  for (int i=0; i<len; i++) {
    if (array[i] >= min)
      iSum += array[i];
  }
  return iSum;
}

for this we currently generate:

        mov     z1.b, #0
        mov     z2.s, w2
        mov     z3.d, z1.d
        ptrue   p2.b, all
        ld1w    z0.s, p0/z, [x0, x3, lsl 2]
        cmpge   p1.s, p2/z, z0.s, z2.s
        add     x3, x3, x4
        sel     z0.s, p1, z0.s, z3.s
        add     z1.s, p0/m, z1.s, z0.s
        whilelo p0.s, w3, w1

where the SEL is unneeded as it's selecting between 0 or a value.  This
can be optimized to just doing the conditional add on p1 instead of p0.

After this patch we generate:

        mov     z2.s, w2
        mov     z0.b, #0
        ptrue   p1.b, all
        ld1w    z1.s, p0/z, [x0, x3, lsl 2]
        cmpge   p0.s, p0/z, z1.s, z2.s
        add     x3, x3, x4
        add     z0.s, p0/m, z0.s, z1.s
        whilelo p0.s, w3, w1

and so we drop the SEL and the 0 move.

gcc/ChangeLog:

        * match.pd: New rule.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/sve/pred-cond-reduc.c: New test.
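As a rough scalar sketch (not part of the patch; the function and variable
names are illustrative only, chosen to mirror the mask1/mask2/b/d names in
the new match.pd comment), the fold can be read as replacing a zero-select
that feeds a conditional add with a single conditional add guarded by the
AND of both masks:

#include <stdint.h>

/* Before the fold: a zero-select feeds a conditional add.  */
static inline int32_t
cond_add_before (int mask1, int mask2, int32_t b, int32_t d)
{
  int32_t a = mask1 ? b : 0;     /* a = mask1 ? b : 0      */
  return mask2 ? d + a : d;      /* c = mask2 ? d + a : d  */
}

/* After the fold: the select is gone and the add is guarded by
   both masks at once.  */
static inline int32_t
cond_add_after (int mask1, int mask2, int32_t b, int32_t d)
{
  return (mask1 && mask2) ? d + b : d;
}

At the vector level the same rewrite lets the conditional ADD use the
comparison predicate directly, which is why the SEL and the extra zero
move disappear from the generated code above.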
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/match.pd                                             12
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve/pred-cond-reduc.c   18
2 files changed, 30 insertions, 0 deletions
diff --git a/gcc/match.pd b/gcc/match.pd
index c153e9a..038a798 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -7156,6 +7156,18 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& element_precision (type) == element_precision (op_type))
(view_convert (cond_op @2 @3 @4 @5 (view_convert:op_type @1)))))))
+/* Detect simplification for a conditional reduction where
+
+ a = mask1 ? b : 0
+ c = mask2 ? d + a : d
+
+ is turned into
+
+ c = mask1 && mask2 ? d + b : d. */
+(simplify
+ (IFN_COND_ADD @0 @1 (vec_cond @2 @3 integer_zerop) @1)
+ (IFN_COND_ADD (bit_and @0 @2) @1 @3 @1))
+
/* For pointers @0 and @2 and nonnegative constant offset @1, look for
expressions like:
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-cond-reduc.c b/gcc/testsuite/gcc.target/aarch64/sve/pred-cond-reduc.c
new file mode 100644
index 0000000..bd53025
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-cond-reduc.c
@@ -0,0 +1,18 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+
+int32_t f (int32_t *restrict array, int len, int min)
+{
+ int32_t iSum = 0;
+
+ for (int i=0; i<len; i++) {
+ if (array[i] >= min)
+ iSum += array[i];
+ }
+ return iSum;
+}
+
+
+/* { dg-final { scan-assembler-not {\tsel\tz[0-9]+\.s, p1, z[0-9]+\.s, z[0-9]+\.s} } } */