aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Roger Sayle <roger@nextmovesoftware.com> 2022-02-22 12:32:22 +0000
committer Roger Sayle <roger@nextmovesoftware.com> 2022-02-22 12:32:22 +0000
commit 2ef0e75d0bbc80bc06a0a39135e3564f907e39c8 (patch)
tree 1ceacdf57c7c09f000456ec8aa17f0669a7a71b8 /gcc
parent 2f59f067610f22c3f2ec9b1516e24b85836676ed (diff)
download gcc-2ef0e75d0bbc80bc06a0a39135e3564f907e39c8.zip
gcc-2ef0e75d0bbc80bc06a0a39135e3564f907e39c8.tar.gz
gcc-2ef0e75d0bbc80bc06a0a39135e3564f907e39c8.tar.bz2
Implement constant-folding simplifications of reductions.
This patch addresses a code quality regression in GCC 12 by implementing
some constant folding/simplification transformations for REDUC_PLUS_EXPR
in match.pd.  The motivating example is gcc.dg/vect/pr89440.c which with
-O2 -ffast-math (with vectorization now enabled) gets optimized to:

float f (float x)
{
  vector(4) float vect_x_14.11;
  vector(4) float _2;
  float _32;

  _2 = {x_9(D), 0.0, 0.0, 0.0};
  vect_x_14.11_29 = _2 + { 1.0e+1, 2.6e+1, 4.2e+1, 5.8e+1 };
  _32 = .REDUC_PLUS (vect_x_14.11_29); [tail call]
  return _32;
}

With these proposed new transformations, we can simplify the above code
even further.

float f (float x)
{
  float _32;

  _32 = x_9(D) + 1.36e+2;
  return _32;
}

[which happens to match what we'd produce with -fno-tree-vectorize, and
with GCC 11].

2022-02-22  Roger Sayle  <roger@nextmovesoftware.com>
            Richard Biener  <rguenther@suse.de>

gcc/ChangeLog
        * fold-const.cc (ctor_single_nonzero_element): New function to
        return the single non-zero element of a (vector) constructor.
        * fold-const.h (ctor_single_nonzero_element): Prototype here.
        * match.pd (reduc (constructor@0)): Simplify reductions of a
        constructor containing a single non-zero element.
        (reduc (@0 op VECTOR_CST) -> (reduc @0) op CONST): Simplify
        reductions of vector operations of the same operator with
        constant vector operands.

gcc/testsuite/ChangeLog
        * gcc.dg/fold-reduc-1.c: New test case.
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/fold-const.cc  20
-rw-r--r--  gcc/fold-const.h  1
-rw-r--r--  gcc/match.pd  18
-rw-r--r--  gcc/testsuite/gcc.dg/fold-reduc-1.c  19
4 files changed, 58 insertions, 0 deletions
diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 7b21240..39a5a52 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -16792,6 +16792,26 @@ address_compare (tree_code code, tree type, tree op0, tree op1,
return equal;
}
+/* Return the single non-zero element of a CONSTRUCTOR or NULL_TREE.
+   NULL_TREE is returned when T is not a CONSTRUCTOR, when none of the
+   elements visited is non-zero, or when more than one of them is.  An
+   element is treated as zero if either integer_zerop or real_zerop
+   holds for its value.  */
+tree
+ctor_single_nonzero_element (const_tree t)
+{
+ unsigned HOST_WIDE_INT idx;
+ constructor_elt *ce;
+ tree elt = NULL_TREE; /* The only non-zero element seen so far, if any.  */
+
+ if (TREE_CODE (t) != CONSTRUCTOR)
+ return NULL_TREE;
+ for (idx = 0; vec_safe_iterate (CONSTRUCTOR_ELTS (t), idx, &ce); idx++)
+ if (!integer_zerop (ce->value) && !real_zerop (ce->value))
+ {
+ if (elt)
+ return NULL_TREE; /* A second non-zero element: no unique answer.  */
+ elt = ce->value;
+ }
+ return elt;
+}
+
#if CHECKING_P
namespace selftest {
diff --git a/gcc/fold-const.h b/gcc/fold-const.h
index 926c775..a4ff554 100644
--- a/gcc/fold-const.h
+++ b/gcc/fold-const.h
@@ -225,6 +225,7 @@ extern const char *c_getstr (tree);
extern wide_int tree_nonzero_bits (const_tree);
extern int address_compare (tree_code, tree, tree, tree, tree &, tree &,
poly_int64 &, poly_int64 &, bool);
+extern tree ctor_single_nonzero_element (const_tree); /* Single non-zero element of a CONSTRUCTOR, or NULL_TREE.  */
/* Return OFF converted to a pointer offset type suitable as offset for
POINTER_PLUS_EXPR. Use location LOC for this conversion. */
diff --git a/gcc/match.pd b/gcc/match.pd
index 8b6f22f..cad6184 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -7532,6 +7532,24 @@ and,
(BIT_FIELD_REF:elt_type @0 { size; } { pos; })
{ elt; })))))))
+/* Fold reduction of a single nonzero element constructor.
+   Applies to REDUC_PLUS, REDUC_IOR and REDUC_XOR.  When @0 is an
+   SSA_NAME the constructor is taken from the first RHS operand of its
+   defining statement, otherwise @0 itself is used.  If
+   ctor_single_nonzero_element returns an element, the whole reduction
+   is replaced by that element.  The fold is skipped when signaling NaNs
+   or signed zeros are honored for the result type, presumably because
+   dropping the zero operands could then change the result.
+   NOTE(review): ELT is used directly as the result, which assumes the
+   constructor elements have the scalar result type -- confirm for
+   constructors built from sub-vectors.  */
+(for reduc (IFN_REDUC_PLUS IFN_REDUC_IOR IFN_REDUC_XOR)
+ (simplify (reduc (CONSTRUCTOR@0))
+ (with { tree ctor = (TREE_CODE (@0) == SSA_NAME
+ ? gimple_assign_rhs1 (SSA_NAME_DEF_STMT (@0)) : @0);
+ tree elt = ctor_single_nonzero_element (ctor); }
+ (if (elt
+ && !HONOR_SNANS (type)
+ && !HONOR_SIGNED_ZEROS (type))
+ { elt; }))))
+
+/* Fold REDUC (@0 op VECTOR_CST) as REDUC (@0) op REDUC (VECTOR_CST).
+   Each reduction in REDUC is paired with the element-wise operator at
+   the same position in OP (REDUC_PLUS with plus, REDUC_MAX with max,
+   and so on), and both new reductions are given the type of the
+   original result.  Presumably REDUC (VECTOR_CST) is then folded to a
+   constant by other simplifications.
+   NOTE(review): for floating-point plus this reassociates the additions
+   and no explicit guard is visible in this pattern -- confirm that
+   REDUC_PLUS only appears where that is permitted.  */
+(for reduc (IFN_REDUC_PLUS IFN_REDUC_MAX IFN_REDUC_MIN IFN_REDUC_FMAX
+ IFN_REDUC_FMIN IFN_REDUC_AND IFN_REDUC_IOR IFN_REDUC_XOR)
+ op (plus max min IFN_FMAX IFN_FMIN bit_and bit_ior bit_xor)
+ (simplify (reduc (op @0 VECTOR_CST@1))
+ (op (reduc:type @0) (reduc:type @1))))
+
(simplify
(vec_perm @0 @1 VECTOR_CST@2)
(with
diff --git a/gcc/testsuite/gcc.dg/fold-reduc-1.c b/gcc/testsuite/gcc.dg/fold-reduc-1.c
new file mode 100644
index 0000000..c8360b0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-reduc-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -fdump-tree-optimized" } */
+float foo (float x)
+{ /* Adds A = 1, 2, ..., 16 to X in turn (4 x 4 iterations) and returns X.
+     The dg-final directive after this function requires that no REDUC_PLUS
+     is left in the "optimized" dump.  */
+ int i;
+ float j; /* Inner loop counter; note that it is a float.  */
+ float a = 0;
+ for (i = 0; i < 4; ++i)
+ {
+ for (j = 0; j < 4; ++j)
+ {
+ a += 1; /* A is incremented before each use, so it runs from 1 to 16.  */
+ x += a;
+ }
+ }
+ return x;
+}
+
+/* { dg-final { scan-tree-dump-not "REDUC_PLUS" "optimized"} } */