diff options
author | Roger Sayle <roger@nextmovesoftware.com> | 2022-02-22 12:32:22 +0000 |
---|---|---|
committer | Roger Sayle <roger@nextmovesoftware.com> | 2022-02-22 12:32:22 +0000 |
commit | 2ef0e75d0bbc80bc06a0a39135e3564f907e39c8 (patch) | |
tree | 1ceacdf57c7c09f000456ec8aa17f0669a7a71b8 | |
parent | 2f59f067610f22c3f2ec9b1516e24b85836676ed (diff) | |
download | gcc-2ef0e75d0bbc80bc06a0a39135e3564f907e39c8.zip gcc-2ef0e75d0bbc80bc06a0a39135e3564f907e39c8.tar.gz gcc-2ef0e75d0bbc80bc06a0a39135e3564f907e39c8.tar.bz2 |
Implement constant-folding simplifications of reductions.
This patch addresses a code quality regression in GCC 12 by implementing
some constant folding/simplification transformations for REDUC_PLUS_EXPR
in match.pd. The motivating example is gcc.dg/vect/pr89440.c which with
-O2 -ffast-math (with vectorization now enabled) gets optimized to:
float f (float x)
{
vector(4) float vect_x_14.11;
vector(4) float _2;
float _32;
_2 = {x_9(D), 0.0, 0.0, 0.0};
vect_x_14.11_29 = _2 + { 1.0e+1, 2.6e+1, 4.2e+1, 5.8e+1 };
_32 = .REDUC_PLUS (vect_x_14.11_29); [tail call]
return _32;
}
With these proposed new transformations, we can simplify the
above code even further:
float f (float x)
{
float _32;
_32 = x_9(D) + 1.36e+2;
return _32;
}
[which happens to match what we'd produce with -fno-tree-vectorize,
and with GCC 11].
2022-02-22 Roger Sayle <roger@nextmovesoftware.com>
Richard Biener <rguenther@suse.de>
gcc/ChangeLog
* fold-const.cc (ctor_single_nonzero_element): New function to
return the single non-zero element of a (vector) constructor.
* fold-const.h (ctor_single_nonzero_element): Prototype here.
* match.pd (reduc (constructor@0)): Simplify reductions of a
constructor containing a single non-zero element.
(reduc (@0 op VECTOR_CST) -> (reduc @0) op CONST): Simplify
reductions of vector operations of the same operator with
constant vector operands.
gcc/testsuite/ChangeLog
* gcc.dg/fold-reduc-1.c: New test case.
-rw-r--r-- | gcc/fold-const.cc | 20 |
-rw-r--r-- | gcc/fold-const.h | 1 |
-rw-r--r-- | gcc/match.pd | 18 |
-rw-r--r-- | gcc/testsuite/gcc.dg/fold-reduc-1.c | 19 |
4 files changed, 58 insertions, 0 deletions
diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc index 7b21240..39a5a52 100644 --- a/gcc/fold-const.cc +++ b/gcc/fold-const.cc @@ -16792,6 +16792,26 @@ address_compare (tree_code code, tree type, tree op0, tree op1, return equal; } +/* Return the single non-zero element of a CONSTRUCTOR or NULL_TREE. */ +tree +ctor_single_nonzero_element (const_tree t) +{ + unsigned HOST_WIDE_INT idx; + constructor_elt *ce; + tree elt = NULL_TREE; + + if (TREE_CODE (t) != CONSTRUCTOR) + return NULL_TREE; + for (idx = 0; vec_safe_iterate (CONSTRUCTOR_ELTS (t), idx, &ce); idx++) + if (!integer_zerop (ce->value) && !real_zerop (ce->value)) + { + if (elt) + return NULL_TREE; + elt = ce->value; + } + return elt; +} + #if CHECKING_P namespace selftest { diff --git a/gcc/fold-const.h b/gcc/fold-const.h index 926c775..a4ff554 100644 --- a/gcc/fold-const.h +++ b/gcc/fold-const.h @@ -225,6 +225,7 @@ extern const char *c_getstr (tree); extern wide_int tree_nonzero_bits (const_tree); extern int address_compare (tree_code, tree, tree, tree, tree &, tree &, poly_int64 &, poly_int64 &, bool); +extern tree ctor_single_nonzero_element (const_tree); /* Return OFF converted to a pointer offset type suitable as offset for POINTER_PLUS_EXPR. Use location LOC for this conversion. */ diff --git a/gcc/match.pd b/gcc/match.pd index 8b6f22f..cad6184 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -7532,6 +7532,24 @@ and, (BIT_FIELD_REF:elt_type @0 { size; } { pos; }) { elt; }))))))) +/* Fold reduction of a single nonzero element constructor. */ +(for reduc (IFN_REDUC_PLUS IFN_REDUC_IOR IFN_REDUC_XOR) + (simplify (reduc (CONSTRUCTOR@0)) + (with { tree ctor = (TREE_CODE (@0) == SSA_NAME + ? gimple_assign_rhs1 (SSA_NAME_DEF_STMT (@0)) : @0); + tree elt = ctor_single_nonzero_element (ctor); } + (if (elt + && !HONOR_SNANS (type) + && !HONOR_SIGNED_ZEROS (type)) + { elt; })))) + +/* Fold REDUC (@0 op VECTOR_CST) as REDUC (@0) op REDUC (VECTOR_CST). 
*/ +(for reduc (IFN_REDUC_PLUS IFN_REDUC_MAX IFN_REDUC_MIN IFN_REDUC_FMAX + IFN_REDUC_FMIN IFN_REDUC_AND IFN_REDUC_IOR IFN_REDUC_XOR) + op (plus max min IFN_FMAX IFN_FMIN bit_and bit_ior bit_xor) + (simplify (reduc (op @0 VECTOR_CST@1)) + (op (reduc:type @0) (reduc:type @1)))) + (simplify (vec_perm @0 @1 VECTOR_CST@2) (with diff --git a/gcc/testsuite/gcc.dg/fold-reduc-1.c b/gcc/testsuite/gcc.dg/fold-reduc-1.c new file mode 100644 index 0000000..c8360b0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-reduc-1.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ffast-math -fdump-tree-optimized" } */ +float foo (float x) +{ + int i; + float j; + float a = 0; + for (i = 0; i < 4; ++i) + { + for (j = 0; j < 4; ++j) + { + a += 1; + x += a; + } + } + return x; +} + +/* { dg-final { scan-tree-dump-not "REDUC_PLUS" "optimized"} } */ |