aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Biener <rguenther@suse.de>2023-06-26 12:51:37 +0200
committerRichard Biener <rguenther@suse.de>2024-06-04 10:07:30 +0200
commit8f6d889a8e609710ecfd555778fbff602b2c7d74 (patch)
treef365ea00d815a85c875d13fc59ddc1abb05c11ca
parentdb0f236aa1c30f703ff564960bd9f3dbd747ea7b (diff)
downloadgcc-8f6d889a8e609710ecfd555778fbff602b2c7d74.zip
gcc-8f6d889a8e609710ecfd555778fbff602b2c7d74.tar.gz
gcc-8f6d889a8e609710ecfd555778fbff602b2c7d74.tar.bz2
tree-optimization/110381 - preserve SLP permutation with in-order reductions
The following fixes a bug that manifests itself during fold-left reduction transform in picking not the last scalar def to replace and thus double-counting some elements. But the underlying issue is that we merge a load permutation into the in-order reduction which is of course wrong. Now, reduction analysis has not yet been performed when optimizing permutations so we have to resort to checking that ourselves. PR tree-optimization/110381 * tree-vect-slp.cc (vect_optimize_slp_pass::start_choosing_layouts): Materialize permutes before fold-left reductions. * gcc.dg/vect/pr110381.c: New testcase. (cherry picked from commit 53d6f57c1b20c6da52aefce737fb7d5263686ba3)
-rw-r--r--gcc/testsuite/gcc.dg/vect/pr110381.c44
-rw-r--r--gcc/tree-vect-slp.cc19
2 files changed, 60 insertions, 3 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/pr110381.c b/gcc/testsuite/gcc.dg/vect/pr110381.c
new file mode 100644
index 0000000..278f442
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr110381.c
@@ -0,0 +1,44 @@
+/* { dg-require-effective-target vect_float_strict } */
+
+#include "tree-vect.h"
+
+struct FOO {
+ double a;
+ double b;
+ double c;
+};
+
+double __attribute__((noipa))
+sum_8_foos(const struct FOO* foos)
+{
+ double sum = 0;
+
+ for (int i = 0; i < 8; ++i)
+ {
+ struct FOO foo = foos[i];
+
+ /* Need to use an in-order reduction here, preserving
+ the load permutation. */
+ sum += foo.a;
+ sum += foo.c;
+ sum += foo.b;
+ }
+
+ return sum;
+}
+
+int main()
+{
+ struct FOO foos[8];
+
+ check_vect ();
+
+ __builtin_memset (foos, 0, sizeof (foos));
+ foos[0].a = __DBL_MAX__;
+ foos[0].b = 5;
+ foos[0].c = -__DBL_MAX__;
+
+ if (sum_8_foos (foos) != 5)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 54e6a9e..19cab93 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -3733,9 +3733,8 @@ vect_optimize_slp (vec_info *vinfo)
vertices[idx].perm_out = perms.length () - 1;
}
- /* In addition to the above we have to mark outgoing permutes facing
- non-reduction graph entries that are not represented as to be
- materialized. */
+ /* We have to mark outgoing permutations facing non-associating-reduction
+ graph entries that are not represented as to be materialized. */
for (slp_instance instance : vinfo->slp_instances)
if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_ctor)
{
@@ -3744,6 +3743,20 @@ vect_optimize_slp (vec_info *vinfo)
vertices[SLP_INSTANCE_TREE (instance)->vertex].perm_in = 0;
vertices[SLP_INSTANCE_TREE (instance)->vertex].perm_out = 0;
}
+ else if (SLP_INSTANCE_KIND (instance) == slp_inst_kind_reduc_chain)
+ {
+ stmt_vec_info stmt_info
+ = SLP_TREE_REPRESENTATIVE (SLP_INSTANCE_TREE (instance));
+ stmt_vec_info reduc_info = info_for_reduction (vinfo, stmt_info);
+ if (needs_fold_left_reduction_p (TREE_TYPE
+ (gimple_get_lhs (stmt_info->stmt)),
+ STMT_VINFO_REDUC_CODE (reduc_info)))
+ {
+ unsigned int node_i = SLP_INSTANCE_TREE (instance)->vertex;
+ vertices[node_i].perm_in = 0;
+ vertices[node_i].perm_out = 0;
+ }
+ }
/* Propagate permutes along the graph and compute materialization points. */
bool changed;