aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Biener <rguenther@suse.de>2019-11-15 12:48:34 +0000
committerRichard Biener <rguenth@gcc.gnu.org>2019-11-15 12:48:34 +0000
commitd03431d0f59f5aa5c2a8e2e5ea94ea1660b240f8 (patch)
tree224553aa798f2c7b6d41788fe7abd395243b11eb
parent8eea62d8ab9c621b9d46926eb4c5380fe5606e4f (diff)
downloadgcc-d03431d0f59f5aa5c2a8e2e5ea94ea1660b240f8.zip
gcc-d03431d0f59f5aa5c2a8e2e5ea94ea1660b240f8.tar.gz
gcc-d03431d0f59f5aa5c2a8e2e5ea94ea1660b240f8.tar.bz2
re PR tree-optimization/92324 (ICE in expand_direct_optab_fn, at internal-fn.c:2890)
2019-11-15 Richard Biener <rguenther@suse.de> PR tree-optimization/92324 * tree-vect-loop.c (vect_create_epilog_for_reduction): Fix signedness of SLP reduction epilogue operations. Also reduce the vector width for SLP reductions before doing elementwise operations if possible. * gcc.dg/vect/pr92324-4.c: New testcase. From-SVN: r278289
-rw-r--r--gcc/ChangeLog8
-rw-r--r--gcc/testsuite/ChangeLog5
-rw-r--r--gcc/testsuite/gcc.dg/vect/pr92324-4.c30
-rw-r--r--gcc/tree-vect-loop.c68
4 files changed, 81 insertions, 30 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2f93252..5eb21e7 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2019-11-15 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/92324
+ * tree-vect-loop.c (vect_create_epilog_for_reduction): Fix
+ signedness of SLP reduction epilogue operations. Also reduce
+ the vector width for SLP reductions before doing elementwise
+ operations if possible.
+
2019-11-15 Matthew Malcomson <matthew.malcomson@arm.com>
* passes.c (skip_pass): Set epilogue_completed if skipping the
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 6b12383..4377c4c 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2019-11-15 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/92324
+ * gcc.dg/vect/pr92324-4.c: New testcase.
+
2019-11-15 Paul Thomas <pault@gcc.gnu.org>
PR fortran/69654
diff --git a/gcc/testsuite/gcc.dg/vect/pr92324-4.c b/gcc/testsuite/gcc.dg/vect/pr92324-4.c
new file mode 100644
index 0000000..8347985
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr92324-4.c
@@ -0,0 +1,30 @@
+#include "tree-vect.h"
+
+unsigned a[1024];
+int gres1, gres2;
+
+int __attribute__((noipa)) /* Two interleaved max reductions over a[]; results are published in gres1/gres2.  NOTE(review): declared int but no return statement is present; the caller in this file ignores the result.  */
+foo (int n)
+{
+ int res1 = 0; /* Running maximum of the even-indexed elements a[2*i].  */
+ int res2 = 0; /* Running maximum of the odd-indexed elements a[2*i+1].  */
+ for (int i = 0; i < n; ++i)
+ {
+ res1 = res1 > a[2*i] ? res1 : a[2*i]; /* a[] is unsigned, so res1 is converted and the comparison is unsigned; the selected value is then stored back into a signed int.  */
+ res2 = res2 > a[2*i+1] ? res2 : a[2*i+1]; /* Same pattern for the odd lane.  */
+ }
+ gres1 = res1; /* Hand both results to the caller through the globals.  */
+ gres2 = res2;
+}
+
+int main () /* Driver: seeds the last element of each lane with INT_MAX+1 and checks both reductions.  */
+{
+ check_vect (); /* Comes from tree-vect.h, which is not shown here; presumably checks that the target can run vector code -- confirm.  */
+ a[30] = (unsigned)__INT_MAX__ + 1; /* Index 2*15: the last even-indexed element read by foo (16).  */
+ a[31] = (unsigned)__INT_MAX__ + 1; /* Index 2*15+1: the last odd-indexed element read by foo (16).  */
+ foo (16); /* Reads a[0..31]; the return value is ignored, results arrive in gres1/gres2.  */
+ if (gres1 != -__INT_MAX__ - 1 /* Expect INT_MIN: INT_MAX+1 wins the unsigned comparison in foo and is expected to come out as INT_MIN once stored in an int.  */
+ || gres2 != -__INT_MAX__ - 1)
+ __builtin_abort (); /* Any other value means the reduction was computed with the wrong signedness.  */
+ return 0;
+}
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 567fcc7..389ef17 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -4930,6 +4930,8 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
bool reduce_with_shift;
tree vec_temp;
+ gcc_assert (slp_reduc || new_phis.length () == 1);
+
/* See if the target wants to do the final (shift) reduction
in a vector mode of smaller size and first reduce upper/lower
halves against each other. */
@@ -4937,6 +4939,21 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
tree stype = TREE_TYPE (vectype);
unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
unsigned nunits1 = nunits;
+ if ((mode1 = targetm.vectorize.split_reduction (mode)) != mode
+ && new_phis.length () == 1)
+ {
+ nunits1 = GET_MODE_NUNITS (mode1).to_constant ();
+ /* For SLP reductions we have to make sure lanes match up, but
+ since we're doing individual element final reduction reducing
+ vector width here is even more important.
+ ??? We can also separate lanes with permutes, for the common
+ case of power-of-two group-size odd/even extracts would work. */
+ if (slp_reduc && nunits != nunits1)
+ {
+ nunits1 = least_common_multiple (nunits1, group_size);
+ gcc_assert (exact_log2 (nunits1) != -1 && nunits1 <= nunits);
+ }
+ }
if (!slp_reduc
&& (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
nunits1 = GET_MODE_NUNITS (mode1).to_constant ();
@@ -4958,7 +4975,6 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
new_temp = new_phi_result;
while (nunits > nunits1)
{
- gcc_assert (!slp_reduc);
nunits /= 2;
vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
stype, nunits);
@@ -5113,6 +5129,8 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
int element_bitsize = tree_to_uhwi (bitsize);
+ tree compute_type = TREE_TYPE (vectype);
+ gimple_seq stmts = NULL;
FOR_EACH_VEC_ELT (new_phis, i, new_phi)
{
int bit_offset;
@@ -5120,12 +5138,8 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
vec_temp = PHI_RESULT (new_phi);
else
vec_temp = gimple_assign_lhs (new_phi);
- tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
- bitsize_zero_node);
- epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
- new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
- gimple_assign_set_lhs (epilog_stmt, new_temp);
- gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+ new_temp = gimple_build (&stmts, BIT_FIELD_REF, compute_type,
+ vec_temp, bitsize, bitsize_zero_node);
/* In SLP we don't need to apply reduction operation, so we just
collect s' values in SCALAR_RESULTS. */
@@ -5137,14 +5151,9 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
bit_offset += element_bitsize)
{
tree bitpos = bitsize_int (bit_offset);
- tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
- bitsize, bitpos);
-
- epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
- new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
- gimple_assign_set_lhs (epilog_stmt, new_name);
- gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
-
+ new_name = gimple_build (&stmts, BIT_FIELD_REF,
+ compute_type, vec_temp,
+ bitsize, bitpos);
if (slp_reduc)
{
/* In SLP we don't need to apply reduction operation, so
@@ -5153,13 +5162,8 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
scalar_results.safe_push (new_name);
}
else
- {
- epilog_stmt = gimple_build_assign (new_scalar_dest, code,
- new_name, new_temp);
- new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
- gimple_assign_set_lhs (epilog_stmt, new_temp);
- gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
- }
+ new_temp = gimple_build (&stmts, code, compute_type,
+ new_name, new_temp);
}
}
@@ -5170,24 +5174,28 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
if (slp_reduc)
{
tree res, first_res, new_res;
- gimple *new_stmt;
/* Reduce multiple scalar results in case of SLP unrolling. */
for (j = group_size; scalar_results.iterate (j, &res);
j++)
{
first_res = scalar_results[j % group_size];
- new_stmt = gimple_build_assign (new_scalar_dest, code,
- first_res, res);
- new_res = make_ssa_name (new_scalar_dest, new_stmt);
- gimple_assign_set_lhs (new_stmt, new_res);
- gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
+ new_res = gimple_build (&stmts, code, compute_type,
+ first_res, res);
scalar_results[j % group_size] = new_res;
}
+ for (k = 0; k < group_size; k++)
+ scalar_results[k] = gimple_convert (&stmts, scalar_type,
+ scalar_results[k]);
}
else
- /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */
- scalar_results.safe_push (new_temp);
+ {
+ /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */
+ new_temp = gimple_convert (&stmts, scalar_type, new_temp);
+ scalar_results.safe_push (new_temp);
+ }
+
+ gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
}
if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)