author     Richard Biener <rguenther@suse.de>    2015-06-10 10:39:31 +0000
committer  Richard Biener <rguenth@gcc.gnu.org>  2015-06-10 10:39:31 +0000
commit     b266b96856da702dbe957396adc6f242daed233b (patch)
tree       ce0f05e25c08e6f3afd2ed0a00dc9443a954ff57
parent     8ffd51d230e0ab7a1c1caaf1486303796862f771 (diff)
tree-vect-slp.c (vect_attempt_slp_rearrange_stmts): Split out from ...
2015-06-10  Richard Biener  <rguenther@suse.de>

	* tree-vect-slp.c (vect_attempt_slp_rearrange_stmts): Split
	out from ...
	(vect_supported_load_permutation_p): ... here.  Handle
	supportable permutations in reductions.
	* tree-vect-stmts.c (vectorizable_load): Handle SLP permutations
	for vectorizing strided group loads.

From-SVN: r224324
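For illustration, this is the kind of source loop the change targets: a reduction chain whose operands come from an interleaved (strided) group load, referenced out of memory order, so the SLP tree carries a load permutation. A hypothetical sketch, not a testcase from the patch:

/* Hypothetical example (not from the patch): a reduction chain over
   a two-element interleaved group.  The accumulators read a[2*i+1]
   before a[2*i], so the SLP load permutation is {1, 0}; previously
   vect_supported_load_permutation_p rejected this case.  */
double
sum_interleaved (const double *a, int n)
{
  double even = 0.0, odd = 0.0;
  for (int i = 0; i < n; ++i)
    {
      odd  += a[2 * i + 1];
      even += a[2 * i];
    }
  return even + odd;
}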
-rw-r--r--  gcc/ChangeLog          |   9
-rw-r--r--  gcc/tree-vect-slp.c    | 115
-rw-r--r--  gcc/tree-vect-stmts.c  |  17
3 files changed, 86 insertions(+), 55 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4486b4e..c3854fa 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2015-06-10 Richard Biener <rguenther@suse.de>
+
+ * tree-vect-slp.c (vect_attempt_slp_rearrange_stmts): Split
+ out from ...
+ (vect_supported_load_permutation_p): ... here. Handle
+ supportable permutations in reductions.
+ * tree-vect-stmts.c (vectorizable_load): Handle SLP permutations
+ for vectorizing strided group loads.
+
2015-06-10 Jakub Jelinek <jakub@redhat.com>
PR target/66470
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 20e4aba..880b245 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1299,6 +1299,67 @@ vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
}
+/* Attempt to reorder stmts in a reduction chain so that we don't
+ require any load permutation. Return true if that was possible,
+ otherwise return false. */
+
+static bool
+vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
+{
+ unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
+ unsigned int i, j;
+ sbitmap load_index;
+ unsigned int lidx;
+ slp_tree node, load;
+
+ /* Compare all the permutation sequences to the first one. We know
+ that at least one load is permuted. */
+ node = SLP_INSTANCE_LOADS (slp_instn)[0];
+ if (!node->load_permutation.exists ())
+ return false;
+ for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
+ {
+ if (!load->load_permutation.exists ())
+ return false;
+ FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
+ if (lidx != node->load_permutation[j])
+ return false;
+ }
+
+ /* Check that the loads in the first sequence are different and there
+ are no gaps between them. */
+ load_index = sbitmap_alloc (group_size);
+ bitmap_clear (load_index);
+ FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
+ {
+ if (bitmap_bit_p (load_index, lidx))
+ {
+ sbitmap_free (load_index);
+ return false;
+ }
+ bitmap_set_bit (load_index, lidx);
+ }
+ for (i = 0; i < group_size; i++)
+ if (!bitmap_bit_p (load_index, i))
+ {
+ sbitmap_free (load_index);
+ return false;
+ }
+ sbitmap_free (load_index);
+
+ /* This permutation is valid for reduction. Since the order of the
+ statements in the nodes is not important unless they are memory
+ accesses, we can rearrange the statements in all the nodes
+ according to the order of the loads. */
+ vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
+ node->load_permutation);
+
+ /* We are done, no actual permutations need to be generated. */
+ FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
+ SLP_TREE_LOAD_PERMUTATION (node).release ();
+ return true;
+}
+
/* Check if the required load permutations in the SLP instance
SLP_INSTN are supported. */
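As a standalone sketch of the validity test in vect_attempt_slp_rearrange_stmts above (names are mine, with std::vector standing in for GCC's sbitmap): the rearrangement is only safe when the common load permutation is a bijection on [0, group_size), i.e. there are no duplicate indices and no gaps.

#include <vector>

/* Sketch: true iff every index in [0, group_size) occurs exactly
   once in PERM - the duplicate check and the gap check from the
   function above, folded into one helper.  */
static bool
permutation_covers_group_p (const std::vector<unsigned> &perm,
                            unsigned group_size)
{
  std::vector<bool> seen (group_size, false);
  for (unsigned idx : perm)
    {
      if (idx >= group_size || seen[idx])
        return false;          /* out-of-range or duplicate index */
      seen[idx] = true;
    }
  for (unsigned i = 0; i < group_size; ++i)
    if (!seen[i])
      return false;            /* gap: element i is never loaded */
  return true;
}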
@@ -1307,7 +1368,6 @@ vect_supported_load_permutation_p (slp_instance slp_instn)
{
unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
unsigned int i, j, k, next;
- sbitmap load_index;
slp_tree node;
gimple stmt, load, next_load, first_load;
struct data_reference *dr;
@@ -1342,59 +1402,14 @@ vect_supported_load_permutation_p (slp_instance slp_instn)
stmt = SLP_TREE_SCALAR_STMTS (node)[0];
/* Reduction (there are no data-refs in the root).
- In reduction chain the order of the loads is important. */
+ In reduction chain the order of the loads is not important. */
if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
&& !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
{
- slp_tree load;
- unsigned int lidx;
+ if (vect_attempt_slp_rearrange_stmts (slp_instn))
+ return true;
- /* Compare all the permutation sequences to the first one. We know
- that at least one load is permuted. */
- node = SLP_INSTANCE_LOADS (slp_instn)[0];
- if (!node->load_permutation.exists ())
- return false;
- for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
- {
- if (!load->load_permutation.exists ())
- return false;
- FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
- if (lidx != node->load_permutation[j])
- return false;
- }
-
- /* Check that the loads in the first sequence are different and there
- are no gaps between them. */
- load_index = sbitmap_alloc (group_size);
- bitmap_clear (load_index);
- FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
- {
- if (bitmap_bit_p (load_index, lidx))
- {
- sbitmap_free (load_index);
- return false;
- }
- bitmap_set_bit (load_index, lidx);
- }
- for (i = 0; i < group_size; i++)
- if (!bitmap_bit_p (load_index, i))
- {
- sbitmap_free (load_index);
- return false;
- }
- sbitmap_free (load_index);
-
- /* This permutation is valid for reduction. Since the order of the
- statements in the nodes is not important unless they are memory
- accesses, we can rearrange the statements in all the nodes
- according to the order of the loads. */
- vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
- node->load_permutation);
-
- /* We are done, no actual permutations need to be generated. */
- FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
- SLP_TREE_LOAD_PERMUTATION (node).release ();
- return true;
+ /* Fallthru to general load permutation handling. */
}
/* In basic block vectorization we allow any subchain of an interleaving
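Conceptually, the rearrangement performed by vect_slp_rearrange_stmts scatters each node's scalar statements to the slot given by the load permutation; afterwards the loads appear in memory order and the recorded permutations can be released. A simplified model (assuming my reading of the scatter direction; the real function recurses over the SLP tree and uses GCC's vec<>):

#include <vector>

/* Simplified model of vect_slp_rearrange_stmts: statement i moves
   to slot PERM[i].  Once every node is reordered this way, the
   loads are in memory order and no runtime permute is needed.  */
template <typename Stmt>
static std::vector<Stmt>
rearrange_by_permutation (const std::vector<Stmt> &stmts,
                          const std::vector<unsigned> &perm)
{
  std::vector<Stmt> reordered (stmts.size ());
  for (unsigned i = 0; i < perm.size (); ++i)
    reordered[perm[i]] = stmts[i];
  return reordered;
}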
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 0698061..2f77e84 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -5995,9 +5995,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
if ((grouped_load
&& (slp || PURE_SLP_STMT (stmt_info)))
&& (group_size > nunits
- || nunits % group_size != 0
- /* We don't support load permutations. */
- || slp_perm))
+ || nunits % group_size != 0))
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"unhandled strided group load\n");
@@ -6294,6 +6292,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
int nloads = nunits;
tree ltype = TREE_TYPE (vectype);
+ auto_vec<tree> dr_chain;
if (slp)
{
nloads = nunits / group_size;
@@ -6303,7 +6302,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
ltype = vectype;
ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
- gcc_assert (!slp_perm);
+ if (slp_perm)
+ dr_chain.create (ncopies);
}
for (j = 0; j < ncopies; j++)
{
@@ -6350,13 +6350,20 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
}
if (slp)
- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+ {
+ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+ if (slp_perm)
+ dr_chain.quick_push (gimple_assign_lhs (new_stmt));
+ }
if (j == 0)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
else
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
prev_stmt_info = vinfo_for_stmt (new_stmt);
}
+ if (slp_perm)
+ vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
+ slp_node_instance, false);
return true;
}
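The tree-vect-stmts.c change replaces the old gcc_assert (!slp_perm) on the strided-group-load path: the emitted vector defs are now collected in dr_chain, and a single call to vect_transform_slp_perm_load generates the required VEC_PERM_EXPRs afterwards. As a rough runtime model of such a permute (lane counts and selector encoding are simplified for illustration, not GCC internals):

#include <array>
#include <cstddef>

/* Rough model of a VEC_PERM_EXPR: output lane i takes lane SEL[i]
   from the concatenation of the two source vectors LO and HI.  */
template <std::size_t N>
static std::array<double, N>
vec_perm_model (const std::array<double, N> &lo,
                const std::array<double, N> &hi,
                const std::array<std::size_t, N> &sel)
{
  std::array<double, N> out{};
  for (std::size_t i = 0; i < N; ++i)
    out[i] = sel[i] < N ? lo[sel[i]] : hi[sel[i] - N];
  return out;
}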