Diffstat (limited to 'gcc')
-rw-r--r--  gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c |  16
-rw-r--r--  gcc/testsuite/gcc.dg/vect/slp-46.c         |   2
-rw-r--r--  gcc/tree-vect-slp.cc                       |  51
-rw-r--r--  gcc/tree-vect-stmts.cc                     | 128
4 files changed, 127 insertions(+), 70 deletions(-)
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c
index ee12136..8cefa7f 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c
@@ -24,11 +24,17 @@ void rephase (void)
struct site *s;
for(i=0,s=lattice;i<sites_on_node;i++,s++)
for(dir=0;dir<32;dir++)
- for(j=0;j<3;j++)for(k=0;k<3;k++)
- {
- s->link[dir].e[j][k].real *= s->phase[dir];
- s->link[dir].e[j][k].imag *= s->phase[dir];
- }
+ {
+ for(j=0;j<3;j++)
+ for(k=0;k<3;k++)
+ {
+ s->link[dir].e[j][k].real *= s->phase[dir];
+ s->link[dir].e[j][k].imag *= s->phase[dir];
+ }
+ /* Avoid loop vectorizing the outer loop after unrolling
+ the inners. */
+ __asm__ volatile ("" : : : "memory");
+ }
}
int main()
diff --git a/gcc/testsuite/gcc.dg/vect/slp-46.c b/gcc/testsuite/gcc.dg/vect/slp-46.c
index 18476a4..79ed0bb 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-46.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-46.c
@@ -94,4 +94,4 @@ main ()
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail vect_load_lanes } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { xfail vect_load_lanes } } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index fee992d..8cb1ac1 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -1286,15 +1286,19 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
{
if (load_p
&& rhs_code != CFN_GATHER_LOAD
- && rhs_code != CFN_MASK_GATHER_LOAD)
+ && rhs_code != CFN_MASK_GATHER_LOAD
+ /* Not grouped loads are handled as externals for BB
+ vectorization. For loop vectorization we can handle
+ splats the same we handle single element interleaving. */
+ && (is_a <bb_vec_info> (vinfo)
+ || stmt_info != first_stmt_info))
{
/* Not grouped load. */
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Build SLP failed: not grouped load %G", stmt);
- /* FORNOW: Not grouped loads are not supported. */
- if (is_a <bb_vec_info> (vinfo) && i != 0)
+ if (i != 0)
continue;
/* Fatal mismatch. */
matches[0] = false;
@@ -1302,7 +1306,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
}
/* Not memory operation. */
- if (!phi_p
+ if (!load_p
+ && !phi_p
&& rhs_code.is_tree_code ()
&& TREE_CODE_CLASS (tree_code (rhs_code)) != tcc_binary
&& TREE_CODE_CLASS (tree_code (rhs_code)) != tcc_unary
@@ -1774,7 +1779,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
return NULL;
/* If the SLP node is a load, terminate the recursion unless masked. */
- if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
+ if (STMT_VINFO_DATA_REF (stmt_info)
&& DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
{
if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
@@ -1798,8 +1803,12 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
= DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
{
- int load_place = vect_get_place_in_interleaving_chain
- (load_info, first_stmt_info);
+ int load_place;
+ if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
+ load_place = vect_get_place_in_interleaving_chain
+ (load_info, first_stmt_info);
+ else
+ load_place = 0;
gcc_assert (load_place != -1);
load_permutation.safe_push (load_place);
}
@@ -5439,6 +5448,16 @@ vect_optimize_slp_pass::remove_redundant_permutations ()
this_load_permuted = true;
break;
}
+ /* When this isn't a grouped access we know it's single element
+ and contiguous. */
+ if (!STMT_VINFO_GROUPED_ACCESS (SLP_TREE_SCALAR_STMTS (node)[0]))
+ {
+ if (!this_load_permuted
+ && (known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1U)
+ || SLP_TREE_LANES (node) == 1))
+ SLP_TREE_LOAD_PERMUTATION (node).release ();
+ continue;
+ }
stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
if (!this_load_permuted
@@ -8129,12 +8148,16 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, slp_tree node,
tree vectype = SLP_TREE_VECTYPE (node);
unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
unsigned int mask_element;
+ unsigned dr_group_size;
machine_mode mode;
if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
- return false;
-
- stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
+ dr_group_size = 1;
+ else
+ {
+ stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
+ dr_group_size = DR_GROUP_SIZE (stmt_info);
+ }
mode = TYPE_MODE (vectype);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
@@ -8175,7 +8198,7 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, slp_tree node,
unsigned int nelts_to_build;
unsigned int nvectors_per_build;
unsigned int in_nlanes;
- bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
+ bool repeating_p = (group_size == dr_group_size
&& multiple_p (nunits, group_size));
if (repeating_p)
{
@@ -8188,7 +8211,7 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, slp_tree node,
it at least one to ensure the later computation for n_perms
proceed. */
nvectors_per_build = nstmts > 0 ? nstmts : 1;
- in_nlanes = DR_GROUP_SIZE (stmt_info) * 3;
+ in_nlanes = dr_group_size * 3;
}
else
{
@@ -8200,7 +8223,7 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, slp_tree node,
mask.new_vector (const_nunits, const_nunits, 1);
nelts_to_build = const_vf * group_size;
nvectors_per_build = 1;
- in_nlanes = const_vf * DR_GROUP_SIZE (stmt_info);
+ in_nlanes = const_vf * dr_group_size;
}
auto_sbitmap used_in_lanes (in_nlanes);
bitmap_clear (used_in_lanes);
@@ -8214,7 +8237,7 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, slp_tree node,
{
unsigned int iter_num = j / group_size;
unsigned int stmt_num = j % group_size;
- unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info) + perm[stmt_num]);
+ unsigned int i = (iter_num * dr_group_size + perm[stmt_num]);
bitmap_set_bit (used_in_lanes, i);
if (repeating_p)
{
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index b31971e..d642d3c 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1150,6 +1150,8 @@ vect_model_load_cost (vec_info *vinfo,
/* If the load is permuted then the alignment is determined by
the first group element not by the first scalar stmt DR. */
stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
+ if (!first_stmt_info)
+ first_stmt_info = stmt_info;
/* Record the cost for the permutation. */
unsigned n_perms, n_loads;
vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
@@ -2203,12 +2205,24 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
{
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
- stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
+ stmt_vec_info first_stmt_info;
+ unsigned int group_size;
+ unsigned HOST_WIDE_INT gap;
+ if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
+ {
+ first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
+ group_size = DR_GROUP_SIZE (first_stmt_info);
+ gap = DR_GROUP_GAP (first_stmt_info);
+ }
+ else
+ {
+ first_stmt_info = stmt_info;
+ group_size = 1;
+ gap = 0;
+ }
dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
- unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
bool single_element_p = (stmt_info == first_stmt_info
&& !DR_GROUP_NEXT_ELEMENT (stmt_info));
- unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
/* True if the vectorized statements would access beyond the last
@@ -2311,11 +2325,16 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
*memory_access_type = VMAT_ELEMENTWISE;
}
}
- else
+ else if (cmp == 0 && loop_vinfo)
{
- gcc_assert (!loop_vinfo || cmp > 0);
- *memory_access_type = VMAT_CONTIGUOUS;
+ gcc_assert (vls_type == VLS_LOAD);
+ *memory_access_type = VMAT_INVARIANT;
+ /* Invariant accesses perform only component accesses, alignment
+ is irrelevant for them. */
+ *alignment_support_scheme = dr_unaligned_supported;
}
+ else
+ *memory_access_type = VMAT_CONTIGUOUS;
/* When we have a contiguous access across loop iterations
but the access in the loop doesn't cover the full vector
@@ -2540,7 +2559,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
is irrelevant for them. */
*alignment_support_scheme = dr_unaligned_supported;
}
- else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
+ else if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node)
{
if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
masked_p,
@@ -9464,46 +9483,6 @@ vectorizable_load (vec_info *vinfo,
return false;
}
- if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
- {
- slp_perm = true;
-
- if (!loop_vinfo)
- {
- /* In BB vectorization we may not actually use a loaded vector
- accessing elements in excess of DR_GROUP_SIZE. */
- stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
- group_info = DR_GROUP_FIRST_ELEMENT (group_info);
- unsigned HOST_WIDE_INT nunits;
- unsigned j, k, maxk = 0;
- FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
- if (k > maxk)
- maxk = k;
- tree vectype = SLP_TREE_VECTYPE (slp_node);
- if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
- || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "BB vectorization with gaps at the end of "
- "a load is not supported\n");
- return false;
- }
- }
-
- auto_vec<tree> tem;
- unsigned n_perms;
- if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
- true, &n_perms))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION,
- vect_location,
- "unsupported load permutation\n");
- return false;
- }
- }
-
/* Invalidate assumptions made by dependence analysis when vectorization
on the unrolled body effectively re-orders stmts. */
if (!PURE_SLP_STMT (stmt_info)
@@ -9521,6 +9500,46 @@ vectorizable_load (vec_info *vinfo,
else
group_size = 1;
+ if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
+ {
+ slp_perm = true;
+
+ if (!loop_vinfo)
+ {
+ /* In BB vectorization we may not actually use a loaded vector
+ accessing elements in excess of DR_GROUP_SIZE. */
+ stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
+ group_info = DR_GROUP_FIRST_ELEMENT (group_info);
+ unsigned HOST_WIDE_INT nunits;
+ unsigned j, k, maxk = 0;
+ FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
+ if (k > maxk)
+ maxk = k;
+ tree vectype = SLP_TREE_VECTYPE (slp_node);
+ if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
+ || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "BB vectorization with gaps at the end of "
+ "a load is not supported\n");
+ return false;
+ }
+ }
+
+ auto_vec<tree> tem;
+ unsigned n_perms;
+ if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
+ true, &n_perms))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+ vect_location,
+ "unsupported load permutation\n");
+ return false;
+ }
+ }
+
vect_memory_access_type memory_access_type;
enum dr_alignment_support alignment_support_scheme;
int misalignment;
@@ -9898,10 +9917,19 @@ vectorizable_load (vec_info *vinfo,
|| (!slp && memory_access_type == VMAT_CONTIGUOUS))
grouped_load = false;
- if (grouped_load)
+ if (grouped_load
+ || (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()))
{
- first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
- group_size = DR_GROUP_SIZE (first_stmt_info);
+ if (grouped_load)
+ {
+ first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
+ group_size = DR_GROUP_SIZE (first_stmt_info);
+ }
+ else
+ {
+ first_stmt_info = stmt_info;
+ group_size = 1;
+ }
/* For SLP vectorization we directly vectorize a subchain
without permutation. */
if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())