about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Richard Sandiford <richard.sandiford@arm.com> 2023-11-27 13:38:16 +0000
committer Richard Sandiford <richard.sandiford@arm.com> 2023-11-27 13:38:16 +0000
commit 061a82fa2b751b42d0d8ddfcd45367c848d3ee64 (patch)
tree 5dcbf2e93a40b610c34dade0e88cf7b09c79574e
parent 5b33cf3a3a2025a4856f90fea8bd04884c2f6b31 (diff)
download gcc-061a82fa2b751b42d0d8ddfcd45367c848d3ee64.zip
gcc-061a82fa2b751b42d0d8ddfcd45367c848d3ee64.tar.gz
gcc-061a82fa2b751b42d0d8ddfcd45367c848d3ee64.tar.bz2
vect: Avoid duplicate_and_interleave for uniform vectors [PR112661]
can_duplicate_and_interleave_p checks whether we know a way of building a particular VLA SLP invariant. g:60034ecf25597bd515f skipped that test for booleans, to support MASK_LEN_GATHER_LOAD calls with a dummy all-ones mask.

But there's nothing fundamentally different about VLA masks vs VLA data vectors. If we have a VLA mask that isn't all-ones, we need some way of loading it. This ultimately led to the ICE in the PR.

This patch fixes it by applying can_duplicate_and_interleave_p to masks, while also adding a special path for uniform vectors (of all kinds) to support the MASK_LEN_GATHER_LOAD usage.

This also fixes an XFAIL in pr36648.cc for SVE.

The patch is mostly Richard's. My only changes were to skip redundant conversions and to use gimple_build_vector_from_val for all eligible vectors.

2023-11-27  Richard Biener  <rguenther@suse.de>
            Richard Sandiford  <richard.sandiford@arm.com>

gcc/
    PR tree-optimization/112661
    * tree-vect-slp.cc (vect_get_and_check_slp_defs): Defer duplicate-and-interleave test to...
    (vect_build_slp_tree_2): ...here, once we have all the operands. Skip the test for uniform vectors.
    (vect_create_constant_vectors): Detect uniform vectors. Avoid redundant conversions in that case. Use gimple_build_vector_from_val to build the vector.

gcc/testsuite/
    * g++.dg/vect/pr36648.cc: Remove XFAIL for VLA load-lanes.
-rw-r--r-- gcc/testsuite/g++.dg/vect/pr36648.cc 2
-rw-r--r-- gcc/tree-vect-slp.cc 56
2 files changed, 40 insertions, 18 deletions
diff --git a/gcc/testsuite/g++.dg/vect/pr36648.cc b/gcc/testsuite/g++.dg/vect/pr36648.cc
index 8d24d3d..7bda828 100644
--- a/gcc/testsuite/g++.dg/vect/pr36648.cc
+++ b/gcc/testsuite/g++.dg/vect/pr36648.cc
@@ -25,6 +25,6 @@ int main() { }
targets, ! vect_no_align is a sufficient test. */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { { { ! vect_no_align } && { ! powerpc*-*-* } } || { powerpc*-*-* && vect_hw_misalign } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { { { ! vect_no_align } && { ! powerpc*-*-* } } || { powerpc*-*-* && vect_hw_misalign } } xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { { { ! vect_no_align } && { ! powerpc*-*-* } } || { powerpc*-*-* && vect_hw_misalign } } } } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 4a09b3c..6799b93 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -763,18 +763,6 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
{
tree type = TREE_TYPE (oprnd);
dt = dts[i];
- if ((dt == vect_constant_def
- || dt == vect_external_def)
- && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
- && TREE_CODE (type) != BOOLEAN_TYPE
- && !can_duplicate_and_interleave_p (vinfo, stmts.length (), type))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Build SLP failed: invalid type of def "
- "for variable-length SLP %T\n", oprnd);
- return -1;
- }
/* For the swapping logic below force vect_reduction_def
for the reduction op in a SLP reduction group. */
@@ -2395,7 +2383,7 @@ out:
/* Create SLP_TREE nodes for the definition node/s. */
FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
{
- slp_tree child;
+ slp_tree child = nullptr;
unsigned int j;
/* We're skipping certain operands from processing, for example
@@ -2443,6 +2431,29 @@ out:
if (oprnd_info->first_dt == vect_external_def
|| oprnd_info->first_dt == vect_constant_def)
{
+ if (!GET_MODE_SIZE (vinfo->vector_mode).is_constant ())
+ {
+ tree op0;
+ tree uniform_val = op0 = oprnd_info->ops[0];
+ for (j = 1; j < oprnd_info->ops.length (); ++j)
+ if (!operand_equal_p (uniform_val, oprnd_info->ops[j]))
+ {
+ uniform_val = NULL_TREE;
+ break;
+ }
+ if (!uniform_val
+ && !can_duplicate_and_interleave_p (vinfo,
+ oprnd_info->ops.length (),
+ TREE_TYPE (op0)))
+ {
+ matches[j] = false;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Build SLP failed: invalid type of def "
+ "for variable-length SLP %T\n", op0);
+ goto fail;
+ }
+ }
slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
oprnd_info->ops = vNULL;
@@ -8157,6 +8168,7 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
number_of_places_left_in_vector = nunits;
constant_p = true;
+ tree uniform_elt = NULL_TREE;
tree_vector_builder elts (vector_type, nunits, 1);
elts.quick_grow (nunits);
stmt_vec_info insert_after = NULL;
@@ -8166,8 +8178,14 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
{
/* Create 'vect_ = {op0,op1,...,opn}'. */
- number_of_places_left_in_vector--;
tree orig_op = op;
+ if (number_of_places_left_in_vector == nunits)
+ uniform_elt = op;
+ else if (uniform_elt && operand_equal_p (uniform_elt, op))
+ op = elts[number_of_places_left_in_vector];
+ else
+ uniform_elt = NULL_TREE;
+ number_of_places_left_in_vector--;
if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
{
if (CONSTANT_CLASS_P (op))
@@ -8236,9 +8254,13 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
if (number_of_places_left_in_vector == 0)
{
- if (constant_p
- ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
- : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
+ auto type_nunits = TYPE_VECTOR_SUBPARTS (vector_type);
+ if (uniform_elt)
+ vec_cst = gimple_build_vector_from_val (&ctor_seq, vector_type,
+ elts[0]);
+ else if (constant_p
+ ? multiple_p (type_nunits, nunits)
+ : known_eq (type_nunits, nunits))
vec_cst = gimple_build_vector (&ctor_seq, &elts);
else
{