aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorRichard Biener <rguenther@suse.de>2024-12-02 11:07:46 +0100
committerRichard Biener <rguenth@gcc.gnu.org>2024-12-02 14:04:35 +0100
commit5ab3f091b3eb42795340d3c9cea8aaec2060693c (patch)
treeb2a3c3aaaadad02f228425810a67ff8cfb7e8a87 /gcc
parente36eae19f3a4cc9e5efa9ebfa31e081c7ee52fdc (diff)
downloadgcc-5ab3f091b3eb42795340d3c9cea8aaec2060693c.zip
gcc-5ab3f091b3eb42795340d3c9cea8aaec2060693c.tar.gz
gcc-5ab3f091b3eb42795340d3c9cea8aaec2060693c.tar.bz2
tree-optimization/116352 - SLP scheduling and stmt order
The PR uncovers unchecked constraints on the ability to code-generate with SLP but also latent issues with regard to stmt order checking, since loop (early-break) and BB (for quite some time) vectorization are no longer constrained to single BBs. In particular, get_later_stmt simply compares UIDs of stmts, but that's only reliable when they are in the same BB. For the PR in question the problematical case is demoting an SLP node to external, which fails to check that we can actually code-generate this in the way we do (using get_later_stmt). The following thus adds checking that we demote to external only when all defs are from the same BB. We no longer vectorize gcc.dg/vect/bb-slp-49.c but the testcase was for a wrong-code issue and the vectorization done is a no-op. PR tree-optimization/116352 PR tree-optimization/117876 * tree-vect-slp.cc (vect_slp_can_convert_to_external): New. (vect_slp_convert_to_external): Call it. (vect_build_slp_tree_2): Likewise. * gcc.dg/vect/pr116352.c: New testcase. * gcc.dg/vect/bb-slp-49.c: Remove vectorization check.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/testsuite/gcc.dg/vect/bb-slp-49.c3
-rw-r--r--gcc/testsuite/gcc.dg/vect/pr116352.c34
-rw-r--r--gcc/tree-vect-slp.cc29
3 files changed, 58 insertions, 8 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-49.c b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c
index e7101fc..c0ad5d7 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-49.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c
@@ -23,6 +23,5 @@ main ()
return 0;
}
-/* See that we vectorize an SLP instance. */
+/* See that we try to vectorize an SLP instance. */
/* { dg-final { scan-tree-dump "Analyzing vectorizable constructor" "slp1" } } */
-/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "slp1" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr116352.c b/gcc/testsuite/gcc.dg/vect/pr116352.c
new file mode 100644
index 0000000..3fe537c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr116352.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+
+static void addPrior(float center_x, float center_y, float width, float height,
+ bool normalized, float *dst)
+{ /* Stores four floats, center -/+ half the extent, into dst[0..3]. */
+ if (normalized)
+ {
+ dst[0] = (center_x - width * 0.5f);
+ dst[1] = (center_y - height * 0.5f);
+ dst[2] = (center_x + width * 0.5f);
+ dst[3] = (center_y + height * 0.5f);
+ }
+ else
+ { /* Same stores, except dst[2] and dst[3] also have 1.0f subtracted. */
+ dst[0] = center_x - width * 0.5f;
+ dst[1] = center_y - height * 0.5f;
+ dst[2] = center_x + width * 0.5f - 1.0f;
+ dst[3] = center_y + height * 0.5f - 1.0f;
+ }
+}
+void forward(float *outputPtr, int _offsetsXs, float *_offsetsX,
+ float *_offsetsY, float _stepX, float _stepY,
+ bool _bboxesNormalized, float _boxWidth, float _boxHeight)
+{ /* For each j in [0, _offsetsXs), writes one 4-float box via addPrior. */
+ for (int j = 0; j < _offsetsXs; ++j)
+ {
+ float center_x = (_offsetsX[j]) * _stepX; /* X offset scaled by step */
+ float center_y = (_offsetsY[j]) * _stepY; /* Y offset scaled by step */
+ addPrior(center_x, center_y, _boxWidth, _boxHeight, _bboxesNormalized,
+ outputPtr);
+ outputPtr += 4; /* advance past the four floats just written */
+ }
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index ec986cc..1799d5a 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -67,6 +67,7 @@ static int vectorizable_slp_permutation_1 (vec_info *, gimple_stmt_iterator *,
static bool vectorizable_slp_permutation (vec_info *, gimple_stmt_iterator *,
slp_tree, stmt_vector_for_cost *);
static void vect_print_slp_tree (dump_flags_t, dump_location_t, slp_tree);
+static bool vect_slp_can_convert_to_external (const vec<stmt_vec_info> &);
static object_allocator<_slp_tree> *slp_tree_pool;
static slp_tree slp_first_node;
@@ -2887,7 +2888,8 @@ fail:
for (j = 0; j < group_size; ++j)
if (!matches[j])
break;
- if (!known_ge (j, TYPE_VECTOR_SUBPARTS (vectype)))
+ if (!known_ge (j, TYPE_VECTOR_SUBPARTS (vectype))
+ && vect_slp_can_convert_to_external (oprnd_info->def_stmts))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -7764,6 +7766,24 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
node, node_instance, cost_vec);
}
+/* Return true if the set of internal defs STMTS can be externalized. */
+
+static bool
+vect_slp_can_convert_to_external (const vec<stmt_vec_info> &stmts)
+{
+ basic_block bb = NULL;
+ for (stmt_vec_info stmt : stmts)
+ if (!stmt) /* A NULL entry rules out externalizing the set. */
+ return false;
+ /* Constant generation uses get_later_stmt which can only handle defs
+ from the same BB: record the first def's BB, then require a match. */
+ else if (!bb)
+ bb = gimple_bb (stmt->stmt);
+ else if (gimple_bb (stmt->stmt) != bb)
+ return false;
+ return true; /* Also the result when STMTS is empty. */
+}
+
/* Try to build NODE from scalars, returning true on success.
NODE_INSTANCE is the SLP instance that contains NODE. */
@@ -7779,13 +7799,10 @@ vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
|| !SLP_TREE_SCALAR_STMTS (node).exists ()
|| vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node))
/* Force the mask use to be built from scalars instead. */
- || VECTOR_BOOLEAN_TYPE_P (SLP_TREE_VECTYPE (node)))
+ || VECTOR_BOOLEAN_TYPE_P (SLP_TREE_VECTYPE (node))
+ || !vect_slp_can_convert_to_external (SLP_TREE_SCALAR_STMTS (node)))
return false;
- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
- if (!stmt_info)
- return false;
-
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"Building vector operands of %p from scalars instead\n",