aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-perm-12.c1
-rw-r--r--gcc/testsuite/gcc.target/i386/pr110310.c13
-rw-r--r--gcc/tree-vect-loop-manip.cc104
-rw-r--r--gcc/tree-vect-loop.cc98
4 files changed, 102 insertions, 114 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-12.c b/gcc/testsuite/gcc.dg/vect/slp-perm-12.c
index 113223a..635fca5 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-12.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-12.c
@@ -1,5 +1,6 @@
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_pack_trunc } */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-additional-options "-msse4" { target { i?86-*-* x86_64-*-* } } } */
#include "tree-vect.h"
diff --git a/gcc/testsuite/gcc.target/i386/pr110310.c b/gcc/testsuite/gcc.target/i386/pr110310.c
new file mode 100644
index 0000000..dce388a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110310.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=znver4 -fdump-tree-vect-optimized" } */
+
+void foo (int * __restrict a, int *b)
+{
+ for (int i = 0; i < 20; ++i)
+ a[i] = b[i] + 42;
+}
+
+/* We should vectorize the main loop with AVX512 and the epilog with SSE. */
+
+/* { dg-final { scan-tree-dump "optimized: loop vectorized using 64 byte vectors" "vect" } } */
+/* { dg-final { scan-tree-dump "optimized: loop vectorized using 16 byte vectors" "vect" } } */
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 20f570e..6c452e0 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -2882,34 +2882,6 @@ slpeel_update_phi_nodes_for_lcssa (class loop *epilog)
rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
}
-/* EPILOGUE_VINFO is an epilogue loop that we now know would need to
- iterate exactly CONST_NITERS times. Make a final decision about
- whether the epilogue loop should be used, returning true if so. */
-
-static bool
-vect_update_epilogue_niters (loop_vec_info epilogue_vinfo,
- unsigned HOST_WIDE_INT const_niters)
-{
- /* Avoid wrap-around when computing const_niters - 1. Also reject
- using an epilogue loop for a single scalar iteration, even if
- we could in principle implement that using partial vectors. */
- unsigned int gap_niters = LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo);
- if (const_niters <= gap_niters + 1)
- return false;
-
- /* Install the number of iterations. */
- tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (epilogue_vinfo));
- tree niters_tree = build_int_cst (niters_type, const_niters);
- tree nitersm1_tree = build_int_cst (niters_type, const_niters - 1);
-
- LOOP_VINFO_NITERS (epilogue_vinfo) = niters_tree;
- LOOP_VINFO_NITERSM1 (epilogue_vinfo) = nitersm1_tree;
-
- /* Decide what to do if the number of epilogue iterations is not
- a multiple of the epilogue loop's vectorization factor. */
- return vect_determine_partial_vectors_and_peeling (epilogue_vinfo, true);
-}
-
/* LOOP_VINFO is an epilogue loop whose corresponding main loop can be skipped.
Return a value that equals:
@@ -3039,7 +3011,6 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
int estimated_vf;
int prolog_peeling = 0;
bool vect_epilogues = loop_vinfo->epilogue_vinfos.length () > 0;
- bool vect_epilogues_updated_niters = false;
/* We currently do not support prolog peeling if the target alignment is not
known at compile time. 'vect_gen_prolog_loop_niters' depends on the
target alignment being constant. */
@@ -3167,36 +3138,6 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo);
edge update_e = NULL, skip_e = NULL;
unsigned int lowest_vf = constant_lower_bound (vf);
- /* If we know the number of scalar iterations for the main loop we should
- check whether after the main loop there are enough iterations left over
- for the epilogue. */
- if (vect_epilogues
- && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- && prolog_peeling >= 0
- && known_eq (vf, lowest_vf))
- {
- unsigned HOST_WIDE_INT eiters
- = (LOOP_VINFO_INT_NITERS (loop_vinfo)
- - LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
-
- eiters -= prolog_peeling;
- eiters
- = eiters % lowest_vf + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
-
- while (!vect_update_epilogue_niters (epilogue_vinfo, eiters))
- {
- delete epilogue_vinfo;
- epilogue_vinfo = NULL;
- if (loop_vinfo->epilogue_vinfos.length () == 0)
- {
- vect_epilogues = false;
- break;
- }
- epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
- loop_vinfo->epilogue_vinfos.ordered_remove (0);
- }
- vect_epilogues_updated_niters = true;
- }
/* Prolog loop may be skipped. */
bool skip_prolog = (prolog_peeling != 0);
/* Skip this loop to epilog when there are not enough iterations to enter this
@@ -3473,9 +3414,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
skip_e edge. */
if (skip_vector)
{
- gcc_assert (update_e != NULL
- && skip_e != NULL
- && !vect_epilogues_updated_niters);
+ gcc_assert (update_e != NULL && skip_e != NULL);
gphi *new_phi = create_phi_node (make_ssa_name (TREE_TYPE (niters)),
update_e->dest);
tree new_ssa = make_ssa_name (TREE_TYPE (niters));
@@ -3506,28 +3445,25 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
loop and its prologue. */
*advance = niters;
- if (!vect_epilogues_updated_niters)
- {
- /* Subtract the number of iterations performed by the vectorized loop
- from the number of total iterations. */
- tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
- before_loop_niters,
- niters);
-
- LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters;
- LOOP_VINFO_NITERSM1 (epilogue_vinfo)
- = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters),
- epilogue_niters,
- build_one_cst (TREE_TYPE (epilogue_niters)));
-
- /* Decide what to do if the number of epilogue iterations is not
- a multiple of the epilogue loop's vectorization factor.
- We should have rejected the loop during the analysis phase
- if this fails. */
- if (!vect_determine_partial_vectors_and_peeling (epilogue_vinfo,
- true))
- gcc_unreachable ();
- }
+ /* Subtract the number of iterations performed by the vectorized loop
+ from the number of total iterations. */
+ tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
+ before_loop_niters,
+ niters);
+
+ LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters;
+ LOOP_VINFO_NITERSM1 (epilogue_vinfo)
+ = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters),
+ epilogue_niters,
+ build_one_cst (TREE_TYPE (epilogue_niters)));
+
+ /* Decide what to do if the number of epilogue iterations is not
+ a multiple of the epilogue loop's vectorization factor.
+ We should have rejected the loop during the analysis phase
+ if this fails. */
+ bool res = vect_determine_partial_vectors_and_peeling (epilogue_vinfo,
+ true);
+ gcc_assert (res);
}
adjust_vec.release ();
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 0a03f56..f39a1ec 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -2144,14 +2144,76 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo,
/* Only loops that can handle partially-populated vectors can have iteration
counts less than the vectorization factor. */
- if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
+ if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
+ && vect_known_niters_smaller_than_vf (loop_vinfo))
{
- if (vect_known_niters_smaller_than_vf (loop_vinfo))
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: iteration count smaller than "
+ "vectorization factor.\n");
+ return 0;
+ }
+
+ /* If we know the number of iterations we can do better: for the
+ epilogue we can also decide whether the main loop leaves us
+ with enough iterations, preferring a smaller vector epilogue that
+ is then also possibly used for the case we skip the vector loop. */
+ if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
+ && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+ {
+ widest_int scalar_niters
+ = wi::to_widest (LOOP_VINFO_NITERSM1 (loop_vinfo)) + 1;
+ if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
+ {
+ loop_vec_info orig_loop_vinfo
+ = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
+ unsigned lowest_vf
+ = constant_lower_bound (LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo));
+ int prolog_peeling = 0;
+ if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
+ prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo);
+ if (prolog_peeling >= 0
+ && known_eq (LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo),
+ lowest_vf))
+ {
+ unsigned gap
+ = LOOP_VINFO_PEELING_FOR_GAPS (orig_loop_vinfo) ? 1 : 0;
+ scalar_niters = ((scalar_niters - gap - prolog_peeling)
+ % lowest_vf + gap);
+ if (scalar_niters == 0)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "not vectorized: loop never entered\n");
+ return 0;
+ }
+ }
+ }
+
+ /* Check that the loop processes at least one full vector. */
+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ if (known_lt (scalar_niters, vf))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not vectorized: iteration count smaller than "
- "vectorization factor.\n");
+ "loop does not have enough iterations "
+ "to support vectorization.\n");
+ return 0;
+ }
+
+ /* If we need to peel an extra epilogue iteration to handle data
+ accesses with gaps, check that there are enough scalar iterations
+ available.
+
+ The check above is redundant with this one when peeling for gaps,
+ but the distinction is useful for diagnostics. */
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+ && known_le (scalar_niters, vf))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "loop does not have enough iterations "
+ "to support peeling for gaps.\n");
return 0;
}
}
@@ -2502,31 +2564,6 @@ vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo,
LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)));
}
- if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
- {
- /* Check that the loop processes at least one full vector. */
- poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- tree scalar_niters = LOOP_VINFO_NITERS (loop_vinfo);
- if (known_lt (wi::to_widest (scalar_niters), vf))
- return opt_result::failure_at (vect_location,
- "loop does not have enough iterations"
- " to support vectorization.\n");
-
- /* If we need to peel an extra epilogue iteration to handle data
- accesses with gaps, check that there are enough scalar iterations
- available.
-
- The check above is redundant with this one when peeling for gaps,
- but the distinction is useful for diagnostics. */
- tree scalar_nitersm1 = LOOP_VINFO_NITERSM1 (loop_vinfo);
- if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
- && known_lt (wi::to_widest (scalar_nitersm1), vf))
- return opt_result::failure_at (vect_location,
- "loop does not have enough iterations"
- " to support peeling for gaps.\n");
- }
-
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
= (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
&& need_peeling_or_partial_vectors_p);
@@ -3002,7 +3039,8 @@ start_over:
assuming that the loop will be used as a main loop. We will redo
this analysis later if we instead decide to use the loop as an
epilogue loop. */
- ok = vect_determine_partial_vectors_and_peeling (loop_vinfo, false);
+ ok = vect_determine_partial_vectors_and_peeling
+ (loop_vinfo, LOOP_VINFO_EPILOGUE_P (loop_vinfo));
if (!ok)
return ok;