aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-loop.c
diff options
context:
space:
mode:
author Richard Sandiford <richard.sandiford@linaro.org> 2018-01-03 07:15:20 +0000
committer Richard Sandiford <rsandifo@gcc.gnu.org> 2018-01-03 07:15:20 +0000
commit 86e3672871beff63eebb195642566224c9f80891 (patch)
tree 2904209c48ac70db2e36ec1f0c1f7534c4b09f0a /gcc/tree-vect-loop.c
parent 87133c45a06aa9c04cb6bc13b3b0733ec43efcec (diff)
download gcc-86e3672871beff63eebb195642566224c9f80891.zip
gcc-86e3672871beff63eebb195642566224c9f80891.tar.gz
gcc-86e3672871beff63eebb195642566224c9f80891.tar.bz2
poly_int: current_vector_size and TARGET_AUTOVECTORIZE_VECTOR_SIZES
This patch changes the type of current_vector_size to poly_uint64.
It also changes TARGET_AUTOVECTORIZE_VECTOR_SIZES so that it fills
in a vector of possible sizes (as poly_uint64s) instead of returning
a bitmask.  The documentation claimed that the hook didn't need to
include the default vector size (returned by preferred_simd_mode),
but that wasn't consistent with the omp-low.c usage.

2018-01-03  Richard Sandiford  <richard.sandiford@linaro.org>
            Alan Hayward  <alan.hayward@arm.com>
            David Sherwood  <david.sherwood@arm.com>

gcc/
        * target.h (vector_sizes, auto_vector_sizes): New typedefs.
        * target.def (autovectorize_vector_sizes): Return the vector sizes
        by pointer, using vector_sizes rather than a bitmask.
        * targhooks.h (default_autovectorize_vector_sizes): Update accordingly.
        * targhooks.c (default_autovectorize_vector_sizes): Likewise.
        * config/aarch64/aarch64.c (aarch64_autovectorize_vector_sizes):
        Likewise.
        * config/arc/arc.c (arc_autovectorize_vector_sizes): Likewise.
        * config/arm/arm.c (arm_autovectorize_vector_sizes): Likewise.
        * config/i386/i386.c (ix86_autovectorize_vector_sizes): Likewise.
        * config/mips/mips.c (mips_autovectorize_vector_sizes): Likewise.
        * omp-general.c (omp_max_vf): Likewise.
        * omp-low.c (omp_clause_aligned_alignment): Likewise.
        * optabs-query.c (can_vec_mask_load_store_p): Likewise.
        * tree-vect-loop.c (vect_analyze_loop): Likewise.
        * tree-vect-slp.c (vect_slp_bb): Likewise.
        * doc/tm.texi: Regenerate.
        * tree-vectorizer.h (current_vector_size): Change from an unsigned int
        to a poly_uint64.
        * tree-vect-stmts.c (get_vectype_for_scalar_type_and_size): Take
        the vector size as a poly_uint64 rather than an unsigned int.
        (current_vector_size): Change from an unsigned int to a poly_uint64.
        (get_vectype_for_scalar_type): Update accordingly.
        * tree.h (build_truth_vector_type): Take the size and number of
        units as a poly_uint64 rather than an unsigned int.
        (build_vector_type): Add a temporary overload that takes the number
        of units as a poly_uint64 rather than an unsigned int.
        * tree.c (make_vector_type): Likewise.
        (build_truth_vector_type): Take the number of units as a poly_uint64
        rather than an unsigned int.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>

From-SVN: r256131
Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r-- gcc/tree-vect-loop.c 89
1 files changed, 56 insertions, 33 deletions
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index c58a08d..557522c 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2330,11 +2330,12 @@ loop_vec_info
vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo)
{
loop_vec_info loop_vinfo;
- unsigned int vector_sizes;
+ auto_vector_sizes vector_sizes;
/* Autodetect first vector size we try. */
current_vector_size = 0;
- vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
+ targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
+ unsigned int next_size = 0;
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -2350,6 +2351,7 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo)
return NULL;
}
+ poly_uint64 autodetected_vector_size = 0;
while (1)
{
/* Check the CFG characteristics of the loop (nesting, entry/exit). */
@@ -2376,18 +2378,28 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo)
delete loop_vinfo;
- vector_sizes &= ~current_vector_size;
+ if (next_size == 0)
+ autodetected_vector_size = current_vector_size;
+
+ if (next_size < vector_sizes.length ()
+ && known_eq (vector_sizes[next_size], autodetected_vector_size))
+ next_size += 1;
+
if (fatal
- || vector_sizes == 0
- || current_vector_size == 0)
+ || next_size == vector_sizes.length ()
+ || known_eq (current_vector_size, 0U))
return NULL;
/* Try the next biggest vector size. */
- current_vector_size = 1 << floor_log2 (vector_sizes);
+ current_vector_size = vector_sizes[next_size++];
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "***** Re-trying analysis with "
- "vector size %d\n", current_vector_size);
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "***** Re-trying analysis with "
+ "vector size ");
+ dump_dec (MSG_NOTE, current_vector_size);
+ dump_printf (MSG_NOTE, "\n");
+ }
}
}
@@ -7748,9 +7760,12 @@ vect_transform_loop (loop_vec_info loop_vinfo)
dump_printf (MSG_NOTE, "\n");
}
else
- dump_printf_loc (MSG_NOTE, vect_location,
- "LOOP EPILOGUE VECTORIZED (VS=%d)\n",
- current_vector_size);
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "LOOP EPILOGUE VECTORIZED (VS=");
+ dump_dec (MSG_NOTE, current_vector_size);
+ dump_printf (MSG_NOTE, ")\n");
+ }
}
/* Free SLP instances here because otherwise stmt reference counting
@@ -7767,31 +7782,39 @@ vect_transform_loop (loop_vec_info loop_vinfo)
if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
epilogue = NULL;
+ if (!PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK))
+ epilogue = NULL;
+
if (epilogue)
{
- unsigned int vector_sizes
- = targetm.vectorize.autovectorize_vector_sizes ();
- vector_sizes &= current_vector_size - 1;
-
- if (!PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK))
- epilogue = NULL;
- else if (!vector_sizes)
- epilogue = NULL;
- else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0
- && known_eq (vf, lowest_vf))
- {
- int smallest_vec_size = 1 << ctz_hwi (vector_sizes);
- int ratio = current_vector_size / smallest_vec_size;
- unsigned HOST_WIDE_INT eiters = LOOP_VINFO_INT_NITERS (loop_vinfo)
- - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
- eiters = eiters % lowest_vf;
+ auto_vector_sizes vector_sizes;
+ targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
+ unsigned int next_size = 0;
- epilogue->nb_iterations_upper_bound = eiters - 1;
+ if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0
+ && known_eq (vf, lowest_vf))
+ {
+ unsigned int eiters
+ = (LOOP_VINFO_INT_NITERS (loop_vinfo)
+ - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
+ eiters = eiters % lowest_vf;
+ epilogue->nb_iterations_upper_bound = eiters - 1;
+
+ unsigned int ratio;
+ while (next_size < vector_sizes.length ()
+ && !(constant_multiple_p (current_vector_size,
+ vector_sizes[next_size], &ratio)
+ && eiters >= lowest_vf / ratio))
+ next_size += 1;
+ }
+ else
+ while (next_size < vector_sizes.length ()
+ && maybe_lt (current_vector_size, vector_sizes[next_size]))
+ next_size += 1;
- if (eiters < lowest_vf / ratio)
- epilogue = NULL;
- }
+ if (next_size == vector_sizes.length ())
+ epilogue = NULL;
}
if (epilogue)