diff options
author | Richard Sandiford <richard.sandiford@linaro.org> | 2018-01-03 07:15:20 +0000 |
---|---|---|
committer | Richard Sandiford <rsandifo@gcc.gnu.org> | 2018-01-03 07:15:20 +0000 |
commit | 86e3672871beff63eebb195642566224c9f80891 (patch) | |
tree | 2904209c48ac70db2e36ec1f0c1f7534c4b09f0a /gcc/tree-vect-loop.c | |
parent | 87133c45a06aa9c04cb6bc13b3b0733ec43efcec (diff) | |
download | gcc-86e3672871beff63eebb195642566224c9f80891.zip gcc-86e3672871beff63eebb195642566224c9f80891.tar.gz gcc-86e3672871beff63eebb195642566224c9f80891.tar.bz2 |
poly_int: current_vector_size and TARGET_AUTOVECTORIZE_VECTOR_SIZES
This patch changes the type of current_vector_size to poly_uint64.
It also changes TARGET_AUTOVECTORIZE_VECTOR_SIZES so that it fills
in a vector of possible sizes (as poly_uint64s) instead of returning
a bitmask. The documentation claimed that the hook didn't need to
include the default vector size (returned by preferred_simd_mode),
but that wasn't consistent with the omp-low.c usage.
2018-01-03 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
gcc/
* target.h (vector_sizes, auto_vector_sizes): New typedefs.
* target.def (autovectorize_vector_sizes): Return the vector sizes
by pointer, using vector_sizes rather than a bitmask.
* targhooks.h (default_autovectorize_vector_sizes): Update accordingly.
* targhooks.c (default_autovectorize_vector_sizes): Likewise.
* config/aarch64/aarch64.c (aarch64_autovectorize_vector_sizes):
Likewise.
* config/arc/arc.c (arc_autovectorize_vector_sizes): Likewise.
* config/arm/arm.c (arm_autovectorize_vector_sizes): Likewise.
* config/i386/i386.c (ix86_autovectorize_vector_sizes): Likewise.
* config/mips/mips.c (mips_autovectorize_vector_sizes): Likewise.
* omp-general.c (omp_max_vf): Likewise.
* omp-low.c (omp_clause_aligned_alignment): Likewise.
* optabs-query.c (can_vec_mask_load_store_p): Likewise.
* tree-vect-loop.c (vect_analyze_loop): Likewise.
* tree-vect-slp.c (vect_slp_bb): Likewise.
* doc/tm.texi: Regenerate.
* tree-vectorizer.h (current_vector_size): Change from an unsigned int
to a poly_uint64.
* tree-vect-stmts.c (get_vectype_for_scalar_type_and_size): Take
the vector size as a poly_uint64 rather than an unsigned int.
(current_vector_size): Change from an unsigned int to a poly_uint64.
(get_vectype_for_scalar_type): Update accordingly.
* tree.h (build_truth_vector_type): Take the size and number of
units as a poly_uint64 rather than an unsigned int.
(build_vector_type): Add a temporary overload that takes
the number of units as a poly_uint64 rather than an unsigned int.
* tree.c (make_vector_type): Likewise.
(build_truth_vector_type): Take the number of units as a poly_uint64
rather than an unsigned int.
Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r256131
Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r-- | gcc/tree-vect-loop.c | 89 |
1 file changed, 56 insertions, 33 deletions
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index c58a08d..557522c 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -2330,11 +2330,12 @@ loop_vec_info vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo) { loop_vec_info loop_vinfo; - unsigned int vector_sizes; + auto_vector_sizes vector_sizes; /* Autodetect first vector size we try. */ current_vector_size = 0; - vector_sizes = targetm.vectorize.autovectorize_vector_sizes (); + targetm.vectorize.autovectorize_vector_sizes (&vector_sizes); + unsigned int next_size = 0; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -2350,6 +2351,7 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo) return NULL; } + poly_uint64 autodetected_vector_size = 0; while (1) { /* Check the CFG characteristics of the loop (nesting, entry/exit). */ @@ -2376,18 +2378,28 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo) delete loop_vinfo; - vector_sizes &= ~current_vector_size; + if (next_size == 0) + autodetected_vector_size = current_vector_size; + + if (next_size < vector_sizes.length () + && known_eq (vector_sizes[next_size], autodetected_vector_size)) + next_size += 1; + if (fatal - || vector_sizes == 0 - || current_vector_size == 0) + || next_size == vector_sizes.length () + || known_eq (current_vector_size, 0U)) return NULL; /* Try the next biggest vector size. 
*/ - current_vector_size = 1 << floor_log2 (vector_sizes); + current_vector_size = vector_sizes[next_size++]; if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "***** Re-trying analysis with " - "vector size %d\n", current_vector_size); + { + dump_printf_loc (MSG_NOTE, vect_location, + "***** Re-trying analysis with " + "vector size "); + dump_dec (MSG_NOTE, current_vector_size); + dump_printf (MSG_NOTE, "\n"); + } } } @@ -7748,9 +7760,12 @@ vect_transform_loop (loop_vec_info loop_vinfo) dump_printf (MSG_NOTE, "\n"); } else - dump_printf_loc (MSG_NOTE, vect_location, - "LOOP EPILOGUE VECTORIZED (VS=%d)\n", - current_vector_size); + { + dump_printf_loc (MSG_NOTE, vect_location, + "LOOP EPILOGUE VECTORIZED (VS="); + dump_dec (MSG_NOTE, current_vector_size); + dump_printf (MSG_NOTE, ")\n"); + } } /* Free SLP instances here because otherwise stmt reference counting @@ -7767,31 +7782,39 @@ vect_transform_loop (loop_vec_info loop_vinfo) if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)) epilogue = NULL; + if (!PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK)) + epilogue = NULL; + if (epilogue) { - unsigned int vector_sizes - = targetm.vectorize.autovectorize_vector_sizes (); - vector_sizes &= current_vector_size - 1; - - if (!PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK)) - epilogue = NULL; - else if (!vector_sizes) - epilogue = NULL; - else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) - && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0 - && known_eq (vf, lowest_vf)) - { - int smallest_vec_size = 1 << ctz_hwi (vector_sizes); - int ratio = current_vector_size / smallest_vec_size; - unsigned HOST_WIDE_INT eiters = LOOP_VINFO_INT_NITERS (loop_vinfo) - - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); - eiters = eiters % lowest_vf; + auto_vector_sizes vector_sizes; + targetm.vectorize.autovectorize_vector_sizes (&vector_sizes); + unsigned int next_size = 0; - epilogue->nb_iterations_upper_bound = eiters - 1; + if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + && 
LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0 + && known_eq (vf, lowest_vf)) + { + unsigned int eiters + = (LOOP_VINFO_INT_NITERS (loop_vinfo) + - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)); + eiters = eiters % lowest_vf; + epilogue->nb_iterations_upper_bound = eiters - 1; + + unsigned int ratio; + while (next_size < vector_sizes.length () + && !(constant_multiple_p (current_vector_size, + vector_sizes[next_size], &ratio) + && eiters >= lowest_vf / ratio)) + next_size += 1; + } + else + while (next_size < vector_sizes.length () + && maybe_lt (current_vector_size, vector_sizes[next_size])) + next_size += 1; - if (eiters < lowest_vf / ratio) - epilogue = NULL; - } + if (next_size == vector_sizes.length ()) + epilogue = NULL; } if (epilogue) |