diff options
author | Kewen Lin <linkw@gcc.gnu.org> | 2020-06-12 01:14:40 -0500 |
---|---|---|
committer | Kewen Lin <linkw@linux.ibm.com> | 2020-06-12 02:18:20 -0500 |
commit | 04f0546e3e468e67dcd8cd20265796887805de3e (patch) | |
tree | 6545e2205e4a7f87d8eb992bb09ab582918d484d /gcc/tree-vect-loop.c | |
parent | 3a179232a743b2b1987b21034cd5bb3bc660ee85 (diff) | |
download | gcc-04f0546e3e468e67dcd8cd20265796887805de3e.zip gcc-04f0546e3e468e67dcd8cd20265796887805de3e.tar.gz gcc-04f0546e3e468e67dcd8cd20265796887805de3e.tar.bz2 |
vect: Factor out and rename some functions/macros
Power supports vector memory access with length (in bytes) instructions.
Like existing fully masking for SVE, it is another approach to vectorize
the loop using partially-populated vectors.
As Richard Sandiford suggested, we should share the code between approaches
using partial vectors where possible. This patch is to:
1) factor out two functions:
- vect_min_prec_for_max_niters
- vect_known_niters_smaller_than_vf.
2) rename four functions:
- vect_iv_limit_for_full_masking
- check_load_store_masking
- vect_set_loop_condition_masked
- vect_set_loop_condition_unmasked
3) rename macros LOOP_VINFO_MASK_COMPARE_TYPE and LOOP_VINFO_MASK_IV_TYPE.
Bootstrapped/regtested on aarch64-linux-gnu.
gcc/ChangeLog:
* tree-vect-loop-manip.c (vect_set_loop_controls_directly): Rename
LOOP_VINFO_MASK_COMPARE_TYPE to LOOP_VINFO_RGROUP_COMPARE_TYPE. Rename
LOOP_VINFO_MASK_IV_TYPE to LOOP_VINFO_RGROUP_IV_TYPE.
(vect_set_loop_condition_masked): Renamed to ...
(vect_set_loop_condition_partial_vectors): ... this. Rename
LOOP_VINFO_MASK_COMPARE_TYPE to LOOP_VINFO_RGROUP_COMPARE_TYPE. Rename
vect_iv_limit_for_full_masking to vect_iv_limit_for_partial_vectors.
(vect_set_loop_condition_unmasked): Renamed to ...
(vect_set_loop_condition_normal): ... this.
(vect_set_loop_condition): Rename vect_set_loop_condition_unmasked to
vect_set_loop_condition_normal. Rename vect_set_loop_condition_masked
to vect_set_loop_condition_partial_vectors.
(vect_prepare_for_masked_peels): Rename LOOP_VINFO_MASK_COMPARE_TYPE
to LOOP_VINFO_RGROUP_COMPARE_TYPE.
* tree-vect-loop.c (vect_known_niters_smaller_than_vf): New, factored
out from ...
(vect_analyze_loop_costing): ... this.
(_loop_vec_info::_loop_vec_info): Rename mask_compare_type to
rgroup_compare_type.
(vect_min_prec_for_max_niters): New, factored out from ...
(vect_verify_full_masking): ... this. Rename
vect_iv_limit_for_full_masking to vect_iv_limit_for_partial_vectors.
Rename LOOP_VINFO_MASK_COMPARE_TYPE to LOOP_VINFO_RGROUP_COMPARE_TYPE.
Rename LOOP_VINFO_MASK_IV_TYPE to LOOP_VINFO_RGROUP_IV_TYPE.
(vectorizable_reduction): Update some dumpings with partial
vectors instead of fully-masked.
(vectorizable_live_operation): Likewise.
(vect_iv_limit_for_full_masking): Renamed to ...
(vect_iv_limit_for_partial_vectors): ... this.
* tree-vect-stmts.c (check_load_store_masking): Renamed to ...
(check_load_store_for_partial_vectors): ... this. Update some
dumpings with partial vectors instead of fully-masked.
(vectorizable_store): Rename check_load_store_masking to
check_load_store_for_partial_vectors.
(vectorizable_load): Likewise.
* tree-vectorizer.h (LOOP_VINFO_MASK_COMPARE_TYPE): Renamed to ...
(LOOP_VINFO_RGROUP_COMPARE_TYPE): ... this.
(LOOP_VINFO_MASK_IV_TYPE): Renamed to ...
(LOOP_VINFO_RGROUP_IV_TYPE): ... this.
(vect_iv_limit_for_full_masking): Renamed to ...
(vect_iv_limit_for_partial_vectors): ... this.
(_loop_vec_info): Rename mask_compare_type to rgroup_compare_type.
Rename iv_type to rgroup_iv_type.
Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r-- | gcc/tree-vect-loop.c | 116 |
1 files changed, 72 insertions, 44 deletions
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index f4d47e0..6311e79 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -802,7 +802,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) vectorization_factor (0), max_vectorization_factor (0), mask_skip_niters (NULL_TREE), - mask_compare_type (NULL_TREE), + rgroup_compare_type (NULL_TREE), simd_if_cond (NULL_TREE), unaligned_dr (NULL), peeling_for_alignment (0), @@ -963,14 +963,39 @@ vect_get_max_nscalars_per_iter (loop_vec_info loop_vinfo) return res; } +/* Calculate the minimum precision necessary to represent: + + MAX_NITERS * FACTOR + + as an unsigned integer, where MAX_NITERS is the maximum number of + loop header iterations for the original scalar form of LOOP_VINFO. */ + +static unsigned +vect_min_prec_for_max_niters (loop_vec_info loop_vinfo, unsigned int factor) +{ + class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + + /* Get the maximum number of iterations that is representable + in the counter type. */ + tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo)); + widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1; + + /* Get a more refined estimate for the number of iterations. */ + widest_int max_back_edges; + if (max_loop_iterations (loop, &max_back_edges)) + max_ni = wi::smin (max_ni, max_back_edges + 1); + + /* Work out how many bits we need to represent the limit. */ + return wi::min_precision (max_ni * factor, UNSIGNED); +} + /* Each statement in LOOP_VINFO can be masked where necessary. Check whether we can actually generate the masks required. Return true if so, - storing the type of the scalar IV in LOOP_VINFO_MASK_COMPARE_TYPE. */ + storing the type of the scalar IV in LOOP_VINFO_RGROUP_COMPARE_TYPE. 
*/ static bool vect_verify_full_masking (loop_vec_info loop_vinfo) { - class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); unsigned int min_ni_width; unsigned int max_nscalars_per_iter = vect_get_max_nscalars_per_iter (loop_vinfo); @@ -981,27 +1006,15 @@ vect_verify_full_masking (loop_vec_info loop_vinfo) if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ()) return false; - /* Get the maximum number of iterations that is representable - in the counter type. */ - tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo)); - widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1; - - /* Get a more refined estimate for the number of iterations. */ - widest_int max_back_edges; - if (max_loop_iterations (loop, &max_back_edges)) - max_ni = wi::smin (max_ni, max_back_edges + 1); - - /* Account for rgroup masks, in which each bit is replicated N times. */ - max_ni *= max_nscalars_per_iter; - /* Work out how many bits we need to represent the limit. */ - min_ni_width = wi::min_precision (max_ni, UNSIGNED); + min_ni_width + = vect_min_prec_for_max_niters (loop_vinfo, max_nscalars_per_iter); /* Find a scalar mode for which WHILE_ULT is supported. */ opt_scalar_int_mode cmp_mode_iter; tree cmp_type = NULL_TREE; tree iv_type = NULL_TREE; - widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo); + widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo); unsigned int iv_precision = UINT_MAX; if (iv_limit != -1) @@ -1054,8 +1067,8 @@ vect_verify_full_masking (loop_vec_info loop_vinfo) if (!cmp_type) return false; - LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo) = cmp_type; - LOOP_VINFO_MASK_IV_TYPE (loop_vinfo) = iv_type; + LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo) = cmp_type; + LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo) = iv_type; return true; } @@ -1624,6 +1637,27 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo) return opt_result::success (); } +/* Return true if we know that the iteration count is smaller than the + vectorization factor. 
Return false if it isn't, or if we can't be sure + either way. */ + +static bool +vect_known_niters_smaller_than_vf (loop_vec_info loop_vinfo) +{ + unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo); + + HOST_WIDE_INT max_niter; + if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) + max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo); + else + max_niter = max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo)); + + if (max_niter != -1 && (unsigned HOST_WIDE_INT) max_niter < assumed_vf) + return true; + + return false; +} + /* Analyze the cost of the loop described by LOOP_VINFO. Decide if it is worthwhile to vectorize. Return 1 if definitely yes, 0 if definitely no, or -1 if it's worth retrying. */ @@ -1638,15 +1672,7 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo) counts less than the vectorization factor. */ if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) { - HOST_WIDE_INT max_niter; - - if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) - max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo); - else - max_niter = max_stmt_executions_int (loop); - - if (max_niter != -1 - && (unsigned HOST_WIDE_INT) max_niter < assumed_vf) + if (vect_known_niters_smaller_than_vf (loop_vinfo)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -6794,8 +6820,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo, { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "can't use a fully-masked loop because no" - " conditional operation is available.\n"); + "can't operate on partial vectors because" + " no conditional operation is available.\n"); LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; } else if (reduction_type == FOLD_LEFT_REDUCTION @@ -6806,8 +6832,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo, { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "can't use a fully-masked loop because no" - " conditional operation is available.\n"); + "can't operate on partial vectors 
because" + " no conditional operation is available.\n"); LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; } else @@ -7886,25 +7912,26 @@ vectorizable_live_operation (loop_vec_info loop_vinfo, { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "can't use a fully-masked loop because " - "the target doesn't support extract last " - "reduction.\n"); + "can't operate on partial vectors " + "because the target doesn't support extract " + "last reduction.\n"); LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; } else if (slp_node) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "can't use a fully-masked loop because an " - "SLP statement is live after the loop.\n"); + "can't operate on partial vectors " + "because an SLP statement is live after " + "the loop.\n"); LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; } else if (ncopies > 1) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "can't use a fully-masked loop because" - " ncopies is greater than 1.\n"); + "can't operate on partial vectors " + "because ncopies is greater than 1.\n"); LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false; } else @@ -9056,12 +9083,13 @@ optimize_mask_stores (class loop *loop) } /* Decide whether it is possible to use a zero-based induction variable - when vectorizing LOOP_VINFO with a fully-masked loop. If it is, - return the value that the induction variable must be able to hold - in order to ensure that the loop ends with an all-false mask. + when vectorizing LOOP_VINFO with partial vectors. If it is, return + the value that the induction variable must be able to hold in order + to ensure that the rgroups eventually have no active vector elements. Return -1 otherwise. 
*/ + widest_int -vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo) +vect_iv_limit_for_partial_vectors (loop_vec_info loop_vinfo) { tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |