diff options
Diffstat (limited to 'gcc/tree-vect-loop.cc')
-rw-r--r-- | gcc/tree-vect-loop.cc | 159 |
1 files changed, 123 insertions, 36 deletions
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 2d1a688..42e0015 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -58,6 +58,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-eh.h" #include "case-cfn-macros.h" #include "langhooks.h" +#include "opts.h" /* Loop Vectorization Pass. @@ -1069,10 +1070,12 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared) using_decrementing_iv_p (false), using_select_vl_p (false), epil_using_partial_vectors_p (false), + allow_mutual_alignment (false), partial_load_store_bias (0), peeling_for_gaps (false), peeling_for_niter (false), early_breaks (false), + user_unroll (false), no_data_dependencies (false), has_mask_store (false), scalar_loop_scaling (profile_probability::uninitialized ()), @@ -3398,8 +3401,10 @@ vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo, } /* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if ORIG_LOOP_VINFO is - not NULL. Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance - MODE_I to the next mode useful to analyze. + not NULL. When MASKED_P is not -1 override the default + LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P with it. + Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance MODE_I to the next + mode useful to analyze. Return the loop_vinfo on success and wrapped null on failure. 
*/ static opt_loop_vec_info @@ -3407,6 +3412,7 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, const vect_loop_form_info *loop_form_info, loop_vec_info orig_loop_vinfo, const vector_modes &vector_modes, unsigned &mode_i, + int masked_p, machine_mode &autodetected_vector_mode, bool &fatal) { @@ -3415,6 +3421,8 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, machine_mode vector_mode = vector_modes[mode_i]; loop_vinfo->vector_mode = vector_mode; + if (masked_p != -1) + loop_vinfo->can_use_partial_vectors_p = masked_p; unsigned int suggested_unroll_factor = 1; unsigned slp_done_for_suggested_uf = 0; @@ -3428,27 +3436,50 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, res ? "succeeded" : "failed", GET_MODE_NAME (loop_vinfo->vector_mode)); - if (res && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) && suggested_unroll_factor > 1) + auto user_unroll = LOOP_VINFO_LOOP (loop_vinfo)->unroll; + if (res && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) + /* Check to see if the user wants to unroll or if the target wants to. */ + && (suggested_unroll_factor > 1 || user_unroll > 1)) { - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, + if (suggested_unroll_factor == 1) + { + int assumed_vf = vect_vf_for_cost (loop_vinfo); + suggested_unroll_factor = user_unroll / assumed_vf; + if (suggested_unroll_factor > 1) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "setting unroll factor to %d based on user requested " + "unroll factor %d and suggested vectorization " + "factor: %d\n", + suggested_unroll_factor, user_unroll, assumed_vf); + } + } + + if (suggested_unroll_factor > 1) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "***** Re-trying analysis for unrolling" " with unroll factor %d and slp %s.\n", suggested_unroll_factor, slp_done_for_suggested_uf ? 
"on" : "off"); - loop_vec_info unroll_vinfo - = vect_create_loop_vinfo (loop, shared, loop_form_info, NULL); - unroll_vinfo->vector_mode = vector_mode; - unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor; - opt_result new_res = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL, - slp_done_for_suggested_uf); - if (new_res) - { - delete loop_vinfo; - loop_vinfo = unroll_vinfo; - } - else - delete unroll_vinfo; + loop_vec_info unroll_vinfo + = vect_create_loop_vinfo (loop, shared, loop_form_info, NULL); + unroll_vinfo->vector_mode = vector_mode; + unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor; + opt_result new_res + = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL, + slp_done_for_suggested_uf); + if (new_res) + { + delete loop_vinfo; + loop_vinfo = unroll_vinfo; + LOOP_VINFO_USER_UNROLL (loop_vinfo) = user_unroll > 1; + } + else + delete unroll_vinfo; + } } /* Remember the autodetected vector mode. */ @@ -3469,13 +3500,8 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, mode_i += 1; } if (mode_i + 1 < vector_modes.length () - && VECTOR_MODE_P (autodetected_vector_mode) - && (related_vector_mode (vector_modes[mode_i + 1], - GET_MODE_INNER (autodetected_vector_mode)) - == autodetected_vector_mode) - && (related_vector_mode (autodetected_vector_mode, - GET_MODE_INNER (vector_modes[mode_i + 1])) - == vector_modes[mode_i + 1])) + && vect_chooses_same_modes_p (autodetected_vector_mode, + vector_modes[mode_i + 1])) { if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -3580,7 +3606,7 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, cached_vf_per_mode[last_mode_i] = -1; opt_loop_vec_info loop_vinfo = vect_analyze_loop_1 (loop, shared, &loop_form_info, - NULL, vector_modes, mode_i, + NULL, vector_modes, mode_i, -1, autodetected_vector_mode, fatal); if (fatal) break; @@ -3665,24 +3691,38 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, array may contain length-agnostic and 
length-specific modes. Their ordering is not guaranteed, so we could end up picking a mode for the main loop that is after the epilogue's optimal mode. */ + int masked_p = -1; if (!unlimited_cost_model (loop) - && first_loop_vinfo->vector_costs->suggested_epilogue_mode () != VOIDmode) + && (first_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p) + != VOIDmode)) { vector_modes[0] - = first_loop_vinfo->vector_costs->suggested_epilogue_mode (); + = first_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p); cached_vf_per_mode[0] = 0; } else vector_modes[0] = autodetected_vector_mode; mode_i = 0; - bool supports_partial_vectors = - partial_vectors_supported_p () && param_vect_partial_vector_usage != 0; + bool supports_partial_vectors = (param_vect_partial_vector_usage != 0 + || masked_p == 1); + machine_mode mask_mode; + if (supports_partial_vectors + && !partial_vectors_supported_p () + && !(VECTOR_MODE_P (first_loop_vinfo->vector_mode) + && targetm.vectorize.get_mask_mode + (first_loop_vinfo->vector_mode).exists (&mask_mode) + && SCALAR_INT_MODE_P (mask_mode))) + supports_partial_vectors = false; poly_uint64 first_vinfo_vf = LOOP_VINFO_VECT_FACTOR (first_loop_vinfo); loop_vec_info orig_loop_vinfo = first_loop_vinfo; do { + /* Let the user override what the target suggests. */ + if (OPTION_SET_P (param_vect_partial_vector_usage)) + masked_p = -1; + while (1) { /* If the target does not support partial vectors we can shorten the @@ -3697,6 +3737,22 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, break; continue; } + /* We would need an exhaustive search to find all modes we + skipped but that would lead to the same result as the + analysis it was skipped for and where we could check + cached_vf_per_mode against. + Check for the autodetected mode, which is the common + situation on x86 which does not perform cost comparison. 
*/ + if (!supports_partial_vectors + && maybe_ge (cached_vf_per_mode[0], first_vinfo_vf) + && vect_chooses_same_modes_p (autodetected_vector_mode, + vector_modes[mode_i])) + { + mode_i++; + if (mode_i == vector_modes.length ()) + break; + continue; + } if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -3707,7 +3763,7 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, opt_loop_vec_info loop_vinfo = vect_analyze_loop_1 (loop, shared, &loop_form_info, orig_loop_vinfo, - vector_modes, mode_i, + vector_modes, mode_i, masked_p, autodetected_vector_mode, fatal); if (fatal) break; @@ -3738,6 +3794,9 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, break; } + /* Revert back to the default from the suggested preferred + epilogue vectorization mode. */ + masked_p = -1; if (mode_i == vector_modes.length ()) break; } @@ -3748,12 +3807,14 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call, /* When we selected a first vectorized epilogue, see if the target suggests to have another one. */ + masked_p = -1; if (!unlimited_cost_model (loop) - && (orig_loop_vinfo->vector_costs->suggested_epilogue_mode () + && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (orig_loop_vinfo) + && (orig_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p) != VOIDmode)) { vector_modes[0] - = orig_loop_vinfo->vector_costs->suggested_epilogue_mode (); + = orig_loop_vinfo->vector_costs->suggested_epilogue_mode (masked_p); cached_vf_per_mode[0] = 0; mode_i = 0; } @@ -4101,6 +4162,10 @@ pop: if (op.ops[2] == op.ops[opi]) neg = ! neg; } + /* For an FMA the reduction code is the PLUS if the addition chain + is the reduction. */ + else if (op.code == IFN_FMA && opi == 2) + op.code = PLUS_EXPR; if (CONVERT_EXPR_CODE_P (op.code) && tree_nop_conversion_p (op.type, TREE_TYPE (op.ops[0]))) ; @@ -4646,7 +4711,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, TODO: Consider assigning different costs to different scalar statements. 
*/ - scalar_single_iter_cost = loop_vinfo->scalar_costs->total_cost (); + scalar_single_iter_cost = (loop_vinfo->scalar_costs->total_cost () + * param_vect_scalar_cost_multiplier) / 100; /* Add additional cost for the peeled instructions in prologue and epilogue loop. (For fully-masked loops there will be no peeling.) @@ -6016,7 +6082,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, /* Create an induction variable. */ gimple_stmt_iterator incr_gsi; bool insert_after; - vect_iv_increment_position (loop_exit, &incr_gsi, &insert_after); + vect_iv_increment_position (LOOP_VINFO_IV_EXIT (loop_vinfo), + &incr_gsi, &insert_after); create_iv (series_vect, PLUS_EXPR, vec_step, NULL_TREE, loop, &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr); @@ -7755,7 +7822,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo, inside the loop body. The last operand is the reduction variable, which is defined by the loop-header-phi. */ - tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); + tree vectype_out = SLP_TREE_VECTYPE (slp_for_stmt_info); STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out; STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in; @@ -8043,6 +8110,19 @@ vectorizable_reduction (loop_vec_info loop_vinfo, "in-order reduction chain without SLP.\n"); return false; } + /* Code generation doesn't support function calls other + than .COND_*. 
*/ + if (!op.code.is_tree_code () + && !(op.code.is_internal_fn () + && conditional_internal_fn_code (internal_fn (op.code)) + != ERROR_MARK)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "in-order reduction chain operation not " + "supported.\n"); + return false; + } STMT_VINFO_REDUC_TYPE (reduc_info) = reduction_type = FOLD_LEFT_REDUCTION; } @@ -12039,6 +12119,13 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) dump_printf_loc (MSG_NOTE, vect_location, "Disabling unrolling due to" " variable-length vectorization factor\n"); } + + /* When we have unrolled the loop due to a user requested value we should + leave it up to the RTL unroll heuristics to determine if it's still worth + while to unroll more. */ + if (LOOP_VINFO_USER_UNROLL (loop_vinfo)) + loop->unroll = 0; + /* Free SLP instances here because otherwise stmt reference counting won't work. */ slp_instance instance; |