aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-loop.cc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/tree-vect-loop.cc')
-rw-r--r--gcc/tree-vect-loop.cc123
1 files changed, 96 insertions, 27 deletions
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 2d1a688..575987e 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -1069,10 +1069,12 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
using_decrementing_iv_p (false),
using_select_vl_p (false),
epil_using_partial_vectors_p (false),
+ allow_mutual_alignment (false),
partial_load_store_bias (0),
peeling_for_gaps (false),
peeling_for_niter (false),
early_breaks (false),
+ user_unroll (false),
no_data_dependencies (false),
has_mask_store (false),
scalar_loop_scaling (profile_probability::uninitialized ()),
@@ -3428,27 +3430,50 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
res ? "succeeded" : "failed",
GET_MODE_NAME (loop_vinfo->vector_mode));
- if (res && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) && suggested_unroll_factor > 1)
+ auto user_unroll = LOOP_VINFO_LOOP (loop_vinfo)->unroll;
+ if (res && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
+ /* Check to see if the user wants to unroll or if the target wants to. */
+ && (suggested_unroll_factor > 1 || user_unroll > 1))
{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
+ if (suggested_unroll_factor == 1)
+ {
+ int assumed_vf = vect_vf_for_cost (loop_vinfo);
+ suggested_unroll_factor = user_unroll / assumed_vf;
+ if (suggested_unroll_factor > 1)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "setting unroll factor to %d based on user requested "
+ "unroll factor %d and suggested vectorization "
+ "factor: %d\n",
+ suggested_unroll_factor, user_unroll, assumed_vf);
+ }
+ }
+
+ if (suggested_unroll_factor > 1)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
"***** Re-trying analysis for unrolling"
" with unroll factor %d and slp %s.\n",
suggested_unroll_factor,
slp_done_for_suggested_uf ? "on" : "off");
- loop_vec_info unroll_vinfo
- = vect_create_loop_vinfo (loop, shared, loop_form_info, NULL);
- unroll_vinfo->vector_mode = vector_mode;
- unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor;
- opt_result new_res = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL,
- slp_done_for_suggested_uf);
- if (new_res)
- {
- delete loop_vinfo;
- loop_vinfo = unroll_vinfo;
- }
- else
- delete unroll_vinfo;
+ loop_vec_info unroll_vinfo
+ = vect_create_loop_vinfo (loop, shared, loop_form_info, NULL);
+ unroll_vinfo->vector_mode = vector_mode;
+ unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor;
+ opt_result new_res
+ = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL,
+ slp_done_for_suggested_uf);
+ if (new_res)
+ {
+ delete loop_vinfo;
+ loop_vinfo = unroll_vinfo;
+ LOOP_VINFO_USER_UNROLL (loop_vinfo) = user_unroll > 1;
+ }
+ else
+ delete unroll_vinfo;
+ }
}
/* Remember the autodetected vector mode. */
@@ -3469,13 +3494,8 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
mode_i += 1;
}
if (mode_i + 1 < vector_modes.length ()
- && VECTOR_MODE_P (autodetected_vector_mode)
- && (related_vector_mode (vector_modes[mode_i + 1],
- GET_MODE_INNER (autodetected_vector_mode))
- == autodetected_vector_mode)
- && (related_vector_mode (autodetected_vector_mode,
- GET_MODE_INNER (vector_modes[mode_i + 1]))
- == vector_modes[mode_i + 1]))
+ && vect_chooses_same_modes_p (autodetected_vector_mode,
+ vector_modes[mode_i + 1]))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -3676,8 +3696,15 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
vector_modes[0] = autodetected_vector_mode;
mode_i = 0;
- bool supports_partial_vectors =
- partial_vectors_supported_p () && param_vect_partial_vector_usage != 0;
+ bool supports_partial_vectors = param_vect_partial_vector_usage != 0;
+ machine_mode mask_mode;
+ if (supports_partial_vectors
+ && !partial_vectors_supported_p ()
+ && !(VECTOR_MODE_P (first_loop_vinfo->vector_mode)
+ && targetm.vectorize.get_mask_mode
+ (first_loop_vinfo->vector_mode).exists (&mask_mode)
+ && SCALAR_INT_MODE_P (mask_mode)))
+ supports_partial_vectors = false;
poly_uint64 first_vinfo_vf = LOOP_VINFO_VECT_FACTOR (first_loop_vinfo);
loop_vec_info orig_loop_vinfo = first_loop_vinfo;
@@ -3697,6 +3724,22 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
break;
continue;
}
+ /* We would need an exhaustive search to find all modes we
+ skipped but that would lead to the same result as the
+	 analysis it was skipped for and where we could check
+ cached_vf_per_mode against.
+ Check for the autodetected mode, which is the common
+ situation on x86 which does not perform cost comparison. */
+ if (!supports_partial_vectors
+ && maybe_ge (cached_vf_per_mode[0], first_vinfo_vf)
+ && vect_chooses_same_modes_p (autodetected_vector_mode,
+ vector_modes[mode_i]))
+ {
+ mode_i++;
+ if (mode_i == vector_modes.length ())
+ break;
+ continue;
+ }
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -4101,6 +4144,10 @@ pop:
if (op.ops[2] == op.ops[opi])
neg = ! neg;
}
+ /* For an FMA the reduction code is the PLUS if the addition chain
+ is the reduction. */
+ else if (op.code == IFN_FMA && opi == 2)
+ op.code = PLUS_EXPR;
if (CONVERT_EXPR_CODE_P (op.code)
&& tree_nop_conversion_p (op.type, TREE_TYPE (op.ops[0])))
;
@@ -4646,7 +4693,8 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
TODO: Consider assigning different costs to different scalar
statements. */
- scalar_single_iter_cost = loop_vinfo->scalar_costs->total_cost ();
+ scalar_single_iter_cost = (loop_vinfo->scalar_costs->total_cost ()
+ * param_vect_scalar_cost_multiplier) / 100;
/* Add additional cost for the peeled instructions in prologue and epilogue
loop. (For fully-masked loops there will be no peeling.)
@@ -6016,7 +6064,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
/* Create an induction variable. */
gimple_stmt_iterator incr_gsi;
bool insert_after;
- vect_iv_increment_position (loop_exit, &incr_gsi, &insert_after);
+ vect_iv_increment_position (LOOP_VINFO_IV_EXIT (loop_vinfo),
+ &incr_gsi, &insert_after);
create_iv (series_vect, PLUS_EXPR, vec_step, NULL_TREE, loop, &incr_gsi,
insert_after, &indx_before_incr, &indx_after_incr);
@@ -8043,6 +8092,19 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
"in-order reduction chain without SLP.\n");
return false;
}
+ /* Code generation doesn't support function calls other
+ than .COND_*. */
+ if (!op.code.is_tree_code ()
+ && !(op.code.is_internal_fn ()
+ && conditional_internal_fn_code (internal_fn (op.code))
+ != ERROR_MARK))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "in-order reduction chain operation not "
+ "supported.\n");
+ return false;
+ }
STMT_VINFO_REDUC_TYPE (reduc_info)
= reduction_type = FOLD_LEFT_REDUCTION;
}
@@ -12039,6 +12101,13 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
dump_printf_loc (MSG_NOTE, vect_location, "Disabling unrolling due to"
" variable-length vectorization factor\n");
}
+
+  /* When we have unrolled the loop due to a user requested value we should
+     leave it up to the RTL unroll heuristics to determine if it's still
+     worthwhile to unroll more. */
+ if (LOOP_VINFO_USER_UNROLL (loop_vinfo))
+ loop->unroll = 0;
+
/* Free SLP instances here because otherwise stmt reference counting
won't work. */
slp_instance instance;