aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-loop.c
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@arm.com>2021-11-04 12:31:17 +0000
committerRichard Sandiford <richard.sandiford@arm.com>2021-11-04 12:31:17 +0000
commit6239dd05127f1dc05f13f504805d36e8ebe1c2c5 (patch)
tree27c1a57b8c23d47801a5c3d62e95e17c15ddbfa4 /gcc/tree-vect-loop.c
parentaf976d90fa15b379b766647fea34e6b2c37d5c16 (diff)
downloadgcc-6239dd05127f1dc05f13f504805d36e8ebe1c2c5.zip
gcc-6239dd05127f1dc05f13f504805d36e8ebe1c2c5.tar.gz
gcc-6239dd05127f1dc05f13f504805d36e8ebe1c2c5.tar.bz2
vect: Convert cost hooks to classes
The current vector cost interface has quite a bit of redundancy built in. Each target that defines its own hooks has to replicate the basic unsigned[3] management. Currently each target also duplicates the cost adjustment for inner loops. This patch instead defines a vector_costs class for holding the scalar or vector cost and allows targets to subclass it. There is then only one costing hook: to create a new costs structure of the appropriate type. Everything else can be virtual functions, with common concepts implemented in the base class rather than in each target's derivation. This might seem like excess C++-ification, but it shaves ~100 LOC. I've also got some follow-on changes that become significantly easier with this patch. Maybe it could help with things like weighting blocks based on frequency too. This will clash with Andre's unrolling patches. His patches have priority so this patch should queue behind them. The x86 and rs6000 parts fully convert to a self-contained class. The equivalent aarch64 changes are more complex, so this patch just does the bare minimum. A later patch will rework the aarch64 bits. gcc/ * target.def (targetm.vectorize.init_cost): Replace with... (targetm.vectorize.create_costs): ...this. (targetm.vectorize.add_stmt_cost): Delete. (targetm.vectorize.finish_cost): Likewise. (targetm.vectorize.destroy_cost_data): Likewise. * doc/tm.texi.in (TARGET_VECTORIZE_INIT_COST): Replace with... (TARGET_VECTORIZE_CREATE_COSTS): ...this. (TARGET_VECTORIZE_ADD_STMT_COST): Delete. (TARGET_VECTORIZE_FINISH_COST): Likewise. (TARGET_VECTORIZE_DESTROY_COST_DATA): Likewise. * doc/tm.texi: Regenerate. * tree-vectorizer.h (vec_info::vec_info): Remove target_cost_data parameter. (vec_info::target_cost_data): Change from a void * to a vector_costs *. (vector_costs): New class. (init_cost): Take a vec_info and return a vector_costs. (dump_stmt_cost): Remove data parameter. (add_stmt_cost): Replace vinfo and data parameters with a vector_costs. 
(add_stmt_costs): Likewise. (finish_cost): Replace data parameter with a vector_costs. (destroy_cost_data): Delete. * tree-vectorizer.c (dump_stmt_cost): Remove data argument and don't print it. (vec_info::vec_info): Remove the target_cost_data parameter and initialize the member variable to null instead. (vec_info::~vec_info): Delete target_cost_data instead of calling destroy_cost_data. (vector_costs::add_stmt_cost): New function. (vector_costs::finish_cost): Likewise. (vector_costs::record_stmt_cost): Likewise. (vector_costs::adjust_cost_for_freq): Likewise. * tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Update call to vec_info::vec_info. (vect_compute_single_scalar_iteration_cost): Update after above changes to costing interface. (vect_analyze_loop_operations): Likewise. (vect_estimate_min_profitable_iters): Likewise. (vect_analyze_loop_2): Initialize LOOP_VINFO_TARGET_COST_DATA at the start_over point, where it needs to be recreated after trying without slp. Update retry code accordingly. * tree-vect-slp.c (_bb_vec_info::_bb_vec_info): Update call to vec_info::vec_info. (vect_slp_analyze_operation): Update after above changes to costing interface. (vect_bb_vectorization_profitable_p): Likewise. * targhooks.h (default_init_cost): Replace with... (default_vectorize_create_costs): ...this. (default_add_stmt_cost): Delete. (default_finish_cost, default_destroy_cost_data): Likewise. * targhooks.c (default_init_cost): Replace with... (default_vectorize_create_costs): ...this. (default_add_stmt_cost): Delete, moving logic to vector_costs instead. (default_finish_cost, default_destroy_cost_data): Delete. * config/aarch64/aarch64.c (aarch64_vector_costs): Inherit from vector_costs. Add a constructor. (aarch64_init_cost): Replace with... (aarch64_vectorize_create_costs): ...this. (aarch64_add_stmt_cost): Replace with... (aarch64_vector_costs::add_stmt_cost): ...this. Use record_stmt_cost to adjust the cost for inner loops. (aarch64_finish_cost): Replace with... 
(aarch64_vector_costs::finish_cost): ...this. (aarch64_destroy_cost_data): Delete. (TARGET_VECTORIZE_INIT_COST): Replace with... (TARGET_VECTORIZE_CREATE_COSTS): ...this. (TARGET_VECTORIZE_ADD_STMT_COST): Delete. (TARGET_VECTORIZE_FINISH_COST): Likewise. (TARGET_VECTORIZE_DESTROY_COST_DATA): Likewise. * config/i386/i386.c (ix86_vector_costs): New structure. (ix86_init_cost): Replace with... (ix86_vectorize_create_costs): ...this. (ix86_add_stmt_cost): Replace with... (ix86_vector_costs::add_stmt_cost): ...this. Use adjust_cost_for_freq to adjust the cost for inner loops. (ix86_finish_cost, ix86_destroy_cost_data): Delete. (TARGET_VECTORIZE_INIT_COST): Replace with... (TARGET_VECTORIZE_CREATE_COSTS): ...this. (TARGET_VECTORIZE_ADD_STMT_COST): Delete. (TARGET_VECTORIZE_FINISH_COST): Likewise. (TARGET_VECTORIZE_DESTROY_COST_DATA): Likewise. * config/rs6000/rs6000.c (TARGET_VECTORIZE_INIT_COST): Replace with... (TARGET_VECTORIZE_CREATE_COSTS): ...this. (TARGET_VECTORIZE_ADD_STMT_COST): Delete. (TARGET_VECTORIZE_FINISH_COST): Likewise. (TARGET_VECTORIZE_DESTROY_COST_DATA): Likewise. (rs6000_cost_data): Inherit from vector_costs. Add a constructor. Drop loop_info, cost and costing_for_scalar in favor of the corresponding vector_costs member variables. Add "m_" to the names of the remaining member variables and initialize them. (rs6000_density_test): Replace with... (rs6000_cost_data::density_test): ...this. (rs6000_init_cost): Replace with... (rs6000_vectorize_create_costs): ...this. (rs6000_update_target_cost_per_stmt): Replace with... (rs6000_cost_data::update_target_cost_per_stmt): ...this. (rs6000_add_stmt_cost): Replace with... (rs6000_cost_data::add_stmt_cost): ...this. Use adjust_cost_for_freq to adjust the cost for inner loops. (rs6000_adjust_vect_cost_per_loop): Replace with... (rs6000_cost_data::adjust_vect_cost_per_loop): ...this. (rs6000_finish_cost): Replace with... (rs6000_cost_data::finish_cost): ...this. 
Group loop code into a single if statement and pass the loop_vinfo down to subroutines. (rs6000_destroy_cost_data): Delete.
Diffstat (limited to 'gcc/tree-vect-loop.c')
-rw-r--r--gcc/tree-vect-loop.c51
1 files changed, 26 insertions, 25 deletions
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 961c162..201000a 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -814,7 +814,7 @@ bb_in_loop_p (const_basic_block bb, const void *data)
stmt_vec_info structs for all the stmts in LOOP_IN. */
_loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
- : vec_info (vec_info::loop, init_cost (loop_in, false), shared),
+ : vec_info (vec_info::loop, shared),
loop (loop_in),
bbs (XCNEWVEC (basic_block, loop->num_nodes)),
num_itersm1 (NULL_TREE),
@@ -1292,18 +1292,18 @@ vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
}
/* Now accumulate cost. */
- void *target_cost_data = init_cost (loop, true);
+ vector_costs *target_cost_data = init_cost (loop_vinfo, true);
stmt_info_for_cost *si;
int j;
FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
j, si)
- (void) add_stmt_cost (loop_vinfo, target_cost_data, si->count,
+ (void) add_stmt_cost (target_cost_data, si->count,
si->kind, si->stmt_info, si->vectype,
si->misalign, si->where);
unsigned prologue_cost = 0, body_cost = 0, epilogue_cost = 0;
finish_cost (target_cost_data, &prologue_cost, &body_cost,
&epilogue_cost);
- destroy_cost_data (target_cost_data);
+ delete target_cost_data;
LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo)
= prologue_cost + body_cost + epilogue_cost;
}
@@ -1783,7 +1783,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
}
} /* bbs */
- add_stmt_costs (loop_vinfo, loop_vinfo->target_cost_data, &cost_vec);
+ add_stmt_costs (loop_vinfo->target_cost_data, &cost_vec);
/* All operations in the loop are either irrelevant (deal with loop
control, or dead), or only used outside the loop and can be moved
@@ -2393,6 +2393,8 @@ start_over:
LOOP_VINFO_INT_NITERS (loop_vinfo));
}
+ LOOP_VINFO_TARGET_COST_DATA (loop_vinfo) = init_cost (loop_vinfo, false);
+
/* Analyze the alignment of the data-refs in the loop.
Fail if a data reference is found that cannot be vectorized. */
@@ -2757,9 +2759,8 @@ again:
LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();
LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release ();
/* Reset target cost data. */
- destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
- LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
- = init_cost (LOOP_VINFO_LOOP (loop_vinfo), false);
+ delete LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
+ LOOP_VINFO_TARGET_COST_DATA (loop_vinfo) = nullptr;
/* Reset accumulated rgroup information. */
release_vec_loop_controls (&LOOP_VINFO_MASKS (loop_vinfo));
release_vec_loop_controls (&LOOP_VINFO_LENS (loop_vinfo));
@@ -3895,7 +3896,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
int scalar_outside_cost = 0;
int assumed_vf = vect_vf_for_cost (loop_vinfo);
int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
- void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
+ vector_costs *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
/* Cost model disabled. */
if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
@@ -3912,7 +3913,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
{
/* FIXME: Make cost depend on complexity of individual check. */
unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
- (void) add_stmt_cost (loop_vinfo, target_cost_data, len, vector_stmt,
+ (void) add_stmt_cost (target_cost_data, len, vector_stmt,
NULL, NULL_TREE, 0, vect_prologue);
if (dump_enabled_p ())
dump_printf (MSG_NOTE,
@@ -3925,12 +3926,12 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
{
/* FIXME: Make cost depend on complexity of individual check. */
unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
- (void) add_stmt_cost (loop_vinfo, target_cost_data, len, vector_stmt,
+ (void) add_stmt_cost (target_cost_data, len, vector_stmt,
NULL, NULL_TREE, 0, vect_prologue);
len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length ();
if (len)
/* Count LEN - 1 ANDs and LEN comparisons. */
- (void) add_stmt_cost (loop_vinfo, target_cost_data, len * 2 - 1,
+ (void) add_stmt_cost (target_cost_data, len * 2 - 1,
scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).length ();
if (len)
@@ -3941,7 +3942,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
for (unsigned int i = 0; i < len; ++i)
if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)[i].unsigned_p)
nstmts += 1;
- (void) add_stmt_cost (loop_vinfo, target_cost_data, nstmts,
+ (void) add_stmt_cost (target_cost_data, nstmts,
scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
}
if (dump_enabled_p ())
@@ -3954,7 +3955,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
if (LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo))
{
/* FIXME: Make cost depend on complexity of individual check. */
- (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, vector_stmt,
+ (void) add_stmt_cost (target_cost_data, 1, vector_stmt,
NULL, NULL_TREE, 0, vect_prologue);
if (dump_enabled_p ())
dump_printf (MSG_NOTE,
@@ -3963,7 +3964,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
}
if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
- (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
+ (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
NULL, NULL_TREE, 0, vect_prologue);
/* Count statements in scalar loop. Using this as scalar cost for a single
@@ -4051,7 +4052,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
if (peel_iters_prologue)
FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
{
- (void) add_stmt_cost (loop_vinfo, target_cost_data,
+ (void) add_stmt_cost (target_cost_data,
si->count * peel_iters_prologue, si->kind,
si->stmt_info, si->vectype, si->misalign,
vect_prologue);
@@ -4061,7 +4062,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
if (peel_iters_epilogue)
FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
{
- (void) add_stmt_cost (loop_vinfo, target_cost_data,
+ (void) add_stmt_cost (target_cost_data,
si->count * peel_iters_epilogue, si->kind,
si->stmt_info, si->vectype, si->misalign,
vect_epilogue);
@@ -4070,20 +4071,20 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
/* Add possible cond_branch_taken/cond_branch_not_taken cost. */
if (prologue_need_br_taken_cost)
- (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
+ (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
NULL, NULL_TREE, 0, vect_prologue);
if (prologue_need_br_not_taken_cost)
- (void) add_stmt_cost (loop_vinfo, target_cost_data, 1,
+ (void) add_stmt_cost (target_cost_data, 1,
cond_branch_not_taken, NULL, NULL_TREE, 0,
vect_prologue);
if (epilogue_need_br_taken_cost)
- (void) add_stmt_cost (loop_vinfo, target_cost_data, 1, cond_branch_taken,
+ (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
NULL, NULL_TREE, 0, vect_epilogue);
if (epilogue_need_br_not_taken_cost)
- (void) add_stmt_cost (loop_vinfo, target_cost_data, 1,
+ (void) add_stmt_cost (target_cost_data, 1,
cond_branch_not_taken, NULL, NULL_TREE, 0,
vect_epilogue);
@@ -4111,9 +4112,9 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
simpler and safer to use the worst-case cost; if this ends up
being the tie-breaker between vectorizing or not, then it's
probably better not to vectorize. */
- (void) add_stmt_cost (loop_vinfo, target_cost_data, num_masks,
+ (void) add_stmt_cost (target_cost_data, num_masks,
vector_stmt, NULL, NULL_TREE, 0, vect_prologue);
- (void) add_stmt_cost (loop_vinfo, target_cost_data, num_masks - 1,
+ (void) add_stmt_cost (target_cost_data, num_masks - 1,
vector_stmt, NULL, NULL_TREE, 0, vect_body);
}
else if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
@@ -4163,9 +4164,9 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
body_stmts += 3 * num_vectors;
}
- (void) add_stmt_cost (loop_vinfo, target_cost_data, prologue_stmts,
+ (void) add_stmt_cost (target_cost_data, prologue_stmts,
scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
- (void) add_stmt_cost (loop_vinfo, target_cost_data, body_stmts,
+ (void) add_stmt_cost (target_cost_data, body_stmts,
scalar_stmt, NULL, NULL_TREE, 0, vect_body);
}