author     Richard Sandiford <richard.sandiford@arm.com>   2021-11-12 17:33:02 +0000
committer  Richard Sandiford <richard.sandiford@arm.com>   2021-11-12 17:33:02 +0000
commit     1a5288fe3dcaa1eb5398ed28b0765f8dad9a1b2a
tree       12491f79216fcfd7b463467d9325fd73009b0749
parent     6756706ea636d6f9aab85cef22659cc35143476f
aarch64: Use an array of aarch64_vec_op_counts
-mtune=neoverse-512tvb uses two issue rates, one for Neoverse V1
and one with more generic parameters.  We use both rates when making
a choice between scalar, Advanced SIMD and SVE code.

Previously we calculated the Neoverse V1 issue rates from the more
generic issue rates, but by removing m_scalar_ops and (later)
m_advsimd_ops, it becomes easier to track multiple issue rates
directly.

This patch therefore converts m_ops and (temporarily) m_advsimd_ops
into arrays.

gcc/
	* config/aarch64/aarch64.c (aarch64_vec_op_count): Allow default
	initialization.
	(aarch64_vec_op_count::base_issue_info): Remove handling of null
	issue_infos.
	(aarch64_vec_op_count::simd_issue_info): Likewise.
	(aarch64_vec_op_count::sve_issue_info): Likewise.
	(aarch64_vector_costs::m_ops): Turn into a vector.
	(aarch64_vector_costs::m_advsimd_ops): Likewise.
	(aarch64_vector_costs::aarch64_vector_costs): Add entries to
	the vectors based on aarch64_tune_params.
	(aarch64_vector_costs::analyze_loop_vinfo): Update the pred_ops
	of all entries in m_ops.
	(aarch64_vector_costs::add_stmt_cost): Call count_ops for all
	entries in m_ops.
	(aarch64_estimate_min_cycles_per_iter): Remove issue_info
	parameter and get the information from the ops instead.
	(aarch64_vector_costs::adjust_body_cost_sve): Take an
	aarch64_vec_op_count instead of an aarch64_vec_issue_info.
	(aarch64_vector_costs::adjust_body_cost): Update call
	accordingly.  Exit earlier if m_ops is empty for either cost
	structure.
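For readers skimming the diff below, here is a rough standalone sketch of the
data-structure shape the patch moves to: one default-constructible op-count
record per tuning option, populated only when issue information exists (which
is why the accessors can drop their null checks).  This is illustrative only:
std::vector stands in for GCC's auto_vec, and all names and numbers here are
invented.

#include <cstdio>
#include <vector>

/* Stand-in for aarch64_vec_issue_info: per-core issue rates.  */
struct issue_info
{
  unsigned int stores_per_cycle;
  unsigned int loads_stores_per_cycle;
};

/* Stand-in for aarch64_vec_op_count.  Default initialization is what
   lets a vector of these be declared before the entries are known.  */
struct op_count
{
  op_count () = default;
  op_count (const issue_info *info, unsigned int flags)
    : m_issue_info (info), m_vec_flags (flags) {}

  const issue_info *m_issue_info = nullptr;
  unsigned int m_vec_flags = 0;
  unsigned int loads = 0, stores = 0;
};

int
main ()
{
  issue_info generic = { 2, 4 };
  issue_info neoversev1 = { 2, 4 };

  /* One entry per tuning option of interest; entries are pushed only
     when issue information is available, so consumers never see a
     null m_issue_info.  */
  std::vector<op_count> ops;
  ops.push_back ({ &generic, 0 });
  ops.push_back ({ &neoversev1, 0 });

  /* Consumers now loop over every entry instead of updating a single
     member object, mirroring the add_stmt_cost change below.  */
  for (op_count &o : ops)
    o.loads += 1;

  std::printf ("%zu tuning entries\n", ops.size ());
  return 0;
}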
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/config/aarch64/aarch64.c  115
1 file changed, 60 insertions(+), 55 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3944c09..71c44d6 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14710,6 +14710,7 @@ aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
class aarch64_vec_op_count
{
public:
+ aarch64_vec_op_count () = default;
aarch64_vec_op_count (const aarch64_vec_issue_info *, unsigned int);
unsigned int vec_flags () const { return m_vec_flags; }
@@ -14739,14 +14740,14 @@ public:
private:
/* The issue information for the core. */
- const aarch64_vec_issue_info *m_issue_info;
+ const aarch64_vec_issue_info *m_issue_info = nullptr;
/* - If M_VEC_FLAGS is zero then this structure describes scalar code
- If M_VEC_FLAGS & VEC_ADVSIMD is nonzero then this structure describes
Advanced SIMD code.
- If M_VEC_FLAGS & VEC_ANY_SVE is nonzero then this structure describes
SVE code. */
- unsigned int m_vec_flags;
+ unsigned int m_vec_flags = 0;
};
aarch64_vec_op_count::
@@ -14765,9 +14766,7 @@ aarch64_vec_op_count::base_issue_info () const
{
if (auto *ret = simd_issue_info ())
return ret;
- if (m_issue_info)
- return m_issue_info->scalar;
- return nullptr;
+ return m_issue_info->scalar;
}
/* If the structure describes vector code and we have associated issue
@@ -14777,7 +14776,7 @@ aarch64_vec_op_count::simd_issue_info () const
{
if (auto *ret = sve_issue_info ())
return ret;
- if (m_issue_info && m_vec_flags)
+ if (m_vec_flags)
return m_issue_info->advsimd;
return nullptr;
}
@@ -14787,7 +14786,7 @@ aarch64_vec_op_count::simd_issue_info () const
const aarch64_sve_vec_issue_info *
aarch64_vec_op_count::sve_issue_info () const
{
- if (m_issue_info && (m_vec_flags & VEC_ANY_SVE))
+ if (m_vec_flags & VEC_ANY_SVE)
return m_issue_info->sve;
return nullptr;
}
@@ -14809,7 +14808,7 @@ private:
void analyze_loop_vinfo (loop_vec_info);
void count_ops (unsigned int, vect_cost_for_stmt, stmt_vec_info, tree,
aarch64_vec_op_count *, unsigned int);
- fractional_cost adjust_body_cost_sve (const aarch64_vec_issue_info *,
+ fractional_cost adjust_body_cost_sve (const aarch64_vec_op_count *,
fractional_cost, fractional_cost,
bool, unsigned int, unsigned int *,
bool *);
@@ -14853,13 +14852,14 @@ private:
/* Used only when vectorizing loops. Estimates the number and kind of
operations that would be needed by one iteration of the scalar
- or vector loop. */
- aarch64_vec_op_count m_ops;
+ or vector loop. There is one entry for each tuning option of
+ interest. */
+ auto_vec<aarch64_vec_op_count, 2> m_ops;
- /* Used only when vectorizing loops for SVE. It estimates what the
- equivalent Advanced SIMD-only code would need in order to perform
- the same work as one iteration of the SVE loop. */
- aarch64_vec_op_count m_advsimd_ops;
+ /* Used only when vectorizing loops for SVE. For the first element of M_OPS,
+ it estimates what the equivalent Advanced SIMD-only code would need
+ in order to perform the same work as one iteration of the SVE loop. */
+ auto_vec<aarch64_vec_op_count, 1> m_advsimd_ops;
/* Used to detect cases in which we end up costing the same load twice,
once to account for results that are actually used and once to account
@@ -14871,10 +14871,16 @@ aarch64_vector_costs::aarch64_vector_costs (vec_info *vinfo,
bool costing_for_scalar)
: vector_costs (vinfo, costing_for_scalar),
m_vec_flags (costing_for_scalar ? 0
- : aarch64_classify_vector_mode (vinfo->vector_mode)),
- m_ops (aarch64_tune_params.vec_costs->issue_info, m_vec_flags),
- m_advsimd_ops (aarch64_tune_params.vec_costs->issue_info, VEC_ADVSIMD)
+ : aarch64_classify_vector_mode (vinfo->vector_mode))
{
+ if (auto *issue_info = aarch64_tune_params.vec_costs->issue_info)
+ {
+ m_ops.quick_push ({ issue_info, m_vec_flags });
+ if (m_vec_flags & VEC_ANY_SVE)
+ m_advsimd_ops.quick_push ({ issue_info, VEC_ADVSIMD });
+ if (aarch64_tune_params.vec_costs == &neoverse512tvb_vector_cost)
+ m_ops.quick_push ({ &neoversev1_vec_issue_info, m_vec_flags });
+ }
}
/* Implement TARGET_VECTORIZE_CREATE_COSTS. */
@@ -15001,10 +15007,7 @@ aarch64_vector_costs::analyze_loop_vinfo (loop_vec_info loop_vinfo)
/* Record the issue information for any SVE WHILE instructions that the
loop needs. */
- auto *issue_info = aarch64_tune_params.vec_costs->issue_info;
- if (issue_info
- && issue_info->sve
- && !LOOP_VINFO_MASKS (loop_vinfo).is_empty ())
+ if (!m_ops.is_empty () && !LOOP_VINFO_MASKS (loop_vinfo).is_empty ())
{
unsigned int num_masks = 0;
rgroup_controls *rgm;
@@ -15012,7 +15015,9 @@ aarch64_vector_costs::analyze_loop_vinfo (loop_vec_info loop_vinfo)
FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), num_vectors_m1, rgm)
if (rgm->type)
num_masks += num_vectors_m1 + 1;
- m_ops.pred_ops += num_masks * issue_info->sve->while_pred_ops;
+ for (auto &ops : m_ops)
+ if (auto *issue = ops.sve_issue_info ())
+ ops.pred_ops += num_masks * issue->while_pred_ops;
}
}
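(Concretely, with invented numbers: if the loop's mask rgroups add up to
num_masks = 4 and the tuning's while_pred_ops is 1, each SVE-capable entry
in m_ops gains 4 pred_ops per iteration; entries whose sve_issue_info ()
is null are skipped.)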
@@ -15788,12 +15793,13 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
&& (!LOOP_VINFO_LOOP (loop_vinfo)->inner || in_inner_loop_p)
&& stmt_cost != 0)
{
- count_ops (count, kind, stmt_info, vectype, &m_ops, 1);
- if (aarch64_sve_mode_p (m_vinfo->vector_mode))
+ for (auto &ops : m_ops)
+ count_ops (count, kind, stmt_info, vectype, &ops, 1);
+ for (auto &ops : m_advsimd_ops)
/* Record estimates for a possible Advanced SIMD version
of the SVE code. */
- count_ops (count, kind, stmt_info, vectype,
- &m_advsimd_ops, aarch64_estimated_sve_vq ());
+ count_ops (count, kind, stmt_info, vectype, &ops,
+ aarch64_estimated_sve_vq ());
}
/* If we're applying the SVE vs. Advanced SIMD unrolling heuristic,
@@ -15824,13 +15830,12 @@ aarch64_vec_op_count::dump () const
" reduction latency = %d\n", reduction_latency);
}
-/* Use ISSUE_INFO to estimate the minimum number of cycles needed to issue
- the operations described by OPS. This is a very simplistic model! */
+/* Estimate the minimum number of cycles needed to issue the operations
+ described by OPS. This is a very simplistic model! */
static fractional_cost
-aarch64_estimate_min_cycles_per_iter
- (const aarch64_vec_op_count *ops,
- const aarch64_base_vec_issue_info *issue_info)
+aarch64_estimate_min_cycles_per_iter (const aarch64_vec_op_count *ops)
{
+ auto *issue_info = ops->base_issue_info ();
fractional_cost cycles = MAX (ops->reduction_latency, 1);
cycles = std::max (cycles, { ops->stores, issue_info->stores_per_cycle });
cycles = std::max (cycles, { ops->loads + ops->stores,
@@ -15852,16 +15857,18 @@ aarch64_estimate_min_cycles_per_iter
fractional_cost
aarch64_vector_costs::
-adjust_body_cost_sve (const aarch64_vec_issue_info *issue_info,
+adjust_body_cost_sve (const aarch64_vec_op_count *ops,
fractional_cost scalar_cycles_per_iter,
fractional_cost advsimd_cycles_per_iter,
bool could_use_advsimd, unsigned int orig_body_cost,
unsigned int *body_cost, bool *should_disparage)
{
+ auto *issue_info = ops->sve_issue_info ();
+
/* Estimate the minimum number of cycles per iteration needed to issue
non-predicate operations. */
fractional_cost sve_nonpred_issue_cycles_per_iter
- = aarch64_estimate_min_cycles_per_iter (&m_ops, issue_info->sve);
+ = aarch64_estimate_min_cycles_per_iter (ops);
/* Estimate the minimum number of cycles per iteration needed to rename
SVE instructions.
@@ -15869,7 +15876,7 @@ adjust_body_cost_sve (const aarch64_vec_issue_info *issue_info,
??? For now this is done inline rather than via cost tables, since it
isn't clear how it should be parameterized for the general case. */
fractional_cost sve_rename_cycles_per_iter = 0;
- if (issue_info == &neoverse512tvb_vec_issue_info)
+ if (issue_info == &neoverse512tvb_sve_issue_info)
/* + 1 for an addition. We've already counted a general op for each
store, so we don't need to account for stores separately. The branch
reads no registers and so does not need to be counted either.
@@ -15877,7 +15884,7 @@ adjust_body_cost_sve (const aarch64_vec_issue_info *issue_info,
??? This value is very much on the pessimistic side, but seems to work
pretty well in practice. */
sve_rename_cycles_per_iter
- = { m_ops.general_ops + m_ops.loads + m_ops.pred_ops + 1, 5 };
+ = { ops->general_ops + ops->loads + ops->pred_ops + 1, 5 };
/* Combine the rename and non-predicate issue limits into a single value. */
fractional_cost sve_nonpred_cycles_per_iter
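(For intuition on the rename formula just above, with invented counts:
general_ops = 5, loads = 2 and pred_ops = 2 give (5 + 2 + 2 + 1) / 5 = 2
rename cycles per iteration, reflecting the five-per-cycle rename rate
implied by the divisor.)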
@@ -15886,7 +15893,7 @@ adjust_body_cost_sve (const aarch64_vec_issue_info *issue_info,
/* Separately estimate the minimum number of cycles per iteration needed
to issue the predicate operations. */
fractional_cost sve_pred_issue_cycles_per_iter
- = { m_ops.pred_ops, issue_info->sve->pred_ops_per_cycle };
+ = { ops->pred_ops, issue_info->pred_ops_per_cycle };
/* Calculate the overall limit on the number of cycles per iteration. */
fractional_cost sve_cycles_per_iter
@@ -15894,15 +15901,15 @@ adjust_body_cost_sve (const aarch64_vec_issue_info *issue_info,
if (dump_enabled_p ())
{
- m_ops.dump ();
+ ops->dump ();
dump_printf_loc (MSG_NOTE, vect_location,
" estimated cycles per iteration = %f\n",
sve_cycles_per_iter.as_double ());
- if (m_ops.pred_ops)
+ if (ops->pred_ops)
dump_printf_loc (MSG_NOTE, vect_location,
" predicate issue = %f\n",
sve_pred_issue_cycles_per_iter.as_double ());
- if (m_ops.pred_ops || sve_rename_cycles_per_iter)
+ if (ops->pred_ops || sve_rename_cycles_per_iter)
dump_printf_loc (MSG_NOTE, vect_location,
" non-predicate issue = %f\n",
sve_nonpred_issue_cycles_per_iter.as_double ());
@@ -15987,7 +15994,11 @@ adjust_body_cost (loop_vec_info loop_vinfo,
const aarch64_vector_costs *scalar_costs,
unsigned int body_cost)
{
- const auto &scalar_ops = scalar_costs->m_ops;
+ if (scalar_costs->m_ops.is_empty () || m_ops.is_empty ())
+ return body_cost;
+
+ const auto &scalar_ops = scalar_costs->m_ops[0];
+ const auto &vector_ops = m_ops[0];
unsigned int estimated_vf = vect_vf_for_cost (loop_vinfo);
unsigned int orig_body_cost = body_cost;
bool should_disparage = false;
@@ -16030,16 +16041,12 @@ adjust_body_cost (loop_vec_info loop_vinfo,
}
}
- auto *issue_info = aarch64_tune_params.vec_costs->issue_info;
- if (!issue_info)
- return body_cost;
-
fractional_cost scalar_cycles_per_iter
- = aarch64_estimate_min_cycles_per_iter (&scalar_ops, issue_info->scalar);
+ = aarch64_estimate_min_cycles_per_iter (&scalar_ops);
scalar_cycles_per_iter *= estimated_vf;
fractional_cost vector_cycles_per_iter
- = aarch64_estimate_min_cycles_per_iter (&m_ops, m_ops.base_issue_info ());
+ = aarch64_estimate_min_cycles_per_iter (&vector_ops);
if (dump_enabled_p ())
{
@@ -16055,7 +16062,7 @@ adjust_body_cost (loop_vec_info loop_vinfo,
estimated_vf, scalar_cycles_per_iter.as_double ());
}
- if ((m_vec_flags & VEC_ANY_SVE) && issue_info->sve)
+ if (vector_ops.sve_issue_info ())
{
bool could_use_advsimd
= (aarch64_autovec_preference != 2
@@ -16064,15 +16071,14 @@ adjust_body_cost (loop_vec_info loop_vinfo,
&& !m_saw_sve_only_op);
fractional_cost advsimd_cycles_per_iter
- = aarch64_estimate_min_cycles_per_iter (&m_advsimd_ops,
- issue_info->advsimd);
+ = aarch64_estimate_min_cycles_per_iter (&m_advsimd_ops[0]);
if (dump_enabled_p ())
{
if (could_use_advsimd)
{
dump_printf_loc (MSG_NOTE, vect_location,
"Advanced SIMD issue estimate:\n");
- m_advsimd_ops.dump ();
+ m_advsimd_ops[0].dump ();
dump_printf_loc (MSG_NOTE, vect_location,
" estimated cycles per iteration = %f\n",
advsimd_cycles_per_iter.as_double ());
@@ -16083,7 +16089,7 @@ adjust_body_cost (loop_vec_info loop_vinfo,
dump_printf_loc (MSG_NOTE, vect_location, "SVE issue estimate:\n");
}
vector_cycles_per_iter
- = adjust_body_cost_sve (issue_info, scalar_cycles_per_iter,
+ = adjust_body_cost_sve (&vector_ops, scalar_cycles_per_iter,
advsimd_cycles_per_iter, could_use_advsimd,
orig_body_cost, &body_cost, &should_disparage);
@@ -16095,8 +16101,7 @@ adjust_body_cost (loop_vec_info loop_vinfo,
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"Neoverse V1 estimate:\n");
- adjust_body_cost_sve (&neoversev1_vec_issue_info,
- scalar_cycles_per_iter * 2,
+ adjust_body_cost_sve (&m_ops[1], scalar_cycles_per_iter * 2,
advsimd_cycles_per_iter * 2,
could_use_advsimd, orig_body_cost,
&body_cost, &should_disparage);
@@ -16108,7 +16113,7 @@ adjust_body_cost (loop_vec_info loop_vinfo,
{
dump_printf_loc (MSG_NOTE, vect_location,
"Vector issue estimate:\n");
- m_ops.dump ();
+ vector_ops.dump ();
dump_printf_loc (MSG_NOTE, vect_location,
" estimated cycles per iteration = %f\n",
vector_cycles_per_iter.as_double ());
@@ -16155,7 +16160,7 @@ adjust_body_cost (loop_vec_info loop_vinfo,
vector code is an improvement, even if adding the other (non-loop-carried)
latencies tends to hide this saving. We therefore reduce the cost of the
vector loop body in proportion to the saving. */
- else if (scalar_ops.reduction_latency > m_ops.reduction_latency
+ else if (scalar_ops.reduction_latency > vector_ops.reduction_latency
&& scalar_ops.reduction_latency == scalar_cycles_per_iter
&& scalar_cycles_per_iter > vector_cycles_per_iter
&& !should_disparage)
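Stepping back, here is a loose end-to-end sketch of the flow that
adjust_body_cost now implements.  It is illustrative only: fractional_cost
is simplified to double, the issue model is trimmed to two limits, and all
names and counts are invented.

#include <algorithm>
#include <cstdio>

/* Invented per-tuning-entry operation counts.  */
struct op_count
{
  double reduction_latency;
  double loads, stores;
};

/* Trimmed-down analogue of aarch64_estimate_min_cycles_per_iter:
   take the worst of the reduction latency and the issue limits.  */
static double
estimate_min_cycles_per_iter (const op_count &ops)
{
  double cycles = std::max (ops.reduction_latency, 1.0);
  cycles = std::max (cycles, ops.stores / 2.0);		       /* 2 stores/cycle */
  cycles = std::max (cycles, (ops.loads + ops.stores) / 4.0);  /* 4 mem ops/cycle */
  return cycles;
}

int
main ()
{
  unsigned int estimated_vf = 4;	/* estimated vectorization factor */
  op_count scalar_ops = { 1.0, 8.0, 4.0 };
  op_count vector_ops = { 2.0, 2.0, 1.0 };

  /* One vector iteration does the work of ESTIMATED_VF scalar
     iterations, so scale the scalar estimate accordingly.  */
  double scalar_cycles = estimate_min_cycles_per_iter (scalar_ops) * estimated_vf;
  double vector_cycles = estimate_min_cycles_per_iter (vector_ops);

  /* If issue-rate modelling says the vector loop is even better than
     the latency-based costs suggest, scale the body cost down in
     proportion (simplified).  */
  unsigned int body_cost = 100;
  if (scalar_cycles > vector_cycles)
    body_cost = (unsigned int) (body_cost * vector_cycles / scalar_cycles);

  std::printf ("adjusted body cost = %u\n", body_cost);
  return 0;
}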