| author | Richard Sandiford <richard.sandiford@arm.com> | 2021-11-12 17:33:03 +0000 |
|---|---|---|
| committer | Richard Sandiford <richard.sandiford@arm.com> | 2021-11-12 17:33:03 +0000 |
| commit | 87fcff96db69986cfa8fd6ec4ad8bbbc0c6ca96f (patch) | |
| tree | 7fdd5cd646b348b2a8bf887c5db87e3186c13777 /gcc | |
| parent | c6c5c5ebaee4e7aa99289ae63cabb2d05d9aee00 (diff) | |
| download | gcc-87fcff96db69986cfa8fd6ec4ad8bbbc0c6ca96f.zip gcc-87fcff96db69986cfa8fd6ec4ad8bbbc0c6ca96f.tar.gz gcc-87fcff96db69986cfa8fd6ec4ad8bbbc0c6ca96f.tar.bz2 | |
aarch64: Remove redundant costing code
Previous patches made some of the complex parts of the issue rate
code redundant.
gcc/
* config/aarch64/aarch64.c (aarch64_vector_costs::m_advsimd_ops): Delete.
(aarch64_vector_costs::m_seen_loads): Likewise.
(aarch64_vector_costs::aarch64_vector_costs): Don't push to
m_advsimd_ops.
(aarch64_vector_costs::count_ops): Remove vectype and factor parameters.
Remove code that tries to predict different vec_flags from the
current loop's.
(aarch64_vector_costs::add_stmt_cost): Update accordingly.
Remove m_advsimd_ops handling.
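To make the cleanup concrete, here is a minimal, self-contained sketch of the counting model that remains after the patch: each statement's COUNT is charged directly against the current loop's operation counts, with no FACTOR rescaling and no shadow Advanced SIMD bookkeeping. The types and names below are simplified stand-ins invented for illustration, not the real interfaces in aarch64.c:

```cpp
// Hypothetical, simplified model of the post-patch count_ops logic.
// vec_op_count stands in for aarch64_vec_op_count; the real code also
// consults per-CPU issue info (e.g. loads and stores add general ops).
#include <algorithm>
#include <cstdio>

struct vec_op_count
{
  unsigned int loads = 0, stores = 0, general_ops = 0;
  unsigned int reduction_latency = 0;
};

enum stmt_kind { LOAD, STORE, GENERAL, REDUCTION };

static void
count_ops (unsigned int count, stmt_kind kind, unsigned int reduc_latency,
           vec_op_count *ops)
{
  switch (kind)
    {
    case LOAD:
      ops->loads += count;
      break;
    case STORE:
      ops->stores += count;
      break;
    case GENERAL:
      ops->general_ops += count;
      break;
    case REDUCTION:
      /* As in the patch: assume the COUNT reductions run serially, so the
         latency bound scales with COUNT rather than COUNT * FACTOR.  */
      ops->reduction_latency
        = std::max (ops->reduction_latency, reduc_latency * count);
      break;
    }
}

int
main ()
{
  vec_op_count ops;
  count_ops (2, LOAD, 0, &ops);      // two vector loads
  count_ops (1, REDUCTION, 4, &ops); // one reduction with latency 4
  std::printf ("loads=%u general=%u latency=%u\n",
               ops.loads, ops.general_ops, ops.reduction_latency);
  return 0;
}
```

The real count_ops additionally charges predicate ops for SVE comparisons and the extra general ops implied by LD[234]/ST[234] groups, as can be seen in the patch below.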
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/aarch64/aarch64.c | 142
1 file changed, 30 insertions(+), 112 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 1e2f3bf..d8410fc 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14914,8 +14914,8 @@ public:
 private:
   void record_potential_advsimd_unrolling (loop_vec_info);
   void analyze_loop_vinfo (loop_vec_info);
-  void count_ops (unsigned int, vect_cost_for_stmt, stmt_vec_info, tree,
-                  aarch64_vec_op_count *, unsigned int);
+  void count_ops (unsigned int, vect_cost_for_stmt, stmt_vec_info,
+                  aarch64_vec_op_count *);
   fractional_cost adjust_body_cost_sve (const aarch64_vec_op_count *,
                                         fractional_cost, unsigned int,
                                         unsigned int *, bool *);
@@ -14959,16 +14959,6 @@ private:
      or vector loop.  There is one entry for each tuning option of
      interest.  */
   auto_vec<aarch64_vec_op_count, 2> m_ops;
-
-  /* Used only when vectorizing loops for SVE.  For the first element of M_OPS,
-     it estimates what the equivalent Advanced SIMD-only code would need
-     in order to perform the same work as one iteration of the SVE loop.  */
-  auto_vec<aarch64_vec_op_count, 1> m_advsimd_ops;
-
-  /* Used to detect cases in which we end up costing the same load twice,
-     once to account for results that are actually used and once to account
-     for unused results.  */
-  hash_map<nofree_ptr_hash<_stmt_vec_info>, unsigned int> m_seen_loads;
 };
 
 aarch64_vector_costs::aarch64_vector_costs (vec_info *vinfo,
@@ -14980,8 +14970,6 @@ aarch64_vector_costs::aarch64_vector_costs (vec_info *vinfo,
   if (auto *issue_info = aarch64_tune_params.vec_costs->issue_info)
     {
       m_ops.quick_push ({ issue_info, m_vec_flags });
-      if (m_vec_flags & VEC_ANY_SVE)
-        m_advsimd_ops.quick_push ({ issue_info, VEC_ADVSIMD });
       if (aarch64_tune_params.vec_costs == &neoverse512tvb_vector_cost)
        {
          unsigned int vf_factor = (m_vec_flags & VEC_ANY_SVE) ? 2 : 1;
@@ -15620,26 +15608,19 @@ aarch64_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
   return stmt_cost;
 }
 
-/* COUNT, KIND, STMT_INFO and VECTYPE are the same as for
-   vector_costs::add_stmt_cost and they describe an operation in the
-   body of a vector loop.  Record issue information relating to the vector
-   operation in OPS, where OPS is one of m_ops or m_advsimd_ops; see the
-   comments above those variables for details.
-
-   FACTOR says how many iterations of the loop described by VEC_FLAGS would be
-   needed to match one iteration of the vector loop in VINFO.  */
+/* COUNT, KIND and STMT_INFO are the same as for vector_costs::add_stmt_cost
+   and they describe an operation in the body of a vector loop.  Record issue
+   information relating to the vector operation in OPS.  */
 void
 aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
-                                 stmt_vec_info stmt_info, tree vectype,
-                                 aarch64_vec_op_count *ops,
-                                 unsigned int factor)
+                                 stmt_vec_info stmt_info,
+                                 aarch64_vec_op_count *ops)
 {
   const aarch64_base_vec_issue_info *base_issue = ops->base_issue_info ();
   if (!base_issue)
     return;
   const aarch64_simd_vec_issue_info *simd_issue = ops->simd_issue_info ();
   const aarch64_sve_vec_issue_info *sve_issue = ops->sve_issue_info ();
-  unsigned int vec_flags = ops->vec_flags ();
 
   /* Calculate the minimum cycles per iteration imposed by a reduction
      operation.  */
@@ -15647,46 +15628,17 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
       && vect_is_reduction (stmt_info))
     {
       unsigned int base
-        = aarch64_in_loop_reduction_latency (m_vinfo, stmt_info, vec_flags);
-      if (vect_reduc_type (m_vinfo, stmt_info) == FOLD_LEFT_REDUCTION)
-        {
-          if (vectype && aarch64_sve_mode_p (TYPE_MODE (vectype)))
-            {
-              /* When costing an SVE FADDA, the vectorizer treats vec_to_scalar
-                 as a single operation, whereas for Advanced SIMD it is a
-                 per-element one.  Increase the factor accordingly, both for
-                 the reduction_latency calculation and for the op counting.  */
-              if (vec_flags & VEC_ADVSIMD)
-                factor = vect_nunits_for_cost (vectype);
-            }
-          else
-            /* An Advanced SIMD fold-left reduction is the same as a
-               scalar one and the vectorizer therefore treats vec_to_scalar
-               as a per-element cost.  There is no extra factor to apply for
-               scalar code, either for reduction_latency or for the op
-               counting below.  */
-            factor = 1;
-        }
+        = aarch64_in_loop_reduction_latency (m_vinfo, stmt_info, m_vec_flags);
 
-      /* ??? Ideally for vector code we'd do COUNT * FACTOR reductions in
-         parallel, but unfortunately that's not yet the case.  */
-      ops->reduction_latency = MAX (ops->reduction_latency,
-                                    base * count * factor);
+      /* ??? Ideally we'd do COUNT reductions in parallel, but unfortunately
+         that's not yet the case.  */
+      ops->reduction_latency = MAX (ops->reduction_latency, base * count);
     }
 
   /* Assume that multiply-adds will become a single operation.  */
-  if (stmt_info && aarch64_multiply_add_p (m_vinfo, stmt_info, vec_flags))
+  if (stmt_info && aarch64_multiply_add_p (m_vinfo, stmt_info, m_vec_flags))
     return;
 
-  /* When costing scalar statements in vector code, the count already
-     includes the number of scalar elements in the vector, so we don't
-     need to apply the factor as well.  */
-  if (kind == scalar_load || kind == scalar_store || kind == scalar_stmt)
-    factor = 1;
-
-  /* This can go negative with the load handling below.  */
-  int num_copies = count * factor;
-
   /* Count the basic operation cost associated with KIND.  */
   switch (kind)
     {
@@ -15702,65 +15654,38 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
     case vec_construct:
     case vec_to_scalar:
     case scalar_to_vec:
-      /* Assume that these operations have no overhead in the original
-         scalar code.  */
-      if (!vec_flags)
-        break;
-      /* Fallthrough.  */
     case vector_stmt:
     case scalar_stmt:
-      ops->general_ops += num_copies;
+      ops->general_ops += count;
       break;
 
     case scalar_load:
     case vector_load:
    case unaligned_load:
-      /* When costing scalars, detect cases in which we are called twice for
-         the same load.  This happens for LD[234] operations if only some of
-         the results are used.  The first time represents the cost of loading
-         the unused vectors, while the second time represents the cost of
-         loading the useful parts.  Only the latter should count towards the
-         scalar costs.  */
-      if (stmt_info && !vec_flags)
-        {
-          bool existed = false;
-          unsigned int &prev_count
-            = m_seen_loads.get_or_insert (stmt_info, &existed);
-          if (existed)
-            num_copies -= prev_count;
-          else
-            prev_count = num_copies;
-        }
-      ops->loads += num_copies;
-      if (vec_flags || FLOAT_TYPE_P (aarch64_dr_type (stmt_info)))
-        ops->general_ops += base_issue->fp_simd_load_general_ops * num_copies;
+      ops->loads += count;
+      if (m_vec_flags || FLOAT_TYPE_P (aarch64_dr_type (stmt_info)))
+        ops->general_ops += base_issue->fp_simd_load_general_ops * count;
       break;
 
     case vector_store:
     case unaligned_store:
     case scalar_store:
-      ops->stores += num_copies;
-      if (vec_flags || FLOAT_TYPE_P (aarch64_dr_type (stmt_info)))
-        ops->general_ops += base_issue->fp_simd_store_general_ops * num_copies;
+      ops->stores += count;
+      if (m_vec_flags || FLOAT_TYPE_P (aarch64_dr_type (stmt_info)))
+        ops->general_ops += base_issue->fp_simd_store_general_ops * count;
       break;
     }
 
   /* Add any embedded comparison operations.  */
   if ((kind == scalar_stmt || kind == vector_stmt || kind == vec_to_scalar)
       && vect_embedded_comparison_type (stmt_info))
-    ops->general_ops += num_copies;
+    ops->general_ops += count;
 
-  /* Detect COND_REDUCTIONs and things that would need to become
-     COND_REDUCTIONs if they were implemented using Advanced SIMD.
-     There are then two sets of VEC_COND_EXPRs, whereas so far we
+  /* COND_REDUCTIONS need two sets of VEC_COND_EXPRs, whereas so far we
      have only accounted for one.  */
-  if (vec_flags && (kind == vector_stmt || kind == vec_to_scalar))
-    {
-      int reduc_type = vect_reduc_type (m_vinfo, stmt_info);
-      if ((reduc_type == EXTRACT_LAST_REDUCTION && (vec_flags & VEC_ADVSIMD))
-          || reduc_type == COND_REDUCTION)
-        ops->general_ops += num_copies;
-    }
+  if ((kind == vector_stmt || kind == vec_to_scalar)
      && vect_reduc_type (m_vinfo, stmt_info) == COND_REDUCTION)
+    ops->general_ops += count;
 
   /* Count the predicate operations needed by an SVE comparison.  */
   if (sve_issue && (kind == vector_stmt || kind == vec_to_scalar))
@@ -15769,7 +15694,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
        unsigned int base = (FLOAT_TYPE_P (type)
                             ? sve_issue->fp_cmp_pred_ops
                             : sve_issue->int_cmp_pred_ops);
-       ops->pred_ops += base * num_copies;
+       ops->pred_ops += base * count;
      }
 
   /* Add any extra overhead associated with LD[234] and ST[234] operations.  */
@@ -15777,15 +15702,15 @@
     switch (aarch64_ld234_st234_vectors (kind, stmt_info))
       {
       case 2:
-       ops->general_ops += simd_issue->ld2_st2_general_ops * num_copies;
+       ops->general_ops += simd_issue->ld2_st2_general_ops * count;
        break;
 
       case 3:
-       ops->general_ops += simd_issue->ld3_st3_general_ops * num_copies;
+       ops->general_ops += simd_issue->ld3_st3_general_ops * count;
        break;
 
       case 4:
-       ops->general_ops += simd_issue->ld4_st4_general_ops * num_copies;
+       ops->general_ops += simd_issue->ld4_st4_general_ops * count;
        break;
       }
@@ -15861,15 +15786,8 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
       && (m_costing_for_scalar || where == vect_body)
       && (!LOOP_VINFO_LOOP (loop_vinfo)->inner || in_inner_loop_p)
       && stmt_cost != 0)
-    {
-      for (auto &ops : m_ops)
-       count_ops (count, kind, stmt_info, vectype, &ops, 1);
-      for (auto &ops : m_advsimd_ops)
-       /* Record estimates for a possible Advanced SIMD version
-          of the SVE code.  */
-       count_ops (count, kind, stmt_info, vectype, &ops,
-                  aarch64_estimated_sve_vq ());
-    }
+    for (auto &ops : m_ops)
+      count_ops (count, kind, stmt_info, &ops);
 
   /* If we're applying the SVE vs. Advanced SIMD unrolling heuristic,
      estimate the number of statements in the unrolled Advanced SIMD