author     Andrew Pinski <andrew.pinski@oss.qualcomm.com>   2025-08-12 23:31:15 -0700
committer  Andrew Pinski <andrew.pinski@oss.qualcomm.com>   2025-08-14 00:32:18 -0700
commit     df9635322ab8f1297f3774d38a59e13cee4ae79b (patch)
tree       88b58bcb62b32e024b6df7d2832fe9b8771b8f1f
parent     ee67004474d521f7e107ec2795cfbb894a855c87 (diff)
As https://gcc.gnu.org/pipermail/gcc-patches/2025-August/692091.html
pointed out:
'''
Oh, as we now do alias walks in forwprop maybe we should make this
conditional and do
this not for all pass instances, since it makes forwprop possibly a lot slower?
'''
This patch limits the walk in a few different ways.
First, only allow a full walk in the first two forwprop instances (the
one before inlining and the one after inlining).  The other two
forwprop instances are less likely to find any extra zero propagation,
so limit them to doing no walk at all; an example of the transformation
the walk enables is sketched below.
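
For illustration, this is the kind of rewrite the walk makes possible
(a hypothetical source-level example, not taken from the patch):

#include <cstring>

struct S { char buf[64]; };

void
f (S &a, S &b)
{
  std::memset (&a, 0, sizeof (a));  /* A is now all zeros.  */
  /* ... intervening statements that provably do not clobber A ...  */
  std::memcpy (&b, &a, sizeof (a));
  /* optimize_aggr_zeroprop can rewrite the memcpy above into
     std::memset (&b, 0, sizeof (b)), because the walk along the
     virtual def-use chain proves A is still all zeros here.  */
}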
There is an exception to that rule though: clobbers may still be
skipped over, since they do not take long to walk past and, from
looking at benchmarks, they were the only places where forwprop3/4
would still find a zero propagation.  The worklist sketch below shows
how the limit and the clobber exception interact.
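
In outline, the walk is a bounded worklist over the virtual def-use
chain.  A minimal self-contained sketch of the limiting scheme follows;
the struct and function names are illustrative, not the GCC code:

#include <utility>
#include <vector>

/* Illustrative stand-in for a statement on the virtual def-use chain;
   the fields mirror the GCC predicates they stand in for.  */
struct vstmt
{
  bool is_clobber;       /* gimple_clobber_p (stmt)  */
  bool may_clobber_ref;  /* stmt_may_clobber_ref_p_1 (stmt, &read, ...)  */
  vstmt *next_vuse;      /* the use of this statement's vdef, if any  */
};

/* Walk forward from DEF, spending at most LIMIT steps on ordinary
   statements.  With LIMIT == 0 (no full walk) only clobbers are
   stepped over, which is the forwprop3/4 behavior after this patch.  */
static void
walk_vdefs (vstmt *def, unsigned limit)
{
  std::vector<std::pair<vstmt *, unsigned>> worklist;
  worklist.emplace_back (def, limit);
  while (!worklist.empty ())
    {
      auto [use, lim] = worklist.back ();
      worklist.pop_back ();
      /* ... attempt the zero propagation at USE here ...  */
      if ((lim != 0 || use->is_clobber)  /* clobbers pass even at 0  */
	  && use->next_vuse
	  && !use->may_clobber_ref)
	worklist.emplace_back (use->next_vuse, lim == 0 ? 0 : lim - 1);
    }
}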
The other change is to allow a full walk only if
flag_expensive_optimizations is true.  This disables the walk at -O1,
since flag_expensive_optimizations is only turned on at -O2 and above.
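
The effective walk budget per instance thus reduces to a single
conjunction.  A sketch of the gating (the parameter name is real, the
helper function is illustrative):

/* Effective walk budget per forwprop instance:

     instance    passes.def param      -O1   -O2+
     forwprop1   full_walk=true        0     param_sccvn_max_alias_queries_per_access
     forwprop2   full_walk=true        0     param_sccvn_max_alias_queries_per_access
     forwprop3   (defaults to false)   0     0
     forwprop4   full_walk=false       0     0

   A budget of 0 still steps over clobbers.  */
static unsigned
walk_budget (bool m_full_walk, bool expensive_opts, unsigned param)
{
  bool full_walk = m_full_walk && expensive_opts;
  return full_walk ? param : 0;
}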
Bootstrapped and tested on x86_64-linux-gnu.
PR tree-optimization/121474
gcc/ChangeLog:

	* passes.def: Pass full_walk=true to the forwprop1/2 instances
	and update the other instances for the new first parameter.
	* tree-ssa-forwprop.cc (optimize_aggr_zeroprop): Add new argument
	full_walk.  Use it to choose the walk limit; clobbers can always
	be walked over, even once the limit is hit.
	(simplify_builtin_call): Add new argument, full_walk.
	Update call to optimize_aggr_zeroprop.
	(pass_forwprop): Add m_full_walk field.
	(pass_forwprop::set_pass_param): Handle m_full_walk.
	(pass_forwprop::execute): Update calls to simplify_builtin_call
	and optimize_aggr_zeroprop.
Signed-off-by: Andrew Pinski <andrew.pinski@oss.qualcomm.com>
 gcc/passes.def           |  8
 gcc/tree-ssa-forwprop.cc | 42
 2 files changed, 34 insertions(+), 16 deletions(-)
diff --git a/gcc/passes.def b/gcc/passes.def
index d528a04..68ce53b 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -83,7 +83,7 @@ along with GCC; see the file COPYING3.  If not see
       NEXT_PASS (pass_ccp, false /* nonzero_p */);
       /* After CCP we rewrite no longer addressed locals into SSA
	 form if possible.  */
-      NEXT_PASS (pass_forwprop, /*last=*/false);
+      NEXT_PASS (pass_forwprop, /*full_walk=*/true);
       NEXT_PASS (pass_early_thread_jumps, /*first=*/true);
       NEXT_PASS (pass_sra_early);
       /* pass_build_ealias is a dummy pass that ensures that we
@@ -221,7 +221,7 @@ along with GCC; see the file COPYING3.  If not see
       NEXT_PASS (pass_complete_unrolli);
       NEXT_PASS (pass_backprop);
       NEXT_PASS (pass_phiprop);
-      NEXT_PASS (pass_forwprop, /*last=*/false);
+      NEXT_PASS (pass_forwprop, /*full_walk=*/true);
       /* pass_build_alias is a dummy pass that ensures that we
	 execute TODO_rebuild_alias at this point.  */
       NEXT_PASS (pass_build_alias);
@@ -261,7 +261,7 @@ along with GCC; see the file COPYING3.  If not see
       NEXT_PASS (pass_isolate_erroneous_paths);
       NEXT_PASS (pass_reassoc, true /* early_p */);
       NEXT_PASS (pass_dce);
-      NEXT_PASS (pass_forwprop, /*last=*/false);
+      NEXT_PASS (pass_forwprop);
       NEXT_PASS (pass_phiopt, false /* early_p */);
       NEXT_PASS (pass_ccp, true /* nonzero_p */);
       /* After CCP we rewrite no longer addressed locals into SSA
@@ -363,7 +363,7 @@ along with GCC; see the file COPYING3.  If not see
       NEXT_PASS (pass_dce, true /* update_address_taken_p */, true /* remove_unused_locals */);
       /* After late DCE we rewrite no longer addressed locals into SSA
	 form if possible.  */
-      NEXT_PASS (pass_forwprop, /*last=*/true);
+      NEXT_PASS (pass_forwprop, /*full_walk=*/false, /*last=*/true);
       NEXT_PASS (pass_sink_code, true /* unsplit edges */);
       NEXT_PASS (pass_phiopt, false /* early_p */);
       NEXT_PASS (pass_fold_builtins);
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index ec4fbeb..ebf625f 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -1299,7 +1299,7 @@ optimize_aggr_zeroprop_1 (gimple *defstmt, gimple *stmt,
    and/or memcpy (&b, &a, sizeof (a)); instead of b = a;  */
 
 static bool
-optimize_aggr_zeroprop (gimple_stmt_iterator *gsip)
+optimize_aggr_zeroprop (gimple_stmt_iterator *gsip, bool full_walk)
 {
   ao_ref read;
   gimple *stmt = gsi_stmt (*gsip);
@@ -1383,7 +1383,7 @@ optimize_aggr_zeroprop (gimple_stmt_iterator *gsip)
 
   /* Setup the worklist. */
   auto_vec<std::pair<tree, unsigned>> worklist;
-  unsigned limit = param_sccvn_max_alias_queries_per_access;
+  unsigned limit = full_walk ? param_sccvn_max_alias_queries_per_access : 0;
   worklist.safe_push (std::make_pair (gimple_vdef (stmt), limit));
 
   while (!worklist.is_empty ())
@@ -1400,13 +1400,17 @@ optimize_aggr_zeroprop (gimple_stmt_iterator *gsip)
	  continue;
 
	/* If this statement does not clobber add the vdef stmt to the
-	   worklist.  */
-	if (limit != 0
+	   worklist.
+	   After hitting the limit, allow clobbers to be able to pass through.  */
+	if ((limit != 0 || gimple_clobber_p (use_stmt))
	    && gimple_vdef (use_stmt)
	    && !stmt_may_clobber_ref_p_1 (use_stmt, &read,
					  /* tbaa_p = */ can_use_tbba))
-	  worklist.safe_push (std::make_pair (gimple_vdef (use_stmt),
-					      limit - 1));
+	  {
+	    unsigned new_limit = limit == 0 ? 0 : limit - 1;
+	    worklist.safe_push (std::make_pair (gimple_vdef (use_stmt),
+						new_limit));
+	  }
 
	if (optimize_aggr_zeroprop_1 (stmt, use_stmt, dest_base, offset,
				      val, wi::to_poly_offset (len)))
@@ -1591,7 +1595,7 @@ optimize_agr_copyprop (gimple_stmt_iterator *gsip)
    to __atomic_fetch_op (p, x, y) when possible (also __sync).  */
 
 static bool
-simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2)
+simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2, bool full_walk)
 {
   gimple *stmt1, *stmt2 = gsi_stmt (*gsi_p);
   enum built_in_function other_atomic = END_BUILTINS;
@@ -1670,7 +1674,7 @@ simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2)
     {
       /* Try to prop the zeroing/value of the memset to memcpy
	  if the dest is an address and the value is a constant.  */
-      if (optimize_aggr_zeroprop (gsi_p))
+      if (optimize_aggr_zeroprop (gsi_p, full_walk))
	return true;
     }
   if (gimple_call_num_args (stmt2) != 3
@@ -4460,8 +4464,17 @@ public:
   opt_pass * clone () final override { return new pass_forwprop (m_ctxt); }
   void set_pass_param (unsigned int n, bool param) final override
     {
-      gcc_assert (n == 0);
-      last_p = param;
+      switch (n)
+	{
+	case 0:
+	  m_full_walk = param;
+	  break;
+	case 1:
+	  last_p = param;
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
     }
   bool gate (function *) final override { return flag_tree_forwprop; }
   unsigned int execute (function *) final override;
@@ -4469,12 +4482,17 @@ public:
 private:
   /* Determines whether the pass instance should set PROP_last_full_fold.  */
   bool last_p;
+
+  /* True if the aggregate props are doing a full walk or not.  */
+  bool m_full_walk = false;
 }; // class pass_forwprop
 
 unsigned int
 pass_forwprop::execute (function *fun)
 {
   unsigned int todoflags = 0;
+  /* Handle a full walk only when expensive optimizations are on.  */
+  bool full_walk = m_full_walk && flag_expensive_optimizations;
 
   cfg_changed = false;
   if (last_p)
@@ -4991,7 +5009,7 @@ pass_forwprop::execute (function *fun)
	  {
	    tree rhs1 = gimple_assign_rhs1 (stmt);
	    enum tree_code code = gimple_assign_rhs_code (stmt);
-	    if (gimple_store_p (stmt) && optimize_aggr_zeroprop (&gsi))
+	    if (gimple_store_p (stmt) && optimize_aggr_zeroprop (&gsi, full_walk))
	      {
		changed = true;
		break;
@@ -5051,7 +5069,7 @@ pass_forwprop::execute (function *fun)
	  {
	    tree callee = gimple_call_fndecl (stmt);
	    if (callee != NULL_TREE
		&& fndecl_built_in_p (callee, BUILT_IN_NORMAL))
-	      changed |= simplify_builtin_call (&gsi, callee);
+	      changed |= simplify_builtin_call (&gsi, callee, full_walk);
	    break;
	  }
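
For reference, the pass manager hands each extra NEXT_PASS argument to
the pass's set_pass_param hook with an increasing index starting at 0,
which is why set_pass_param above grows a switch.  A simplified sketch
of how the last instance is configured, with a stubbed interface
(illustrative only, not the generated pass-instance code):

/* Minimal stand-in for GCC's opt_pass parameter interface.  */
struct opt_pass
{
  virtual void set_pass_param (unsigned int n, bool param) = 0;
};

/* NEXT_PASS (pass_forwprop, full_walk=false, last=true) in passes.def
   ends up configuring the instance roughly like this:  */
static void
configure_last_forwprop (opt_pass *p)
{
  p->set_pass_param (0, false);  /* case 0: m_full_walk  */
  p->set_pass_param (1, true);   /* case 1: last_p  */
}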