From 6fc2f9337311c11dabcc464c808cbef205f17a52 Mon Sep 17 00:00:00 2001 From: Andrew Pinski Date: Tue, 21 Jan 2020 08:34:42 +0000 Subject: Change recursive prepare_block_for_update to use a worklist Reported as PR 93321, prepare_block_for_update with some huge recursive inlining can go past the stack limit. Transforming this recursion into a worklist improves the stack usage here and we no longer seg fault for the testcase. Note the order we walk the siblings changes. ChangeLog: PR tree-opt/93321 * tree-into-ssa.c (prepare_block_for_update_1): Split out from ... (prepare_block_for_update): This. Use a worklist instead of recursing. --- gcc/tree-into-ssa.c | 59 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 12 deletions(-) (limited to 'gcc/tree-into-ssa.c') diff --git a/gcc/tree-into-ssa.c b/gcc/tree-into-ssa.c index c27bf2c..6528aca 100644 --- a/gcc/tree-into-ssa.c +++ b/gcc/tree-into-ssa.c @@ -2593,11 +2593,9 @@ mark_use_interesting (tree var, gimple *stmt, basic_block bb, } } - -/* Do a dominator walk starting at BB processing statements that - reference symbols in SSA operands. This is very similar to - mark_def_sites, but the scan handles statements whose operands may - already be SSA names. +/* Processing statements in BB that reference symbols in SSA operands. + This is very similar to mark_def_sites, but the scan handles + statements whose operands may already be SSA names. If INSERT_PHI_P is true, mark those uses as live in the corresponding block. This is later used by the PHI placement @@ -2610,9 +2608,8 @@ mark_use_interesting (tree var, gimple *stmt, basic_block bb, that. */ static void -prepare_block_for_update (basic_block bb, bool insert_phi_p) +prepare_block_for_update_1 (basic_block bb, bool insert_phi_p) { - basic_block son; edge e; edge_iterator ei; @@ -2694,13 +2691,51 @@ prepare_block_for_update (basic_block bb, bool insert_phi_p) } } - /* Now visit all the blocks dominated by BB. 
*/ - for (son = first_dom_son (CDI_DOMINATORS, bb); - son; - son = next_dom_son (CDI_DOMINATORS, son)) - prepare_block_for_update (son, insert_phi_p); } +/* Do a dominator walk starting at BB processing statements that + reference symbols in SSA operands. This is very similar to + mark_def_sites, but the scan handles statements whose operands may + already be SSA names. + + If INSERT_PHI_P is true, mark those uses as live in the + corresponding block. This is later used by the PHI placement + algorithm to make PHI pruning decisions. + + FIXME. Most of this would be unnecessary if we could associate a + symbol to all the SSA names that reference it. But that + sounds like it would be expensive to maintain. Still, it + would be interesting to see if it makes better sense to do + that. */ +static void +prepare_block_for_update (basic_block bb, bool insert_phi_p) +{ + size_t sp = 0; + basic_block *worklist; + + /* Allocate the worklist. */ + worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun)); + /* Add the BB to the worklist. */ + worklist[sp++] = bb; + + while (sp) + { + basic_block bb; + basic_block son; + + /* Pick a block from the worklist. */ + bb = worklist[--sp]; + + prepare_block_for_update_1 (bb, insert_phi_p); + + /* Now add all the blocks dominated by BB to the worklist. */ + for (son = first_dom_son (CDI_DOMINATORS, bb); + son; + son = next_dom_son (CDI_DOMINATORS, son)) + worklist[sp++] = son; + } + free (worklist); +} /* Helper for prepare_names_to_update. Mark all the use sites for NAME as interesting. BLOCKS and INSERT_PHI_P are as in -- cgit v1.1 From eb72dc663e9070b281be83a80f6f838a3a878822 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 22 Apr 2020 10:40:51 +0200 Subject: extend DECL_GIMPLE_REG_P to all types This extends DECL_GIMPLE_REG_P to all types so we can clear TREE_ADDRESSABLE even for integers with partial defs, not just complex and vector variables. 
To make that transition easier the patch inverts DECL_GIMPLE_REG_P to DECL_NOT_GIMPLE_REG_P since that makes the default the current state for all other types besides complex and vectors. For the testcase in PR94703 we're able to expand the partial def'ed local integer to a register then, producing a single movl rather than going through the stack. On i?86 this execute FAILs gcc.dg/torture/pr71522.c because we now expand a round-trip through a long double automatic var to a register fld/fst which normalizes the value. For that during RTL expansion we're looking for problematic punnings of decls and avoid pseudos for those - I chose integer or BLKmode accesses on decls with modes where precision doesn't match bitsize which covers the XFmode case. 2020-05-07 Richard Biener PR middle-end/94703 * tree-core.h (tree_decl_common::gimple_reg_flag): Rename ... (tree_decl_common::not_gimple_reg_flag): ... to this. * tree.h (DECL_GIMPLE_REG_P): Rename ... (DECL_NOT_GIMPLE_REG_P): ... to this. * gimple-expr.c (copy_var_decl): Copy DECL_NOT_GIMPLE_REG_P. (create_tmp_reg): Simplify. (create_tmp_reg_fn): Likewise. (is_gimple_reg): Check DECL_NOT_GIMPLE_REG_P for all regs. * gimplify.c (create_tmp_from_val): Simplify. (gimplify_bind_expr): Likewise. (gimplify_compound_literal_expr): Likewise. (gimplify_function_tree): Likewise. (prepare_gimple_addressable): Set DECL_NOT_GIMPLE_REG_P. * asan.c (create_odr_indicator): Do not clear DECL_GIMPLE_REG_P. (asan_add_global): Copy it. * cgraphunit.c (cgraph_node::expand_thunk): Force args to be GIMPLE regs. * function.c (gimplify_parameters): Copy DECL_NOT_GIMPLE_REG_P. * ipa-param-manipulation.c (ipa_param_body_adjustments::common_initialization): Simplify. (ipa_param_body_adjustments::reset_debug_stmts): Copy DECL_NOT_GIMPLE_REG_P. * omp-low.c (lower_omp_for_scan): Do not set DECL_GIMPLE_REG_P. * sanopt.c (sanitize_rewrite_addressable_params): Likewise. * tree-cfg.c (make_blocks_1): Simplify. 
(verify_address): Do not verify DECL_GIMPLE_REG_P setting. * tree-eh.c (lower_eh_constructs_2): Simplify. * tree-inline.c (declare_return_variable): Adjust and generalize. (copy_decl_to_var): Copy DECL_NOT_GIMPLE_REG_P. (copy_result_decl_to_var): Likewise. * tree-into-ssa.c (pass_build_ssa::execute): Adjust comment. * tree-nested.c (create_tmp_var_for): Simplify. * tree-parloops.c (separate_decls_in_region_name): Copy DECL_NOT_GIMPLE_REG_P. * tree-sra.c (create_access_replacement): Adjust and generalize partial def support. * tree-ssa-forwprop.c (pass_forwprop::execute): Set DECL_NOT_GIMPLE_REG_P on decls we introduce partial defs on. * tree-ssa.c (maybe_optimize_var): Handle clearing of TREE_ADDRESSABLE and setting/clearing DECL_NOT_GIMPLE_REG_P independently. * lto-streamer-out.c (hash_tree): Hash DECL_NOT_GIMPLE_REG_P. * tree-streamer-out.c (pack_ts_decl_common_value_fields): Stream DECL_NOT_GIMPLE_REG_P. * tree-streamer-in.c (unpack_ts_decl_common_value_fields): Likewise. * cfgexpand.c (avoid_type_punning_on_regs): New. (discover_nonconstant_array_refs): Call avoid_type_punning_on_regs to avoid unsupported mode punning. lto/ * lto-common.c (compare_tree_sccs_1): Compare DECL_NOT_GIMPLE_REG_P. c/ * gimple-parser.c (c_parser_parse_ssa_name): Do not set DECL_GIMPLE_REG_P. cp/ * optimize.c (update_cloned_parm): Copy DECL_NOT_GIMPLE_REG_P. * gcc.dg/tree-ssa/pr94703.c: New testcase. --- gcc/tree-into-ssa.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'gcc/tree-into-ssa.c') diff --git a/gcc/tree-into-ssa.c b/gcc/tree-into-ssa.c index 6528aca..c24931e 100644 --- a/gcc/tree-into-ssa.c +++ b/gcc/tree-into-ssa.c @@ -2430,8 +2430,7 @@ pass_build_ssa::execute (function *fun) basic_block bb; /* Increase the set of variables we can rewrite into SSA form - by clearing TREE_ADDRESSABLE and setting DECL_GIMPLE_REG_P - and transform the IL to support this. */ + by clearing TREE_ADDRESSABLE and transform the IL to support this. 
*/ if (optimize) execute_update_addresses_taken (); -- cgit v1.1