diff options
Diffstat (limited to 'gcc/ipa-cp.cc')
-rw-r--r-- | gcc/ipa-cp.cc | 419 |
1 files changed, 199 insertions, 220 deletions
diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc index 2645689..abde64b 100644 --- a/gcc/ipa-cp.cc +++ b/gcc/ipa-cp.cc @@ -147,10 +147,6 @@ object_allocator<ipcp_value_source<tree> > ipcp_sources_pool object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool ("IPA_CP aggregate lattices"); -/* Base count to use in heuristics when using profile feedback. */ - -static profile_count base_count; - /* Original overall size of the program. */ static long overall_size, orig_overall_size; @@ -307,14 +303,31 @@ ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits) fprintf (f, "\n"); } -/* If VALUE has all bits set to one, print "-1" to F, otherwise simply print it - hexadecimally to F. */ +/* Print VALUE to F in a form which in usual cases does not take thousands of + characters. */ static void ipcp_print_widest_int (FILE *f, const widest_int &value) { - if (wi::eq_p (wi::bit_not (value), 0)) + if (value == -1) fprintf (f, "-1"); + else if (wi::arshift (value, 128) == -1) + { + char buf[35], *p = buf + 2; + widest_int v = wi::zext (value, 128); + size_t len; + print_hex (v, buf); + len = strlen (p); + if (len == 32) + { + fprintf (f, "0xf..f"); + while (*p == 'f') + ++p; + } + else + fprintf (f, "0xf..f%0*d", (int) (32 - len), 0); + fputs (p, f); + } else print_hex (value, f); } @@ -331,7 +344,7 @@ ipcp_bits_lattice::print (FILE *f) fprintf (f, " Bits: value = "); ipcp_print_widest_int (f, get_value ()); fprintf (f, ", mask = "); - print_hex (get_mask (), f); + ipcp_print_widest_int (f, get_mask ()); fprintf (f, "\n"); } } @@ -488,14 +501,16 @@ struct caller_statistics profile_count count_sum; /* Sum of all frequencies for all calls. */ sreal freq_sum; - /* Number of calls and hot calls respectively. */ - int n_calls, n_hot_calls; + /* Number of calls and calls considered interesting respectively. */ + int n_calls, n_interesting_calls; /* If itself is set up, also count the number of non-self-recursive calls. */ int n_nonrec_calls; /* If non-NULL, this is the node itself and calls from it should have their counts included in rec_count_sum and not count_sum. */ cgraph_node *itself; + /* True if there is a caller that has no IPA profile. */ + bool called_without_ipa_profile; }; /* Initialize fields of STAT to zeroes and optionally set it up so that edges @@ -507,10 +522,39 @@ init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL) stats->rec_count_sum = profile_count::zero (); stats->count_sum = profile_count::zero (); stats->n_calls = 0; - stats->n_hot_calls = 0; + stats->n_interesting_calls = 0; stats->n_nonrec_calls = 0; stats->freq_sum = 0; stats->itself = itself; + stats->called_without_ipa_profile = false; +} + +/* We want to propagate across edges that may be executed, however + we do not want to check maybe_hot, since call itself may be cold + while calee contains some heavy loop which makes propagation still + relevant. + + In particular, even edge called once may lead to significant + improvement. */ + +static bool +cs_interesting_for_ipcp_p (cgraph_edge *e) +{ + /* If profile says the edge is executed, we want to optimize. */ + if (e->count.ipa ().nonzero_p ()) + return true; + /* If local (possibly guseed or adjusted 0 profile) claims edge is + not executed, do not propagate. */ + if (!e->count.nonzero_p ()) + return false; + /* If IPA profile says edge is executed zero times, but zero + is quality is ADJUSTED, still consider it for cloning in + case we have partial training. */ + if (e->count.ipa ().initialized_p () + && opt_for_fn (e->callee->decl,flag_profile_partial_training) + && e->count.nonzero_p ()) + return false; + return true; } /* Worker callback of cgraph_for_node_and_aliases accumulating statistics of @@ -536,13 +580,18 @@ gather_caller_stats (struct cgraph_node *node, void *data) else stats->count_sum += cs->count.ipa (); } + else + stats->called_without_ipa_profile = true; stats->freq_sum += cs->sreal_frequency (); stats->n_calls++; if (stats->itself && stats->itself != cs->caller) stats->n_nonrec_calls++; - if (cs->maybe_hot_p ()) - stats->n_hot_calls ++; + /* If profile known to be zero, we do not want to clone for performance. + However if call is cold, the called function may still contain + important hot loops. */ + if (cs_interesting_for_ipcp_p (cs)) + stats->n_interesting_calls++; } return false; @@ -585,26 +634,11 @@ ipcp_cloning_candidate_p (struct cgraph_node *node) node->dump_name ()); return true; } - - /* When profile is available and function is hot, propagate into it even if - calls seems cold; constant propagation can improve function's speed - significantly. */ - if (stats.count_sum > profile_count::zero () - && node->count.ipa ().initialized_p ()) - { - if (stats.count_sum > node->count.ipa ().apply_scale (90, 100)) - { - if (dump_file) - fprintf (dump_file, "Considering %s for cloning; " - "usually called directly.\n", - node->dump_name ()); - return true; - } - } - if (!stats.n_hot_calls) + if (!stats.n_interesting_calls) { if (dump_file) - fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n", + fprintf (dump_file, "Not considering %s for cloning; " + "no calls considered interesting by profile.\n", node->dump_name ()); return false; } @@ -916,11 +950,13 @@ ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask, m_mask = (m_mask | mask) | (m_value ^ value); if (drop_all_ones) m_mask |= m_value; - m_value &= ~m_mask; + widest_int cap_mask = wi::shifted_mask <widest_int> (0, precision, true); + m_mask |= cap_mask; if (wi::sext (m_mask, precision) == -1) return set_to_bottom (); + m_value &= ~m_mask; return m_mask != old_mask; } @@ -996,6 +1032,8 @@ ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision, adjusted_mask |= adjusted_value; adjusted_value &= ~adjusted_mask; } + widest_int cap_mask = wi::shifted_mask <widest_int> (0, precision, true); + adjusted_mask |= cap_mask; if (wi::sext (adjusted_mask, precision) == -1) return set_to_bottom (); return set_to_constant (adjusted_value, adjusted_mask); @@ -1467,10 +1505,12 @@ ipacp_value_safe_for_type (tree param_type, tree value) return NULL_TREE; } -/* Return the result of a (possibly arithmetic) operation on the constant value - INPUT. OPERAND is 2nd operand for binary operation. RES_TYPE is the type - in which any operation is to be performed. Return NULL_TREE if that cannot - be determined or be considered an interprocedural invariant. */ +/* Return the result of a (possibly arithmetic) operation determined by OPCODE + on the constant value INPUT. OPERAND is 2nd operand for binary operation + and is required for binary operations. RES_TYPE, required when opcode is + not NOP_EXPR, is the type in which any operation is to be performed. Return + NULL_TREE if that cannot be determined or be considered an interprocedural + invariant. */ static tree ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand, @@ -1491,16 +1531,6 @@ ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand, return NULL_TREE; } - if (!res_type) - { - if (TREE_CODE_CLASS (opcode) == tcc_comparison) - res_type = boolean_type_node; - else if (expr_type_first_operand_type_p (opcode)) - res_type = TREE_TYPE (input); - else - return NULL_TREE; - } - if (TREE_CODE_CLASS (opcode) == tcc_unary) res = fold_unary (opcode, res_type, input); else @@ -1584,7 +1614,10 @@ ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc, return NULL_TREE; enum tree_code opcode = ipa_get_jf_pass_through_operation (jfunc); tree op2 = ipa_get_jf_pass_through_operand (jfunc); - tree cstval = ipa_get_jf_arith_result (opcode, input, op2, NULL_TREE); + tree op_type + = (opcode == NOP_EXPR) ? NULL_TREE + : ipa_get_jf_pass_through_op_type (jfunc); + tree cstval = ipa_get_jf_arith_result (opcode, input, op2, op_type); return ipacp_value_safe_for_type (parm_type, cstval); } else @@ -1724,24 +1757,7 @@ ipa_vr_intersect_with_arith_jfunc (vrange &vr, const value_range *inter_vr; if (operation != NOP_EXPR) { - /* Since we construct arithmetic jump functions even when there is a - type conversion in between the operation encoded in the jump - function and when it is passed in a call argument, the IPA - propagation phase must also perform the operation and conversion - in two separate steps. - - TODO: In order to remove the use of expr_type_first_operand_type_p - predicate we would need to stream the operation type, ideally - encoding the whole jump function as a series of expr_eval_op - structures. */ - - tree operation_type; - if (expr_type_first_operand_type_p (operation)) - operation_type = src_type; - else if (operation == ABSU_EXPR) - operation_type = unsigned_type_for (src_type); - else - return; + tree operation_type = ipa_get_jf_pass_through_op_type (jfunc); op_res.set_varying (operation_type); if (!ipa_vr_operation_and_type_effects (op_res, src_vr, operation, operation_type, src_type)) @@ -1771,14 +1787,7 @@ ipa_vr_intersect_with_arith_jfunc (vrange &vr, value_range op_vr (TREE_TYPE (operand)); ipa_get_range_from_ip_invariant (op_vr, operand, context_node); - tree operation_type; - if (TREE_CODE_CLASS (operation) == tcc_comparison) - operation_type = boolean_type_node; - else if (expr_type_first_operand_type_p (operation)) - operation_type = src_type; - else - return; - + tree operation_type = ipa_get_jf_pass_through_op_type (jfunc); value_range op_res (operation_type); if (!ipa_vr_supported_type_p (operation_type) || !handler.operand_check_p (operation_type, src_type, op_vr.type ()) @@ -1918,10 +1927,11 @@ ipa_agg_value_from_jfunc (ipa_node_params *info, cgraph_node *node, return NULL_TREE; } - return ipa_get_jf_arith_result (item->value.pass_through.operation, - value, - item->value.pass_through.operand, - item->type); + tree cstval = ipa_get_jf_arith_result (item->value.pass_through.operation, + value, + item->value.pass_through.operand, + item->value.pass_through.op_type); + return ipacp_value_safe_for_type (item->type, cstval); } /* Process all items in AGG_JFUNC relative to caller (or the node the original @@ -2150,13 +2160,15 @@ ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs, /* A helper function that returns result of operation specified by OPCODE on the value of SRC_VAL. If non-NULL, OPND1_TYPE is expected type for the value of SRC_VAL. If the operation is binary, OPND2 is a constant value - acting as its second operand. */ + acting as its second operand. OP_TYPE is the type in which the operation is + performed. */ static tree get_val_across_arith_op (enum tree_code opcode, tree opnd1_type, tree opnd2, - ipcp_value<tree> *src_val) + ipcp_value<tree> *src_val, + tree op_type) { tree opnd1 = src_val->value; @@ -2165,17 +2177,19 @@ get_val_across_arith_op (enum tree_code opcode, && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1))) return NULL_TREE; - return ipa_get_jf_arith_result (opcode, opnd1, opnd2, NULL_TREE); + return ipa_get_jf_arith_result (opcode, opnd1, opnd2, op_type); } /* Propagate values through an arithmetic transformation described by a jump function associated with edge CS, taking values from SRC_LAT and putting - them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT. - OPND2 is a constant value if transformation is a binary operation. - SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes lattice of - a part of the aggregate. SRC_IDX is the index of the source parameter. - RES_TYPE is the value type of result being propagated into. Return true if - DEST_LAT changed. */ + them into DEST_LAT. OPND1_TYPE, if non-NULL, is the expected type for the + values in SRC_LAT. OPND2 is a constant value if transformation is a binary + operation. SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes + lattice of a part of an aggregate, otherwise it should be -1. SRC_IDX is + the index of the source parameter. OP_TYPE is the type in which the + operation is performed and can be NULL when OPCODE is NOP_EXPR. RES_TYPE is + the value type of result being propagated into. Return true if DEST_LAT + changed. */ static bool propagate_vals_across_arith_jfunc (cgraph_edge *cs, @@ -2186,6 +2200,7 @@ propagate_vals_across_arith_jfunc (cgraph_edge *cs, ipcp_lattice<tree> *dest_lat, HOST_WIDE_INT src_offset, int src_idx, + tree op_type, tree res_type) { ipcp_value<tree> *src_val; @@ -2241,7 +2256,7 @@ propagate_vals_across_arith_jfunc (cgraph_edge *cs, for (int j = 1; j < max_recursive_depth; j++) { tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2, - src_val); + src_val, op_type); cstval = ipacp_value_safe_for_type (res_type, cstval); if (!cstval) break; @@ -2266,7 +2281,7 @@ propagate_vals_across_arith_jfunc (cgraph_edge *cs, } tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2, - src_val); + src_val, op_type); cstval = ipacp_value_safe_for_type (res_type, cstval); if (cstval) ret |= dest_lat->add_value (cstval, cs, src_val, src_idx, @@ -2290,11 +2305,13 @@ propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc, tree parm_type) { gcc_checking_assert (parm_type); - return propagate_vals_across_arith_jfunc (cs, - ipa_get_jf_pass_through_operation (jfunc), - NULL_TREE, + enum tree_code opcode = ipa_get_jf_pass_through_operation (jfunc); + tree op_type = (opcode == NOP_EXPR) ? NULL_TREE + : ipa_get_jf_pass_through_op_type (jfunc); + return propagate_vals_across_arith_jfunc (cs, opcode, NULL_TREE, ipa_get_jf_pass_through_operand (jfunc), - src_lat, dest_lat, -1, src_idx, parm_type); + src_lat, dest_lat, -1, src_idx, op_type, + parm_type); } /* Propagate values through an ancestor jump function JFUNC associated with @@ -2507,14 +2524,12 @@ propagate_bits_across_jump_function (cgraph_edge *cs, int idx, return dest_lattice->set_to_bottom (); } - unsigned precision = TYPE_PRECISION (parm_type); - signop sgn = TYPE_SIGN (parm_type); - if (jfunc->type == IPA_JF_PASS_THROUGH || jfunc->type == IPA_JF_ANCESTOR) { ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller); tree operand = NULL_TREE; + tree op_type = NULL_TREE; enum tree_code code; unsigned src_idx; bool keep_null = false; @@ -2524,7 +2539,10 @@ propagate_bits_across_jump_function (cgraph_edge *cs, int idx, code = ipa_get_jf_pass_through_operation (jfunc); src_idx = ipa_get_jf_pass_through_formal_id (jfunc); if (code != NOP_EXPR) - operand = ipa_get_jf_pass_through_operand (jfunc); + { + operand = ipa_get_jf_pass_through_operand (jfunc); + op_type = ipa_get_jf_pass_through_op_type (jfunc); + } } else { @@ -2551,6 +2569,11 @@ propagate_bits_across_jump_function (cgraph_edge *cs, int idx, if (!src_lats->bits_lattice.bottom_p ()) { + if (!op_type) + op_type = ipa_get_type (caller_info, src_idx); + + unsigned precision = TYPE_PRECISION (op_type); + signop sgn = TYPE_SIGN (op_type); bool drop_all_ones = keep_null && !src_lats->bits_lattice.known_nonzero_p (); @@ -2570,7 +2593,8 @@ propagate_bits_across_jump_function (cgraph_edge *cs, int idx, = widest_int::from (bm.mask (), TYPE_SIGN (parm_type)); widest_int value = widest_int::from (bm.value (), TYPE_SIGN (parm_type)); - return dest_lattice->meet_with (value, mask, precision); + return dest_lattice->meet_with (value, mask, + TYPE_PRECISION (parm_type)); } } return dest_lattice->set_to_bottom (); @@ -2869,6 +2893,7 @@ propagate_aggregate_lattice (struct cgraph_edge *cs, src_lat, aglat, src_offset, src_idx, + item->value.pass_through.op_type, item->type); if (src_lat->contains_variable) @@ -3361,24 +3386,29 @@ incorporate_penalties (cgraph_node *node, ipa_node_params *info, static bool good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit, sreal freq_sum, profile_count count_sum, - int size_cost) + int size_cost, bool called_without_ipa_profile) { + gcc_assert (count_sum.ipa () == count_sum); if (time_benefit == 0 || !opt_for_fn (node->decl, flag_ipa_cp_clone) - || node->optimize_for_size_p ()) + || node->optimize_for_size_p () + /* If there is no call which was executed in profiling or where + profile is missing, we do not want to clone. */ + || (!called_without_ipa_profile && !count_sum.nonzero_p ())) return false; gcc_assert (size_cost > 0); ipa_node_params *info = ipa_node_params_sum->get (node); int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold); + /* If we know the execution IPA execution counts, we can estimate overall + speedup of the program. */ if (count_sum.nonzero_p ()) { - gcc_assert (base_count.nonzero_p ()); - sreal factor = count_sum.probability_in (base_count).to_sreal (); - sreal evaluation = (time_benefit * factor) / size_cost; + profile_count saved_time = count_sum * time_benefit; + sreal evaluation = saved_time.to_sreal_scale (profile_count::one ()) + / size_cost; evaluation = incorporate_penalties (node, info, evaluation); - evaluation *= 1000; if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -3386,33 +3416,46 @@ good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit, "size: %i, count_sum: ", time_benefit.to_double (), size_cost); count_sum.dump (dump_file); + fprintf (dump_file, ", overall time saved: "); + saved_time.dump (dump_file); fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n", info->node_within_scc ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "", info->node_calling_single_call ? ", single_call" : "", evaluation.to_double (), eval_threshold); } - - return evaluation.to_int () >= eval_threshold; + gcc_checking_assert (saved_time == saved_time.ipa ()); + if (!maybe_hot_count_p (NULL, saved_time)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " not cloning: time saved is not hot\n"); + } + /* Evaulation approximately corresponds to time saved per instruction + introduced. This is likely almost always going to be true, since we + already checked that time saved is large enough to be considered + hot. */ + else if (evaluation.to_int () >= eval_threshold) + return true; + /* If all call sites have profile known; we know we do not want t clone. + If there are calls with unknown profile; try local heuristics. */ + if (!called_without_ipa_profile) + return false; } - else - { - sreal evaluation = (time_benefit * freq_sum) / size_cost; - evaluation = incorporate_penalties (node, info, evaluation); - evaluation *= 1000; + sreal evaluation = (time_benefit * freq_sum) / size_cost; + evaluation = incorporate_penalties (node, info, evaluation); + evaluation *= 1000; - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, " good_cloning_opportunity_p (time: %g, " - "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, " - "threshold: %i\n", - time_benefit.to_double (), size_cost, freq_sum.to_double (), - info->node_within_scc - ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "", - info->node_calling_single_call ? ", single_call" : "", - evaluation.to_double (), eval_threshold); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " good_cloning_opportunity_p (time: %g, " + "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, " + "threshold: %i\n", + time_benefit.to_double (), size_cost, freq_sum.to_double (), + info->node_within_scc + ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "", + info->node_calling_single_call ? ", single_call" : "", + evaluation.to_double (), eval_threshold); - return evaluation.to_int () >= eval_threshold; - } + return evaluation.to_int () >= eval_threshold; } /* Grow vectors in AVALS and fill them with information about values of @@ -3605,7 +3648,8 @@ estimate_local_effects (struct cgraph_node *node) "known contexts, code not going to grow.\n"); } else if (good_cloning_opportunity_p (node, time, stats.freq_sum, - stats.count_sum, size)) + stats.count_sum, size, + stats.called_without_ipa_profile)) { if (size + overall_size <= get_max_overall_size (node)) { @@ -3971,7 +4015,7 @@ value_topo_info<valtype>::propagate_effects () processed_srcvals.empty (); for (src = val->sources; src; src = src->next) if (src->val - && src->cs->maybe_hot_p ()) + && cs_interesting_for_ipcp_p (src->cs)) { if (!processed_srcvals.add (src->val)) { @@ -4016,21 +4060,6 @@ value_topo_info<valtype>::propagate_effects () } } -/* Callback for qsort to sort counts of all edges. */ - -static int -compare_edge_profile_counts (const void *a, const void *b) -{ - const profile_count *cnt1 = (const profile_count *) a; - const profile_count *cnt2 = (const profile_count *) b; - - if (*cnt1 < *cnt2) - return 1; - if (*cnt1 > *cnt2) - return -1; - return 0; -} - /* Propagate constants, polymorphic contexts and their effects from the summaries interprocedurally. */ @@ -4043,10 +4072,6 @@ ipcp_propagate_stage (class ipa_topo_info *topo) if (dump_file) fprintf (dump_file, "\n Propagating constants:\n\n"); - base_count = profile_count::uninitialized (); - - bool compute_count_base = false; - unsigned base_count_pos_percent = 0; FOR_EACH_DEFINED_FUNCTION (node) { if (node->has_gimple_body_p () @@ -4063,57 +4088,8 @@ ipcp_propagate_stage (class ipa_topo_info *topo) ipa_size_summary *s = ipa_size_summaries->get (node); if (node->definition && !node->alias && s != NULL) overall_size += s->self_size; - if (node->count.ipa ().initialized_p ()) - { - compute_count_base = true; - unsigned pos_percent = opt_for_fn (node->decl, - param_ipa_cp_profile_count_base); - base_count_pos_percent = MAX (base_count_pos_percent, pos_percent); - } } - if (compute_count_base) - { - auto_vec<profile_count> all_edge_counts; - all_edge_counts.reserve_exact (symtab->edges_count); - FOR_EACH_DEFINED_FUNCTION (node) - for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee) - { - profile_count count = cs->count.ipa (); - if (!count.nonzero_p ()) - continue; - - enum availability avail; - cgraph_node *tgt - = cs->callee->function_or_virtual_thunk_symbol (&avail); - ipa_node_params *info = ipa_node_params_sum->get (tgt); - if (info && info->versionable) - all_edge_counts.quick_push (count); - } - - if (!all_edge_counts.is_empty ()) - { - gcc_assert (base_count_pos_percent <= 100); - all_edge_counts.qsort (compare_edge_profile_counts); - - unsigned base_count_pos - = ((all_edge_counts.length () * (base_count_pos_percent)) / 100); - base_count = all_edge_counts[base_count_pos]; - - if (dump_file) - { - fprintf (dump_file, "\nSelected base_count from %u edges at " - "position %u, arriving at: ", all_edge_counts.length (), - base_count_pos); - base_count.dump (dump_file); - fprintf (dump_file, "\n"); - } - } - else if (dump_file) - fprintf (dump_file, "\nNo candidates with non-zero call count found, " - "continuing as if without profile feedback.\n"); - } - orig_overall_size = overall_size; if (dump_file) @@ -4375,15 +4351,17 @@ static bool get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest, sreal *freq_sum, int *caller_count, profile_count *rec_count_sum, - profile_count *nonrec_count_sum) + profile_count *nonrec_count_sum, + bool *called_without_ipa_profile) { ipcp_value_source<valtype> *src; sreal freq = 0; int count = 0; profile_count rec_cnt = profile_count::zero (); profile_count nonrec_cnt = profile_count::zero (); - bool hot = false; + bool interesting = false; bool non_self_recursive = false; + *called_without_ipa_profile = false; for (src = val->sources; src; src = src->next) { @@ -4394,15 +4372,19 @@ get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest, { count++; freq += cs->sreal_frequency (); - hot |= cs->maybe_hot_p (); + interesting |= cs_interesting_for_ipcp_p (cs); if (cs->caller != dest) { non_self_recursive = true; if (cs->count.ipa ().initialized_p ()) rec_cnt += cs->count.ipa (); + else + *called_without_ipa_profile = true; } else if (cs->count.ipa ().initialized_p ()) nonrec_cnt += cs->count.ipa (); + else + *called_without_ipa_profile = true; } cs = get_next_cgraph_edge_clone (cs); } @@ -4418,19 +4400,7 @@ get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest, *rec_count_sum = rec_cnt; *nonrec_count_sum = nonrec_cnt; - if (!hot && ipa_node_params_sum->get (dest)->node_within_scc) - { - struct cgraph_edge *cs; - - /* Cold non-SCC source edge could trigger hot recursive execution of - function. Consider the case as hot and rely on following cost model - computation to further select right one. */ - for (cs = dest->callers; cs; cs = cs->next_caller) - if (cs->caller == dest && cs->maybe_hot_p ()) - return true; - } - - return hot; + return interesting; } /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers @@ -5394,11 +5364,14 @@ find_more_scalar_values_for_callers_subset (struct cgraph_node *node, if (self_recursive_pass_through_p (cs, jump_func, i, false)) { gcc_assert (newval); - t = ipa_get_jf_arith_result ( - ipa_get_jf_pass_through_operation (jump_func), - newval, + enum tree_code opcode + = ipa_get_jf_pass_through_operation (jump_func); + tree op_type = (opcode == NOP_EXPR) ? NULL_TREE + : ipa_get_jf_pass_through_op_type (jump_func); + t = ipa_get_jf_arith_result (opcode, newval, ipa_get_jf_pass_through_operand (jump_func), - type); + op_type); + t = ipacp_value_safe_for_type (type, t); } else t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller), @@ -5603,10 +5576,13 @@ push_agg_values_for_index_from_edge (struct cgraph_edge *cs, int index, && self_recursive_agg_pass_through_p (cs, &agg_jf, index, false) && (srcvalue = interim->get_value(index, agg_jf.offset / BITS_PER_UNIT))) - value = ipa_get_jf_arith_result (agg_jf.value.pass_through.operation, - srcvalue, - agg_jf.value.pass_through.operand, - agg_jf.type); + { + value = ipa_get_jf_arith_result (agg_jf.value.pass_through.operation, + srcvalue, + agg_jf.value.pass_through.operand, + agg_jf.value.pass_through.op_type); + value = ipacp_value_safe_for_type (agg_jf.type, value); + } else value = ipa_agg_value_from_jfunc (caller_info, cs->caller, &agg_jf); @@ -5914,6 +5890,7 @@ decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset, sreal freq_sum; profile_count count_sum, rec_count_sum; vec<cgraph_edge *> callers; + bool called_without_ipa_profile; if (val->spec_node) { @@ -5929,7 +5906,8 @@ decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset, return false; } else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count, - &rec_count_sum, &count_sum)) + &rec_count_sum, &count_sum, + &called_without_ipa_profile)) return false; if (!dbg_cnt (ipa_cp_values)) @@ -5966,9 +5944,11 @@ decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset, if (!good_cloning_opportunity_p (node, val->local_time_benefit, freq_sum, count_sum, - val->local_size_cost) + val->local_size_cost, + called_without_ipa_profile) && !good_cloning_opportunity_p (node, val->prop_time_benefit, - freq_sum, count_sum, val->prop_size_cost)) + freq_sum, count_sum, val->prop_size_cost, + called_without_ipa_profile)) return false; if (dump_file) @@ -6426,7 +6406,7 @@ ipcp_store_vr_results (void) fprintf (dump_file, " param %i: value = ", i); ipcp_print_widest_int (dump_file, bits->get_value ()); fprintf (dump_file, ", mask = "); - print_hex (bits->get_mask (), dump_file); + ipcp_print_widest_int (dump_file, bits->get_mask ()); fprintf (dump_file, "\n"); } } @@ -6550,7 +6530,6 @@ make_pass_ipa_cp (gcc::context *ctxt) void ipa_cp_cc_finalize (void) { - base_count = profile_count::uninitialized (); overall_size = 0; orig_overall_size = 0; ipcp_free_transformation_sum (); |