From 28daadc98094501175c9dfe4a985871fa6aa4f94 Mon Sep 17 00:00:00 2001 From: liuhongt Date: Wed, 6 Jan 2021 16:33:27 +0800 Subject: Extend is_cond_scalar_reduction to handle nop_expr after/before scalar reduction.[PR98365] gcc/ChangeLog: PR tree-optimization/98365 * tree-if-conv.c (strip_nop_cond_scalar_reduction): New function. (is_cond_scalar_reduction): Handle nop_expr in cond scalar reduction. (convert_scalar_cond_reduction): Ditto. (predicate_scalar_phi): Ditto. gcc/testsuite/ChangeLog: PR tree-optimization/98365 * gcc.target/i386/pr98365.c: New test. --- gcc/tree-if-conv.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 124 insertions(+), 18 deletions(-) (limited to 'gcc/tree-if-conv.c') diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c index 716eae4..345488e 100644 --- a/gcc/tree-if-conv.c +++ b/gcc/tree-if-conv.c @@ -1579,6 +1579,31 @@ if_convertible_loop_p (class loop *loop) return res; } +/* Return reduc_1 if has_nop. + + if (...) + tmp1 = (unsigned type) reduc_1; + tmp2 = tmp1 + rhs2; + reduc_3 = (signed type) tmp2. */ +static tree +strip_nop_cond_scalar_reduction (bool has_nop, tree op) +{ + if (!has_nop) + return op; + + if (TREE_CODE (op) != SSA_NAME) + return NULL_TREE; + + gassign *stmt = safe_dyn_cast <gassign *> (SSA_NAME_DEF_STMT (op)); + if (!stmt + || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)) + || !tree_nop_conversion_p (TREE_TYPE (op), TREE_TYPE + (gimple_assign_rhs1 (stmt)))) + return NULL_TREE; + + return gimple_assign_rhs1 (stmt); +} + /* Returns true if def-stmt for phi argument ARG is simple increment/decrement which is in predicated basic block. 
In fact, the following PHI pattern is searching: @@ -1595,9 +1620,10 @@ if_convertible_loop_p (class loop *loop) static bool is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1, - tree *op0, tree *op1, bool extended) + tree *op0, tree *op1, bool extended, bool* has_nop, + gimple **nop_reduc) { - tree lhs, r_op1, r_op2; + tree lhs, r_op1, r_op2, r_nop1, r_nop2; gimple *stmt; gimple *header_phi = NULL; enum tree_code reduction_op; @@ -1608,7 +1634,7 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1, use_operand_p use_p; edge e; edge_iterator ei; - bool result = false; + bool result = *has_nop = false; if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME) return false; @@ -1656,18 +1682,77 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1, return false; reduction_op = gimple_assign_rhs_code (stmt); + + /* Catch something like below + + loop-header: + reduc_1 = PHI <..., reduc_2> + ... + if (...) + tmp1 = (unsigned type) reduc_1; + tmp2 = tmp1 + rhs2; + reduc_3 = (signed type) tmp2; + + reduc_2 = PHI <reduc_1, reduc_3> + + and convert to + + reduc_2 = PHI <0, reduc_3> + tmp1 = (unsigned type)reduce_1; + ifcvt = cond_expr ? rhs2 : 0 + tmp2 = tmp1 +/- ifcvt; + reduce_1 = (signed type)tmp2; */ + + if (CONVERT_EXPR_CODE_P (reduction_op)) + { + lhs = gimple_assign_rhs1 (stmt); + if (TREE_CODE (lhs) != SSA_NAME + || !has_single_use (lhs)) + return false; + + *nop_reduc = stmt; + stmt = SSA_NAME_DEF_STMT (lhs); + if (gimple_bb (stmt) != gimple_bb (*nop_reduc) + || !is_gimple_assign (stmt)) + return false; + + *has_nop = true; + reduction_op = gimple_assign_rhs_code (stmt); + } + if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR) return false; r_op1 = gimple_assign_rhs1 (stmt); r_op2 = gimple_assign_rhs2 (stmt); + r_nop1 = strip_nop_cond_scalar_reduction (*has_nop, r_op1); + r_nop2 = strip_nop_cond_scalar_reduction (*has_nop, r_op2); + /* Make R_OP1 to hold reduction variable. 
*/ - if (r_op2 == PHI_RESULT (header_phi) + if (r_nop2 == PHI_RESULT (header_phi) && reduction_op == PLUS_EXPR) - std::swap (r_op1, r_op2); - else if (r_op1 != PHI_RESULT (header_phi)) + { + std::swap (r_op1, r_op2); + std::swap (r_nop1, r_nop2); + } + else if (r_nop1 != PHI_RESULT (header_phi)) return false; + if (*has_nop) + { + /* Check that R_NOP1 is used in nop_stmt or in PHI only. */ + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_nop1) + { + gimple *use_stmt = USE_STMT (use_p); + if (is_gimple_debug (use_stmt)) + continue; + if (use_stmt == SSA_NAME_DEF_STMT (r_op1)) + continue; + if (use_stmt != phi) + return false; + } + } + /* Check that R_OP1 is used in reduction stmt or in PHI only. */ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1) { @@ -1705,7 +1790,8 @@ is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1, static tree convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi, - tree cond, tree op0, tree op1, bool swap) + tree cond, tree op0, tree op1, bool swap, + bool has_nop, gimple* nop_reduc) { gimple_stmt_iterator stmt_it; gimple *new_assign; @@ -1714,6 +1800,7 @@ convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi, tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_"); tree c; tree zero = build_zero_cst (TREE_TYPE (rhs1)); + gimple_seq stmts = NULL; if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -1732,8 +1819,18 @@ convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi, new_assign = gimple_build_assign (tmp, c); gsi_insert_before (gsi, new_assign, GSI_SAME_STMT); /* Build rhs for unconditional increment/decrement. 
*/ - rhs = fold_build2 (gimple_assign_rhs_code (reduc), - TREE_TYPE (rhs1), op0, tmp); + rhs = gimple_build (&stmts, gimple_assign_rhs_code (reduc), + TREE_TYPE (rhs1), op0, tmp); + + if (has_nop) + { + rhs = gimple_convert (&stmts, + TREE_TYPE (gimple_assign_lhs (nop_reduc)), rhs); + stmt_it = gsi_for_stmt (nop_reduc); + gsi_remove (&stmt_it, true); + release_defs (nop_reduc); + } + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); /* Delete original reduction stmt. */ stmt_it = gsi_for_stmt (reduc); @@ -1808,7 +1905,7 @@ ifcvt_follow_ssa_use_edges (tree val) static void predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi) { - gimple *new_stmt = NULL, *reduc; + gimple *new_stmt = NULL, *reduc, *nop_reduc; tree rhs, res, arg0, arg1, op0, op1, scev; tree cond; unsigned int index0; @@ -1816,6 +1913,7 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi) edge e; basic_block bb; unsigned int i; + bool has_nop; res = gimple_phi_result (phi); if (virtual_operand_p (res)) @@ -1876,10 +1974,15 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi) arg1 = gimple_phi_arg_def (phi, 1); } if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1, - &op0, &op1, false)) - /* Convert reduction stmt into vectorizable form. */ - rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1, - true_bb != gimple_bb (reduc)); + &op0, &op1, false, &has_nop, + &nop_reduc)) + { + /* Convert reduction stmt into vectorizable form. */ + rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1, + true_bb != gimple_bb (reduc), + has_nop, nop_reduc); + redundant_ssa_names.safe_push (std::make_pair (res, rhs)); + } else /* Build new RHS using selected condition and arguments. 
*/ rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond), @@ -1961,14 +2064,17 @@ predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi) is_gimple_condexpr, NULL_TREE, true, GSI_SAME_STMT); if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1, - &op0, &op1, true))) + &op0, &op1, true, &has_nop, &nop_reduc))) rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond), swap? arg1 : arg0, swap? arg0 : arg1); else - /* Convert reduction stmt into vectorizable form. */ - rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1, - swap); + { + /* Convert reduction stmt into vectorizable form. */ + rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1, + swap,has_nop, nop_reduc); + redundant_ssa_names.safe_push (std::make_pair (res, rhs)); + } new_stmt = gimple_build_assign (res, rhs); gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); update_stmt (new_stmt); -- cgit v1.1 From 00dcc88a0ed7bd148ea86d900b6c93574a2e1f26 Mon Sep 17 00:00:00 2001 From: Martin Sebor Date: Tue, 20 Jul 2021 11:14:19 -0600 Subject: Adjust by-value function vec arguments to by-reference. gcc/c-family/ChangeLog: * c-common.c (c_build_shufflevector): Adjust by-value argument to by-const-reference. * c-common.h (c_build_shufflevector): Same. gcc/c/ChangeLog: * c-tree.h (c_build_function_call_vec): Adjust by-value argument to by-const-reference. * c-typeck.c (c_build_function_call_vec): Same. gcc/ChangeLog: * cfgloop.h (single_likely_exit): Adjust by-value argument to by-const-reference. * cfgloopanal.c (single_likely_exit): Same. * cgraph.h (struct cgraph_node): Same. * cgraphclones.c (cgraph_node::create_virtual_clone): Same. * genautomata.c (merge_states): Same. * genextract.c (VEC_char_to_string): Same. * genmatch.c (dt_node::gen_kids_1): Same. (walk_captures): Adjust by-value argument to by-reference. * gimple-ssa-store-merging.c (check_no_overlap): Adjust by-value argument to by-const-reference. * gimple.c (gimple_build_call_vec): Same. 
(gimple_build_call_internal_vec): Same. (gimple_build_switch): Same. (sort_case_labels): Same. (preprocess_case_label_vec_for_gimple): Adjust by-value argument to by-reference. * gimple.h (gimple_build_call_vec): Adjust by-value argument to by-const-reference. (gimple_build_call_internal_vec): Same. (gimple_build_switch): Same. (sort_case_labels): Same. (preprocess_case_label_vec_for_gimple): Adjust by-value argument to by-reference. * haifa-sched.c (calc_priorities): Adjust by-value argument to by-const-reference. (sched_init_luids): Same. (haifa_init_h_i_d): Same. * ipa-cp.c (ipa_get_indirect_edge_target_1): Same. (adjust_callers_for_value_intersection): Adjust by-value argument to by-reference. (find_more_scalar_values_for_callers_subset): Adjust by-value argument to by-const-reference. (find_more_contexts_for_caller_subset): Same. (find_aggregate_values_for_callers_subset): Same. (copy_useful_known_contexts): Same. * ipa-fnsummary.c (remap_edge_summaries): Same. (remap_freqcounting_predicate): Same. * ipa-inline.c (add_new_edges_to_heap): Adjust by-value argument to by-reference. * ipa-predicate.c (predicate::remap_after_inlining): Adjust by-value argument to by-const-reference. * ipa-predicate.h (predicate::remap_after_inlining): Same. * ipa-prop.c (ipa_find_agg_cst_for_param): Same. * ipa-prop.h (ipa_find_agg_cst_for_param): Same. * ira-build.c (ira_loop_tree_body_rev_postorder): Same. * read-rtl.c (add_overload_instance): Same. * rtl.h (native_decode_rtx): Same. (native_decode_vector_rtx): Same. * sched-int.h (sched_init_luids): Same. (haifa_init_h_i_d): Same. * simplify-rtx.c (native_decode_vector_rtx): Same. (native_decode_rtx): Same. * tree-call-cdce.c (gen_shrink_wrap_conditions): Same. (shrink_wrap_one_built_in_call_with_conds): Same. (shrink_wrap_conditional_dead_built_in_calls): Same. * tree-data-ref.c (create_runtime_alias_checks): Same. (compute_all_dependences): Same. * tree-data-ref.h (compute_all_dependences): Same. 
(create_runtime_alias_checks): Same. (index_in_loop_nest): Same. * tree-if-conv.c (mask_exists): Same. * tree-loop-distribution.c (class loop_distribution): Same. (loop_distribution::create_rdg_vertices): Same. (dump_rdg_partitions): Same. (debug_rdg_partitions): Same. (partition_contains_all_rw): Same. (loop_distribution::distribute_loop): Same. * tree-parloops.c (oacc_entry_exit_ok_1): Same. (oacc_entry_exit_single_gang): Same. * tree-ssa-loop-im.c (hoist_memory_references): Same. (loop_suitable_for_sm): Same. * tree-ssa-loop-niter.c (bound_index): Same. * tree-ssa-reassoc.c (update_ops): Same. (swap_ops_for_binary_stmt): Same. (rewrite_expr_tree): Same. (rewrite_expr_tree_parallel): Same. * tree-ssa-sccvn.c (ao_ref_init_from_vn_reference): Same. * tree-ssa-sccvn.h (ao_ref_init_from_vn_reference): Same. * tree-ssa-structalias.c (process_all_all_constraints): Same. (make_constraints_to): Same. (handle_lhs_call): Same. (find_func_aliases_for_builtin_call): Same. (sort_fieldstack): Same. (check_for_overlaps): Same. * tree-vect-loop-manip.c (vect_create_cond_for_align_checks): Same. (vect_create_cond_for_unequal_addrs): Same. (vect_create_cond_for_lower_bounds): Same. (vect_create_cond_for_alias_checks): Same. * tree-vect-slp-patterns.c (vect_validate_multiplication): Same. * tree-vect-slp.c (vect_analyze_slp_instance): Same. (vect_make_slp_decision): Same. (vect_slp_bbs): Same. (duplicate_and_interleave): Same. (vect_transform_slp_perm_load): Same. (vect_schedule_slp): Same. * tree-vectorizer.h (vect_transform_slp_perm_load): Same. (vect_schedule_slp): Same. (duplicate_and_interleave): Same. * tree.c (build_vector_from_ctor): Same. (build_vector): Same. (check_vector_cst): Same. (check_vector_cst_duplicate): Same. (check_vector_cst_fill): Same. (check_vector_cst_stepped): Same. * tree.h (build_vector_from_ctor): Same. 
--- gcc/tree-if-conv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc/tree-if-conv.c') diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c index 345488e..49e89cf 100644 --- a/gcc/tree-if-conv.c +++ b/gcc/tree-if-conv.c @@ -2208,7 +2208,7 @@ insert_gimplified_predicates (loop_p loop) mask if it was created for given SIZE and -1 otherwise. */ static int -mask_exists (int size, vec<int> vec) +mask_exists (int size, const vec<int> &vec) { unsigned int ix; int v; -- cgit v1.1 From e41ba804ba5f5ca433e09238d561b1b4c8b10985 Mon Sep 17 00:00:00 2001 From: Kewen Lin Date: Thu, 29 Jul 2021 22:26:25 -0500 Subject: Use range-based for loops for traversing loops This patch follows Martin's suggestion here[1], to support range based loop for iterating loops, analogously to the patch for vec[2]. For example, use below range-based for loop for (auto loop : loops_list (cfun, 0)) to replace the previous macro FOR_EACH_LOOP FOR_EACH_LOOP (loop, 0) [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-June/573424.html [2] https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572315.html gcc/ChangeLog: * cfgloop.h (as_const): New function. (class loop_iterator): Rename to ... (class loops_list): ... this. (loop_iterator::next): Rename to ... (loops_list::Iter::fill_curr_loop): ... this and adjust. (loop_iterator::loop_iterator): Rename to ... (loops_list::loops_list): ... this and adjust. (loops_list::Iter): New class. (loops_list::iterator): New type. (loops_list::const_iterator): New type. (loops_list::begin): New function. (loops_list::end): Likewise. (loops_list::begin const): Likewise. (loops_list::end const): Likewise. (FOR_EACH_LOOP): Remove. (FOR_EACH_LOOP_FN): Remove. * cfgloop.c (flow_loops_dump): Adjust FOR_EACH_LOOP* with range-based for loop with loops_list instance. (sort_sibling_loops): Likewise. (disambiguate_loops_with_multiple_latches): Likewise. (verify_loop_structure): Likewise. * cfgloopmanip.c (create_preheaders): Likewise. 
(force_single_succ_latches): Likewise. * config/aarch64/falkor-tag-collision-avoidance.c (execute_tag_collision_avoidance): Likewise. * config/mn10300/mn10300.c (mn10300_scan_for_setlb_lcc): Likewise. * config/s390/s390.c (s390_adjust_loops): Likewise. * doc/loop.texi: Likewise. * gimple-loop-interchange.cc (pass_linterchange::execute): Likewise. * gimple-loop-jam.c (tree_loop_unroll_and_jam): Likewise. * gimple-loop-versioning.cc (loop_versioning::analyze_blocks): Likewise. (loop_versioning::make_versioning_decisions): Likewise. * gimple-ssa-split-paths.c (split_paths): Likewise. * graphite-isl-ast-to-gimple.c (graphite_regenerate_ast_isl): Likewise. * graphite.c (canonicalize_loop_form): Likewise. (graphite_transform_loops): Likewise. * ipa-fnsummary.c (analyze_function_body): Likewise. * ipa-pure-const.c (analyze_function): Likewise. * loop-doloop.c (doloop_optimize_loops): Likewise. * loop-init.c (loop_optimizer_finalize): Likewise. (fix_loop_structure): Likewise. * loop-invariant.c (calculate_loop_reg_pressure): Likewise. (move_loop_invariants): Likewise. * loop-unroll.c (decide_unrolling): Likewise. (unroll_loops): Likewise. * modulo-sched.c (sms_schedule): Likewise. * predict.c (predict_loops): Likewise. (pass_profile::execute): Likewise. * profile.c (branch_prob): Likewise. * sel-sched-ir.c (sel_finish_pipelining): Likewise. (sel_find_rgns): Likewise. * tree-cfg.c (replace_loop_annotate): Likewise. (replace_uses_by): Likewise. (move_sese_region_to_fn): Likewise. * tree-if-conv.c (pass_if_conversion::execute): Likewise. * tree-loop-distribution.c (loop_distribution::execute): Likewise. * tree-parloops.c (parallelize_loops): Likewise. * tree-predcom.c (tree_predictive_commoning): Likewise. * tree-scalar-evolution.c (scev_initialize): Likewise. (scev_reset): Likewise. * tree-ssa-dce.c (find_obviously_necessary_stmts): Likewise. * tree-ssa-live.c (remove_unused_locals): Likewise. * tree-ssa-loop-ch.c (ch_base::copy_headers): Likewise. 
* tree-ssa-loop-im.c (analyze_memory_references): Likewise. (tree_ssa_lim_initialize): Likewise. * tree-ssa-loop-ivcanon.c (canonicalize_induction_variables): Likewise. * tree-ssa-loop-ivopts.c (tree_ssa_iv_optimize): Likewise. * tree-ssa-loop-manip.c (get_loops_exits): Likewise. * tree-ssa-loop-niter.c (estimate_numbers_of_iterations): Likewise. (free_numbers_of_iterations_estimates): Likewise. * tree-ssa-loop-prefetch.c (tree_ssa_prefetch_arrays): Likewise. * tree-ssa-loop-split.c (tree_ssa_split_loops): Likewise. * tree-ssa-loop-unswitch.c (tree_ssa_unswitch_loops): Likewise. * tree-ssa-loop.c (gate_oacc_kernels): Likewise. (pass_scev_cprop::execute): Likewise. * tree-ssa-propagate.c (clean_up_loop_closed_phi): Likewise. * tree-ssa-sccvn.c (do_rpo_vn): Likewise. * tree-ssa-threadupdate.c (jump_thread_path_registry::thread_through_all_blocks): Likewise. * tree-vectorizer.c (vectorize_loops): Likewise. * tree-vrp.c (vrp_asserts::find_assert_locations): Likewise. --- gcc/tree-if-conv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'gcc/tree-if-conv.c') diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c index 49e89cf..d7b7b30 100644 --- a/gcc/tree-if-conv.c +++ b/gcc/tree-if-conv.c @@ -3300,14 +3300,13 @@ pass_if_conversion::gate (function *fun) unsigned int pass_if_conversion::execute (function *fun) { - class loop *loop; unsigned todo = 0; if (number_of_loops (fun) <= 1) return 0; auto_vec<gimple *> preds; - FOR_EACH_LOOP (loop, 0) + for (auto loop : loops_list (cfun, 0)) if (flag_tree_loop_if_convert == 1 || ((flag_tree_loop_vectorize || loop->force_vectorize) && !loop->dont_vectorize)) -- cgit v1.1