From 028d409252058d88805341a3f6dc0ff1553f5bdc Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Tue, 12 Nov 2019 11:08:40 +0100 Subject: Apply mechanical replacement (generated patch). 2019-11-12 Martin Liska * asan.c (asan_sanitize_stack_p): Replace old parameter syntax with the new one, include opts.h if needed. Use SET_OPTION_IF_UNSET macro. (asan_sanitize_allocas_p): Likewise. (asan_emit_stack_protection): Likewise. (asan_protect_global): Likewise. (instrument_derefs): Likewise. (instrument_builtin_call): Likewise. (asan_expand_mark_ifn): Likewise. * auto-profile.c (auto_profile): Likewise. * bb-reorder.c (copy_bb_p): Likewise. (duplicate_computed_gotos): Likewise. * builtins.c (inline_expand_builtin_string_cmp): Likewise. * cfgcleanup.c (try_crossjump_to_edge): Likewise. (try_crossjump_bb): Likewise. * cfgexpand.c (defer_stack_allocation): Likewise. (stack_protect_classify_type): Likewise. (pass_expand::execute): Likewise. * cfgloopanal.c (expected_loop_iterations_unbounded): Likewise. (estimate_reg_pressure_cost): Likewise. * cgraph.c (cgraph_edge::maybe_hot_p): Likewise. * combine.c (combine_instructions): Likewise. (record_value_for_reg): Likewise. * common/config/aarch64/aarch64-common.c (aarch64_option_validate_param): Likewise. (aarch64_option_default_params): Likewise. * common/config/ia64/ia64-common.c (ia64_option_default_params): Likewise. * common/config/powerpcspe/powerpcspe-common.c (rs6000_option_default_params): Likewise. * common/config/rs6000/rs6000-common.c (rs6000_option_default_params): Likewise. * common/config/sh/sh-common.c (sh_option_default_params): Likewise. * config/aarch64/aarch64.c (aarch64_output_probe_stack_range): Likewise. (aarch64_allocate_and_probe_stack_space): Likewise. (aarch64_expand_epilogue): Likewise. (aarch64_override_options_internal): Likewise. * config/alpha/alpha.c (alpha_option_override): Likewise. * config/arm/arm.c (arm_option_override): Likewise. (arm_valid_target_attribute_p): Likewise. * config/i386/i386-options.c (ix86_option_override_internal): Likewise. * config/i386/i386.c (get_probe_interval): Likewise. (ix86_adjust_stack_and_probe_stack_clash): Likewise. (ix86_max_noce_ifcvt_seq_cost): Likewise. * config/ia64/ia64.c (ia64_adjust_cost): Likewise. * config/rs6000/rs6000-logue.c (get_stack_clash_protection_probe_interval): Likewise. (get_stack_clash_protection_guard_size): Likewise. * config/rs6000/rs6000.c (rs6000_option_override_internal): Likewise. * config/s390/s390.c (allocate_stack_space): Likewise. (s390_emit_prologue): Likewise. (s390_option_override_internal): Likewise. * config/sparc/sparc.c (sparc_option_override): Likewise. * config/visium/visium.c (visium_option_override): Likewise. * coverage.c (get_coverage_counts): Likewise. (coverage_compute_profile_id): Likewise. (coverage_begin_function): Likewise. (coverage_end_function): Likewise. * cse.c (cse_find_path): Likewise. (cse_extended_basic_block): Likewise. (cse_main): Likewise. * cselib.c (cselib_invalidate_mem): Likewise. * dse.c (dse_step1): Likewise. * emit-rtl.c (set_new_first_and_last_insn): Likewise. (get_max_insn_count): Likewise. (make_debug_insn_raw): Likewise. (init_emit): Likewise. * explow.c (compute_stack_clash_protection_loop_data): Likewise. * final.c (compute_alignments): Likewise. * fold-const.c (fold_range_test): Likewise. (fold_truth_andor): Likewise. (tree_single_nonnegative_warnv_p): Likewise. (integer_valued_real_single_p): Likewise. * gcse.c (want_to_gcse_p): Likewise. (prune_insertions_deletions): Likewise. (hoist_code): Likewise. (gcse_or_cprop_is_too_expensive): Likewise. * ggc-common.c: Likewise. * ggc-page.c (ggc_collect): Likewise. * gimple-loop-interchange.cc (MAX_NUM_STMT): Likewise. (MAX_DATAREFS): Likewise. (OUTER_STRIDE_RATIO): Likewise. * gimple-loop-jam.c (tree_loop_unroll_and_jam): Likewise. * gimple-loop-versioning.cc (loop_versioning::max_insns_for_loop): Likewise. * gimple-ssa-split-paths.c (is_feasible_trace): Likewise. * gimple-ssa-store-merging.c (imm_store_chain_info::try_coalesce_bswap): Likewise. (imm_store_chain_info::coalesce_immediate_stores): Likewise. (imm_store_chain_info::output_merged_store): Likewise. (pass_store_merging::process_store): Likewise. * gimple-ssa-strength-reduction.c (find_basis_for_base_expr): Likewise. * graphite-isl-ast-to-gimple.c (class translate_isl_ast_to_gimple): Likewise. (scop_to_isl_ast): Likewise. * graphite-optimize-isl.c (get_schedule_for_node_st): Likewise. (optimize_isl): Likewise. * graphite-scop-detection.c (build_scops): Likewise. * haifa-sched.c (set_modulo_params): Likewise. (rank_for_schedule): Likewise. (model_add_to_worklist): Likewise. (model_promote_insn): Likewise. (model_choose_insn): Likewise. (queue_to_ready): Likewise. (autopref_multipass_dfa_lookahead_guard): Likewise. (schedule_block): Likewise. (sched_init): Likewise. * hsa-gen.c (init_prologue): Likewise. * ifcvt.c (bb_ok_for_noce_convert_multiple_sets): Likewise. (cond_move_process_if_block): Likewise. * ipa-cp.c (ipcp_lattice::add_value): Likewise. (merge_agg_lats_step): Likewise. (devirtualization_time_bonus): Likewise. (hint_time_bonus): Likewise. (incorporate_penalties): Likewise. (good_cloning_opportunity_p): Likewise. (ipcp_propagate_stage): Likewise. * ipa-fnsummary.c (decompose_param_expr): Likewise. (set_switch_stmt_execution_predicate): Likewise. (analyze_function_body): Likewise. (compute_fn_summary): Likewise. * ipa-inline-analysis.c (estimate_growth): Likewise. * ipa-inline.c (caller_growth_limits): Likewise. (inline_insns_single): Likewise. (inline_insns_auto): Likewise. (can_inline_edge_by_limits_p): Likewise. (want_early_inline_function_p): Likewise. (big_speedup_p): Likewise. (want_inline_small_function_p): Likewise. (want_inline_self_recursive_call_p): Likewise. (edge_badness): Likewise. (recursive_inlining): Likewise. (compute_max_insns): Likewise. (early_inliner): Likewise. * ipa-polymorphic-call.c (csftc_abort_walking_p): Likewise. * ipa-profile.c (ipa_profile): Likewise. * ipa-prop.c (determine_known_aggregate_parts): Likewise. (ipa_analyze_node): Likewise. (ipcp_transform_function): Likewise. * ipa-split.c (consider_split): Likewise. * ipa-sra.c (allocate_access): Likewise. (process_scan_results): Likewise. (ipa_sra_summarize_function): Likewise. (pull_accesses_from_callee): Likewise. * ira-build.c (loop_compare_func): Likewise. (mark_loops_for_removal): Likewise. * ira-conflicts.c (build_conflict_bit_table): Likewise. * loop-doloop.c (doloop_optimize): Likewise. * loop-invariant.c (gain_for_invariant): Likewise. (move_loop_invariants): Likewise. * loop-unroll.c (decide_unroll_constant_iterations): Likewise. (decide_unroll_runtime_iterations): Likewise. (decide_unroll_stupid): Likewise. (expand_var_during_unrolling): Likewise. * lra-assigns.c (spill_for): Likewise. * lra-constraints.c (EBB_PROBABILITY_CUTOFF): Likewise. * modulo-sched.c (sms_schedule): Likewise. (DFA_HISTORY): Likewise. * opts.c (default_options_optimization): Likewise. (finish_options): Likewise. (common_handle_option): Likewise. * postreload-gcse.c (eliminate_partially_redundant_load): Likewise. (if): Likewise. * predict.c (get_hot_bb_threshold): Likewise. (maybe_hot_count_p): Likewise. (probably_never_executed): Likewise. (predictable_edge_p): Likewise. (predict_loops): Likewise. (expr_expected_value_1): Likewise. (tree_predict_by_opcode): Likewise. (handle_missing_profiles): Likewise. * reload.c (find_equiv_reg): Likewise. * reorg.c (redundant_insn): Likewise. * resource.c (mark_target_live_regs): Likewise. (incr_ticks_for_insn): Likewise. * sanopt.c (pass_sanopt::execute): Likewise. * sched-deps.c (sched_analyze_1): Likewise. (sched_analyze_2): Likewise. (sched_analyze_insn): Likewise. (deps_analyze_insn): Likewise. * sched-ebb.c (schedule_ebbs): Likewise. * sched-rgn.c (find_single_block_region): Likewise. (too_large): Likewise. (haifa_find_rgns): Likewise. (extend_rgns): Likewise. (new_ready): Likewise. (schedule_region): Likewise. (sched_rgn_init): Likewise. * sel-sched-ir.c (make_region_from_loop): Likewise. * sel-sched-ir.h (MAX_WS): Likewise. * sel-sched.c (process_pipelined_exprs): Likewise. (sel_setup_region_sched_flags): Likewise. * shrink-wrap.c (try_shrink_wrapping): Likewise. * targhooks.c (default_max_noce_ifcvt_seq_cost): Likewise. * toplev.c (print_version): Likewise. (process_options): Likewise. * tracer.c (tail_duplicate): Likewise. * trans-mem.c (tm_log_add): Likewise. * tree-chrec.c (chrec_fold_plus_1): Likewise. * tree-data-ref.c (split_constant_offset): Likewise. (compute_all_dependences): Likewise. * tree-if-conv.c (MAX_PHI_ARG_NUM): Likewise. * tree-inline.c (remap_gimple_stmt): Likewise. * tree-loop-distribution.c (MAX_DATAREFS_NUM): Likewise. * tree-parloops.c (MIN_PER_THREAD): Likewise. (create_parallel_loop): Likewise. * tree-predcom.c (determine_unroll_factor): Likewise. * tree-scalar-evolution.c (instantiate_scev_r): Likewise. * tree-sra.c (analyze_all_variable_accesses): Likewise. * tree-ssa-ccp.c (fold_builtin_alloca_with_align): Likewise. * tree-ssa-dse.c (setup_live_bytes_from_ref): Likewise. (dse_optimize_redundant_stores): Likewise. (dse_classify_store): Likewise. * tree-ssa-ifcombine.c (ifcombine_ifandif): Likewise. * tree-ssa-loop-ch.c (ch_base::copy_headers): Likewise. * tree-ssa-loop-im.c (LIM_EXPENSIVE): Likewise. * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Likewise. (try_peel_loop): Likewise. (tree_unroll_loops_completely): Likewise. * tree-ssa-loop-ivopts.c (avg_loop_niter): Likewise. (CONSIDER_ALL_CANDIDATES_BOUND): Likewise. (MAX_CONSIDERED_GROUPS): Likewise. (ALWAYS_PRUNE_CAND_SET_BOUND): Likewise. * tree-ssa-loop-manip.c (can_unroll_loop_p): Likewise. * tree-ssa-loop-niter.c (MAX_ITERATIONS_TO_TRACK): Likewise. * tree-ssa-loop-prefetch.c (PREFETCH_BLOCK): Likewise. (L1_CACHE_SIZE_BYTES): Likewise. (L2_CACHE_SIZE_BYTES): Likewise. (should_issue_prefetch_p): Likewise. (schedule_prefetches): Likewise. (determine_unroll_factor): Likewise. (volume_of_references): Likewise. (add_subscript_strides): Likewise. (self_reuse_distance): Likewise. (mem_ref_count_reasonable_p): Likewise. (insn_to_prefetch_ratio_too_small_p): Likewise. (loop_prefetch_arrays): Likewise. (tree_ssa_prefetch_arrays): Likewise. * tree-ssa-loop-unswitch.c (tree_unswitch_single_loop): Likewise. * tree-ssa-math-opts.c (gimple_expand_builtin_pow): Likewise. (convert_mult_to_fma): Likewise. (math_opts_dom_walker::after_dom_children): Likewise. * tree-ssa-phiopt.c (cond_if_else_store_replacement): Likewise. (hoist_adjacent_loads): Likewise. (gate_hoist_loads): Likewise. * tree-ssa-pre.c (translate_vuse_through_block): Likewise. (compute_partial_antic_aux): Likewise. * tree-ssa-reassoc.c (get_reassociation_width): Likewise. * tree-ssa-sccvn.c (vn_reference_lookup_pieces): Likewise. (vn_reference_lookup): Likewise. (do_rpo_vn): Likewise. * tree-ssa-scopedtables.c (avail_exprs_stack::lookup_avail_expr): Likewise. * tree-ssa-sink.c (select_best_block): Likewise. * tree-ssa-strlen.c (new_stridx): Likewise. (new_addr_stridx): Likewise. (get_range_strlen_dynamic): Likewise. (class ssa_name_limit_t): Likewise. * tree-ssa-structalias.c (push_fields_onto_fieldstack): Likewise. (create_variable_info_for_1): Likewise. (init_alias_vars): Likewise. * tree-ssa-tail-merge.c (find_clusters_1): Likewise. (tail_merge_optimize): Likewise. * tree-ssa-threadbackward.c (thread_jumps::profitable_jump_thread_path): Likewise. (thread_jumps::fsm_find_control_statement_thread_paths): Likewise. (thread_jumps::find_jump_threads_backwards): Likewise. * tree-ssa-threadedge.c (record_temporary_equivalences_from_stmts_at_dest): Likewise. * tree-ssa-uninit.c (compute_control_dep_chain): Likewise. * tree-switch-conversion.c (switch_conversion::check_range): Likewise. (jump_table_cluster::can_be_handled): Likewise. * tree-switch-conversion.h (jump_table_cluster::case_values_threshold): Likewise. (SWITCH_CONVERSION_BRANCH_RATIO): Likewise. (param_switch_conversion_branch_ratio): Likewise. * tree-vect-data-refs.c (vect_mark_for_runtime_alias_test): Likewise. (vect_enhance_data_refs_alignment): Likewise. (vect_prune_runtime_alias_test_list): Likewise. * tree-vect-loop.c (vect_analyze_loop_costing): Likewise. (vect_get_datarefs_in_loop): Likewise. (vect_analyze_loop): Likewise. * tree-vect-slp.c (vect_slp_bb): Likewise. * tree-vectorizer.h: Likewise. * tree-vrp.c (find_switch_asserts): Likewise. (vrp_prop::check_mem_ref): Likewise. * tree.c (wide_int_to_tree_1): Likewise. (cache_integer_cst): Likewise. * var-tracking.c (EXPR_USE_DEPTH): Likewise. (reverse_op): Likewise. (vt_find_locations): Likewise. 2019-11-12 Martin Liska * gimple-parser.c (c_parser_parse_gimple_body): Replace old parameter syntax with the new one, include opts.h if needed. Use SET_OPTION_IF_UNSET macro. 2019-11-12 Martin Liska * name-lookup.c (namespace_hints::namespace_hints): Replace old parameter syntax with the new one, include opts.h if needed. Use SET_OPTION_IF_UNSET macro. * typeck.c (comptypes): Likewise. 2019-11-12 Martin Liska * lto-partition.c (lto_balanced_map): Replace old parameter syntax with the new one, include opts.h if needed. Use SET_OPTION_IF_UNSET macro. * lto.c (do_whole_program_analysis): Likewise. From-SVN: r278085 --- gcc/tree-data-ref.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gcc/tree-data-ref.c') diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index 7f75b7e..e9fa4ae 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -836,7 +836,7 @@ split_constant_offset (tree exp, tree *var, tree *off, void split_constant_offset (tree exp, tree *var, tree *off) { - unsigned limit = PARAM_VALUE (PARAM_SSA_NAME_DEF_CHAIN_LIMIT); + unsigned limit = param_ssa_name_def_chain_limit; static hash_map > *cache; if (!cache) cache = new hash_map > (37); @@ -4917,7 +4917,7 @@ compute_all_dependences (vec datarefs, unsigned int i, j; if ((int) datarefs.length () - > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS)) + > param_loop_max_datarefs_for_datadeps) { struct data_dependence_relation *ddr; -- cgit v1.1 From 0c29cac4a8cc840a0a597313147a7e0df0064c54 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Tue, 12 Nov 2019 11:09:41 +0100 Subject: Remove gcc/params.* files. 2019-11-12 Martin Liska * Makefile.in: Remove PARAMS_H and params.list and params.options. * params-enum.h: Remove. * params-list.h: Remove. * params-options.h: Remove. * params.c: Remove. * params.def: Remove. * params.h: Remove. * asan.c: Do not include params.h. * auto-profile.c: Likewise. * bb-reorder.c: Likewise. * builtins.c: Likewise. * cfgcleanup.c: Likewise. * cfgexpand.c: Likewise. * cfgloopanal.c: Likewise. * cgraph.c: Likewise. * combine.c: Likewise. * common/config/aarch64/aarch64-common.c: Likewise. * common/config/gcn/gcn-common.c: Likewise. * common/config/ia64/ia64-common.c: Likewise. * common/config/powerpcspe/powerpcspe-common.c: Likewise. * common/config/rs6000/rs6000-common.c: Likewise. * common/config/sh/sh-common.c: Likewise. * config/aarch64/aarch64.c: Likewise. * config/alpha/alpha.c: Likewise. * config/arm/arm.c: Likewise. * config/avr/avr.c: Likewise. * config/csky/csky.c: Likewise. * config/i386/i386-builtins.c: Likewise. * config/i386/i386-expand.c: Likewise. * config/i386/i386-features.c: Likewise. * config/i386/i386-options.c: Likewise. * config/i386/i386.c: Likewise. * config/ia64/ia64.c: Likewise. * config/rs6000/rs6000-logue.c: Likewise. * config/rs6000/rs6000.c: Likewise. * config/s390/s390.c: Likewise. * config/sparc/sparc.c: Likewise. * config/visium/visium.c: Likewise. * coverage.c: Likewise. * cprop.c: Likewise. * cse.c: Likewise. * cselib.c: Likewise. * dse.c: Likewise. * emit-rtl.c: Likewise. * explow.c: Likewise. * final.c: Likewise. * fold-const.c: Likewise. * gcc.c: Likewise. * gcse.c: Likewise. * ggc-common.c: Likewise. * ggc-page.c: Likewise. * gimple-loop-interchange.cc: Likewise. * gimple-loop-jam.c: Likewise. * gimple-loop-versioning.cc: Likewise. * gimple-ssa-split-paths.c: Likewise. * gimple-ssa-sprintf.c: Likewise. * gimple-ssa-store-merging.c: Likewise. * gimple-ssa-strength-reduction.c: Likewise. * gimple-ssa-warn-alloca.c: Likewise. * gimple-ssa-warn-restrict.c: Likewise. * graphite-isl-ast-to-gimple.c: Likewise. * graphite-optimize-isl.c: Likewise. * graphite-scop-detection.c: Likewise. * graphite-sese-to-poly.c: Likewise. * graphite.c: Likewise. * haifa-sched.c: Likewise. * hsa-gen.c: Likewise. * ifcvt.c: Likewise. * ipa-cp.c: Likewise. * ipa-fnsummary.c: Likewise. * ipa-inline-analysis.c: Likewise. * ipa-inline.c: Likewise. * ipa-polymorphic-call.c: Likewise. * ipa-profile.c: Likewise. * ipa-prop.c: Likewise. * ipa-split.c: Likewise. * ipa-sra.c: Likewise. * ira-build.c: Likewise. * ira-conflicts.c: Likewise. * loop-doloop.c: Likewise. * loop-invariant.c: Likewise. * loop-unroll.c: Likewise. * lra-assigns.c: Likewise. * lra-constraints.c: Likewise. * modulo-sched.c: Likewise. * opt-suggestions.c: Likewise. * opts.c: Likewise. * postreload-gcse.c: Likewise. * predict.c: Likewise. * reload.c: Likewise. * reorg.c: Likewise. * resource.c: Likewise. * sanopt.c: Likewise. * sched-deps.c: Likewise. * sched-ebb.c: Likewise. * sched-rgn.c: Likewise. * sel-sched-ir.c: Likewise. * sel-sched.c: Likewise. * shrink-wrap.c: Likewise. * stmt.c: Likewise. * targhooks.c: Likewise. * toplev.c: Likewise. * tracer.c: Likewise. * trans-mem.c: Likewise. * tree-chrec.c: Likewise. * tree-data-ref.c: Likewise. * tree-if-conv.c: Likewise. * tree-inline.c: Likewise. * tree-loop-distribution.c: Likewise. * tree-parloops.c: Likewise. * tree-predcom.c: Likewise. * tree-profile.c: Likewise. * tree-scalar-evolution.c: Likewise. * tree-sra.c: Likewise. * tree-ssa-ccp.c: Likewise. * tree-ssa-dom.c: Likewise. * tree-ssa-dse.c: Likewise. * tree-ssa-ifcombine.c: Likewise. * tree-ssa-loop-ch.c: Likewise. * tree-ssa-loop-im.c: Likewise. * tree-ssa-loop-ivcanon.c: Likewise. * tree-ssa-loop-ivopts.c: Likewise. * tree-ssa-loop-manip.c: Likewise. * tree-ssa-loop-niter.c: Likewise. * tree-ssa-loop-prefetch.c: Likewise. * tree-ssa-loop-unswitch.c: Likewise. * tree-ssa-math-opts.c: Likewise. * tree-ssa-phiopt.c: Likewise. * tree-ssa-pre.c: Likewise. * tree-ssa-reassoc.c: Likewise. * tree-ssa-sccvn.c: Likewise. * tree-ssa-scopedtables.c: Likewise. * tree-ssa-sink.c: Likewise. * tree-ssa-strlen.c: Likewise. * tree-ssa-structalias.c: Likewise. * tree-ssa-tail-merge.c: Likewise. * tree-ssa-threadbackward.c: Likewise. * tree-ssa-threadedge.c: Likewise. * tree-ssa-uninit.c: Likewise. * tree-switch-conversion.c: Likewise. * tree-vect-data-refs.c: Likewise. * tree-vect-loop.c: Likewise. * tree-vect-slp.c: Likewise. * tree-vrp.c: Likewise. * tree.c: Likewise. * value-prof.c: Likewise. * var-tracking.c: Likewise. 2019-11-12 Martin Liska * gimple-parser.c: Do not include params.h. 2019-11-12 Martin Liska * name-lookup.c: Do not include params.h. * typeck.c: Likewise. 2019-11-12 Martin Liska * lto-common.c: Do not include params.h. * lto-partition.c: Likewise. * lto.c: Likewise. From-SVN: r278086 --- gcc/tree-data-ref.c | 1 - 1 file changed, 1 deletion(-) (limited to 'gcc/tree-data-ref.c') diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index e9fa4ae..191beb9 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -93,7 +93,6 @@ along with GCC; see the file COPYING3. If not see #include "tree-scalar-evolution.h" #include "dumpfile.h" #include "tree-affine.h" -#include "params.h" #include "builtins.h" #include "tree-eh.h" #include "ssa.h" -- cgit v1.1 From 1fb2b0f69ee849142b669ba1b82264ce6d0f75f9 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Sat, 16 Nov 2019 11:35:08 +0000 Subject: Move canonicalisation of dr_with_seg_len_pair_ts The two users of tree-data-ref's runtime alias checks both canonicalise the order of the dr_with_seg_lens in a pair before passing them to prune_runtime_alias_test_list. It's more convenient for later patches if prune_runtime_alias_test_list does that itself. 2019-11-16 Richard Sandiford gcc/ * tree-data-ref.c (prune_runtime_alias_test_list): Sort the two accesses in each dr_with_seg_len_pair_t before trying to combine separate dr_with_seg_len_pair_ts. * tree-loop-distribution.c (compute_alias_check_pairs): Don't do that here. * tree-vect-data-refs.c (vect_prune_runtime_alias_test_list): Likewise. From-SVN: r278348 --- gcc/tree-data-ref.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'gcc/tree-data-ref.c') diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index 191beb9..b31ed42 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -1486,13 +1486,32 @@ void prune_runtime_alias_test_list (vec *alias_pairs, poly_uint64) { + /* Canonicalize each pair so that the base components are ordered wrt + data_ref_compare_tree. This allows the loop below to merge more + cases. */ + unsigned int i; + dr_with_seg_len_pair_t *alias_pair; + FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair) + { + data_reference_p dr_a = alias_pair->first.dr; + data_reference_p dr_b = alias_pair->second.dr; + int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a), + DR_BASE_ADDRESS (dr_b)); + if (comp_res == 0) + comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b)); + if (comp_res == 0) + comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b)); + if (comp_res > 0) + std::swap (alias_pair->first, alias_pair->second); + } + /* Sort the collected data ref pairs so that we can scan them once to combine all possible aliasing checks. */ alias_pairs->qsort (comp_dr_with_seg_len_pair); /* Scan the sorted dr pairs and check if we can combine alias checks of two neighboring dr pairs. */ - for (size_t i = 1; i < alias_pairs->length (); ++i) + for (i = 1; i < alias_pairs->length (); ++i) { /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */ dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first, -- cgit v1.1 From 97602450b04e94aff034381bf6ee4236b95727ed Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Sat, 16 Nov 2019 11:35:56 +0000 Subject: Delay swapping data refs in prune_runtime_alias_test_list prune_runtime_alias_test_list swapped dr_as between two dr_with_seg_len pairs before finally deciding whether to merge them. Bailing out later would therefore leave the pairs in an incorrect state. IMO a better fix would be to split this out into a subroutine that produces a temporary dr_with_seg_len on success, rather than changing an existing one in-place. It would then be easy to merge both the dr_as and dr_bs if we wanted to, rather than requiring one of them to be equal. But here I tried to do something that could be backported if necessary. 2019-11-16 Richard Sandiford gcc/ * tree-data-ref.c (prune_runtime_alias_test_list): Delay swapping the dr_as based on init values until we've decided whether to merge them. From-SVN: r278349 --- gcc/tree-data-ref.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'gcc/tree-data-ref.c') diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index b31ed42..cf4fb26 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -1555,13 +1555,6 @@ prune_runtime_alias_test_list (vec *alias_pairs, if (!ordered_p (init_a1, init_a2)) continue; - /* Make sure dr_a1 starts left of dr_a2. */ - if (maybe_gt (init_a1, init_a2)) - { - std::swap (*dr_a1, *dr_a2); - std::swap (init_a1, init_a2); - } - /* Work out what the segment length would be if we did combine DR_A1 and DR_A2: @@ -1578,7 +1571,10 @@ prune_runtime_alias_test_list (vec *alias_pairs, The lengths both have sizetype, so the sign is taken from the step instead. */ - if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0)) + poly_uint64 new_seg_len = 0; + bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len, + dr_a2->seg_len, 0); + if (new_seg_len_p) { poly_uint64 seg_len_a1, seg_len_a2; if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1) @@ -1596,14 +1592,24 @@ prune_runtime_alias_test_list (vec *alias_pairs, int sign_a = tree_int_cst_sgn (indicator_a); int sign_b = tree_int_cst_sgn (indicator_b); - poly_uint64 new_seg_len; if (sign_a <= 0 && sign_b <= 0) new_seg_len = lower_bound (seg_len_a1, seg_len_a2); else if (sign_a >= 0 && sign_b >= 0) new_seg_len = upper_bound (seg_len_a1, seg_len_a2); else continue; + } + /* At this point we're committed to merging the refs. */ + /* Make sure dr_a1 starts left of dr_a2. */ + if (maybe_gt (init_a1, init_a2)) + { + std::swap (*dr_a1, *dr_a2); + std::swap (init_a1, init_a2); + } + + if (new_seg_len_p) + { dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len), new_seg_len); dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len)); -- cgit v1.1 From e9acf80c96d681917d930869b7cbfb7d2fa54d51 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Sat, 16 Nov 2019 11:40:22 +0000 Subject: Add flags to dr_with_seg_len_pair_t This patch adds a bunch of flags to dr_with_seg_len_pair_t, for use by later patches. The update to tree-loop-distribution.c is conservatively correct, but might be tweakable later. 2019-11-16 Richard Sandiford gcc/ * tree-data-ref.h (DR_ALIAS_RAW, DR_ALIAS_WAR, DR_ALIAS_WAW) (DR_ALIAS_ARBITRARY, DR_ALIAS_SWAPPED, DR_ALIAS_UNSWAPPED): New flags. (dr_with_seg_len_pair_t::sequencing): New enum. (dr_with_seg_len_pair_t::flags): New member variable. (dr_with_seg_len_pair_t::dr_with_seg_len_pair_t): Take a sequencing parameter and initialize the flags member variable. * tree-loop-distribution.c (compute_alias_check_pairs): Update call accordingly. * tree-vect-data-refs.c (vect_prune_runtime_alias_test_list): Likewise. Ensure the two data references in an alias pair are in statement order, if there is a defined order. * tree-data-ref.c (prune_runtime_alias_test_list): Use DR_ALIAS_SWAPPED and DR_ALIAS_UNSWAPPED to record whether we've swapped the references in a dr_with_seg_len_pair_t. OR together the flags when merging two dr_with_seg_len_pair_ts. After merging, try to restore the original dr_with_seg_len order, updating the flags if that fails. From-SVN: r278350 --- gcc/tree-data-ref.c | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) (limited to 'gcc/tree-data-ref.c') diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index cf4fb26..21cd2ad 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -1502,7 +1502,12 @@ prune_runtime_alias_test_list (vec *alias_pairs, if (comp_res == 0) comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b)); if (comp_res > 0) - std::swap (alias_pair->first, alias_pair->second); + { + std::swap (alias_pair->first, alias_pair->second); + alias_pair->flags |= DR_ALIAS_SWAPPED; + } + else + alias_pair->flags |= DR_ALIAS_UNSWAPPED; } /* Sort the collected data ref pairs so that we can scan them once to @@ -1514,10 +1519,13 @@ prune_runtime_alias_test_list (vec *alias_pairs, for (i = 1; i < alias_pairs->length (); ++i) { /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */ - dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first, - *dr_b1 = &(*alias_pairs)[i-1].second, - *dr_a2 = &(*alias_pairs)[i].first, - *dr_b2 = &(*alias_pairs)[i].second; + dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[i - 1]; + dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i]; + + dr_with_seg_len *dr_a1 = &alias_pair1->first; + dr_with_seg_len *dr_b1 = &alias_pair1->second; + dr_with_seg_len *dr_a2 = &alias_pair2->first; + dr_with_seg_len *dr_b2 = &alias_pair2->second; /* Remove duplicate data ref pairs. */ if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2) @@ -1526,6 +1534,7 @@ prune_runtime_alias_test_list (vec *alias_pairs, dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n", DR_REF (dr_a1->dr), DR_REF (dr_b1->dr), DR_REF (dr_a2->dr), DR_REF (dr_b2->dr)); + alias_pair1->flags |= alias_pair2->flags; alias_pairs->ordered_remove (i--); continue; } @@ -1631,10 +1640,26 @@ prune_runtime_alias_test_list (vec *alias_pairs, dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n", DR_REF (dr_a1->dr), DR_REF (dr_b1->dr), DR_REF (dr_a2->dr), DR_REF (dr_b2->dr)); + alias_pair1->flags |= alias_pair2->flags; alias_pairs->ordered_remove (i); i--; } } + + /* Try to restore the original dr_with_seg_len order within each + dr_with_seg_len_pair_t. If we ended up combining swapped and + unswapped pairs into the same check, we have to invalidate any + RAW, WAR and WAW information for it. */ + FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair) + { + unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED); + unsigned int swapped = (alias_pair->flags & swap_mask); + if (swapped == DR_ALIAS_SWAPPED) + std::swap (alias_pair->first, alias_pair->second); + else if (swapped != DR_ALIAS_UNSWAPPED) + alias_pair->flags |= DR_ALIAS_ARBITRARY; + alias_pair->flags &= ~swap_mask; + } } /* Given LOOP's two data references and segment lengths described by DR_A -- cgit v1.1 From 52c29905259363ce2b78dd7aa8a25cf531cddb3a Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Sat, 16 Nov 2019 11:41:16 +0000 Subject: Record whether a dr_with_seg_len contains mixed steps prune_runtime_alias_test_list can merge dr_with_seg_len_pair_ts that have different steps for the first reference or different steps for the second reference. This patch adds a flag to record that. I don't know whether the change to create_intersect_range_checks_index fixes anything in practice. It would have to be a corner case if so, since at present we only merge two alias pairs if either the first or the second references are identical and only the other references differ. And the vectoriser uses VF-based segment lengths only if both references in a pair have the same step. Either way, it still seems wrong to use DR_STEP when it doesn't represent all checks that have been merged into the pair. 2019-11-16 Richard Sandiford gcc/ * tree-data-ref.h (DR_ALIAS_MIXED_STEPS): New flag. * tree-data-ref.c (prune_runtime_alias_test_list): Set it when merging data references with different steps. (create_intersect_range_checks_index): Take a dr_with_seg_len_pair_t instead of two dr_with_seg_lens. Bail out if DR_ALIAS_MIXED_STEPS is set. (create_intersect_range_checks): Take a dr_with_seg_len_pair_t instead of two dr_with_seg_lens. Update call to create_intersect_range_checks_index. (create_runtime_alias_checks): Update call accordingly. From-SVN: r278351 --- gcc/tree-data-ref.c | 57 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 22 deletions(-) (limited to 'gcc/tree-data-ref.c') diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index 21cd2ad..bc5d213 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -1617,6 +1617,11 @@ prune_runtime_alias_test_list (vec *alias_pairs, std::swap (init_a1, init_a2); } + /* The DR_Bs are equal, so only the DR_As can introduce + mixed steps. */ + if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0)) + alias_pair1->flags |= DR_ALIAS_MIXED_STEPS; + if (new_seg_len_p) { dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len), @@ -1662,11 +1667,14 @@ prune_runtime_alias_test_list (vec *alias_pairs, } } -/* Given LOOP's two data references and segment lengths described by DR_A - and DR_B, create expression checking if the two addresses ranges intersect - with each other based on index of the two addresses. This can only be - done if DR_A and DR_B referring to the same (array) object and the index - is the only difference. For example: +/* Try to generate a runtime condition that is true if ALIAS_PAIR is + free of aliases, using a condition based on index values instead + of a condition based on addresses. Return true on success, + storing the condition in *COND_EXPR. + + This can only be done if the two data references in ALIAS_PAIR access + the same array object and the index is the only difference. For example, + if the two data references are DR_A and DR_B: DR_A DR_B data-ref arr[i] arr[j] @@ -1689,10 +1697,12 @@ prune_runtime_alias_test_list (vec *alias_pairs, static bool create_intersect_range_checks_index (class loop *loop, tree *cond_expr, - const dr_with_seg_len& dr_a, - const dr_with_seg_len& dr_b) + const dr_with_seg_len_pair_t &alias_pair) { - if (integer_zerop (DR_STEP (dr_a.dr)) + const dr_with_seg_len &dr_a = alias_pair.first; + const dr_with_seg_len &dr_b = alias_pair.second; + if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS) + || integer_zerop (DR_STEP (dr_a.dr)) || integer_zerop (DR_STEP (dr_b.dr)) || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr)) return false; @@ -1914,24 +1924,26 @@ get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out, *seg_max_out = fold_build_pointer_plus (addr_base, max_reach); } -/* Given two data references and segment lengths described by DR_A and DR_B, - create expression checking if the two addresses ranges intersect with - each other: +/* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases, + storing the condition in *COND_EXPR. The fallback is to generate a + a test that the two accesses do not overlap: - ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0) - || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0)) */ + end_a <= start_b || end_b <= start_a. */ static void create_intersect_range_checks (class loop *loop, tree *cond_expr, - const dr_with_seg_len& dr_a, - const dr_with_seg_len& dr_b) + const dr_with_seg_len_pair_t &alias_pair) { + const dr_with_seg_len& dr_a = alias_pair.first; + const dr_with_seg_len& dr_b = alias_pair.second; *cond_expr = NULL_TREE; - if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b)) + if (create_intersect_range_checks_index (loop, cond_expr, alias_pair)) return; unsigned HOST_WIDE_INT min_align; tree_code cmp_code; + /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions + are equivalent. This is just an optimization heuristic. */ if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST) { @@ -1988,18 +2000,19 @@ create_runtime_alias_checks (class loop *loop, tree part_cond_expr; fold_defer_overflow_warnings (); - for (size_t i = 0, s = alias_pairs->length (); i < s; ++i) + dr_with_seg_len_pair_t *alias_pair; + unsigned int i; + FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair) { - const dr_with_seg_len& dr_a = (*alias_pairs)[i].first; - const dr_with_seg_len& dr_b = (*alias_pairs)[i].second; - + gcc_assert (alias_pair->flags); if (dump_enabled_p ()) dump_printf (MSG_NOTE, "create runtime check for data references %T and %T\n", - DR_REF (dr_a.dr), DR_REF (dr_b.dr)); + DR_REF (alias_pair->first.dr), + DR_REF (alias_pair->second.dr)); /* Create condition expression for each pair data references. */ - create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b); + create_intersect_range_checks (loop, &part_cond_expr, *alias_pair); if (*cond_expr) *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, *cond_expr, part_cond_expr); -- cgit v1.1 From cad984b289e2b3aca786314c673339eb0500fefa Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Sat, 16 Nov 2019 11:42:02 +0000 Subject: Dump the list of merged alias pairs This patch dumps the final (merged) list of alias pairs. It also adds: - WAW and RAW versions of vect-alias-check-8.c - a "well-ordered" version of vect-alias-check-9.c (i.e. all reads before any writes) - a test with mixed steps in the same alias pair I also tweaked the test value in vect-alias-check-9.c so that the result was less likely to be accidentally correct if the alias isn't honoured. 2019-11-16 Richard Sandiford gcc/ * tree-data-ref.c (dump_alias_pair): New function. (prune_runtime_alias_test_list): Use it to dump each merged alias pair. gcc/testsuite/ * gcc.dg/vect/vect-alias-check-8.c: Test for the RAW flag. * gcc.dg/vect/vect-alias-check-9.c: Test for the ARBITRARY flag. (TEST_VALUE): Use a higher value for early iterations. * gcc.dg/vect/vect-alias-check-14.c: New test. * gcc.dg/vect/vect-alias-check-15.c: Likewise. * gcc.dg/vect/vect-alias-check-16.c: Likewise. * gcc.dg/vect/vect-alias-check-17.c: Likewise. From-SVN: r278352 --- gcc/tree-data-ref.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'gcc/tree-data-ref.c') diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index bc5d213..7fc401f 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -1452,6 +1452,54 @@ comp_dr_with_seg_len_pair (const void *pa_, const void *pb_) return 0; } +/* Dump information about ALIAS_PAIR, indenting each line by INDENT. */ + +static void +dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent) +{ + dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent, + DR_REF (alias_pair->first.dr), + DR_REF (alias_pair->second.dr)); + + dump_printf (MSG_NOTE, "%ssegment length: %T", indent, + alias_pair->first.seg_len); + if (!operand_equal_p (alias_pair->first.seg_len, + alias_pair->second.seg_len, 0)) + dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len); + + dump_printf (MSG_NOTE, "\n%saccess size: ", indent); + dump_dec (MSG_NOTE, alias_pair->first.access_size); + if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size)) + { + dump_printf (MSG_NOTE, " vs. "); + dump_dec (MSG_NOTE, alias_pair->second.access_size); + } + + dump_printf (MSG_NOTE, "\n%salignment: %d", indent, + alias_pair->first.align); + if (alias_pair->first.align != alias_pair->second.align) + dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align); + + dump_printf (MSG_NOTE, "\n%sflags: ", indent); + if (alias_pair->flags & DR_ALIAS_RAW) + dump_printf (MSG_NOTE, " RAW"); + if (alias_pair->flags & DR_ALIAS_WAR) + dump_printf (MSG_NOTE, " WAR"); + if (alias_pair->flags & DR_ALIAS_WAW) + dump_printf (MSG_NOTE, " WAW"); + if (alias_pair->flags & DR_ALIAS_ARBITRARY) + dump_printf (MSG_NOTE, " ARBITRARY"); + if (alias_pair->flags & DR_ALIAS_SWAPPED) + dump_printf (MSG_NOTE, " SWAPPED"); + if (alias_pair->flags & DR_ALIAS_UNSWAPPED) + dump_printf (MSG_NOTE, " UNSWAPPED"); + if (alias_pair->flags & DR_ALIAS_MIXED_STEPS) + dump_printf (MSG_NOTE, " MIXED_STEPS"); + if (alias_pair->flags == 0) + dump_printf (MSG_NOTE, " "); + dump_printf (MSG_NOTE, "\n"); +} + /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones. FACTOR is number of iterations that each data reference is accessed. @@ -1655,6 +1703,8 @@ prune_runtime_alias_test_list (vec *alias_pairs, dr_with_seg_len_pair_t. If we ended up combining swapped and unswapped pairs into the same check, we have to invalidate any RAW, WAR and WAW information for it. */ + if (dump_enabled_p ()) + dump_printf (MSG_NOTE, "merged alias checks:\n"); FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair) { unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED); @@ -1664,6 +1714,8 @@ prune_runtime_alias_test_list (vec *alias_pairs, else if (swapped != DR_ALIAS_UNSWAPPED) alias_pair->flags |= DR_ALIAS_ARBITRARY; alias_pair->flags &= ~swap_mask; + if (dump_enabled_p ()) + dump_alias_pair (alias_pair, " "); } } -- cgit v1.1 From b4d1b635737a4780e5be247f8be9550eaf83dae5 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Sat, 16 Nov 2019 11:42:53 +0000 Subject: Print the type of alias check in a dump message This patch prints a message to say how an alias check is being implemented. 2019-11-16 Richard Sandiford gcc/ * tree-data-ref.c (create_intersect_range_checks_index) (create_intersect_range_checks): Print dump messages. gcc/testsuite/ * gcc.dg/vect/vect-alias-check-1.c: Test for the type of alias check. * gcc.dg/vect/vect-alias-check-8.c: Likewise. * gcc.dg/vect/vect-alias-check-9.c: Likewise. * gcc.dg/vect/vect-alias-check-10.c: Likewise. * gcc.dg/vect/vect-alias-check-11.c: Likewise. * gcc.dg/vect/vect-alias-check-12.c: Likewise. * gcc.dg/vect/vect-alias-check-13.c: Likewise. * gcc.dg/vect/vect-alias-check-14.c: Likewise. * gcc.dg/vect/vect-alias-check-15.c: Likewise. * gcc.dg/vect/vect-alias-check-16.c: Likewise. * gcc.dg/vect/vect-alias-check-17.c: Likewise. From-SVN: r278353 --- gcc/tree-data-ref.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'gcc/tree-data-ref.c') diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index 7fc401f..527cb5e 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -1889,6 +1889,8 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr, else *cond_expr = part_cond_expr; } + if (dump_enabled_p ()) + dump_printf (MSG_NOTE, "using an index-based overlap test\n"); return true; } @@ -2036,6 +2038,8 @@ create_intersect_range_checks (class loop *loop, tree *cond_expr, = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min), fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min)); + if (dump_enabled_p ()) + dump_printf (MSG_NOTE, "using an address-based overlap test\n"); } /* Create a conditional expression that represents the run-time checks for -- cgit v1.1 From f9d6338bd15ce1fae36bf25d3a0545e9678ddc58 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Sat, 16 Nov 2019 11:43:31 +0000 Subject: Use a single comparison for index-based alias checks This patch rewrites the index-based alias checks to use conditions of the form: (unsigned T) (a - b + bias) <= limit E.g. before the patch: struct s { int x[100]; }; void f1 (struct s *s1, int a, int b) { for (int i = 0; i < 32; ++i) s1->x[i + a] += s1->x[i + b]; } used: add w3, w1, 3 cmp w3, w2 add w3, w2, 3 ccmp w1, w3, 0, ge ble .L2 whereas after the patch it uses: sub w3, w1, w2 add w3, w3, 3 cmp w3, 6 bls .L2 The patch also fixes the seg_len1 and seg_len2 negation for cases in which seg_len is a "negative unsigned" value narrower than 64 bits, like it is for 32-bit targets. Previously we'd end up with values like 0xffffffff000000001 instead of 1. 2019-11-16 Richard Sandiford gcc/ * tree-data-ref.c (create_intersect_range_checks_index): Rewrite the index tests to have the form (unsigned T) (B - A + bias) <= limit. gcc/testsuite/ * gcc.dg/vect/vect-alias-check-18.c: New test. * gcc.dg/vect/vect-alias-check-19.c: Likewise. * gcc.dg/vect/vect-alias-check-20.c: Likewise. From-SVN: r278354 --- gcc/tree-data-ref.c | 143 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 92 insertions(+), 51 deletions(-) (limited to 'gcc/tree-data-ref.c') diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index 527cb5e..4dfa334 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -1743,7 +1743,9 @@ prune_runtime_alias_test_list (vec *alias_pairs, We can create expression based on index rather than address: - (i_0 + 4 < j_0 || j_0 + 4 < i_0) + (unsigned) (i_0 - j_0 + 3) <= 6 + + i.e. the indices are less than 4 apart. Note evolution step of index needs to be considered in comparison. */ @@ -1780,15 +1782,8 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr, if (neg_step) { abs_step = -abs_step; - seg_len1 = -seg_len1; - seg_len2 = -seg_len2; - } - else - { - /* Include the access size in the length, so that we only have one - tree addition below. */ - seg_len1 += dr_a.access_size; - seg_len2 += dr_b.access_size; + seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi (); + seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi (); } /* Infer the number of iterations with which the memory segment is accessed @@ -1802,16 +1797,13 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr, || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2)) return false; - poly_uint64 niter_access1 = 0, niter_access2 = 0; - if (neg_step) - { - /* Divide each access size by the byte step, rounding up. */ - if (!can_div_trunc_p (dr_a.access_size - abs_step - 1, - abs_step, &niter_access1) - || !can_div_trunc_p (dr_b.access_size + abs_step - 1, - abs_step, &niter_access2)) - return false; - } + /* Divide each access size by the byte step, rounding up. */ + poly_uint64 niter_access1, niter_access2; + if (!can_div_trunc_p (dr_a.access_size + abs_step - 1, + abs_step, &niter_access1) + || !can_div_trunc_p (dr_b.access_size + abs_step - 1, + abs_step, &niter_access2)) + return false; unsigned int i; for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++) @@ -1851,38 +1843,87 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr, index of data reference. Like segment length, index length is linear function of the number of iterations with index_step as the coefficient, i.e, niter_len * idx_step. */ - tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step, - build_int_cst (TREE_TYPE (min1), - niter_len1)); - tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step, - build_int_cst (TREE_TYPE (min2), - niter_len2)); - tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1); - tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2); - /* Adjust ranges for negative step. */ + offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step), + SIGNED); if (neg_step) - { - /* IDX_LEN1 and IDX_LEN2 are negative in this case. */ - std::swap (min1, max1); - std::swap (min2, max2); - - /* As with the lengths just calculated, we've measured the access - sizes in iterations, so multiply them by the index step. */ - tree idx_access1 - = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step, - build_int_cst (TREE_TYPE (min1), niter_access1)); - tree idx_access2 - = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step, - build_int_cst (TREE_TYPE (min2), niter_access2)); - - /* MINUS_EXPR because the above values are negative. */ - max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1); - max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2); - } - tree part_cond_expr - = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, - fold_build2 (LE_EXPR, boolean_type_node, max1, min2), - fold_build2 (LE_EXPR, boolean_type_node, max2, min1)); + abs_idx_step = -abs_idx_step; + poly_offset_int idx_len1 = abs_idx_step * niter_len1; + poly_offset_int idx_len2 = abs_idx_step * niter_len2; + poly_offset_int idx_access1 = abs_idx_step * niter_access1; + poly_offset_int idx_access2 = abs_idx_step * niter_access2; + + gcc_assert (known_ge (idx_len1, 0) + && known_ge (idx_len2, 0) + && known_ge (idx_access1, 0) + && known_ge (idx_access2, 0)); + + /* Each access has the following pattern, with lengths measured + in units of INDEX: + + <-- idx_len --> + <--- A: -ve step ---> + +-----+-------+-----+-------+-----+ + | n-1 | ..... | 0 | ..... | n-1 | + +-----+-------+-----+-------+-----+ + <--- B: +ve step ---> + <-- idx_len --> + | + min + + where "n" is the number of scalar iterations covered by the segment + and where each access spans idx_access units. + + A is the range of bytes accessed when the step is negative, + B is the range when the step is positive. + + When checking for general overlap, we need to test whether + the range: + + [min1 + low_offset1, min2 + high_offset1 + idx_access1 - 1] + + overlaps: + + [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1] + + where: + + low_offsetN = +ve step ? 0 : -idx_lenN; + high_offsetN = +ve step ? idx_lenN : 0; + + This is equivalent to testing whether: + + min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1 + && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1 + + Converting this into a single test, there is an overlap if: + + 0 <= min2 - min1 + bias <= limit + + where bias = high_offset2 + idx_access2 - 1 - low_offset1 + limit = (high_offset1 - low_offset1 + idx_access1 - 1) + + (high_offset2 - low_offset2 + idx_access2 - 1) + i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1 + + Combining the tests requires limit to be computable in an unsigned + form of the index type; if it isn't, we fall back to the usual + pointer-based checks. */ + poly_offset_int limit = (idx_len1 + idx_access1 - 1 + + idx_len2 + idx_access2 - 1); + tree utype = unsigned_type_for (TREE_TYPE (min1)); + if (!wi::fits_to_tree_p (limit, utype)) + return false; + + poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0; + poly_offset_int high_offset2 = neg_step ? 0 : idx_len2; + poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1; + + tree subject = fold_build2 (MINUS_EXPR, utype, + fold_convert (utype, min2), + fold_convert (utype, min1)); + subject = fold_build2 (PLUS_EXPR, utype, subject, + wide_int_to_tree (utype, bias)); + tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, + wide_int_to_tree (utype, limit)); if (*cond_expr) *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, *cond_expr, part_cond_expr); -- cgit v1.1 From 8489e1f45b50600c01eb8ed8c5d0ca914ded281c Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 18 Nov 2019 15:26:55 +0000 Subject: Optimise WAR and WAW alias checks For: void f1 (int *x, int *y) { for (int i = 0; i < 32; ++i) x[i] += y[i]; } we checked at runtime whether one vector at x would overlap one vector at y. But in cases like this, the vector code would handle x <= y just fine, since any write to address A still happens after any read from address A. The only problem is if x is ahead of y by less than a vector. The same is true for two writes: void f2 (int *x, int *y) { for (int i = 0; i < 32; ++i) { x[i] = i; y[i] = 2; } } if y <= x then a vector write at y after a vector write at x would have the same net effect as the original scalar writes. This patch optimises the alias checks for these two cases. E.g., before the patch, f1 used: add x2, x0, 15 sub x2, x2, x1 cmp x2, 30 bls .L2 whereas after the patch it uses: add x2, x1, 4 sub x2, x0, x2 cmp x2, 8 bls .L2 Read-after-write cases like: int f3 (int *x, int *y) { int res = 0; for (int i = 0; i < 32; ++i) { x[i] = i; res += y[i]; } return res; } can cope with x == y, but otherwise don't allow overlap in either direction. Since checking for x == y at runtime would require extra code, we're probably better off sticking with the current overlap test. An overlap test is also needed if the scalar or vector accesses covered by the alias check are mixed together, rather than all statements for the second access following all statements for the first access. The new code for gcc.target/aarch64/sve/var_strict_[135].c is slightly better than before. 2019-11-18 Richard Sandiford gcc/ * tree-data-ref.c (create_intersect_range_checks_index): If the alias pair describes simple WAW and WAR dependencies, just check whether the first B access overlaps later A accesses. (create_waw_or_war_checks): New function that performs the same optimization on addresses. (create_intersect_range_checks): Call it. gcc/testsuite/ * gcc.dg/vect/vect-alias-check-8.c: Expect WAR/WAW checks to be used. * gcc.dg/vect/vect-alias-check-14.c: Likewise. * gcc.dg/vect/vect-alias-check-15.c: Likewise. * gcc.dg/vect/vect-alias-check-18.c: Likewise. * gcc.dg/vect/vect-alias-check-19.c: Likewise. * gcc.target/aarch64/sve/var_stride_1.c: Update expected sequence. * gcc.target/aarch64/sve/var_stride_2.c: Likewise. * gcc.target/aarch64/sve/var_stride_3.c: Likewise. * gcc.target/aarch64/sve/var_stride_5.c: Likewise. From-SVN: r278409 --- gcc/tree-data-ref.c | 218 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 213 insertions(+), 5 deletions(-) (limited to 'gcc/tree-data-ref.c') diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index 4dfa334..bad80e1 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -1805,6 +1805,8 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr, abs_step, &niter_access2)) return false; + bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0; + unsigned int i; for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++) { @@ -1906,16 +1908,57 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr, Combining the tests requires limit to be computable in an unsigned form of the index type; if it isn't, we fall back to the usual - pointer-based checks. */ - poly_offset_int limit = (idx_len1 + idx_access1 - 1 - + idx_len2 + idx_access2 - 1); + pointer-based checks. + + We can do better if DR_B is a write and if DR_A and DR_B are + well-ordered in both the original and the new code (see the + comment above the DR_ALIAS_* flags for details). In this case + we know that for each i in [0, n-1], the write performed by + access i of DR_B occurs after access numbers j<=i of DR_A in + both the original and the new code. Any write or anti + dependencies wrt those DR_A accesses are therefore maintained. + + We just need to make sure that each individual write in DR_B does not + overlap any higher-indexed access in DR_A; such DR_A accesses happen + after the DR_B access in the original code but happen before it in + the new code. + + We know the steps for both accesses are equal, so by induction, we + just need to test whether the first write of DR_B overlaps a later + access of DR_A. In other words, we need to move min1 along by + one iteration: + + min1' = min1 + idx_step + + and use the ranges: + + [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1] + + and: + + [min2, min2 + idx_access2 - 1] + + where: + + low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|) + high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0. */ + if (waw_or_war_p) + idx_len1 -= abs_idx_step; + + poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1; + if (!waw_or_war_p) + limit += idx_len2; + tree utype = unsigned_type_for (TREE_TYPE (min1)); if (!wi::fits_to_tree_p (limit, utype)) return false; poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0; - poly_offset_int high_offset2 = neg_step ? 0 : idx_len2; + poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2; poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1; + /* Equivalent to adding IDX_STEP to MIN1. */ + if (waw_or_war_p) + bias -= wi::to_offset (idx_step); tree subject = fold_build2 (MINUS_EXPR, utype, fold_convert (utype, min2), @@ -1931,7 +1974,169 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr, *cond_expr = part_cond_expr; } if (dump_enabled_p ()) - dump_printf (MSG_NOTE, "using an index-based overlap test\n"); + { + if (waw_or_war_p) + dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n"); + else + dump_printf (MSG_NOTE, "using an index-based overlap test\n"); + } + return true; +} + +/* A subroutine of create_intersect_range_checks, with a subset of the + same arguments. Try to optimize cases in which the second access + is a write and in which some overlap is valid. */ + +static bool +create_waw_or_war_checks (tree *cond_expr, + const dr_with_seg_len_pair_t &alias_pair) +{ + const dr_with_seg_len& dr_a = alias_pair.first; + const dr_with_seg_len& dr_b = alias_pair.second; + + /* Check for cases in which: + + (a) DR_B is always a write; + (b) the accesses are well-ordered in both the original and new code + (see the comment above the DR_ALIAS_* flags for details); and + (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */ + if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) + return false; + + /* Check for equal (but possibly variable) steps. */ + tree step = DR_STEP (dr_a.dr); + if (!operand_equal_p (step, DR_STEP (dr_b.dr))) + return false; + + /* Make sure that we can operate on sizetype without loss of precision. */ + tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr)); + if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype)) + return false; + + /* All addresses involved are known to have a common alignment ALIGN. + We can therefore subtract ALIGN from an exclusive endpoint to get + an inclusive endpoint. In the best (and common) case, ALIGN is the + same as the access sizes of both DRs, and so subtracting ALIGN + cancels out the addition of an access size. */ + unsigned int align = MIN (dr_a.align, dr_b.align); + poly_uint64 last_chunk_a = dr_a.access_size - align; + poly_uint64 last_chunk_b = dr_b.access_size - align; + + /* Get a boolean expression that is true when the step is negative. */ + tree indicator = dr_direction_indicator (dr_a.dr); + tree neg_step = fold_build2 (LT_EXPR, boolean_type_node, + fold_convert (ssizetype, indicator), + ssize_int (0)); + + /* Get lengths in sizetype. */ + tree seg_len_a + = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len)); + step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step)); + + /* Each access has the following pattern: + + <- |seg_len| -> + <--- A: -ve step ---> + +-----+-------+-----+-------+-----+ + | n-1 | ..... | 0 | ..... | n-1 | + +-----+-------+-----+-------+-----+ + <--- B: +ve step ---> + <- |seg_len| -> + | + base address + + where "n" is the number of scalar iterations covered by the segment. + + A is the range of bytes accessed when the step is negative, + B is the range when the step is positive. + + We know that DR_B is a write. We also know (from checking that + DR_A and DR_B are well-ordered) that for each i in [0, n-1], + the write performed by access i of DR_B occurs after access numbers + j<=i of DR_A in both the original and the new code. Any write or + anti dependencies wrt those DR_A accesses are therefore maintained. + + We just need to make sure that each individual write in DR_B does not + overlap any higher-indexed access in DR_A; such DR_A accesses happen + after the DR_B access in the original code but happen before it in + the new code. + + We know the steps for both accesses are equal, so by induction, we + just need to test whether the first write of DR_B overlaps a later + access of DR_A. In other words, we need to move addr_a along by + one iteration: + + addr_a' = addr_a + step + + and check whether: + + [addr_b, addr_b + last_chunk_b] + + overlaps: + + [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a] + + where [low_offset_a, high_offset_a] spans accesses [1, n-1]. I.e.: + + low_offset_a = +ve step ? 0 : seg_len_a - step + high_offset_a = +ve step ? seg_len_a - step : 0 + + This is equivalent to testing whether: + + addr_a' + low_offset_a <= addr_b + last_chunk_b + && addr_b <= addr_a' + high_offset_a + last_chunk_a + + Converting this into a single test, there is an overlap if: + + 0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit + + where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b + + If DR_A is performed, limit + |step| - last_chunk_b is known to be + less than the size of the object underlying DR_A. We also know + that last_chunk_b <= |step|; this is checked elsewhere if it isn't + guaranteed at compile time. There can therefore be no overflow if + "limit" is calculated in an unsigned type with pointer precision. */ + tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr), + DR_OFFSET (dr_a.dr)); + addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr)); + + tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr), + DR_OFFSET (dr_b.dr)); + addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr)); + + /* Advance ADDR_A by one iteration and adjust the length to compensate. */ + addr_a = fold_build_pointer_plus (addr_a, step); + tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype, + seg_len_a, step); + if (!CONSTANT_CLASS_P (seg_len_a_minus_step)) + seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step); + + tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step, + seg_len_a_minus_step, size_zero_node); + if (!CONSTANT_CLASS_P (low_offset_a)) + low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a); + + /* We could use COND_EXPR , + but it's usually more efficient to reuse the LOW_OFFSET_A result. */ + tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step, + low_offset_a); + + /* The amount added to addr_b - addr_a'. */ + tree bias = fold_build2 (MINUS_EXPR, sizetype, + size_int (last_chunk_b), low_offset_a); + + tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a); + limit = fold_build2 (PLUS_EXPR, sizetype, limit, + size_int (last_chunk_a + last_chunk_b)); + + tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a); + subject = fold_build2 (PLUS_EXPR, sizetype, + fold_convert (sizetype, subject), bias); + + *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit); + if (dump_enabled_p ()) + dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n"); return true; } @@ -2035,6 +2240,9 @@ create_intersect_range_checks (class loop *loop, tree *cond_expr, if (create_intersect_range_checks_index (loop, cond_expr, alias_pair)) return; + if (create_waw_or_war_checks (cond_expr, alias_pair)) + return; + unsigned HOST_WIDE_INT min_align; tree_code cmp_code; /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions -- cgit v1.1 From 58c036c8354e4d14551ceaeffaa1dda2fe445640 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 18 Nov 2019 15:36:10 +0000 Subject: Add optabs for accelerating RAW and WAR alias checks This patch adds optabs that check whether a read followed by a write or a write followed by a read can be divided into interleaved byte accesses without changing the dependencies between the bytes. This is one of the uses of the SVE2 WHILERW and WHILEWR instructions. (The instructions can also be used to limit the VF at runtime, but that's future work.) 2019-11-18 Richard Sandiford gcc/ * doc/sourcebuild.texi (vect_check_ptrs): Document. * optabs.def (check_raw_ptrs_optab, check_war_ptrs_optab): New optabs. * doc/md.texi: Document them. * internal-fn.def (IFN_CHECK_RAW_PTRS, IFN_CHECK_WAR_PTRS): New internal functions. * internal-fn.h (internal_check_ptrs_fn_supported_p): Declare. * internal-fn.c (check_ptrs_direct): New macro. (expand_check_ptrs_optab_fn): Likewise. (direct_check_ptrs_optab_supported_p): Likewise. (internal_check_ptrs_fn_supported_p): New fuction. * tree-data-ref.c: Include internal-fn.h. (create_ifn_alias_checks): New function. (create_intersect_range_checks): Use it. * config/aarch64/iterators.md (SVE2_WHILE_PTR): New int iterator. (optab, cmp_op): Handle it. (raw_war, unspec): New int attributes. * config/aarch64/aarch64.md (UNSPEC_WHILERW, UNSPEC_WHILE_WR): New constants. * config/aarch64/predicates.md (aarch64_bytes_per_sve_vector_operand): New predicate. * config/aarch64/aarch64-sve2.md (check__ptrs): New expander. (@aarch64_sve2_while_ptest): New pattern. gcc/testsuite/ * lib/target-supports.exp (check_effective_target_vect_check_ptrs): New procedure. * gcc.dg/vect/vect-alias-check-14.c: Expect IFN_CHECK_WAR to be used, if available. * gcc.dg/vect/vect-alias-check-15.c: Likewise. * gcc.dg/vect/vect-alias-check-16.c: Likewise IFN_CHECK_RAW. * gcc.target/aarch64/sve2/whilerw_1.c: New test. * gcc.target/aarch64/sve2/whilewr_1.c: Likewise. * gcc.target/aarch64/sve2/whilewr_2.c: Likewise. From-SVN: r278414 --- gcc/tree-data-ref.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'gcc/tree-data-ref.c') diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index bad80e1..117a14b 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -96,6 +96,7 @@ along with GCC; see the file COPYING3. If not see #include "builtins.h" #include "tree-eh.h" #include "ssa.h" +#include "internal-fn.h" static struct datadep_stats { @@ -1719,6 +1720,80 @@ prune_runtime_alias_test_list (vec *alias_pairs, } } +/* A subroutine of create_intersect_range_checks, with a subset of the + same arguments. Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS + to optimize cases in which the references form a simple RAW, WAR or + WAR dependence. */ + +static bool +create_ifn_alias_checks (tree *cond_expr, + const dr_with_seg_len_pair_t &alias_pair) +{ + const dr_with_seg_len& dr_a = alias_pair.first; + const dr_with_seg_len& dr_b = alias_pair.second; + + /* Check for cases in which: + + (a) we have a known RAW, WAR or WAR dependence + (b) the accesses are well-ordered in both the original and new code + (see the comment above the DR_ALIAS_* flags for details); and + (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */ + if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW)) + return false; + + /* Make sure that both DRs access the same pattern of bytes, + with a constant length and and step. */ + poly_uint64 seg_len; + if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0) + || !poly_int_tree_p (dr_a.seg_len, &seg_len) + || maybe_ne (dr_a.access_size, dr_b.access_size) + || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0) + || !tree_fits_uhwi_p (DR_STEP (dr_a.dr))) + return false; + + unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr)); + tree addr_a = DR_BASE_ADDRESS (dr_a.dr); + tree addr_b = DR_BASE_ADDRESS (dr_b.dr); + + /* See whether the target suports what we want to do. WAW checks are + equivalent to WAR checks here. */ + internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW + ? IFN_CHECK_RAW_PTRS + : IFN_CHECK_WAR_PTRS); + unsigned int align = MIN (dr_a.align, dr_b.align); + poly_uint64 full_length = seg_len + bytes; + if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a), + full_length, align)) + { + full_length = seg_len + dr_a.access_size; + if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a), + full_length, align)) + return false; + } + + /* Commit to using this form of test. */ + addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr)); + addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr)); + + addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr)); + addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr)); + + *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION, + ifn, boolean_type_node, + 4, addr_a, addr_b, + size_int (full_length), + size_int (align)); + + if (dump_enabled_p ()) + { + if (ifn == IFN_CHECK_RAW_PTRS) + dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n"); + else + dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n"); + } + return true; +} + /* Try to generate a runtime condition that is true if ALIAS_PAIR is free of aliases, using a condition based on index values instead of a condition based on addresses. Return true on success, @@ -2240,6 +2315,9 @@ create_intersect_range_checks (class loop *loop, tree *cond_expr, if (create_intersect_range_checks_index (loop, cond_expr, alias_pair)) return; + if (create_ifn_alias_checks (cond_expr, alias_pair)) + return; + if (create_waw_or_war_checks (cond_expr, alias_pair)) return; -- cgit v1.1 From ea1ff9e46c7ec5e49ec671616cfcf405ef665054 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Fri, 6 Dec 2019 10:31:44 +0000 Subject: Avoid quadratic behaviour in prune_runtime_alias_test_list prune_runtime_alias_test_list used ordered_remove to remove a merged alias pair, which made the function quadratic when many aliases could be removed. I had a testcase in which these memmoves accounted for an impressive 85% of compile time. The fact that we had so many probably shows a deeper problem, but still, it's easy to remove as we go. 2019-12-06 Richard Sandiford gcc/ * tree-data-ref.c (prune_runtime_alias_test_list): Exit early for empty vectors. Avoid using ordered_remove and instead shuffle the vector as we go. From-SVN: r279038 --- gcc/tree-data-ref.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'gcc/tree-data-ref.c') diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index 117a14b..7ef891d 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -1535,6 +1535,9 @@ void prune_runtime_alias_test_list (vec *alias_pairs, poly_uint64) { + if (alias_pairs->is_empty ()) + return; + /* Canonicalize each pair so that the base components are ordered wrt data_ref_compare_tree. This allows the loop below to merge more cases. */ @@ -1565,10 +1568,11 @@ prune_runtime_alias_test_list (vec *alias_pairs, /* Scan the sorted dr pairs and check if we can combine alias checks of two neighboring dr pairs. */ + unsigned int last = 0; for (i = 1; i < alias_pairs->length (); ++i) { /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */ - dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[i - 1]; + dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last]; dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i]; dr_with_seg_len *dr_a1 = &alias_pair1->first; @@ -1584,10 +1588,15 @@ prune_runtime_alias_test_list (vec *alias_pairs, DR_REF (dr_a1->dr), DR_REF (dr_b1->dr), DR_REF (dr_a2->dr), DR_REF (dr_b2->dr)); alias_pair1->flags |= alias_pair2->flags; - alias_pairs->ordered_remove (i--); continue; } + /* Assume that we won't be able to merge the pairs, then correct + if we do. */ + last += 1; + if (last != i) + (*alias_pairs)[last] = (*alias_pairs)[i]; + if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2) { /* We consider the case that DR_B1 and DR_B2 are same memrefs, @@ -1695,10 +1704,10 @@ prune_runtime_alias_test_list (vec *alias_pairs, DR_REF (dr_a1->dr), DR_REF (dr_b1->dr), DR_REF (dr_a2->dr), DR_REF (dr_b2->dr)); alias_pair1->flags |= alias_pair2->flags; - alias_pairs->ordered_remove (i); - i--; + last -= 1; } } + alias_pairs->truncate (last + 1); /* Try to restore the original dr_with_seg_len order within each dr_with_seg_len_pair_t. If we ended up combining swapped and -- cgit v1.1