Diffstat (limited to 'gcc')
57 files changed, 1558 insertions, 948 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d597002..d475eee 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,159 @@
+2025-05-13  Andrew Pinski  <quic_apinski@quicinc.com>
+
+	PR tree-optimization/119903
+	* gimple-fold.cc (replace_stmt_with_simplification): Reject for
+	noncall exceptions replacing comparison with itself.
+
+2025-05-13  Andrew Pinski  <quic_apinski@quicinc.com>
+
+	PR middle-end/118868
+	* tree-cfg.cc (verify_gimple_assign_unary): Allow pointers
+	but disallow aggregate types for PAREN_EXPR.
+
+2025-05-13  Andrew Pinski  <quic_apinski@quicinc.com>
+
+	* cfgexpand.cc (vars_ssa_cache::operator()): Update the cache if the use
+	already has a cache.
+
+2025-05-13  Andrew Pinski  <quic_apinski@quicinc.com>
+
+	* cfgexpand.cc (vars_ssa_cache::operator()): Reverse the order of going
+	through the update list.
+
+2025-05-13  Richard Biener  <rguenther@suse.de>
+
+	* tree-vect-loop.cc (vectorizable_nonlinear_induction):
+	Remove non-SLP path, use SLP_TREE_VECTYPE.
+	(vectorizable_induction): Likewise.  Drop ncopies variable
+	which is always 1.
+
+2025-05-13  Kito Cheng  <kito.cheng@sifive.com>
+
+	* common/config/riscv/riscv-common.cc (riscv_extra_ext_flag_table_t):
+	New.
+	(riscv_ext_flag_table): Rename to ...
+	(riscv_extra_ext_flag_table): this, and drop most of the definitions
+	that can be obtained from the flags field of the riscv_ext_info_t
+	structures.
+	(apply_extra_extension_flags): Use riscv_ext_info_t.
+	(riscv_ext_is_subset): Ditto.
+
+2025-05-13  Kito Cheng  <kito.cheng@sifive.com>
+
+	* common/config/riscv/riscv-common.cc (riscv_ext_version_table):
+	Remove.
+	(standard_extensions_p): Use riscv_ext_info_t.
+	(get_default_version): Use riscv_ext_info_t.
+	(riscv_arch_help): Ditto.
+
+2025-05-13  Kito Cheng  <kito.cheng@sifive.com>
+
+	* common/config/riscv/riscv-common.cc
+	(riscv_implied_info::riscv_implied_info_t): Remove unused
+	variant.
+	(struct riscv_implied_info_t): Remove unused field.
+	(riscv_implied_info::match): Remove unused variant, and adjust
+	the logic.
+	(get_riscv_ext_info): New.
+	(riscv_implied_info): Remove.
+	(riscv_ext_info_t::apply_implied_ext): New.
+	(riscv_combine_info): Remove.
+	(riscv_subset_list::handle_implied_ext): Use riscv_ext_info_t
+	rather than riscv_implied_info.
+	(riscv_subset_list::check_implied_ext): Ditto.
+	(riscv_subset_list::handle_combine_ext): Use riscv_ext_info_t
+	rather than riscv_combine_info.
+	(riscv_minimal_hwprobe_feature_bits): Use riscv_ext_info_t
+	rather than riscv_implied_info.
+
+2025-05-13  Kito Cheng  <kito.cheng@sifive.com>
+
+	* common/config/riscv/riscv-common.cc (riscv_ext_info_t): New
+	struct.
+	(opt_var_ref_t): Adjust order.
+	(cl_opt_var_ref_t): Ditto.
+	(riscv_ext_flag_table_t): Adjust order, and add a new construct
+	that does not hold the extension name.
+	(riscv_version_t): New struct.
+	(riscv_implied_info_t): Adjust order, and add a new construct that
+	does not hold the extension name.
+	(apply_extra_extension_flags): New function.
+	(riscv_ext_infos): New.
+	(riscv_implied_info): Adjust.
+	* config/riscv/riscv-opts.h (EXT_FLAG_MACRO): New macro.
+	(BITMASK_NOT_YET_ALLOCATED): New macro.
+
+2025-05-13  Kito Cheng  <kito.cheng@sifive.com>
+
+	* common/config/riscv/riscv-common.cc (riscv_can_inline_p): Drop
+	extension flags check from `target_flags`.
+	* config/riscv/riscv-subset.h (riscv_x_target_flags_isa_mask):
+	Remove.
+	* config/riscv/riscv.cc (riscv_x_target_flags_isa_mask): Remove.
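[Editor's note] As context for the two cfgexpand.cc entries above: they describe a worklist that is swept repeatedly until a fixed point, where the patch both seeds a use's cache when one already exists and walks the update list in reverse so data propagates through long chains in fewer sweeps. The short C++ program below is a self-contained analogue of that idea only; the containers, pair layout and all names are invented for the illustration and are not the GCC vars_ssa_cache code.

/* Simplified analogue of a fixed-point cache-update worklist (illustrative
   only, not GCC code).  Each pair (dst, src) requests "merge src's cache
   into dst's cache"; when pairs were recorded while walking from a root
   towards its dependencies, sweeping the list in reverse lets data flow
   through a whole chain in one pass instead of one pass per link.  */
#include <cstdio>
#include <set>
#include <utility>
#include <vector>

typedef std::set<int> cache_t;

/* Merge SRC's cache into DST's cache; return true if DST changed.  */
static bool
update (std::vector<cache_t> &cache, int dst, int src)
{
  auto before = cache[dst].size ();
  cache[dst].insert (cache[src].begin (), cache[src].end ());
  return cache[dst].size () != before;
}

/* Sweep LIST repeatedly until nothing changes; return the sweep count.
   CACHE is taken by value so both directions start from the same state.  */
static int
sweeps_to_fixed_point (std::vector<cache_t> cache,
                       const std::vector<std::pair<int, int>> &list,
                       bool reverse)
{
  int sweeps = 0;
  bool changed;
  do
    {
      changed = false;
      sweeps++;
      if (reverse)
        for (auto it = list.rbegin (); it != list.rend (); ++it)
          changed |= update (cache, it->first, it->second);
      else
        for (const auto &e : list)
          changed |= update (cache, e.first, e.second);
    }
  while (changed);
  return sweeps;
}

int
main ()
{
  /* A dependency chain 0 <- 1 <- 2 <- 3; node 3 seeds the data.  */
  std::vector<cache_t> cache (4);
  cache[3] = { 42 };
  std::vector<std::pair<int, int>> list = { { 0, 1 }, { 1, 2 }, { 2, 3 } };

  printf ("forward sweeps: %d\n", sweeps_to_fixed_point (cache, list, false));
  printf ("reverse sweeps: %d\n", sweeps_to_fixed_point (cache, list, true));
  /* Prints 4 forward sweeps vs. 2 reverse sweeps for this chain.  */
  return 0;
}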
+
+2025-05-13  Kito Cheng  <kito.cheng@sifive.com>
+
+	* doc/invoke.texi: Replace hand-written extension table with
+	`@include riscv-ext.texi` to pull in auto-generated entries.
+	* doc/riscv-ext.texi: New generated definition file
+	containing formatted documentation entries for each extension.
+	* Makefile.in: Add riscv-ext.texi to the list of files to be
+	processed by the Texinfo generator.
+	* config/riscv/gen-riscv-ext-texi.cc: New.
+	* config/riscv/t-riscv: Add rule for generating riscv-ext.texi.
+
+2025-05-13  Kito Cheng  <kito.cheng@sifive.com>
+
+	* config/riscv/gen-riscv-ext-opt.cc: New.
+	* config/riscv/riscv.opt: Drop manual entries for target
+	options, and include riscv-ext.opt.
+	* config/riscv/riscv-ext.opt: New.
+	* config/riscv/riscv-ext.opt.urls: New.
+	* config.gcc: Add riscv-ext.opt to the list of target options files.
+	* common/config/riscv/riscv-common.cc (riscv_ext_flag_table): Adjust target
+	option variable entry.
+	(riscv_set_arch_by_subset_list): Adjust target option variable.
+	* config/riscv/riscv-c.cc (riscv_ext_flag_table): Adjust target
+	option variable entry.
+	* config/riscv/riscv-vector-builtins.cc (pragma_intrinsic_flags):
+	Adjust variable name.
+	(riscv_pragma_intrinsic_flags_pollute): Adjust variable name.
+	(riscv_pragma_intrinsic_flags_restore): Ditto.
+	* config/riscv/t-riscv: Add the rule for generating
+	riscv-ext.opt.
+	* config/riscv/riscv-opts.h (TARGET_MIN_VLEN): Update.
+	(TARGET_MIN_VLEN_OPTS): Update.
+
+2025-05-13  Kito Cheng  <kito.cheng@sifive.com>
+
+	* config/riscv/riscv-ext.def: New file; define extension metadata table.
+	* config/riscv/riscv-ext-corev.def: New.
+	* config/riscv/riscv-ext-sifive.def: New.
+	* config/riscv/riscv-ext-thead.def: New.
+	* config/riscv/riscv-ext-ventana.def: New.
+
+2025-05-13  David Malcolm  <dmalcolm@redhat.com>
+
+	PR other/116792
+	* diagnostic-format-html.cc: Include "diagnostic-format-text.h",
+	"pretty-print-urlifier.h" and "edit-context.h".
+	(html_builder::html_builder): Fix indentation in decl.
+	(html_builder::make_element_for_diagnostic): Split out metadata
+	code into make_element_for_metadata.  Call
+	make_element_for_source, make_element_for_path, and
+	make_element_for_patch.
+	(html_builder::make_element_for_source): New.
+	(html_builder::make_element_for_path): New.
+	(html_builder::make_element_for_patch): New.
+	(html_builder::make_metadata_element): New.
+	(html_builder::make_element_for_metadata): New.
+	(html_output_format::get_builder): New.
+	(selftest::test_html_diagnostic_context::get_builder): New.
+	(selftest::test_simple_log): Update test to print a quoted string,
+	and verify that it uses a "gcc-quoted-text" span.
+	(selftest::test_metadata): New.
+	(selftest::diagnostic_format_html_cc_tests): Call it.
+
 2025-05-13  Andrew MacLeod  <amacleod@redhat.com>
 
 	* tree-ssanames.cc (set_bitmask): Use int_range_max for temps.
diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index 83f5cb2..cfb9239 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20250513
+20250514
diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog
index 128ea05..f0046a0 100644
--- a/gcc/ada/ChangeLog
+++ b/gcc/ada/ChangeLog
@@ -1,3 +1,9 @@
+2025-05-13  Nicolas Boulenguez  <nicolas@debian.org>
+
+	PR ada/87778
+	* Make-generated.in: Remove -q gnatmake option.
+	* gcc-interface/Makefile.in: Likewise.
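[Editor's note] The riscv-ext.def, gen-riscv-ext-opt.cc and gen-riscv-ext-texi.cc entries above describe a single extension metadata table that build-time generators expand into both riscv-ext.opt and the riscv-ext.texi documentation table. The sketch below is a deliberately minimal, self-contained C++ illustration of that "one .def table, several expansions" X-macro pattern; the macro, its fields and the output shapes are invented for the example and are far simpler than the real DEFINE_RISCV_EXT records seen later in this diff.

/* Minimal X-macro sketch (illustrative only): one table, expanded twice to
   produce two differently shaped outputs, loosely in the spirit of an .opt
   mask list and a Texinfo table row list.  */
#include <cstdio>

/* In GCC this table would live in its own .def file and be #included by
   each generator.  Fields here: name, description, major, minor.  */
#define EXTENSION_TABLE(X) \
  X (sha, "The augmented hypervisor extension", 1, 0) \
  X (shtvala, "The htval register provides all needed values", 1, 0)

int
main ()
{
  /* Expansion 1: something shaped like option-mask entries.  */
  puts ("; option masks");
#define EMIT_OPT(name, desc, major, minor) printf ("Mask(%s)\n", #name);
  EXTENSION_TABLE (EMIT_OPT)
#undef EMIT_OPT

  /* Expansion 2: something shaped like documentation table rows.  */
  puts ("@multitable rows");
#define EMIT_DOC(name, desc, major, minor) \
  printf ("@item %s @tab %d.%d @tab %s\n", #name, major, minor, desc);
  EXTENSION_TABLE (EMIT_DOC)
#undef EMIT_DOC
  return 0;
}

Running it prints one Mask(...) line and one @item row per table entry; the real generators differ in detail but follow the same one-table, many-expansions shape.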
+ 2025-05-05 Eric Botcazou <ebotcazou@adacore.com> PR ada/120104 diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc index 2b27076..277ef65 100644 --- a/gcc/cfgexpand.cc +++ b/gcc/cfgexpand.cc @@ -766,7 +766,12 @@ vars_ssa_cache::operator() (tree name) /* If the cache exists for the use, don't try to recreate it. */ if (exists (use)) - continue; + { + /* Update the cache here, this can reduce the number of + times through the update loop below. */ + update (old_name, use); + continue; + } /* Create the cache bitmap for the use and also so we don't go into an infinite loop for some phi nodes with loops. */ @@ -804,9 +809,11 @@ vars_ssa_cache::operator() (tree name) bool changed; do { changed = false; - for (auto &e : update_cache_list) + unsigned int i; + std::pair<tree,tree> *e; + FOR_EACH_VEC_ELT_REVERSE (update_cache_list, i, e) { - if (update (e.second, e.first)) + if (update (e->second, e->first)) changed = true; } } while (changed); diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index 3d3ca11..53ca039 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -364,7 +364,7 @@ riscv_subset_t::riscv_subset_t () riscv_subset_list::riscv_subset_list (const char *arch, location_t loc) : m_arch (arch), m_loc (loc), m_head (NULL), m_tail (NULL), m_xlen (0), - m_subset_num (0) + m_subset_num (0), m_allow_adding_dup (false) { } diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index cc8313b..6491c6b 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -296,9 +296,8 @@ scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_) insns_conv = BITMAP_ALLOC (NULL); queue = NULL; - n_sse_to_integer = 0; - n_integer_to_sse = 0; - + cost_sse_integer = 0; + weighted_cost_sse_integer = 0 ; max_visits = x86_stv_max_visits; } @@ -337,20 +336,52 @@ scalar_chain::mark_dual_mode_def (df_ref def) /* Record the def/insn pair so we can later efficiently iterate over the defs to convert on insns not in the chain. */ bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def)); + basic_block bb = BLOCK_FOR_INSN (DF_REF_INSN (def)); + profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + bool speed_p = optimize_bb_for_speed_p (bb); + int cost = 0; + if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def))) { if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def)) && !reg_new) return; - n_integer_to_sse++; + + /* Cost integer to sse moves. */ + if (speed_p) + cost = COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2; + else if (TARGET_64BIT || smode == SImode) + cost = COSTS_N_BYTES (4); + /* vmovd (4 bytes) + vpinsrd (6 bytes). */ + else if (TARGET_SSE4_1) + cost = COSTS_N_BYTES (10); + /* movd (4 bytes) + movd (4 bytes) + unpckldq (4 bytes). */ + else + cost = COSTS_N_BYTES (12); } else { if (!reg_new) return; - n_sse_to_integer++; + + /* Cost sse to integer moves. */ + if (speed_p) + cost = COSTS_N_INSNS (ix86_cost->sse_to_integer) / 2; + else if (TARGET_64BIT || smode == SImode) + cost = COSTS_N_BYTES (4); + /* vmovd (4 bytes) + vpextrd (6 bytes). */ + else if (TARGET_SSE4_1) + cost = COSTS_N_BYTES (10); + /* movd (4 bytes) + psrlq (5 bytes) + movd (4 bytes). 
*/ + else + cost = COSTS_N_BYTES (13); } + if (speed_p) + weighted_cost_sse_integer += bb->count.to_sreal_scale (entry_count) * cost; + + cost_sse_integer += cost; + if (dump_file) fprintf (dump_file, " Mark r%d def in insn %d as requiring both modes in chain #%d\n", @@ -531,15 +562,15 @@ general_scalar_chain::vector_const_cost (rtx exp, basic_block bb) return COSTS_N_INSNS (ix86_cost->sse_load[smode == DImode ? 1 : 0]) / 2; } -/* Compute a gain for chain conversion. */ +/* Return true if it's cost profitable for chain conversion. */ -int +bool general_scalar_chain::compute_convert_gain () { bitmap_iterator bi; unsigned insn_uid; int gain = 0; - int cost = 0; + sreal weighted_gain = 0; if (dump_file) fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id); @@ -559,10 +590,13 @@ general_scalar_chain::compute_convert_gain () rtx dst = SET_DEST (def_set); basic_block bb = BLOCK_FOR_INSN (insn); int igain = 0; + profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + bool speed_p = optimize_bb_for_speed_p (bb); + sreal bb_freq = bb->count.to_sreal_scale (entry_count); if (REG_P (src) && REG_P (dst)) { - if (optimize_bb_for_size_p (bb)) + if (!speed_p) /* reg-reg move is 2 bytes, while SSE 3. */ igain += COSTS_N_BYTES (2 * m - 3); else @@ -571,7 +605,7 @@ general_scalar_chain::compute_convert_gain () } else if (REG_P (src) && MEM_P (dst)) { - if (optimize_bb_for_size_p (bb)) + if (!speed_p) /* Integer load/store is 3+ bytes and SSE 4+. */ igain += COSTS_N_BYTES (3 * m - 4); else @@ -581,7 +615,7 @@ general_scalar_chain::compute_convert_gain () } else if (MEM_P (src) && REG_P (dst)) { - if (optimize_bb_for_size_p (bb)) + if (!speed_p) igain += COSTS_N_BYTES (3 * m - 4); else igain += COSTS_N_INSNS (m * ix86_cost->int_load[2] @@ -593,7 +627,7 @@ general_scalar_chain::compute_convert_gain () of explicit load and store instructions. */ if (MEM_P (dst)) { - if (optimize_bb_for_size_p (bb)) + if (!speed_p) /* ??? This probably should account size difference of SSE and integer load rather than full SSE load. */ igain -= COSTS_N_BYTES (8); @@ -667,7 +701,7 @@ general_scalar_chain::compute_convert_gain () igain -= vector_const_cost (XEXP (src, 1), bb); if (MEM_P (XEXP (src, 1))) { - if (optimize_bb_for_size_p (bb)) + if (!speed_p) igain -= COSTS_N_BYTES (m == 2 ? 3 : 5); else igain += COSTS_N_INSNS @@ -730,7 +764,7 @@ general_scalar_chain::compute_convert_gain () case CONST_INT: if (REG_P (dst)) { - if (optimize_bb_for_size_p (bb)) + if (!speed_p) { /* xor (2 bytes) vs. xorps (3 bytes). */ if (src == const0_rtx) @@ -769,14 +803,14 @@ general_scalar_chain::compute_convert_gain () if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx) { // movd (4 bytes) replaced with movdqa (4 bytes). - if (!optimize_bb_for_size_p (bb)) + if (!!speed_p) igain += COSTS_N_INSNS (ix86_cost->sse_to_integer - ix86_cost->xmm_move) / 2; } else { // pshufd; movd replaced with pshufd. 
- if (optimize_bb_for_size_p (bb)) + if (!speed_p) igain += COSTS_N_BYTES (4); else igain += ix86_cost->sse_to_integer; @@ -788,55 +822,34 @@ general_scalar_chain::compute_convert_gain () } } + if (speed_p) + weighted_gain += bb_freq * igain; + gain += igain; + if (igain != 0 && dump_file) { - fprintf (dump_file, " Instruction gain %d for ", igain); + fprintf (dump_file, " Instruction gain %d with bb_freq %.2f for", + igain, bb_freq.to_double ()); dump_insn_slim (dump_file, insn); } - gain += igain; } if (dump_file) - fprintf (dump_file, " Instruction conversion gain: %d\n", gain); - - /* Cost the integer to sse and sse to integer moves. */ - if (!optimize_function_for_size_p (cfun)) { - cost += n_sse_to_integer * COSTS_N_INSNS (ix86_cost->sse_to_integer) / 2; - /* ??? integer_to_sse but we only have that in the RA cost table. - Assume sse_to_integer/integer_to_sse are the same which they - are at the moment. */ - cost += n_integer_to_sse * COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2; + fprintf (dump_file, " Instruction conversion gain: %d, \n", + gain); + fprintf (dump_file, " Registers conversion cost: %d\n", + cost_sse_integer); + fprintf (dump_file, " Weighted instruction conversion gain: %.2f, \n", + weighted_gain.to_double ()); + fprintf (dump_file, " Weighted registers conversion cost: %.2f\n", + weighted_cost_sse_integer.to_double ()); } - else if (TARGET_64BIT || smode == SImode) - { - cost += n_sse_to_integer * COSTS_N_BYTES (4); - cost += n_integer_to_sse * COSTS_N_BYTES (4); - } - else if (TARGET_SSE4_1) - { - /* vmovd (4 bytes) + vpextrd (6 bytes). */ - cost += n_sse_to_integer * COSTS_N_BYTES (10); - /* vmovd (4 bytes) + vpinsrd (6 bytes). */ - cost += n_integer_to_sse * COSTS_N_BYTES (10); - } - else - { - /* movd (4 bytes) + psrlq (5 bytes) + movd (4 bytes). */ - cost += n_sse_to_integer * COSTS_N_BYTES (13); - /* movd (4 bytes) + movd (4 bytes) + unpckldq (4 bytes). */ - cost += n_integer_to_sse * COSTS_N_BYTES (12); - } - - if (dump_file) - fprintf (dump_file, " Registers conversion cost: %d\n", cost); - - gain -= cost; - if (dump_file) - fprintf (dump_file, " Total gain: %d\n", gain); - - return gain; + if (weighted_gain != weighted_cost_sse_integer) + return weighted_gain > weighted_cost_sse_integer; + else + return gain > cost_sse_integer;; } /* Insert generated conversion instruction sequence INSNS @@ -1553,21 +1566,22 @@ timode_immed_const_gain (rtx cst, basic_block bb) return 0; } -/* Compute a gain for chain conversion. */ +/* Return true it's cost profitable for for chain conversion. */ -int +bool timode_scalar_chain::compute_convert_gain () { /* Assume that if we have to move TImode values between units, then transforming this chain isn't worth it. */ - if (n_sse_to_integer || n_integer_to_sse) - return -1; + if (cost_sse_integer) + return false; bitmap_iterator bi; unsigned insn_uid; /* Split ties to prefer V1TImode when not optimizing for size. */ int gain = optimize_size ? 0 : 1; + sreal weighted_gain = 0; if (dump_file) fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id); @@ -1582,32 +1596,33 @@ timode_scalar_chain::compute_convert_gain () basic_block bb = BLOCK_FOR_INSN (insn); int scost, vcost; int igain = 0; + profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; + bool speed_p = optimize_bb_for_speed_p (bb); + sreal bb_freq = bb->count.to_sreal_scale (entry_count); switch (GET_CODE (src)) { case REG: - if (optimize_bb_for_size_p (bb)) + if (!speed_p) igain = MEM_P (dst) ? 
COSTS_N_BYTES (6) : COSTS_N_BYTES (3); else igain = COSTS_N_INSNS (1); break; case MEM: - igain = optimize_bb_for_size_p (bb) ? COSTS_N_BYTES (7) - : COSTS_N_INSNS (1); + igain = !speed_p ? COSTS_N_BYTES (7) : COSTS_N_INSNS (1); break; case CONST_INT: if (MEM_P (dst) && standard_sse_constant_p (src, V1TImode)) - igain = optimize_bb_for_size_p (bb) ? COSTS_N_BYTES (11) : 1; + igain = !speed_p ? COSTS_N_BYTES (11) : 1; break; case CONST_WIDE_INT: /* 2 x mov vs. vmovdqa. */ if (MEM_P (dst)) - igain = optimize_bb_for_size_p (bb) ? COSTS_N_BYTES (3) - : COSTS_N_INSNS (1); + igain = !speed_p ? COSTS_N_BYTES (3) : COSTS_N_INSNS (1); break; case NOT: @@ -1628,7 +1643,7 @@ timode_scalar_chain::compute_convert_gain () case LSHIFTRT: /* See ix86_expand_v1ti_shift. */ op1val = INTVAL (XEXP (src, 1)); - if (optimize_bb_for_size_p (bb)) + if (!speed_p) { if (op1val == 64 || op1val == 65) scost = COSTS_N_BYTES (5); @@ -1662,7 +1677,7 @@ timode_scalar_chain::compute_convert_gain () case ASHIFTRT: /* See ix86_expand_v1ti_ashiftrt. */ op1val = INTVAL (XEXP (src, 1)); - if (optimize_bb_for_size_p (bb)) + if (!speed_p) { if (op1val == 64 || op1val == 127) scost = COSTS_N_BYTES (7); @@ -1740,7 +1755,7 @@ timode_scalar_chain::compute_convert_gain () case ROTATERT: /* See ix86_expand_v1ti_rotate. */ op1val = INTVAL (XEXP (src, 1)); - if (optimize_bb_for_size_p (bb)) + if (!speed_p) { scost = COSTS_N_BYTES (13); if ((op1val & 31) == 0) @@ -1772,34 +1787,40 @@ timode_scalar_chain::compute_convert_gain () { if (GET_CODE (XEXP (src, 0)) == AND) /* and;and;or (9 bytes) vs. ptest (5 bytes). */ - igain = optimize_bb_for_size_p (bb) ? COSTS_N_BYTES (4) - : COSTS_N_INSNS (2); + igain = !speed_p ? COSTS_N_BYTES (4) : COSTS_N_INSNS (2); /* or (3 bytes) vs. ptest (5 bytes). */ - else if (optimize_bb_for_size_p (bb)) + else if (!speed_p) igain = -COSTS_N_BYTES (2); } else if (XEXP (src, 1) == const1_rtx) /* and;cmp -1 (7 bytes) vs. pcmpeqd;pxor;ptest (13 bytes). */ - igain = optimize_bb_for_size_p (bb) ? -COSTS_N_BYTES (6) - : -COSTS_N_INSNS (1); + igain = !speed_p ? -COSTS_N_BYTES (6) : -COSTS_N_INSNS (1); break; default: break; } + gain += igain; + if (speed_p) + weighted_gain += bb_freq * igain; + if (igain != 0 && dump_file) { - fprintf (dump_file, " Instruction gain %d for ", igain); + fprintf (dump_file, " Instruction gain %d with bb_freq %.2f for ", + igain, bb_freq.to_double ()); dump_insn_slim (dump_file, insn); } - gain += igain; } if (dump_file) - fprintf (dump_file, " Total gain: %d\n", gain); + fprintf (dump_file, " Total gain: %d, weighted gain %.2f\n", + gain, weighted_gain.to_double ()); - return gain; + if (weighted_gain > (sreal) 0) + return true; + else + return gain > 0; } /* Fix uses of converted REG in debug insns. */ @@ -2595,7 +2616,7 @@ convert_scalars_to_vector (bool timode_p) conversions. */ if (chain->build (&candidates[i], uid, disallowed)) { - if (chain->compute_convert_gain () > 0) + if (chain->compute_convert_gain ()) converted_insns += chain->convert (); else if (dump_file) fprintf (dump_file, "Chain #%d conversion is not profitable\n", diff --git a/gcc/config/i386/i386-features.h b/gcc/config/i386/i386-features.h index 7f7c0f7..e3719b3 100644 --- a/gcc/config/i386/i386-features.h +++ b/gcc/config/i386/i386-features.h @@ -153,12 +153,13 @@ class scalar_chain bitmap insns_conv; hash_map<rtx, rtx> defs_map; - unsigned n_sse_to_integer; - unsigned n_integer_to_sse; + /* Cost of inserted conversion between ineteger and sse. 
*/ + int cost_sse_integer; + sreal weighted_cost_sse_integer; auto_vec<rtx_insn *> control_flow_insns; bool build (bitmap candidates, unsigned insn_uid, bitmap disallowed); - virtual int compute_convert_gain () = 0; + virtual bool compute_convert_gain () = 0; int convert (); protected: @@ -184,7 +185,7 @@ class general_scalar_chain : public scalar_chain public: general_scalar_chain (enum machine_mode smode_, enum machine_mode vmode_) : scalar_chain (smode_, vmode_) {} - int compute_convert_gain () final override; + bool compute_convert_gain () final override; private: void convert_insn (rtx_insn *insn) final override; @@ -196,7 +197,7 @@ class timode_scalar_chain : public scalar_chain { public: timode_scalar_chain () : scalar_chain (TImode, V1TImode) {} - int compute_convert_gain () final override; + bool compute_convert_gain () final override; private: void fix_debug_reg_uses (rtx reg); diff --git a/gcc/config/riscv/riscv-ext.def b/gcc/config/riscv/riscv-ext.def index 34742d9..97b5766 100644 --- a/gcc/config/riscv/riscv-ext.def +++ b/gcc/config/riscv/riscv-ext.def @@ -1572,6 +1572,97 @@ DEFINE_RISCV_EXT( /* EXTRA_EXTENSION_FLAGS */ 0) DEFINE_RISCV_EXT( + /* NAME */ sha, + /* UPPERCAE_NAME */ SHA, + /* FULL_NAME */ "The augmented hypervisor extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"h", "shcounterenw", "shgatpa", "shtvala", "shvstvala", "shvstvecd", "shvsatpa", "ssstateen"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sh, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ shcounterenw, + /* UPPERCAE_NAME */ SHCOUNTERENW, + /* FULL_NAME */ "Support writeable enables for any supported counter", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"h", "zihpm"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sh, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ shgatpa, + /* UPPERCAE_NAME */ SHGATPA, + /* FULL_NAME */ "SvNNx4 mode supported for all modes supported by satp", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"h", "ssstateen"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sh, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ shtvala, + /* UPPERCAE_NAME */ SHTVALA, + /* FULL_NAME */ "The htval register provides all needed values", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"h"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sh, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ shvstvala, + /* UPPERCAE_NAME */ SHVSTVALA, + /* FULL_NAME */ "The vstval register provides all needed values", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"h"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sh, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ shvstvecd, + /* UPPERCAE_NAME */ SHVSTVECD, + /* FULL_NAME */ "The vstvec register supports Direct mode", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"h"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sh, + /* BITMASK_GROUP_ID 
*/ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( + /* NAME */ shvsatpa, + /* UPPERCAE_NAME */ SHVSATPA, + /* FULL_NAME */ "The vsatp register supports all modes supported by satp", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"h"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ sh, + /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS */ 0) + +DEFINE_RISCV_EXT( /* NAME */ smaia, /* UPPERCAE_NAME */ SMAIA, /* FULL_NAME */ "Advanced interrupt architecture extension", diff --git a/gcc/config/riscv/riscv-ext.opt b/gcc/config/riscv/riscv-ext.opt index 0c56dc9..9199aa3 100644 --- a/gcc/config/riscv/riscv-ext.opt +++ b/gcc/config/riscv/riscv-ext.opt @@ -29,6 +29,9 @@ TargetVariable int riscv_sd_subext TargetVariable +int riscv_sh_subext + +TargetVariable int riscv_sm_subext TargetVariable @@ -316,6 +319,20 @@ Mask(ZHINXMIN) Var(riscv_zinx_subext) Mask(SDTRIG) Var(riscv_sd_subext) +Mask(SHA) Var(riscv_sh_subext) + +Mask(SHCOUNTERENW) Var(riscv_sh_subext) + +Mask(SHGATPA) Var(riscv_sh_subext) + +Mask(SHTVALA) Var(riscv_sh_subext) + +Mask(SHVSTVALA) Var(riscv_sh_subext) + +Mask(SHVSTVECD) Var(riscv_sh_subext) + +Mask(SHVSATPA) Var(riscv_sh_subext) + Mask(SMAIA) Var(riscv_sm_subext) Mask(SMEPMP) Var(riscv_sm_subext) diff --git a/gcc/config/riscv/riscv-ext.opt.urls b/gcc/config/riscv/riscv-ext.opt.urls index e69de29..c4f4710 100644 --- a/gcc/config/riscv/riscv-ext.opt.urls +++ b/gcc/config/riscv/riscv-ext.opt.urls @@ -0,0 +1,2 @@ +; Autogenerated by regenerate-opt-urls.py from gcc/config/riscv/riscv-ext.opt and generated HTML + diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv index e99d668..854daa9 100644 --- a/gcc/config/riscv/t-riscv +++ b/gcc/config/riscv/t-riscv @@ -198,8 +198,6 @@ RISCV_EXT_DEFS = \ $(srcdir)/config/riscv/riscv-ext.opt: $(RISCV_EXT_DEFS) -$(srcdir)/config/riscv/riscv-ext.opt: s-riscv-ext.opt ; @true - build/gen-riscv-ext-opt$(build_exeext): $(srcdir)/config/riscv/gen-riscv-ext-opt.cc \ $(RISCV_EXT_DEFS) $(CXX_FOR_BUILD) $(CXXFLAGS_FOR_BUILD) $< -o $@ diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index 160e42a..cdd55b6 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -953,7 +953,7 @@ else { reg_pair += 2; // get rid of prefix %f - snprintf (buf, sizeof (buf), "ldr\t%%f0,%%f1;vpdi\t%%%%v%s,%%v1,%%%%v%s,5", reg_pair, reg_pair); + snprintf (buf, sizeof (buf), "vlr\t%%v0,%%v1;vpdi\t%%%%v%s,%%v1,%%%%v%s,5", reg_pair, reg_pair); output_asm_insn (buf, operands); return ""; } diff --git a/gcc/cp/cp-gimplify.cc b/gcc/cp/cp-gimplify.cc index d2423fd..eab5550 100644 --- a/gcc/cp/cp-gimplify.cc +++ b/gcc/cp/cp-gimplify.cc @@ -3343,19 +3343,14 @@ cp_fold (tree x, fold_flags_t flags) || id_equal (DECL_NAME (callee), "addressof") /* This addressof equivalent is used heavily in libstdc++. */ || id_equal (DECL_NAME (callee), "__addressof") + || id_equal (DECL_NAME (callee), "to_underlying") || id_equal (DECL_NAME (callee), "as_const"))) { r = CALL_EXPR_ARG (x, 0); - /* Check that the return and argument types are sane before - folding. 
*/ - if (INDIRECT_TYPE_P (TREE_TYPE (x)) - && INDIRECT_TYPE_P (TREE_TYPE (r))) - { - if (!same_type_p (TREE_TYPE (x), TREE_TYPE (r))) - r = build_nop (TREE_TYPE (x), r); - x = cp_fold (r, flags); - break; - } + if (!same_type_p (TREE_TYPE (x), TREE_TYPE (r))) + r = build_nop (TREE_TYPE (x), r); + x = cp_fold (r, flags); + break; } int sv = optimize, nw = sv; diff --git a/gcc/cp/decl2.cc b/gcc/cp/decl2.cc index 15db1d6..a08d173 100644 --- a/gcc/cp/decl2.cc +++ b/gcc/cp/decl2.cc @@ -4186,7 +4186,11 @@ start_objects (bool initp, unsigned priority, bool has_body, bool omp_target = false) { bool default_init = initp && priority == DEFAULT_INIT_PRIORITY; - bool is_module_init = default_init && module_global_init_needed (); + /* FIXME: We may eventually want to treat OpenMP offload initializers + in modules specially as well. */ + bool is_module_init = (default_init + && !omp_target + && module_global_init_needed ()); tree name = NULL_TREE; if (is_module_init) @@ -5878,12 +5882,8 @@ c_parse_final_cleanups (void) if (static_init_fini_fns[true]->get_or_insert (DEFAULT_INIT_PRIORITY)) has_module_inits = true; - if (flag_openmp) - { - if (!static_init_fini_fns[2 + true]) - static_init_fini_fns[2 + true] = priority_map_t::create_ggc (); - static_init_fini_fns[2 + true]->get_or_insert (DEFAULT_INIT_PRIORITY); - } + /* FIXME: We need to work out what static constructors on OpenMP offload + target in modules will look like. */ } /* Generate initialization and destruction functions for all diff --git a/gcc/cp/method.cc b/gcc/cp/method.cc index 05c19cf..092bae2 100644 --- a/gcc/cp/method.cc +++ b/gcc/cp/method.cc @@ -2949,7 +2949,9 @@ synthesized_method_walk (tree ctype, special_function_kind sfk, bool const_p, && BINFO_VIRTUAL_P (base_binfo) && fn && TREE_CODE (fn) == FUNCTION_DECL && move_fn_p (fn) && !trivial_fn_p (fn) - && vbase_has_user_provided_move_assign (BINFO_TYPE (base_binfo))) + && vbase_has_user_provided_move_assign (BINFO_TYPE (base_binfo)) + && warning_enabled_at (DECL_SOURCE_LOCATION (fn), + OPT_Wvirtual_move_assign)) warning (OPT_Wvirtual_move_assign, "defaulted move assignment for %qT calls a non-trivial " "move assignment operator for virtual base %qT", diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc index f562bf8..e778262 100644 --- a/gcc/cp/module.cc +++ b/gcc/cp/module.cc @@ -12638,7 +12638,11 @@ trees_out::write_function_def (tree decl) { unsigned flags = 0; - flags |= 1 * DECL_NOT_REALLY_EXTERN (decl); + /* Whether the importer should emit this definition, if used. */ + flags |= 1 * (DECL_NOT_REALLY_EXTERN (decl) + && (get_importer_interface (decl) + != importer_interface::always_import)); + if (f) { flags |= 2; diff --git a/gcc/cp/name-lookup.cc b/gcc/cp/name-lookup.cc index 9b317c4..84b5e67 100644 --- a/gcc/cp/name-lookup.cc +++ b/gcc/cp/name-lookup.cc @@ -4556,6 +4556,9 @@ lookup_imported_hidden_friend (tree friend_tmpl) || !DECL_MODULE_ENTITY_P (inner)) return NULL_TREE; + /* Load any templates matching FRIEND_TMPL from importers. 
*/ + lazy_load_pendings (friend_tmpl); + tree name = DECL_NAME (inner); tree *slot = find_namespace_slot (current_namespace, name, false); if (!slot || !*slot || TREE_CODE (*slot) != BINDING_VECTOR) diff --git a/gcc/cp/optimize.cc b/gcc/cp/optimize.cc index 6f9a77f..fc4d6c2 100644 --- a/gcc/cp/optimize.cc +++ b/gcc/cp/optimize.cc @@ -309,8 +309,8 @@ maybe_thunk_body (tree fn, bool force) defer_mangling_aliases = save_defer_mangling_aliases; cgraph_node::get_create (fns[0])->set_comdat_group (comdat_group); cgraph_node::get_create (fns[1])->add_to_same_comdat_group - (cgraph_node::get_create (fns[0])); - symtab_node::get (fn)->add_to_same_comdat_group + (cgraph_node::get (fns[0])); + symtab_node::get_create (fn)->add_to_same_comdat_group (symtab_node::get (fns[0])); if (fns[2]) /* If *[CD][12]* dtors go into the *[CD]5* comdat group and dtor is diff --git a/gcc/doc/riscv-ext.texi b/gcc/doc/riscv-ext.texi index 968654b..bd3d29c 100644 --- a/gcc/doc/riscv-ext.texi +++ b/gcc/doc/riscv-ext.texi @@ -462,6 +462,34 @@ @tab 1.0 @tab sdtrig extension +@item sha +@tab 1.0 +@tab The augmented hypervisor extension + +@item shcounterenw +@tab 1.0 +@tab Support writeable enables for any supported counter + +@item shgatpa +@tab 1.0 +@tab SvNNx4 mode supported for all modes supported by satp + +@item shtvala +@tab 1.0 +@tab The htval register provides all needed values + +@item shvstvala +@tab 1.0 +@tab The vstval register provides all needed values + +@item shvstvecd +@tab 1.0 +@tab The vstvec register supports Direct mode + +@item shvsatpa +@tab 1.0 +@tab The vsatp register supports all modes supported by satp + @item smaia @tab 1.0 @tab Advanced interrupt architecture extension diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index aa6d6cb..8b82b20 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,23 @@ +2025-05-13 Yuao Ma <c8ef@outlook.com> + Steven G. Kargl <kargl@gcc.gnu.org> + + PR fortran/113413 + * intrinsic.cc (do_check): Minor doc polish. + (add_functions): Add atand(y, x) mapping. + * intrinsic.texi: Update atand example. + +2025-05-13 Jakub Jelinek <jakub@redhat.com> + Daniil Kochergin <daniil2472s@gmail.com> + Tobias Burnus <tburnus@baylibre.com> + + PR fortran/120191 + * trans-intrinsic.cc (strip_kind_from_actual): Remove. + (gfc_conv_intrinsic_minmaxloc): Don't call strip_kind_from_actual. + Free and clear kind_arg->expr if non-NULL. Set back_arg->name to + "%VAL" instead of a loop looking for last argument. Remove actual + variable, use array_arg instead. Free and clear dim_arg->expr if + non-NULL for BT_CHARACTER cases instead of using a loop. + 2025-05-11 Thomas Koenig <tkoenig@gcc.gnu.org> PR fortran/120163 diff --git a/gcc/fortran/dump-parse-tree.cc b/gcc/fortran/dump-parse-tree.cc index dd920f3..3cd2eee 100644 --- a/gcc/fortran/dump-parse-tree.cc +++ b/gcc/fortran/dump-parse-tree.cc @@ -4371,6 +4371,8 @@ get_c_type_name (gfc_typespec *ts, gfc_array_spec *as, const char **pre, mpz_clear (sz); *asterisk = false; } + else + *asterisk = true; } return ret; } @@ -4415,10 +4417,11 @@ write_type (gfc_symbol *sym) { gfc_component *c; - /* Don't dump our iso c module, nor vtypes. */ + /* Don't dump types that are not interoperable, our very own ISO C Binding + module, or vtypes. 
*/ if (sym->from_intmod == INTMOD_ISO_C_BINDING || sym->attr.flavor != FL_DERIVED - || sym->attr.vtype) + || sym->attr.vtype || !sym->attr.is_bind_c) return; fprintf (dumpfile, "typedef struct %s {\n", sym->name); diff --git a/gcc/fortran/intrinsic.cc b/gcc/fortran/intrinsic.cc index 2eba209..908e1da 100644 --- a/gcc/fortran/intrinsic.cc +++ b/gcc/fortran/intrinsic.cc @@ -376,11 +376,11 @@ do_check (gfc_intrinsic_sym *specific, gfc_actual_arglist *arg) Argument list: char * name of function - int whether function is elemental - int If the function can be used as an actual argument [1] - bt return type of function - int kind of return type of function - int Fortran standard version + int whether function is elemental + int If the function can be used as an actual argument [1] + bt return type of function + int kind of return type of function + int Fortran standard version check pointer to check function simplify pointer to simplification function resolve pointer to resolution function @@ -396,7 +396,7 @@ do_check (gfc_intrinsic_sym *specific, gfc_actual_arglist *arg) [1] Whether a function can or cannot be used as an actual argument is - determined by its presence on the 13.6 list in Fortran 2003. The + determined by its presence in the 13.6 list in Fortran 2003. The following intrinsics, which are GNU extensions, are considered allowed as actual arguments: ACOSH ATANH DACOSH DASINH DATANH DCONJG DIMAG ZABS ZCOS ZEXP ZLOG ZSIN ZSQRT. */ @@ -3479,6 +3479,13 @@ add_functions (void) gfc_check_fn_r, gfc_simplify_atand, gfc_resolve_trigd, x, BT_REAL, dr, REQUIRED); + /* Two-argument version of atand, equivalent to atan2d. */ + add_sym_2 ("atand", GFC_ISYM_ATAN2D, CLASS_ELEMENTAL, ACTUAL_YES, + BT_REAL, dr, GFC_STD_F2023, + gfc_check_atan2, gfc_simplify_atan2d, gfc_resolve_trigd2, + y, BT_REAL, dr, REQUIRED, + x, BT_REAL, dr, REQUIRED); + make_generic ("atand", GFC_ISYM_ATAND, GFC_STD_F2023); add_sym_1 ("datand", GFC_ISYM_ATAND, CLASS_ELEMENTAL, ACTUAL_YES, diff --git a/gcc/fortran/intrinsic.texi b/gcc/fortran/intrinsic.texi index 3a105bc..48c2d60 100644 --- a/gcc/fortran/intrinsic.texi +++ b/gcc/fortran/intrinsic.texi @@ -1547,7 +1547,7 @@ Fortran 90 and later @node ATAN -@section @code{ATAN} --- Arctangent function +@section @code{ATAN} --- Arctangent function @fnindex ATAN @fnindex DATAN @cindex trigonometric function, tangent, inverse @@ -1619,6 +1619,7 @@ Degrees function: @* @item @emph{Synopsis}: @multitable @columnfractions .80 @item @code{RESULT = ATAND(X)} +@item @code{RESULT = ATAND(Y, X)} @end multitable @item @emph{Description}: @@ -1630,21 +1631,23 @@ Elemental function @item @emph{Arguments}: @multitable @columnfractions .15 .70 -@item @var{X} @tab The type shall be @code{REAL}; -if @var{Y} is present, @var{X} shall be REAL. +@item @var{X} @tab The type shall be @code{REAL}. @item @var{Y} @tab The type and kind type parameter shall be the same as @var{X}. @end multitable @item @emph{Return value}: The return value is of the same type and kind as @var{X}. -The result is in degrees and lies in the range -@math{-90 \leq \Re \atand(x) \leq 90}. +If @var{Y} is present, the result is identical to @code{ATAN2D(Y, X)}. +Otherwise, the result is in degrees and lies in the range +@math{-90 \leq \atand(x) \leq 90}. 
@item @emph{Example}: @smallexample program test_atand real(8) :: x = 2.866_8 + real(4) :: x1 = 1.e0_4, y1 = 0.5e0_4 x = atand(x) + x1 = atand(y1, x1) end program test_atand @end smallexample diff --git a/gcc/fortran/simplify.cc b/gcc/fortran/simplify.cc index 208251b..1927097 100644 --- a/gcc/fortran/simplify.cc +++ b/gcc/fortran/simplify.cc @@ -1183,6 +1183,7 @@ gfc_simplify_asin (gfc_expr *x) } +#if MPFR_VERSION < MPFR_VERSION_NUM(4,2,0) /* Convert radians to degrees, i.e., x * 180 / pi. */ static void @@ -1196,6 +1197,7 @@ rad2deg (mpfr_t x) mpfr_div (x, x, tmp, GFC_RND_MODE); mpfr_clear (tmp); } +#endif /* Simplify ACOSD(X) where the returned value has units of degree. */ @@ -1217,8 +1219,12 @@ gfc_simplify_acosd (gfc_expr *x) } result = gfc_get_constant_expr (x->ts.type, x->ts.kind, &x->where); +#if MPFR_VERSION >= MPFR_VERSION_NUM(4,2,0) + mpfr_acosu (result->value.real, x->value.real, 360, GFC_RND_MODE); +#else mpfr_acos (result->value.real, x->value.real, GFC_RND_MODE); rad2deg (result->value.real); +#endif return range_check (result, "ACOSD"); } @@ -1243,8 +1249,12 @@ gfc_simplify_asind (gfc_expr *x) } result = gfc_get_constant_expr (x->ts.type, x->ts.kind, &x->where); +#if MPFR_VERSION >= MPFR_VERSION_NUM(4,2,0) + mpfr_asinu (result->value.real, x->value.real, 360, GFC_RND_MODE); +#else mpfr_asin (result->value.real, x->value.real, GFC_RND_MODE); rad2deg (result->value.real); +#endif return range_check (result, "ASIND"); } @@ -1261,8 +1271,12 @@ gfc_simplify_atand (gfc_expr *x) return NULL; result = gfc_get_constant_expr (x->ts.type, x->ts.kind, &x->where); +#if MPFR_VERSION >= MPFR_VERSION_NUM(4,2,0) + mpfr_atanu (result->value.real, x->value.real, 360, GFC_RND_MODE); +#else mpfr_atan (result->value.real, x->value.real, GFC_RND_MODE); rad2deg (result->value.real); +#endif return range_check (result, "ATAND"); } @@ -1954,8 +1968,13 @@ gfc_simplify_atan2d (gfc_expr *y, gfc_expr *x) } result = gfc_get_constant_expr (x->ts.type, x->ts.kind, &x->where); +#if MPFR_VERSION >= MPFR_VERSION_NUM(4,2,0) + mpfr_atan2u (result->value.real, y->value.real, x->value.real, 360, + GFC_RND_MODE); +#else mpfr_atan2 (result->value.real, y->value.real, x->value.real, GFC_RND_MODE); rad2deg (result->value.real); +#endif return range_check (result, "ATAN2D"); } @@ -1990,6 +2009,8 @@ gfc_simplify_cos (gfc_expr *x) } +#if MPFR_VERSION < MPFR_VERSION_NUM(4,2,0) +/* Used by trigd_fe.inc. */ static void deg2rad (mpfr_t x) { @@ -2001,11 +2022,13 @@ deg2rad (mpfr_t x) mpfr_mul (x, x, d2r, GFC_RND_MODE); mpfr_clear (d2r); } +#endif +#if MPFR_VERSION < MPFR_VERSION_NUM(4,2,0) /* Simplification routines for SIND, COSD, TAND. */ #include "trigd_fe.inc" - +#endif /* Simplify COSD(X) where X has the unit of degree. 
*/ @@ -2018,8 +2041,12 @@ gfc_simplify_cosd (gfc_expr *x) return NULL; result = gfc_get_constant_expr (x->ts.type, x->ts.kind, &x->where); +#if MPFR_VERSION >= MPFR_VERSION_NUM(4,2,0) + mpfr_cosu (result->value.real, x->value.real, 360, GFC_RND_MODE); +#else mpfr_set (result->value.real, x->value.real, GFC_RND_MODE); simplify_cosd (result->value.real); +#endif return range_check (result, "COSD"); } @@ -2036,8 +2063,12 @@ gfc_simplify_sind (gfc_expr *x) return NULL; result = gfc_get_constant_expr (x->ts.type, x->ts.kind, &x->where); +#if MPFR_VERSION >= MPFR_VERSION_NUM(4,2,0) + mpfr_sinu (result->value.real, x->value.real, 360, GFC_RND_MODE); +#else mpfr_set (result->value.real, x->value.real, GFC_RND_MODE); simplify_sind (result->value.real); +#endif return range_check (result, "SIND"); } @@ -2054,8 +2085,12 @@ gfc_simplify_tand (gfc_expr *x) return NULL; result = gfc_get_constant_expr (x->ts.type, x->ts.kind, &x->where); +#if MPFR_VERSION >= MPFR_VERSION_NUM(4,2,0) + mpfr_tanu (result->value.real, x->value.real, 360, GFC_RND_MODE); +#else mpfr_set (result->value.real, x->value.real, GFC_RND_MODE); simplify_tand (result->value.real); +#endif return range_check (result, "TAND"); } @@ -2078,7 +2113,11 @@ gfc_simplify_cotand (gfc_expr *x) result = gfc_get_constant_expr (x->ts.type, x->ts.kind, &x->where); mpfr_set (result->value.real, x->value.real, GFC_RND_MODE); mpfr_add_ui (result->value.real, result->value.real, 90, GFC_RND_MODE); +#if MPFR_VERSION >= MPFR_VERSION_NUM(4,2,0) + mpfr_tanu (result->value.real, result->value.real, 360, GFC_RND_MODE); +#else simplify_tand (result->value.real); +#endif mpfr_neg (result->value.real, result->value.real, GFC_RND_MODE); return range_check (result, "COTAND"); diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index e63fd6f..009c573 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -6239,8 +6239,9 @@ replace_stmt_with_simplification (gimple_stmt_iterator *gsi, auto code = tree_code (res_op->code); if (TREE_CODE_CLASS (code) == tcc_comparison /* GIMPLE_CONDs condition may not throw. */ - && (!flag_exceptions - || !cfun->can_throw_non_call_exceptions + && ((cfun + && (!flag_exceptions + || !cfun->can_throw_non_call_exceptions)) || !operation_could_trap_p (code, FLOAT_TYPE_P (TREE_TYPE (ops[0])), false, NULL_TREE))) @@ -6276,6 +6277,33 @@ replace_stmt_with_simplification (gimple_stmt_iterator *gsi, } else if (!inplace) { + /* For throwing comparisons, see if the GIMPLE_COND is the same as + the comparison would be. + This can happen due to the match pattern for + `(ne (cmp @0 @1) integer_zerop)` which creates a new expression + for the comparison. 
*/ + if (TREE_CODE_CLASS (code) == tcc_comparison + && (!cfun + || (flag_exceptions + && cfun->can_throw_non_call_exceptions)) + && operation_could_trap_p (code, + FLOAT_TYPE_P (TREE_TYPE (ops[0])), + false, NULL_TREE)) + { + tree lhs = gimple_cond_lhs (cond_stmt); + if (gimple_cond_code (cond_stmt) == NE_EXPR + && TREE_CODE (lhs) == SSA_NAME + && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) + && integer_zerop (gimple_cond_rhs (cond_stmt))) + { + gimple *s = SSA_NAME_DEF_STMT (lhs); + if (is_gimple_assign (s) + && gimple_assign_rhs_code (s) == code + && operand_equal_p (gimple_assign_rhs1 (s), ops[0]) + && operand_equal_p (gimple_assign_rhs2 (s), ops[1])) + return false; + } + } tree res = maybe_push_res_to_seq (res_op, seq); if (!res) return false; diff --git a/gcc/gimple.h b/gcc/gimple.h index 977ff1c..94d5a13 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -3716,6 +3716,7 @@ gimple_cond_code (const gimple *gs) inline void gimple_cond_set_code (gcond *gs, enum tree_code code) { + gcc_gimple_checking_assert (TREE_CODE_CLASS (code) == tcc_comparison); gs->subcode = code; } diff --git a/gcc/m2/ChangeLog b/gcc/m2/ChangeLog index 058468b..40396a2 100644 --- a/gcc/m2/ChangeLog +++ b/gcc/m2/ChangeLog @@ -1,3 +1,14 @@ +2025-05-13 Gaius Mulley <gaiusmod2@gmail.com> + + PR modula2/120253 + * m2.flex (FIRST_COLUMN): New define. + (updatepos): Remove commented code. + (consumeLine): Assign column to FIRST_COLUMN. + (initLine): Ditto. + (m2flex_GetColumnNo): Return FIRST_COLUMN if currentLine is NULL. + (m2flex_GetLineNo): Rewrite for positive logic. + (m2flex_GetLocation): Ditto. + 2025-05-05 Gaius Mulley <gaiusmod2@gmail.com> PR modula2/120117 diff --git a/gcc/m2/m2.flex b/gcc/m2/m2.flex index d08ac3e..e3cf010 100644 --- a/gcc/m2/m2.flex +++ b/gcc/m2/m2.flex @@ -48,6 +48,8 @@ static int cpreprocessor = 0; /* Replace this with correct getter. */ #define EXTERN extern "C" #endif +#define FIRST_COLUMN 1 + /* m2.flex provides a lexical analyser for GNU Modula-2. 
*/ struct lineInfo { @@ -558,7 +560,7 @@ static void consumeLine (void) currentLine->lineno = lineno; currentLine->tokenpos=0; currentLine->nextpos=0; - currentLine->column=0; + currentLine->column=FIRST_COLUMN; START_LINE (lineno, yyleng); yyless(1); /* push back all but the \n */ traceLine (); @@ -621,7 +623,6 @@ static void updatepos (void) seenModuleStart = false; currentLine->nextpos = currentLine->tokenpos+yyleng; currentLine->toklen = yyleng; - /* if (currentLine->column == 0) */ currentLine->column = currentLine->tokenpos+1; currentLine->location = M2Options_OverrideLocation (GET_LOCATION (currentLine->column, @@ -677,7 +678,7 @@ static void initLine (void) currentLine->toklen = 0; currentLine->nextpos = 0; currentLine->lineno = lineno; - currentLine->column = 0; + currentLine->column = FIRST_COLUMN; currentLine->inuse = true; currentLine->next = NULL; } @@ -812,10 +813,10 @@ EXTERN bool m2flex_OpenSource (char *s) EXTERN int m2flex_GetLineNo (void) { - if (currentLine != NULL) - return currentLine->lineno; - else + if (currentLine == NULL) return 0; + else + return currentLine->lineno; } /* @@ -825,10 +826,10 @@ EXTERN int m2flex_GetLineNo (void) EXTERN int m2flex_GetColumnNo (void) { - if (currentLine != NULL) - return currentLine->column; + if (currentLine == NULL) + return FIRST_COLUMN; else - return 0; + return currentLine->column; } /* @@ -837,10 +838,10 @@ EXTERN int m2flex_GetColumnNo (void) EXTERN location_t m2flex_GetLocation (void) { - if (currentLine != NULL) - return currentLine->location; - else + if (currentLine == NULL) return 0; + else + return currentLine->location; } /* diff --git a/gcc/match.pd b/gcc/match.pd index f405068..9613640 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -11308,26 +11308,58 @@ and, (match (ctz_table_index @1 @2 @3) (rshift (mult (bit_and:c (negate @1) @1) INTEGER_CST@2) INTEGER_CST@3)) +/* Floatint point/integer comparison and integer->integer + or floating point -> float point conversion. */ (match (cond_expr_convert_p @0 @2 @3 @6) (cond (simple_comparison@6 @0 @1) (convert@4 @2) (convert@5 @3)) - (if (INTEGRAL_TYPE_P (type) - && INTEGRAL_TYPE_P (TREE_TYPE (@2)) - && INTEGRAL_TYPE_P (TREE_TYPE (@0)) - && INTEGRAL_TYPE_P (TREE_TYPE (@3)) - && TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (@0)) - && TYPE_PRECISION (TREE_TYPE (@0)) - == TYPE_PRECISION (TREE_TYPE (@2)) - && TYPE_PRECISION (TREE_TYPE (@0)) - == TYPE_PRECISION (TREE_TYPE (@3)) + (if ((INTEGRAL_TYPE_P (type) + || (!flag_trapping_math && SCALAR_FLOAT_TYPE_P (type))) + && ((INTEGRAL_TYPE_P (TREE_TYPE (@2)) + && INTEGRAL_TYPE_P (TREE_TYPE (@3))) + || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (@2)) + && types_match (TREE_TYPE (@2), TREE_TYPE (@3)))) + && !operand_equal_p (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0))) + && operand_equal_p (TYPE_SIZE (TREE_TYPE (@0)), + TYPE_SIZE (TREE_TYPE (@2))) + && operand_equal_p (TYPE_SIZE (TREE_TYPE (@0)), + TYPE_SIZE (TREE_TYPE (@3))) /* For vect_recog_cond_expr_convert_pattern, @2 and @3 can differ in signess when convert is truncation, but not ok for extension since it's sign_extend vs zero_extend. */ - && (TYPE_PRECISION (TREE_TYPE (@0)) > TYPE_PRECISION (type) + && (known_gt (tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (@0))), + tree_to_poly_uint64 (TYPE_SIZE (type))) || (TYPE_UNSIGNED (TREE_TYPE (@2)) == TYPE_UNSIGNED (TREE_TYPE (@3)))) && single_use (@4) && single_use (@5)))) +/* Floating point or integer comparison and integer to floating point + conversion. 
*/ +(match (cond_expr_convert_p @0 @2 @3 @6) + (cond (simple_comparison@6 @0 @1) (float@4 @2) (float@5 @3)) + (if (SCALAR_FLOAT_TYPE_P (type) && !flag_trapping_math + && INTEGRAL_TYPE_P (TREE_TYPE (@2)) + && types_match (TREE_TYPE (@2), TREE_TYPE (@3)) + && !operand_equal_p (TYPE_SIZE (type), TYPE_SIZE (TREE_TYPE (@0))) + && operand_equal_p (TYPE_SIZE (TREE_TYPE (@0)), + TYPE_SIZE (TREE_TYPE (@2))) + && single_use (@4) + && single_use (@5)))) + +/* Floating point or integer comparison and floating point to integer + conversion. */ +(match (cond_expr_convert_p @0 @2 @3 @6) + (cond (simple_comparison@6 @0 @1) (fix_trunc@4 @2) (fix_trunc@5 @3)) + (if (INTEGRAL_TYPE_P (type) && !flag_trapping_math + && SCALAR_FLOAT_TYPE_P (TREE_TYPE (@2)) + && types_match (TREE_TYPE (@2), TREE_TYPE (@3)) + && !operand_equal_p (TYPE_SIZE (type), + TYPE_SIZE (TREE_TYPE (@0))) + && operand_equal_p (TYPE_SIZE (TREE_TYPE (@0)), + TYPE_SIZE (TREE_TYPE (@2))) + && single_use (@4) + && single_use (@5)))) + (for bit_op (bit_and bit_ior bit_xor) (match (bitwise_induction_p @0 @2 @3) (bit_op:c diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b7e62e8..6a9c9c7 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,69 @@ +2025-05-13 Tobias Burnus <tburnus@baylibre.com> + + PR fortran/113413 + * gfortran.dg/dec_math.f90: Add comment that degree + functions are part of F2023. + +2025-05-13 Yuao Ma <c8ef@outlook.com> + Steven G. Kargl <kargl@gcc.gnu.org> + + PR fortran/113413 + * gfortran.dg/dec_math.f90: Add atand(y, x) testcase. + +2025-05-13 Andrew Pinski <quic_apinski@quicinc.com> + + PR tree-optimization/119903 + * g++.dg/tree-ssa/pr119903-1.C: New test. + +2025-05-13 Andrew Pinski <quic_apinski@quicinc.com> + + PR middle-end/118868 + * c-c++-common/pr118868-1.c: New test. + +2025-05-13 Gaius Mulley <gaiusmod2@gmail.com> + + PR modula2/120188 + * gm2.dg/doc/examples/plugin/fail/doc-examples-plugin-fail.exp: + Remove call to gm2-dg-frontend-configure-check and replace with + tests for whether plugin variables exist. + +2025-05-13 Jakub Jelinek <jakub@redhat.com> + + PR libfortran/120196 + * gfortran.dg/pr120196.f90: New test. + +2025-05-13 Jakub Jelinek <jakub@redhat.com> + + PR fortran/120191 + * gfortran.dg/pr120191_3.f90: New test. + +2025-05-13 Jakub Jelinek <jakub@redhat.com> + + PR fortran/120191 + * gfortran.dg/pr120191_2.f90: New test. + +2025-05-13 Jakub Jelinek <jakub@redhat.com> + Daniil Kochergin <daniil2472s@gmail.com> + Tobias Burnus <tburnus@baylibre.com> + + PR fortran/120191 + * gfortran.dg/pr120191_1.f90: New test. + +2025-05-13 David Malcolm <dmalcolm@redhat.com> + + PR other/116792 + * gcc.dg/html-output/missing-semicolon.py: Verify that we don't + have an empty "gcc-annotated-source" and we do have a + "gcc-generated-patch". + * gcc.dg/plugin/diagnostic-test-metadata-html.c: New test. + * gcc.dg/plugin/diagnostic-test-metadata-html.py: New test script. + * gcc.dg/plugin/diagnostic-test-paths-2.c: Add + "-fdiagnostics-add-output=experimental-html" to options. Add + invocation of diagnostic-test-paths-2.py. + * gcc.dg/plugin/diagnostic-test-paths-2.py: New test script. + * gcc.dg/plugin/plugin.exp (plugin_test_list): Add + diagnostic-test-metadata-html.c. + 2025-05-13 Andrew MacLeod <amacleod@redhat.com> * gcc.dg/tree-ssa/vrp124.c: New. 
diff --git a/gcc/testsuite/c-c++-common/pr118868-1.c b/gcc/testsuite/c-c++-common/pr118868-1.c new file mode 100644 index 0000000..d0a9e77f7 --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr118868-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ + +/* PR middle-end/118868 */ + +/* __builtin_assoc_barrier should work on pointers without any ICE */ +void *f(void *a) +{ + return __builtin_assoc_barrier(a); +} diff --git a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-ice33.C b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-ice33.C new file mode 100644 index 0000000..8564286 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-ice33.C @@ -0,0 +1,12 @@ +// PR c++/120126 +// { dg-do compile { target c++11 } } + +template <typename... Args> +int sum(Args... args) { + return [args...] { // { dg-error "parameter packs not expanded with" } + typename decltype(args)::type temp; + }; +} +int main() { + sum(1, 10); +} diff --git a/gcc/testsuite/g++.dg/modules/clone-4_a.C b/gcc/testsuite/g++.dg/modules/clone-4_a.C new file mode 100644 index 0000000..3ee6109 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/clone-4_a.C @@ -0,0 +1,12 @@ +// PR c++/120125 +// { dg-additional-options "-fmodules -fdeclone-ctor-dtor" } +// { dg-module-cmi M } + +export module M; + +void foo(); +export template <typename _Tp> struct __shared_ptr { + inline __shared_ptr() { foo(); } +}; + +template class __shared_ptr<int>; diff --git a/gcc/testsuite/g++.dg/modules/clone-4_b.C b/gcc/testsuite/g++.dg/modules/clone-4_b.C new file mode 100644 index 0000000..1b36cb4 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/clone-4_b.C @@ -0,0 +1,12 @@ +// PR c++/120125 +// { dg-additional-options "-fmodules -fdeclone-ctor-dtor" } + +import M; + +int main() { + __shared_ptr<int> s1; + __shared_ptr<double> s2; +} + +// { dg-final { scan-assembler-not {_ZNW1M12__shared_ptrIiEC[1-4]Ev:} } } +// { dg-final { scan-assembler {_ZNW1M12__shared_ptrIdEC2Ev:} } } diff --git a/gcc/testsuite/g++.dg/modules/openmp-1.C b/gcc/testsuite/g++.dg/modules/openmp-1.C new file mode 100644 index 0000000..b5a30ad --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/openmp-1.C @@ -0,0 +1,9 @@ +// PR c++/119864 +// { dg-do assemble } +// { dg-additional-options "-fmodules -fopenmp" } +// { dg-require-effective-target "fopenmp" } + +export module M; + +int foo(); +int x = foo(); diff --git a/gcc/testsuite/g++.dg/modules/tpl-friend-19_a.C b/gcc/testsuite/g++.dg/modules/tpl-friend-19_a.C new file mode 100644 index 0000000..59f0175 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/tpl-friend-19_a.C @@ -0,0 +1,16 @@ +// { dg-additional-options "-fmodules -Wno-global-module" } +// { dg-module-cmi M } + +module; + +template <typename _MemFunPtr> +class _Mem_fn_base { + template <typename> friend struct _Bind_check_arity; +}; + +template <typename> struct _Bind_check_arity {}; + +export module M; + +template struct _Bind_check_arity<int>; +export _Mem_fn_base<int> mem_fn(); diff --git a/gcc/testsuite/g++.dg/modules/tpl-friend-19_b.C b/gcc/testsuite/g++.dg/modules/tpl-friend-19_b.C new file mode 100644 index 0000000..ce99647 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/tpl-friend-19_b.C @@ -0,0 +1,6 @@ +// { dg-additional-options "-fmodules" } + +import M; +int main() { + mem_fn(); +} diff --git a/gcc/testsuite/g++.dg/opt/pr96780_cpp23.C b/gcc/testsuite/g++.dg/opt/pr96780_cpp23.C new file mode 100644 index 0000000..ba4a837 --- /dev/null +++ b/gcc/testsuite/g++.dg/opt/pr96780_cpp23.C @@ -0,0 +1,16 @@ +// PR c++/96780 +// Verify calls to std::move/forward are folded away by the frontend. 
+// { dg-do compile { target c++23 } } +// { dg-additional-options "-ffold-simple-inlines -fdump-tree-gimple" } + +#include <utility> + +enum class A : char {a}; + +extern A& x; + +void f() { + auto&& x1 = std::to_underlying(x); +} + +// { dg-final { scan-tree-dump-not "= std::to_underlying" "gimple" } } diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr119903-1.C b/gcc/testsuite/g++.dg/tree-ssa/pr119903-1.C new file mode 100644 index 0000000..605f989 --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-ssa/pr119903-1.C @@ -0,0 +1,24 @@ +// { dg-do compile { target c++11 } } +// { dg-options "-O2 -fnon-call-exceptions -ftrapping-math -fdump-tree-optimized-eh" } + +// PR tree-optimization/119903 +// match and simplify would cause the internal throwable fp comparison +// to become only external throwable and lose the landing pad. + +int f() noexcept; +int g() noexcept; + +int m(double a) +{ + try { + if (a < 1.0) + return f(); + return g(); + }catch(...) + { + return -1; + } +} + +// Make sure There is a landing pad for the non-call exception from the comparison. +// { dg-final { scan-tree-dump "LP " "optimized" } } diff --git a/gcc/testsuite/g++.dg/warn/ignore-virtual-move-assign.C b/gcc/testsuite/g++.dg/warn/ignore-virtual-move-assign.C new file mode 100644 index 0000000..73922e6 --- /dev/null +++ b/gcc/testsuite/g++.dg/warn/ignore-virtual-move-assign.C @@ -0,0 +1,45 @@ +// { dg-do compile { target c++11 } } +// { dg-options "-Wvirtual-move-assign -Wattributes" } + +#include <utility> + +class A +{ + int val; + +public: + explicit A (int val) : val (val) {} + + A (const A &oth) : val (0) {} + A &operator= (const A &oth) { return *this; } + A (A &&oth) : val (oth.val) { oth.val = 0; } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wvirtual-move-assign" + A &operator= (A &&oth) + { + val += oth.val; + oth.val = 0; + return *this; + } +#pragma GCC diagnostic pop +}; + +class B : virtual A +{ +public: + B () : A (12) {} + B &operator= (B &&) = default; +}; + +class C : virtual A +{ +public: + C () : A (12) {} +}; + +void +test_fn () +{ + C x, y; + x = std::move (y); +} diff --git a/gcc/testsuite/gcc.dg/ipa/pr120044-1.c b/gcc/testsuite/gcc.dg/ipa/pr120044-1.c new file mode 100644 index 0000000..f9fee3e --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/pr120044-1.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -fno-early-inlining -fno-tree-fre -fno-tree-pre -fno-code-hoisting -fno-inline" } */ + +struct a { + int b; +} const c; +void d(char p, struct a e) { + while (e.b) + ; +} +static unsigned short f(const struct a g) { + d(g.b, g); + return g.b; +} +int main() { + return f(c); +} diff --git a/gcc/testsuite/gcc.dg/ipa/pr120044-2.c b/gcc/testsuite/gcc.dg/ipa/pr120044-2.c new file mode 100644 index 0000000..5130791 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/pr120044-2.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -fno-early-inlining -fno-tree-fre -fno-tree-pre -fno-code-hoisting -fno-ipa-cp" } */ + +struct a { + int b; +} const c; +void d(char p, struct a e) { + while (e.b) + ; +} +static unsigned short f(const struct a g) { + d(g.b, g); + return g.b; +} +int main() { + return f(c); +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr114864.c b/gcc/testsuite/gcc.dg/tree-ssa/pr114864.c new file mode 100644 index 0000000..cd9b94c --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr114864.c @@ -0,0 +1,15 @@ +/* { dg-do run } */ +/* { dg-options "-O1 -fno-tree-dce -fno-tree-fre" } */ + +struct a { + int b; +} const c; +void d(const struct a f) {} +void e(const struct a f) 
{ + f.b == 0 ? 1 : f.b; + d(f); +} +int main() { + e(c); + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr103771-4.c b/gcc/testsuite/gcc.target/i386/pr103771-4.c new file mode 100644 index 0000000..299337d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr103771-4.c @@ -0,0 +1,82 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v4 -Ofast -fdump-tree-vect-details" } */ +/* { dg-final { scan-assembler-not "kshift" { target { ! ia32 } } } } */ +/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 6 "vect" { target { ! ia32 } } } } */ + +void +foo (float* a, float* b, int* c, int* d, long long* __restrict e, int n) +{ + for (int i = 0 ; i != n; i++) + { + long long tmp = c[i]; + long long tmp2 = d[i]; + if (a[i] < b[i]) + tmp = tmp2; + e[i] = tmp; + } +} + +void +foo1 (double* a, double* b, long long* c, long long* d, int* __restrict e, int n) +{ + for (int i = 0 ; i != n; i++) + { + int tmp = (int)c[i]; + int tmp2 = (int)d[i]; + if (a[i] < b[i]) + tmp = tmp2; + e[i] = tmp; + } +} + +void +foo2 (float* a, float* b, int* c, int* d, double* __restrict e, int n) +{ + for (int i = 0 ; i != n; i++) + { + double tmp = c[i]; + double tmp2 = d[i]; + if (a[i] < b[i]) + tmp = tmp2; + e[i] = tmp; + } +} + +void +foo3 (double* a, double* b, long long* c, long long* d, float* __restrict e, int n) +{ + for (int i = 0 ; i != n; i++) + { + float tmp = c[i]; + float tmp2 = d[i]; + if (a[i] < b[i]) + tmp = tmp2; + e[i] = tmp; + } +} + +void +foo4 (int* a, int* b, int* c, int* d, double* __restrict e, int n) +{ + for (int i = 0 ; i != n; i++) + { + double tmp = c[i]; + double tmp2 = d[i]; + if (a[i] < b[i]) + tmp = tmp2; + e[i] = tmp; + } +} + +void +foo5 (long long* a, long long* b, long long* c, long long* d, float* __restrict e, int n) +{ + for (int i = 0 ; i != n; i++) + { + float tmp = c[i]; + float tmp2 = d[i]; + if (a[i] < b[i]) + tmp = tmp2; + e[i] = tmp; + } +} diff --git a/gcc/testsuite/gcc.target/riscv/arch-55.c b/gcc/testsuite/gcc.target/riscv/arch-55.c new file mode 100644 index 0000000..0e8a294 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-55.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64g_sha -mabi=lp64d" } */ + +void foo(){} + +/* { dg-final { scan-assembler ".attribute arch, \"rv64i2p1_m2p0_a2p1_f2p2" +"_d2p2_h1p0_zicsr2p0_zifencei2p0_zmmul1p0_zaamo1p0_zalrsc1p0_sha1p0" +"_shcounterenw1p0_shgatpa1p0_shtvala1p0_shvsatpa1p0_shvstvala1p0_shvstvecd1p0" +"_ssstateen1p0\"" } } */
\ No newline at end of file diff --git a/gcc/testsuite/gfortran.dg/dec_math.f90 b/gcc/testsuite/gfortran.dg/dec_math.f90 index 393e7de..79c1807 100644 --- a/gcc/testsuite/gfortran.dg/dec_math.f90 +++ b/gcc/testsuite/gfortran.dg/dec_math.f90 @@ -5,6 +5,12 @@ ! Test extra math intrinsics formerly offered by -fdec-math, ! now included with -std=gnu or -std=legacy. ! +! Since Fortran 2023, the degree trigonometric functions (sind, cosd, ...) +! are part of the standard; additionally, Fortran 2023 added a two-argument +! version of atand as alias for atan2d. +! +! Note that cotan and cotand are not part of Fortran 2023; hence, this file +! still requires -std=gnu and cannot be compiled with -std=f2023. module dec_math @@ -522,6 +528,69 @@ call cmpq(q_i1, q_oxe, q_ox, q_tol, "(x) qatand") #endif ! Input +f_i1 = 1.0_4 +f_i2 = 2.0_4 +d_i1 = 1.0_8 +d_i2 = 2.0_8 +#ifdef __GFC_REAL_10__ +l_i1 = 1.0_10 +l_i2 = 2.0_10 +#endif +#ifdef __GFC_REAL_16__ +q_i1 = 1.0_16 +q_i2 = 2.0_16 +#endif + +! Expected +f_oe = r2d_f * atan2 (f_i1, f_i2) +f_oxe = r2d_f * atan2 (xf * f_i1, f_i2) +d_oe = r2d_d * atan2 (d_i1, d_i2) +d_oxe = r2d_d * atan2 (xd * d_i1, d_i2) +#ifdef __GFC_REAL_10__ +l_oe = r2d_l * atan2 (l_i1, l_i2) +l_oxe = r2d_l * atan2 (xl * l_i1, l_i2) +#endif +#ifdef __GFC_REAL_16__ +q_oe = r2d_q * atan2 (q_i1, q_i2) +q_oxe = r2d_q * atan2 (xq * q_i1, q_i2) +#endif + +! Actual +f_oa = atand (f_i1, f_i2) +f_oc = atand (1.0_4, 2.0_4) +f_ox = atand (xf * f_i1, f_i2) +d_oa = atand (d_i1, d_i2) +d_oc = atand (1.0_8, 2.0_8) +d_ox = atand (xd * d_i1, d_i2) +#ifdef __GFC_REAL_10__ +l_oa = atand (l_i1, l_i2) +l_oc = atand (1.0_10, 2.0_10) +l_ox = atand (xl * l_i1, l_i2) +#endif +#ifdef __GFC_REAL_16__ +q_oa = atand (q_i1, q_i2) +q_oc = atand (1.0_16, 2.0_16) +q_ox = atand (xq * q_i1, q_i2) +#endif + +call cmpf(f_i1, f_oe, f_oa, f_tol, "( ) fatand") +call cmpf(f_i1, f_oe, f_oc, f_tol, "(c) fatand") +call cmpf(f_i1, f_oxe, f_ox, f_tol, "(x) fatand") +call cmpd(d_i1, d_oe, d_oa, d_tol, "( ) datand") +call cmpd(d_i1, d_oe, d_oc, d_tol, "(c) datand") +call cmpd(d_i1, d_oxe, d_ox, d_tol, "(x) atand") +#ifdef __GFC_REAL_10__ +call cmpl(l_i1, l_oe, l_oa, l_tol, "( ) latand") +call cmpl(l_i1, l_oe, l_oc, l_tol, "(c) latand") +call cmpl(l_i1, l_oxe, l_ox, l_tol, "(x) latand") +#endif +#ifdef __GFC_REAL_16__ +call cmpq(q_i1, q_oe, q_oa, q_tol, "( ) qatand") +call cmpq(q_i1, q_oe, q_oc, q_tol, "(c) qatand") +call cmpq(q_i1, q_oxe, q_ox, q_tol, "(x) qatand") +#endif + +! Input f_i1 = 34.3775_4 d_i1 = 34.3774677078494_8 #ifdef __GFC_REAL_10__ diff --git a/gcc/testsuite/gm2.dg/doc/examples/plugin/fail/doc-examples-plugin-fail.exp b/gcc/testsuite/gm2.dg/doc/examples/plugin/fail/doc-examples-plugin-fail.exp index 8a41ff8..6ddf2d5 100644 --- a/gcc/testsuite/gm2.dg/doc/examples/plugin/fail/doc-examples-plugin-fail.exp +++ b/gcc/testsuite/gm2.dg/doc/examples/plugin/fail/doc-examples-plugin-fail.exp @@ -11,7 +11,7 @@ gm2_init_pim4 $srcdir/$subdir dg-init # If the --enable-plugin has not been enabled during configure, bail. -if { ![gm2-dg-frontend-configure-check "enable-plugin" ] } { +if { ![info exists TESTING_IN_BUILD_TREE] || ![info exists ENABLE_PLUGIN] } { return } diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc index 6a95b82..928459a 100644 --- a/gcc/tree-cfg.cc +++ b/gcc/tree-cfg.cc @@ -3870,7 +3870,6 @@ verify_gimple_assign_unary (gassign *stmt) case NEGATE_EXPR: case ABS_EXPR: case BIT_NOT_EXPR: - case PAREN_EXPR: case CONJ_EXPR: /* Disallow pointer and offset types for many of the unary gimple. 
*/ if (POINTER_TYPE_P (lhs_type) @@ -3883,6 +3882,17 @@ verify_gimple_assign_unary (gassign *stmt) } break; + case PAREN_EXPR: + /* Disallow non arthmetic types on PAREN_EXPR. */ + if (AGGREGATE_TYPE_P (lhs_type)) + { + error ("invalid types for %qs", code_name); + debug_generic_expr (lhs_type); + debug_generic_expr (rhs1_type); + return true; + } + break; + case ABSU_EXPR: if (!ANY_INTEGRAL_TYPE_P (lhs_type) || !TYPE_UNSIGNED (lhs_type) diff --git a/gcc/tree-cfgcleanup.cc b/gcc/tree-cfgcleanup.cc index 9a8a668..a34a51e 100644 --- a/gcc/tree-cfgcleanup.cc +++ b/gcc/tree-cfgcleanup.cc @@ -46,6 +46,7 @@ along with GCC; see the file COPYING3. If not see #include "cgraph.h" #include "tree-into-ssa.h" #include "tree-cfgcleanup.h" +#include "gimple-pretty-print.h" /* The set of blocks in that at least one of the following changes happened: @@ -122,6 +123,41 @@ convert_single_case_switch (gswitch *swtch, gimple_stmt_iterator &gsi) return true; } +/* Canonicalize _Bool == 0 and _Bool != 1 to _Bool != 0 of STMT in BB by + swapping edges of the BB. */ +bool +canonicalize_bool_cond (gcond *stmt, basic_block bb) +{ + tree rhs1 = gimple_cond_lhs (stmt); + tree rhs2 = gimple_cond_rhs (stmt); + enum tree_code code = gimple_cond_code (stmt); + if (code != EQ_EXPR && code != NE_EXPR) + return false; + if (TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE + && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) + || TYPE_PRECISION (TREE_TYPE (rhs1)) != 1)) + return false; + + /* Canonicalize _Bool == 0 and _Bool != 1 to _Bool != 0 by swapping edges. */ + if (code == EQ_EXPR && !integer_zerop (rhs2)) + return false; + if (code == NE_EXPR && !integer_onep (rhs2)) + return false; + + gimple_cond_set_code (stmt, NE_EXPR); + gimple_cond_set_rhs (stmt, build_zero_cst (TREE_TYPE (rhs1))); + EDGE_SUCC (bb, 0)->flags ^= (EDGE_TRUE_VALUE|EDGE_FALSE_VALUE); + EDGE_SUCC (bb, 1)->flags ^= (EDGE_TRUE_VALUE|EDGE_FALSE_VALUE); + + if (dump_file) + { + fprintf (dump_file, " Swapped '"); + print_gimple_expr (dump_file, stmt, 0); + fprintf (dump_file, "'\n"); + } + return true; +} + /* Disconnect an unreachable block in the control expression starting at block BB. 
*/ @@ -145,6 +181,9 @@ cleanup_control_expr_graph (basic_block bb, gimple_stmt_iterator gsi) && convert_single_case_switch (as_a<gswitch *> (stmt), gsi)) stmt = gsi_stmt (gsi); + if (gimple_code (stmt) == GIMPLE_COND) + canonicalize_bool_cond (as_a<gcond*> (stmt), bb); + fold_defer_overflow_warnings (); switch (gimple_code (stmt)) { diff --git a/gcc/tree-cfgcleanup.h b/gcc/tree-cfgcleanup.h index 83c857f..94b430e 100644 --- a/gcc/tree-cfgcleanup.h +++ b/gcc/tree-cfgcleanup.h @@ -28,5 +28,6 @@ extern bool delete_unreachable_blocks_update_callgraph (cgraph_node *dst_node, bool update_clones); extern unsigned clean_up_loop_closed_phi (function *); extern bool phi_alternatives_equal (basic_block, edge, edge); +extern bool canonicalize_bool_cond (gcond *stmt, basic_block bb); #endif /* GCC_TREE_CFGCLEANUP_H */ diff --git a/gcc/tree-sra.cc b/gcc/tree-sra.cc index 302b73e..4b6daf7 100644 --- a/gcc/tree-sra.cc +++ b/gcc/tree-sra.cc @@ -4205,8 +4205,10 @@ sra_modify_expr (tree *expr, bool write, gimple_stmt_iterator *stmt_gsi, } else { - gassign *stmt; + if (TREE_READONLY (access->base)) + return false; + gassign *stmt; if (access->grp_partial_lhs) repl = force_gimple_operand_gsi (stmt_gsi, repl, true, NULL_TREE, true, diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc index fafc4d6..3187314 100644 --- a/gcc/tree-ssa-forwprop.cc +++ b/gcc/tree-ssa-forwprop.cc @@ -551,9 +551,8 @@ forward_propagate_into_gimple_cond (gcond *stmt) tree rhs1 = gimple_cond_lhs (stmt); tree rhs2 = gimple_cond_rhs (stmt); - /* We can do tree combining on SSA_NAME and comparison expressions. */ - if (TREE_CODE_CLASS (gimple_cond_code (stmt)) != tcc_comparison) - return 0; + /* GIMPLE_COND will always be a comparison. */ + gcc_assert (TREE_CODE_CLASS (gimple_cond_code (stmt)) == tcc_comparison); tmp = forward_propagate_into_comparison_1 (stmt, code, boolean_type_node, @@ -580,22 +579,8 @@ forward_propagate_into_gimple_cond (gcond *stmt) return (cfg_changed || is_gimple_min_invariant (tmp)) ? 2 : 1; } - /* Canonicalize _Bool == 0 and _Bool != 1 to _Bool != 0 by swapping edges. 
*/ - if ((TREE_CODE (TREE_TYPE (rhs1)) == BOOLEAN_TYPE - || (INTEGRAL_TYPE_P (TREE_TYPE (rhs1)) - && TYPE_PRECISION (TREE_TYPE (rhs1)) == 1)) - && ((code == EQ_EXPR - && integer_zerop (rhs2)) - || (code == NE_EXPR - && integer_onep (rhs2)))) - { - basic_block bb = gimple_bb (stmt); - gimple_cond_set_code (stmt, NE_EXPR); - gimple_cond_set_rhs (stmt, build_zero_cst (TREE_TYPE (rhs1))); - EDGE_SUCC (bb, 0)->flags ^= (EDGE_TRUE_VALUE|EDGE_FALSE_VALUE); - EDGE_SUCC (bb, 1)->flags ^= (EDGE_TRUE_VALUE|EDGE_FALSE_VALUE); - return 1; - } + if (canonicalize_bool_cond (stmt, gimple_bb (stmt))) + return 1; return 0; } @@ -4497,6 +4482,8 @@ pass_forwprop::execute (function *fun) } } } + if (substituted_p) + update_stmt (stmt); if (substituted_p && is_gimple_assign (stmt) && gimple_assign_rhs_code (stmt) == ADDR_EXPR) @@ -4536,17 +4523,7 @@ pass_forwprop::execute (function *fun) && !SSA_NAME_IS_DEFAULT_DEF (use)) bitmap_set_bit (simple_dce_worklist, SSA_NAME_VERSION (use)); - } - - if (changed || substituted_p) - { - if (maybe_clean_or_replace_eh_stmt (orig_stmt, stmt)) - bitmap_set_bit (to_purge, bb->index); - if (!was_noreturn - && is_gimple_call (stmt) && gimple_call_noreturn_p (stmt)) - to_fixup.safe_push (stmt); update_stmt (stmt); - substituted_p = false; } switch (gimple_code (stmt)) @@ -4560,11 +4537,9 @@ pass_forwprop::execute (function *fun) { int did_something; did_something = forward_propagate_into_comparison (&gsi); - if (maybe_clean_or_replace_eh_stmt (stmt, gsi_stmt (gsi))) - bitmap_set_bit (to_purge, bb->index); if (did_something == 2) cfg_changed = true; - changed = did_something != 0; + changed |= did_something != 0; } else if ((code == PLUS_EXPR || code == BIT_IOR_EXPR @@ -4580,15 +4555,15 @@ pass_forwprop::execute (function *fun) } else if (code == CONSTRUCTOR && TREE_CODE (TREE_TYPE (rhs1)) == VECTOR_TYPE) - changed = simplify_vector_constructor (&gsi); + changed |= simplify_vector_constructor (&gsi); else if (code == ARRAY_REF) - changed = simplify_count_trailing_zeroes (&gsi); + changed |= simplify_count_trailing_zeroes (&gsi); break; } case GIMPLE_SWITCH: - changed = simplify_gimple_switch (as_a <gswitch *> (stmt), - edges_to_remove); + changed |= simplify_gimple_switch (as_a <gswitch *> (stmt), + edges_to_remove); break; case GIMPLE_COND: @@ -4597,7 +4572,7 @@ pass_forwprop::execute (function *fun) (as_a <gcond *> (stmt)); if (did_something == 2) cfg_changed = true; - changed = did_something != 0; + changed |= did_something != 0; break; } @@ -4606,13 +4581,23 @@ pass_forwprop::execute (function *fun) tree callee = gimple_call_fndecl (stmt); if (callee != NULL_TREE && fndecl_built_in_p (callee, BUILT_IN_NORMAL)) - changed = simplify_builtin_call (&gsi, callee); + changed |= simplify_builtin_call (&gsi, callee); break; } default:; } + if (changed || substituted_p) + { + substituted_p = false; + stmt = gsi_stmt (gsi); + if (maybe_clean_or_replace_eh_stmt (orig_stmt, stmt)) + bitmap_set_bit (to_purge, bb->index); + if (!was_noreturn + && is_gimple_call (stmt) && gimple_call_noreturn_p (stmt)) + to_fixup.safe_push (stmt); + } if (changed) { /* If the stmt changed then re-visit it and the statements diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index fe6f3cf..2d1a688 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -9698,7 +9698,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo, gphi *phi = dyn_cast <gphi *> (stmt_info->stmt); - tree vectype = STMT_VINFO_VECTYPE (stmt_info); + tree vectype = SLP_TREE_VECTYPE (slp_node); poly_uint64 nunits = 
TYPE_VECTOR_SUBPARTS (vectype); enum vect_induction_op_type induction_type = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info); @@ -9723,7 +9723,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo, /* TODO: Support multi-lane SLP for nonlinear iv. There should be separate vector iv update for each iv and a permutation to generate wanted vector iv. */ - if (slp_node && SLP_TREE_LANES (slp_node) > 1) + if (SLP_TREE_LANES (slp_node) > 1) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -9934,13 +9934,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo, add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), UNKNOWN_LOCATION); - if (slp_node) - slp_node->push_vec_def (induction_phi); - else - { - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (induction_phi); - *vec_stmt = induction_phi; - } + slp_node->push_vec_def (induction_phi); /* In case that vectorization factor (VF) is bigger than the number of elements that we can fit in a vectype (nunits), we have to generate @@ -9970,10 +9964,7 @@ vectorizable_nonlinear_induction (loop_vec_info loop_vinfo, induction_type); gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); new_stmt = SSA_NAME_DEF_STMT (vec_def); - if (slp_node) - slp_node->push_vec_def (new_stmt); - else - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); + slp_node->push_vec_def (new_stmt); } } @@ -9999,15 +9990,13 @@ vectorizable_induction (loop_vec_info loop_vinfo, stmt_vector_for_cost *cost_vec) { class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - unsigned ncopies; bool nested_in_vect_loop = false; class loop *iv_loop; tree vec_def; edge pe = loop_preheader_edge (loop); basic_block new_bb; - tree new_vec, vec_init = NULL_TREE, vec_step, t; + tree vec_init = NULL_TREE, vec_step, t; tree new_name; - gimple *new_stmt; gphi *induction_phi; tree induc_def, vec_dest; poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); @@ -10034,15 +10023,9 @@ vectorizable_induction (loop_vec_info loop_vinfo, return vectorizable_nonlinear_induction (loop_vinfo, stmt_info, vec_stmt, slp_node, cost_vec); - tree vectype = STMT_VINFO_VECTYPE (stmt_info); + tree vectype = SLP_TREE_VECTYPE (slp_node); poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); - if (slp_node) - ncopies = 1; - else - ncopies = vect_get_num_copies (loop_vinfo, vectype); - gcc_assert (ncopies >= 1); - /* FORNOW. These restrictions should be relaxed. */ if (nested_in_vect_loop_p (loop, stmt_info)) { @@ -10052,14 +10035,6 @@ vectorizable_induction (loop_vec_info loop_vinfo, edge latch_e; tree loop_arg; - if (ncopies > 1) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "multiple types in nested loop.\n"); - return false; - } - exit_phi = NULL; latch_e = loop_latch_edge (loop->inner); loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); @@ -10096,7 +10071,7 @@ vectorizable_induction (loop_vec_info loop_vinfo, iv_loop = loop; gcc_assert (iv_loop == (gimple_bb (phi))->loop_father); - if (slp_node && (!nunits.is_constant () && SLP_TREE_LANES (slp_node) != 1)) + if (!nunits.is_constant () && SLP_TREE_LANES (slp_node) != 1) { /* The current SLP code creates the step value element-by-element. */ if (dump_enabled_p ()) @@ -10152,41 +10127,28 @@ vectorizable_induction (loop_vec_info loop_vinfo, if (!vec_stmt) /* transformation not required. */ { unsigned inside_cost = 0, prologue_cost = 0; - if (slp_node) - { - /* We eventually need to set a vector type on invariant - arguments. 
*/ - unsigned j; - slp_tree child; - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child) - if (!vect_maybe_update_slp_op_vectype - (child, SLP_TREE_VECTYPE (slp_node))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "incompatible vector types for " - "invariants\n"); - return false; - } - /* loop cost for vec_loop. */ - inside_cost - = record_stmt_cost (cost_vec, - SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), - vector_stmt, stmt_info, 0, vect_body); - /* prologue cost for vec_init (if not nested) and step. */ - prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop, - scalar_to_vec, - stmt_info, 0, vect_prologue); - } - else /* if (!slp_node) */ - { - /* loop cost for vec_loop. */ - inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt, - stmt_info, 0, vect_body); - /* prologue cost for vec_init and vec_step. */ - prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec, - stmt_info, 0, vect_prologue); - } + /* We eventually need to set a vector type on invariant + arguments. */ + unsigned j; + slp_tree child; + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (slp_node), j, child) + if (!vect_maybe_update_slp_op_vectype + (child, SLP_TREE_VECTYPE (slp_node))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "incompatible vector types for " + "invariants\n"); + return false; + } + /* loop cost for vec_loop. */ + inside_cost = record_stmt_cost (cost_vec, + SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node), + vector_stmt, stmt_info, 0, vect_body); + /* prologue cost for vec_init (if not nested) and step. */ + prologue_cost = record_stmt_cost (cost_vec, 1 + !nested_in_vect_loop, + scalar_to_vec, + stmt_info, 0, vect_prologue); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "vect_model_induction_cost: inside_cost = %d, " @@ -10217,670 +10179,374 @@ vectorizable_induction (loop_vec_info loop_vinfo, with group size 3 we need [i0, i1, i2, i0 + S0] [i1 + S1, i2 + S2, i0 + 2*S0, i1 + 2*S1] [i2 + 2*S2, i0 + 3*S0, i1 + 3*S1, i2 + 3*S2]. */ - if (slp_node) + gimple_stmt_iterator incr_si; + bool insert_after; + standard_iv_increment_position (iv_loop, &incr_si, &insert_after); + + /* The initial values are vectorized, but any lanes > group_size + need adjustment. */ + slp_tree init_node + = SLP_TREE_CHILDREN (slp_node)[pe->dest_idx]; + + /* Gather steps. Since we do not vectorize inductions as + cycles we have to reconstruct the step from SCEV data. */ + unsigned group_size = SLP_TREE_LANES (slp_node); + tree *steps = XALLOCAVEC (tree, group_size); + tree *inits = XALLOCAVEC (tree, group_size); + stmt_vec_info phi_info; + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, phi_info) + { + steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info); + if (!init_node) + inits[i] = gimple_phi_arg_def (as_a<gphi *> (phi_info->stmt), + pe->dest_idx); + } + + /* Now generate the IVs. */ + unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + gcc_assert (multiple_p (nunits * nvects, group_size)); + unsigned nivs; + unsigned HOST_WIDE_INT const_nunits; + if (nested_in_vect_loop) + nivs = nvects; + else if (nunits.is_constant (&const_nunits)) { - gimple_stmt_iterator incr_si; - bool insert_after; - standard_iv_increment_position (iv_loop, &incr_si, &insert_after); - - /* The initial values are vectorized, but any lanes > group_size - need adjustment. */ - slp_tree init_node - = SLP_TREE_CHILDREN (slp_node)[pe->dest_idx]; - - /* Gather steps. 
Since we do not vectorize inductions as - cycles we have to reconstruct the step from SCEV data. */ - unsigned group_size = SLP_TREE_LANES (slp_node); - tree *steps = XALLOCAVEC (tree, group_size); - tree *inits = XALLOCAVEC (tree, group_size); - stmt_vec_info phi_info; - FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, phi_info) - { - steps[i] = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info); - if (!init_node) - inits[i] = gimple_phi_arg_def (as_a<gphi *> (phi_info->stmt), - pe->dest_idx); - } - - /* Now generate the IVs. */ - unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - gcc_assert (multiple_p (nunits * nvects, group_size)); - unsigned nivs; - unsigned HOST_WIDE_INT const_nunits; - if (nested_in_vect_loop) - nivs = nvects; - else if (nunits.is_constant (&const_nunits)) - { - /* Compute the number of distinct IVs we need. First reduce - group_size if it is a multiple of const_nunits so we get - one IV for a group_size of 4 but const_nunits 2. */ - unsigned group_sizep = group_size; - if (group_sizep % const_nunits == 0) - group_sizep = group_sizep / const_nunits; - nivs = least_common_multiple (group_sizep, - const_nunits) / const_nunits; - } - else - { - gcc_assert (SLP_TREE_LANES (slp_node) == 1); - nivs = 1; - } - gimple_seq init_stmts = NULL; - tree lupdate_mul = NULL_TREE; - if (!nested_in_vect_loop) + /* Compute the number of distinct IVs we need. First reduce + group_size if it is a multiple of const_nunits so we get + one IV for a group_size of 4 but const_nunits 2. */ + unsigned group_sizep = group_size; + if (group_sizep % const_nunits == 0) + group_sizep = group_sizep / const_nunits; + nivs = least_common_multiple (group_sizep, const_nunits) / const_nunits; + } + else + { + gcc_assert (SLP_TREE_LANES (slp_node) == 1); + nivs = 1; + } + gimple_seq init_stmts = NULL; + tree lupdate_mul = NULL_TREE; + if (!nested_in_vect_loop) + { + if (nunits.is_constant (&const_nunits)) { - if (nunits.is_constant (&const_nunits)) - { - /* The number of iterations covered in one vector iteration. */ - unsigned lup_mul = (nvects * const_nunits) / group_size; - lupdate_mul - = build_vector_from_val (step_vectype, - SCALAR_FLOAT_TYPE_P (stept) - ? build_real_from_wide (stept, lup_mul, - UNSIGNED) - : build_int_cstu (stept, lup_mul)); - } - else - { - if (SCALAR_FLOAT_TYPE_P (stept)) - { - tree tem = build_int_cst (integer_type_node, vf); - lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR, - stept, tem); - } - else - lupdate_mul = build_int_cst (stept, vf); - lupdate_mul = gimple_build_vector_from_val (&init_stmts, - step_vectype, - lupdate_mul); - } + /* The number of iterations covered in one vector iteration. */ + unsigned lup_mul = (nvects * const_nunits) / group_size; + lupdate_mul + = build_vector_from_val (step_vectype, + SCALAR_FLOAT_TYPE_P (stept) + ? build_real_from_wide (stept, lup_mul, + UNSIGNED) + : build_int_cstu (stept, lup_mul)); } - tree peel_mul = NULL_TREE; - if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)) + else { if (SCALAR_FLOAT_TYPE_P (stept)) - peel_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept, - LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)); - else - peel_mul = gimple_convert (&init_stmts, stept, - LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)); - peel_mul = gimple_build_vector_from_val (&init_stmts, - step_vectype, peel_mul); - - /* If early break then we have to create a new PHI which we can use as - an offset to adjust the induction reduction in early exits. 
- - This is because when peeling for alignment using masking, the first - few elements of the vector can be inactive. As such if we find the - entry in the first iteration we have adjust the starting point of - the scalar code. - - We do this by creating a new scalar PHI that keeps track of whether - we are the first iteration of the loop (with the additional masking) - or whether we have taken a loop iteration already. - - The generated sequence: - - pre-header: - bb1: - i_1 = <number of leading inactive elements> - - header: - bb2: - i_2 = PHI <i_1(bb1), 0(latch)> - … - - early-exit: - bb3: - i_3 = iv_step * i_2 + PHI<vector-iv> - - The first part of the adjustment to create i_1 and i_2 are done here - and the last part creating i_3 is done in - vectorizable_live_operations when the induction extraction is - materialized. */ - if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) - && !LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo)) { - auto skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); - tree ty_skip_niters = TREE_TYPE (skip_niters); - tree break_lhs_phi = vect_get_new_vect_var (ty_skip_niters, - vect_scalar_var, - "pfa_iv_offset"); - gphi *nphi = create_phi_node (break_lhs_phi, bb); - add_phi_arg (nphi, skip_niters, pe, UNKNOWN_LOCATION); - add_phi_arg (nphi, build_zero_cst (ty_skip_niters), - loop_latch_edge (iv_loop), UNKNOWN_LOCATION); - - LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo) - = PHI_RESULT (nphi); + tree tem = build_int_cst (integer_type_node, vf); + lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept, tem); } + else + lupdate_mul = build_int_cst (stept, vf); + lupdate_mul = gimple_build_vector_from_val (&init_stmts, step_vectype, + lupdate_mul); } - tree step_mul = NULL_TREE; - unsigned ivn; - auto_vec<tree> vec_steps; - for (ivn = 0; ivn < nivs; ++ivn) + } + tree peel_mul = NULL_TREE; + if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)) + { + if (SCALAR_FLOAT_TYPE_P (stept)) + peel_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept, + LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)); + else + peel_mul = gimple_convert (&init_stmts, stept, + LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)); + peel_mul = gimple_build_vector_from_val (&init_stmts, + step_vectype, peel_mul); + + /* If early break then we have to create a new PHI which we can use as + an offset to adjust the induction reduction in early exits. + + This is because when peeling for alignment using masking, the first + few elements of the vector can be inactive. As such if we find the + entry in the first iteration we have adjust the starting point of + the scalar code. + + We do this by creating a new scalar PHI that keeps track of whether + we are the first iteration of the loop (with the additional masking) + or whether we have taken a loop iteration already. + + The generated sequence: + + pre-header: + bb1: + i_1 = <number of leading inactive elements> + + header: + bb2: + i_2 = PHI <i_1(bb1), 0(latch)> + … + + early-exit: + bb3: + i_3 = iv_step * i_2 + PHI<vector-iv> + + The first part of the adjustment to create i_1 and i_2 are done here + and the last part creating i_3 is done in + vectorizable_live_operations when the induction extraction is + materialized. 
*/ + if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) + && !LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo)) + { + auto skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); + tree ty_skip_niters = TREE_TYPE (skip_niters); + tree break_lhs_phi = vect_get_new_vect_var (ty_skip_niters, + vect_scalar_var, + "pfa_iv_offset"); + gphi *nphi = create_phi_node (break_lhs_phi, bb); + add_phi_arg (nphi, skip_niters, pe, UNKNOWN_LOCATION); + add_phi_arg (nphi, build_zero_cst (ty_skip_niters), + loop_latch_edge (iv_loop), UNKNOWN_LOCATION); + + LOOP_VINFO_MASK_NITERS_PFA_OFFSET (loop_vinfo) = PHI_RESULT (nphi); + } + } + tree step_mul = NULL_TREE; + unsigned ivn; + auto_vec<tree> vec_steps; + for (ivn = 0; ivn < nivs; ++ivn) + { + gimple_seq stmts = NULL; + bool invariant = true; + if (nunits.is_constant (&const_nunits)) { - gimple_seq stmts = NULL; - bool invariant = true; - if (nunits.is_constant (&const_nunits)) + tree_vector_builder step_elts (step_vectype, const_nunits, 1); + tree_vector_builder init_elts (vectype, const_nunits, 1); + tree_vector_builder mul_elts (step_vectype, const_nunits, 1); + for (unsigned eltn = 0; eltn < const_nunits; ++eltn) { - tree_vector_builder step_elts (step_vectype, const_nunits, 1); - tree_vector_builder init_elts (vectype, const_nunits, 1); - tree_vector_builder mul_elts (step_vectype, const_nunits, 1); - for (unsigned eltn = 0; eltn < const_nunits; ++eltn) - { - /* The scalar steps of the IVs. */ - tree elt = steps[(ivn*const_nunits + eltn) % group_size]; - elt = gimple_convert (&init_stmts, - TREE_TYPE (step_vectype), elt); - step_elts.quick_push (elt); - if (!init_node) - { - /* The scalar inits of the IVs if not vectorized. */ - elt = inits[(ivn*const_nunits + eltn) % group_size]; - if (!useless_type_conversion_p (TREE_TYPE (vectype), - TREE_TYPE (elt))) - elt = gimple_build (&init_stmts, VIEW_CONVERT_EXPR, - TREE_TYPE (vectype), elt); - init_elts.quick_push (elt); - } - /* The number of steps to add to the initial values. */ - unsigned mul_elt = (ivn*const_nunits + eltn) / group_size; - mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept) - ? build_real_from_wide (stept, mul_elt, - UNSIGNED) - : build_int_cstu (stept, mul_elt)); - } - vec_step = gimple_build_vector (&init_stmts, &step_elts); - step_mul = gimple_build_vector (&init_stmts, &mul_elts); + /* The scalar steps of the IVs. */ + tree elt = steps[(ivn*const_nunits + eltn) % group_size]; + elt = gimple_convert (&init_stmts, TREE_TYPE (step_vectype), elt); + step_elts.quick_push (elt); if (!init_node) - vec_init = gimple_build_vector (&init_stmts, &init_elts); - } - else - { - if (init_node) - ; - else if (INTEGRAL_TYPE_P (TREE_TYPE (steps[0]))) - { - new_name = gimple_convert (&init_stmts, stept, inits[0]); - /* Build the initial value directly as a VEC_SERIES_EXPR. */ - vec_init = gimple_build (&init_stmts, VEC_SERIES_EXPR, - step_vectype, new_name, steps[0]); - if (!useless_type_conversion_p (vectype, step_vectype)) - vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR, - vectype, vec_init); - } - else { - /* Build: - [base, base, base, ...] - + (vectype) [0, 1, 2, ...] * [step, step, step, ...]. 
*/ - gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (steps[0]))); - gcc_assert (flag_associative_math); - gcc_assert (index_vectype != NULL_TREE); - - tree index = build_index_vector (index_vectype, 0, 1); - new_name = gimple_convert (&init_stmts, TREE_TYPE (steps[0]), - inits[0]); - tree base_vec = gimple_build_vector_from_val (&init_stmts, - step_vectype, - new_name); - tree step_vec = gimple_build_vector_from_val (&init_stmts, - step_vectype, - steps[0]); - vec_init = gimple_build (&init_stmts, FLOAT_EXPR, - step_vectype, index); - vec_init = gimple_build (&init_stmts, MULT_EXPR, - step_vectype, vec_init, step_vec); - vec_init = gimple_build (&init_stmts, PLUS_EXPR, - step_vectype, vec_init, base_vec); - if (!useless_type_conversion_p (vectype, step_vectype)) - vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR, - vectype, vec_init); + /* The scalar inits of the IVs if not vectorized. */ + elt = inits[(ivn*const_nunits + eltn) % group_size]; + if (!useless_type_conversion_p (TREE_TYPE (vectype), + TREE_TYPE (elt))) + elt = gimple_build (&init_stmts, VIEW_CONVERT_EXPR, + TREE_TYPE (vectype), elt); + init_elts.quick_push (elt); } - /* iv_loop is nested in the loop to be vectorized. Generate: - vec_step = [S, S, S, S] */ - t = unshare_expr (steps[0]); - gcc_assert (CONSTANT_CLASS_P (t) - || TREE_CODE (t) == SSA_NAME); - vec_step = gimple_build_vector_from_val (&init_stmts, - step_vectype, t); - } - vec_steps.safe_push (vec_step); - if (peel_mul) - { - if (!step_mul) - step_mul = peel_mul; - else - step_mul = gimple_build (&init_stmts, - MINUS_EXPR, step_vectype, - step_mul, peel_mul); - } - - /* Create the induction-phi that defines the induction-operand. */ - vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, - "vec_iv_"); - induction_phi = create_phi_node (vec_dest, iv_loop->header); - induc_def = PHI_RESULT (induction_phi); - - /* Create the iv update inside the loop */ - tree up = vec_step; - if (lupdate_mul) - { - if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)) - { - /* When we're using loop_len produced by SELEC_VL, the - non-final iterations are not always processing VF - elements. So vectorize induction variable instead of - - _21 = vect_vec_iv_.6_22 + { VF, ... 
}; - - We should generate: - - _35 = .SELECT_VL (ivtmp_33, VF); - vect_cst__22 = [vec_duplicate_expr] _35; - _21 = vect_vec_iv_.6_22 + vect_cst__22; */ - vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); - tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1, - vectype, 0, 0); - if (SCALAR_FLOAT_TYPE_P (stept)) - expr = gimple_build (&stmts, FLOAT_EXPR, stept, len); - else - expr = gimple_convert (&stmts, stept, len); - lupdate_mul = gimple_build_vector_from_val (&stmts, - step_vectype, - expr); - up = gimple_build (&stmts, MULT_EXPR, - step_vectype, vec_step, lupdate_mul); - } - else - up = gimple_build (&init_stmts, - MULT_EXPR, step_vectype, - vec_step, lupdate_mul); - } - vec_def = gimple_convert (&stmts, step_vectype, induc_def); - vec_def = gimple_build (&stmts, - PLUS_EXPR, step_vectype, vec_def, up); - vec_def = gimple_convert (&stmts, vectype, vec_def); - insert_iv_increment (&incr_si, insert_after, stmts); - add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), - UNKNOWN_LOCATION); - - if (init_node) - vec_init = vect_get_slp_vect_def (init_node, ivn); - if (!nested_in_vect_loop - && step_mul - && !integer_zerop (step_mul)) - { - gcc_assert (invariant); - vec_def = gimple_convert (&init_stmts, step_vectype, vec_init); - up = gimple_build (&init_stmts, MULT_EXPR, step_vectype, - vec_step, step_mul); - vec_def = gimple_build (&init_stmts, PLUS_EXPR, step_vectype, - vec_def, up); - vec_init = gimple_convert (&init_stmts, vectype, vec_def); - } - - /* Set the arguments of the phi node: */ - add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION); - - slp_node->push_vec_def (induction_phi); - } - if (!nested_in_vect_loop) - { - /* Fill up to the number of vectors we need for the whole group. */ - if (nunits.is_constant (&const_nunits)) - nivs = least_common_multiple (group_size, - const_nunits) / const_nunits; - else - nivs = 1; - vec_steps.reserve (nivs-ivn); - for (; ivn < nivs; ++ivn) - { - slp_node->push_vec_def (SLP_TREE_VEC_DEFS (slp_node)[0]); - vec_steps.quick_push (vec_steps[0]); + /* The number of steps to add to the initial values. */ + unsigned mul_elt = (ivn*const_nunits + eltn) / group_size; + mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept) + ? build_real_from_wide (stept, mul_elt, + UNSIGNED) + : build_int_cstu (stept, mul_elt)); } + vec_step = gimple_build_vector (&init_stmts, &step_elts); + step_mul = gimple_build_vector (&init_stmts, &mul_elts); + if (!init_node) + vec_init = gimple_build_vector (&init_stmts, &init_elts); } - - /* Re-use IVs when we can. We are generating further vector - stmts by adding VF' * stride to the IVs generated above. */ - if (ivn < nvects) + else { - if (nunits.is_constant (&const_nunits)) + if (init_node) + ; + else if (INTEGRAL_TYPE_P (TREE_TYPE (steps[0]))) { - unsigned vfp = (least_common_multiple (group_size, const_nunits) - / group_size); - lupdate_mul - = build_vector_from_val (step_vectype, - SCALAR_FLOAT_TYPE_P (stept) - ? build_real_from_wide (stept, - vfp, UNSIGNED) - : build_int_cstu (stept, vfp)); + new_name = gimple_convert (&init_stmts, stept, inits[0]); + /* Build the initial value directly as a VEC_SERIES_EXPR. 
*/ + vec_init = gimple_build (&init_stmts, VEC_SERIES_EXPR, + step_vectype, new_name, steps[0]); + if (!useless_type_conversion_p (vectype, step_vectype)) + vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR, + vectype, vec_init); } else { - if (SCALAR_FLOAT_TYPE_P (stept)) - { - tree tem = build_int_cst (integer_type_node, nunits); - lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR, - stept, tem); - } - else - lupdate_mul = build_int_cst (stept, nunits); - lupdate_mul = gimple_build_vector_from_val (&init_stmts, - step_vectype, - lupdate_mul); - } - for (; ivn < nvects; ++ivn) - { - gimple *iv - = SSA_NAME_DEF_STMT (SLP_TREE_VEC_DEFS (slp_node)[ivn - nivs]); - tree def = gimple_get_lhs (iv); - if (ivn < 2*nivs) - vec_steps[ivn - nivs] - = gimple_build (&init_stmts, MULT_EXPR, step_vectype, - vec_steps[ivn - nivs], lupdate_mul); - gimple_seq stmts = NULL; - def = gimple_convert (&stmts, step_vectype, def); - def = gimple_build (&stmts, PLUS_EXPR, step_vectype, - def, vec_steps[ivn % nivs]); - def = gimple_convert (&stmts, vectype, def); - if (gimple_code (iv) == GIMPLE_PHI) - gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); - else - { - gimple_stmt_iterator tgsi = gsi_for_stmt (iv); - gsi_insert_seq_after (&tgsi, stmts, GSI_CONTINUE_LINKING); - } - slp_node->push_vec_def (def); + /* Build: + [base, base, base, ...] + + (vectype) [0, 1, 2, ...] * [step, step, step, ...]. */ + gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (steps[0]))); + gcc_assert (flag_associative_math); + gcc_assert (index_vectype != NULL_TREE); + + tree index = build_index_vector (index_vectype, 0, 1); + new_name = gimple_convert (&init_stmts, TREE_TYPE (steps[0]), + inits[0]); + tree base_vec = gimple_build_vector_from_val (&init_stmts, + step_vectype, + new_name); + tree step_vec = gimple_build_vector_from_val (&init_stmts, + step_vectype, + steps[0]); + vec_init = gimple_build (&init_stmts, FLOAT_EXPR, + step_vectype, index); + vec_init = gimple_build (&init_stmts, MULT_EXPR, + step_vectype, vec_init, step_vec); + vec_init = gimple_build (&init_stmts, PLUS_EXPR, + step_vectype, vec_init, base_vec); + if (!useless_type_conversion_p (vectype, step_vectype)) + vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR, + vectype, vec_init); } + /* iv_loop is nested in the loop to be vectorized. Generate: + vec_step = [S, S, S, S] */ + t = unshare_expr (steps[0]); + gcc_assert (CONSTANT_CLASS_P (t) + || TREE_CODE (t) == SSA_NAME); + vec_step = gimple_build_vector_from_val (&init_stmts, + step_vectype, t); + } + vec_steps.safe_push (vec_step); + if (peel_mul) + { + if (!step_mul) + step_mul = peel_mul; + else + step_mul = gimple_build (&init_stmts, + MINUS_EXPR, step_vectype, + step_mul, peel_mul); } - new_bb = gsi_insert_seq_on_edge_immediate (pe, init_stmts); - gcc_assert (!new_bb); + /* Create the induction-phi that defines the induction-operand. */ + vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, + "vec_iv_"); + induction_phi = create_phi_node (vec_dest, iv_loop->header); + induc_def = PHI_RESULT (induction_phi); - return true; - } - - tree init_expr = vect_phi_initial_value (phi); - - gimple_seq stmts = NULL; - if (!nested_in_vect_loop) - { - /* Convert the initial value to the IV update type. */ - tree new_type = TREE_TYPE (step_expr); - init_expr = gimple_convert (&stmts, new_type, init_expr); - - /* If we are using the loop mask to "peel" for alignment then we need - to adjust the start value here. 
*/ - tree skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); - if (skip_niters != NULL_TREE) + /* Create the iv update inside the loop */ + tree up = vec_step; + if (lupdate_mul) { - if (FLOAT_TYPE_P (vectype)) - skip_niters = gimple_build (&stmts, FLOAT_EXPR, new_type, - skip_niters); - else - skip_niters = gimple_convert (&stmts, new_type, skip_niters); - tree skip_step = gimple_build (&stmts, MULT_EXPR, new_type, - skip_niters, step_expr); - init_expr = gimple_build (&stmts, MINUS_EXPR, new_type, - init_expr, skip_step); - } - } + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)) + { + /* When we're using loop_len produced by SELEC_VL, the + non-final iterations are not always processing VF + elements. So vectorize induction variable instead of - if (stmts) - { - new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); - } + _21 = vect_vec_iv_.6_22 + { VF, ... }; - /* Create the vector that holds the initial_value of the induction. */ - if (nested_in_vect_loop) - { - /* iv_loop is nested in the loop to be vectorized. init_expr had already - been created during vectorization of previous stmts. We obtain it - from the STMT_VINFO_VEC_STMT of the defining stmt. */ - auto_vec<tree> vec_inits; - vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1, - init_expr, &vec_inits); - vec_init = vec_inits[0]; - /* If the initial value is not of proper type, convert it. */ - if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init))) - { - new_stmt - = gimple_build_assign (vect_get_new_ssa_name (vectype, - vect_simple_var, - "vec_iv_"), - VIEW_CONVERT_EXPR, - build1 (VIEW_CONVERT_EXPR, vectype, - vec_init)); - vec_init = gimple_assign_lhs (new_stmt); - new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop), - new_stmt); - gcc_assert (!new_bb); - } - } - else - { - /* iv_loop is the loop to be vectorized. Create: - vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */ - stmts = NULL; - new_name = gimple_convert (&stmts, TREE_TYPE (step_expr), init_expr); + We should generate: - unsigned HOST_WIDE_INT const_nunits; - if (nunits.is_constant (&const_nunits)) - { - tree_vector_builder elts (step_vectype, const_nunits, 1); - elts.quick_push (new_name); - for (i = 1; i < const_nunits; i++) - { - /* Create: new_name_i = new_name + step_expr */ - new_name = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (new_name), - new_name, step_expr); - elts.quick_push (new_name); + _35 = .SELECT_VL (ivtmp_33, VF); + vect_cst__22 = [vec_duplicate_expr] _35; + _21 = vect_vec_iv_.6_22 + vect_cst__22; */ + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); + tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1, + vectype, 0, 0); + if (SCALAR_FLOAT_TYPE_P (stept)) + expr = gimple_build (&stmts, FLOAT_EXPR, stept, len); + else + expr = gimple_convert (&stmts, stept, len); + lupdate_mul = gimple_build_vector_from_val (&stmts, step_vectype, + expr); + up = gimple_build (&stmts, MULT_EXPR, + step_vectype, vec_step, lupdate_mul); } - /* Create a vector from [new_name_0, new_name_1, ..., - new_name_nunits-1] */ - vec_init = gimple_build_vector (&stmts, &elts); - } - else if (INTEGRAL_TYPE_P (TREE_TYPE (step_expr))) - /* Build the initial value directly from a VEC_SERIES_EXPR. */ - vec_init = gimple_build (&stmts, VEC_SERIES_EXPR, step_vectype, - new_name, step_expr); - else - { - /* Build: - [base, base, base, ...] - + (vectype) [0, 1, 2, ...] * [step, step, step, ...]. 
*/ - gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))); - gcc_assert (flag_associative_math); - gcc_assert (index_vectype != NULL_TREE); - - tree index = build_index_vector (index_vectype, 0, 1); - tree base_vec = gimple_build_vector_from_val (&stmts, step_vectype, - new_name); - tree step_vec = gimple_build_vector_from_val (&stmts, step_vectype, - step_expr); - vec_init = gimple_build (&stmts, FLOAT_EXPR, step_vectype, index); - vec_init = gimple_build (&stmts, MULT_EXPR, step_vectype, - vec_init, step_vec); - vec_init = gimple_build (&stmts, PLUS_EXPR, step_vectype, - vec_init, base_vec); - } - vec_init = gimple_convert (&stmts, vectype, vec_init); + else + up = gimple_build (&init_stmts, MULT_EXPR, step_vectype, + vec_step, lupdate_mul); + } + vec_def = gimple_convert (&stmts, step_vectype, induc_def); + vec_def = gimple_build (&stmts, PLUS_EXPR, step_vectype, vec_def, up); + vec_def = gimple_convert (&stmts, vectype, vec_def); + insert_iv_increment (&incr_si, insert_after, stmts); + add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), + UNKNOWN_LOCATION); - if (stmts) + if (init_node) + vec_init = vect_get_slp_vect_def (init_node, ivn); + if (!nested_in_vect_loop + && step_mul + && !integer_zerop (step_mul)) { - new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); - gcc_assert (!new_bb); + gcc_assert (invariant); + vec_def = gimple_convert (&init_stmts, step_vectype, vec_init); + up = gimple_build (&init_stmts, MULT_EXPR, step_vectype, + vec_step, step_mul); + vec_def = gimple_build (&init_stmts, PLUS_EXPR, step_vectype, + vec_def, up); + vec_init = gimple_convert (&init_stmts, vectype, vec_def); } - } - - - /* Create the vector that holds the step of the induction. */ - gimple_stmt_iterator *step_iv_si = NULL; - if (nested_in_vect_loop) - /* iv_loop is nested in the loop to be vectorized. Generate: - vec_step = [S, S, S, S] */ - new_name = step_expr; - else if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)) - { - /* When we're using loop_len produced by SELEC_VL, the non-final - iterations are not always processing VF elements. So vectorize - induction variable instead of - _21 = vect_vec_iv_.6_22 + { VF, ... }; + /* Set the arguments of the phi node: */ + add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION); - We should generate: - - _35 = .SELECT_VL (ivtmp_33, VF); - vect_cst__22 = [vec_duplicate_expr] _35; - _21 = vect_vec_iv_.6_22 + vect_cst__22; */ - gcc_assert (!slp_node); - gimple_seq seq = NULL; - vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); - tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1, vectype, 0, 0); - expr = force_gimple_operand (fold_convert (TREE_TYPE (step_expr), - unshare_expr (len)), - &seq, true, NULL_TREE); - new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr), expr, - step_expr); - gsi_insert_seq_before (&si, seq, GSI_SAME_STMT); - step_iv_si = &si; + slp_node->push_vec_def (induction_phi); } - else + if (!nested_in_vect_loop) { - /* iv_loop is the loop to be vectorized. Generate: - vec_step = [VF*S, VF*S, VF*S, VF*S] */ - gimple_seq seq = NULL; - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) - { - expr = build_int_cst (integer_type_node, vf); - expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr); - } + /* Fill up to the number of vectors we need for the whole group. 
*/ + if (nunits.is_constant (&const_nunits)) + nivs = least_common_multiple (group_size, const_nunits) / const_nunits; else - expr = build_int_cst (TREE_TYPE (step_expr), vf); - new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr), - expr, step_expr); - if (seq) + nivs = 1; + vec_steps.reserve (nivs-ivn); + for (; ivn < nivs; ++ivn) { - new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); - gcc_assert (!new_bb); + slp_node->push_vec_def (SLP_TREE_VEC_DEFS (slp_node)[0]); + vec_steps.quick_push (vec_steps[0]); } } - t = unshare_expr (new_name); - gcc_assert (CONSTANT_CLASS_P (new_name) - || TREE_CODE (new_name) == SSA_NAME); - new_vec = build_vector_from_val (step_vectype, t); - vec_step = vect_init_vector (loop_vinfo, stmt_info, - new_vec, step_vectype, step_iv_si); - - - /* Create the following def-use cycle: - loop prolog: - vec_init = ... - vec_step = ... - loop: - vec_iv = PHI <vec_init, vec_loop> - ... - STMT - ... - vec_loop = vec_iv + vec_step; */ - - /* Create the induction-phi that defines the induction-operand. */ - vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_"); - induction_phi = create_phi_node (vec_dest, iv_loop->header); - induc_def = PHI_RESULT (induction_phi); - - /* Create the iv update inside the loop */ - stmts = NULL; - vec_def = gimple_convert (&stmts, step_vectype, induc_def); - vec_def = gimple_build (&stmts, PLUS_EXPR, step_vectype, vec_def, vec_step); - vec_def = gimple_convert (&stmts, vectype, vec_def); - gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); - new_stmt = SSA_NAME_DEF_STMT (vec_def); - - /* Set the arguments of the phi node: */ - add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION); - add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), - UNKNOWN_LOCATION); - - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (induction_phi); - *vec_stmt = induction_phi; - - /* In case that vectorization factor (VF) is bigger than the number - of elements that we can fit in a vectype (nunits), we have to generate - more than one vector stmt - i.e - we need to "unroll" the - vector stmt by a factor VF/nunits. For more details see documentation - in vectorizable_operation. */ - - if (ncopies > 1) + /* Re-use IVs when we can. We are generating further vector + stmts by adding VF' * stride to the IVs generated above. */ + if (ivn < nvects) { - gimple_seq seq = NULL; - /* FORNOW. This restriction should be relaxed. */ - gcc_assert (!nested_in_vect_loop); - /* We expect LOOP_VINFO_USING_SELECT_VL_P to be false if ncopies > 1. */ - gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)); - - /* Create the vector that holds the step of the induction. */ - if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) + if (nunits.is_constant (&const_nunits)) { - expr = build_int_cst (integer_type_node, nunits); - expr = gimple_build (&seq, FLOAT_EXPR, TREE_TYPE (step_expr), expr); + unsigned vfp = (least_common_multiple (group_size, const_nunits) + / group_size); + lupdate_mul + = build_vector_from_val (step_vectype, + SCALAR_FLOAT_TYPE_P (stept) + ? 
build_real_from_wide (stept, + vfp, UNSIGNED) + : build_int_cstu (stept, vfp)); } else - expr = build_int_cst (TREE_TYPE (step_expr), nunits); - new_name = gimple_build (&seq, MULT_EXPR, TREE_TYPE (step_expr), - expr, step_expr); - if (seq) { - new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); - gcc_assert (!new_bb); - } - - t = unshare_expr (new_name); - gcc_assert (CONSTANT_CLASS_P (new_name) - || TREE_CODE (new_name) == SSA_NAME); - new_vec = build_vector_from_val (step_vectype, t); - vec_step = vect_init_vector (loop_vinfo, stmt_info, - new_vec, step_vectype, NULL); - - vec_def = induc_def; - for (i = 1; i < ncopies + 1; i++) - { - /* vec_i = vec_prev + vec_step */ - gimple_seq stmts = NULL; - vec_def = gimple_convert (&stmts, step_vectype, vec_def); - vec_def = gimple_build (&stmts, - PLUS_EXPR, step_vectype, vec_def, vec_step); - vec_def = gimple_convert (&stmts, vectype, vec_def); - - gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); - if (i < ncopies) + if (SCALAR_FLOAT_TYPE_P (stept)) { - new_stmt = SSA_NAME_DEF_STMT (vec_def); - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); + tree tem = build_int_cst (integer_type_node, nunits); + lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR, stept, tem); } else + lupdate_mul = build_int_cst (stept, nunits); + lupdate_mul = gimple_build_vector_from_val (&init_stmts, step_vectype, + lupdate_mul); + } + for (; ivn < nvects; ++ivn) + { + gimple *iv + = SSA_NAME_DEF_STMT (SLP_TREE_VEC_DEFS (slp_node)[ivn - nivs]); + tree def = gimple_get_lhs (iv); + if (ivn < 2*nivs) + vec_steps[ivn - nivs] + = gimple_build (&init_stmts, MULT_EXPR, step_vectype, + vec_steps[ivn - nivs], lupdate_mul); + gimple_seq stmts = NULL; + def = gimple_convert (&stmts, step_vectype, def); + def = gimple_build (&stmts, PLUS_EXPR, step_vectype, + def, vec_steps[ivn % nivs]); + def = gimple_convert (&stmts, vectype, def); + if (gimple_code (iv) == GIMPLE_PHI) + gsi_insert_seq_before (&si, stmts, GSI_SAME_STMT); + else { - /* vec_1 = vec_iv + (VF/n * S) - vec_2 = vec_1 + (VF/n * S) - ... - vec_n = vec_prev + (VF/n * S) = vec_iv + VF * S = vec_loop - - vec_n is used as vec_loop to save the large step register and - related operations. */ - add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), - UNKNOWN_LOCATION); + gimple_stmt_iterator tgsi = gsi_for_stmt (iv); + gsi_insert_seq_after (&tgsi, stmts, GSI_CONTINUE_LINKING); } + slp_node->push_vec_def (def); } } - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "transform induction: created def-use cycle: %G%G", - (gimple *) induction_phi, SSA_NAME_DEF_STMT (vec_def)); + new_bb = gsi_insert_seq_on_edge_immediate (pe, init_stmts); + gcc_assert (!new_bb); return true; } diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index ca19add..d848476 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -1098,6 +1098,7 @@ vect_recog_cond_expr_convert_pattern (vec_info *vinfo, tree lhs, match[4], temp, type, new_lhs, op2; gimple *cond_stmt; gimple *pattern_stmt; + enum tree_code code = NOP_EXPR; if (!last_stmt) return NULL; @@ -1111,6 +1112,11 @@ vect_recog_cond_expr_convert_pattern (vec_info *vinfo, vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt); + if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs))) + code = INTEGRAL_TYPE_P (TREE_TYPE (match[1])) ? 
 FLOAT_EXPR : CONVERT_EXPR;
+  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (match[1])))
+    code = FIX_TRUNC_EXPR;
+
   op2 = match[2];
   type = TREE_TYPE (match[1]);
   if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
@@ -1127,7 +1133,7 @@ vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
   append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
                          get_vectype_for_scalar_type (vinfo, type));
   new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
-  pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
+  pattern_stmt = gimple_build_assign (new_lhs, code, temp);
   *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);

   if (dump_enabled_p ())
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 857517f..fb2262a 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -8036,7 +8036,7 @@ vect_prologue_cost_for_slp (slp_tree node,
         we are costing so avoid passing it down more than once.  Pass
         it to the first vec_construct or scalar_to_vec part since for those
         the x86 backend tries to account for GPR to XMM register moves.  */
-      record_stmt_cost (cost_vec, 1, kind,
+      record_stmt_cost (cost_vec, 1, kind, nullptr,
                        (kind != vector_load && !passed) ? node : nullptr,
                        vectype, 0, vect_prologue);
       if (kind != vector_load)
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index bd390b2..6695854 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -130,7 +130,8 @@ record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                   tree vectype, int misalign,
                   enum vect_cost_model_location where)
 {
-  return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
+  return record_stmt_cost (body_cost_vec, count, kind,
+                           SLP_TREE_REPRESENTATIVE (node), node,
                            vectype, misalign, where);
 }

@@ -905,11 +906,8 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
    be generated for the single vector op.  We will handle that shortly.  */

 static void
-vect_model_simple_cost (vec_info *,
-                        stmt_vec_info stmt_info, int ncopies,
-                        enum vect_def_type *dt,
-                        int ndts,
-                        slp_tree node,
+vect_model_simple_cost (vec_info *, int ncopies, enum vect_def_type *dt,
+                        int ndts, slp_tree node,
                         stmt_vector_for_cost *cost_vec,
                         vect_cost_for_stmt kind = vector_stmt)
 {
@@ -928,11 +926,11 @@ vect_model_simple_cost (vec_info *,
   for (int i = 0; i < ndts; i++)
     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
       prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
-                                         stmt_info, 0, vect_prologue);
+                                         node, 0, vect_prologue);

   /* Pass the inside-of-loop statements to the target-specific cost
      model.  */
   inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
-                                   stmt_info, 0, vect_body);
+                                   node, 0, vect_body);

   if (dump_enabled_p ())
@@ -1019,13 +1017,15 @@ vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, slp_tree slp_node,
                      unsigned int *inside_cost,
                      stmt_vector_for_cost *body_cost_vec)
 {
+  tree vectype
+    = slp_node ? SLP_TREE_VECTYPE (slp_node) : STMT_VINFO_VECTYPE (stmt_info);
   switch (alignment_support_scheme)
     {
     case dr_aligned:
       {
         *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
-                                          vector_store, stmt_info, slp_node, 0,
-                                          vect_body);
+                                          vector_store, stmt_info, slp_node,
+                                          vectype, 0, vect_body);

         if (dump_enabled_p ())
           dump_printf_loc (MSG_NOTE, vect_location,
@@ -1038,7 +1038,7 @@ vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, slp_tree slp_node,
       /* Here, we assign an additional cost for the unaligned store.  */
       *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                         unaligned_store, stmt_info, slp_node,
-                                        misalignment, vect_body);
+                                        vectype, misalignment, vect_body);
       if (dump_enabled_p ())
         dump_printf_loc (MSG_NOTE, vect_location,
                          "vect_model_store_cost: unaligned supported by "
@@ -1072,12 +1072,15 @@ vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, slp_tree slp_node,
                     stmt_vector_for_cost *body_cost_vec,
                     bool record_prologue_costs)
 {
+  tree vectype
+    = slp_node ? SLP_TREE_VECTYPE (slp_node) : STMT_VINFO_VECTYPE (stmt_info);
   switch (alignment_support_scheme)
     {
     case dr_aligned:
       {
         *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
-                                          stmt_info, slp_node, 0, vect_body);
+                                          stmt_info, slp_node, vectype,
+                                          0, vect_body);

         if (dump_enabled_p ())
           dump_printf_loc (MSG_NOTE, vect_location,
@@ -1090,7 +1093,7 @@ vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, slp_tree slp_node,
       /* Here, we assign an additional cost for the unaligned load.  */
       *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                         unaligned_load, stmt_info, slp_node,
-                                        misalignment, vect_body);
+                                        vectype, misalignment, vect_body);

       if (dump_enabled_p ())
         dump_printf_loc (MSG_NOTE, vect_location,
@@ -1102,18 +1105,19 @@ vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, slp_tree slp_node,
     case dr_explicit_realign:
       {
         *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
-                                          vector_load, stmt_info, slp_node, 0,
-                                          vect_body);
+                                          vector_load, stmt_info, slp_node,
+                                          vectype, 0, vect_body);
         *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
-                                          vec_perm, stmt_info, slp_node, 0,
-                                          vect_body);
+                                          vec_perm, stmt_info, slp_node,
+                                          vectype, 0, vect_body);

         /* FIXME: If the misalignment remains fixed across the iterations of
            the containing loop, the following cost should be added to the
            prologue costs.  */
         if (targetm.vectorize.builtin_mask_for_load)
           *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
-                                            stmt_info, slp_node, 0, vect_body);
+                                            stmt_info, slp_node, vectype,
+                                            0, vect_body);

         if (dump_enabled_p ())
           dump_printf_loc (MSG_NOTE, vect_location,
@@ -1139,17 +1143,21 @@ vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, slp_tree slp_node,
             {
               *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                   vector_stmt, stmt_info,
-                                                  slp_node, 0, vect_prologue);
+                                                  slp_node, vectype,
+                                                  0, vect_prologue);
               if (targetm.vectorize.builtin_mask_for_load)
                 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                     vector_stmt, stmt_info,
-                                                    slp_node, 0, vect_prologue);
+                                                    slp_node, vectype,
+                                                    0, vect_prologue);
             }

         *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
-                                          stmt_info, slp_node, 0, vect_body);
+                                          stmt_info, slp_node, vectype,
+                                          0, vect_body);
         *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
-                                          stmt_info, slp_node, 0, vect_body);
+                                          stmt_info, slp_node, vectype,
+                                          0, vect_body);

         if (dump_enabled_p ())
           dump_printf_loc (MSG_NOTE, vect_location,
@@ -3756,8 +3764,7 @@ vectorizable_call (vec_info *vinfo,
         }
       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
       DUMP_VECT_SCOPE ("vectorizable_call");
-      vect_model_simple_cost (vinfo, stmt_info,
-                              ncopies, dt, ndts, slp_node, cost_vec);
+      vect_model_simple_cost (vinfo, ncopies, dt, ndts, slp_node, cost_vec);
       if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
         record_stmt_cost (cost_vec, ncopies / 2,
                           vec_promote_demote, stmt_info, 0, vect_body);
@@ -4724,8 +4731,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,

       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
       DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
-/*      vect_model_simple_cost (vinfo, stmt_info, ncopies,
-                                dt, slp_node, cost_vec); */
+/*      vect_model_simple_cost (vinfo, ncopies, dt, slp_node, cost_vec); */
       return true;
     }
@@ -5922,7 +5928,7 @@ vectorizable_conversion (vec_info *vinfo,
       if (modifier == NONE)
         {
           STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
-          vect_model_simple_cost (vinfo, stmt_info, (1 + multi_step_cvt),
+          vect_model_simple_cost (vinfo, (1 + multi_step_cvt),
                                   dt, ndts, slp_node, cost_vec);
         }
       else if (modifier == NARROW_SRC || modifier == NARROW_DST)
@@ -6291,8 +6297,7 @@ vectorizable_assignment (vec_info *vinfo,
       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
       DUMP_VECT_SCOPE ("vectorizable_assignment");
       if (!vect_nop_conversion_p (stmt_info))
-        vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
-                                cost_vec);
+        vect_model_simple_cost (vinfo, ncopies, dt, ndts, slp_node, cost_vec);
       return true;
     }
@@ -6662,7 +6667,7 @@ vectorizable_shift (vec_info *vinfo,
         }
       STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
       DUMP_VECT_SCOPE ("vectorizable_shift");
-      vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
+      vect_model_simple_cost (vinfo, ncopies, dt,
                               scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
       return true;
     }
@@ -7099,8 +7104,7 @@ vectorizable_operation (vec_info *vinfo,
       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
       DUMP_VECT_SCOPE ("vectorizable_operation");
-      vect_model_simple_cost (vinfo, stmt_info,
-                              1, dt, ndts, slp_node, cost_vec);
+      vect_model_simple_cost (vinfo, 1, dt, ndts, slp_node, cost_vec);
       if (using_emulated_vectors_p)
         {
           /* The above vect_model_simple_cost call handles constants
@@ -8676,7 +8680,7 @@ vectorizable_store (vec_info *vinfo,
           }
         else if (vls_type != VLS_STORE_INVARIANT)
           return;
-        *prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info,
+        *prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, slp_node,
                                             0, vect_prologue);
       };
@@ -8985,8 +8989,7 @@ vectorizable_store (vec_info *vinfo,
           if (nstores > 1)
             inside_cost += record_stmt_cost (cost_vec, n_adjacent_stores,
-                                             vec_to_scalar, stmt_info, slp_node,
-                                             0, vect_body);
+                                             vec_to_scalar, slp_node, 0, vect_body);
         }
       if (dump_enabled_p ())
         dump_printf_loc (MSG_NOTE, vect_location,
@@ -9323,8 +9326,7 @@ vectorizable_store (vec_info *vinfo,
         {
           if (costing_p && vls_type == VLS_STORE_INVARIANT)
             prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
-                                               stmt_info, slp_node, 0,
-                                               vect_prologue);
+                                               slp_node, 0, vect_prologue);
           else if (!costing_p)
             {
               /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
@@ -9398,8 +9400,7 @@ vectorizable_store (vec_info *vinfo,
                   unsigned int cnunits = vect_nunits_for_cost (vectype);
                   inside_cost
                     += record_stmt_cost (cost_vec, cnunits, scalar_store,
-                                         stmt_info, slp_node, 0,
-                                         vect_body);
+                                         slp_node, 0, vect_body);
                   continue;
                 }
@@ -9467,7 +9468,7 @@ vectorizable_store (vec_info *vinfo,
                   unsigned int cnunits = vect_nunits_for_cost (vectype);
                   inside_cost
                     += record_stmt_cost (cost_vec, cnunits, scalar_store,
-                                         stmt_info, slp_node, 0, vect_body);
+                                         slp_node, 0, vect_body);
                   continue;
                 }
@@ -9575,14 +9576,14 @@ vectorizable_store (vec_info *vinfo,
                          consumed by the load).  */
                       inside_cost
                         += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
-                                             stmt_info, slp_node, 0, vect_body);
+                                             slp_node, 0, vect_body);
                       /* N scalar stores plus extracting the elements.  */
                       inside_cost
                         += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
-                                             stmt_info, slp_node, 0, vect_body);
+                                             slp_node, 0, vect_body);
                       inside_cost
                         += record_stmt_cost (cost_vec, cnunits, scalar_store,
-                                             stmt_info, slp_node, 0, vect_body);
+                                             slp_node, 0, vect_body);
                       continue;
                     }
@@ -9776,8 +9777,7 @@ vectorizable_store (vec_info *vinfo,
               int group_size = DR_GROUP_SIZE (first_stmt_info);
               int nstmts = ceil_log2 (group_size) * group_size;
               inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
-                                               stmt_info, slp_node, 0,
-                                               vect_body);
+                                               slp_node, 0, vect_body);
               if (dump_enabled_p ())
                 dump_printf_loc (MSG_NOTE, vect_location,
                                  "vect_model_store_cost: "
@@ -9806,8 +9806,7 @@ vectorizable_store (vec_info *vinfo,
             {
               if (costing_p)
                 inside_cost += record_stmt_cost (cost_vec, 1, vec_perm,
-                                                 stmt_info, slp_node, 0,
-                                                 vect_body);
+                                                 slp_node, 0, vect_body);
               else
                 {
                   tree perm_mask = perm_mask_for_reverse (vectype);
@@ -10026,11 +10025,11 @@ vectorizable_store (vec_info *vinfo,
               /* Spill.  */
               prologue_cost
                 += record_stmt_cost (cost_vec, ncopies, vector_store,
-                                     stmt_info, slp_node, 0, vect_epilogue);
+                                     slp_node, 0, vect_epilogue);
               /* Loads.  */
               prologue_cost
                 += record_stmt_cost (cost_vec, ncopies * nregs, scalar_load,
-                                     stmt_info, slp_node, 0, vect_epilogue);
+                                     slp_node, 0, vect_epilogue);
             }
         }
     }
@@ -10603,9 +10602,8 @@ vectorizable_load (vec_info *vinfo,
           enum vect_cost_model_location cost_loc
             = hoist_p ? vect_prologue : vect_body;
           unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
-                                                stmt_info, slp_node, 0,
-                                                cost_loc);
-          cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info,
+                                                slp_node, 0, cost_loc);
+          cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, slp_node,
                                     0, cost_loc);
           unsigned int prologue_cost = hoist_p ? cost : 0;
           unsigned int inside_cost = hoist_p ? 0 : cost;
@@ -10871,8 +10869,7 @@ vectorizable_load (vec_info *vinfo,
                     n_adjacent_loads++;
                   else
                     inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
-                                                     stmt_info, slp_node, 0,
-                                                     vect_body);
+                                                     slp_node, 0, vect_body);
                   continue;
                 }
               tree this_off = build_int_cst (TREE_TYPE (alias_off),
@@ -10910,8 +10907,7 @@ vectorizable_load (vec_info *vinfo,
                 {
                   if (costing_p)
                     inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
-                                                     stmt_info, slp_node, 0,
-                                                     vect_body);
+                                                     slp_node, 0, vect_body);
                   else
                     {
                       tree vec_inv = build_constructor (lvectype, v);
@@ -10966,8 +10962,7 @@ vectorizable_load (vec_info *vinfo,
               vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
                                             true, &n_perms, &n_loads);
               inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
-                                               first_stmt_info, slp_node, 0,
-                                               vect_body);
+                                               slp_node, 0, vect_body);
             }
           else
             vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
@@ -11537,7 +11532,7 @@ vectorizable_load (vec_info *vinfo,
                   unsigned int cnunits = vect_nunits_for_cost (vectype);
                   inside_cost
                     = record_stmt_cost (cost_vec, cnunits, scalar_load,
-                                        stmt_info, slp_node, 0, vect_body);
+                                        slp_node, 0, vect_body);
                   continue;
                 }
               if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
@@ -11613,7 +11608,7 @@ vectorizable_load (vec_info *vinfo,
                   unsigned int cnunits = vect_nunits_for_cost (vectype);
                   inside_cost
                     = record_stmt_cost (cost_vec, cnunits, scalar_load,
-                                        stmt_info, slp_node, 0, vect_body);
+                                        slp_node, 0, vect_body);
                   continue;
                 }
               poly_uint64 offset_nunits
@@ -11742,16 +11737,16 @@ vectorizable_load (vec_info *vinfo,
                   /* For emulated gathers N offset vector element
                      offset add is consumed by the load).  */
                   inside_cost
                     = record_stmt_cost (cost_vec, const_nunits,
-                                        vec_to_scalar, stmt_info,
+                                        vec_to_scalar,
                                         slp_node, 0, vect_body);
                   /* N scalar loads plus gathering them into a vector.  */
                   inside_cost
                     = record_stmt_cost (cost_vec, const_nunits, scalar_load,
-                                        stmt_info, slp_node, 0, vect_body);
+                                        slp_node, 0, vect_body);
                   inside_cost
                     = record_stmt_cost (cost_vec, 1, vec_construct,
-                                        stmt_info, slp_node, 0, vect_body);
+                                        slp_node, 0, vect_body);
                   continue;
                 }
               unsigned HOST_WIDE_INT const_offset_nunits
@@ -12412,8 +12407,7 @@ vectorizable_load (vec_info *vinfo,
             {
               if (costing_p)
                 inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
-                                                stmt_info, slp_node, 0,
-                                                vect_body);
+                                                slp_node, 0, vect_body);
               else
                 {
                   tree perm_mask = perm_mask_for_reverse (vectype);
@@ -12482,8 +12476,7 @@ vectorizable_load (vec_info *vinfo,
               vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
                                             true, &n_perms, nullptr);
               inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
-                                              stmt_info, slp_node, 0,
-                                              vect_body);
+                                              slp_node, 0, vect_body);
             }
           else
             {
@@ -12510,8 +12503,7 @@ vectorizable_load (vec_info *vinfo,
               int group_size = DR_GROUP_SIZE (first_stmt_info);
               int nstmts = ceil_log2 (group_size) * group_size;
               inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
-                                               stmt_info, slp_node, 0,
-                                               vect_body);
+                                               slp_node, 0, vect_body);

               if (dump_enabled_p ())
                 dump_printf_loc (MSG_NOTE, vect_location,
@@ -12931,7 +12923,7 @@ vectorizable_condition (vec_info *vinfo,
         }

       STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
-      vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
+      vect_model_simple_cost (vinfo, ncopies, dts, ndts, slp_node,
                               cost_vec, kind);
       return true;
     }
@@ -13363,8 +13355,7 @@ vectorizable_comparison_1 (vec_info *vinfo, tree vectype,
           return false;
         }

-      vect_model_simple_cost (vinfo, stmt_info,
-                              ncopies * (1 + (bitop2 != NOP_EXPR)),
+      vect_model_simple_cost (vinfo, ncopies * (1 + (bitop2 != NOP_EXPR)),
                               dts, ndts, slp_node, cost_vec);
       return true;
     }
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index a2f33a5..7aa2b02 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2441,17 +2441,15 @@ record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                            STMT_VINFO_VECTYPE (stmt_info), misalign, where);
 }

-/* Overload of record_stmt_cost with VECTYPE derived from STMT_INFO and
-   SLP node specified.  */
+/* Overload of record_stmt_cost with VECTYPE derived from SLP node.  */
 inline unsigned
 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
-                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
-                  slp_tree node,
+                  enum vect_cost_for_stmt kind, slp_tree node,
                   int misalign, enum vect_cost_model_location where)
 {
-  return record_stmt_cost (body_cost_vec, count, kind, stmt_info, node,
-                           STMT_VINFO_VECTYPE (stmt_info), misalign, where);
+  return record_stmt_cost (body_cost_vec, count, kind, node,
+                           SLP_TREE_VECTYPE (node), misalign, where);
 }

 extern void vect_finish_replace_stmt (vec_info *, stmt_vec_info, gimple *);
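
Taken together, the tree-vect-stmts.cc and tree-vectorizer.h hunks above apply one refactoring to the vectorizer cost API: the SLP-node overload of record_stmt_cost now derives the vector type (SLP_TREE_VECTYPE) and the representative statement (SLP_TREE_REPRESENTATIVE) from the node itself, so call sites such as vect_model_simple_cost stop threading a separate stmt_info argument. The following is a small, self-contained C++ sketch of that pattern; slp_node_t, cost_entry and record_cost are hypothetical stand-in names for illustration only, not the GCC types, and the sketch only shows the shape of the change rather than anything usable inside GCC.

    #include <iostream>
    #include <string>
    #include <vector>

    /* Hypothetical stand-in for slp_tree: the node carries the data that the
       old API forced every caller to pass alongside it.  */
    struct slp_node_t
    {
      std::string vectype;         /* stand-in for SLP_TREE_VECTYPE (node) */
      std::string representative;  /* stand-in for SLP_TREE_REPRESENTATIVE (node) */
    };

    struct cost_entry
    {
      int count;
      std::string kind;
      std::string vectype;
      std::string stmt;
    };

    /* Base overload: every detail is passed explicitly.  */
    static unsigned
    record_cost (std::vector<cost_entry> &costs, int count,
                 const std::string &kind, const std::string &stmt,
                 const std::string &vectype)
    {
      costs.push_back ({count, kind, vectype, stmt});
      return count;
    }

    /* Node overload: derive the vector type and the representative statement
       from the node, mirroring the direction of the change above.  */
    static unsigned
    record_cost (std::vector<cost_entry> &costs, int count,
                 const std::string &kind, const slp_node_t &node)
    {
      return record_cost (costs, count, kind, node.representative, node.vectype);
    }

    int
    main ()
    {
      std::vector<cost_entry> costs;
      slp_node_t node = { "V4SF", "stmt#42" };

      /* Call sites now pass the node only, instead of a separate stmt_info.  */
      record_cost (costs, 2, "vector_stmt", node);

      for (const cost_entry &c : costs)
        std::cout << c.count << " x " << c.kind
                  << " (" << c.vectype << ", " << c.stmt << ")\n";
      return 0;
    }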