Diffstat (limited to 'gcc')
70 files changed, 2255 insertions, 186 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 21446b6..cbce913 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,611 @@ +2025-05-07 Jeff Law <jlaw@ventanamicro.com> + + PR target/120137 + PR target/120154 + * config/riscv/riscv-vect-permconst.cc (process_bb): Verify each + canonicalized element fits into the vector element mode. + +2025-05-07 Dongyan Chen <chendongyan@isrc.iscas.ac.cn> + + * common/config/riscv/riscv-common.cc: New extension. + * config/riscv/riscv.opt: Ditto. + +2025-05-07 Richard Earnshaw <rearnsha@arm.com> + + PR target/91323 + * config/arm/arm.cc (arm_select_cc_mode): Use CCFPEmode for LTGT. + +2025-05-07 Richard Earnshaw <rearnsha@arm.com> + + PR target/110796 + PR target/118446 + * config/arm/arm.h (REVERSIBLE_CC_MODE): FP modes are only + reversible if flag_finite_math_only. + * config/arm/arm.cc (arm_select_cc_mode): Return CCFPmode for all + FP comparisons if flag_finite_math_only. + +2025-05-07 Andrew Pinski <quic_apinski@quicinc.com> + + PR tree-optimization/111276 + * gimple-fold.cc (arith_code_with_undefined_signed_overflow): Make static. + (gimple_with_undefined_signed_overflow): New function. + * gimple-fold.h (arith_code_with_undefined_signed_overflow): Remove. + (gimple_with_undefined_signed_overflow): Add declaration. + * tree-if-conv.cc (if_convertible_gimple_assign_stmt_p): Use + gimple_with_undefined_signed_overflow instead of manually + checking lhs and the code of the stmt. + (predicate_statements): Likewise. + * tree-ssa-ifcombine.cc (ifcombine_rewrite_to_defined_overflow): Likewise. + * tree-ssa-loop-im.cc (move_computations_worker): Likewise. + * tree-ssa-reassoc.cc (update_range_test): Likewise. Reformat. + * tree-scalar-evolution.cc (final_value_replacement_loop): Use + gimple_with_undefined_signed_overflow instead of + arith_code_with_undefined_signed_overflow. + * tree-ssa-loop-split.cc (split_loop): Likewise. + +2025-05-07 Andrew Pinski <quic_apinski@quicinc.com> + + * tree-ssa-loop-im.cc (compute_invariantness): Hoist to the always executed point + if ignoring the cost. + +2025-05-07 Jan Hubicka <hubicka@ucw.cz> + + * config/i386/i386.cc (ix86_vector_costs::add_stmt_cost): Add FLOAT_EXPR, + FIX_TRUNC_EXPR and vec_promote_demote costs. + +2025-05-07 Jennifer Schmitz <jschmitz@nvidia.com> + + PR target/117978 + * config/aarch64/aarch64-protos.h: Declare + aarch64_emit_load_store_through_mode and aarch64_expand_maskloadstore. + * config/aarch64/aarch64-sve.md + (maskload<mode><vpred>): New define_expand folding maskloads with + certain predicate patterns to ASIMD loads. + (*aarch64_maskload<mode><vpred>): Renamed from maskload<mode><vpred>. + (maskstore<mode><vpred>): New define_expand folding maskstores with + certain predicate patterns to ASIMD stores. + (*aarch64_maskstore<mode><vpred>): Renamed from maskstore<mode><vpred>. + * config/aarch64/aarch64.cc + (aarch64_emit_load_store_through_mode): New function emitting a + load/store through subregs of a given mode. + (aarch64_emit_sve_pred_move): Refactor to use + aarch64_emit_load_store_through_mode. + (aarch64_expand_maskloadstore): New function to emit ASIMD loads/stores + for maskloads/stores with SVE predicates with VL1, VL2, VL4, VL8, or + VL16 patterns. + (aarch64_partial_ptrue_length): New function returning the number of leading + set bits in a predicate. + +2025-05-07 Stefan Schulze Frielinghaus <stefansf@gcc.gnu.org> + + * config/s390/s390-protos.h (s390_expand_cstoreti4): New + function. + * config/s390/s390.cc (s390_expand_cstoreti4): New function. 
+ * config/s390/s390.md (CC_SUZ): New mode iterator. + (l): New mode attribute. + (cc_tolower): New mode attribute. + * config/s390/vector.md (cstoreti4): New expander. + (*vec_cmpv2di_lane0_<cc_tolower>): New insn. + (*vec_cmpti_<cc_tolower>): New insn. + +2025-05-07 H.J. Lu <hjl.tools@gmail.com> + + PR target/120036 + * config/i386/i386-features.cc (ix86_get_vector_load_mode): + Handle 8/4/2 bytes. + (remove_redundant_vector_load): If the mode size is smaller than + its natural size, first insert an extra move with a QI vector + SUBREG of the same size to avoid validate_subreg failure. + +2025-05-07 hongtao.liu <hongtao.liu@intel.com> + + PR gcov-profile/118508 + * auto-profile.cc + (autofdo_source_profile::get_callsite_total_count): Fix name + mismatch for Fortran. + +2025-05-07 Jeff Law <jlaw@ventanamicro.com> + + * config/riscv/riscv.md (*branch<ANYI:mode>_shiftedarith_equals_zero): + Avoid generating unnecessary andi. Fix formatting. + +2025-05-06 Dongyan Chen <chendongyan@isrc.iscas.ac.cn> + + * common/config/riscv/riscv-common.cc: New extension. + +2025-05-06 Mingzhu Yan <yanmingzhu@iscas.ac.cn> + + * common/config/riscv/riscv-common.cc (riscv_ext_version_table): New + extension. + (riscv_ext_flag_table): Ditto. + * config/riscv/riscv.opt: New mask. + * doc/invoke.texi (RISC-V Options): New extension. + +2025-05-06 Jan Hubicka <hubicka@ucw.cz> + + * config/i386/i386.cc (ix86_rtx_costs): Cost FLOAT, UNSIGNED_FLOAT, + FIX, UNSIGNED_FIX. + * config/i386/i386.h (struct processor_costs): Add + cvtsi2ss, cvtss2si, cvtpi2ps, cvtps2pi. + * config/i386/x86-tune-costs.h (struct processor_costs): Update tables. + +2025-05-06 Martin Jambor <mjambor@suse.cz> + + PR ipa/119852 + * cgraph.h (cgraph_node::create_clone): Remove the default value of + argument suffix. Update function comment. + * cgraphclones.cc (cgraph_node::create_clone): Update function comment. + * ipa-inline-transform.cc (clone_inlined_nodes): Pass NULL to suffix + of create_clone explicitly. + * ipa-inline.cc (recursive_inlining): Likewise. + * lto-cgraph.cc (input_node): Likewise. + +2025-05-06 Martin Jambor <mjambor@suse.cz> + + * cgraph.h (cgraph_node::create_version_clone_with_body): Fix function + comment. Change the name of clone_name to suffix, in line with the + function definition. + * cgraphclones.cc (cgraph_node::create_version_clone_with_body): Fix + function comment. + +2025-05-06 Martin Jambor <mjambor@suse.cz> + + PR ipa/119852 + * cgraphclones.cc (dump_callgraph_transformation): Document the + function. Do not dump if suffix is NULL. + +2025-05-06 Martin Jambor <mjambor@suse.cz> + + * doc/invoke.texi (Developer Options): Document -fdump-ipa-clones. + +2025-05-06 David Malcolm <dmalcolm@redhat.com> + + * selftest-diagnostic.cc (test_diagnostic_context::report): Use + diagnostic_option_id rather than plain int. + * selftest-diagnostic.h (test_diagnostic_context::report): + Likewise. + +2025-05-06 David Malcolm <dmalcolm@redhat.com> + + PR sarif-replay/117988 + * json.cc (json::pointer::token::token): New ctors. + (json::pointer::token::~token): New. + (json::pointer::token::operator=): New. + (json::object::set): Set the value's m_pointer_token. + (json::array::append): Likewise. + * json.h (json::pointer::token): New struct. + (json::value::get_pointer_token): New accessor. + (json::value::m_pointer_token): New field. + * libsarifreplay.cc (get_logical_location_kind_for_json_kind): + New. + (make_logical_location_from_jv): New. + (sarif_replayer::report_problem): Set the logical location of the + diagnostic. 
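The PR sarif-replay/117988 entry above has json::object::set and json::array::append record a pointer token on each inserted value, so that sarif-replay can report where in a .sarif file a problem lies. A minimal self-contained C++ sketch of that idea follows; the types are hypothetical stand-ins, not gcc's json.h API, and RFC 6901 escaping of '/' and '~' is omitted.

// Sketch only: each value remembers the key or index under which its parent
// stores it, so a JSON Pointer such as "/runs/0/results" can be rebuilt.
#include <cstddef>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct value
{
  virtual ~value () = default;

  value *parent = nullptr;
  bool is_index = false;   // true: array element, false: object member
  std::string key;         // member name when is_index is false
  std::size_t index = 0;   // element index when is_index is true

  // Walk up to the root, building the JSON Pointer for this value.
  std::string
  pointer () const
  {
    if (!parent)
      return "";  // the root has an empty JSON Pointer
    std::string tok = is_index ? std::to_string (index) : key;
    return parent->pointer () + "/" + tok;
  }
};

struct object : value
{
  std::vector<std::unique_ptr<value>> members;

  value *
  set (const std::string &k, std::unique_ptr<value> v)
  {
    v->parent = this;
    v->is_index = false;
    v->key = k;
    members.push_back (std::move (v));
    return members.back ().get ();
  }
};

struct array : value
{
  std::vector<std::unique_ptr<value>> elements;

  value *
  append (std::unique_ptr<value> v)
  {
    v->parent = this;
    v->is_index = true;
    v->index = elements.size ();
    elements.push_back (std::move (v));
    return elements.back ().get ();
  }
};

int
main ()
{
  object root;
  array *runs
    = static_cast<array *> (root.set ("runs", std::make_unique<array> ()));
  object *run0
    = static_cast<object *> (runs->append (std::make_unique<object> ()));
  value *results = run0->set ("results", std::make_unique<array> ());
  std::cout << results->pointer () << "\n";  // prints /runs/0/results
}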
+ +2025-05-06 David Malcolm <dmalcolm@redhat.com> + + * diagnostic-format-sarif.cc (maybe_get_sarif_kind): Add cases for + new kinds of logical location. + * doc/libgdiagnostics/topics/logical-locations.rst: Add new kinds + of logical location for handling XML and JSON. + * libgdiagnostics.cc (impl_logical_location_manager::get_kind): + Add cases for new kinds of logical location. + (diagnostic_text_sink::text_starter): Likewise, introducing a + macro for this. + (diagnostic_manager_debug_dump_logical_location): Likewise. + * libgdiagnostics.h (enum diagnostic_logical_location_kind_t): Add + new kinds of logical location for handling XML and JSON. + * libsarifreplay.cc (handle_logical_location_object): Add entries + to "kind_values" for decoding sarif logical location kinds + relating to XML and JSON. + * logical-location.h (enum logical_location_kind): Add new kinds + of logical location for handling XML and JSON. + +2025-05-06 David Malcolm <dmalcolm@redhat.com> + + PR other/116176 + * diagnostic-format-sarif.cc (class sarif_array_of_unique): New + template. + (class sarif_logical_location): Move here from + diagnostic-format-sarif.h. + (sarif_builder::m_cached_logical_locs): New. + (sarif_builder::sarif_builder): Initialize it. + (sarif_builder::set_any_logical_locs_arr): Call + make_minimal_sarif_logical_location rather than + make_sarif_logical_location_object. + (sarif_property_bag::set_logical_location): Likewise. + (make_sarif_logical_location_object): Replace with... + (sarif_builder::ensure_sarif_logical_location_for): ...this. + Capture "parentIndex" property. Consolidate into + theRuns.logicalLocations. + (sarif_builder::make_minimal_sarif_logical_location): New. + (sarif_builder::make_run_object): Add "index" properties to + m_cached_logical_locs and move it to theRuns.logicalLocations. + (selftest::test_sarif_array_of_unique_1): New. + (selftest::test_sarif_array_of_unique_2): New. + (selftest::diagnostic_format_sarif_cc_tests): Call the new + selftests. + * diagnostic-format-sarif.h (class sarif_logical_location): Move + to diagnostic-format-sarif.cc. + (make_sarif_logical_location_object): Drop decl. + * json.cc (value::compare): New. + (object::compare): New. + (selftest::fail_comparison): New. + (selftest::assert_json_equal): New. + (ASSERT_JSON_EQ): New. + (selftest::assert_json_non_equal): New. + (ASSERT_JSON_NE): New. + (selftest::test_comparisons): New. + (selftest::json_cc_tests): Call the new selftest. + * json.h (json::value::dyn_cast_object): New vfunc. + (json::object::dyn_cast_object): New vfunc impl. + (json::object::compare): New decl. + * libgdiagnostics.cc + (impl_logical_location_manager::get_parent): New. + * logical-location.h (logical_location_manager::get_parent): New + vfunc impl. + * selftest-logical-location.h + (test_logical_location_manager::get_parent): New vfunc impl. + * tree-logical-location.cc (assert_valid_tree): New. + (tree_logical_location_manager::get_short_name): Support types as + well as decls. + (tree_logical_location_manager::get_name_with_scope): Gracefully + handle non-decl nodes. + (tree_logical_location_manager::get_internal_name): Likewise. + (tree_logical_location_manager::get_kind): Don't attempt to handle + null nodes. Handle NAMESPACE_DECL and RECORD_TYPE. + (tree_logical_location_manager::get_name_for_path_output): + Gracefully handle non-decl nodes. + (tree_logical_location_manager::get_parent): New. + * tree-logical-location.h + (tree_logical_location_manager::get_parent): New vfunc impl. 
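The PR other/116176 entry above consolidates SARIF logical locations into a single deduplicated theRuns.logicalLocations array, with each use referring to an element by index and to enclosing scopes via "parentIndex". The C++ sketch below shows only the dedup-by-index idea; array_of_unique is a hypothetical helper, not gcc's sarif_array_of_unique template, which works on json values.

// Sketch only: appending a value that compares equal to an existing element
// returns the existing index instead of growing the array.
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

template <typename T>
class array_of_unique
{
public:
  // Return the index of VAL, appending it only if no equal element exists.
  std::size_t
  insert (const T &val)
  {
    for (std::size_t i = 0; i < m_elements.size (); ++i)
      if (m_elements[i] == val)
	return i;
    m_elements.push_back (val);
    return m_elements.size () - 1;
  }

  const std::vector<T> &elements () const { return m_elements; }

private:
  std::vector<T> m_elements;
};

int
main ()
{
  array_of_unique<std::string> logical_locs;
  std::size_t a = logical_locs.insert ("::foo");  // index 0
  std::size_t b = logical_locs.insert ("::bar");  // index 1
  std::size_t c = logical_locs.insert ("::foo");  // reuses index 0
  std::cout << a << " " << b << " " << c << " "
	    << logical_locs.elements ().size () << "\n";  // 0 1 0 2
}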
+ +2025-05-06 David Malcolm <dmalcolm@redhat.com> + + * diagnostic-client-data-hooks.h: Include "logical-location.h". + (diagnostic_client_data_hooks::get_logical_location_manager): New. + (diagnostic_client_data_hooks::get_current_logical_location): + Convert return type from const logical_location * to + logical_location. + * diagnostic-format-json.cc: Include + "diagnostic-client-data-hooks.h". + (make_json_for_path): Update to use logical_location_manager from + the context. + * diagnostic-format-sarif.cc + (sarif_builder::get_logical_location_manager): New. + (sarif_builder::make_location_object): Update type of logical_loc + from "const logical_location *" to "logical_location". + (sarif_builder::set_any_logical_locs_arr): Likewise. + (sarif_builder::m_logical_loc_mgr): New field. + (sarif_result::on_nested_diagnostic): Use logical_location default + ctor rather than nullptr. + (sarif_builder::sarif_builder): Initialize m_logical_loc_mgr from + context's client data hooks. + (sarif_builder::make_locations_arr): Convert type of logical_loc + from from "const logical_location *" to "logical_location". + (sarif_builder::set_any_logical_locs_arr): Likewise. Pass manager + to make_sarif_logical_location_object. + (sarif_builder::make_location_object): Likewise. + (sarif_property_bag::set_logical_location): New. + (make_sarif_logical_location_object): Update for introduction of + logical_location_manager. + (populate_thread_flow_location_object): Pass builder to + ev.maybe_add_sarif_properties. + (selftest::test_make_location_object): Use logical_location + default ctor rather than nullptr. + * diagnostic-format-sarif.h (class logical_location): Replace + forward decl with include of "logical-location.h". + (class sarif_builder): New forward decl. + (sarif_property_bag::set_logical_location): New. + (make_sarif_logical_location_object): Add "mgr" param. + * diagnostic-path.cc + (diagnostic_path::get_first_event_in_a_function): Update for + change of logical_location type. + (per_thread_summary::per_thread_summary): Pass in + "logical_loc_mgr". + (per_thread_summary::m_logical_loc_mgr): New field. + (event_range::m_logical_loc): Update for change of + logical_location type. + (path_summary::get_logical_location_manager): New accessor. + (path_summary::m_logical_loc_mgr): New field. + (path_summary::get_or_create_events_for_thread_id): Pass + m_logical_loc_mgr to per_thread_summary ctor. + (path_summary::path_summary): Initialize m_logical_loc_mgr. + (thread_event_printer::print_swimlane_for_event_range): Add param + "logical_loc_mgr". Update for change in logical_loc type. + (print_path_summary_as_text): Pass manager to + thread_event_printer::print_swimlane_for_event_range. + (diagnostic_text_output_format::print_path): Update for + introduction of logical_location_manager. + * diagnostic-path.h: Include "logical-location.h". + (class sarif_builder): New forward decl. + (diagnostic_event::get_logical_location): Convert return type from + "const logical_location *" to "logical_location". + (diagnostic_event::maybe_add_sarif_properties): Add sarif_builder + param. + (diagnostic_path::get_logical_location_manager): New accessor. + (diagnostic_path::diagnostic_path): New ctor, taking manager. + (diagnostic_path::m_logical_loc_mgr): New field. + * diagnostic.cc + (diagnostic_context::get_logical_location_manager): New. + (logical_location::function_p): Convert to... + (logical_location_manager::function_p): ...this. + * diagnostic.h (class logical_location): Replace forward decl + with... 
+ (class logical_location_manager): ...this. + (diagnostic_context::get_logical_location_manager): New decl. + * lazy-diagnostic-path.cc + (selftest::test_lazy_path::test_lazy_path): Pass m_logical_loc_mgr + to path ctor. + (selftest::test_lazy_path::make_inner_path): Likewise. + (selftest::test_lazy_path::m_logical_loc_mgr): New field. + * lazy-diagnostic-path.h + (lazy_diagnostic_path::lazy_diagnostic_path): New ctor. + * libgdiagnostics.cc (struct diagnostic_logical_location): Convert + from subclass of logical_location to a plain struct, dropping + accessors. + (class impl_logical_location_manager): New. + (impl_diagnostic_client_data_hooks::get_logical_location_manager): + New + (impl_diagnostic_client_data_hooks::m_logical_location_manager): + New field. + (diagnostic_manager::get_logical_location_manager): New. + (libgdiagnostics_path_event::get_logical_location): Reimplement. + (diagnostic_execution_path::diagnostic_execution_path): Add + logical_loc_mgr and pass to base class. + (diagnostic_execution_path::same_function_p): Update for change to + logical_location type. + (diagnostic::add_execution_path): Pass logical_loc_mgr to path + ctor. + (impl_diagnostic_client_data_hooks::get_current_logical_location): + Reimplement. + (diagnostic_text_sink::text_starter): Reimplement printing of + logical location. + (diagnostic_manager::new_execution_path): Pass mgr to path ctor. + (diagnostic_manager_debug_dump_logical_location): Update for + changes to diagnostic_logical_location. + (diagnostic_logical_location_get_kind): Likewise. + (diagnostic_logical_location_get_parent): Likewise. + (diagnostic_logical_location_get_short_name): Likewise. + (diagnostic_logical_location_get_fully_qualified_name): Likewise. + (diagnostic_logical_location_get_decorated_name): Likewise. + * logical-location.h (class logical_location_manager): New. + (class logical_location): Convert to typedef of + logical_location_manager::key. + * selftest-diagnostic-path.cc + (selftest::test_diagnostic_path::test_diagnostic_path): Pass + m_test_logical_loc_mgr to base ctor. + (selftest::test_diagnostic_path::same_function_p): Use pointer + comparison. + (selftest::test_diagnostic_path::add_event): Use + logical_location_from_funcname. + (selftest::test_diagnostic_path::add_thread_event): Likewise. + (selftest::test_diagnostic_path::logical_location_from_funcname): + New. + (selftest::test_diagnostic_event::test_diagnostic_event): Fix + indentation. Pass logical_location rather than const char *. + * selftest-diagnostic-path.h + (selftest::test_diagnostic_event::test_diagnostic_event): + Likewise. + (selftest::test_diagnostic_event::get_logical_location): Update + for change to logical_location type. + (selftest::test_diagnostic_event::get_function_name): Drop. + (selftest::test_diagnostic_event::m_logical_loc): Convert from + test_logical_location to logical_location. + (selftest::test_diagnostic_path::logical_location_from_funcname): + New. + (selftest::test_diagnostic_path::m_test_logical_loc_mgr): New + field. + * selftest-logical-location.cc: Include "selftest.h". + (selftest::test_logical_location::test_logical_location): Drop. + (selftest::test_logical_location_manager::~test_logical_location_manager): + New. + (selftest::test_logical_location::get_short_name): Replace with... + (selftest::test_logical_location_manager::get_short_name): + ...this. + (selftest::test_logical_location::get_name_with_scope): Replace + with... + (selftest::test_logical_location_manager::get_name_with_scope): + ...this. 
+ (selftest::test_logical_location::get_internal_name): Replace + with... + (selftest::test_logical_location_manager::get_internal_name): + ...this. + (selftest::test_logical_location::get_kind): Replace with... + (selftest::test_logical_location_manager::get_kind): ...this. + (selftest::test_logical_location::get_name_for_path_output): + Replace with... + (selftest::test_logical_location_manager::get_name_for_path_output): + ...this. + (selftest::test_logical_location_manager::logical_location_from_funcname): + New. + (selftest::test_logical_location_manager::item_from_funcname): + New. + (selftest::selftest_logical_location_cc_tests): New. + * selftest-logical-location.h (class test_logical_location): + Replace with... + (class test_logical_location_manager): ...this. + * selftest-run-tests.cc (selftest::run_tests): Call + selftest_logical_location_cc_tests. + * selftest.h (selftest::selftest_logical_location_cc_tests): New + decl. + * simple-diagnostic-path.cc + (simple_diagnostic_path::simple_diagnostic_path): Add + "logical_loc_mgr" param and pass it to base ctor. + (simple_diagnostic_event::simple_diagnostic_event): Update init of + m_logical_loc. + (selftest::test_intraprocedural_path): Update for changes to + logical locations. + * simple-diagnostic-path.h: Likewise. + * tree-diagnostic-client-data-hooks.cc + (compiler_data_hooks::get_logical_location_manger): New. + (compiler_data_hooks::get_current_logical_location): Update. + (compiler_data_hooks::m_current_fndecl_logical_loc): Replace + with... + (compiler_data_hooks::m_logical_location_manager): ...this. + * tree-logical-location.cc + (compiler_logical_location::get_short_name_for_tree): Replace + with... + (tree_logical_location_manager::get_short_name): ...this. + (compiler_logical_location::get_name_with_scope_for_tree): Replace + with... + (tree_logical_location_manager::get_name_with_scope): ...this. + (compiler_logical_location::get_internal_name_for_tree): Replace + with... + (tree_logical_location_manager::get_internal_name): ...this. + (compiler_logical_location::get_kind_for_tree): Replace with... + (tree_logical_location_manager::get_kind): ...this. + (compiler_logical_location::get_name_for_tree_for_path_output): + Replace with... + (tree_logical_location_manager::get_name_for_path_output): + ...this. + (tree_logical_location::get_short_name): Drop. + (tree_logical_location::get_name_with_scope): Drop. + (tree_logical_location::get_internal_name): Drop. + (tree_logical_location::get_kind): Drop. + (tree_logical_location::get_name_for_path_output): Drop. + (current_fndecl_logical_location::get_short_name): Drop. + (current_fndecl_logical_location::get_name_with_scope): Drop. + (current_fndecl_logical_location::get_internal_name): Drop. + (current_fndecl_logical_location::get_kind): Drop. + (current_fndecl_logical_location::get_name_for_path_output): Drop. + * tree-logical-location.h (class compiler_logical_location): Drop. + (class tree_logical_location): Drop. + (class current_fndecl_logical_location): Drop. + (class tree_logical_location_manager): New. + +2025-05-06 David Malcolm <dmalcolm@redhat.com> + + * doc/libgdiagnostics/topics/compatibility.rst: New file, based + on gcc/jit/docs/topics/compatibility.rst. + * doc/libgdiagnostics/topics/index.rst: Add compatibility.rst. + * doc/libgdiagnostics/topics/logical-locations.rst (Accessors): + New section. + * libgdiagnostics++.h (logical_location::operator bool): New. + (logical_location::operator==): New. + (logical_location::operator!=): New. 
(logical_location::get_kind): New. + (logical_location::get_parent): New. + (logical_location::get_short_name): New. + (logical_location::get_fully_qualified_name): New. + (logical_location::get_decorated_name): New. + * libgdiagnostics.cc + (diagnostic_logical_location::get_fully_qualified_name): New. + (diagnostic_logical_location_get_kind): New entrypoint. + (diagnostic_logical_location_get_parent): New entrypoint. + (diagnostic_logical_location_get_short_name): New entrypoint. + (diagnostic_logical_location_get_fully_qualified_name): New + entrypoint. + (diagnostic_logical_location_get_decorated_name): New entrypoint. + * libgdiagnostics.h + (LIBDIAGNOSTICS_HAVE_LOGICAL_LOCATION_ACCESSORS): New define. + (diagnostic_logical_location_get_kind): New entrypoint. + (diagnostic_logical_location_get_parent): New entrypoint. + (diagnostic_logical_location_get_short_name): New entrypoint. + (diagnostic_logical_location_get_fully_qualified_name): New + entrypoint. + (diagnostic_logical_location_get_decorated_name): New entrypoint. + * libgdiagnostics.map (LIBGDIAGNOSTICS_ABI_1): New. + +2025-05-06 Shreya Munnangi <smunnangi1@ventanamicro.com> + + PR middle-end/114512 + * config/riscv/bitmanip.md (bext* patterns): New patterns for + bext recognition plus splitter for extracting variable bit from + a constant. + * config/riscv/predicates.md (bitpos_mask_operand): New predicate. + +2025-05-06 Pan Li <pan2.li@intel.com> + + * config/riscv/autovec-opt.md (*<optab>_vx_<mode>): Add new + combine to convert vec_duplicate + vadd.vv to vaddvx on GR2VR + cost. + * config/riscv/riscv.cc (riscv_rtx_costs): Take care of the cost + when vec_dup and vadd v, vec_dup(x). + * config/riscv/vector-iterators.md: Add new iterator for vx. + +2025-05-06 Pan Li <pan2.li@intel.com> + + * config/riscv/riscv-protos.h (get_gr2vr_cost): Add new decl to + get the cost of gr2vr. + * config/riscv/riscv-vector-costs.cc (costs::adjust_stmt_cost): + Leverage the helper function to get the cost of gr2vr. + * config/riscv/riscv.cc (riscv_register_move_cost): Ditto. + (riscv_builtin_vectorization_cost): Ditto. + (get_gr2vr_cost): Add new impl of the helper function. + +2025-05-06 Pan Li <pan2.li@intel.com> + + * config/riscv/riscv-opts.h (RVV_GR2VR_COST_UNPROVIDED): Add + new macro to indicate the param is not provided. + * config/riscv/riscv.opt: Add new option --param=gpr2vr-cost. + +2025-05-06 Richard Biener <rguenther@suse.de> + + PR tree-optimization/115777 + * tree-vectorizer.h (_slp_tree::avoid_stlf_fail): New member. + * tree-vect-slp.cc (_slp_tree::_slp_tree): Initialize it. + (vect_print_slp_tree): Dump it. + * tree-vect-data-refs.cc (vect_slp_analyze_instance_dependence): + For dataflow dependent loads of a store check whether there's + a cross-iteration data dependence that for sure prohibits + store-to-load forwarding and mark involved loads. + * tree-vect-stmts.cc (get_group_load_store_type): For avoid_stlf_fail + marked loads use VMAT_ELEMENTWISE. + +2025-05-06 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/120074 + * gimple-fold.cc (fold_truth_andor_for_ifcombine): For + lsignbit && l_xor case, punt if ll_bitsize != lr_bitsize. Similarly + for rsignbit && r_xor case, punt if rl_bitsize != rr_bitsize. + Formatting fix. + +2025-05-06 Jan Hubicka <hubicka@ucw.cz> + + * config/i386/i386.cc (ix86_tls_index): Add ifdef. 
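For the PR middle-end/114512 entry above (riscv bext recognition), the kind of source idiom involved looks like the following C sample. It is illustrative only and not part of the patch; the exact code generated depends on -march (Zbs availability) and cost tuning.

/* Illustrative only: with the Zbs extension, a single-bit test such as
   (x >> n) & 1 can map to a single bext instruction; the new splitter
   also covers testing a variable bit position of a constant.  */
unsigned
bit_test (unsigned x, unsigned n)
{
  return (x >> n) & 1;
}

unsigned
bit_of_constant (unsigned n)
{
  return (0x12345678u >> n) & 1;
}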
+ +2025-05-06 Richard Biener <rguenther@suse.de> + + PR tree-optimization/120031 + * match.pd ((nop_outer_cast)-(inner_cast)var -> -(outer_cast)(var)): + Allow inner conversions that are not widenings when the outer + type is unsigned. + +2025-05-06 LIU Hao <lh_mouse@126.com> + + PR pch/14940 + * config/i386/host-mingw32.cc (mingw32_gt_pch_use_address): + Replace the loop that attempted to map the PCH only to its + original address with more adaptive operations. + +2025-05-06 Julian Waters <tanksherman27@gmail.com> + Eric Botcazou <botcazou@adacore.com> + Uroš Bizjak <ubizjak@gmail.com> + Liu Hao <lh_mouse@126.com> + + * config/i386/i386.cc (ix86_legitimate_constant_p): Handle new UNSPEC. + (legitimate_pic_operand_p): Handle new UNSPEC. + (legitimate_pic_address_disp_p): Handle new UNSPEC. + (ix86_legitimate_address_p): Handle new UNSPEC. + (ix86_tls_index_symbol): New symbol for _tls_index. + (ix86_tls_index): Handle creation of _tls_index symbol. + (legitimize_tls_address): Create thread local access sequence. + (output_pic_addr_const): Handle new UNSPEC. + (i386_output_dwarf_dtprel): Handle new UNSPEC. + (i386_asm_output_addr_const_extra): Handle new UNSPEC. + * config/i386/i386.h (TARGET_WIN32_TLS): Define. + * config/i386/i386.md: New UNSPEC. + * config/i386/predicates.md: Handle new UNSPEC. + * config/mingw/mingw32.h (TARGET_WIN32_TLS): Define. + (TARGET_ASM_SELECT_SECTION): Define. + (DEFAULT_TLS_SEG_REG): Define. + * config/mingw/winnt.cc (mingw_pe_select_section): Select proper TLS section. + (mingw_pe_unique_section): Handle TLS section. + * config/mingw/winnt.h (mingw_pe_select_section): Declare. + * configure: Regenerate. + * configure.ac: New check for broken linker thread local support. + 2025-05-05 Jeff Law <jlaw@ventanamicro.com> PR target/119971 diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 6906e73..8cb3c2b 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20250506 +20250508 diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog index 186f355..1fbba5d 100644 --- a/gcc/analyzer/ChangeLog +++ b/gcc/analyzer/ChangeLog @@ -1,3 +1,29 @@ +2025-05-06 David Malcolm <dmalcolm@redhat.com> + + * checker-event.cc (checker_event::checker_event): Update + initialization of m_logical_loc. + (checker_event::maybe_add_sarif_properties): Add "builder" param. + Replace call to make_sarif_logical_location_object with call to + sarif_property_bag::set_logical_location. + (superedge_event::maybe_add_sarif_properties): Add "builder" + param. + * checker-event.h (checker_event::get_logical_location): + Reimplement. + (checker_event::maybe_add_sarif_properties): Add "builder" param. + (checker_event::m_logical_loc): Convert from tree_logical_location + to logical_location. + (superedge_event::maybe_add_sarif_properties): Add sarif_builder + param. + * checker-path.h (checker_path::checker_path): Add logical_loc_mgr + param. + * diagnostic-manager.cc + (diagnostic_manager::emit_saved_diagnostic): Pass logical location + manager to emission_path ctor. + (diagnostic_manager::get_logical_location_manager): New. + * diagnostic-manager.h + (diagnostic_manager::get_logical_location_manager): New decl. 
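The analyzer entry above, like the libgdiagnostics entries earlier in this ChangeLog, converts logical_location from a polymorphic pointer into a small value-type key that only has meaning together with a logical_location_manager. The C++ sketch below shows that key-plus-manager split with hypothetical types; it is not gcc's actual class hierarchy.

// Sketch only: events hold a cheap opaque key; a manager owned by the
// diagnostic context resolves keys to names on demand.
#include <iostream>
#include <string>
#include <vector>

struct logical_location
{
  unsigned id;  // 0 means "no logical location"

  explicit operator bool () const { return id != 0; }
  bool operator== (const logical_location &other) const
  {
    return id == other.id;
  }
};

class logical_location_manager
{
public:
  // Return a key for NAME, reusing the existing key if already interned.
  logical_location
  intern (const std::string &name)
  {
    for (unsigned i = 0; i < m_names.size (); ++i)
      if (m_names[i] == name)
	return logical_location {i + 1};
    m_names.push_back (name);
    return logical_location {(unsigned) m_names.size ()};
  }

  // Keys are only meaningful when paired with the manager that made them.
  std::string
  get_name (logical_location loc) const
  {
    return loc ? m_names[loc.id - 1] : "<no logical location>";
  }

private:
  std::vector<std::string> m_names;
};

int
main ()
{
  logical_location_manager mgr;
  logical_location ev_loc = mgr.intern ("calls_malloc");  // held by an event
  logical_location again = mgr.intern ("calls_malloc");   // same key back
  std::cout << mgr.get_name (ev_loc) << " " << (ev_loc == again) << "\n";
}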
+ 2025-04-30 David Malcolm <dmalcolm@redhat.com> * sm-malloc.cc (malloc_diagnostic::describe_state_change): Tweak diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc index 7e0e8c6..9966d93 100644 --- a/gcc/auto-profile.cc +++ b/gcc/auto-profile.cc @@ -848,8 +848,8 @@ autofdo_source_profile::get_callsite_total_count ( function_instance *s = get_function_instance_by_inline_stack (stack); if (s == NULL - || afdo_string_table->get_index (IDENTIFIER_POINTER ( - DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ()) + ||(afdo_string_table->get_index_by_decl (edge->callee->decl) + != s->name())) return 0; return s->total_count (); diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index 145a0f2..ca14eb9 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -327,6 +327,7 @@ static const struct riscv_ext_version riscv_ext_version_table[] = {"zalrsc", ISA_SPEC_CLASS_NONE, 1, 0}, {"zabha", ISA_SPEC_CLASS_NONE, 1, 0}, {"zacas", ISA_SPEC_CLASS_NONE, 1, 0}, + {"zama16b", ISA_SPEC_CLASS_NONE, 1, 0}, {"zba", ISA_SPEC_CLASS_NONE, 1, 0}, {"zbb", ISA_SPEC_CLASS_NONE, 1, 0}, @@ -432,6 +433,8 @@ static const struct riscv_ext_version riscv_ext_version_table[] = {"zcmp", ISA_SPEC_CLASS_NONE, 1, 0}, {"zcmt", ISA_SPEC_CLASS_NONE, 1, 0}, + {"sdtrig", ISA_SPEC_CLASS_NONE, 1, 0}, + {"smaia", ISA_SPEC_CLASS_NONE, 1, 0}, {"smepmp", ISA_SPEC_CLASS_NONE, 1, 0}, {"smstateen", ISA_SPEC_CLASS_NONE, 1, 0}, @@ -440,7 +443,10 @@ static const struct riscv_ext_version riscv_ext_version_table[] = {"sscofpmf", ISA_SPEC_CLASS_NONE, 1, 0}, {"ssstateen", ISA_SPEC_CLASS_NONE, 1, 0}, {"sstc", ISA_SPEC_CLASS_NONE, 1, 0}, + {"ssstrict", ISA_SPEC_CLASS_NONE, 1, 0}, + {"svade", ISA_SPEC_CLASS_NONE, 1, 0}, + {"svadu", ISA_SPEC_CLASS_NONE, 1, 0}, {"svinval", ISA_SPEC_CLASS_NONE, 1, 0}, {"svnapot", ISA_SPEC_CLASS_NONE, 1, 0}, {"svpbmt", ISA_SPEC_CLASS_NONE, 1, 0}, @@ -1652,6 +1658,7 @@ static const riscv_ext_flag_table_t riscv_ext_flag_table[] = RISCV_EXT_FLAG_ENTRY ("zalrsc", x_riscv_za_subext, MASK_ZALRSC), RISCV_EXT_FLAG_ENTRY ("zabha", x_riscv_za_subext, MASK_ZABHA), RISCV_EXT_FLAG_ENTRY ("zacas", x_riscv_za_subext, MASK_ZACAS), + RISCV_EXT_FLAG_ENTRY ("zama16b", x_riscv_za_subext, MASK_ZAMA16B), RISCV_EXT_FLAG_ENTRY ("zba", x_riscv_zb_subext, MASK_ZBA), RISCV_EXT_FLAG_ENTRY ("zbb", x_riscv_zb_subext, MASK_ZBB), @@ -1764,9 +1771,11 @@ static const riscv_ext_flag_table_t riscv_ext_flag_table[] = RISCV_EXT_FLAG_ENTRY ("zcmp", x_riscv_zc_subext, MASK_ZCMP), RISCV_EXT_FLAG_ENTRY ("zcmt", x_riscv_zc_subext, MASK_ZCMT), - RISCV_EXT_FLAG_ENTRY ("svinval", x_riscv_sv_subext, MASK_SVINVAL), - RISCV_EXT_FLAG_ENTRY ("svnapot", x_riscv_sv_subext, MASK_SVNAPOT), - RISCV_EXT_FLAG_ENTRY ("svvptc", x_riscv_sv_subext, MASK_SVVPTC), + RISCV_EXT_FLAG_ENTRY ("svade", x_riscv_sv_subext, MASK_SVADE), + RISCV_EXT_FLAG_ENTRY ("svadu", x_riscv_sv_subext, MASK_SVADU), + RISCV_EXT_FLAG_ENTRY ("svinval", x_riscv_sv_subext, MASK_SVINVAL), + RISCV_EXT_FLAG_ENTRY ("svnapot", x_riscv_sv_subext, MASK_SVNAPOT), + RISCV_EXT_FLAG_ENTRY ("svvptc", x_riscv_sv_subext, MASK_SVVPTC), RISCV_EXT_FLAG_ENTRY ("ztso", x_riscv_ztso_subext, MASK_ZTSO), diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 1ca86c9..c935e7b 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -1026,6 +1026,8 @@ rtx aarch64_ptrue_reg (machine_mode, unsigned int); rtx aarch64_ptrue_reg (machine_mode, machine_mode); rtx 
aarch64_pfalse_reg (machine_mode); bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *); +void aarch64_emit_load_store_through_mode (rtx, rtx, machine_mode); +bool aarch64_expand_maskloadstore (rtx *, machine_mode); void aarch64_emit_sve_pred_move (rtx, rtx, rtx); void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode); bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx); @@ -1053,6 +1055,7 @@ void aarch64_subvti_scratch_regs (rtx, rtx, rtx *, rtx *, rtx *, rtx *); void aarch64_expand_subvti (rtx, rtx, rtx, rtx, rtx, rtx, rtx, bool); +int aarch64_exact_log2_inverse (unsigned int, rtx); /* Initialize builtins for SIMD intrinsics. */ diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index e2afe87..1099e74 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1193,12 +1193,14 @@ (define_insn "aarch64_simd_vec_set_zero<mode>" [(set (match_operand:VALL_F16 0 "register_operand" "=w") (vec_merge:VALL_F16 - (match_operand:VALL_F16 1 "aarch64_simd_imm_zero" "") - (match_operand:VALL_F16 3 "register_operand" "0") + (match_operand:VALL_F16 1 "register_operand" "0") + (match_operand:VALL_F16 3 "aarch64_simd_imm_zero" "") (match_operand:SI 2 "immediate_operand" "i")))] - "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0" + "TARGET_SIMD && aarch64_exact_log2_inverse (<nunits>, operands[2]) >= 0" { - int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2]))); + int elt = ENDIAN_LANE_N (<nunits>, + aarch64_exact_log2_inverse (<nunits>, + operands[2])); operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt); return "ins\\t%0.<Vetype>[%p2], <vwcore>zr"; } diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 7bf12ff..f39af6e 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -1286,7 +1286,24 @@ ;; ------------------------------------------------------------------------- ;; Predicated LD1 (single). -(define_insn "maskload<mode><vpred>" +(define_expand "maskload<mode><vpred>" + [(set (match_operand:SVE_ALL 0 "register_operand") + (unspec:SVE_ALL + [(match_operand:<VPRED> 2 "nonmemory_operand") + (match_operand:SVE_ALL 1 "memory_operand") + (match_operand:SVE_ALL 3 "aarch64_maskload_else_operand")] + UNSPEC_LD1_SVE))] + "TARGET_SVE" + { + if (aarch64_expand_maskloadstore (operands, <MODE>mode)) + DONE; + if (CONSTANT_P (operands[2])) + operands[2] = force_reg (<VPRED>mode, operands[2]); + } +) + +;; Predicated LD1 (single). +(define_insn "*aarch64_maskload<mode><vpred>" [(set (match_operand:SVE_ALL 0 "register_operand" "=w") (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl") @@ -2287,7 +2304,24 @@ ;; ------------------------------------------------------------------------- ;; Predicated ST1 (single). -(define_insn "maskstore<mode><vpred>" +(define_expand "maskstore<mode><vpred>" + [(set (match_operand:SVE_ALL 0 "memory_operand") + (unspec:SVE_ALL + [(match_operand:<VPRED> 2 "nonmemory_operand") + (match_operand:SVE_ALL 1 "register_operand") + (match_dup 0)] + UNSPEC_ST1_SVE))] + "TARGET_SVE" + { + if (aarch64_expand_maskloadstore (operands, <MODE>mode)) + DONE; + if (CONSTANT_P (operands[2])) + operands[2] = force_reg (<VPRED>mode, operands[2]); + } +) + +;; Predicated ST1 (single). 
+(define_insn "*aarch64_maskstore<mode><vpred>" [(set (match_operand:SVE_ALL 0 "memory_operand" "+m") (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl") diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index fff8d9d..9e3f288 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -3667,6 +3667,14 @@ aarch64_partial_ptrue_length (rtx_vector_builder &builder, if (builder.nelts_per_pattern () == 3) return 0; + /* It is conservatively correct to drop the element size to a lower value, + and we must do so if the predicate consists of a leading "foreground" + sequence that is smaller than the element size. Without this, + we would test only one bit and so treat everything as either an + all-true or an all-false predicate. */ + if (builder.nelts_per_pattern () == 2) + elt_size = MIN (elt_size, builder.npatterns ()); + /* Skip over leading set bits. */ unsigned int nelts = builder.encoded_nelts (); unsigned int i = 0; @@ -3698,6 +3706,24 @@ aarch64_partial_ptrue_length (rtx_vector_builder &builder, return vl; } +/* Return: + + * -1 if all bits of PRED are set + * N if PRED has N leading set bits followed by all clear bits + * 0 if PRED does not have any of these forms. */ + +int +aarch64_partial_ptrue_length (rtx pred) +{ + rtx_vector_builder builder; + if (!aarch64_get_sve_pred_bits (builder, pred)) + return 0; + + auto elt_size = vector_element_size (GET_MODE_BITSIZE (GET_MODE (pred)), + GET_MODE_NUNITS (GET_MODE (pred))); + return aarch64_partial_ptrue_length (builder, elt_size); +} + /* See if there is an svpattern that encodes an SVE predicate of mode PRED_MODE in which the first VL bits are set and the rest are clear. Return the pattern if so, otherwise return AARCH64_NUM_SVPATTERNS. @@ -6410,8 +6436,32 @@ aarch64_stack_protect_canary_mem (machine_mode mode, rtx decl_rtl, return gen_rtx_MEM (mode, force_reg (Pmode, addr)); } -/* Emit an SVE predicated move from SRC to DEST. PRED is a predicate - that is known to contain PTRUE. */ +/* Emit a load/store from a subreg of SRC to a subreg of DEST. + The subregs have mode NEW_MODE. Use only for reg<->mem moves. */ +void +aarch64_emit_load_store_through_mode (rtx dest, rtx src, machine_mode new_mode) +{ + gcc_assert ((MEM_P (dest) && register_operand (src, VOIDmode)) + || (MEM_P (src) && register_operand (dest, VOIDmode))); + auto mode = GET_MODE (dest); + auto int_mode = aarch64_sve_int_mode (mode); + if (MEM_P (src)) + { + rtx tmp = force_reg (new_mode, adjust_address (src, new_mode, 0)); + tmp = force_lowpart_subreg (int_mode, tmp, new_mode); + emit_move_insn (dest, force_lowpart_subreg (mode, tmp, int_mode)); + } + else + { + src = force_lowpart_subreg (int_mode, src, mode); + emit_move_insn (adjust_address (dest, new_mode, 0), + force_lowpart_subreg (new_mode, src, int_mode)); + } +} + +/* PRED is a predicate that is known to contain PTRUE. + For 128-bit VLS loads/stores, emit LDR/STR. + Else, emit an SVE predicated move from SRC to DEST. 
*/ void aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src) @@ -6421,16 +6471,7 @@ aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src) && known_eq (GET_MODE_SIZE (mode), 16) && aarch64_classify_vector_mode (mode) == VEC_SVE_DATA && !BYTES_BIG_ENDIAN) - { - if (MEM_P (src)) - { - rtx tmp = force_reg (V16QImode, adjust_address (src, V16QImode, 0)); - emit_move_insn (dest, lowpart_subreg (mode, tmp, V16QImode)); - } - else - emit_move_insn (adjust_address (dest, V16QImode, 0), - force_lowpart_subreg (V16QImode, src, mode)); - } + aarch64_emit_load_store_through_mode (dest, src, V16QImode); else { expand_operand ops[3]; @@ -23526,6 +23567,39 @@ aarch64_simd_valid_imm (rtx op, simd_immediate_info *info, return false; } +/* Try to optimize the expansion of a maskload or maskstore with + the operands in OPERANDS, given that the vector being loaded or + stored has mode MODE. Return true on success or false if the normal + expansion should be used. */ + +bool +aarch64_expand_maskloadstore (rtx *operands, machine_mode mode) +{ + /* If the predicate in operands[2] is a patterned SVE PTRUE predicate + with patterns VL1, VL2, VL4, VL8, or VL16 and at most the bottom + 128 bits are loaded/stored, emit an ASIMD load/store. */ + int vl = aarch64_partial_ptrue_length (operands[2]); + int width = vl * GET_MODE_UNIT_BITSIZE (mode); + if (width <= 128 + && pow2p_hwi (vl) + && (vl == 1 + || (!BYTES_BIG_ENDIAN + && aarch64_classify_vector_mode (mode) == VEC_SVE_DATA))) + { + machine_mode new_mode; + if (known_eq (width, 128)) + new_mode = V16QImode; + else if (known_eq (width, 64)) + new_mode = V8QImode; + else + new_mode = int_mode_for_size (width, 0).require (); + aarch64_emit_load_store_through_mode (operands[0], operands[1], + new_mode); + return true; + } + return false; +} + /* Return true if OP is a valid SIMD move immediate for SVE or AdvSIMD. */ bool aarch64_simd_valid_mov_imm (rtx op) @@ -23840,6 +23914,16 @@ aarch64_strided_registers_p (rtx *operands, unsigned int num_operands, return true; } +/* Return the base 2 logarithm of the bit inverse of OP masked by the lowest + NELTS bits, if OP is a power of 2. Otherwise, returns -1. */ + +int +aarch64_exact_log2_inverse (unsigned int nelts, rtx op) +{ + return exact_log2 ((~INTVAL (op)) + & ((HOST_WIDE_INT_1U << nelts) - 1)); +} + /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). */ void diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 670f487..6bdb68a 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -16211,14 +16211,16 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y) case UNGT: case UNGE: case UNEQ: - case LTGT: return CCFPmode; case LT: case LE: case GT: case GE: - return CCFPEmode; + case LTGT: + return (flag_finite_math_only + ? CCFPmode + : CCFPEmode); default: gcc_unreachable (); diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 8472b75..08d3f0d 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -2257,7 +2257,11 @@ extern int making_const_table; #define SELECT_CC_MODE(OP, X, Y) arm_select_cc_mode (OP, X, Y) -#define REVERSIBLE_CC_MODE(MODE) 1 +/* Floating-point modes cannot be reversed unless we don't care about + NaNs. 
*/ +#define REVERSIBLE_CC_MODE(MODE) \ + (flag_finite_math_only \ + || !((MODE) == CCFPmode || (MODE) == CCFPEmode)) #define REVERSE_CONDITION(CODE,MODE) \ (((MODE) == CCFPmode || (MODE) == CCFPEmode) \ diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index 31f3ee2..1ba5ac4 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -3309,8 +3309,16 @@ ix86_get_vector_load_mode (unsigned int size) mode = V64QImode; else if (size == 32) mode = V32QImode; - else + else if (size == 16) mode = V16QImode; + else if (size == 8) + mode = V8QImode; + else if (size == 4) + mode = V4QImode; + else if (size == 2) + mode = V2QImode; + else + gcc_unreachable (); return mode; } @@ -3338,13 +3346,36 @@ replace_vector_const (machine_mode vector_mode, rtx vector_const, if (SUBREG_P (dest) || mode == vector_mode) replace = vector_const; else - replace = gen_rtx_SUBREG (mode, vector_const, 0); + { + unsigned int size = GET_MODE_SIZE (mode); + if (size < ix86_regmode_natural_size (mode)) + { + /* If the mode size is smaller than its natural size, + first insert an extra move with a QI vector SUBREG + of the same size to avoid validate_subreg failure. */ + machine_mode vmode = ix86_get_vector_load_mode (size); + rtx vreg; + if (mode == vmode) + vreg = vector_const; + else + { + vreg = gen_reg_rtx (vmode); + rtx vsubreg = gen_rtx_SUBREG (vmode, vector_const, 0); + rtx pat = gen_rtx_SET (vreg, vsubreg); + rtx_insn *vinsn = emit_insn_before (pat, insn); + df_insn_rescan (vinsn); + } + replace = gen_rtx_SUBREG (mode, vreg, 0); + } + else + replace = gen_rtx_SUBREG (mode, vector_const, 0); + } - /* NB: Don't run recog_memoized here since vector SUBREG may not - be valid. Let LRA handle vector SUBREG. */ SET_SRC (set) = replace; /* Drop possible dead definitions. */ PATTERN (insn) = set; + INSN_CODE (insn) = -1; + recog_memoized (insn); df_insn_rescan (insn); } } diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 89f518c..fd36ea8 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -22794,6 +22794,27 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, else *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode)); return false; + case FLOAT: + case UNSIGNED_FLOAT: + if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + /* TODO: We do not have cost tables for x87. */ + *total = cost->fadd; + else if (VECTOR_MODE_P (mode)) + *total = ix86_vec_cost (mode, cost->cvtpi2ps); + else + *total = cost->cvtsi2ss; + return false; + + case FIX: + case UNSIGNED_FIX: + if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + /* TODO: We do not have cost tables for x87. */ + *total = cost->fadd; + else if (VECTOR_MODE_P (mode)) + *total = ix86_vec_cost (mode, cost->cvtps2pi); + else + *total = cost->cvtss2si; + return false; case ABS: /* SSE requires memory load for the constant operand. It may make @@ -25746,6 +25767,26 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, (ix86_tune_cost, GET_MODE_BITSIZE (mode)); break; + case FLOAT_EXPR: + if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + stmt_cost = ix86_cost->cvtsi2ss; + else if (X87_FLOAT_MODE_P (mode)) + /* TODO: We do not have cost tables for x87. */ + stmt_cost = ix86_cost->fadd; + else + stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps); + break; + + case FIX_TRUNC_EXPR: + if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) + stmt_cost = ix86_cost->cvtss2si; + else if (X87_FLOAT_MODE_P (mode)) + /* TODO: We do not have cost tables for x87. 
*/ + stmt_cost = ix86_cost->fadd; + else + stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi); + break; + case COND_EXPR: { /* SSE2 conditinal move sequence is: @@ -25909,8 +25950,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, break; } - if (kind == vec_promote_demote - && fp && FLOAT_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))) + if (kind == vec_promote_demote) { int outer_size = tree_to_uhwi @@ -25920,16 +25960,25 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)))); - int stmt_cost = vec_fp_conversion_cost - (ix86_tune_cost, GET_MODE_BITSIZE (mode)); - /* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will end - up doing two conversions and packing them. */ + bool inner_fp = FLOAT_TYPE_P + (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))); + + if (fp && inner_fp) + stmt_cost = vec_fp_conversion_cost + (ix86_tune_cost, GET_MODE_BITSIZE (mode)); + else if (fp && !inner_fp) + stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps); + else if (!fp && inner_fp) + stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi); + else + stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); + /* VEC_PACK_TRUNC_EXPR and similar demote operations: If outer size is + greater than inner size we will end up doing two conversions and + packing them. We always pack pairs; if the size difference is greater + it is split into multiple demote operations. */ if (inner_size > outer_size) - { - int n = inner_size / outer_size; - stmt_cost = stmt_cost * n - + (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op); - } + stmt_cost = stmt_cost * 2 + + ix86_vec_cost (mode, ix86_cost->sse_op); } /* If we do elementwise loads into a vector then we are bound by diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 02bf357..6a38de3 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -213,6 +213,10 @@ struct processor_costs { such as VCVTPD2PS with larger reg in ymm. */ const int vcvtps2pd512; /* cost 512bit packed FP conversions, such as VCVTPD2PS with larger reg in zmm. */ + const int cvtsi2ss; /* cost of CVTSI2SS instruction. */ + const int cvtss2si; /* cost of CVT(T)SS2SI instruction. */ + const int cvtpi2ps; /* cost of CVTPI2PS instruction. */ + const int cvtps2pi; /* cost of CVT(T)PS2PI instruction. */ const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp; /* Specify reassociation width for integer, fp, vector integer and vector fp diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index cddcf61..6cce70a 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -134,6 +134,11 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ COSTS_N_BYTES (4), /* cost of CVTSS2SD etc. */ COSTS_N_BYTES (4), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_BYTES (6), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_BYTES (4), /* cost of CVTSI2SS instruction. */ + COSTS_N_BYTES (4), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_BYTES (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_BYTES (4), /* cost of CVT(T)PS2PI instruction. */ + 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ ix86_size_memcpy, ix86_size_memset, @@ -249,6 +254,10 @@ struct processor_costs i386_cost = { /* 386 specific costs */ COSTS_N_INSNS (27), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (54), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (108), /* cost of 512bit VCVTPS2PD etc. 
*/ + COSTS_N_INSNS (27), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (27), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ i386_memcpy, i386_memset, @@ -365,6 +374,10 @@ struct processor_costs i486_cost = { /* 486 specific costs */ COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (27), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (27), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ i486_memcpy, i486_memset, @@ -479,6 +492,10 @@ struct processor_costs pentium_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium_memcpy, pentium_memset, @@ -586,6 +603,10 @@ struct processor_costs lakemont_cost = { COSTS_N_INSNS (5), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (10), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (20), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (5), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (5), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium_memcpy, pentium_memset, @@ -708,6 +729,10 @@ struct processor_costs pentiumpro_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentiumpro_memcpy, pentiumpro_memset, @@ -821,6 +846,10 @@ struct processor_costs geode_cost = { COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ geode_memcpy, geode_memset, @@ -937,6 +966,10 @@ struct processor_costs k6_cost = { COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (8), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (2), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (2), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (2), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (2), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. 
*/ k6_memcpy, k6_memset, @@ -1054,6 +1087,10 @@ struct processor_costs athlon_cost = { COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (4), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ athlon_memcpy, athlon_memset, @@ -1180,6 +1217,10 @@ struct processor_costs k8_cost = { COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (10), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ k8_memcpy, k8_memset, @@ -1314,6 +1355,10 @@ struct processor_costs amdfam10_cost = { COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (8), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ amdfam10_memcpy, amdfam10_memset, @@ -1441,6 +1486,10 @@ const struct processor_costs bdver_cost = { COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ bdver_memcpy, bdver_memset, @@ -1593,6 +1642,10 @@ struct processor_costs znver1_cost = { /* Real latency is 4, but for split regs multiply cost of half op by 2. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (8), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (7), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. SPEC2k6 bencharks suggests @@ -1755,6 +1808,10 @@ struct processor_costs znver2_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (7), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. 
@@ -1893,6 +1950,10 @@ struct processor_costs znver3_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -2034,6 +2095,10 @@ struct processor_costs znver4_cost = { COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ /* Real latency is 6, but for split regs multiply cost of half op by 2. */ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles and it can execute 2 integer additions and 2 multiplications thus reassociation may make sense up to with of 6. @@ -2188,6 +2253,10 @@ struct processor_costs znver5_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ /* Zen5 can execute: - integer ops: 6 per cycle, at most 3 multiplications. latency 1 for additions, 3 for multiplications (pipelined) @@ -2330,6 +2399,10 @@ struct processor_costs skylake_cost = { COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (4), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ skylake_memcpy, skylake_memset, @@ -2462,6 +2535,10 @@ struct processor_costs icelake_cost = { COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ icelake_memcpy, icelake_memset, @@ -2588,6 +2665,10 @@ struct processor_costs alderlake_cost = { COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. 
*/ alderlake_memcpy, alderlake_memset, @@ -2707,6 +2788,10 @@ const struct processor_costs btver1_cost = { COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ btver1_memcpy, btver1_memset, @@ -2823,6 +2908,10 @@ const struct processor_costs btver2_cost = { COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ btver2_memcpy, btver2_memset, @@ -2938,6 +3027,10 @@ struct processor_costs pentium4_cost = { COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (20), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (17), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ pentium4_memcpy, pentium4_memset, @@ -3056,6 +3149,10 @@ struct processor_costs nocona_cost = { COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (20), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (17), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ nocona_memcpy, nocona_memset, @@ -3172,6 +3269,10 @@ struct processor_costs atom_cost = { COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (7), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (10), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ atom_memcpy, atom_memset, @@ -3288,6 +3389,10 @@ struct processor_costs slm_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (5), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (5), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ slm_memcpy, slm_memset, @@ -3418,6 +3523,10 @@ struct processor_costs tremont_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. 
*/ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ tremont_memcpy, tremont_memset, @@ -3534,6 +3643,10 @@ struct processor_costs intel_cost = { COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (8), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (8), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (8), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ intel_memcpy, intel_memset, @@ -3655,6 +3768,10 @@ struct processor_costs lujiazui_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ lujiazui_memcpy, lujiazui_memset, @@ -3774,6 +3891,10 @@ struct processor_costs yongfeng_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */ yongfeng_memcpy, yongfeng_memset, @@ -3893,6 +4014,10 @@ struct processor_costs shijidadao_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */ shijidadao_memcpy, shijidadao_memset, @@ -4020,6 +4145,10 @@ struct processor_costs generic_cost = { COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ generic_memcpy, generic_memset, @@ -4152,6 +4281,10 @@ struct processor_costs core_cost = { COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */ + COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */ + COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */ + COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ + COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. 
*/ core_memcpy, core_memset, diff --git a/gcc/config/riscv/riscv-vect-permconst.cc b/gcc/config/riscv/riscv-vect-permconst.cc index feecc7e..8e13cf8 100644 --- a/gcc/config/riscv/riscv-vect-permconst.cc +++ b/gcc/config/riscv/riscv-vect-permconst.cc @@ -203,6 +203,24 @@ vector_permconst::process_bb (basic_block bb) if (bias < 0 || bias > 16384 / 8) continue; + /* We need to verify that each element would be a valid value + in the inner mode after applying the bias. */ + machine_mode inner = GET_MODE_INNER (GET_MODE (cvec)); + HOST_WIDE_INT precision = GET_MODE_PRECISION (inner).to_constant (); + int i; + for (i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++) + { + HOST_WIDE_INT val = INTVAL (CONST_VECTOR_ELT (cvec, i)) - bias; + if (val != sext_hwi (val, precision)) + break; + } + + /* If the loop terminated early, then we found a case where the + adjusted constant would not fit, so we can't record the constant + for this case (it's unlikely to be useful anyway). */ + if (i != CONST_VECTOR_NUNITS (cvec).to_constant ()) + continue; + /* At this point we have a load of a constant integer vector from the constant pool. That constant integer vector is hopefully a permutation constant. We need to make a copy of the vector and @@ -211,7 +229,7 @@ vector_permconst::process_bb (basic_block bb) XXX This violates structure sharing conventions. */ rtvec_def *nvec = gen_rtvec (CONST_VECTOR_NUNITS (cvec).to_constant ()); - for (int i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++) + for (i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++) nvec->elem[i] = GEN_INT (INTVAL (CONST_VECTOR_ELT (cvec, i)) - bias); rtx copy = gen_rtx_CONST_VECTOR (GET_MODE (cvec), nvec); diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 15c89ff..259997f 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -3173,15 +3173,25 @@ "#" "&& reload_completed" [(set (match_dup 4) (lshiftrt:X (subreg:X (match_dup 2) 0) (match_dup 6))) - (set (match_dup 4) (and:X (match_dup 4) (match_dup 7))) + (set (match_dup 4) (match_dup 8)) (set (pc) (if_then_else (match_op_dup 1 [(match_dup 4) (const_int 0)]) (label_ref (match_dup 0)) (pc)))] { - HOST_WIDE_INT mask = INTVAL (operands[3]); - int trailing = ctz_hwi (mask); + HOST_WIDE_INT mask = INTVAL (operands[3]); + int trailing = ctz_hwi (mask); + + operands[6] = GEN_INT (trailing); + operands[7] = GEN_INT (mask >> trailing); - operands[6] = GEN_INT (trailing); - operands[7] = GEN_INT (mask >> trailing); + /* This splits after reload, so there's little chance to clean things + up. Rather than emit a ton of RTL here, we can just make a new + operand for that RHS and use it. For the case where the AND would + have been redundant, we can make it a NOP move, which does get + cleaned up. 
*/ + if (operands[7] == CONSTM1_RTX (word_mode)) + operands[8] = operands[4]; + else + operands[8] = gen_rtx_AND (word_mode, operands[4], operands[7]); } [(set_attr "type" "branch")]) diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt index 7102480..80593ee 100644 --- a/gcc/config/riscv/riscv.opt +++ b/gcc/config/riscv/riscv.opt @@ -274,6 +274,8 @@ Mask(ZA64RS) Var(riscv_za_subext) Mask(ZA128RS) Var(riscv_za_subext) +Mask(ZAMA16B) Var(riscv_za_subext) + TargetVariable int riscv_zb_subext @@ -466,6 +468,10 @@ Mask(XCVBI) Var(riscv_xcv_subext) TargetVariable int riscv_sv_subext +Mask(SVADE) Var(riscv_sv_subext) + +Mask(SVADU) Var(riscv_sv_subext) + Mask(SVINVAL) Var(riscv_sv_subext) Mask(SVNAPOT) Var(riscv_sv_subext) diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h index e8c7f83..d760a7e 100644 --- a/gcc/config/s390/s390-protos.h +++ b/gcc/config/s390/s390-protos.h @@ -114,6 +114,7 @@ extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx); extern void s390_expand_vec_strlen (rtx, rtx, rtx); extern void s390_expand_vec_movstr (rtx, rtx, rtx); extern bool s390_expand_addcc (enum rtx_code, rtx, rtx, rtx, rtx, rtx); +extern void s390_expand_cstoreti4 (rtx, rtx, rtx, rtx); extern bool s390_expand_insv (rtx, rtx, rtx, rtx); extern void s390_expand_cs (machine_mode, rtx, rtx, rtx, rtx, rtx, bool); extern void s390_expand_atomic_exchange_tdsi (rtx, rtx, rtx); diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index e3edf85..2d44cec 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -7210,6 +7210,82 @@ s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count) NULL_RTX, 1, OPTAB_DIRECT); } +/* Expand optab cstoreti4. */ + +void +s390_expand_cstoreti4 (rtx dst, rtx cmp, rtx op1, rtx op2) +{ + rtx_code code = GET_CODE (cmp); + + if (TARGET_VXE3) + { + rtx cond = s390_emit_compare (GET_MODE (cmp), code, op1, op2); + emit_insn (gen_movsicc (dst, cond, const1_rtx, const0_rtx)); + return; + } + + /* Prior to VXE3, emulate the comparison. For an (in)equality test exploit + VECTOR COMPARE EQUAL. For a relational test, first compare the high part + via VECTOR ELEMENT COMPARE (LOGICAL). If the high part does not equal, + then consume the CC immediately by a subsequent LOAD ON CONDITION. + Otherwise, if the high part equals, then perform a subsequent VECTOR + COMPARE HIGH LOGICAL followed by a LOAD ON CONDITION. */ + + op1 = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op1, TImode, 0)); + op2 = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op2, TImode, 0)); + + if (code == EQ || code == NE) + { + s390_expand_vec_compare_cc (dst, code, op1, op2, code == EQ); + return; + } + + /* Normalize code into either GE(U) or GT(U). */ + if (code == LT || code == LE || code == LTU || code == LEU) + { + std::swap (op1, op2); + code = swap_condition (code); + } + + /* For (un)signed comparisons + - high(op1) >= high(op2) instruction VECG op1, op2 sets CC1 + if the relation does _not_ hold. + - high(op1) > high(op2) instruction VECG op2, op1 sets CC1 + if the relation holds. */ + if (code == GT || code == GTU) + std::swap (op1, op2); + machine_mode cc_mode = (code == GEU || code == GTU) ? 
CCUmode : CCSmode; + rtx lane0 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); + emit_insn ( + gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM), + gen_rtx_COMPARE (cc_mode, + gen_rtx_VEC_SELECT (DImode, op1, lane0), + gen_rtx_VEC_SELECT (DImode, op2, lane0)))); + rtx ccs_reg = gen_rtx_REG (CCSmode, CC_REGNUM); + rtx lab = gen_label_rtx (); + s390_emit_jump (lab, gen_rtx_NE (VOIDmode, ccs_reg, const0_rtx)); + /* At this point we have that high(op1) == high(op2). Thus, test the low + part, now. For unsigned comparisons + - low(op1) >= low(op2) instruction VCHLGS op2, op1 sets CC1 + if the relation does _not_ hold. + - low(op1) > low(op2) instruction VCHLGS op1, op2 sets CC1 + if the relation holds. */ + std::swap (op1, op2); + emit_insn (gen_rtx_PARALLEL ( + VOIDmode, + gen_rtvec (2, + gen_rtx_SET (gen_rtx_REG (CCVIHUmode, CC_REGNUM), + gen_rtx_COMPARE (CCVIHUmode, op1, op2)), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode))))); + emit_label (lab); + /* For (un)signed comparison >= any CC except CC1 means that the relation + holds. For (un)signed comparison > only CC1 means that the relation + holds. */ + rtx_code cmp_code = (code == GE || code == GEU) ? UNGE : LT; + rtx cond = gen_rtx_fmt_ee (cmp_code, CCSmode, ccs_reg, const0_rtx); + emit_insn (gen_movsicc (dst, cond, const1_rtx, const0_rtx)); +} + /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store the result in TARGET. */ @@ -7310,9 +7386,9 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond, /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into TARGET if either all (ALL_P is true) or any (ALL_P is false) of the elements in CMP1 and CMP2 fulfill the comparison. - This function is only used to emit patterns for the vx builtins and - therefore only handles comparison codes required by the - builtins. */ + This function is only used in s390_expand_cstoreti4 and to emit patterns for + the vx builtins and therefore only handles comparison codes required by + those. 
*/ void s390_expand_vec_compare_cc (rtx target, enum rtx_code code, rtx cmp1, rtx cmp2, bool all_p) diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 05b9da6..97a4bdf 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -993,6 +993,10 @@ (define_mode_attr asm_fcmp [(CCVEQ "e") (CCVFH "h") (CCVFHE "he")]) (define_mode_attr insn_cmp [(CCVEQ "eq") (CCVIH "h") (CCVIHU "hl") (CCVFH "h") (CCVFHE "he")]) +(define_mode_iterator CC_SUZ [CCS CCU CCZ]) +(define_mode_attr l [(CCS "") (CCU "l") (CCZ "")]) +(define_mode_attr cc_tolower [(CCS "ccs") (CCU "ccu") (CCZ "ccz")]) + ; Analogue to TOINTVEC / tointvec (define_mode_attr TOINT [(TF "TI") (DF "DI") (SF "SI")]) (define_mode_attr toint [(TF "ti") (DF "di") (SF "si")]) diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index e29255f..160e42a 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -538,6 +538,14 @@ "vlvg<bhfgq>\t%v0,%1,%Y4(%2)" [(set_attr "op_type" "VRS")]) +(define_expand "cstoreti4" + [(set (match_operand:SI 0 "register_operand") + (match_operator:SI 1 "ordered_comparison_operator" + [(match_operand:TI 2 "register_operand") + (match_operand:TI 3 "register_operand")]))] + "TARGET_VX" + "s390_expand_cstoreti4 (operands[0], operands[1], operands[2], operands[3]); DONE;") + ;; FIXME: Support also vector mode operands for 0 ;; This is used via RTL standard name as well as for expanding the builtin @@ -2209,6 +2217,28 @@ operands[5] = gen_reg_rtx (V2DImode); }) +(define_insn "*vec_cmpv2di_lane0_<cc_tolower>" + [(set (reg:CC_SUZ CC_REGNUM) + (compare:CC_SUZ + (vec_select:DI + (match_operand:V2DI 0 "register_operand" "v") + (parallel [(const_int 0)])) + (vec_select:DI + (match_operand:V2DI 1 "register_operand" "v") + (parallel [(const_int 0)]))))] + "TARGET_VX" + "vec<l>g\t%v0,%v1" + [(set_attr "op_type" "VRR")]) + +(define_insn "*vec_cmpti_<cc_tolower>" + [(set (reg:CC_SUZ CC_REGNUM) + (compare:CC_SUZ + (match_operand:TI 0 "register_operand" "v") + (match_operand:TI 1 "register_operand" "v")))] + "TARGET_VXE3" + "vec<l>q\t%v0,%v1" + [(set_attr "op_type" "VRR")]) + ;; ;; Floating point compares diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 90cbb51..b1964b3 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -31532,6 +31532,14 @@ to @samp{zvks} and @samp{zvkg}. @tab 1.0 @tab Supervisor-mode timer interrupts extension. +@item svade +@tab 1.0 +@tab Cause exception when hardware updating of A/D bits is disabled + +@item svadu +@tab 1.0 +@tab Hardware Updating of A/D Bits extension. + @item svinval @tab 1.0 @tab Fine-grained address-translation cache invalidation extension. diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index f87c64b..d92b9d6 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,28 @@ +2025-05-07 Paul Thomas <pault@gcc.gnu.org> + and Steven G. Kargl <kargl@gcc.gnu.org> + + PR fortran/119948 + * primary.cc (match_variable): Module procedures with sym the + same as result can be treated as variables, although marked + external. + +2025-05-06 Jerry DeLisle <jvdelisle@gcc.gnu.org> + + PR fortran/120049 + * check.cc (gfc_check_c_associated): Modify checks to avoid + ICE and allow use, intrinsic :: iso_c_binding from a separate + module file. 
+ +2025-05-06 Thomas Koenig <tkoenig@gcc.gnu.org> + + PR fortran/119928 + * interface.cc (gfc_check_dummy_characteristics): Do not issue + error if one dummy symbol has been generated from an actual + argument and the other one has OPTIONAL, INTENT, ALLOCATABLE, + POINTER, TARGET, VALUE, ASYNCHRONOUS or CONTIGUOUS. + (gfc_get_formal_from_actual_arglist): Do nothing if symbol + is a class. + 2025-05-04 Harald Anlauf <anlauf@gmx.de> PR fortran/119986 diff --git a/gcc/fortran/check.cc b/gcc/fortran/check.cc index 299c216..f02a2a3 100644 --- a/gcc/fortran/check.cc +++ b/gcc/fortran/check.cc @@ -5955,30 +5955,40 @@ gfc_check_c_sizeof (gfc_expr *arg) bool gfc_check_c_associated (gfc_expr *c_ptr_1, gfc_expr *c_ptr_2) { - if (c_ptr_1->ts.type != BT_DERIVED - || c_ptr_1->ts.u.derived->from_intmod != INTMOD_ISO_C_BINDING - || (c_ptr_1->ts.u.derived->intmod_sym_id != ISOCBINDING_PTR - && c_ptr_1->ts.u.derived->intmod_sym_id != ISOCBINDING_FUNPTR)) + if (c_ptr_1) { - gfc_error ("Argument C_PTR_1 at %L to C_ASSOCIATED shall have the " - "type TYPE(C_PTR) or TYPE(C_FUNPTR)", &c_ptr_1->where); - return false; + if (c_ptr_1->expr_type == EXPR_FUNCTION && c_ptr_1->ts.type == BT_VOID) + return true; + + if (c_ptr_1->ts.type != BT_DERIVED + || c_ptr_1->ts.u.derived->from_intmod != INTMOD_ISO_C_BINDING + || (c_ptr_1->ts.u.derived->intmod_sym_id != ISOCBINDING_PTR + && c_ptr_1->ts.u.derived->intmod_sym_id != ISOCBINDING_FUNPTR)) + { + gfc_error ("Argument C_PTR_1 at %L to C_ASSOCIATED shall have the " + "type TYPE(C_PTR) or TYPE(C_FUNPTR)", &c_ptr_1->where); + return false; + } } if (!scalar_check (c_ptr_1, 0)) return false; - if (c_ptr_2 - && (c_ptr_2->ts.type != BT_DERIVED + if (c_ptr_2) + { + if (c_ptr_2->expr_type == EXPR_FUNCTION && c_ptr_2->ts.type == BT_VOID) + return true; + + if (c_ptr_2->ts.type != BT_DERIVED || c_ptr_2->ts.u.derived->from_intmod != INTMOD_ISO_C_BINDING || (c_ptr_1->ts.u.derived->intmod_sym_id - != c_ptr_2->ts.u.derived->intmod_sym_id))) - { - gfc_error ("Argument C_PTR_2 at %L to C_ASSOCIATED shall have the " - "same type as C_PTR_1: %s instead of %s", &c_ptr_1->where, - gfc_typename (&c_ptr_1->ts), - gfc_typename (&c_ptr_2->ts)); - return false; + != c_ptr_2->ts.u.derived->intmod_sym_id)) + { + gfc_error ("Argument C_PTR_2 at %L to C_ASSOCIATED shall have the " + "same type as C_PTR_1: %s instead of %s", &c_ptr_1->where, + gfc_typename (&c_ptr_1->ts), gfc_typename (&c_ptr_2->ts)); + return false; + } } if (c_ptr_2 && !scalar_check (c_ptr_2, 1)) diff --git a/gcc/fortran/primary.cc b/gcc/fortran/primary.cc index 72ecc7c..ec4e135 100644 --- a/gcc/fortran/primary.cc +++ b/gcc/fortran/primary.cc @@ -4396,7 +4396,7 @@ match_variable (gfc_expr **result, int equiv_flag, int host_flag) case FL_PROCEDURE: /* Check for a nonrecursive function result variable. */ if (sym->attr.function - && !sym->attr.external + && (!sym->attr.external || sym->abr_modproc_decl) && sym->result == sym && (gfc_is_function_return_value (sym, gfc_current_ns) || (sym->attr.entry diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index 5884b79..7721795 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -10573,7 +10573,7 @@ gimple_fold_indirect_ref (tree t) integer types involves undefined behavior on overflow and the operation can be expressed with unsigned arithmetic. 
*/ -bool +static bool arith_code_with_undefined_signed_overflow (tree_code code) { switch (code) @@ -10590,6 +10590,30 @@ arith_code_with_undefined_signed_overflow (tree_code code) } } +/* Return true if STMT has an operation that operates on a signed + integer types involves undefined behavior on overflow and the + operation can be expressed with unsigned arithmetic. */ + +bool +gimple_with_undefined_signed_overflow (gimple *stmt) +{ + if (!is_gimple_assign (stmt)) + return false; + tree lhs = gimple_assign_lhs (stmt); + if (!lhs) + return false; + tree lhs_type = TREE_TYPE (lhs); + if (!INTEGRAL_TYPE_P (lhs_type) + && !POINTER_TYPE_P (lhs_type)) + return false; + if (!TYPE_OVERFLOW_UNDEFINED (lhs_type)) + return false; + if (!arith_code_with_undefined_signed_overflow + (gimple_assign_rhs_code (stmt))) + return false; + return true; +} + /* Rewrite STMT, an assignment with a signed integer or pointer arithmetic operation that can be transformed to unsigned arithmetic by converting its operand, carrying out the operation in the corresponding unsigned diff --git a/gcc/gimple-fold.h b/gcc/gimple-fold.h index 2790d0f..5fcfdcd 100644 --- a/gcc/gimple-fold.h +++ b/gcc/gimple-fold.h @@ -59,7 +59,7 @@ extern tree gimple_get_virt_method_for_vtable (HOST_WIDE_INT, tree, extern tree gimple_fold_indirect_ref (tree); extern bool gimple_fold_builtin_sprintf (gimple_stmt_iterator *); extern bool gimple_fold_builtin_snprintf (gimple_stmt_iterator *); -extern bool arith_code_with_undefined_signed_overflow (tree_code); +extern bool gimple_with_undefined_signed_overflow (gimple *); extern void rewrite_to_defined_overflow (gimple_stmt_iterator *); extern gimple_seq rewrite_to_defined_overflow (gimple *); extern void replace_call_with_value (gimple_stmt_iterator *, tree); diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc index 7bcbe11..b34fd2f 100644 --- a/gcc/simplify-rtx.cc +++ b/gcc/simplify-rtx.cc @@ -7387,6 +7387,13 @@ simplify_context::simplify_ternary_operation (rtx_code code, machine_mode mode, return gen_rtx_CONST_VECTOR (mode, v); } + if (swap_commutative_operands_p (op0, op1) + /* Two operands have same precedence, then first bit of mask + select first operand. */ + || (!swap_commutative_operands_p (op1, op0) && !(sel & 1))) + return simplify_gen_ternary (code, mode, mode, op1, op0, + GEN_INT (~sel & mask)); + /* Replace (vec_merge (vec_merge a b m) c n) with (vec_merge b c n) if no element from a appears in the result. */ if (GET_CODE (op0) == VEC_MERGE) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 4551e2d..73e8f7c 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,218 @@ +2025-05-07 Jeff Law <jlaw@ventanamicro.com> + + PR target/120137 + PR target/120154 + * gcc.target/riscv/pr120137.c: New test. + * gcc.target/riscv/pr120154.c: New test. + +2025-05-07 Dongyan Chen <chendongyan@isrc.iscas.ac.cn> + + * gcc.target/riscv/arch-48.c: New test. + +2025-05-07 Richard Earnshaw <rearnsha@arm.com> + + PR target/110796 + PR target/118446 + * gcc.target/arm/armv8_2-fp16-arith-1.c: Adjust due to no-longer + emitting VCMPE when -ffast-math.. + +2025-05-07 Jakub Jelinek <jakub@redhat.com> + + PR preprocessor/108900 + PR preprocessor/116047 + PR preprocessor/120061 + * gcc.dg/plugin/plugin.exp: Add location-overflow-test-pr116047.c + and location-overflow-test-pr120061.c. + * gcc.dg/plugin/location_overflow_plugin.cc (plugin_init): Don't error + on unknown values, instead just break. Handle 0x4fHHHHHH arguments + differently. 
+ * gcc.dg/plugin/location-overflow-test-pr116047.c: New test. + * gcc.dg/plugin/location-overflow-test-pr116047-1.h: New test. + * gcc.dg/plugin/location-overflow-test-pr116047-2.h: New test. + * gcc.dg/plugin/location-overflow-test-pr120061.c: New test. + * gcc.dg/plugin/location-overflow-test-pr120061-1.h: New test. + * gcc.dg/plugin/location-overflow-test-pr120061-2.h: New test. + +2025-05-07 Jan Hubicka <hubicka@ucw.cz> + + * gcc.target/i386/pr119919.c: Add -mtune=znver1 + +2025-05-07 Jennifer Schmitz <jschmitz@nvidia.com> + + PR target/117978 + * gcc.target/aarch64/sve/acle/general/whilelt_5.c: Adjust expected + outcome. + * gcc.target/aarch64/sve/ldst_ptrue_pat_128_to_neon.c: New test. + * gcc.target/aarch64/sve/while_7.c: Adjust expected outcome. + * gcc.target/aarch64/sve/while_9.c: Adjust expected outcome. + +2025-05-07 Stefan Schulze Frielinghaus <stefansf@gcc.gnu.org> + + * gcc.target/s390/vector/cstoreti-1.c: New test. + * gcc.target/s390/vector/cstoreti-2.c: New test. + +2025-05-07 H.J. Lu <hjl.tools@gmail.com> + + PR target/120036 + * g++.target/i386/pr120036.C: New test. + * gcc.target/i386/pr117839-3a.c: Likewise. + * gcc.target/i386/pr117839-3b.c: Likewise. + +2025-05-07 Paul Thomas <pault@gcc.gnu.org> + and Steven G. Kargl <kargl@gcc.gnu.org> + + PR fortran/119948 + * gfortran.dg/pr119948.f90: Update to incorporate failing test, + where module procedure is the result. Test submodule cases. + +2025-05-07 Jeff Law <jlaw@ventanamicro.com> + + * g++.target/riscv/redundant-andi.C: New test. + +2025-05-06 Dongyan Chen <chendongyan@isrc.iscas.ac.cn> + + * gcc.target/riscv/arch-47.c: New test. + +2025-05-06 Mingzhu Yan <yanmingzhu@iscas.ac.cn> + + * gcc.target/riscv/arch-45.c: New test. + * gcc.target/riscv/arch-46.c: New test. + +2025-05-06 Jerry DeLisle <jvdelisle@gcc.gnu.org> + + PR fortran/120049 + * gfortran.dg/pr120049_a.f90: New test. + * gfortran.dg/pr120049_b.f90: New test. + +2025-05-06 Thomas Koenig <tkoenig@gcc.gnu.org> + + PR fortran/119928 + * gfortran.dg/interface_60.f90: New test. + +2025-05-06 Martin Jambor <mjambor@suse.cz> + + PR ipa/119852 + * gcc.dg/ipa/pr119852.c: New test. + +2025-05-06 David Malcolm <dmalcolm@redhat.com> + + PR sarif-replay/117988 + * sarif-replay.dg/2.1.0-invalid/3.1-not-an-object.sarif: Add + expected logical location. + * sarif-replay.dg/2.1.0-invalid/3.11.11-missing-arguments-for-placeholders.sarif: + Likewise. + * sarif-replay.dg/2.1.0-invalid/3.11.11-not-enough-arguments-for-placeholders.sarif: + Likewise. + * sarif-replay.dg/2.1.0-invalid/3.11.5-unescaped-braces.sarif: Likewise. + * sarif-replay.dg/2.1.0-invalid/3.13.2-no-version.sarif: Likewise. + * sarif-replay.dg/2.1.0-invalid/3.13.2-version-not-a-string.sarif: Likewise. + * sarif-replay.dg/2.1.0-invalid/3.13.4-bad-runs.sarif: Likewise. + * sarif-replay.dg/2.1.0-invalid/3.13.4-no-runs.sarif: Likewise. + * sarif-replay.dg/2.1.0-invalid/3.13.4-non-object-in-runs.sarif: Likewise. + * sarif-replay.dg/2.1.0-invalid/3.27.10-bad-level.sarif: Likewise. + * sarif-replay.dg/2.1.0-invalid/3.33.3-index-out-of-range.sarif: Likewise. + * sarif-replay.dg/2.1.0-unhandled/3.27.10-none-level.sarif: Likewise. + +2025-05-06 David Malcolm <dmalcolm@redhat.com> + + * libgdiagnostics.dg/test-nested-logical-locations-json-c.py: New test. + * libgdiagnostics.dg/test-nested-logical-locations-json.c: New test. + * sarif-replay.dg/2.1.0-valid/3.33.7-json-example.sarif: New test. + * sarif-replay.dg/2.1.0-valid/3.33.7-xml-example.sarif: New test. 
+ +2025-05-06 David Malcolm <dmalcolm@redhat.com> + + PR other/116176 + * g++.dg/sarif-output/logical-locations-1.C: New test. + * g++.dg/sarif-output/logical-locations-1.py: New test script. + * g++.dg/sarif-output/logical-locations-2.C: New test. + * g++.dg/sarif-output/logical-locations-2.py: New test script. + * g++.dg/sarif-output/logical-locations-3.C: New test. + * g++.dg/sarif-output/logical-locations-3.py: New test script. + * g++.dg/sarif-output/sarif-output.exp: New script, adapted + from gcc.dg/sarif-output/sarif-output.exp. + * libgdiagnostics.dg/test-logical-location-c.py: Update for using + theRun.logicalLocations. + * libgdiagnostics.dg/test-warning-with-path-c.py: Likewise. + +2025-05-06 David Malcolm <dmalcolm@redhat.com> + + * gcc.dg/plugin/diagnostic_plugin_test_paths.cc: Update for + changes to simple_diagnostic_path. + +2025-05-06 David Malcolm <dmalcolm@redhat.com> + + * libgdiagnostics.dg/test-logical-location.c: Include + <string.h>. + (main): Verify that the accessors work. + * libgdiagnostics.dg/test-logical-location.cc: New test. + +2025-05-06 Shreya Munnangi <smunnangi1@ventanamicro.com> + + PR middle-end/114512 + * gcc.target/riscv/pr114512.c: New test. + +2025-05-06 Pan Li <pan2.li@intel.com> + + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i16.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i32.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i64.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i8.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u16.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u32.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u64.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u8.c: New test. + +2025-05-06 Pan Li <pan2.li@intel.com> + + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i16.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i32.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i64.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i8.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u16.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u32.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u64.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u8.c: New test. + +2025-05-06 Pan Li <pan2.li@intel.com> + + * gcc.target/riscv/rvv/rvv.exp: Add new folder vx_vf for all + vec_dup + vv to vx testcases. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_run.h: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-i16.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-i32.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-i64.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-i8.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-u16.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-u32.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-u64.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-u8.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-i16.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-i32.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-i64.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-i8.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-u16.c: New test. 
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-u32.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-u64.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-u8.c: New test. + +2025-05-06 Richard Biener <rguenther@suse.de> + + PR tree-optimization/1157777 + * gcc.dg/vect/bb-slp-pr115777.c: New testcase. + +2025-05-06 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/120074 + * gcc.dg/pr120074.c: New test. + +2025-05-06 Richard Biener <rguenther@suse.de> + + PR tree-optimization/120031 + * gcc.target/i386/pr120031.c: New testcase. + 2025-05-05 Jeff Law <jlaw@ventanamicro.com> PR target/119971 diff --git a/gcc/testsuite/g++.target/i386/pr120036.C b/gcc/testsuite/g++.target/i386/pr120036.C new file mode 100644 index 0000000..a2fc24f --- /dev/null +++ b/gcc/testsuite/g++.target/i386/pr120036.C @@ -0,0 +1,113 @@ +/* { dg-do compile { target fpic } } */ +/* { dg-options "-O2 -std=c++11 -march=sapphirerapids -fPIC" } */ + +typedef _Float16 Native; +struct float16_t +{ + Native native; + float16_t (); + float16_t (Native arg) : native (arg) {} + operator Native (); + float16_t + operator+ (float16_t rhs) + { + return native + rhs.native; + } + float16_t + operator* (float16_t) + { + return native * native; + } +}; +template <int N> struct Simd +{ + static constexpr int kPrivateLanes = N; +}; +template <int N> struct ClampNAndPow2 +{ + using type = Simd<N>; +}; +template <int kLimit> struct CappedTagChecker +{ + static constexpr int N = sizeof (int) ? kLimit : 0; + using type = typename ClampNAndPow2<N>::type; +}; +template <typename, int kLimit, int> +using CappedTag = typename CappedTagChecker<kLimit>::type; +template <class D> +int +Lanes (D) +{ + return D::kPrivateLanes; +} +template <class D> int Zero (D); +template <class D> using VFromD = decltype (Zero (D ())); +struct Vec512 +{ + __attribute__ ((__vector_size__ (16))) _Float16 raw; +}; +Vec512 Zero (Simd<2>); +template <class D> void ReduceSum (D, VFromD<D>); +struct Dot +{ + template <int, class D, typename T> + static T + Compute (D d, T *pa, int num_elements) + { + T *pb; + int N = Lanes (d), i = 0; + if (__builtin_expect (num_elements < N, 0)) + { + T sum0 = 0, sum1 = 0; + for (; i + 2 <= num_elements; i += 2) + { + float16_t __trans_tmp_6 = pa[i] * pb[i], + __trans_tmp_5 = sum0 + __trans_tmp_6, + __trans_tmp_8 = pa[i + 1] * pb[1], + __trans_tmp_7 = sum1 + __trans_tmp_8; + sum0 = __trans_tmp_5; + sum1 = __trans_tmp_7; + } + float16_t __trans_tmp_9 = sum0 + sum1; + return __trans_tmp_9; + } + decltype (Zero (d)) sum0; + ReduceSum (d, sum0); + __builtin_trap (); + } +}; +template <int kMul, class Test, int kPow2> struct ForeachCappedR +{ + static void + Do (int min_lanes, int max_lanes) + { + CappedTag<int, kMul, kPow2> d; + Test () (int (), d); + ForeachCappedR<kMul / 2, Test, kPow2>::Do (min_lanes, max_lanes); + } +}; +template <class Test, int kPow2> struct ForeachCappedR<0, Test, kPow2> +{ + static void Do (int, int); +}; +struct TestDot +{ + template <class T, class D> + void + operator() (T, D d) + { + int counts[]{ 1, 3 }; + for (int num : counts) + { + float16_t a; + T __trans_tmp_4 = Dot::Compute<0> (d, &a, num); + } + } +}; +int DotTest_TestAllDot_TestTestBody_max_lanes; +void +DotTest_TestAllDot_TestTestBody () +{ + ForeachCappedR<64, TestDot, 0>::Do ( + 1, DotTest_TestAllDot_TestTestBody_max_lanes); +} diff --git a/gcc/testsuite/g++.target/riscv/redundant-andi.C b/gcc/testsuite/g++.target/riscv/redundant-andi.C new file mode 100644 index 0000000..fe560a7 --- /dev/null +++ 
b/gcc/testsuite/g++.target/riscv/redundant-andi.C @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=rv64gcb -mabi=lp64" { target rv64 } } */ +/* { dg-options "-O2 -march=rv32gcb -mabi=ilp32" { target rv32 } } */ + + +typedef int move_s; +struct state_t +{ + int npieces[13]; +}; +typedef struct state_t state_t; +int +search (state_t *s, int alpha, int beta, int depth, int is_null, int cutnode, + int extend, int wpcs, int bpcs, move_s moves[240]) +{ + int i; + if ((((moves[i]) >> 19) & 0x0F) != 13 + && (((moves[i]) >> 19) & 0x0F) != 1 && (((moves[i]) >> 19) & 0x0F) != 2) + if ((wpcs + bpcs) == 1) + extend += 4; + return extend; +} + +/* A splitter was generating an unnecessary andi instruction. Verify it's + not in our output. */ +/* { dg-final { scan-assembler-not "andi\t\[a-z\]\[0-9\],\[a-z\]\[0-9\],-1" } } */ diff --git a/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047-1.h b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047-1.h new file mode 100644 index 0000000..3dd6434 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047-1.h @@ -0,0 +1,6 @@ + + + + +#include "location-overflow-test-pr116047-2.h" +static_assert (__LINE__ == 6, ""); diff --git a/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047-2.h b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047-2.h new file mode 100644 index 0000000..048f715 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047-2.h @@ -0,0 +1 @@ +int i; diff --git a/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047.c b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047.c new file mode 100644 index 0000000..75161fa --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047.c @@ -0,0 +1,5 @@ +/* PR preprocessor/116047 */ +/* { dg-do preprocess } */ +/* { dg-options "-nostdinc -std=c23 -fplugin-arg-location_overflow_plugin-value=0x4ffe0180" } */ +#include "location-overflow-test-pr116047-1.h" +/* { dg-final { scan-file location-overflow-test-pr116047.i "static_assert\[^\n\r]\*6\[^\n\r]\*== 6" } } */ diff --git a/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061-1.h b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061-1.h new file mode 100644 index 0000000..ebf7704 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061-1.h @@ -0,0 +1,6 @@ + + + + +#include "location-overflow-test-pr120061-2.h" + diff --git a/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061-2.h b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061-2.h new file mode 100644 index 0000000..048f715 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061-2.h @@ -0,0 +1 @@ +int i; diff --git a/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061.c b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061.c new file mode 100644 index 0000000..e8e8038 --- /dev/null +++ b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061.c @@ -0,0 +1,6 @@ +/* PR preprocessor/120061 */ +/* { dg-do preprocess } */ +/* { dg-options "-nostdinc -std=c23 -fplugin-arg-location_overflow_plugin-value=0x61000000" } */ +#include "location-overflow-test-pr120061-1.h" +static_assert (__LINE__ == 5, ""); +/* { dg-final { scan-file location-overflow-test-pr120061.i "static_assert\[^\n\r]\*5\[^\n\r]\*== 5" } } */ diff --git a/gcc/testsuite/gcc.dg/plugin/location_overflow_plugin.cc b/gcc/testsuite/gcc.dg/plugin/location_overflow_plugin.cc index f731b14..f770d35 100644 --- 
a/gcc/testsuite/gcc.dg/plugin/location_overflow_plugin.cc +++ b/gcc/testsuite/gcc.dg/plugin/location_overflow_plugin.cc @@ -85,9 +85,18 @@ plugin_init (struct plugin_name_args *plugin_info, error_at (UNKNOWN_LOCATION, "missing plugin argument"); /* With 64-bit locations, the thresholds are larger, so shift the base - location argument accordingly. */ + location argument accordingly, basically remap the GCC 14 32-bit + location_t argument values to 64-bit location_t counterparts. There + is one exception for values slightly before the 32-bit location_t + LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES (0x50000000). In that case + remap them to the same amount before the 64-bit location_t + LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES - + ((location_t) 0x50000000) << 31. */ gcc_assert (sizeof (location_t) == sizeof (uint64_t)); - base_location = 1 + ((base_location - 1) << 31); + if (base_location >= 0x4f000000 && base_location <= 0x4fffffff) + base_location += (((location_t) 0x50000000) << 31) - 0x50000000; + else + base_location = 1 + ((base_location - 1) << 31); register_callback (plugin_info->base_name, PLUGIN_PRAGMAS, @@ -107,7 +116,7 @@ plugin_init (struct plugin_name_args *plugin_info, break; default: - error_at (UNKNOWN_LOCATION, "unrecognized value for plugin argument"); + break; } return 0; diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp b/gcc/testsuite/gcc.dg/plugin/plugin.exp index 90c9162..96e76d2 100644 --- a/gcc/testsuite/gcc.dg/plugin/plugin.exp +++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp @@ -138,7 +138,9 @@ set plugin_test_list [list \ { location_overflow_plugin.cc \ location-overflow-test-1.c \ location-overflow-test-2.c \ - location-overflow-test-pr83173.c } \ + location-overflow-test-pr83173.c \ + location-overflow-test-pr116047.c \ + location-overflow-test-pr120061.c } \ { must_tail_call_plugin.cc \ must-tail-call-1.c \ must-tail-call-2.c } \ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_134-pr120089.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_134-pr120089.c new file mode 100644 index 0000000..4d8199c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_134-pr120089.c @@ -0,0 +1,66 @@ +/* { dg-add-options vect_early_break } */ +/* { dg-additional-options "-funswitch-loops" } */ + +#include "tree-vect.h" + +typedef int type; +typedef type Vec2[2]; + +struct BytesVec { + type d[100]; +}; + +__attribute__((noipa)) struct BytesVec +buildVertexBufferData(const Vec2 *origVertices, bool needsZW, + unsigned paddingSize, unsigned long t) { + const unsigned vertexCount = t; + struct BytesVec data = (struct BytesVec){.d = {0}}; + type *nextVertexPtr = data.d; + + for (unsigned vertexIdx = 0u; vertexIdx < vertexCount; ++vertexIdx) { + + if (vertexIdx > t) + __builtin_trap(); + __builtin_memcpy(nextVertexPtr, &origVertices[vertexIdx], + 2 * sizeof(type)); + nextVertexPtr += 2; + + if (needsZW) { + nextVertexPtr += 2; + } + + nextVertexPtr += paddingSize; + } + + return data; +} +Vec2 origVertices[] = { + {0, 1}, {2, 3}, {4, 5}, {6, 7}, + {8, 9}, {10, 11}, {12, 13}, {14, 15}, + {16, 17}, {18, 19}, {20, 21}, {22, 23}, + {24, 25}, {26, 27}, {27, 28}, {29, 30}, +}; + +int main() +{ + check_vect (); + struct BytesVec vec + = buildVertexBufferData(origVertices, false, 0, + sizeof(origVertices) / sizeof(origVertices[0])); + + int errors = 0; + for (unsigned i = 0; i < 100; i++) { + if (i / 2 < sizeof(origVertices) / sizeof(origVertices[0])) { + int ii = i; + int e = origVertices[ii / 2][ii % 2]; + if (vec.d[i] != e) + errors++; + } else { + if (vec.d[i] != 0) + errors++; + 
} + } + if (errors) + __builtin_abort(); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_135-pr120143.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_135-pr120143.c new file mode 100644 index 0000000..1ee30a8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_135-pr120143.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-add-options vect_early_break } */ +/* { dg-additional-options "-O3 -fwhole-program" } */ + +short a; +extern _Bool b[][23]; +short g = 6; +int v[4]; +int x[3]; +void c(short g, int v[], int x[]) { + for (;;) + for (unsigned y = 0; y < 023; y++) { + b[y][y] = v[y]; + for (_Bool aa = 0; aa < (_Bool)g; aa = x[y]) + a = a > 0; + } +} +int main() { c(g, v, x); } diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_5.c index f06a74a..05e266a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_5.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_5.c @@ -11,8 +11,7 @@ extern "C" { /* ** load_vl1: -** ptrue (p[0-7])\.[bhsd], vl1 -** ld1h z0\.h, \1/z, \[x0\] +** ldr h0, \[x0\] ** ret */ svint16_t @@ -22,7 +21,12 @@ load_vl1 (int16_t *ptr) } /* -** load_vl2: +** load_vl2: { target aarch64_little_endian } +** ldr s0, \[x0\] +** ret +*/ +/* +** load_vl2: { target aarch64_big_endian } ** ptrue (p[0-7])\.h, vl2 ** ld1h z0\.h, \1/z, \[x0\] ** ret @@ -46,7 +50,12 @@ load_vl3 (int16_t *ptr) } /* -** load_vl4: +** load_vl4: { target aarch64_little_endian } +** ldr d0, \[x0\] +** ret +*/ +/* +** load_vl4: { target aarch64_big_endian } ** ptrue (p[0-7])\.h, vl4 ** ld1h z0\.h, \1/z, \[x0\] ** ret @@ -94,7 +103,12 @@ load_vl7 (int16_t *ptr) } /* -** load_vl8: +** load_vl8: { target aarch64_little_endian } +** ldr q0, \[x0\] +** ret +*/ +/* +** load_vl8: { target aarch64_big_endian } ** ptrue (p[0-7])\.h, vl8 ** ld1h z0\.h, \1/z, \[x0\] ** ret diff --git a/gcc/testsuite/gcc.target/aarch64/sve/ldst_ptrue_pat_128_to_neon.c b/gcc/testsuite/gcc.target/aarch64/sve/ldst_ptrue_pat_128_to_neon.c new file mode 100644 index 0000000..2d47c1f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/ldst_ptrue_pat_128_to_neon.c @@ -0,0 +1,81 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-require-effective-target aarch64_little_endian } */ + +#include <arm_sve.h> + +#define TEST(TYPE, TY, W, B) \ + sv##TYPE \ + ld1_##TY##W##B##_1 (TYPE *x) \ + { \ + svbool_t pg = svwhilelt_b##B (0, W); \ + return svld1_##TY##B (pg, x); \ + } \ + sv##TYPE \ + ld1_##TY##W##B##_2 (TYPE *x) \ + { \ + svbool_t pg = svptrue_pat_b##B ((enum svpattern) (W > 8 ? 9 : W)); \ + return svld1_##TY##B (pg, x); \ + } \ + void \ + st1_##TY##W##B##_1 (TYPE *x, sv##TYPE data) \ + { \ + svbool_t pg = svwhilelt_b##B (0, W); \ + return svst1_##TY##B (pg, x, data); \ + } \ + void \ + st1_##TY##W##B##_2 (TYPE *x, sv##TYPE data) \ + { \ + svbool_t pg = svptrue_pat_b##B ((enum svpattern) (W > 8 ? 
9 : W)); \ + return svst1_##TY##B (pg, x, data); \ + } \ + +#define TEST64(TYPE, TY, B) \ + TEST (TYPE, TY, 1, B) \ + TEST (TYPE, TY, 2, B) \ + +#define TEST32(TYPE, TY, B) \ + TEST64 (TYPE, TY, B) \ + TEST (TYPE, TY, 4, B) \ + +#define TEST16(TYPE, TY, B) \ + TEST32 (TYPE, TY, B) \ + TEST (TYPE, TY, 8, B) \ + +#define TEST8(TYPE, TY, B) \ + TEST16 (TYPE, TY, B) \ + TEST (TYPE, TY, 16, B) + +#define T(TYPE, TY, B) \ + TEST##B (TYPE, TY, B) + +T (bfloat16_t, bf, 16) +T (float16_t, f, 16) +T (float32_t, f, 32) +T (float64_t, f, 64) +T (int8_t, s, 8) +T (int16_t, s, 16) +T (int32_t, s, 32) +T (int64_t, s, 64) +T (uint8_t, u, 8) +T (uint16_t, u, 16) +T (uint32_t, u, 32) +T (uint64_t, u, 64) + +/* { dg-final { scan-assembler-times {\tldr\tq0, \[x0\]} 24 } } */ +/* { dg-final { scan-assembler-times {\tldr\td0, \[x0\]} 24 } } */ +/* { dg-final { scan-assembler-times {\tldr\ts0, \[x0\]} 18 } } */ +/* { dg-final { scan-assembler-times {\tldr\th0, \[x0\]} 12 } } */ +/* { dg-final { scan-assembler-times {\tldr\tb0, \[x0\]} 4 } } */ + +/* { dg-final { scan-assembler-times {\tstr\tq0, \[x0\]} 24 } } */ +/* { dg-final { scan-assembler-times {\tstr\td0, \[x0\]} 24 } } */ +/* { dg-final { scan-assembler-times {\tstr\ts0, \[x0\]} 18 } } */ +/* { dg-final { scan-assembler-times {\tstr\th0, \[x0\]} 12 } } */ +/* { dg-final { scan-assembler-times {\tstr\tb0, \[x0\]} 4 } } */ + +svint8_t foo (int8_t *x) +{ + return svld1_s8 (svptrue_b16 (), x); +} +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, all\n\tld1b} 1 } } */
\ No newline at end of file diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_7.c b/gcc/testsuite/gcc.target/aarch64/sve/while_7.c index a66a20d..ab2fa36 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/while_7.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/while_7.c @@ -19,7 +19,7 @@ TEST_ALL (ADD_LOOP) -/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl8\n} 1 } } */ -/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl8\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\td[0-9]+, \[x0\]} 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\tq[0-9]+, \[x0\]} 1 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 2 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_9.c b/gcc/testsuite/gcc.target/aarch64/sve/while_9.c index dd3f404..99940dd 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/while_9.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/while_9.c @@ -19,7 +19,7 @@ TEST_ALL (ADD_LOOP) -/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl16\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\tq[0-9]+\, \[x0\]} 1 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h,} 2 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 2 } } */ /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c index 52b8737..f3fea52 100644 --- a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c +++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c @@ -106,8 +106,7 @@ TEST_CMP (greaterthanqual, >=, int16x8_t, float16x8_t) /* { dg-final { scan-assembler-times {vdiv\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */ /* For float16_t. */ -/* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 2 } } */ -/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 4 } } */ +/* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 6 } } */ /* For float16x4_t. 
*/ /* { dg-final { scan-assembler-times {vceq\.f16\td[0-9]+, d[0-9]+} 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr117839-3a.c b/gcc/testsuite/gcc.target/i386/pr117839-3a.c new file mode 100644 index 0000000..81afa9d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117839-3a.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */ +/* { dg-final { scan-assembler-times "xor\[a-z\]*\[\t \]*%xmm\[0-9\]\+,\[^,\]*" 1 } } */ + +typedef char v4qi __attribute__((vector_size(4))); +typedef char v16qi __attribute__((vector_size(16))); + +v4qi a; +v16qi b; +void +foo (v4qi* c, v16qi* d) +{ + v4qi sum = __extension__(v4qi){0, 0, 0, 0}; + v16qi sum2 = __extension__(v16qi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0}; + for (int i = 0; i != 100; i++) + sum += c[i]; + for (int i = 0 ; i != 100; i++) + sum2 += d[i]; + a = sum; + b = sum2; +} diff --git a/gcc/testsuite/gcc.target/i386/pr117839-3b.c b/gcc/testsuite/gcc.target/i386/pr117839-3b.c new file mode 100644 index 0000000..a599c28 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr117839-3b.c @@ -0,0 +1,5 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64-v3" } */ +/* { dg-final { scan-assembler-times "xor\[a-z\]*\[\t \]*%xmm\[0-9\]\+,\[^,\]*" 1 } } */ + +#include "pr117839-3a.c" diff --git a/gcc/testsuite/gcc.target/i386/pr119919.c b/gcc/testsuite/gcc.target/i386/pr119919.c index ed64656..e39819f 100644 --- a/gcc/testsuite/gcc.target/i386/pr119919.c +++ b/gcc/testsuite/gcc.target/i386/pr119919.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -msse2 -fdump-tree-vect-details" } */ +/* { dg-options "-O2 -msse2 -fdump-tree-vect-details -mtune=znver1" } */ int a[9*9]; bool b[9]; void test() diff --git a/gcc/testsuite/gcc.target/riscv/arch-45.c b/gcc/testsuite/gcc.target/riscv/arch-45.c new file mode 100644 index 0000000..afffb99 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-45.c @@ -0,0 +1,5 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_svadu -mabi=lp64" } */ +int foo() +{ +} diff --git a/gcc/testsuite/gcc.target/riscv/arch-46.c b/gcc/testsuite/gcc.target/riscv/arch-46.c new file mode 100644 index 0000000..2a06217 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-46.c @@ -0,0 +1,5 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_svade -mabi=lp64" } */ +int foo() +{ +} diff --git a/gcc/testsuite/gcc.target/riscv/arch-47.c b/gcc/testsuite/gcc.target/riscv/arch-47.c new file mode 100644 index 0000000..06bc80f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-47.c @@ -0,0 +1,5 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_sdtrig_ssstrict -mabi=lp64" } */ +int foo() +{ +} diff --git a/gcc/testsuite/gcc.target/riscv/arch-48.c b/gcc/testsuite/gcc.target/riscv/arch-48.c new file mode 100644 index 0000000..58a558e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/arch-48.c @@ -0,0 +1,5 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zama16b -mabi=lp64" } */ +int foo() +{ +} diff --git a/gcc/testsuite/gcc.target/riscv/pr120137.c b/gcc/testsuite/gcc.target/riscv/pr120137.c new file mode 100644 index 0000000..c55a1c1 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr120137.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvl256b -mrvv-vector-bits=zvl -mabi=lp64" } */ + +char b[13][13]; +void c() { + for (int d = 0; d < 13; ++d) + for (int e = 0; e < 13; ++e) + b[d][e] = e == 0 ? 
-98 : 38; +} + + + diff --git a/gcc/testsuite/gcc.target/riscv/pr120154.c b/gcc/testsuite/gcc.target/riscv/pr120154.c new file mode 100644 index 0000000..fd849ca --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr120154.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gv -mabi=lp64" } */ + + + +typedef __attribute__((__vector_size__(4))) char V; + +V g; + +V +bar(V a, V b) +{ + V s = a + b + g; + return s; +} + +V +foo() +{ + return bar((V){20}, (V){23, 150}); +} + diff --git a/gcc/testsuite/gcc.target/s390/vector/cstoreti-1.c b/gcc/testsuite/gcc.target/s390/vector/cstoreti-1.c new file mode 100644 index 0000000..f2a131b --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/cstoreti-1.c @@ -0,0 +1,127 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2 -march=z13" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +** test_le: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** vecg \2,\1 +** jne \.L.+ +** vchlgs %v.,\1,\2 +** lghi %r2,0 +** locghinl %r2,1 +** br %r14 +*/ + +int test_le (__int128 x, __int128 y) { return x <= y; } + +/* +** test_leu: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** veclg \2,\1 +** jne \.L.+ +** vchlgs %v.,\1,\2 +** lghi %r2,0 +** locghinl %r2,1 +** br %r14 +*/ + +int test_leu (unsigned __int128 x, unsigned __int128 y) { return x <= y; } + +/* +** test_lt: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** vecg \1,\2 +** jne \.L.+ +** vchlgs %v.,\2,\1 +** lghi %r2,0 +** locghil %r2,1 +** br %r14 +*/ + +int test_lt (__int128 x, __int128 y) { return x < y; } + +/* +** test_ltu: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** veclg \1,\2 +** jne \.L.+ +** vchlgs %v.,\2,\1 +** lghi %r2,0 +** locghil %r2,1 +** br %r14 +*/ + +int test_ltu (unsigned __int128 x, unsigned __int128 y) { return x < y; } + +/* +** test_ge: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** vecg \1,\2 +** jne \.L.+ +** vchlgs %v.,\2,\1 +** lghi %r2,0 +** locghinl %r2,1 +** br %r14 +*/ + +int test_ge (__int128 x, __int128 y) { return x >= y; } + +/* +** test_geu: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** veclg \1,\2 +** jne \.L.+ +** vchlgs %v.,\2,\1 +** lghi %r2,0 +** locghinl %r2,1 +** br %r14 +*/ + +int test_geu (unsigned __int128 x, unsigned __int128 y) { return x >= y; } + +/* +** test_gt: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** vecg \2,\1 +** jne \.L.+ +** vchlgs %v.,\1,\2 +** lghi %r2,0 +** locghil %r2,1 +** br %r14 +*/ + +int test_gt (__int128 x, __int128 y) { return x > y; } + +/* +** test_gtu: +** vl (%v.),0\(%r2\),3 +** vl (%v.),0\(%r3\),3 +** veclg \2,\1 +** jne \.L.+ +** vchlgs %v.,\1,\2 +** lghi %r2,0 +** locghil %r2,1 +** br %r14 +*/ + +int test_gtu (unsigned __int128 x, unsigned __int128 y) { return x > y; } + +/* { dg-final { scan-assembler-times {vceqgs\t} 4 } } */ +/* { dg-final { scan-assembler-times {locghie\t} 2 } } */ +/* { dg-final { scan-assembler-times {locghine\t} 2 } } */ + +int test_eq (__int128 x, __int128 y) { return x == y; } + +int test_equ (unsigned __int128 x, unsigned __int128 y) { return x == y; } + +int test_ne (__int128 x, __int128 y) { return x != y; } + +int test_neu (unsigned __int128 x, unsigned __int128 y) { return x != y; } diff --git a/gcc/testsuite/gcc.target/s390/vector/cstoreti-2.c b/gcc/testsuite/gcc.target/s390/vector/cstoreti-2.c new file mode 100644 index 0000000..d7b0382 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/vector/cstoreti-2.c @@ -0,0 +1,25 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2 -march=z17" } */ +/* { 
dg-final { scan-assembler-times {vecq\t} 8 } } */ +/* { dg-final { scan-assembler-times {veclq\t} 4 } } */ +/* { dg-final { scan-assembler-times {locghile\t} 1 } } LE */ +/* { dg-final { scan-assembler-times {slbgr\t} 1 } } LEU */ +/* { dg-final { scan-assembler-times {locghil\t} 2 } } LT LTU */ +/* { dg-final { scan-assembler-times {locghihe\t} 2 } } GE GEU */ +/* { dg-final { scan-assembler-times {locghih\t} 1 } } GT */ +/* { dg-final { scan-assembler-times {alcgr\t} 1 } } GTU */ +/* { dg-final { scan-assembler-times {locghie\t} 2 } } EQ EQU */ +/* { dg-final { scan-assembler-times {locghine\t} 2 } } NE NEU */ + +int test_le (__int128 x, __int128 y) { return x <= y; } +int test_leu (unsigned __int128 x, unsigned __int128 y) { return x <= y; } +int test_lt (__int128 x, __int128 y) { return x < y; } +int test_ltu (unsigned __int128 x, unsigned __int128 y) { return x < y; } +int test_ge (__int128 x, __int128 y) { return x >= y; } +int test_geu (unsigned __int128 x, unsigned __int128 y) { return x >= y; } +int test_gt (__int128 x, __int128 y) { return x > y; } +int test_gtu (unsigned __int128 x, unsigned __int128 y) { return x > y; } +int test_eq (__int128 x, __int128 y) { return x == y; } +int test_equ (unsigned __int128 x, unsigned __int128 y) { return x == y; } +int test_ne (__int128 x, __int128 y) { return x != y; } +int test_neu (unsigned __int128 x, unsigned __int128 y) { return x != y; } diff --git a/gcc/testsuite/gfortran.dg/pr119948.f90 b/gcc/testsuite/gfortran.dg/pr119948.f90 index 9ecb080..2e36fae 100644 --- a/gcc/testsuite/gfortran.dg/pr119948.f90 +++ b/gcc/testsuite/gfortran.dg/pr119948.f90 @@ -1,7 +1,8 @@ -! { dg-do compile } +! { dg-do run } ! -! Test the fix for PR119948, which used to fail as indicated below with, -! "Error: Bad allocate-object at (1) for a PURE procedure" +! Test the fix for PR119948, which used to fail as indicated below with: +! (1) "Error: Bad allocate-object at (1) for a PURE procedure" +! (2) "Error: ‘construct_test2 at (1) is not a variable" ! ! Contributed by Damian Rouson <damian@archaeologic.codes> ! @@ -18,33 +19,65 @@ module test_m type(test_t) :: test type(test_t), intent(in) :: arg end function - pure module function construct_test_sub(arg) result(test) + + pure module function construct_test2(arg) + implicit none + type(test_t) construct_test2 + type(test_t), intent(in) :: arg + end function + + pure module function construct_test_3(arg) result(test) implicit none type(test_t) :: test type(test_t), intent(in) :: arg end function + + pure module function construct_test_4(arg) + implicit none + type(test_t) :: construct_test_4 + type(test_t), intent(in) :: arg + end function end interface contains module procedure construct_test - allocate(test%i, source = arg%i) ! Used to fail here + allocate(test%i, source = arg%i) ! Fail #1 + end procedure + + module procedure construct_test2 + allocate(construct_test2%i, source = arg%i) ! Fail #2 end procedure end module submodule (test_m)test_s contains - module procedure construct_test_sub + module procedure construct_test_3 allocate(test%i, source = arg%i) ! This was OK. end procedure + + module procedure construct_test_4 + allocate(construct_test_4%i, source = arg%i) ! This was OK. + end procedure end submodule use test_m type(test_t) :: res, dummy - dummy%i = 42 +! + dummy%i = int (rand () * 1e6) res = construct_test (dummy) if (res%i /= dummy%i) stop 1 - dummy%i = -42 - res = construct_test_sub (dummy) +! + dummy%i = int (rand () * 1e6) + res = construct_test2 (dummy) if (res%i /= dummy%i) stop 2 +! 
+ dummy%i = int (rand () * 1e6) + res = construct_test_3 (dummy) + if (res%i /= dummy%i) stop 3 + + dummy%i = int (rand () * 1e6) + res = construct_test_4 (dummy) + if (res%i /= dummy%i) stop 4 + deallocate (res%i, dummy%i) end diff --git a/gcc/testsuite/gfortran.dg/pr120049_a.f90 b/gcc/testsuite/gfortran.dg/pr120049_a.f90 new file mode 100644 index 0000000..c404a4d --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr120049_a.f90 @@ -0,0 +1,15 @@ +! { dg-do preprocess } +! { dg-additional-options "-cpp" } +! +! Test the fix for PR86248 +program tests_gtk_sup + use gtk_sup + implicit none + type(c_ptr), target :: val + if (c_associated(val, c_loc(val))) then + stop 1 + endif + if (c_associated(c_loc(val), val)) then + stop 2 + endif +end program tests_gtk_sup diff --git a/gcc/testsuite/gfortran.dg/pr120049_b.f90 b/gcc/testsuite/gfortran.dg/pr120049_b.f90 new file mode 100644 index 0000000..127db98 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr120049_b.f90 @@ -0,0 +1,8 @@ +! { dg-do run } +! { dg-additional-sources pr120049_a.f90 } +! +! Module for pr120049.f90 +! +module gtk_sup + use, intrinsic :: iso_c_binding +end module gtk_sup diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc index 5b63bf6..fe8aee0 100644 --- a/gcc/tree-if-conv.cc +++ b/gcc/tree-if-conv.cc @@ -1066,11 +1066,7 @@ if_convertible_gimple_assign_stmt_p (gimple *stmt, fprintf (dump_file, "tree could trap...\n"); return false; } - else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs)) - || POINTER_TYPE_P (TREE_TYPE (lhs))) - && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)) - && arith_code_with_undefined_signed_overflow - (gimple_assign_rhs_code (stmt))) + else if (gimple_with_undefined_signed_overflow (stmt)) /* We have to rewrite stmts with undefined overflow. */ need_to_rewrite_undefined = true; @@ -2830,7 +2826,6 @@ predicate_statements (loop_p loop) for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);) { gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi)); - tree lhs; if (!stmt) ; else if (is_false_predicate (cond) @@ -2886,12 +2881,7 @@ predicate_statements (loop_p loop) gsi_replace (&gsi, new_stmt, true); } - else if (((lhs = gimple_assign_lhs (stmt)), true) - && (INTEGRAL_TYPE_P (TREE_TYPE (lhs)) - || POINTER_TYPE_P (TREE_TYPE (lhs))) - && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)) - && arith_code_with_undefined_signed_overflow - (gimple_assign_rhs_code (stmt))) + else if (gimple_with_undefined_signed_overflow (stmt)) rewrite_to_defined_overflow (&gsi); else if (gimple_vdef (stmt)) { @@ -2946,7 +2936,7 @@ predicate_statements (loop_p loop) gsi_replace (&gsi, new_call, true); } - lhs = gimple_get_lhs (gsi_stmt (gsi)); + tree lhs = gimple_get_lhs (gsi_stmt (gsi)); if (lhs && TREE_CODE (lhs) == SSA_NAME) ssa_names.add (lhs); gsi_next (&gsi); diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc index 4ca0875..9d64d3a 100644 --- a/gcc/tree-scalar-evolution.cc +++ b/gcc/tree-scalar-evolution.cc @@ -3932,10 +3932,7 @@ final_value_replacement_loop (class loop *loop) gsi2 = gsi_start (stmts); while (!gsi_end_p (gsi2)) { - gimple *stmt = gsi_stmt (gsi2); - if (is_gimple_assign (stmt) - && arith_code_with_undefined_signed_overflow - (gimple_assign_rhs_code (stmt))) + if (gimple_with_undefined_signed_overflow (gsi_stmt (gsi2))) rewrite_to_defined_overflow (&gsi2); gsi_next (&gsi2); } diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc index f791994..19990d6 100644 --- a/gcc/tree-ssa-ifcombine.cc +++ b/gcc/tree-ssa-ifcombine.cc @@ -514,15 +514,9 @@ ifcombine_mark_ssa_name_walk (tree *t, int *, void *data_) static 
inline void ifcombine_rewrite_to_defined_overflow (gimple_stmt_iterator gsi) { - gassign *ass = dyn_cast <gassign *> (gsi_stmt (gsi)); - if (!ass) + if (!gimple_with_undefined_signed_overflow (gsi_stmt (gsi))) return; - tree lhs = gimple_assign_lhs (ass); - if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs)) - || POINTER_TYPE_P (TREE_TYPE (lhs))) - && arith_code_with_undefined_signed_overflow - (gimple_assign_rhs_code (ass))) - rewrite_to_defined_overflow (&gsi); + rewrite_to_defined_overflow (&gsi); } diff --git a/gcc/tree-ssa-loop-im.cc b/gcc/tree-ssa-loop-im.cc index a3ca5af..ae2fd87 100644 --- a/gcc/tree-ssa-loop-im.cc +++ b/gcc/tree-ssa-loop-im.cc @@ -1241,12 +1241,24 @@ compute_invariantness (basic_block bb) lim_data->cost); } - if (lim_data->cost >= LIM_EXPENSIVE - /* When we run before PRE and PRE is active hoist all expressions - since PRE would do so anyway and we can preserve range info - but PRE cannot. */ - || (flag_tree_pre && !in_loop_pipeline)) + if (lim_data->cost >= LIM_EXPENSIVE) set_profitable_level (stmt); + /* When we run before PRE and PRE is active hoist all expressions + to the always executed loop since PRE would do so anyway + and we can preserve range info while PRE cannot. */ + else if (flag_tree_pre && !in_loop_pipeline + && outermost) + { + class loop *mloop = lim_data->max_loop; + if (loop_depth (outermost) > loop_depth (mloop)) + { + mloop = outermost; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " constraining to loop depth %d\n\n\n", + loop_depth (mloop)); + } + set_level (stmt, bb->loop_father, mloop); + } } } @@ -1407,11 +1419,7 @@ move_computations_worker (basic_block bb) when the target loop header is executed and the stmt may invoke undefined integer or pointer overflow rewrite it to unsigned arithmetic. 
*/ - if (is_gimple_assign (stmt) - && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt))) - && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt))) - && arith_code_with_undefined_signed_overflow - (gimple_assign_rhs_code (stmt)) + if (gimple_with_undefined_signed_overflow (stmt) && (!ALWAYS_EXECUTED_IN (bb) || !(ALWAYS_EXECUTED_IN (bb) == level || flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level)))) diff --git a/gcc/tree-ssa-loop-split.cc b/gcc/tree-ssa-loop-split.cc index 5f78c0b..80f488a 100644 --- a/gcc/tree-ssa-loop-split.cc +++ b/gcc/tree-ssa-loop-split.cc @@ -663,10 +663,7 @@ split_loop (class loop *loop1) gsi = gsi_start (stmts2); while (!gsi_end_p (gsi)) { - gimple *stmt = gsi_stmt (gsi); - if (is_gimple_assign (stmt) - && arith_code_with_undefined_signed_overflow - (gimple_assign_rhs_code (stmt))) + if (gimple_with_undefined_signed_overflow (gsi_stmt (gsi))) rewrite_to_defined_overflow (&gsi); gsi_next (&gsi); } diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc index 4017eea..13bb85c 100644 --- a/gcc/tree-ssa-reassoc.cc +++ b/gcc/tree-ssa-reassoc.cc @@ -2925,30 +2925,22 @@ update_range_test (struct range_entry *range, struct range_entry *otherrange, !gsi_end_p (gsi); gsi_next (&gsi)) { gimple *stmt = gsi_stmt (gsi); - if (is_gimple_assign (stmt)) - if (tree lhs = gimple_assign_lhs (stmt)) - if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs)) - || POINTER_TYPE_P (TREE_TYPE (lhs))) - && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))) - { - enum tree_code code = gimple_assign_rhs_code (stmt); - if (arith_code_with_undefined_signed_overflow (code)) - { - gimple_stmt_iterator gsip = gsi; - gimple_stmt_iterator gsin = gsi; - gsi_prev (&gsip); - gsi_next (&gsin); - rewrite_to_defined_overflow (&gsi); - unsigned uid = gimple_uid (stmt); - if (gsi_end_p (gsip)) - gsip = gsi_after_labels (bb); - else - gsi_next (&gsip); - for (; gsi_stmt (gsip) != gsi_stmt (gsin); - gsi_next (&gsip)) - gimple_set_uid (gsi_stmt (gsip), uid); - } - } + if (gimple_with_undefined_signed_overflow (stmt)) + { + gimple_stmt_iterator gsip = gsi; + gimple_stmt_iterator gsin = gsi; + gsi_prev (&gsip); + gsi_next (&gsin); + rewrite_to_defined_overflow (&gsi); + unsigned uid = gimple_uid (stmt); + if (gsi_end_p (gsip)) + gsip = gsi_after_labels (bb); + else + gsi_next (&gsip); + for (; gsi_stmt (gsip) != gsi_stmt (gsin); + gsi_next (&gsip)) + gimple_set_uid (gsi_stmt (gsip), uid); + } } if (opcode == BIT_IOR_EXPR diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index 231a3ca..9fd1ef2 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -734,7 +734,6 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) stmt_vec_info stmt_vinfo = vect_stmt_to_vectorize (loop_vinfo->lookup_stmt (stmt)); - stmt = STMT_VINFO_STMT (stmt_vinfo); auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo); if (!dr_ref) continue; diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 562e222..80e9c01 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -5042,14 +5042,17 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size, vec<stmt_vec_info> roots = vNULL; vec<tree> remain = vNULL; gphi *phi = as_a<gphi *> (STMT_VINFO_STMT (stmt_info)); - stmts.create (1); tree def = gimple_phi_arg_def_from_edge (phi, latch_e); stmt_vec_info lc_info = loop_vinfo->lookup_def (def); - stmts.quick_push (vect_stmt_to_vectorize (lc_info)); - vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group, - stmts, roots, remain, - max_tree_size, &limit, - bst_map, NULL, force_single_lane); + 
if (lc_info) + { + stmts.create (1); + stmts.quick_push (vect_stmt_to_vectorize (lc_info)); + vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group, + stmts, roots, remain, + max_tree_size, &limit, + bst_map, NULL, force_single_lane); + } /* When the latch def is from a different cycle this can only be a induction. Build a simple instance for this. ??? We should be able to start discovery from the PHI @@ -5059,8 +5062,6 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size, tem.quick_push (stmt_info); if (!bst_map->get (tem)) { - gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) - == vect_induction_def); stmts.create (1); stmts.quick_push (stmt_info); vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group, diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index ea0b426..a8762ba 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -419,18 +419,21 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, } } - /* Check if it's an induction and multiple exits. In this case there will be - a usage later on after peeling which is needed for the alternate exit. */ + /* Check if it's a not live PHI and multiple exits. In this case + there will be a usage later on after peeling which is needed for the + alternate exit. */ if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) - && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def) + && is_a <gphi *> (stmt) + && ((! VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)) + && ! *live_p) + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)) { if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "vec_stmt_relevant_p: induction forced for " - "early break.\n"); + dump_printf_loc (MSG_NOTE, vect_location, + "vec_stmt_relevant_p: PHI forced live for " + "early break.\n"); LOOP_VINFO_EARLY_BREAKS_LIVE_IVS (loop_vinfo).safe_push (stmt_info); *live_p = true; - } if (*live_p && *relevant == vect_unused_in_scope @@ -714,6 +717,8 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal) bb = bbs[i]; for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si)) { + if (virtual_operand_p (gimple_phi_result (gsi_stmt (si)))) + continue; stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G", @@ -8786,6 +8791,15 @@ vectorizable_store (vec_info *vinfo, if (n == const_nunits) { int mis_align = dr_misalignment (first_dr_info, vectype); + /* With VF > 1 we advance the DR by step, if that is constant + and only aligned when performed VF times, DR alignment + analysis can analyze this as aligned since it assumes + contiguous accesses. But that is not how we code generate + here, so adjust for this. */ + if (maybe_gt (vf, 1u) + && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr), + DR_TARGET_ALIGNMENT (first_dr_info))) + mis_align = -1; dr_alignment_support dr_align = vect_supportable_dr_alignment (vinfo, dr_info, vectype, mis_align); @@ -8807,6 +8821,10 @@ vectorizable_store (vec_info *vinfo, ltype = build_vector_type (elem_type, n); lvectype = vectype; int mis_align = dr_misalignment (first_dr_info, ltype); + if (maybe_gt (vf, 1u) + && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr), + DR_TARGET_ALIGNMENT (first_dr_info))) + mis_align = -1; dr_alignment_support dr_align = vect_supportable_dr_alignment (vinfo, dr_info, ltype, mis_align); @@ -8867,17 +8885,10 @@ vectorizable_store (vec_info *vinfo, } } unsigned align; - /* ??? 
We'd want to use - if (alignment_support_scheme == dr_aligned) - align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info)); - since doing that is what we assume we can in the above checks. - But this interferes with groups with gaps where for example - VF == 2 makes the group in the unrolled loop aligned but the - fact that we advance with step between the two subgroups - makes the access to the second unaligned. See PR119586. - We have to anticipate that here or adjust code generation to - avoid the misaligned loads by means of permutations. */ - align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info)); + if (alignment_support_scheme == dr_aligned) + align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info)); + else + align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info)); /* Alignment is at most the access size if we do multiple stores. */ if (nstores > 1) align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align); @@ -10805,6 +10816,15 @@ vectorizable_load (vec_info *vinfo, if (n == const_nunits) { int mis_align = dr_misalignment (first_dr_info, vectype); + /* With VF > 1 we advance the DR by step, if that is constant + and only aligned when performed VF times, DR alignment + analysis can analyze this as aligned since it assumes + contiguous accesses. But that is not how we code generate + here, so adjust for this. */ + if (maybe_gt (vf, 1u) + && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr), + DR_TARGET_ALIGNMENT (first_dr_info))) + mis_align = -1; dr_alignment_support dr_align = vect_supportable_dr_alignment (vinfo, dr_info, vectype, mis_align); @@ -10833,6 +10853,10 @@ vectorizable_load (vec_info *vinfo, if (VECTOR_TYPE_P (ptype)) { mis_align = dr_misalignment (first_dr_info, ptype); + if (maybe_gt (vf, 1u) + && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr), + DR_TARGET_ALIGNMENT (first_dr_info))) + mis_align = -1; dr_align = vect_supportable_dr_alignment (vinfo, dr_info, ptype, mis_align); @@ -10852,8 +10876,10 @@ vectorizable_load (vec_info *vinfo, } } unsigned align; - /* ??? The above is still wrong, see vectorizable_store. */ - align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info)); + if (alignment_support_scheme == dr_aligned) + align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info)); + else + align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info)); /* Alignment is at most the access size if we do multiple loads. */ if (nloads > 1) align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align); |