Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog | 608
-rw-r--r-- gcc/DATESTAMP | 2
-rw-r--r-- gcc/analyzer/ChangeLog | 26
-rw-r--r-- gcc/auto-profile.cc | 4
-rw-r--r-- gcc/common/config/riscv/riscv-common.cc | 15
-rw-r--r-- gcc/config/aarch64/aarch64-protos.h | 3
-rw-r--r-- gcc/config/aarch64/aarch64-simd.md | 10
-rw-r--r-- gcc/config/aarch64/aarch64-sve.md | 38
-rw-r--r-- gcc/config/aarch64/aarch64.cc | 108
-rw-r--r-- gcc/config/arm/arm.cc | 6
-rw-r--r-- gcc/config/arm/arm.h | 6
-rw-r--r-- gcc/config/i386/i386-features.cc | 39
-rw-r--r-- gcc/config/i386/i386.cc | 71
-rw-r--r-- gcc/config/i386/i386.h | 4
-rw-r--r-- gcc/config/i386/x86-tune-costs.h | 133
-rw-r--r-- gcc/config/riscv/riscv-vect-permconst.cc | 20
-rw-r--r-- gcc/config/riscv/riscv.md | 20
-rw-r--r-- gcc/config/riscv/riscv.opt | 6
-rw-r--r-- gcc/config/s390/s390-protos.h | 1
-rw-r--r-- gcc/config/s390/s390.cc | 82
-rw-r--r-- gcc/config/s390/s390.md | 4
-rw-r--r-- gcc/config/s390/vector.md | 30
-rw-r--r-- gcc/doc/invoke.texi | 8
-rw-r--r-- gcc/fortran/ChangeLog | 25
-rw-r--r-- gcc/fortran/check.cc | 42
-rw-r--r-- gcc/fortran/primary.cc | 2
-rw-r--r-- gcc/gimple-fold.cc | 26
-rw-r--r-- gcc/gimple-fold.h | 2
-rw-r--r-- gcc/simplify-rtx.cc | 7
-rw-r--r-- gcc/testsuite/ChangeLog | 215
-rw-r--r-- gcc/testsuite/g++.target/i386/pr120036.C | 113
-rw-r--r-- gcc/testsuite/g++.target/riscv/redundant-andi.C | 26
-rw-r--r-- gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047-1.h | 6
-rw-r--r-- gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047-2.h | 1
-rw-r--r-- gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047.c | 5
-rw-r--r-- gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061-1.h | 6
-rw-r--r-- gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061-2.h | 1
-rw-r--r-- gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061.c | 6
-rw-r--r-- gcc/testsuite/gcc.dg/plugin/location_overflow_plugin.cc | 15
-rw-r--r-- gcc/testsuite/gcc.dg/plugin/plugin.exp | 4
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-early-break_134-pr120089.c | 66
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-early-break_135-pr120143.c | 18
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_5.c | 24
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/ldst_ptrue_pat_128_to_neon.c | 81
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/while_7.c | 4
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/while_9.c | 2
-rw-r--r-- gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c | 3
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr117839-3a.c | 22
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr117839-3b.c | 5
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr119919.c | 2
-rw-r--r-- gcc/testsuite/gcc.target/riscv/arch-45.c | 5
-rw-r--r-- gcc/testsuite/gcc.target/riscv/arch-46.c | 5
-rw-r--r-- gcc/testsuite/gcc.target/riscv/arch-47.c | 5
-rw-r--r-- gcc/testsuite/gcc.target/riscv/arch-48.c | 5
-rw-r--r-- gcc/testsuite/gcc.target/riscv/pr120137.c | 12
-rw-r--r-- gcc/testsuite/gcc.target/riscv/pr120154.c | 22
-rw-r--r-- gcc/testsuite/gcc.target/s390/vector/cstoreti-1.c | 127
-rw-r--r-- gcc/testsuite/gcc.target/s390/vector/cstoreti-2.c | 25
-rw-r--r-- gcc/testsuite/gfortran.dg/pr119948.f90 | 51
-rw-r--r-- gcc/testsuite/gfortran.dg/pr120049_a.f90 | 15
-rw-r--r-- gcc/testsuite/gfortran.dg/pr120049_b.f90 | 8
-rw-r--r-- gcc/tree-if-conv.cc | 16
-rw-r--r-- gcc/tree-scalar-evolution.cc | 5
-rw-r--r-- gcc/tree-ssa-ifcombine.cc | 10
-rw-r--r-- gcc/tree-ssa-loop-im.cc | 28
-rw-r--r-- gcc/tree-ssa-loop-split.cc | 5
-rw-r--r-- gcc/tree-ssa-reassoc.cc | 40
-rw-r--r-- gcc/tree-vect-data-refs.cc | 1
-rw-r--r-- gcc/tree-vect-slp.cc | 17
-rw-r--r-- gcc/tree-vect-stmts.cc | 66
70 files changed, 2255 insertions(+), 186 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 21446b6..cbce913 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,611 @@
+2025-05-07 Jeff Law <jlaw@ventanamicro.com>
+
+ PR target/120137
+ PR target/120154
+ * config/riscv/riscv-vect-permconst.cc (process_bb): Verify each
+ canonicalized element fits into the vector element mode.
+
+2025-05-07 Dongyan Chen <chendongyan@isrc.iscas.ac.cn>
+
+ * common/config/riscv/riscv-common.cc: New extension.
+ * config/riscv/riscv.opt: Ditto.
+
+2025-05-07 Richard Earnshaw <rearnsha@arm.com>
+
+ PR target/91323
+ * config/arm/arm.cc (arm_select_cc_mode): Use CCFPEmode for LTGT.
+
+2025-05-07 Richard Earnshaw <rearnsha@arm.com>
+
+ PR target/110796
+ PR target/118446
+ * config/arm/arm.h (REVERSIBLE_CC_MODE): FP modes are only
+ reversible if flag_finite_math_only.
+ * config/arm/arm.cc (arm_select_cc_mode): Return CCFPmode for all
+ FP comparisons if flag_finite_math_only.
+
+2025-05-07 Andrew Pinski <quic_apinski@quicinc.com>
+
+ PR tree-optimization/111276
+ * gimple-fold.cc (arith_code_with_undefined_signed_overflow): Make static.
+ (gimple_with_undefined_signed_overflow): New function.
+ * gimple-fold.h (arith_code_with_undefined_signed_overflow): Remove.
+ (gimple_with_undefined_signed_overflow): Add declaration.
+ * tree-if-conv.cc (if_convertible_gimple_assign_stmt_p): Use
+ gimple_with_undefined_signed_overflow instead of manually
+ checking lhs and the code of the stmt.
+ (predicate_statements): Likewise.
+ * tree-ssa-ifcombine.cc (ifcombine_rewrite_to_defined_overflow): Likewise.
+ * tree-ssa-loop-im.cc (move_computations_worker): Likewise.
+ * tree-ssa-reassoc.cc (update_range_test): Likewise. Reformat.
+ * tree-scalar-evolution.cc (final_value_replacement_loop): Use
+ gimple_with_undefined_signed_overflow instead of
+ arith_code_with_undefined_signed_overflow.
+ * tree-ssa-loop-split.cc (split_loop): Likewise.
+
+2025-05-07 Andrew Pinski <quic_apinski@quicinc.com>
+
+	* tree-ssa-loop-im.cc (compute_invariantness): Hoist to the always-executed
+	point if ignoring the cost.
+
+2025-05-07 Jan Hubicka <hubicka@ucw.cz>
+
+	* config/i386/i386.cc (ix86_vector_costs::add_stmt_cost): Add FLOAT_EXPR,
+	FIX_TRUNC_EXPR and vec_promote_demote costs.
+
+2025-05-07 Jennifer Schmitz <jschmitz@nvidia.com>
+
+ PR target/117978
+ * config/aarch64/aarch64-protos.h: Declare
+	aarch64_emit_load_store_through_mode and aarch64_expand_maskloadstore.
+ * config/aarch64/aarch64-sve.md
+ (maskload<mode><vpred>): New define_expand folding maskloads with
+ certain predicate patterns to ASIMD loads.
+ (*aarch64_maskload<mode><vpred>): Renamed from maskload<mode><vpred>.
+ (maskstore<mode><vpred>): New define_expand folding maskstores with
+ certain predicate patterns to ASIMD stores.
+ (*aarch64_maskstore<mode><vpred>): Renamed from maskstore<mode><vpred>.
+ * config/aarch64/aarch64.cc
+ (aarch64_emit_load_store_through_mode): New function emitting a
+ load/store through subregs of a given mode.
+ (aarch64_emit_sve_pred_move): Refactor to use
+ aarch64_emit_load_store_through_mode.
+ (aarch64_expand_maskloadstore): New function to emit ASIMD loads/stores
+ for maskloads/stores with SVE predicates with VL1, VL2, VL4, VL8, or
+ VL16 patterns.
+	(aarch64_partial_ptrue_length): New function returning the number of
+	leading set bits in a predicate.
+
+2025-05-07 Stefan Schulze Frielinghaus <stefansf@gcc.gnu.org>
+
+ * config/s390/s390-protos.h (s390_expand_cstoreti4): New
+ function.
+ * config/s390/s390.cc (s390_expand_cstoreti4): New function.
+ * config/s390/s390.md (CC_SUZ): New mode iterator.
+ (l): New mode attribute.
+ (cc_tolower): New mode attribute.
+ * config/s390/vector.md (cstoreti4): New expander.
+ (*vec_cmpv2di_lane0_<cc_tolower>): New insn.
+ (*vec_cmpti_<cc_tolower>): New insn.
+
+2025-05-07 H.J. Lu <hjl.tools@gmail.com>
+
+ PR target/120036
+ * config/i386/i386-features.cc (ix86_get_vector_load_mode):
+ Handle 8/4/2 bytes.
+ (remove_redundant_vector_load): If the mode size is smaller than
+ its natural size, first insert an extra move with a QI vector
+ SUBREG of the same size to avoid validate_subreg failure.
+
+2025-05-07 hongtao.liu <hongtao.liu@intel.com>
+
+ PR gcov-profile/118508
+ * auto-profile.cc
+ (autofdo_source_profile::get_callsite_total_count): Fix name
+ mismatch for fortran.
+
+2025-05-07 Jeff Law <jlaw@ventanamicro.com>
+
+ * config/riscv/riscv.md (*branch<ANYI:mode>_shiftedarith_equals_zero):
+ Avoid generating unnecessary andi. Fix formatting.
+
+2025-05-06 Dongyan Chen <chendongyan@isrc.iscas.ac.cn>
+
+ * common/config/riscv/riscv-common.cc: New extension.
+
+2025-05-06 Mingzhu Yan <yanmingzhu@iscas.ac.cn>
+
+ * common/config/riscv/riscv-common.cc (riscv_ext_version_table): New
+ extension.
+	(riscv_ext_flag_table): Ditto.
+ * config/riscv/riscv.opt: New mask.
+	* doc/invoke.texi (RISC-V Options): New extension.
+
+2025-05-06 Jan Hubicka <hubicka@ucw.cz>
+
+ * config/i386/i386.cc (ix86_rtx_costs): Cost FLOAT, UNSIGNED_FLOAT,
+ FIX, UNSIGNED_FIX.
+ * config/i386/i386.h (struct processor_costs): Add
+ cvtsi2ss, cvtss2si, cvtpi2ps, cvtps2pi.
+ * config/i386/x86-tune-costs.h (struct processor_costs): Update tables.
+
+2025-05-06 Martin Jambor <mjambor@suse.cz>
+
+ PR ipa/119852
+ * cgraph.h (cgraph_node::create_clone): Remove the default value of
+ argument suffix. Update function comment.
+ * cgraphclones.cc (cgraph_node::create_clone): Update function comment.
+ * ipa-inline-transform.cc (clone_inlined_nodes): Pass NULL to suffix
+	of create_clone explicitly.
+ * ipa-inline.cc (recursive_inlining): Likewise.
+ * lto-cgraph.cc (input_node): Likewise.
+
+2025-05-06 Martin Jambor <mjambor@suse.cz>
+
+ * cgraph.h (cgraph_node::create_version_clone_with_body): Fix function
+ comment. Change the name of clone_name to suffix, in line with the
+ function definition.
+ * cgraphclones.cc (cgraph_node::create_version_clone_with_body): Fix
+ function comment.
+
+2025-05-06 Martin Jambor <mjambor@suse.cz>
+
+ PR ipa/119852
+ * cgraphclones.cc (dump_callgraph_transformation): Document the
+ function. Do not dump if suffix is NULL.
+
+2025-05-06 Martin Jambor <mjambor@suse.cz>
+
+ * doc/invoke.texi (Developer Options): Document -fdump-ipa-clones.
+
+2025-05-06 David Malcolm <dmalcolm@redhat.com>
+
+ * selftest-diagnostic.cc (test_diagnostic_context::report): Use
+ diagnostic_option_id rather than plain int.
+ * selftest-diagnostic.h (test_diagnostic_context::report):
+ Likewise.
+
+2025-05-06 David Malcolm <dmalcolm@redhat.com>
+
+ PR sarif-replay/117988
+ * json.cc (json::pointer::token::token): New ctors.
+ (json::pointer::token::~token): New.
+ (json::pointer::token::operator=): New.
+ (json::object::set): Set the value's m_pointer_token.
+ (json::array::append): Likewise.
+ * json.h (json::pointer::token): New struct.
+ (json::value::get_pointer_token): New accessor.
+ (json::value::m_pointer_token): New field.
+ * libsarifreplay.cc (get_logical_location_kind_for_json_kind):
+ New.
+ (make_logical_location_from_jv): New.
+ (sarif_replayer::report_problem): Set the logical location of the
+ diagnostic.
+
+2025-05-06 David Malcolm <dmalcolm@redhat.com>
+
+ * diagnostic-format-sarif.cc (maybe_get_sarif_kind): Add cases for
+ new kinds of logical location.
+ * doc/libgdiagnostics/topics/logical-locations.rst: Add new kinds
+ of logical location for handling XML and JSON.
+ * libgdiagnostics.cc (impl_logical_location_manager::get_kind):
+ Add cases for new kinds of logical location.
+ (diagnostic_text_sink::text_starter): Likewise, introducing a
+ macro for this.
+ (diagnostic_manager_debug_dump_logical_location): Likewise.
+ * libgdiagnostics.h (enum diagnostic_logical_location_kind_t): Add
+ new kinds of logical location for handling XML and JSON.
+ * libsarifreplay.cc (handle_logical_location_object): Add entries
+ to "kind_values" for decoding sarif logical location kinds
+ relating to XML and JSON.
+ * logical-location.h (enum logical_location_kind): Add new kinds
+ of logical location for handling XML and JSON.
+
+2025-05-06 David Malcolm <dmalcolm@redhat.com>
+
+ PR other/116176
+ * diagnostic-format-sarif.cc (class sarif_array_of_unique): New
+ template.
+ (class sarif_logical_location): Move here from
+ diagnostic-format-sarif.h.
+ (sarif_builder::m_cached_logical_locs): New.
+ (sarif_builder::sarif_builder): Initialize it.
+ (sarif_builder::set_any_logical_locs_arr): Call
+ make_minimal_sarif_logical_location rather than
+ make_sarif_logical_location_object.
+ (sarif_property_bag::set_logical_location): Likewise.
+ (make_sarif_logical_location_object): Replace with...
+ (sarif_builder::ensure_sarif_logical_location_for): ...this.
+ Capture "parentIndex" property. Consolidate into
+ theRuns.logicalLocations.
+ (sarif_builder::make_minimal_sarif_logical_location): New.
+ (sarif_builder::make_run_object): Add "index" properties to
+ m_cached_logical_locs and move it to theRuns.logicalLocations.
+ (selftest::test_sarif_array_of_unique_1): New.
+ (selftest::test_sarif_array_of_unique_2): New.
+ (selftest::diagnostic_format_sarif_cc_tests): Call the new
+ selftests.
+ * diagnostic-format-sarif.h (class sarif_logical_location): Move
+ to diagnostic-format-sarif.cc.
+ (make_sarif_logical_location_object): Drop decl.
+ * json.cc (value::compare): New.
+ (object::compare): New.
+ (selftest::fail_comparison): New.
+ (selftest::assert_json_equal): New.
+ (ASSERT_JSON_EQ): New.
+ (selftest::assert_json_non_equal): New.
+ (ASSERT_JSON_NE): New.
+ (selftest::test_comparisons): New.
+ (selftest::json_cc_tests): Call the new selftest.
+ * json.h (json::value::dyn_cast_object): New vfunc.
+ (json::object::dyn_cast_object): New vfunc impl.
+ (json::object::compare): New decl.
+ * libgdiagnostics.cc
+ (impl_logical_location_manager::get_parent): New.
+ * logical-location.h (logical_location_manager::get_parent): New
+ vfunc impl.
+ * selftest-logical-location.h
+ (test_logical_location_manager::get_parent): New vfunc impl.
+ * tree-logical-location.cc (assert_valid_tree): New.
+ (tree_logical_location_manager::get_short_name): Support types as
+ well as decls.
+ (tree_logical_location_manager::get_name_with_scope): Gracefully
+ handle non-decl nodes.
+ (tree_logical_location_manager::get_internal_name): Likewise.
+ (tree_logical_location_manager::get_kind): Don't attempt to handle
+ null nodes. Handle NAMESPACE_DECL and RECORD_TYPE.
+ (tree_logical_location_manager::get_name_for_path_output):
+ Gracefully handle non-decl nodes.
+ (tree_logical_location_manager::get_parent): New.
+ * tree-logical-location.h
+ (tree_logical_location_manager::get_parent): New vfunc impl.
+
+2025-05-06 David Malcolm <dmalcolm@redhat.com>
+
+ * diagnostic-client-data-hooks.h: Include "logical-location.h".
+ (diagnostic_client_data_hooks::get_logical_location_manager): New.
+ (diagnostic_client_data_hooks::get_current_logical_location):
+ Convert return type from const logical_location * to
+ logical_location.
+ * diagnostic-format-json.cc: Include
+ "diagnostic-client-data-hooks.h".
+ (make_json_for_path): Update to use logical_location_manager from
+ the context.
+ * diagnostic-format-sarif.cc
+ (sarif_builder::get_logical_location_manager): New.
+ (sarif_builder::make_location_object): Update type of logical_loc
+ from "const logical_location *" to "logical_location".
+ (sarif_builder::set_any_logical_locs_arr): Likewise.
+ (sarif_builder::m_logical_loc_mgr): New field.
+ (sarif_result::on_nested_diagnostic): Use logical_location default
+ ctor rather than nullptr.
+ (sarif_builder::sarif_builder): Initialize m_logical_loc_mgr from
+ context's client data hooks.
+ (sarif_builder::make_locations_arr): Convert type of logical_loc
+	from "const logical_location *" to "logical_location".
+ (sarif_builder::set_any_logical_locs_arr): Likewise. Pass manager
+ to make_sarif_logical_location_object.
+ (sarif_builder::make_location_object): Likewise.
+ (sarif_property_bag::set_logical_location): New.
+ (make_sarif_logical_location_object): Update for introduction of
+ logical_location_manager.
+ (populate_thread_flow_location_object): Pass builder to
+ ev.maybe_add_sarif_properties.
+ (selftest::test_make_location_object): Use logical_location
+ default ctor rather than nullptr.
+ * diagnostic-format-sarif.h (class logical_location): Replace
+ forward decl with include of "logical-location.h".
+ (class sarif_builder): New forward decl.
+ (sarif_property_bag::set_logical_location): New.
+ (make_sarif_logical_location_object): Add "mgr" param.
+ * diagnostic-path.cc
+ (diagnostic_path::get_first_event_in_a_function): Update for
+ change of logical_location type.
+ (per_thread_summary::per_thread_summary): Pass in
+ "logical_loc_mgr".
+ (per_thread_summary::m_logical_loc_mgr): New field.
+ (event_range::m_logical_loc): Update for change of
+ logical_location type.
+ (path_summary::get_logical_location_manager): New accessor.
+ (path_summary::m_logical_loc_mgr): New field.
+ (path_summary::get_or_create_events_for_thread_id): Pass
+ m_logical_loc_mgr to per_thread_summary ctor.
+ (path_summary::path_summary): Initialize m_logical_loc_mgr.
+ (thread_event_printer::print_swimlane_for_event_range): Add param
+ "logical_loc_mgr". Update for change in logical_loc type.
+ (print_path_summary_as_text): Pass manager to
+ thread_event_printer::print_swimlane_for_event_range.
+ (diagnostic_text_output_format::print_path): Update for
+ introduction of logical_location_manager.
+ * diagnostic-path.h: Include "logical-location.h".
+ (class sarif_builder): New forward decl.
+ (diagnostic_event::get_logical_location): Convert return type from
+ "const logical_location *" to "logical_location".
+ (diagnostic_event::maybe_add_sarif_properties): Add sarif_builder
+ param.
+ (diagnostic_path::get_logical_location_manager): New accessor.
+ (diagnostic_path::diagnostic_path): New ctor, taking manager.
+ (diagnostic_path::m_logical_loc_mgr): New field.
+ * diagnostic.cc
+ (diagnostic_context::get_logical_location_manager): New.
+ (logical_location::function_p): Convert to...
+ (logical_location_manager::function_p): ...this.
+ * diagnostic.h (class logical_location): Replace forward decl
+ with...
+ (class logical_location_manager): ...this.
+ (diagnostic_context::get_logical_location_manager): New decl.
+ * lazy-diagnostic-path.cc
+ (selftest::test_lazy_path::test_lazy_path): Pass m_logical_loc_mgr
+ to path ctor.
+ (selftest::test_lazy_path::make_inner_path): Likewise.
+ (selftest::test_lazy_path::m_logical_loc_mgr): New field.
+ * lazy-diagnostic-path.h
+ (lazy_diagnostic_path::lazy_diagnostic_path): New ctor.
+ * libgdiagnostics.cc (struct diagnostic_logical_location): Convert
+ from subclass of logical_location to a plain struct, dropping
+ accessors.
+ (class impl_logical_location_manager): New.
+ (impl_diagnostic_client_data_hooks::get_logical_location_manager):
+	New.
+ (impl_diagnostic_client_data_hooks::m_logical_location_manager):
+ New field.
+ (diagnostic_manager::get_logical_location_manager): New.
+ (libgdiagnostics_path_event::get_logical_location): Reimplement.
+ (diagnostic_execution_path::diagnostic_execution_path): Add
+ logical_loc_mgr and pass to base class.
+ (diagnostic_execution_path::same_function_p): Update for change to
+ logical_location type.
+ (diagnostic::add_execution_path): Pass logical_loc_mgr to path
+ ctor.
+ (impl_diagnostic_client_data_hooks::get_current_logical_location):
+ Reimplement.
+ (diagnostic_text_sink::text_starter): Reimplement printing of
+ logical location.
+ (diagnostic_manager::new_execution_path): Pass mgr to path ctor.
+ (diagnostic_manager_debug_dump_logical_location): Update for
+ changes to diagnostic_logical_location.
+ (diagnostic_logical_location_get_kind): Likewise.
+ (diagnostic_logical_location_get_parent): Likewise.
+ (diagnostic_logical_location_get_short_name): Likewise.
+ (diagnostic_logical_location_get_fully_qualified_name): Likewise.
+ (diagnostic_logical_location_get_decorated_name): Likewise.
+ * logical-location.h (class logical_location_manager): New.
+ (class logical_location): Convert to typedef of
+ logical_location_manager::key.
+ * selftest-diagnostic-path.cc
+ (selftest::test_diagnostic_path::test_diagnostic_path): Pass
+ m_test_logical_loc_mgr to base ctor.
+ (selftest::test_diagnostic_path::same_function_p): Use pointer
+ comparison.
+ (selftest::test_diagnostic_path::add_event): Use
+ logical_location_from_funcname.
+ (selftest::test_diagnostic_path::add_thread_event): Likewise.
+ (selftest::test_diagnostic_path::logical_location_from_funcname):
+ New.
+ (selftest::test_diagnostic_event::test_diagnostic_event): Fix
+ indentation. Pass logical_location rather than const char *.
+ * selftest-diagnostic-path.h
+ (selftest::test_diagnostic_event::test_diagnostic_event):
+ Likewise.
+ (selftest::test_diagnostic_event::get_logical_location): Update
+ for change to logical_location type.
+ (selftest::test_diagnostic_event::get_function_name): Drop.
+ (selftest::test_diagnostic_event::m_logical_loc): Convert from
+ test_logical_location to logical_location.
+ (selftest::test_diagnostic_path::logical_location_from_funcname):
+ New.
+ (selftest::test_diagnostic_path::m_test_logical_loc_mgr): New
+ field.
+ * selftest-logical-location.cc: Include "selftest.h".
+ (selftest::test_logical_location::test_logical_location): Drop.
+ (selftest::test_logical_location_manager::~test_logical_location_manager):
+ New.
+ (selftest::test_logical_location::get_short_name): Replace with...
+ (selftest::test_logical_location_manager::get_short_name):
+ ...this.
+ (selftest::test_logical_location::get_name_with_scope): Replace
+ with...
+ (selftest::test_logical_location_manager::get_name_with_scope):
+ ...this.
+ (selftest::test_logical_location::get_internal_name): Replace
+ with...
+ (selftest::test_logical_location_manager::get_internal_name):
+ ...this.
+ (selftest::test_logical_location::get_kind): Replace with...
+ (selftest::test_logical_location_manager::get_kind): ...this.
+ (selftest::test_logical_location::get_name_for_path_output):
+ Replace with...
+ (selftest::test_logical_location_manager::get_name_for_path_output):
+ ...this.
+ (selftest::test_logical_location_manager::logical_location_from_funcname):
+ New.
+ (selftest::test_logical_location_manager::item_from_funcname):
+ New.
+ (selftest::selftest_logical_location_cc_tests): New.
+ * selftest-logical-location.h (class test_logical_location):
+ Replace with...
+ (class test_logical_location_manager): ...this.
+ * selftest-run-tests.cc (selftest::run_tests): Call
+ selftest_logical_location_cc_tests.
+ * selftest.h (selftest::selftest_logical_location_cc_tests): New
+ decl.
+ * simple-diagnostic-path.cc
+ (simple_diagnostic_path::simple_diagnostic_path): Add
+ "logical_loc_mgr" param and pass it to base ctor.
+ (simple_diagnostic_event::simple_diagnostic_event): Update init of
+ m_logical_loc.
+ (selftest::test_intraprocedural_path): Update for changes to
+ logical locations.
+ * simple-diagnostic-path.h: Likewise.
+ * tree-diagnostic-client-data-hooks.cc
+	(compiler_data_hooks::get_logical_location_manager): New.
+ (compiler_data_hooks::get_current_logical_location): Update.
+ (compiler_data_hooks::m_current_fndecl_logical_loc): Replace
+ with...
+ (compiler_data_hooks::m_logical_location_manager): ...this.
+ * tree-logical-location.cc
+ (compiler_logical_location::get_short_name_for_tree): Replace
+ with...
+ (tree_logical_location_manager::get_short_name): ...this.
+ (compiler_logical_location::get_name_with_scope_for_tree): Replace
+ with...
+ (tree_logical_location_manager::get_name_with_scope): ...this.
+ (compiler_logical_location::get_internal_name_for_tree): Replace
+ with...
+ (tree_logical_location_manager::get_internal_name): ...this.
+ (compiler_logical_location::get_kind_for_tree): Replace with...
+ (tree_logical_location_manager::get_kind): ...this.
+ (compiler_logical_location::get_name_for_tree_for_path_output):
+ Replace with...
+ (tree_logical_location_manager::get_name_for_path_output):
+ ...this.
+ (tree_logical_location::get_short_name): Drop.
+ (tree_logical_location::get_name_with_scope): Drop.
+ (tree_logical_location::get_internal_name): Drop.
+ (tree_logical_location::get_kind): Drop.
+ (tree_logical_location::get_name_for_path_output): Drop.
+ (current_fndecl_logical_location::get_short_name): Drop.
+ (current_fndecl_logical_location::get_name_with_scope): Drop.
+ (current_fndecl_logical_location::get_internal_name): Drop.
+ (current_fndecl_logical_location::get_kind): Drop.
+ (current_fndecl_logical_location::get_name_for_path_output): Drop.
+ * tree-logical-location.h (class compiler_logical_location): Drop.
+ (class tree_logical_location): Drop.
+ (class current_fndecl_logical_location): Drop.
+ (class tree_logical_location_manager): New.
+
+2025-05-06 David Malcolm <dmalcolm@redhat.com>
+
+ * doc/libgdiagnostics/topics/compatibility.rst: New file, based
+ on gcc/jit/docs/topics/compatibility.rst.
+ * doc/libgdiagnostics/topics/index.rst: Add compatibility.rst.
+ * doc/libgdiagnostics/topics/logical-locations.rst (Accessors):
+ New section.
+ * libgdiagnostics++.h (logical_location::operator bool): New.
+ (logical_location::operator==): New.
+ (logical_location::operator!=): New.
+ (logical_location::get_kind): New.
+ (logical_location::get_parent): New.
+ (logical_location::get_short_name): New.
+ (logical_location::get_fully_qualified_name): New.
+ (logical_location::get_decorated_name): New.
+ * libgdiagnostics.cc
+ (diagnostic_logical_location::get_fully_qualified_name): New.
+ (diagnostic_logical_location_get_kind): New entrypoint.
+ (diagnostic_logical_location_get_parent): New entrypoint.
+ (diagnostic_logical_location_get_short_name): New entrypoint.
+ (diagnostic_logical_location_get_fully_qualified_name): New
+ entrypoint.
+ (diagnostic_logical_location_get_decorated_name): New entrypoint.
+ * libgdiagnostics.h
+ (LIBDIAGNOSTICS_HAVE_LOGICAL_LOCATION_ACCESSORS): New define.
+ (diagnostic_logical_location_get_kind): New entrypoint.
+ (diagnostic_logical_location_get_parent): New entrypoint.
+ (diagnostic_logical_location_get_short_name): New entrypoint.
+ (diagnostic_logical_location_get_fully_qualified_name): New
+ entrypoint.
+ (diagnostic_logical_location_get_decorated_name): New entrypoint.
+ * libgdiagnostics.map (LIBGDIAGNOSTICS_ABI_1): New.
+
+2025-05-06 Shreya Munnangi <smunnangi1@ventanamicro.com>
+
+ PR middle-end/114512
+ * config/riscv/bitmanip.md (bext* patterns): New patterns for
+ bext recognition plus splitter for extracting variable bit from
+ a constant.
+ * config/riscv/predicates.md (bitpos_mask_operand): New predicate.
+
+2025-05-06 Pan Li <pan2.li@intel.com>
+
+ * config/riscv/autovec-opt.md (*<optab>_vx_<mode>): Add new
+ combine to convert vec_duplicate + vadd.vv to vaddvx on GR2VR
+ cost.
+ * config/riscv/riscv.cc (riscv_rtx_costs): Take care of the cost
+ when vec_dup and vadd v, vec_dup(x).
+ * config/riscv/vector-iterators.md: Add new iterator for vx.
+
+2025-05-06 Pan Li <pan2.li@intel.com>
+
+ * config/riscv/riscv-protos.h (get_gr2vr_cost): Add new decl to
+ get the cost of gr2vr.
+ * config/riscv/riscv-vector-costs.cc (costs::adjust_stmt_cost):
+ Leverage the helper function to get the cost of gr2vr.
+ * config/riscv/riscv.cc (riscv_register_move_cost): Ditto.
+ (riscv_builtin_vectorization_cost): Ditto.
+ (get_gr2vr_cost): Add new impl of the helper function.
+
+2025-05-06 Pan Li <pan2.li@intel.com>
+
+ * config/riscv/riscv-opts.h (RVV_GR2VR_COST_UNPROVIDED): Add
+ new macro to indicate the param is not provided.
+	* config/riscv/riscv.opt: Add new option --param=gpr2vr-cost.
+
+2025-05-06 Richard Biener <rguenther@suse.de>
+
+	PR tree-optimization/115777
+ * tree-vectorizer.h (_slp_tree::avoid_stlf_fail): New member.
+ * tree-vect-slp.cc (_slp_tree::_slp_tree): Initialize it.
+ (vect_print_slp_tree): Dump it.
+ * tree-vect-data-refs.cc (vect_slp_analyze_instance_dependence):
+ For dataflow dependent loads of a store check whether there's
+ a cross-iteration data dependence that for sure prohibits
+ store-to-load forwarding and mark involved loads.
+ * tree-vect-stmts.cc (get_group_load_store_type): For avoid_stlf_fail
+ marked loads use VMAT_ELEMENTWISE.
+
+2025-05-06 Jakub Jelinek <jakub@redhat.com>
+
+ PR tree-optimization/120074
+ * gimple-fold.cc (fold_truth_andor_for_ifcombine): For
+ lsignbit && l_xor case, punt if ll_bitsize != lr_bitsize. Similarly
+ for rsignbit && r_xor case, punt if rl_bitsize != rr_bitsize.
+ Formatting fix.
+
+2025-05-06 Jan Hubicka <hubicka@ucw.cz>
+
+ * config/i386/i386.cc (ix86_tls_index): Add ifdef.
+
+2025-05-06 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/120031
+ * match.pd ((nop_outer_cast)-(inner_cast)var -> -(outer_cast)(var)):
+ Allow inner conversions that are not widenings when the outer
+ type is unsigned.
+
+2025-05-06 LIU Hao <lh_mouse@126.com>
+
+ PR pch/14940
+ * config/i386/host-mingw32.cc (mingw32_gt_pch_use_address):
+ Replace the loop that attempted to map the PCH only to its
+	original address with more adaptive operations.
+
+2025-05-06 Julian Waters <tanksherman27@gmail.com>
+ Eric Botcazou <botcazou@adacore.com>
+ Uroš Bizjak <ubizjak@gmail.com>
+ Liu Hao <lh_mouse@126.com>
+
+ * config/i386/i386.cc (ix86_legitimate_constant_p): Handle new UNSPEC.
+ (legitimate_pic_operand_p): Handle new UNSPEC.
+ (legitimate_pic_address_disp_p): Handle new UNSPEC.
+ (ix86_legitimate_address_p): Handle new UNSPEC.
+ (ix86_tls_index_symbol): New symbol for _tls_index.
+ (ix86_tls_index): Handle creation of _tls_index symbol.
+ (legitimize_tls_address): Create thread local access sequence.
+ (output_pic_addr_const): Handle new UNSPEC.
+ (i386_output_dwarf_dtprel): Handle new UNSPEC.
+ (i386_asm_output_addr_const_extra): Handle new UNSPEC.
+ * config/i386/i386.h (TARGET_WIN32_TLS): Define.
+ * config/i386/i386.md: New UNSPEC.
+ * config/i386/predicates.md: Handle new UNSPEC.
+ * config/mingw/mingw32.h (TARGET_WIN32_TLS): Define.
+ (TARGET_ASM_SELECT_SECTION): Define.
+ (DEFAULT_TLS_SEG_REG): Define.
+ * config/mingw/winnt.cc (mingw_pe_select_section): Select proper TLS section.
+ (mingw_pe_unique_section): Handle TLS section.
+ * config/mingw/winnt.h (mingw_pe_select_section): Declare.
+ * configure: Regenerate.
+	* configure.ac: New check for broken linker thread-local support.
+
2025-05-05 Jeff Law <jlaw@ventanamicro.com>
PR target/119971
diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index 6906e73..8cb3c2b 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20250506
+20250508
diff --git a/gcc/analyzer/ChangeLog b/gcc/analyzer/ChangeLog
index 186f355..1fbba5d 100644
--- a/gcc/analyzer/ChangeLog
+++ b/gcc/analyzer/ChangeLog
@@ -1,3 +1,29 @@
+2025-05-06 David Malcolm <dmalcolm@redhat.com>
+
+ * checker-event.cc (checker_event::checker_event): Update
+ initialization of m_logical_loc.
+ (checker_event::maybe_add_sarif_properties): Add "builder" param.
+ Replace call to make_sarif_logical_location_object with call to
+ sarif_property_bag::set_logical_location.
+ (superedge_event::maybe_add_sarif_properties): Add "builder"
+ param.
+ * checker-event.h (checker_event::get_logical_location):
+ Reimplement.
+ (checker_event::maybe_add_sarif_properties): Add "builder" param.
+ (checker_event::maybe_add_sarif_properties): Add "builder" param.
+ (checker_event::m_logical_loc): Convert from tree_logical_location
+ to logical_location.
+ (superedge_event::maybe_add_sarif_properties): Add sarif_builder
+ param.
+ * checker-path.h (checker_path::checker_path): Add logical_loc_mgr
+ param.
+ * diagnostic-manager.cc
+ (diagnostic_manager::emit_saved_diagnostic): Pass logical location
+ manager to emission_path ctor.
+ (diagnostic_manager::get_logical_location_manager): New.
+ * diagnostic-manager.h
+ (diagnostic_manager::get_logical_location_manager): New decl.
+
2025-04-30 David Malcolm <dmalcolm@redhat.com>
* sm-malloc.cc (malloc_diagnostic::describe_state_change): Tweak
diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc
index 7e0e8c6..9966d93 100644
--- a/gcc/auto-profile.cc
+++ b/gcc/auto-profile.cc
@@ -848,8 +848,8 @@ autofdo_source_profile::get_callsite_total_count (
function_instance *s = get_function_instance_by_inline_stack (stack);
if (s == NULL
- || afdo_string_table->get_index (IDENTIFIER_POINTER (
- DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ())
+ ||(afdo_string_table->get_index_by_decl (edge->callee->decl)
+ != s->name()))
return 0;
return s->total_count ();
diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc
index 145a0f2..ca14eb9 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -327,6 +327,7 @@ static const struct riscv_ext_version riscv_ext_version_table[] =
{"zalrsc", ISA_SPEC_CLASS_NONE, 1, 0},
{"zabha", ISA_SPEC_CLASS_NONE, 1, 0},
{"zacas", ISA_SPEC_CLASS_NONE, 1, 0},
+ {"zama16b", ISA_SPEC_CLASS_NONE, 1, 0},
{"zba", ISA_SPEC_CLASS_NONE, 1, 0},
{"zbb", ISA_SPEC_CLASS_NONE, 1, 0},
@@ -432,6 +433,8 @@ static const struct riscv_ext_version riscv_ext_version_table[] =
{"zcmp", ISA_SPEC_CLASS_NONE, 1, 0},
{"zcmt", ISA_SPEC_CLASS_NONE, 1, 0},
+ {"sdtrig", ISA_SPEC_CLASS_NONE, 1, 0},
+
{"smaia", ISA_SPEC_CLASS_NONE, 1, 0},
{"smepmp", ISA_SPEC_CLASS_NONE, 1, 0},
{"smstateen", ISA_SPEC_CLASS_NONE, 1, 0},
@@ -440,7 +443,10 @@ static const struct riscv_ext_version riscv_ext_version_table[] =
{"sscofpmf", ISA_SPEC_CLASS_NONE, 1, 0},
{"ssstateen", ISA_SPEC_CLASS_NONE, 1, 0},
{"sstc", ISA_SPEC_CLASS_NONE, 1, 0},
+ {"ssstrict", ISA_SPEC_CLASS_NONE, 1, 0},
+ {"svade", ISA_SPEC_CLASS_NONE, 1, 0},
+ {"svadu", ISA_SPEC_CLASS_NONE, 1, 0},
{"svinval", ISA_SPEC_CLASS_NONE, 1, 0},
{"svnapot", ISA_SPEC_CLASS_NONE, 1, 0},
{"svpbmt", ISA_SPEC_CLASS_NONE, 1, 0},
@@ -1652,6 +1658,7 @@ static const riscv_ext_flag_table_t riscv_ext_flag_table[] =
RISCV_EXT_FLAG_ENTRY ("zalrsc", x_riscv_za_subext, MASK_ZALRSC),
RISCV_EXT_FLAG_ENTRY ("zabha", x_riscv_za_subext, MASK_ZABHA),
RISCV_EXT_FLAG_ENTRY ("zacas", x_riscv_za_subext, MASK_ZACAS),
+ RISCV_EXT_FLAG_ENTRY ("zama16b", x_riscv_za_subext, MASK_ZAMA16B),
RISCV_EXT_FLAG_ENTRY ("zba", x_riscv_zb_subext, MASK_ZBA),
RISCV_EXT_FLAG_ENTRY ("zbb", x_riscv_zb_subext, MASK_ZBB),
@@ -1764,9 +1771,11 @@ static const riscv_ext_flag_table_t riscv_ext_flag_table[] =
RISCV_EXT_FLAG_ENTRY ("zcmp", x_riscv_zc_subext, MASK_ZCMP),
RISCV_EXT_FLAG_ENTRY ("zcmt", x_riscv_zc_subext, MASK_ZCMT),
- RISCV_EXT_FLAG_ENTRY ("svinval", x_riscv_sv_subext, MASK_SVINVAL),
- RISCV_EXT_FLAG_ENTRY ("svnapot", x_riscv_sv_subext, MASK_SVNAPOT),
- RISCV_EXT_FLAG_ENTRY ("svvptc", x_riscv_sv_subext, MASK_SVVPTC),
+ RISCV_EXT_FLAG_ENTRY ("svade", x_riscv_sv_subext, MASK_SVADE),
+ RISCV_EXT_FLAG_ENTRY ("svadu", x_riscv_sv_subext, MASK_SVADU),
+ RISCV_EXT_FLAG_ENTRY ("svinval", x_riscv_sv_subext, MASK_SVINVAL),
+ RISCV_EXT_FLAG_ENTRY ("svnapot", x_riscv_sv_subext, MASK_SVNAPOT),
+ RISCV_EXT_FLAG_ENTRY ("svvptc", x_riscv_sv_subext, MASK_SVVPTC),
RISCV_EXT_FLAG_ENTRY ("ztso", x_riscv_ztso_subext, MASK_ZTSO),
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 1ca86c9..c935e7b 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1026,6 +1026,8 @@ rtx aarch64_ptrue_reg (machine_mode, unsigned int);
rtx aarch64_ptrue_reg (machine_mode, machine_mode);
rtx aarch64_pfalse_reg (machine_mode);
bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *);
+void aarch64_emit_load_store_through_mode (rtx, rtx, machine_mode);
+bool aarch64_expand_maskloadstore (rtx *, machine_mode);
void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode);
bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx);
@@ -1053,6 +1055,7 @@ void aarch64_subvti_scratch_regs (rtx, rtx, rtx *,
rtx *, rtx *, rtx *);
void aarch64_expand_subvti (rtx, rtx, rtx,
rtx, rtx, rtx, rtx, bool);
+int aarch64_exact_log2_inverse (unsigned int, rtx);
/* Initialize builtins for SIMD intrinsics. */
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e2afe87..1099e74 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1193,12 +1193,14 @@
(define_insn "aarch64_simd_vec_set_zero<mode>"
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
(vec_merge:VALL_F16
- (match_operand:VALL_F16 1 "aarch64_simd_imm_zero" "")
- (match_operand:VALL_F16 3 "register_operand" "0")
+ (match_operand:VALL_F16 1 "register_operand" "0")
+ (match_operand:VALL_F16 3 "aarch64_simd_imm_zero" "")
(match_operand:SI 2 "immediate_operand" "i")))]
- "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
+ "TARGET_SIMD && aarch64_exact_log2_inverse (<nunits>, operands[2]) >= 0"
{
- int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
+ int elt = ENDIAN_LANE_N (<nunits>,
+ aarch64_exact_log2_inverse (<nunits>,
+ operands[2]));
operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
return "ins\\t%0.<Vetype>[%p2], <vwcore>zr";
}
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 7bf12ff..f39af6e 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1286,7 +1286,24 @@
;; -------------------------------------------------------------------------
;; Predicated LD1 (single).
-(define_insn "maskload<mode><vpred>"
+(define_expand "maskload<mode><vpred>"
+ [(set (match_operand:SVE_ALL 0 "register_operand")
+ (unspec:SVE_ALL
+ [(match_operand:<VPRED> 2 "nonmemory_operand")
+ (match_operand:SVE_ALL 1 "memory_operand")
+ (match_operand:SVE_ALL 3 "aarch64_maskload_else_operand")]
+ UNSPEC_LD1_SVE))]
+ "TARGET_SVE"
+ {
+ if (aarch64_expand_maskloadstore (operands, <MODE>mode))
+ DONE;
+ if (CONSTANT_P (operands[2]))
+ operands[2] = force_reg (<VPRED>mode, operands[2]);
+ }
+)
+
+;; Predicated LD1 (single).
+(define_insn "*aarch64_maskload<mode><vpred>"
[(set (match_operand:SVE_ALL 0 "register_operand" "=w")
(unspec:SVE_ALL
[(match_operand:<VPRED> 2 "register_operand" "Upl")
@@ -2287,7 +2304,24 @@
;; -------------------------------------------------------------------------
;; Predicated ST1 (single).
-(define_insn "maskstore<mode><vpred>"
+(define_expand "maskstore<mode><vpred>"
+ [(set (match_operand:SVE_ALL 0 "memory_operand")
+ (unspec:SVE_ALL
+ [(match_operand:<VPRED> 2 "nonmemory_operand")
+ (match_operand:SVE_ALL 1 "register_operand")
+ (match_dup 0)]
+ UNSPEC_ST1_SVE))]
+ "TARGET_SVE"
+ {
+ if (aarch64_expand_maskloadstore (operands, <MODE>mode))
+ DONE;
+ if (CONSTANT_P (operands[2]))
+ operands[2] = force_reg (<VPRED>mode, operands[2]);
+ }
+)
+
+;; Predicated ST1 (single).
+(define_insn "*aarch64_maskstore<mode><vpred>"
[(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
(unspec:SVE_ALL
[(match_operand:<VPRED> 2 "register_operand" "Upl")
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index fff8d9d..9e3f288 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -3667,6 +3667,14 @@ aarch64_partial_ptrue_length (rtx_vector_builder &builder,
if (builder.nelts_per_pattern () == 3)
return 0;
+ /* It is conservatively correct to drop the element size to a lower value,
+ and we must do so if the predicate consists of a leading "foreground"
+ sequence that is smaller than the element size. Without this,
+ we would test only one bit and so treat everything as either an
+ all-true or an all-false predicate. */
+ if (builder.nelts_per_pattern () == 2)
+ elt_size = MIN (elt_size, builder.npatterns ());
+
/* Skip over leading set bits. */
unsigned int nelts = builder.encoded_nelts ();
unsigned int i = 0;
@@ -3698,6 +3706,24 @@ aarch64_partial_ptrue_length (rtx_vector_builder &builder,
return vl;
}
+/* Return:
+
+ * -1 if all bits of PRED are set
+ * N if PRED has N leading set bits followed by all clear bits
+ * 0 if PRED does not have any of these forms. */
+
+int
+aarch64_partial_ptrue_length (rtx pred)
+{
+ rtx_vector_builder builder;
+ if (!aarch64_get_sve_pred_bits (builder, pred))
+ return 0;
+
+ auto elt_size = vector_element_size (GET_MODE_BITSIZE (GET_MODE (pred)),
+ GET_MODE_NUNITS (GET_MODE (pred)));
+ return aarch64_partial_ptrue_length (builder, elt_size);
+}
+
/* See if there is an svpattern that encodes an SVE predicate of mode
PRED_MODE in which the first VL bits are set and the rest are clear.
Return the pattern if so, otherwise return AARCH64_NUM_SVPATTERNS.
@@ -6410,8 +6436,32 @@ aarch64_stack_protect_canary_mem (machine_mode mode, rtx decl_rtl,
return gen_rtx_MEM (mode, force_reg (Pmode, addr));
}
-/* Emit an SVE predicated move from SRC to DEST. PRED is a predicate
- that is known to contain PTRUE. */
+/* Emit a load/store from a subreg of SRC to a subreg of DEST.
+ The subregs have mode NEW_MODE. Use only for reg<->mem moves. */
+void
+aarch64_emit_load_store_through_mode (rtx dest, rtx src, machine_mode new_mode)
+{
+ gcc_assert ((MEM_P (dest) && register_operand (src, VOIDmode))
+ || (MEM_P (src) && register_operand (dest, VOIDmode)));
+ auto mode = GET_MODE (dest);
+ auto int_mode = aarch64_sve_int_mode (mode);
+ if (MEM_P (src))
+ {
+ rtx tmp = force_reg (new_mode, adjust_address (src, new_mode, 0));
+ tmp = force_lowpart_subreg (int_mode, tmp, new_mode);
+ emit_move_insn (dest, force_lowpart_subreg (mode, tmp, int_mode));
+ }
+ else
+ {
+ src = force_lowpart_subreg (int_mode, src, mode);
+ emit_move_insn (adjust_address (dest, new_mode, 0),
+ force_lowpart_subreg (new_mode, src, int_mode));
+ }
+}
+
+/* PRED is a predicate that is known to contain PTRUE.
+ For 128-bit VLS loads/stores, emit LDR/STR.
+ Else, emit an SVE predicated move from SRC to DEST. */
void
aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src)
@@ -6421,16 +6471,7 @@ aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src)
&& known_eq (GET_MODE_SIZE (mode), 16)
&& aarch64_classify_vector_mode (mode) == VEC_SVE_DATA
&& !BYTES_BIG_ENDIAN)
- {
- if (MEM_P (src))
- {
- rtx tmp = force_reg (V16QImode, adjust_address (src, V16QImode, 0));
- emit_move_insn (dest, lowpart_subreg (mode, tmp, V16QImode));
- }
- else
- emit_move_insn (adjust_address (dest, V16QImode, 0),
- force_lowpart_subreg (V16QImode, src, mode));
- }
+ aarch64_emit_load_store_through_mode (dest, src, V16QImode);
else
{
expand_operand ops[3];
@@ -23526,6 +23567,39 @@ aarch64_simd_valid_imm (rtx op, simd_immediate_info *info,
return false;
}
+/* Try to optimize the expansion of a maskload or maskstore with
+ the operands in OPERANDS, given that the vector being loaded or
+ stored has mode MODE. Return true on success or false if the normal
+ expansion should be used. */
+
+bool
+aarch64_expand_maskloadstore (rtx *operands, machine_mode mode)
+{
+ /* If the predicate in operands[2] is a patterned SVE PTRUE predicate
+ with patterns VL1, VL2, VL4, VL8, or VL16 and at most the bottom
+ 128 bits are loaded/stored, emit an ASIMD load/store. */
+ int vl = aarch64_partial_ptrue_length (operands[2]);
+ int width = vl * GET_MODE_UNIT_BITSIZE (mode);
+ if (width <= 128
+ && pow2p_hwi (vl)
+ && (vl == 1
+ || (!BYTES_BIG_ENDIAN
+ && aarch64_classify_vector_mode (mode) == VEC_SVE_DATA)))
+ {
+ machine_mode new_mode;
+ if (known_eq (width, 128))
+ new_mode = V16QImode;
+ else if (known_eq (width, 64))
+ new_mode = V8QImode;
+ else
+ new_mode = int_mode_for_size (width, 0).require ();
+ aarch64_emit_load_store_through_mode (operands[0], operands[1],
+ new_mode);
+ return true;
+ }
+ return false;
+}
+
/* Return true if OP is a valid SIMD move immediate for SVE or AdvSIMD. */
bool
aarch64_simd_valid_mov_imm (rtx op)
@@ -23840,6 +23914,16 @@ aarch64_strided_registers_p (rtx *operands, unsigned int num_operands,
return true;
}
+/* Return the base 2 logarithm of the bit inverse of OP masked by the lowest
+ NELTS bits, if OP is a power of 2. Otherwise, returns -1. */
+
+int
+aarch64_exact_log2_inverse (unsigned int nelts, rtx op)
+{
+ return exact_log2 ((~INTVAL (op))
+ & ((HOST_WIDE_INT_1U << nelts) - 1));
+}
+
/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
HIGH (exclusive). */
void
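[Editorial sketch, not part of the patch, of what aarch64_exact_log2_inverse computes: for a 4-element vector, a vec_merge selector of 0b1011 keeps lanes 0, 1 and 3 of the register operand, so the inverted mask restricted to the low 4 bits is 0b0100 and the returned lane index is 2, the single lane that gets zeroed.]

    /* Assumes a selector that zeroes exactly one lane; exact_log2 of the
       inverted, masked selector picks out that lane (or -1 otherwise).  */
    int
    example_lane_index (void)
    {
      unsigned int nelts = 4;
      unsigned long long sel = 0xb;                          /* 0b1011 */
      unsigned long long inv = ~sel & ((1ULL << nelts) - 1); /* 0b0100 */
      return inv && !(inv & (inv - 1)) ? __builtin_ctzll (inv) : -1;
    }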
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 670f487..6bdb68a 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -16211,14 +16211,16 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
case UNGT:
case UNGE:
case UNEQ:
- case LTGT:
return CCFPmode;
case LT:
case LE:
case GT:
case GE:
- return CCFPEmode;
+ case LTGT:
+ return (flag_finite_math_only
+ ? CCFPmode
+ : CCFPEmode);
default:
gcc_unreachable ();
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 8472b75..08d3f0d 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -2257,7 +2257,11 @@ extern int making_const_table;
#define SELECT_CC_MODE(OP, X, Y) arm_select_cc_mode (OP, X, Y)
-#define REVERSIBLE_CC_MODE(MODE) 1
+/* Floating-point modes cannot be reversed unless we don't care about
+ NaNs. */
+#define REVERSIBLE_CC_MODE(MODE) \
+ (flag_finite_math_only \
+ || !((MODE) == CCFPmode || (MODE) == CCFPEmode))
#define REVERSE_CONDITION(CODE,MODE) \
(((MODE) == CCFPmode || (MODE) == CCFPEmode) \
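[Editorial sketch of why the FP condition-code modes stop being reversible unless -ffinite-math-only is in effect: with a NaN operand, a comparison and its "reverse" are not complements of each other, so swapping one for the other changes the result.]

    /* Both return 0 when either argument is NaN, so "a >= b" is not the
       logical negation of "a < b" once NaNs are possible.  */
    int lt (double a, double b) { return a < b; }
    int ge (double a, double b) { return a >= b; }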
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 31f3ee2..1ba5ac4 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -3309,8 +3309,16 @@ ix86_get_vector_load_mode (unsigned int size)
mode = V64QImode;
else if (size == 32)
mode = V32QImode;
- else
+ else if (size == 16)
mode = V16QImode;
+ else if (size == 8)
+ mode = V8QImode;
+ else if (size == 4)
+ mode = V4QImode;
+ else if (size == 2)
+ mode = V2QImode;
+ else
+ gcc_unreachable ();
return mode;
}
@@ -3338,13 +3346,36 @@ replace_vector_const (machine_mode vector_mode, rtx vector_const,
if (SUBREG_P (dest) || mode == vector_mode)
replace = vector_const;
else
- replace = gen_rtx_SUBREG (mode, vector_const, 0);
+ {
+ unsigned int size = GET_MODE_SIZE (mode);
+ if (size < ix86_regmode_natural_size (mode))
+ {
+ /* If the mode size is smaller than its natural size,
+ first insert an extra move with a QI vector SUBREG
+ of the same size to avoid validate_subreg failure. */
+ machine_mode vmode = ix86_get_vector_load_mode (size);
+ rtx vreg;
+ if (mode == vmode)
+ vreg = vector_const;
+ else
+ {
+ vreg = gen_reg_rtx (vmode);
+ rtx vsubreg = gen_rtx_SUBREG (vmode, vector_const, 0);
+ rtx pat = gen_rtx_SET (vreg, vsubreg);
+ rtx_insn *vinsn = emit_insn_before (pat, insn);
+ df_insn_rescan (vinsn);
+ }
+ replace = gen_rtx_SUBREG (mode, vreg, 0);
+ }
+ else
+ replace = gen_rtx_SUBREG (mode, vector_const, 0);
+ }
- /* NB: Don't run recog_memoized here since vector SUBREG may not
- be valid. Let LRA handle vector SUBREG. */
SET_SRC (set) = replace;
/* Drop possible dead definitions. */
PATTERN (insn) = set;
+ INSN_CODE (insn) = -1;
+ recog_memoized (insn);
df_insn_rescan (insn);
}
}
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 89f518c..fd36ea8 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22794,6 +22794,27 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
else
*total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
return false;
+ case FLOAT:
+ case UNSIGNED_FLOAT:
+ if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ /* TODO: We do not have cost tables for x87. */
+ *total = cost->fadd;
+ else if (VECTOR_MODE_P (mode))
+ *total = ix86_vec_cost (mode, cost->cvtpi2ps);
+ else
+ *total = cost->cvtsi2ss;
+ return false;
+
+ case FIX:
+ case UNSIGNED_FIX:
+ if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ /* TODO: We do not have cost tables for x87. */
+ *total = cost->fadd;
+ else if (VECTOR_MODE_P (mode))
+ *total = ix86_vec_cost (mode, cost->cvtps2pi);
+ else
+ *total = cost->cvtss2si;
+ return false;
case ABS:
/* SSE requires memory load for the constant operand. It may make
@@ -25746,6 +25767,26 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
(ix86_tune_cost, GET_MODE_BITSIZE (mode));
break;
+ case FLOAT_EXPR:
+ if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ stmt_cost = ix86_cost->cvtsi2ss;
+ else if (X87_FLOAT_MODE_P (mode))
+ /* TODO: We do not have cost tables for x87. */
+ stmt_cost = ix86_cost->fadd;
+ else
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
+ break;
+
+ case FIX_TRUNC_EXPR:
+ if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ stmt_cost = ix86_cost->cvtss2si;
+ else if (X87_FLOAT_MODE_P (mode))
+ /* TODO: We do not have cost tables for x87. */
+ stmt_cost = ix86_cost->fadd;
+ else
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
+ break;
+
case COND_EXPR:
{
/* SSE2 conditinal move sequence is:
@@ -25909,8 +25950,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
break;
}
- if (kind == vec_promote_demote
- && fp && FLOAT_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
+ if (kind == vec_promote_demote)
{
int outer_size
= tree_to_uhwi
@@ -25920,16 +25960,25 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
= tree_to_uhwi
(TYPE_SIZE
(TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
- int stmt_cost = vec_fp_conversion_cost
- (ix86_tune_cost, GET_MODE_BITSIZE (mode));
- /* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will end
- up doing two conversions and packing them. */
+ bool inner_fp = FLOAT_TYPE_P
+ (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)));
+
+ if (fp && inner_fp)
+ stmt_cost = vec_fp_conversion_cost
+ (ix86_tune_cost, GET_MODE_BITSIZE (mode));
+ else if (fp && !inner_fp)
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
+ else if (!fp && inner_fp)
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
+ else
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ /* VEC_PACK_TRUNC_EXPR and similar demote operations: If outer size is
+ greater than inner size we will end up doing two conversions and
+ packing them. We always pack pairs; if the size difference is greater
+ it is split into multiple demote operations. */
if (inner_size > outer_size)
- {
- int n = inner_size / outer_size;
- stmt_cost = stmt_cost * n
- + (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op);
- }
+ stmt_cost = stmt_cost * 2
+ + ix86_vec_cost (mode, ix86_cost->sse_op);
}
/* If we do elementwise loads into a vector then we are bound by
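[Editorial worked example of the new vec_promote_demote costing; the function name below is a placeholder, not the patch's API. When the inner elements are wider than the outer ones, the vectorizer packs pairs, so the model charges two conversions plus one pack regardless of the overall width ratio; larger ratios are split into further demote statements.]

    /* E.g. truncating a vector of doubles to ints: two conversion
       instructions plus one SSE pack operation.  */
    static int
    demote_stmt_cost (int conversion_cost, int sse_op_cost)
    {
      return 2 * conversion_cost + sse_op_cost;
    }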
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 02bf357..6a38de3 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -213,6 +213,10 @@ struct processor_costs {
such as VCVTPD2PS with larger reg in ymm. */
const int vcvtps2pd512; /* cost 512bit packed FP conversions,
such as VCVTPD2PS with larger reg in zmm. */
+ const int cvtsi2ss; /* cost of CVTSI2SS instruction. */
+ const int cvtss2si; /* cost of CVT(T)SS2SI instruction. */
+ const int cvtpi2ps; /* cost of CVTPI2PS instruction. */
+ const int cvtps2pi; /* cost of CVT(T)PS2PI instruction. */
const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp;
/* Specify reassociation width for integer,
fp, vector integer and vector fp
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index cddcf61..6cce70a 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -134,6 +134,11 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
COSTS_N_BYTES (4), /* cost of CVTSS2SD etc. */
COSTS_N_BYTES (4), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_BYTES (6), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_BYTES (4), /* cost of CVTSI2SS instruction. */
+ COSTS_N_BYTES (4), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_BYTES (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_BYTES (4), /* cost of CVT(T)PS2PI instruction. */
+
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
ix86_size_memcpy,
ix86_size_memset,
@@ -249,6 +254,10 @@ struct processor_costs i386_cost = { /* 386 specific costs */
COSTS_N_INSNS (27), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (54), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (108), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (27), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (27), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
i386_memcpy,
i386_memset,
@@ -365,6 +374,10 @@ struct processor_costs i486_cost = { /* 486 specific costs */
COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (27), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (27), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
i486_memcpy,
i486_memset,
@@ -479,6 +492,10 @@ struct processor_costs pentium_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium_memcpy,
pentium_memset,
@@ -586,6 +603,10 @@ struct processor_costs lakemont_cost = {
COSTS_N_INSNS (5), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (10), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (20), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (5), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (5), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium_memcpy,
pentium_memset,
@@ -708,6 +729,10 @@ struct processor_costs pentiumpro_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentiumpro_memcpy,
pentiumpro_memset,
@@ -821,6 +846,10 @@ struct processor_costs geode_cost = {
COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
geode_memcpy,
geode_memset,
@@ -937,6 +966,10 @@ struct processor_costs k6_cost = {
COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (8), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (2), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (2), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
k6_memcpy,
k6_memset,
@@ -1054,6 +1087,10 @@ struct processor_costs athlon_cost = {
COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (4), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
athlon_memcpy,
athlon_memset,
@@ -1180,6 +1217,10 @@ struct processor_costs k8_cost = {
COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (10), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
k8_memcpy,
k8_memset,
@@ -1314,6 +1355,10 @@ struct processor_costs amdfam10_cost = {
COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (8), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
amdfam10_memcpy,
amdfam10_memset,
@@ -1441,6 +1486,10 @@ const struct processor_costs bdver_cost = {
COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
bdver_memcpy,
bdver_memset,
@@ -1593,6 +1642,10 @@ struct processor_costs znver1_cost = {
/* Real latency is 4, but for split regs multiply cost of half op by 2. */
COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (8), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (7), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
/* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles
and it can execute 2 integer additions and 2 multiplications thus
reassociation may make sense up to a width of 6. SPEC2k6 benchmarks suggest
@@ -1755,6 +1808,10 @@ struct processor_costs znver2_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (7), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to a width of 6.
@@ -1893,6 +1950,10 @@ struct processor_costs znver3_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to a width of 6.
@@ -2034,6 +2095,10 @@ struct processor_costs znver4_cost = {
COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
/* Real latency is 6, but for split regs multiply cost of half op by 2. */
COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to a width of 6.
@@ -2188,6 +2253,10 @@ struct processor_costs znver5_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
/* Zen5 can execute:
- integer ops: 6 per cycle, at most 3 multiplications.
latency 1 for additions, 3 for multiplications (pipelined)
@@ -2330,6 +2399,10 @@ struct processor_costs skylake_cost = {
COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (4), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
skylake_memcpy,
skylake_memset,
@@ -2462,6 +2535,10 @@ struct processor_costs icelake_cost = {
COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
icelake_memcpy,
icelake_memset,
@@ -2588,6 +2665,10 @@ struct processor_costs alderlake_cost = {
COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
alderlake_memcpy,
alderlake_memset,
@@ -2707,6 +2788,10 @@ const struct processor_costs btver1_cost = {
COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
btver1_memcpy,
btver1_memset,
@@ -2823,6 +2908,10 @@ const struct processor_costs btver2_cost = {
COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
btver2_memcpy,
btver2_memset,
@@ -2938,6 +3027,10 @@ struct processor_costs pentium4_cost = {
COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (20), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (17), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium4_memcpy,
pentium4_memset,
@@ -3056,6 +3149,10 @@ struct processor_costs nocona_cost = {
COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (20), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (17), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
nocona_memcpy,
nocona_memset,
@@ -3172,6 +3269,10 @@ struct processor_costs atom_cost = {
COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (7), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (10), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
atom_memcpy,
atom_memset,
@@ -3288,6 +3389,10 @@ struct processor_costs slm_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (5), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
slm_memcpy,
slm_memset,
@@ -3418,6 +3523,10 @@ struct processor_costs tremont_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
tremont_memcpy,
tremont_memset,
@@ -3534,6 +3643,10 @@ struct processor_costs intel_cost = {
COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (8), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (8), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (8), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */
1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
intel_memcpy,
intel_memset,
@@ -3655,6 +3768,10 @@ struct processor_costs lujiazui_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
lujiazui_memcpy,
lujiazui_memset,
@@ -3774,6 +3891,10 @@ struct processor_costs yongfeng_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
yongfeng_memcpy,
yongfeng_memset,
@@ -3893,6 +4014,10 @@ struct processor_costs shijidadao_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
shijidadao_memcpy,
shijidadao_memset,
@@ -4020,6 +4145,10 @@ struct processor_costs generic_cost = {
COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
generic_memcpy,
generic_memset,
@@ -4152,6 +4281,10 @@ struct processor_costs core_cost = {
COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
core_memcpy,
core_memset,
diff --git a/gcc/config/riscv/riscv-vect-permconst.cc b/gcc/config/riscv/riscv-vect-permconst.cc
index feecc7e..8e13cf8 100644
--- a/gcc/config/riscv/riscv-vect-permconst.cc
+++ b/gcc/config/riscv/riscv-vect-permconst.cc
@@ -203,6 +203,24 @@ vector_permconst::process_bb (basic_block bb)
if (bias < 0 || bias > 16384 / 8)
continue;
+ /* We need to verify that each element would be a valid value
+ in the inner mode after applying the bias. */
+ machine_mode inner = GET_MODE_INNER (GET_MODE (cvec));
+ HOST_WIDE_INT precision = GET_MODE_PRECISION (inner).to_constant ();
+ int i;
+ for (i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++)
+ {
+ HOST_WIDE_INT val = INTVAL (CONST_VECTOR_ELT (cvec, i)) - bias;
+ if (val != sext_hwi (val, precision))
+ break;
+ }
+
+ /* If the loop terminated early, then we found a case where the
+ adjusted constant would not fit, so we can't record the constant
+ for this case (it's unlikely to be useful anyway). */
+ if (i != CONST_VECTOR_NUNITS (cvec).to_constant ())
+ continue;
+
/* At this point we have a load of a constant integer vector from the
constant pool. That constant integer vector is hopefully a
permutation constant. We need to make a copy of the vector and
@@ -211,7 +229,7 @@ vector_permconst::process_bb (basic_block bb)
XXX This violates structure sharing conventions. */
rtvec_def *nvec = gen_rtvec (CONST_VECTOR_NUNITS (cvec).to_constant ());
- for (int i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++)
+ for (i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++)
nvec->elem[i] = GEN_INT (INTVAL (CONST_VECTOR_ELT (cvec, i)) - bias);
rtx copy = gen_rtx_CONST_VECTOR (GET_MODE (cvec), nvec);
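
The validity check added above relies on a sign-extension round trip: a biased element still fits in the inner element mode only if sign-extending its low `precision` bits gives back the same value. A minimal standalone sketch of that test in plain C (independent of GCC's sext_hwi and rtx types; the helper names here are made up for illustration):

#include <stdint.h>
#include <stdio.h>

/* Sign-extend the low PREC bits of VAL, a stand-in for GCC's sext_hwi.  */
static int64_t
sext (int64_t val, unsigned prec)
{
  uint64_t mask = prec >= 64 ? ~0ULL : (1ULL << prec) - 1;
  int64_t low = (int64_t) ((uint64_t) val & mask);
  int64_t sign = (int64_t) (1ULL << (prec - 1));
  return (low ^ sign) - sign;
}

/* Mirror of the new loop in process_bb: does every element minus BIAS
   still fit in an element of PREC bits?  */
static int
biased_vector_fits (const int64_t *elts, int n, int64_t bias, unsigned prec)
{
  for (int i = 0; i < n; i++)
    {
      int64_t val = elts[i] - bias;
      if (val != sext (val, prec))
        return 0;  /* adjusted constant would not fit */
    }
  return 1;
}

int
main (void)
{
  int64_t v[] = { 16, 17, 18, 19 };
  /* Bias 16 keeps all elements representable in an 8-bit element mode.  */
  printf ("%d\n", biased_vector_fits (v, 4, 16, 8));    /* prints 1 */
  /* Bias -120 pushes them past 127, so the check rejects the constant.  */
  printf ("%d\n", biased_vector_fits (v, 4, -120, 8));  /* prints 0 */
  return 0;
}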
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 15c89ff..259997f 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3173,15 +3173,25 @@
"#"
"&& reload_completed"
[(set (match_dup 4) (lshiftrt:X (subreg:X (match_dup 2) 0) (match_dup 6)))
- (set (match_dup 4) (and:X (match_dup 4) (match_dup 7)))
+ (set (match_dup 4) (match_dup 8))
(set (pc) (if_then_else (match_op_dup 1 [(match_dup 4) (const_int 0)])
(label_ref (match_dup 0)) (pc)))]
{
- HOST_WIDE_INT mask = INTVAL (operands[3]);
- int trailing = ctz_hwi (mask);
+ HOST_WIDE_INT mask = INTVAL (operands[3]);
+ int trailing = ctz_hwi (mask);
+
+ operands[6] = GEN_INT (trailing);
+ operands[7] = GEN_INT (mask >> trailing);
- operands[6] = GEN_INT (trailing);
- operands[7] = GEN_INT (mask >> trailing);
+ /* This splits after reload, so there's little chance to clean things
+ up. Rather than emit a ton of RTL here, we can just make a new
+ operand for that RHS and use it. For the case where the AND would
+ have been redundant, we can make it a NOP move, which does get
+ cleaned up. */
+ if (operands[7] == CONSTM1_RTX (word_mode))
+ operands[8] = operands[4];
+ else
+ operands[8] = gen_rtx_AND (word_mode, operands[4], operands[7]);
}
[(set_attr "type" "branch")])
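
The equivalence this splitter depends on can be checked in scalar terms: testing (x & mask) != 0 is the same as shifting away the mask's trailing zero bits and testing the shifted value against the shifted mask, and when the shifted mask is all-ones (mask >> trailing == -1 on a signed HOST_WIDE_INT) the AND contributes nothing, which is the case turned into a NOP move above. A small sketch in plain C, with illustrative names only:

#include <stdint.h>
#include <stdio.h>

/* Model of the branch-on-(x & mask) splitter: shift away the mask's
   trailing zero bits, then AND with the shifted mask.  MASK is signed so
   that MASK >> TRAILING is an arithmetic shift, like HOST_WIDE_INT.  */
static int
test_via_split (uint64_t x, int64_t mask)
{
  int trailing = __builtin_ctzll ((uint64_t) mask);
  uint64_t t = x >> trailing;     /* the lshiftrt of the value */
  int64_t m = mask >> trailing;   /* shifted mask, as the splitter computes */
  if (m != -1)                    /* otherwise the AND would be redundant */
    t &= (uint64_t) m;
  return t != 0;
}

int
main (void)
{
  uint64_t x = 0x1234;
  /* Interior-bit mask: the AND is still needed.  */
  printf ("%d %d\n", (x & 0x30) != 0, test_via_split (x, 0x30));
  /* Mask with every bit above the trailing zeros set: the AND is a no-op.  */
  printf ("%d %d\n", (x & ~(uint64_t) 0xF) != 0, test_via_split (x, -16));
  return 0;
}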
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 7102480..80593ee 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -274,6 +274,8 @@ Mask(ZA64RS) Var(riscv_za_subext)
Mask(ZA128RS) Var(riscv_za_subext)
+Mask(ZAMA16B) Var(riscv_za_subext)
+
TargetVariable
int riscv_zb_subext
@@ -466,6 +468,10 @@ Mask(XCVBI) Var(riscv_xcv_subext)
TargetVariable
int riscv_sv_subext
+Mask(SVADE) Var(riscv_sv_subext)
+
+Mask(SVADU) Var(riscv_sv_subext)
+
Mask(SVINVAL) Var(riscv_sv_subext)
Mask(SVNAPOT) Var(riscv_sv_subext)
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index e8c7f83..d760a7e 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -114,6 +114,7 @@ extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx);
extern void s390_expand_vec_strlen (rtx, rtx, rtx);
extern void s390_expand_vec_movstr (rtx, rtx, rtx);
extern bool s390_expand_addcc (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
+extern void s390_expand_cstoreti4 (rtx, rtx, rtx, rtx);
extern bool s390_expand_insv (rtx, rtx, rtx, rtx);
extern void s390_expand_cs (machine_mode, rtx, rtx, rtx, rtx, rtx, bool);
extern void s390_expand_atomic_exchange_tdsi (rtx, rtx, rtx);
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index e3edf85..2d44cec 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -7210,6 +7210,82 @@ s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
NULL_RTX, 1, OPTAB_DIRECT);
}
+/* Expand optab cstoreti4. */
+
+void
+s390_expand_cstoreti4 (rtx dst, rtx cmp, rtx op1, rtx op2)
+{
+ rtx_code code = GET_CODE (cmp);
+
+ if (TARGET_VXE3)
+ {
+ rtx cond = s390_emit_compare (GET_MODE (cmp), code, op1, op2);
+ emit_insn (gen_movsicc (dst, cond, const1_rtx, const0_rtx));
+ return;
+ }
+
+ /* Prior to VXE3, emulate the comparison. For an (in)equality test exploit
+ VECTOR COMPARE EQUAL. For a relational test, first compare the high parts
+ via VECTOR ELEMENT COMPARE (LOGICAL). If the high parts are not equal,
+ then consume the CC immediately by a subsequent LOAD ON CONDITION.
+ Otherwise, if the high parts are equal, perform a subsequent VECTOR
+ COMPARE HIGH LOGICAL followed by a LOAD ON CONDITION. */
+
+ op1 = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op1, TImode, 0));
+ op2 = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op2, TImode, 0));
+
+ if (code == EQ || code == NE)
+ {
+ s390_expand_vec_compare_cc (dst, code, op1, op2, code == EQ);
+ return;
+ }
+
+ /* Normalize code into either GE(U) or GT(U). */
+ if (code == LT || code == LE || code == LTU || code == LEU)
+ {
+ std::swap (op1, op2);
+ code = swap_condition (code);
+ }
+
+ /* For (un)signed comparisons
+ - high(op1) >= high(op2) instruction VECG op1, op2 sets CC1
+ if the relation does _not_ hold.
+ - high(op1) > high(op2) instruction VECG op2, op1 sets CC1
+ if the relation holds. */
+ if (code == GT || code == GTU)
+ std::swap (op1, op2);
+ machine_mode cc_mode = (code == GEU || code == GTU) ? CCUmode : CCSmode;
+ rtx lane0 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
+ emit_insn (
+ gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
+ gen_rtx_COMPARE (cc_mode,
+ gen_rtx_VEC_SELECT (DImode, op1, lane0),
+ gen_rtx_VEC_SELECT (DImode, op2, lane0))));
+ rtx ccs_reg = gen_rtx_REG (CCSmode, CC_REGNUM);
+ rtx lab = gen_label_rtx ();
+ s390_emit_jump (lab, gen_rtx_NE (VOIDmode, ccs_reg, const0_rtx));
+ /* At this point we have that high(op1) == high(op2). Thus, test the low
+ part, now. For unsigned comparisons
+ - low(op1) >= low(op2) instruction VCHLGS op2, op1 sets CC1
+ if the relation does _not_ hold.
+ - low(op1) > low(op2) instruction VCHLGS op1, op2 sets CC1
+ if the relation holds. */
+ std::swap (op1, op2);
+ emit_insn (gen_rtx_PARALLEL (
+ VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (gen_rtx_REG (CCVIHUmode, CC_REGNUM),
+ gen_rtx_COMPARE (CCVIHUmode, op1, op2)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode)))));
+ emit_label (lab);
+ /* For (un)signed comparison >= any CC except CC1 means that the relation
+ holds. For (un)signed comparison > only CC1 means that the relation
+ holds. */
+ rtx_code cmp_code = (code == GE || code == GEU) ? UNGE : LT;
+ rtx cond = gen_rtx_fmt_ee (cmp_code, CCSmode, ccs_reg, const0_rtx);
+ emit_insn (gen_movsicc (dst, cond, const1_rtx, const0_rtx));
+}
+
/* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
the result in TARGET. */
@@ -7310,9 +7386,9 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond,
/* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
elements in CMP1 and CMP2 fulfill the comparison.
- This function is only used to emit patterns for the vx builtins and
- therefore only handles comparison codes required by the
- builtins. */
+ This function is only used in s390_expand_cstoreti4 and to emit patterns for
+ the vx builtins and therefore only handles comparison codes required by
+ those. */
void
s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
rtx cmp1, rtx cmp2, bool all_p)
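
The comparison the new expander emulates on pre-VXE3 targets can be described in scalar terms: split each 128-bit operand into a high and a low 64-bit half, let the high halves (compared signed or unsigned as appropriate) decide whenever they differ, and fall back to an unsigned compare of the low halves otherwise. A scalar sketch of that logic in plain C, ignoring the CC/branch plumbing and using made-up names:

#include <stdint.h>
#include <stdio.h>

/* Scalar model of the >= / > emulation in s390_expand_cstoreti4: the high
   halves (VECG/VECLG in the real sequence) decide when they differ; only
   when they are equal does the unsigned low-half compare (VCHLGS) matter.  */
static int
ge128 (int64_t xh, uint64_t xl, int64_t yh, uint64_t yl, int unsigned_p)
{
  if (xh != yh)
    return unsigned_p ? (uint64_t) xh > (uint64_t) yh : xh > yh;
  return xl >= yl;  /* low halves always compare unsigned */
}

int
main (void)
{
  /* x = -1 (all ones), y = 1: signed x >= y is false, unsigned is true.  */
  printf ("%d %d\n", ge128 (-1, ~0ULL, 0, 1, 0), ge128 (-1, ~0ULL, 0, 1, 1));
  /* Equal high halves fall through to the unsigned low compare.  */
  printf ("%d\n", ge128 (5, 7, 5, 9, 0));  /* prints 0 */
  return 0;
}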
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 05b9da6..97a4bdf 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -993,6 +993,10 @@
(define_mode_attr asm_fcmp [(CCVEQ "e") (CCVFH "h") (CCVFHE "he")])
(define_mode_attr insn_cmp [(CCVEQ "eq") (CCVIH "h") (CCVIHU "hl") (CCVFH "h") (CCVFHE "he")])
+(define_mode_iterator CC_SUZ [CCS CCU CCZ])
+(define_mode_attr l [(CCS "") (CCU "l") (CCZ "")])
+(define_mode_attr cc_tolower [(CCS "ccs") (CCU "ccu") (CCZ "ccz")])
+
; Analogue to TOINTVEC / tointvec
(define_mode_attr TOINT [(TF "TI") (DF "DI") (SF "SI")])
(define_mode_attr toint [(TF "ti") (DF "di") (SF "si")])
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index e29255f..160e42a 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -538,6 +538,14 @@
"vlvg<bhfgq>\t%v0,%1,%Y4(%2)"
[(set_attr "op_type" "VRS")])
+(define_expand "cstoreti4"
+ [(set (match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "ordered_comparison_operator"
+ [(match_operand:TI 2 "register_operand")
+ (match_operand:TI 3 "register_operand")]))]
+ "TARGET_VX"
+ "s390_expand_cstoreti4 (operands[0], operands[1], operands[2], operands[3]); DONE;")
+
;; FIXME: Support also vector mode operands for 0
;; This is used via RTL standard name as well as for expanding the builtin
@@ -2209,6 +2217,28 @@
operands[5] = gen_reg_rtx (V2DImode);
})
+(define_insn "*vec_cmpv2di_lane0_<cc_tolower>"
+ [(set (reg:CC_SUZ CC_REGNUM)
+ (compare:CC_SUZ
+ (vec_select:DI
+ (match_operand:V2DI 0 "register_operand" "v")
+ (parallel [(const_int 0)]))
+ (vec_select:DI
+ (match_operand:V2DI 1 "register_operand" "v")
+ (parallel [(const_int 0)]))))]
+ "TARGET_VX"
+ "vec<l>g\t%v0,%v1"
+ [(set_attr "op_type" "VRR")])
+
+(define_insn "*vec_cmpti_<cc_tolower>"
+ [(set (reg:CC_SUZ CC_REGNUM)
+ (compare:CC_SUZ
+ (match_operand:TI 0 "register_operand" "v")
+ (match_operand:TI 1 "register_operand" "v")))]
+ "TARGET_VXE3"
+ "vec<l>q\t%v0,%v1"
+ [(set_attr "op_type" "VRR")])
+
;;
;; Floating point compares
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 90cbb51..b1964b3 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -31532,6 +31532,14 @@ to @samp{zvks} and @samp{zvkg}.
@tab 1.0
@tab Supervisor-mode timer interrupts extension.
+@item svade
+@tab 1.0
+@tab Cause an exception when hardware updating of A/D bits is disabled.
+
+@item svadu
+@tab 1.0
+@tab Hardware Updating of A/D Bits extension.
+
@item svinval
@tab 1.0
@tab Fine-grained address-translation cache invalidation extension.
diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index f87c64b..d92b9d6 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,28 @@
+2025-05-07 Paul Thomas <pault@gcc.gnu.org>
+ and Steven G. Kargl <kargl@gcc.gnu.org>
+
+ PR fortran/119948
+ * primary.cc (match_variable): Module procedures with sym the
+ same as result can be treated as variables, although marked
+ external.
+
+2025-05-06 Jerry DeLisle <jvdelisle@gcc.gnu.org>
+
+ PR fortran/120049
+ * check.cc (gfc_check_c_associated): Modify checks to avoid
+ ICE and allow use, intrinsic :: iso_c_binding from a separate
+ module file.
+
+2025-05-06 Thomas Koenig <tkoenig@gcc.gnu.org>
+
+ PR fortran/119928
+ * interface.cc (gfc_check_dummy_characteristics): Do not issue
+ error if one dummy symbol has been generated from an actual
+ argument and the other one has OPTIONAL, INTENT, ALLOCATABLE,
+ POINTER, TARGET, VALUE, ASYNCHRONOUS or CONTIGUOUS.
+ (gfc_get_formal_from_actual_arglist): Do nothing if symbol
+ is a class.
+
2025-05-04 Harald Anlauf <anlauf@gmx.de>
PR fortran/119986
diff --git a/gcc/fortran/check.cc b/gcc/fortran/check.cc
index 299c216..f02a2a3 100644
--- a/gcc/fortran/check.cc
+++ b/gcc/fortran/check.cc
@@ -5955,30 +5955,40 @@ gfc_check_c_sizeof (gfc_expr *arg)
bool
gfc_check_c_associated (gfc_expr *c_ptr_1, gfc_expr *c_ptr_2)
{
- if (c_ptr_1->ts.type != BT_DERIVED
- || c_ptr_1->ts.u.derived->from_intmod != INTMOD_ISO_C_BINDING
- || (c_ptr_1->ts.u.derived->intmod_sym_id != ISOCBINDING_PTR
- && c_ptr_1->ts.u.derived->intmod_sym_id != ISOCBINDING_FUNPTR))
+ if (c_ptr_1)
{
- gfc_error ("Argument C_PTR_1 at %L to C_ASSOCIATED shall have the "
- "type TYPE(C_PTR) or TYPE(C_FUNPTR)", &c_ptr_1->where);
- return false;
+ if (c_ptr_1->expr_type == EXPR_FUNCTION && c_ptr_1->ts.type == BT_VOID)
+ return true;
+
+ if (c_ptr_1->ts.type != BT_DERIVED
+ || c_ptr_1->ts.u.derived->from_intmod != INTMOD_ISO_C_BINDING
+ || (c_ptr_1->ts.u.derived->intmod_sym_id != ISOCBINDING_PTR
+ && c_ptr_1->ts.u.derived->intmod_sym_id != ISOCBINDING_FUNPTR))
+ {
+ gfc_error ("Argument C_PTR_1 at %L to C_ASSOCIATED shall have the "
+ "type TYPE(C_PTR) or TYPE(C_FUNPTR)", &c_ptr_1->where);
+ return false;
+ }
}
if (!scalar_check (c_ptr_1, 0))
return false;
- if (c_ptr_2
- && (c_ptr_2->ts.type != BT_DERIVED
+ if (c_ptr_2)
+ {
+ if (c_ptr_2->expr_type == EXPR_FUNCTION && c_ptr_2->ts.type == BT_VOID)
+ return true;
+
+ if (c_ptr_2->ts.type != BT_DERIVED
|| c_ptr_2->ts.u.derived->from_intmod != INTMOD_ISO_C_BINDING
|| (c_ptr_1->ts.u.derived->intmod_sym_id
- != c_ptr_2->ts.u.derived->intmod_sym_id)))
- {
- gfc_error ("Argument C_PTR_2 at %L to C_ASSOCIATED shall have the "
- "same type as C_PTR_1: %s instead of %s", &c_ptr_1->where,
- gfc_typename (&c_ptr_1->ts),
- gfc_typename (&c_ptr_2->ts));
- return false;
+ != c_ptr_2->ts.u.derived->intmod_sym_id))
+ {
+ gfc_error ("Argument C_PTR_2 at %L to C_ASSOCIATED shall have the "
+ "same type as C_PTR_1: %s instead of %s", &c_ptr_1->where,
+ gfc_typename (&c_ptr_1->ts), gfc_typename (&c_ptr_2->ts));
+ return false;
+ }
}
if (c_ptr_2 && !scalar_check (c_ptr_2, 1))
diff --git a/gcc/fortran/primary.cc b/gcc/fortran/primary.cc
index 72ecc7c..ec4e135 100644
--- a/gcc/fortran/primary.cc
+++ b/gcc/fortran/primary.cc
@@ -4396,7 +4396,7 @@ match_variable (gfc_expr **result, int equiv_flag, int host_flag)
case FL_PROCEDURE:
/* Check for a nonrecursive function result variable. */
if (sym->attr.function
- && !sym->attr.external
+ && (!sym->attr.external || sym->abr_modproc_decl)
&& sym->result == sym
&& (gfc_is_function_return_value (sym, gfc_current_ns)
|| (sym->attr.entry
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 5884b79..7721795 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -10573,7 +10573,7 @@ gimple_fold_indirect_ref (tree t)
integer types involves undefined behavior on overflow and the
operation can be expressed with unsigned arithmetic. */
-bool
+static bool
arith_code_with_undefined_signed_overflow (tree_code code)
{
switch (code)
@@ -10590,6 +10590,30 @@ arith_code_with_undefined_signed_overflow (tree_code code)
}
}
+/* Return true if STMT has an operation that operates on signed
+ integer types, invokes undefined behavior on overflow, and can be
+ expressed with unsigned arithmetic. */
+
+bool
+gimple_with_undefined_signed_overflow (gimple *stmt)
+{
+ if (!is_gimple_assign (stmt))
+ return false;
+ tree lhs = gimple_assign_lhs (stmt);
+ if (!lhs)
+ return false;
+ tree lhs_type = TREE_TYPE (lhs);
+ if (!INTEGRAL_TYPE_P (lhs_type)
+ && !POINTER_TYPE_P (lhs_type))
+ return false;
+ if (!TYPE_OVERFLOW_UNDEFINED (lhs_type))
+ return false;
+ if (!arith_code_with_undefined_signed_overflow
+ (gimple_assign_rhs_code (stmt)))
+ return false;
+ return true;
+}
+
/* Rewrite STMT, an assignment with a signed integer or pointer arithmetic
operation that can be transformed to unsigned arithmetic by converting
its operand, carrying out the operation in the corresponding unsigned
diff --git a/gcc/gimple-fold.h b/gcc/gimple-fold.h
index 2790d0f..5fcfdcd 100644
--- a/gcc/gimple-fold.h
+++ b/gcc/gimple-fold.h
@@ -59,7 +59,7 @@ extern tree gimple_get_virt_method_for_vtable (HOST_WIDE_INT, tree,
extern tree gimple_fold_indirect_ref (tree);
extern bool gimple_fold_builtin_sprintf (gimple_stmt_iterator *);
extern bool gimple_fold_builtin_snprintf (gimple_stmt_iterator *);
-extern bool arith_code_with_undefined_signed_overflow (tree_code);
+extern bool gimple_with_undefined_signed_overflow (gimple *);
extern void rewrite_to_defined_overflow (gimple_stmt_iterator *);
extern gimple_seq rewrite_to_defined_overflow (gimple *);
extern void replace_call_with_value (gimple_stmt_iterator *, tree);
diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index 7bcbe11..b34fd2f 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -7387,6 +7387,13 @@ simplify_context::simplify_ternary_operation (rtx_code code, machine_mode mode,
return gen_rtx_CONST_VECTOR (mode, v);
}
+ if (swap_commutative_operands_p (op0, op1)
+ /* If the two operands have the same precedence, canonicalize so
+ that the first bit of the mask selects the first operand. */
+ || (!swap_commutative_operands_p (op1, op0) && !(sel & 1)))
+ return simplify_gen_ternary (code, mode, mode, op1, op0,
+ GEN_INT (~sel & mask));
+
/* Replace (vec_merge (vec_merge a b m) c n) with (vec_merge b c n)
if no element from a appears in the result. */
if (GET_CODE (op0) == VEC_MERGE)
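
The canonicalization added above rests on a simple identity: swapping the two vec_merge operands while complementing the selector under the lane mask yields the same vector, so the form in which the first mask bit selects the first operand can be preferred. A toy 4-lane model in plain C (this is not GCC's rtx representation, just an illustration of the identity):

#include <stdio.h>

/* Toy (vec_merge op0 op1 sel) on 4 lanes: bit I of SEL set means lane I
   comes from OP0, clear means it comes from OP1.  */
static void
vec_merge4 (int *dst, const int *op0, const int *op1, unsigned sel)
{
  for (int i = 0; i < 4; i++)
    dst[i] = (sel >> i) & 1 ? op0[i] : op1[i];
}

int
main (void)
{
  int a[4] = { 10, 11, 12, 13 }, b[4] = { 20, 21, 22, 23 };
  int r1[4], r2[4];
  unsigned sel = 0x5, mask = 0xf;
  vec_merge4 (r1, a, b, sel);
  vec_merge4 (r2, b, a, ~sel & mask);  /* swapped operands, inverted sel */
  for (int i = 0; i < 4; i++)
    printf ("%d %d\n", r1[i], r2[i]);  /* the two columns are identical */
  return 0;
}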
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 4551e2d..73e8f7c 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,218 @@
+2025-05-07 Jeff Law <jlaw@ventanamicro.com>
+
+ PR target/120137
+ PR target/120154
+ * gcc.target/riscv/pr120137.c: New test.
+ * gcc.target/riscv/pr120154.c: New test.
+
+2025-05-07 Dongyan Chen <chendongyan@isrc.iscas.ac.cn>
+
+ * gcc.target/riscv/arch-48.c: New test.
+
+2025-05-07 Richard Earnshaw <rearnsha@arm.com>
+
+ PR target/110796
+ PR target/118446
+ * gcc.target/arm/armv8_2-fp16-arith-1.c: Adjust due to no longer
+ emitting VCMPE when -ffast-math is used.
+
+2025-05-07 Jakub Jelinek <jakub@redhat.com>
+
+ PR preprocessor/108900
+ PR preprocessor/116047
+ PR preprocessor/120061
+ * gcc.dg/plugin/plugin.exp: Add location-overflow-test-pr116047.c
+ and location-overflow-test-pr120061.c.
+ * gcc.dg/plugin/location_overflow_plugin.cc (plugin_init): Don't error
+ on unknown values, instead just break. Handle 0x4fHHHHHH arguments
+ differently.
+ * gcc.dg/plugin/location-overflow-test-pr116047.c: New test.
+ * gcc.dg/plugin/location-overflow-test-pr116047-1.h: New test.
+ * gcc.dg/plugin/location-overflow-test-pr116047-2.h: New test.
+ * gcc.dg/plugin/location-overflow-test-pr120061.c: New test.
+ * gcc.dg/plugin/location-overflow-test-pr120061-1.h: New test.
+ * gcc.dg/plugin/location-overflow-test-pr120061-2.h: New test.
+
+2025-05-07 Jan Hubicka <hubicka@ucw.cz>
+
+ * gcc.target/i386/pr119919.c: Add -mtune=znver1
+
+2025-05-07 Jennifer Schmitz <jschmitz@nvidia.com>
+
+ PR target/117978
+ * gcc.target/aarch64/sve/acle/general/whilelt_5.c: Adjust expected
+ outcome.
+ * gcc.target/aarch64/sve/ldst_ptrue_pat_128_to_neon.c: New test.
+ * gcc.target/aarch64/sve/while_7.c: Adjust expected outcome.
+ * gcc.target/aarch64/sve/while_9.c: Adjust expected outcome.
+
+2025-05-07 Stefan Schulze Frielinghaus <stefansf@gcc.gnu.org>
+
+ * gcc.target/s390/vector/cstoreti-1.c: New test.
+ * gcc.target/s390/vector/cstoreti-2.c: New test.
+
+2025-05-07 H.J. Lu <hjl.tools@gmail.com>
+
+ PR target/120036
+ * g++.target/i386/pr120036.C: New test.
+ * gcc.target/i386/pr117839-3a.c: Likewise.
+ * gcc.target/i386/pr117839-3b.c: Likewise.
+
+2025-05-07 Paul Thomas <pault@gcc.gnu.org>
+ and Steven G. Kargl <kargl@gcc.gnu.org>
+
+ PR fortran/119948
+ * gfortran.dg/pr119948.f90: Update to incorporate failing test,
+ where module procedure is the result. Test submodule cases.
+
+2025-05-07 Jeff Law <jlaw@ventanamicro.com>
+
+ * g++.target/riscv/redundant-andi.C: New test.
+
+2025-05-06 Dongyan Chen <chendongyan@isrc.iscas.ac.cn>
+
+ * gcc.target/riscv/arch-47.c: New test.
+
+2025-05-06 Mingzhu Yan <yanmingzhu@iscas.ac.cn>
+
+ * gcc.target/riscv/arch-45.c: New test.
+ * gcc.target/riscv/arch-46.c: New test.
+
+2025-05-06 Jerry DeLisle <jvdelisle@gcc.gnu.org>
+
+ PR fortran/120049
+ * gfortran.dg/pr120049_a.f90: New test.
+ * gfortran.dg/pr120049_b.f90: New test.
+
+2025-05-06 Thomas Koenig <tkoenig@gcc.gnu.org>
+
+ PR fortran/119928
+ * gfortran.dg/interface_60.f90: New test.
+
+2025-05-06 Martin Jambor <mjambor@suse.cz>
+
+ PR ipa/119852
+ * gcc.dg/ipa/pr119852.c: New test.
+
+2025-05-06 David Malcolm <dmalcolm@redhat.com>
+
+ PR sarif-replay/117988
+ * sarif-replay.dg/2.1.0-invalid/3.1-not-an-object.sarif: Add
+ expected logical location.
+ * sarif-replay.dg/2.1.0-invalid/3.11.11-missing-arguments-for-placeholders.sarif:
+ Likewise.
+ * sarif-replay.dg/2.1.0-invalid/3.11.11-not-enough-arguments-for-placeholders.sarif:
+ Likewise.
+ * sarif-replay.dg/2.1.0-invalid/3.11.5-unescaped-braces.sarif: Likewise.
+ * sarif-replay.dg/2.1.0-invalid/3.13.2-no-version.sarif: Likewise.
+ * sarif-replay.dg/2.1.0-invalid/3.13.2-version-not-a-string.sarif: Likewise.
+ * sarif-replay.dg/2.1.0-invalid/3.13.4-bad-runs.sarif: Likewise.
+ * sarif-replay.dg/2.1.0-invalid/3.13.4-no-runs.sarif: Likewise.
+ * sarif-replay.dg/2.1.0-invalid/3.13.4-non-object-in-runs.sarif: Likewise.
+ * sarif-replay.dg/2.1.0-invalid/3.27.10-bad-level.sarif: Likewise.
+ * sarif-replay.dg/2.1.0-invalid/3.33.3-index-out-of-range.sarif: Likewise.
+ * sarif-replay.dg/2.1.0-unhandled/3.27.10-none-level.sarif: Likewise.
+
+2025-05-06 David Malcolm <dmalcolm@redhat.com>
+
+ * libgdiagnostics.dg/test-nested-logical-locations-json-c.py: New test.
+ * libgdiagnostics.dg/test-nested-logical-locations-json.c: New test.
+ * sarif-replay.dg/2.1.0-valid/3.33.7-json-example.sarif: New test.
+ * sarif-replay.dg/2.1.0-valid/3.33.7-xml-example.sarif: New test.
+
+2025-05-06 David Malcolm <dmalcolm@redhat.com>
+
+ PR other/116176
+ * g++.dg/sarif-output/logical-locations-1.C: New test.
+ * g++.dg/sarif-output/logical-locations-1.py: New test script.
+ * g++.dg/sarif-output/logical-locations-2.C: New test.
+ * g++.dg/sarif-output/logical-locations-2.py: New test script.
+ * g++.dg/sarif-output/logical-locations-3.C: New test.
+ * g++.dg/sarif-output/logical-locations-3.py: New test script.
+ * g++.dg/sarif-output/sarif-output.exp: New script, adapted
+ from gcc.dg/sarif-output/sarif-output.exp.
+ * libgdiagnostics.dg/test-logical-location-c.py: Update for using
+ theRun.logicalLocations.
+ * libgdiagnostics.dg/test-warning-with-path-c.py: Likewise.
+
+2025-05-06 David Malcolm <dmalcolm@redhat.com>
+
+ * gcc.dg/plugin/diagnostic_plugin_test_paths.cc: Update for
+ changes to simple_diagnostic_path.
+
+2025-05-06 David Malcolm <dmalcolm@redhat.com>
+
+ * libgdiagnostics.dg/test-logical-location.c: Include
+ <string.h>.
+ (main): Verify that the accessors work.
+ * libgdiagnostics.dg/test-logical-location.cc: New test.
+
+2025-05-06 Shreya Munnangi <smunnangi1@ventanamicro.com>
+
+ PR middle-end/114512
+ * gcc.target/riscv/pr114512.c: New test.
+
+2025-05-06 Pan Li <pan2.li@intel.com>
+
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i16.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i32.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i64.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-i8.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u16.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u32.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u64.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-3-u8.c: New test.
+
+2025-05-06 Pan Li <pan2.li@intel.com>
+
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i16.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i32.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i64.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-i8.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u16.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u32.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u64.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-2-u8.c: New test.
+
+2025-05-06 Pan Li <pan2.li@intel.com>
+
+ * gcc.target/riscv/rvv/rvv.exp: Add new folder vx_vf for all
+ vec_dup + vv to vx testcases.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_run.h: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-i16.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-i32.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-i64.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-i8.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-u16.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-u32.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-u64.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-1-u8.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-i16.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-i32.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-i64.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-i8.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-u16.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-u32.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-u64.c: New test.
+ * gcc.target/riscv/rvv/autovec/vx_vf/vx_vadd-run-1-u8.c: New test.
+
+2025-05-06 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/115777
+ * gcc.dg/vect/bb-slp-pr115777.c: New testcase.
+
+2025-05-06 Jakub Jelinek <jakub@redhat.com>
+
+ PR tree-optimization/120074
+ * gcc.dg/pr120074.c: New test.
+
+2025-05-06 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/120031
+ * gcc.target/i386/pr120031.c: New testcase.
+
2025-05-05 Jeff Law <jlaw@ventanamicro.com>
PR target/119971
diff --git a/gcc/testsuite/g++.target/i386/pr120036.C b/gcc/testsuite/g++.target/i386/pr120036.C
new file mode 100644
index 0000000..a2fc24f
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr120036.C
@@ -0,0 +1,113 @@
+/* { dg-do compile { target fpic } } */
+/* { dg-options "-O2 -std=c++11 -march=sapphirerapids -fPIC" } */
+
+typedef _Float16 Native;
+struct float16_t
+{
+ Native native;
+ float16_t ();
+ float16_t (Native arg) : native (arg) {}
+ operator Native ();
+ float16_t
+ operator+ (float16_t rhs)
+ {
+ return native + rhs.native;
+ }
+ float16_t
+ operator* (float16_t)
+ {
+ return native * native;
+ }
+};
+template <int N> struct Simd
+{
+ static constexpr int kPrivateLanes = N;
+};
+template <int N> struct ClampNAndPow2
+{
+ using type = Simd<N>;
+};
+template <int kLimit> struct CappedTagChecker
+{
+ static constexpr int N = sizeof (int) ? kLimit : 0;
+ using type = typename ClampNAndPow2<N>::type;
+};
+template <typename, int kLimit, int>
+using CappedTag = typename CappedTagChecker<kLimit>::type;
+template <class D>
+int
+Lanes (D)
+{
+ return D::kPrivateLanes;
+}
+template <class D> int Zero (D);
+template <class D> using VFromD = decltype (Zero (D ()));
+struct Vec512
+{
+ __attribute__ ((__vector_size__ (16))) _Float16 raw;
+};
+Vec512 Zero (Simd<2>);
+template <class D> void ReduceSum (D, VFromD<D>);
+struct Dot
+{
+ template <int, class D, typename T>
+ static T
+ Compute (D d, T *pa, int num_elements)
+ {
+ T *pb;
+ int N = Lanes (d), i = 0;
+ if (__builtin_expect (num_elements < N, 0))
+ {
+ T sum0 = 0, sum1 = 0;
+ for (; i + 2 <= num_elements; i += 2)
+ {
+ float16_t __trans_tmp_6 = pa[i] * pb[i],
+ __trans_tmp_5 = sum0 + __trans_tmp_6,
+ __trans_tmp_8 = pa[i + 1] * pb[1],
+ __trans_tmp_7 = sum1 + __trans_tmp_8;
+ sum0 = __trans_tmp_5;
+ sum1 = __trans_tmp_7;
+ }
+ float16_t __trans_tmp_9 = sum0 + sum1;
+ return __trans_tmp_9;
+ }
+ decltype (Zero (d)) sum0;
+ ReduceSum (d, sum0);
+ __builtin_trap ();
+ }
+};
+template <int kMul, class Test, int kPow2> struct ForeachCappedR
+{
+ static void
+ Do (int min_lanes, int max_lanes)
+ {
+ CappedTag<int, kMul, kPow2> d;
+ Test () (int (), d);
+ ForeachCappedR<kMul / 2, Test, kPow2>::Do (min_lanes, max_lanes);
+ }
+};
+template <class Test, int kPow2> struct ForeachCappedR<0, Test, kPow2>
+{
+ static void Do (int, int);
+};
+struct TestDot
+{
+ template <class T, class D>
+ void
+ operator() (T, D d)
+ {
+ int counts[]{ 1, 3 };
+ for (int num : counts)
+ {
+ float16_t a;
+ T __trans_tmp_4 = Dot::Compute<0> (d, &a, num);
+ }
+ }
+};
+int DotTest_TestAllDot_TestTestBody_max_lanes;
+void
+DotTest_TestAllDot_TestTestBody ()
+{
+ ForeachCappedR<64, TestDot, 0>::Do (
+ 1, DotTest_TestAllDot_TestTestBody_max_lanes);
+}
diff --git a/gcc/testsuite/g++.target/riscv/redundant-andi.C b/gcc/testsuite/g++.target/riscv/redundant-andi.C
new file mode 100644
index 0000000..fe560a7
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/redundant-andi.C
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gcb -mabi=lp64" { target rv64 } } */
+/* { dg-options "-O2 -march=rv32gcb -mabi=ilp32" { target rv32 } } */
+
+
+typedef int move_s;
+struct state_t
+{
+ int npieces[13];
+};
+typedef struct state_t state_t;
+int
+search (state_t *s, int alpha, int beta, int depth, int is_null, int cutnode,
+ int extend, int wpcs, int bpcs, move_s moves[240])
+{
+ int i;
+ if ((((moves[i]) >> 19) & 0x0F) != 13
+ && (((moves[i]) >> 19) & 0x0F) != 1 && (((moves[i]) >> 19) & 0x0F) != 2)
+ if ((wpcs + bpcs) == 1)
+ extend += 4;
+ return extend;
+}
+
+/* A splitter was generating an unnecessary andi instruction. Verify it's
+ not in our output. */
+/* { dg-final { scan-assembler-not "andi\t\[a-z\]\[0-9\],\[a-z\]\[0-9\],-1" } } */
diff --git a/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047-1.h b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047-1.h
new file mode 100644
index 0000000..3dd6434
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047-1.h
@@ -0,0 +1,6 @@
+
+
+
+
+#include "location-overflow-test-pr116047-2.h"
+static_assert (__LINE__ == 6, "");
diff --git a/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047-2.h b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047-2.h
new file mode 100644
index 0000000..048f715
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047-2.h
@@ -0,0 +1 @@
+int i;
diff --git a/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047.c b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047.c
new file mode 100644
index 0000000..75161fa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr116047.c
@@ -0,0 +1,5 @@
+/* PR preprocessor/116047 */
+/* { dg-do preprocess } */
+/* { dg-options "-nostdinc -std=c23 -fplugin-arg-location_overflow_plugin-value=0x4ffe0180" } */
+#include "location-overflow-test-pr116047-1.h"
+/* { dg-final { scan-file location-overflow-test-pr116047.i "static_assert\[^\n\r]\*6\[^\n\r]\*== 6" } } */
diff --git a/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061-1.h b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061-1.h
new file mode 100644
index 0000000..ebf7704
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061-1.h
@@ -0,0 +1,6 @@
+
+
+
+
+#include "location-overflow-test-pr120061-2.h"
+
diff --git a/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061-2.h b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061-2.h
new file mode 100644
index 0000000..048f715
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061-2.h
@@ -0,0 +1 @@
+int i;
diff --git a/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061.c b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061.c
new file mode 100644
index 0000000..e8e8038
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/location-overflow-test-pr120061.c
@@ -0,0 +1,6 @@
+/* PR preprocessor/120061 */
+/* { dg-do preprocess } */
+/* { dg-options "-nostdinc -std=c23 -fplugin-arg-location_overflow_plugin-value=0x61000000" } */
+#include "location-overflow-test-pr120061-1.h"
+static_assert (__LINE__ == 5, "");
+/* { dg-final { scan-file location-overflow-test-pr120061.i "static_assert\[^\n\r]\*5\[^\n\r]\*== 5" } } */
diff --git a/gcc/testsuite/gcc.dg/plugin/location_overflow_plugin.cc b/gcc/testsuite/gcc.dg/plugin/location_overflow_plugin.cc
index f731b14..f770d35 100644
--- a/gcc/testsuite/gcc.dg/plugin/location_overflow_plugin.cc
+++ b/gcc/testsuite/gcc.dg/plugin/location_overflow_plugin.cc
@@ -85,9 +85,18 @@ plugin_init (struct plugin_name_args *plugin_info,
error_at (UNKNOWN_LOCATION, "missing plugin argument");
/* With 64-bit locations, the thresholds are larger, so shift the base
- location argument accordingly. */
+ location argument accordingly; basically, remap the GCC 14 32-bit
+ location_t argument values to their 64-bit location_t counterparts.
+ There is one exception for values slightly before the 32-bit
+ location_t LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES (0x50000000).
+ In that case remap them to lie the same distance before the 64-bit
+ location_t LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES,
+ ((location_t) 0x50000000) << 31. */
gcc_assert (sizeof (location_t) == sizeof (uint64_t));
- base_location = 1 + ((base_location - 1) << 31);
+ if (base_location >= 0x4f000000 && base_location <= 0x4fffffff)
+ base_location += (((location_t) 0x50000000) << 31) - 0x50000000;
+ else
+ base_location = 1 + ((base_location - 1) << 31);
register_callback (plugin_info->base_name,
PLUGIN_PRAGMAS,
@@ -107,7 +116,7 @@ plugin_init (struct plugin_name_args *plugin_info,
break;
default:
- error_at (UNKNOWN_LOCATION, "unrecognized value for plugin argument");
+ break;
}
return 0;
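
As a sanity check of the remapping described in the comment above, the two plugin argument values used by the new tests can be run through the same arithmetic in a standalone snippet (this mirrors the plugin code, with location_t reduced to a plain uint64_t; it is only an illustration, not part of the plugin):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t location_t;  /* 64-bit location_t, reduced to an integer */

/* Same remapping as plugin_init: values just below the 32-bit
   LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES (0x50000000) keep their
   distance to the 64-bit threshold; everything else is shifted up.  */
static location_t
remap (location_t base_location)
{
  if (base_location >= 0x4f000000 && base_location <= 0x4fffffff)
    return base_location + ((((location_t) 0x50000000) << 31) - 0x50000000);
  return 1 + ((base_location - 1) << 31);
}

int
main (void)
{
  /* Argument from location-overflow-test-pr116047.c.  */
  printf ("%#llx\n", (unsigned long long) remap (0x4ffe0180));
  /* Argument from location-overflow-test-pr120061.c.  */
  printf ("%#llx\n", (unsigned long long) remap (0x61000000));
  return 0;
}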
diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp b/gcc/testsuite/gcc.dg/plugin/plugin.exp
index 90c9162..96e76d2 100644
--- a/gcc/testsuite/gcc.dg/plugin/plugin.exp
+++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp
@@ -138,7 +138,9 @@ set plugin_test_list [list \
{ location_overflow_plugin.cc \
location-overflow-test-1.c \
location-overflow-test-2.c \
- location-overflow-test-pr83173.c } \
+ location-overflow-test-pr83173.c \
+ location-overflow-test-pr116047.c \
+ location-overflow-test-pr120061.c } \
{ must_tail_call_plugin.cc \
must-tail-call-1.c \
must-tail-call-2.c } \
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_134-pr120089.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_134-pr120089.c
new file mode 100644
index 0000000..4d8199c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_134-pr120089.c
@@ -0,0 +1,66 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-additional-options "-funswitch-loops" } */
+
+#include "tree-vect.h"
+
+typedef int type;
+typedef type Vec2[2];
+
+struct BytesVec {
+ type d[100];
+};
+
+__attribute__((noipa)) struct BytesVec
+buildVertexBufferData(const Vec2 *origVertices, bool needsZW,
+ unsigned paddingSize, unsigned long t) {
+ const unsigned vertexCount = t;
+ struct BytesVec data = (struct BytesVec){.d = {0}};
+ type *nextVertexPtr = data.d;
+
+ for (unsigned vertexIdx = 0u; vertexIdx < vertexCount; ++vertexIdx) {
+
+ if (vertexIdx > t)
+ __builtin_trap();
+ __builtin_memcpy(nextVertexPtr, &origVertices[vertexIdx],
+ 2 * sizeof(type));
+ nextVertexPtr += 2;
+
+ if (needsZW) {
+ nextVertexPtr += 2;
+ }
+
+ nextVertexPtr += paddingSize;
+ }
+
+ return data;
+}
+Vec2 origVertices[] = {
+ {0, 1}, {2, 3}, {4, 5}, {6, 7},
+ {8, 9}, {10, 11}, {12, 13}, {14, 15},
+ {16, 17}, {18, 19}, {20, 21}, {22, 23},
+ {24, 25}, {26, 27}, {27, 28}, {29, 30},
+};
+
+int main()
+{
+ check_vect ();
+ struct BytesVec vec
+ = buildVertexBufferData(origVertices, false, 0,
+ sizeof(origVertices) / sizeof(origVertices[0]));
+
+ int errors = 0;
+ for (unsigned i = 0; i < 100; i++) {
+ if (i / 2 < sizeof(origVertices) / sizeof(origVertices[0])) {
+ int ii = i;
+ int e = origVertices[ii / 2][ii % 2];
+ if (vec.d[i] != e)
+ errors++;
+ } else {
+ if (vec.d[i] != 0)
+ errors++;
+ }
+ }
+ if (errors)
+ __builtin_abort();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_135-pr120143.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_135-pr120143.c
new file mode 100644
index 0000000..1ee30a8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_135-pr120143.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-additional-options "-O3 -fwhole-program" } */
+
+short a;
+extern _Bool b[][23];
+short g = 6;
+int v[4];
+int x[3];
+void c(short g, int v[], int x[]) {
+ for (;;)
+ for (unsigned y = 0; y < 023; y++) {
+ b[y][y] = v[y];
+ for (_Bool aa = 0; aa < (_Bool)g; aa = x[y])
+ a = a > 0;
+ }
+}
+int main() { c(g, v, x); }
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_5.c
index f06a74a..05e266a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_5.c
@@ -11,8 +11,7 @@ extern "C" {
/*
** load_vl1:
-** ptrue (p[0-7])\.[bhsd], vl1
-** ld1h z0\.h, \1/z, \[x0\]
+** ldr h0, \[x0\]
** ret
*/
svint16_t
@@ -22,7 +21,12 @@ load_vl1 (int16_t *ptr)
}
/*
-** load_vl2:
+** load_vl2: { target aarch64_little_endian }
+** ldr s0, \[x0\]
+** ret
+*/
+/*
+** load_vl2: { target aarch64_big_endian }
** ptrue (p[0-7])\.h, vl2
** ld1h z0\.h, \1/z, \[x0\]
** ret
@@ -46,7 +50,12 @@ load_vl3 (int16_t *ptr)
}
/*
-** load_vl4:
+** load_vl4: { target aarch64_little_endian }
+** ldr d0, \[x0\]
+** ret
+*/
+/*
+** load_vl4: { target aarch64_big_endian }
** ptrue (p[0-7])\.h, vl4
** ld1h z0\.h, \1/z, \[x0\]
** ret
@@ -94,7 +103,12 @@ load_vl7 (int16_t *ptr)
}
/*
-** load_vl8:
+** load_vl8: { target aarch64_little_endian }
+** ldr q0, \[x0\]
+** ret
+*/
+/*
+** load_vl8: { target aarch64_big_endian }
** ptrue (p[0-7])\.h, vl8
** ld1h z0\.h, \1/z, \[x0\]
** ret
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/ldst_ptrue_pat_128_to_neon.c b/gcc/testsuite/gcc.target/aarch64/sve/ldst_ptrue_pat_128_to_neon.c
new file mode 100644
index 0000000..2d47c1f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/ldst_ptrue_pat_128_to_neon.c
@@ -0,0 +1,81 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target aarch64_little_endian } */
+
+#include <arm_sve.h>
+
+#define TEST(TYPE, TY, W, B) \
+ sv##TYPE \
+ ld1_##TY##W##B##_1 (TYPE *x) \
+ { \
+ svbool_t pg = svwhilelt_b##B (0, W); \
+ return svld1_##TY##B (pg, x); \
+ } \
+ sv##TYPE \
+ ld1_##TY##W##B##_2 (TYPE *x) \
+ { \
+ svbool_t pg = svptrue_pat_b##B ((enum svpattern) (W > 8 ? 9 : W)); \
+ return svld1_##TY##B (pg, x); \
+ } \
+ void \
+ st1_##TY##W##B##_1 (TYPE *x, sv##TYPE data) \
+ { \
+ svbool_t pg = svwhilelt_b##B (0, W); \
+ return svst1_##TY##B (pg, x, data); \
+ } \
+ void \
+ st1_##TY##W##B##_2 (TYPE *x, sv##TYPE data) \
+ { \
+ svbool_t pg = svptrue_pat_b##B ((enum svpattern) (W > 8 ? 9 : W)); \
+ return svst1_##TY##B (pg, x, data); \
+ } \
+
+#define TEST64(TYPE, TY, B) \
+ TEST (TYPE, TY, 1, B) \
+ TEST (TYPE, TY, 2, B) \
+
+#define TEST32(TYPE, TY, B) \
+ TEST64 (TYPE, TY, B) \
+ TEST (TYPE, TY, 4, B) \
+
+#define TEST16(TYPE, TY, B) \
+ TEST32 (TYPE, TY, B) \
+ TEST (TYPE, TY, 8, B) \
+
+#define TEST8(TYPE, TY, B) \
+ TEST16 (TYPE, TY, B) \
+ TEST (TYPE, TY, 16, B)
+
+#define T(TYPE, TY, B) \
+ TEST##B (TYPE, TY, B)
+
+T (bfloat16_t, bf, 16)
+T (float16_t, f, 16)
+T (float32_t, f, 32)
+T (float64_t, f, 64)
+T (int8_t, s, 8)
+T (int16_t, s, 16)
+T (int32_t, s, 32)
+T (int64_t, s, 64)
+T (uint8_t, u, 8)
+T (uint16_t, u, 16)
+T (uint32_t, u, 32)
+T (uint64_t, u, 64)
+
+/* { dg-final { scan-assembler-times {\tldr\tq0, \[x0\]} 24 } } */
+/* { dg-final { scan-assembler-times {\tldr\td0, \[x0\]} 24 } } */
+/* { dg-final { scan-assembler-times {\tldr\ts0, \[x0\]} 18 } } */
+/* { dg-final { scan-assembler-times {\tldr\th0, \[x0\]} 12 } } */
+/* { dg-final { scan-assembler-times {\tldr\tb0, \[x0\]} 4 } } */
+
+/* { dg-final { scan-assembler-times {\tstr\tq0, \[x0\]} 24 } } */
+/* { dg-final { scan-assembler-times {\tstr\td0, \[x0\]} 24 } } */
+/* { dg-final { scan-assembler-times {\tstr\ts0, \[x0\]} 18 } } */
+/* { dg-final { scan-assembler-times {\tstr\th0, \[x0\]} 12 } } */
+/* { dg-final { scan-assembler-times {\tstr\tb0, \[x0\]} 4 } } */
+
+svint8_t foo (int8_t *x)
+{
+ return svld1_s8 (svptrue_b16 (), x);
+}
+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, all\n\tld1b} 1 } } */ \ No newline at end of file
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_7.c b/gcc/testsuite/gcc.target/aarch64/sve/while_7.c
index a66a20d..ab2fa36 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/while_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/while_7.c
@@ -19,7 +19,7 @@
TEST_ALL (ADD_LOOP)
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl8\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl8\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tldr\td[0-9]+, \[x0\]} 1 } } */
+/* { dg-final { scan-assembler-times {\tldr\tq[0-9]+, \[x0\]} 1 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 2 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_9.c b/gcc/testsuite/gcc.target/aarch64/sve/while_9.c
index dd3f404..99940dd 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/while_9.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/while_9.c
@@ -19,7 +19,7 @@
TEST_ALL (ADD_LOOP)
-/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl16\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tldr\tq[0-9]+\, \[x0\]} 1 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h,} 2 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 2 } } */
/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 2 } } */
diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c
index 52b8737..f3fea52 100644
--- a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c
+++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c
@@ -106,8 +106,7 @@ TEST_CMP (greaterthanqual, >=, int16x8_t, float16x8_t)
/* { dg-final { scan-assembler-times {vdiv\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */
/* For float16_t. */
-/* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 2 } } */
-/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 4 } } */
+/* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 6 } } */
/* For float16x4_t. */
/* { dg-final { scan-assembler-times {vceq\.f16\td[0-9]+, d[0-9]+} 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr117839-3a.c b/gcc/testsuite/gcc.target/i386/pr117839-3a.c
new file mode 100644
index 0000000..81afa9d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117839-3a.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx -msse2 -mtune=generic" } */
+/* { dg-final { scan-assembler-times "xor\[a-z\]*\[\t \]*%xmm\[0-9\]\+,\[^,\]*" 1 } } */
+
+typedef char v4qi __attribute__((vector_size(4)));
+typedef char v16qi __attribute__((vector_size(16)));
+
+v4qi a;
+v16qi b;
+void
+foo (v4qi* c, v16qi* d)
+{
+ v4qi sum = __extension__(v4qi){0, 0, 0, 0};
+ v16qi sum2 = __extension__(v16qi){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0};
+ for (int i = 0; i != 100; i++)
+ sum += c[i];
+ for (int i = 0 ; i != 100; i++)
+ sum2 += d[i];
+ a = sum;
+ b = sum2;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr117839-3b.c b/gcc/testsuite/gcc.target/i386/pr117839-3b.c
new file mode 100644
index 0000000..a599c28
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117839-3b.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3" } */
+/* { dg-final { scan-assembler-times "xor\[a-z\]*\[\t \]*%xmm\[0-9\]\+,\[^,\]*" 1 } } */
+
+#include "pr117839-3a.c"
diff --git a/gcc/testsuite/gcc.target/i386/pr119919.c b/gcc/testsuite/gcc.target/i386/pr119919.c
index ed64656..e39819f 100644
--- a/gcc/testsuite/gcc.target/i386/pr119919.c
+++ b/gcc/testsuite/gcc.target/i386/pr119919.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -msse2 -fdump-tree-vect-details" } */
+/* { dg-options "-O2 -msse2 -fdump-tree-vect-details -mtune=znver1" } */
int a[9*9];
bool b[9];
void test()
diff --git a/gcc/testsuite/gcc.target/riscv/arch-45.c b/gcc/testsuite/gcc.target/riscv/arch-45.c
new file mode 100644
index 0000000..afffb99
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arch-45.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_svadu -mabi=lp64" } */
+int foo()
+{
+}
diff --git a/gcc/testsuite/gcc.target/riscv/arch-46.c b/gcc/testsuite/gcc.target/riscv/arch-46.c
new file mode 100644
index 0000000..2a06217
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arch-46.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_svade -mabi=lp64" } */
+int foo()
+{
+}
diff --git a/gcc/testsuite/gcc.target/riscv/arch-47.c b/gcc/testsuite/gcc.target/riscv/arch-47.c
new file mode 100644
index 0000000..06bc80f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arch-47.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_sdtrig_ssstrict -mabi=lp64" } */
+int foo()
+{
+}
diff --git a/gcc/testsuite/gcc.target/riscv/arch-48.c b/gcc/testsuite/gcc.target/riscv/arch-48.c
new file mode 100644
index 0000000..58a558e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arch-48.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zama16b -mabi=lp64" } */
+int foo()
+{
+}
diff --git a/gcc/testsuite/gcc.target/riscv/pr120137.c b/gcc/testsuite/gcc.target/riscv/pr120137.c
new file mode 100644
index 0000000..c55a1c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr120137.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mrvv-vector-bits=zvl -mabi=lp64" } */
+
+char b[13][13];
+void c() {
+ for (int d = 0; d < 13; ++d)
+ for (int e = 0; e < 13; ++e)
+ b[d][e] = e == 0 ? -98 : 38;
+}
+
+
+
diff --git a/gcc/testsuite/gcc.target/riscv/pr120154.c b/gcc/testsuite/gcc.target/riscv/pr120154.c
new file mode 100644
index 0000000..fd849ca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr120154.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gv -mabi=lp64" } */
+
+
+
+typedef __attribute__((__vector_size__(4))) char V;
+
+V g;
+
+V
+bar(V a, V b)
+{
+ V s = a + b + g;
+ return s;
+}
+
+V
+foo()
+{
+ return bar((V){20}, (V){23, 150});
+}
+
diff --git a/gcc/testsuite/gcc.target/s390/vector/cstoreti-1.c b/gcc/testsuite/gcc.target/s390/vector/cstoreti-1.c
new file mode 100644
index 0000000..f2a131b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/cstoreti-1.c
@@ -0,0 +1,127 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -march=z13" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** test_le:
+** vl (%v.),0\(%r2\),3
+** vl (%v.),0\(%r3\),3
+** vecg \2,\1
+** jne \.L.+
+** vchlgs %v.,\1,\2
+** lghi %r2,0
+** locghinl %r2,1
+** br %r14
+*/
+
+int test_le (__int128 x, __int128 y) { return x <= y; }
+
+/*
+** test_leu:
+** vl (%v.),0\(%r2\),3
+** vl (%v.),0\(%r3\),3
+** veclg \2,\1
+** jne \.L.+
+** vchlgs %v.,\1,\2
+** lghi %r2,0
+** locghinl %r2,1
+** br %r14
+*/
+
+int test_leu (unsigned __int128 x, unsigned __int128 y) { return x <= y; }
+
+/*
+** test_lt:
+** vl (%v.),0\(%r2\),3
+** vl (%v.),0\(%r3\),3
+** vecg \1,\2
+** jne \.L.+
+** vchlgs %v.,\2,\1
+** lghi %r2,0
+** locghil %r2,1
+** br %r14
+*/
+
+int test_lt (__int128 x, __int128 y) { return x < y; }
+
+/*
+** test_ltu:
+** vl (%v.),0\(%r2\),3
+** vl (%v.),0\(%r3\),3
+** veclg \1,\2
+** jne \.L.+
+** vchlgs %v.,\2,\1
+** lghi %r2,0
+** locghil %r2,1
+** br %r14
+*/
+
+int test_ltu (unsigned __int128 x, unsigned __int128 y) { return x < y; }
+
+/*
+** test_ge:
+** vl (%v.),0\(%r2\),3
+** vl (%v.),0\(%r3\),3
+** vecg \1,\2
+** jne \.L.+
+** vchlgs %v.,\2,\1
+** lghi %r2,0
+** locghinl %r2,1
+** br %r14
+*/
+
+int test_ge (__int128 x, __int128 y) { return x >= y; }
+
+/*
+** test_geu:
+** vl (%v.),0\(%r2\),3
+** vl (%v.),0\(%r3\),3
+** veclg \1,\2
+** jne \.L.+
+** vchlgs %v.,\2,\1
+** lghi %r2,0
+** locghinl %r2,1
+** br %r14
+*/
+
+int test_geu (unsigned __int128 x, unsigned __int128 y) { return x >= y; }
+
+/*
+** test_gt:
+** vl (%v.),0\(%r2\),3
+** vl (%v.),0\(%r3\),3
+** vecg \2,\1
+** jne \.L.+
+** vchlgs %v.,\1,\2
+** lghi %r2,0
+** locghil %r2,1
+** br %r14
+*/
+
+int test_gt (__int128 x, __int128 y) { return x > y; }
+
+/*
+** test_gtu:
+** vl (%v.),0\(%r2\),3
+** vl (%v.),0\(%r3\),3
+** veclg \2,\1
+** jne \.L.+
+** vchlgs %v.,\1,\2
+** lghi %r2,0
+** locghil %r2,1
+** br %r14
+*/
+
+int test_gtu (unsigned __int128 x, unsigned __int128 y) { return x > y; }
+
+/* { dg-final { scan-assembler-times {vceqgs\t} 4 } } */
+/* { dg-final { scan-assembler-times {locghie\t} 2 } } */
+/* { dg-final { scan-assembler-times {locghine\t} 2 } } */
+
+int test_eq (__int128 x, __int128 y) { return x == y; }
+
+int test_equ (unsigned __int128 x, unsigned __int128 y) { return x == y; }
+
+int test_ne (__int128 x, __int128 y) { return x != y; }
+
+int test_neu (unsigned __int128 x, unsigned __int128 y) { return x != y; }
diff --git a/gcc/testsuite/gcc.target/s390/vector/cstoreti-2.c b/gcc/testsuite/gcc.target/s390/vector/cstoreti-2.c
new file mode 100644
index 0000000..d7b0382
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/cstoreti-2.c
@@ -0,0 +1,25 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -march=z17" } */
+/* { dg-final { scan-assembler-times {vecq\t} 8 } } */
+/* { dg-final { scan-assembler-times {veclq\t} 4 } } */
+/* { dg-final { scan-assembler-times {locghile\t} 1 } } LE */
+/* { dg-final { scan-assembler-times {slbgr\t} 1 } } LEU */
+/* { dg-final { scan-assembler-times {locghil\t} 2 } } LT LTU */
+/* { dg-final { scan-assembler-times {locghihe\t} 2 } } GE GEU */
+/* { dg-final { scan-assembler-times {locghih\t} 1 } } GT */
+/* { dg-final { scan-assembler-times {alcgr\t} 1 } } GTU */
+/* { dg-final { scan-assembler-times {locghie\t} 2 } } EQ EQU */
+/* { dg-final { scan-assembler-times {locghine\t} 2 } } NE NEU */
+
+int test_le (__int128 x, __int128 y) { return x <= y; }
+int test_leu (unsigned __int128 x, unsigned __int128 y) { return x <= y; }
+int test_lt (__int128 x, __int128 y) { return x < y; }
+int test_ltu (unsigned __int128 x, unsigned __int128 y) { return x < y; }
+int test_ge (__int128 x, __int128 y) { return x >= y; }
+int test_geu (unsigned __int128 x, unsigned __int128 y) { return x >= y; }
+int test_gt (__int128 x, __int128 y) { return x > y; }
+int test_gtu (unsigned __int128 x, unsigned __int128 y) { return x > y; }
+int test_eq (__int128 x, __int128 y) { return x == y; }
+int test_equ (unsigned __int128 x, unsigned __int128 y) { return x == y; }
+int test_ne (__int128 x, __int128 y) { return x != y; }
+int test_neu (unsigned __int128 x, unsigned __int128 y) { return x != y; }
diff --git a/gcc/testsuite/gfortran.dg/pr119948.f90 b/gcc/testsuite/gfortran.dg/pr119948.f90
index 9ecb080..2e36fae 100644
--- a/gcc/testsuite/gfortran.dg/pr119948.f90
+++ b/gcc/testsuite/gfortran.dg/pr119948.f90
@@ -1,7 +1,8 @@
-! { dg-do compile }
+! { dg-do run }
!
-! Test the fix for PR119948, which used to fail as indicated below with,
-! "Error: Bad allocate-object at (1) for a PURE procedure"
+! Test the fix for PR119948, which used to fail as indicated below with:
+! (1) "Error: Bad allocate-object at (1) for a PURE procedure"
+! (2) "Error: ‘construct_test2’ at (1) is not a variable"
!
! Contributed by Damian Rouson <damian@archaeologic.codes>
!
@@ -18,33 +19,65 @@ module test_m
type(test_t) :: test
type(test_t), intent(in) :: arg
end function
- pure module function construct_test_sub(arg) result(test)
+
+ pure module function construct_test2(arg)
+ implicit none
+ type(test_t) construct_test2
+ type(test_t), intent(in) :: arg
+ end function
+
+ pure module function construct_test_3(arg) result(test)
implicit none
type(test_t) :: test
type(test_t), intent(in) :: arg
end function
+
+ pure module function construct_test_4(arg)
+ implicit none
+ type(test_t) :: construct_test_4
+ type(test_t), intent(in) :: arg
+ end function
end interface
contains
module procedure construct_test
- allocate(test%i, source = arg%i) ! Used to fail here
+ allocate(test%i, source = arg%i) ! Fail #1
+ end procedure
+
+ module procedure construct_test2
+ allocate(construct_test2%i, source = arg%i) ! Fail #2
end procedure
end module
submodule (test_m)test_s
contains
- module procedure construct_test_sub
+ module procedure construct_test_3
allocate(test%i, source = arg%i) ! This was OK.
end procedure
+
+ module procedure construct_test_4
+ allocate(construct_test_4%i, source = arg%i) ! This was OK.
+ end procedure
end submodule
use test_m
type(test_t) :: res, dummy
- dummy%i = 42
+!
+ dummy%i = int (rand () * 1e6)
res = construct_test (dummy)
if (res%i /= dummy%i) stop 1
- dummy%i = -42
- res = construct_test_sub (dummy)
+!
+ dummy%i = int (rand () * 1e6)
+ res = construct_test2 (dummy)
if (res%i /= dummy%i) stop 2
+!
+ dummy%i = int (rand () * 1e6)
+ res = construct_test_3 (dummy)
+ if (res%i /= dummy%i) stop 3
+
+ dummy%i = int (rand () * 1e6)
+ res = construct_test_4 (dummy)
+ if (res%i /= dummy%i) stop 4
+
deallocate (res%i, dummy%i)
end
diff --git a/gcc/testsuite/gfortran.dg/pr120049_a.f90 b/gcc/testsuite/gfortran.dg/pr120049_a.f90
new file mode 100644
index 0000000..c404a4d
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr120049_a.f90
@@ -0,0 +1,15 @@
+! { dg-do preprocess }
+! { dg-additional-options "-cpp" }
+!
+! Test the fix for PR86248
+program tests_gtk_sup
+ use gtk_sup
+ implicit none
+ type(c_ptr), target :: val
+ if (c_associated(val, c_loc(val))) then
+ stop 1
+ endif
+ if (c_associated(c_loc(val), val)) then
+ stop 2
+ endif
+end program tests_gtk_sup
diff --git a/gcc/testsuite/gfortran.dg/pr120049_b.f90 b/gcc/testsuite/gfortran.dg/pr120049_b.f90
new file mode 100644
index 0000000..127db98
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr120049_b.f90
@@ -0,0 +1,8 @@
+! { dg-do run }
+! { dg-additional-sources pr120049_a.f90 }
+!
+! Module for pr120049.f90
+!
+module gtk_sup
+ use, intrinsic :: iso_c_binding
+end module gtk_sup
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index 5b63bf6..fe8aee0 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -1066,11 +1066,7 @@ if_convertible_gimple_assign_stmt_p (gimple *stmt,
fprintf (dump_file, "tree could trap...\n");
return false;
}
- else if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
- || POINTER_TYPE_P (TREE_TYPE (lhs)))
- && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
- && arith_code_with_undefined_signed_overflow
- (gimple_assign_rhs_code (stmt)))
+ else if (gimple_with_undefined_signed_overflow (stmt))
/* We have to rewrite stmts with undefined overflow. */
need_to_rewrite_undefined = true;
@@ -2830,7 +2826,6 @@ predicate_statements (loop_p loop)
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
{
gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
- tree lhs;
if (!stmt)
;
else if (is_false_predicate (cond)
@@ -2886,12 +2881,7 @@ predicate_statements (loop_p loop)
gsi_replace (&gsi, new_stmt, true);
}
- else if (((lhs = gimple_assign_lhs (stmt)), true)
- && (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
- || POINTER_TYPE_P (TREE_TYPE (lhs)))
- && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs))
- && arith_code_with_undefined_signed_overflow
- (gimple_assign_rhs_code (stmt)))
+ else if (gimple_with_undefined_signed_overflow (stmt))
rewrite_to_defined_overflow (&gsi);
else if (gimple_vdef (stmt))
{
@@ -2946,7 +2936,7 @@ predicate_statements (loop_p loop)
gsi_replace (&gsi, new_call, true);
}
- lhs = gimple_get_lhs (gsi_stmt (gsi));
+ tree lhs = gimple_get_lhs (gsi_stmt (gsi));
if (lhs && TREE_CODE (lhs) == SSA_NAME)
ssa_names.add (lhs);
gsi_next (&gsi);
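
The open-coded INTEGRAL_TYPE_P / POINTER_TYPE_P / TYPE_OVERFLOW_UNDEFINED tests
above are now folded into the single gimple_with_undefined_signed_overflow
predicate before calling rewrite_to_defined_overflow.  As a rough illustration
of what that rewrite amounts to at the source level (hand-written sketch, not
GCC output; the function names are made up for the example):

    /* Signed int overflow is undefined behaviour ...  */
    int add_signed (int a, int b)
    {
      return a + b;
    }

    /* ... so the statement is carried out in the corresponding unsigned
       type, where wrap-around is well defined, and converted back.  */
    int add_rewritten (int a, int b)
    {
      unsigned int ua = (unsigned int) a;
      unsigned int ub = (unsigned int) b;
      return (int) (ua + ub);
    }
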
diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc
index 4ca0875..9d64d3a 100644
--- a/gcc/tree-scalar-evolution.cc
+++ b/gcc/tree-scalar-evolution.cc
@@ -3932,10 +3932,7 @@ final_value_replacement_loop (class loop *loop)
gsi2 = gsi_start (stmts);
while (!gsi_end_p (gsi2))
{
- gimple *stmt = gsi_stmt (gsi2);
- if (is_gimple_assign (stmt)
- && arith_code_with_undefined_signed_overflow
- (gimple_assign_rhs_code (stmt)))
+ if (gimple_with_undefined_signed_overflow (gsi_stmt (gsi2)))
rewrite_to_defined_overflow (&gsi2);
gsi_next (&gsi2);
}
diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
index f791994..19990d6 100644
--- a/gcc/tree-ssa-ifcombine.cc
+++ b/gcc/tree-ssa-ifcombine.cc
@@ -514,15 +514,9 @@ ifcombine_mark_ssa_name_walk (tree *t, int *, void *data_)
static inline void
ifcombine_rewrite_to_defined_overflow (gimple_stmt_iterator gsi)
{
- gassign *ass = dyn_cast <gassign *> (gsi_stmt (gsi));
- if (!ass)
+ if (!gimple_with_undefined_signed_overflow (gsi_stmt (gsi)))
return;
- tree lhs = gimple_assign_lhs (ass);
- if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
- || POINTER_TYPE_P (TREE_TYPE (lhs)))
- && arith_code_with_undefined_signed_overflow
- (gimple_assign_rhs_code (ass)))
- rewrite_to_defined_overflow (&gsi);
+ rewrite_to_defined_overflow (&gsi);
}
diff --git a/gcc/tree-ssa-loop-im.cc b/gcc/tree-ssa-loop-im.cc
index a3ca5af..ae2fd87 100644
--- a/gcc/tree-ssa-loop-im.cc
+++ b/gcc/tree-ssa-loop-im.cc
@@ -1241,12 +1241,24 @@ compute_invariantness (basic_block bb)
lim_data->cost);
}
- if (lim_data->cost >= LIM_EXPENSIVE
- /* When we run before PRE and PRE is active hoist all expressions
- since PRE would do so anyway and we can preserve range info
- but PRE cannot. */
- || (flag_tree_pre && !in_loop_pipeline))
+ if (lim_data->cost >= LIM_EXPENSIVE)
set_profitable_level (stmt);
+ /* When we run before PRE and PRE is active, hoist all expressions
+ to the always-executed loop, since PRE would do so anyway
+ and we can preserve range info while PRE cannot. */
+ else if (flag_tree_pre && !in_loop_pipeline
+ && outermost)
+ {
+ class loop *mloop = lim_data->max_loop;
+ if (loop_depth (outermost) > loop_depth (mloop))
+ {
+ mloop = outermost;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, " constraining to loop depth %d\n\n\n",
+ loop_depth (mloop));
+ }
+ set_level (stmt, bb->loop_father, mloop);
+ }
}
}
@@ -1407,11 +1419,7 @@ move_computations_worker (basic_block bb)
when the target loop header is executed and the stmt may
invoke undefined integer or pointer overflow rewrite it to
unsigned arithmetic. */
- if (is_gimple_assign (stmt)
- && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt)))
- && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (gimple_assign_lhs (stmt)))
- && arith_code_with_undefined_signed_overflow
- (gimple_assign_rhs_code (stmt))
+ if (gimple_with_undefined_signed_overflow (stmt)
&& (!ALWAYS_EXECUTED_IN (bb)
|| !(ALWAYS_EXECUTED_IN (bb) == level
|| flow_loop_nested_p (ALWAYS_EXECUTED_IN (bb), level))))
diff --git a/gcc/tree-ssa-loop-split.cc b/gcc/tree-ssa-loop-split.cc
index 5f78c0b..80f488a 100644
--- a/gcc/tree-ssa-loop-split.cc
+++ b/gcc/tree-ssa-loop-split.cc
@@ -663,10 +663,7 @@ split_loop (class loop *loop1)
gsi = gsi_start (stmts2);
while (!gsi_end_p (gsi))
{
- gimple *stmt = gsi_stmt (gsi);
- if (is_gimple_assign (stmt)
- && arith_code_with_undefined_signed_overflow
- (gimple_assign_rhs_code (stmt)))
+ if (gimple_with_undefined_signed_overflow (gsi_stmt (gsi)))
rewrite_to_defined_overflow (&gsi);
gsi_next (&gsi);
}
diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc
index 4017eea..13bb85c 100644
--- a/gcc/tree-ssa-reassoc.cc
+++ b/gcc/tree-ssa-reassoc.cc
@@ -2925,30 +2925,22 @@ update_range_test (struct range_entry *range, struct range_entry *otherrange,
!gsi_end_p (gsi); gsi_next (&gsi))
{
gimple *stmt = gsi_stmt (gsi);
- if (is_gimple_assign (stmt))
- if (tree lhs = gimple_assign_lhs (stmt))
- if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
- || POINTER_TYPE_P (TREE_TYPE (lhs)))
- && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
- {
- enum tree_code code = gimple_assign_rhs_code (stmt);
- if (arith_code_with_undefined_signed_overflow (code))
- {
- gimple_stmt_iterator gsip = gsi;
- gimple_stmt_iterator gsin = gsi;
- gsi_prev (&gsip);
- gsi_next (&gsin);
- rewrite_to_defined_overflow (&gsi);
- unsigned uid = gimple_uid (stmt);
- if (gsi_end_p (gsip))
- gsip = gsi_after_labels (bb);
- else
- gsi_next (&gsip);
- for (; gsi_stmt (gsip) != gsi_stmt (gsin);
- gsi_next (&gsip))
- gimple_set_uid (gsi_stmt (gsip), uid);
- }
- }
+ if (gimple_with_undefined_signed_overflow (stmt))
+ {
+ gimple_stmt_iterator gsip = gsi;
+ gimple_stmt_iterator gsin = gsi;
+ gsi_prev (&gsip);
+ gsi_next (&gsin);
+ rewrite_to_defined_overflow (&gsi);
+ unsigned uid = gimple_uid (stmt);
+ if (gsi_end_p (gsip))
+ gsip = gsi_after_labels (bb);
+ else
+ gsi_next (&gsip);
+ for (; gsi_stmt (gsip) != gsi_stmt (gsin);
+ gsi_next (&gsip))
+ gimple_set_uid (gsi_stmt (gsip), uid);
+ }
}
if (opcode == BIT_IOR_EXPR
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 231a3ca..9fd1ef2 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -734,7 +734,6 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
stmt_vec_info stmt_vinfo
= vect_stmt_to_vectorize (loop_vinfo->lookup_stmt (stmt));
- stmt = STMT_VINFO_STMT (stmt_vinfo);
auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo);
if (!dr_ref)
continue;
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 562e222..80e9c01 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -5042,14 +5042,17 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
vec<stmt_vec_info> roots = vNULL;
vec<tree> remain = vNULL;
gphi *phi = as_a<gphi *> (STMT_VINFO_STMT (stmt_info));
- stmts.create (1);
tree def = gimple_phi_arg_def_from_edge (phi, latch_e);
stmt_vec_info lc_info = loop_vinfo->lookup_def (def);
- stmts.quick_push (vect_stmt_to_vectorize (lc_info));
- vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group,
- stmts, roots, remain,
- max_tree_size, &limit,
- bst_map, NULL, force_single_lane);
+ if (lc_info)
+ {
+ stmts.create (1);
+ stmts.quick_push (vect_stmt_to_vectorize (lc_info));
+ vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group,
+ stmts, roots, remain,
+ max_tree_size, &limit,
+ bst_map, NULL, force_single_lane);
+ }
/* When the latch def is from a different cycle this can only
be a induction. Build a simple instance for this.
??? We should be able to start discovery from the PHI
@@ -5059,8 +5062,6 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
tem.quick_push (stmt_info);
if (!bst_map->get (tem))
{
- gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info)
- == vect_induction_def);
stmts.create (1);
stmts.quick_push (stmt_info);
vect_build_slp_instance (vinfo, slp_inst_kind_reduc_group,
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index ea0b426..a8762ba 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -419,18 +419,21 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
}
}
- /* Check if it's an induction and multiple exits. In this case there will be
- a usage later on after peeling which is needed for the alternate exit. */
+ /* Check if it's a non-live PHI in a loop with multiple exits.  In that
+ case there will be a use later on, after peeling, which is needed for
+ the alternate exit. */
if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
- && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
+ && is_a <gphi *> (stmt)
+ && ((! VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))
+ && ! *live_p)
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def))
{
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "vec_stmt_relevant_p: induction forced for "
- "early break.\n");
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "vec_stmt_relevant_p: PHI forced live for "
+ "early break.\n");
LOOP_VINFO_EARLY_BREAKS_LIVE_IVS (loop_vinfo).safe_push (stmt_info);
*live_p = true;
-
}
if (*live_p && *relevant == vect_unused_in_scope
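
The relaxed condition above forces any not-yet-live PHI in an early-break loop
to be kept live, not only inductions.  A minimal C loop showing why
(illustration only): the final value of i is needed on the early-exit path
after the loop, so its PHI must survive even though i is otherwise unused in
the loop body.

    /* Illustration only: the early break makes the last value of i live
       after the loop, on the alternate exit that peeling has to handle.  */
    int first_match (const int *a, int n, int key)
    {
      int i;
      for (i = 0; i < n; i++)
        if (a[i] == key)
          break;
      return i;
    }
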
@@ -714,6 +717,8 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
bb = bbs[i];
for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
{
+ if (virtual_operand_p (gimple_phi_result (gsi_stmt (si))))
+ continue;
stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
@@ -8786,6 +8791,15 @@ vectorizable_store (vec_info *vinfo,
if (n == const_nunits)
{
int mis_align = dr_misalignment (first_dr_info, vectype);
+ /* With VF > 1 we advance the DR by its step.  If that step is constant
+ and only aligned when taken VF times, DR alignment analysis can
+ report the access as aligned, since it assumes contiguous accesses.
+ But that is not how we generate code here, so adjust for it. */
+ if (maybe_gt (vf, 1u)
+ && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+ DR_TARGET_ALIGNMENT (first_dr_info)))
+ mis_align = -1;
dr_alignment_support dr_align
= vect_supportable_dr_alignment (vinfo, dr_info, vectype,
mis_align);
@@ -8807,6 +8821,10 @@ vectorizable_store (vec_info *vinfo,
ltype = build_vector_type (elem_type, n);
lvectype = vectype;
int mis_align = dr_misalignment (first_dr_info, ltype);
+ if (maybe_gt (vf, 1u)
+ && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+ DR_TARGET_ALIGNMENT (first_dr_info)))
+ mis_align = -1;
dr_alignment_support dr_align
= vect_supportable_dr_alignment (vinfo, dr_info, ltype,
mis_align);
@@ -8867,17 +8885,10 @@ vectorizable_store (vec_info *vinfo,
}
}
unsigned align;
- /* ??? We'd want to use
- if (alignment_support_scheme == dr_aligned)
- align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
- since doing that is what we assume we can in the above checks.
- But this interferes with groups with gaps where for example
- VF == 2 makes the group in the unrolled loop aligned but the
- fact that we advance with step between the two subgroups
- makes the access to the second unaligned. See PR119586.
- We have to anticipate that here or adjust code generation to
- avoid the misaligned loads by means of permutations. */
- align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+ if (alignment_support_scheme == dr_aligned)
+ align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+ else
+ align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
/* Alignment is at most the access size if we do multiple stores. */
if (nstores > 1)
align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
@@ -10805,6 +10816,15 @@ vectorizable_load (vec_info *vinfo,
if (n == const_nunits)
{
int mis_align = dr_misalignment (first_dr_info, vectype);
+ /* With VF > 1 we advance the DR by its step.  If that step is constant
+ and only aligned when taken VF times, DR alignment analysis can
+ report the access as aligned, since it assumes contiguous accesses.
+ But that is not how we generate code here, so adjust for it. */
+ if (maybe_gt (vf, 1u)
+ && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+ DR_TARGET_ALIGNMENT (first_dr_info)))
+ mis_align = -1;
dr_alignment_support dr_align
= vect_supportable_dr_alignment (vinfo, dr_info, vectype,
mis_align);
@@ -10833,6 +10853,10 @@ vectorizable_load (vec_info *vinfo,
if (VECTOR_TYPE_P (ptype))
{
mis_align = dr_misalignment (first_dr_info, ptype);
+ if (maybe_gt (vf, 1u)
+ && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+ DR_TARGET_ALIGNMENT (first_dr_info)))
+ mis_align = -1;
dr_align
= vect_supportable_dr_alignment (vinfo, dr_info, ptype,
mis_align);
@@ -10852,8 +10876,10 @@ vectorizable_load (vec_info *vinfo,
}
}
unsigned align;
- /* ??? The above is still wrong, see vectorizable_store. */
- align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+ if (alignment_support_scheme == dr_aligned)
+ align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+ else
+ align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
/* Alignment is at most the access size if we do multiple loads. */
if (nloads > 1)
align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
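
The mis_align adjustments in vectorizable_store and vectorizable_load cover the
situation the removed ??? comment described (PR119586 is cited there): the
generated code advances the data reference by its step between the individual
accesses, and when that step only reaches a multiple of the target alignment
after VF iterations, contiguous-access analysis reports the reference as
aligned even though every per-step access is not.  A small self-contained
program with assumed numbers (target alignment 16, step 8, VF 2) makes the
mismatch visible; it is an illustration, not part of the patch:

    #include <stdio.h>

    /* Assumed numbers: DR step 8 bytes, target alignment 16 bytes, VF == 2,
       base 16-byte aligned.  The group advances by VF * step == 16 per
       vector iteration, so whole-group analysis calls it aligned, yet every
       second per-step access is only 8-byte aligned; hence mis_align is now
       conservatively set to -1 (unknown) in that case.  */
    int main (void)
    {
      unsigned long base = 0;   /* stands in for a 16-byte aligned address */
      for (int i = 0; i < 4; i++)
        {
          unsigned long addr = base + 8UL * i;   /* advance by the DR step */
          printf ("access %d at offset %2lu: %s\n", i, addr,
                  addr % 16 == 0 ? "16-byte aligned" : "8-byte aligned only");
        }
      return 0;
    }
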