diff options
Diffstat (limited to 'gcc')
190 files changed, 4061 insertions, 546 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1672582..3170604 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,278 @@ +2025-07-30 Andrew Pinski <quic_apinski@quicinc.com> + + PR rtl-optimization/121302 + * simplify-rtx.cc (simplify_context::simplify_subreg): Use + byte instead of 0 when calling simplify_subreg. + +2025-07-29 Spencer Abson <spencer.abson@arm.com> + + * config/aarch64/aarch64-protos.h (aarch64_sve_emit_masked_fp_pred): + Declare. + * config/aarch64/aarch64-sve.md (and<mode>3): Change this to... + (@and<mode>3): ...this, so that we can use gen_and3. + (@cond_<optab><mode>): Extend from SVE_FULL_F_B16B16 to SVE_F_B16B16, + use aarch64_predicate_operand. + (*cond_<optab><mode>_2_strict): Likewise. + (*cond_<optab><mode>_3_strict): Likewise. + (*cond_<optab><mode>_any_strict): Likewise. + (*cond_<optab><mode>_2_const_strict): Extend from SVE_FULL_F to SVE_F, + use aarch64_predicate_operand. + (*cond_<optab><mode>_any_const_strict): Likewise. + (*cond_sub<mode>_3_const_strict): Likewise. + (*cond_sub<mode>_const_strict): Likewise. + (*vcond_mask_<mode><vpred>): Use aarch64_predicate_operand, and update + the comment here. + * config/aarch64/aarch64.cc (aarch64_sve_emit_masked_fp_pred): New + function. Helper to mask the predicate in conditional expanders. + +2025-07-29 Dongyan Chen <chendongyan@isrc.iscas.ac.cn> + + * Makefile.in: Add riscv-mcpu.texi and riscv-mtune.texi to the list + of files to be processed by the Texinfo generator. + * config/riscv/t-riscv: Add rule for generating riscv-mcpu.texi + and riscv-mtune.texi. + * doc/invoke.texi: Replace hand‑written extension table with + `@include riscv-mcpu.texi` and `@include riscv-mtune.texi` to + pull in auto‑generated entries. + * config/riscv/gen-riscv-mcpu-texi.cc: New file. + * config/riscv/gen-riscv-mtune-texi.cc: New file. + * doc/riscv-mcpu.texi: New file. + * doc/riscv-mtune.texi: New file. 
+ +2025-07-29 Richard Sandiford <richard.sandiford@arm.com> + + * simplify-rtx.cc (simplify_context::simplify_subreg): Distribute + lowpart subregs through AND/IOR/XOR, if doing so eliminates one + of the terms. + (test_scalar_int_ext_ops): Add some tests of the above for integers. + * config/aarch64/aarch64.cc (aarch64_test_sve_folding): Likewise + add tests for predicate modes. + +2025-07-29 Richard Sandiford <richard.sandiford@arm.com> + + * config/aarch64/aarch64-sve-builtins.cc + (function_expander::get_reg_target): Check whether the target + is a valid register_operand. + +2025-07-29 Konstantinos Eleftheriou <konstantinos.eleftheriou@vrull.eu> + + PR rtl-optimization/120660 + * avoid-store-forwarding.cc (process_store_forwarding): + Fix instruction generation when having multiple stores with + base offset. + +2025-07-29 Christoph Müllner <christoph.muellner@vrull.eu> + + * common/config/riscv/riscv-common.cc (riscv_ext_is_subset): + Remove use of structured binding to fix compiler warning. + +2025-07-29 Konstantinos Eleftheriou <konstantinos.eleftheriou@vrull.eu> + + PR rtl-optimization/119795 + * avoid-store-forwarding.cc + (store_forwarding_analyzer::avoid_store_forwarding): Skip + transformations for stores that operate on the same address + range as deleted ones. + +2025-07-29 Pan Li <pan2.li@intel.com> + + * match.pd: Add mul based unsigned SAT_MUL. + +2025-07-29 Richard Biener <rguenther@suse.de> + + PR tree-optimization/120687 + * tree-ssa-reassoc.cc (reassociate_bb): Do not disturb + the sorted operand order in the early pass. + * tree-vect-slp.cc (vect_analyze_slp): Dump when a detected + reduction chain fails SLP discovery. + +2025-07-29 Alfie Richards <alfie.richards@arm.com> + + PR middle-end/121261 + * vec.h: Add null ptr check. + +2025-07-29 Jakub Jelinek <jakub@redhat.com> + + PR middle-end/121159 + * calls.cc (can_implement_as_sibling_call_p): Don't reject declared + noreturn functions in musttail calls. 
+ +2025-07-29 Andrew Pinski <quic_apinski@quicinc.com> + + * output.h (MAX_ALIGN_MERGABLE): New define. + * tree-switch-conversion.cc (switch_conversion::build_one_array): + Use MAX_ALIGN_MERGABLE instead of 256. + * varasm.cc (mergeable_string_section): Likewise + (mergeable_constant_section): Likewise + +2025-07-29 Andrew Pinski <quic_apinski@quicinc.com> + + PR middle-end/120523 + * output.h (mergeable_constant_section): New declaration taking + unsigned HOST_WIDE_INT for the size. + * tree-switch-conversion.cc (switch_conversion::build_one_array): + Increase the alignment of CSWTCH for sizes less than 32bytes. + * varasm.cc (mergeable_constant_section): Split out twice. + One that takes the size in unsigned HOST_WIDE_INT and the + other size in a tree. + (default_elf_select_section): Pass DECL_SIZE instead of + DECL_MODE to mergeable_constant_section. + +2025-07-29 Richard Biener <rguenther@suse.de> + + * tree-vect-stmts.cc (vectorizable_load): Un-factor VMAT + specific code to their handling blocks. + +2025-07-29 Richard Biener <rguenther@suse.de> + + * tree-vectorizer.h (gather_scatter_info::offset_dt): Remove. + * tree-vect-data-refs.cc (vect_describe_gather_scatter_call): + Do not set it. + (vect_check_gather_scatter): Likewise. + * tree-vect-stmts.cc (vect_truncate_gather_scatter_offset): + Likewise. + (get_group_load_store_type): Use the vector type of the offset + SLP child. Do not re-check vect_is_simple_use validated by + SLP build. + +2025-07-28 Georg-Johann Lay <avr@gjlay.de> + + PR target/121277 + * config/avr/avr.cc (avr_addr_space_convert): When converting + from generic AS to __flashx, don't set bit 23. + (avr_convert_to_type): Don't -Waddr-space-convert when NULL + is converted to __flashx or to __flash. + +2025-07-28 Andrew Pinski <quic_apinski@quicinc.com> + + PR tree-optimization/121236 + * tree-if-conv.cc (is_cond_scalar_reduction): Instead of phi argument, + pass bb and res of the phi. + (factor_out_operators): Add iterator for the phi. 
Remove the phi + if this is the first time. Return if we had removed the phi. + (predicate_scalar_phi): Add the phi iterator argument. + Update call to is_cond_scalar_reduction. + Update call to factor_out_operators and set the return value to true + when factor_out_operators returns true. + (predicate_all_scalar_phis): Don't remove the phi if predicate_scalar_phi + already removed it. + +2025-07-28 H.J. Lu <hjl.tools@gmail.com> + + PR target/121208 + * config/i386/i386.cc (ix86_tls_get_addr): Issue an error for + -mtls-dialect=gnu with no_caller_saved_registers attribute and + suggest -mtls-dialect=gnu2. + +2025-07-28 Mikael Pettersson <mikpelinux@gmail.com> + + PR other/121260 + * diagnostics/changes.cc: Correct nesting of namespaces + and #if CHECKING_P blocks. + * diagnostics/context.cc: Likewise. + * diagnostics/html-sink.cc: Likewise. + * diagnostics/output-spec.cc: Likewise. + * diagnostics/sarif-sink.cc: Likewise. + +2025-07-28 Tobias Burnus <tburnus@baylibre.com> + + * config/nvptx/nvptx.opt (march-map=): Add sm_100{,f,a}, + sm_101{,f,a}, sm_103{,a,f}, sm_120{,a,f} and sm_121{,f,a}. + +2025-07-28 Tobias Burnus <tburnus@baylibre.com> + + * config/gcn/gcn.md (atomic_load, atomic_store, atomic_exchange): + Fix CDNA3 L2 cache write-back before atomic instructions. + +2025-07-28 Richard Biener <rguenther@suse.de> + + * tree-vect-stmts.cc (check_load_store_for_partial_vectors): + Make *gs_info const. + (vect_build_one_gather_load_call): Likewise. + (vect_build_one_scatter_store_call): Likewise. + (vect_get_gather_scatter_ops): Likewise. + (vect_get_strided_load_store_ops): Likewise. + +2025-07-28 Tobias Burnus <tburnus@baylibre.com> + + * config/gcn/gcn.md (define_attr "vcmp"): Add with values + vcmp/vcmpx/no. + (*movbi, cstoredi4.., cstore<mode>4): Set it. + * config/gcn/gcn-valu.md (vec_cmp<mode>...): Likewise. + * config/gcn/gcn.cc (gcn_cmpx_insn_p): Remove. + (gcn_md_reorg): Add two new conditions for MI300. 
+ +2025-07-28 Tobias Burnus <tburnus@baylibre.com> + + * config/gcn/gcn-opts.h (enum hsaco_attr_type): Add comment + about 'sc0'. + * config/gcn/gcn.cc (gcn_md_reorg): Use gen_nops instead of gen_nop. + (print_operand_address): Document 'R' and 'V' in the + pre-function comment as well. + * config/gcn/gcn.md (nops): Add. + +2025-07-28 Richard Biener <rguenther@suse.de> + + PR tree-optimization/121256 + * tree-vect-loop.cc (vectorizable_recurr): Build a correct + initialization vector for SLP_TREE_LANES > 1. + +2025-07-28 Richard Biener <rguenther@suse.de> + + * tree-vectorizer.h (_slp_tree::type): Add. + (_slp_tree::u): Likewise. + (_stmt_vec_info::type): Remove. + (STMT_VINFO_TYPE): Likewise. + (SLP_TREE_TYPE): New. + * tree-vectorizer.cc (vec_info::new_stmt_vec_info): Do not + initialize type. + * tree-vect-slp.cc (_slp_tree::_slp_tree): Initialize type. + (vect_slp_analyze_node_operations): Adjust. + (vect_schedule_slp_node): Likewise. + * tree-vect-patterns.cc (vect_init_pattern_stmt): Do not + copy STMT_VINFO_TYPE. + * tree-vect-loop.cc: Set SLP_TREE_TYPE instead of + STMT_VINFO_TYPE everywhere. + (vect_create_loop_vinfo): Do not set STMT_VINFO_TYPE on + loop conditions. + * tree-vect-stmts.cc: Set SLP_TREE_TYPE instead of + STMT_VINFO_TYPE everywhere. + (vect_analyze_stmt): Adjust. + (vect_transform_stmt): Likewise. + * config/aarch64/aarch64.cc (aarch64_vector_costs::count_ops): + Access SLP_TREE_TYPE instead of STMT_VINFO_TYPE. + * config/i386/i386.cc (ix86_vector_costs::add_stmt_cost): + Remove non-SLP element-wise load/store matching. + * config/rs6000/rs6000.cc + (rs6000_cost_data::update_target_cost_per_stmt): Pass in + the SLP node. Use that to get at the memory access + kind and type. + (rs6000_cost_data::add_stmt_cost): Pass down SLP node. + * config/riscv/riscv-vector-costs.cc (variable_vectorized_p): + Use SLP_TREE_TYPE. + (costs::need_additional_vector_vars_p): Likewise. + (costs::update_local_live_ranges): Likewise. 
+ +2025-07-28 Jennifer Schmitz <jschmitz@nvidia.com> + Dhruv Chawla <dhruvc@nvidia.com> + + * config/aarch64/aarch64-cores.def (olympus): Use olympus tuning + model. + * config/aarch64/aarch64.cc: Include olympus.h. + * config/aarch64/tuning_models/olympus.h: New file. + +2025-07-28 Lulu Cheng <chenglulu@loongson.cn> + + * config/loongarch/loongarch.h + (CASE_VECTOR_SHORTEN_MODE): Delete. + +2025-07-28 Takayuki 'January June' Suwa <jjsuwa_sys3175@yahoo.co.jp> + + * config/xtensa/xtensa.cc (xtensa_is_insn_L32R_p): + Re-rewrite to more accurately capture insns that could be L32R machine + instructions wherever possible, and add comments that help understand + the intent of the process. + 2025-07-27 Pan Li <pan2.li@intel.com> * config/riscv/riscv-v.cc (expand_vx_binary_vxrm_vec_vec_dup): diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 8ad65ad..3db1b8e 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20250728 +20250730 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 7314a3b..d7d5cbe 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -3720,7 +3720,7 @@ TEXI_GCC_FILES = gcc.texi gcc-common.texi gcc-vers.texi frontends.texi \ contribute.texi compat.texi funding.texi gnu.texi gpl_v3.texi \ fdl.texi contrib.texi cppenv.texi cppopts.texi avr-mmcu.texi \ implement-c.texi implement-cxx.texi gcov-tool.texi gcov-dump.texi \ - lto-dump.texi riscv-ext.texi + lto-dump.texi riscv-ext.texi riscv-mcpu.texi riscv-mtune.texi # we explicitly use $(srcdir)/doc/tm.texi here to avoid confusion with # the generated tm.texi; the latter might have a more recent timestamp, diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog index 6f6a782..821e3c0 100644 --- a/gcc/ada/ChangeLog +++ b/gcc/ada/ChangeLog @@ -1,3 +1,7 @@ +2025-07-28 Marc Poulhiès <poulhies@adacore.com> + + * gcc-interface/trans.cc (gnat_to_gnu): Fix typo in comment. 
+ 2025-07-25 David Malcolm <dmalcolm@redhat.com> * gcc-interface/misc.cc: Make diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc index d1954b4..7ff9526 100644 --- a/gcc/auto-profile.cc +++ b/gcc/auto-profile.cc @@ -885,6 +885,8 @@ string_table::read () { vector_.quick_push (xstrdup (gcov_read_string ())); map_[vector_.last ()] = i; + if (gcov_is_error ()) + return false; } return true; } @@ -1629,7 +1631,13 @@ function_instance::match (cgraph_node *node, if (iter->first != end_location && iter->first != start_location && (iter->first & 65535) != zero_location - && iter->first) + && iter->first + /* FIXME: dwarf5 does not represent inline stack of debug + statements and consequently create_gcov is sometimes + mixing up statements from other functions. Do not warn + user about this until this problem is solved. + We still write info into dump file. */ + && 0) { if (!warned) warned = warning_at (DECL_SOURCE_LOCATION (node->decl), @@ -2741,14 +2749,22 @@ read_profile (void) /* autofdo_source_profile. */ afdo_source_profile = autofdo_source_profile::create (); - if (afdo_source_profile == NULL) + if (afdo_source_profile == NULL + || gcov_is_error ()) { error ("cannot read function profile from %s", auto_profile_file); + delete afdo_source_profile; + afdo_source_profile = NULL; return; } /* autofdo_module_profile. 
*/ fake_read_autofdo_module_profile (); + if (gcov_is_error ()) + { + error ("cannot read module profile from %s", auto_profile_file); + return; + } } /* From AutoFDO profiles, find values inside STMT for that we want to measure @@ -3425,7 +3441,7 @@ add_scale (vec <scale> *scales, profile_count annotated, profile_count orig) annotated.dump (dump_file); fprintf (dump_file, "\n"); } - if (orig.force_nonzero () == orig) + if (orig.nonzero_p ()) { sreal scale = annotated.guessed_local () @@ -3672,7 +3688,7 @@ afdo_adjust_guessed_profile (bb_set *annotated_bb) { if (dump_file) fprintf (dump_file, - " Can not determine count from the boundary; giving up"); + " Can not determine count from the boundary; giving up\n"); continue; } gcc_checking_assert (scales.length ()); diff --git a/gcc/avoid-store-forwarding.cc b/gcc/avoid-store-forwarding.cc index 785efd2..1de6fd6 100644 --- a/gcc/avoid-store-forwarding.cc +++ b/gcc/avoid-store-forwarding.cc @@ -231,20 +231,39 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn, int move_to_front = -1; int total_cost = 0; + int base_offset_index = -1; + + /* Find the last store that has the same offset the load, in the case that + we're eliminating the load. We will try to use it as a base register + to avoid bit inserts (see second loop below). We want the last one, as + it will be wider and we don't want to overwrite the base register if + there are many of them. */ + if (load_elim) + { + FOR_EACH_VEC_ELT_REVERSE (stores, i, it) + { + const bool has_base_offset + = known_eq (poly_uint64 (it->offset), + subreg_size_lowpart_offset (MEM_SIZE (it->store_mem), + load_size)); + if (has_base_offset) + { + base_offset_index = i; + break; + } + } + } /* Check if we can emit bit insert instructions for all forwarded stores. 
*/ FOR_EACH_VEC_ELT (stores, i, it) { it->mov_reg = gen_reg_rtx (GET_MODE (it->store_mem)); rtx_insn *insns = NULL; - const bool has_base_offset - = known_eq (poly_uint64 (it->offset), - subreg_size_lowpart_offset (MEM_SIZE (it->store_mem), - load_size)); - - /* If we're eliminating the load then find the store with zero offset - and use it as the base register to avoid a bit insert if possible. */ - if (load_elim && has_base_offset) + + /* Check if this is a store with base offset, if we're eliminating the + load, and use it as the base register to avoid a bit insert if + possible. Load elimination is implied by base_offset_index != -1. */ + if (i == (unsigned) base_offset_index) { start_sequence (); @@ -437,9 +456,22 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb) return; auto_vec<store_fwd_info, 8> store_exprs; + auto_vec<rtx> store_exprs_del; rtx_insn *insn; unsigned int insn_cnt = 0; + /* We are iterating over the basic block's instructions detecting store + instructions. Upon reaching a load instruction, we check if any of the + previously detected stores could result in store forwarding. In that + case, we try to reorder the load and store instructions. + We skip this transformation when we encounter complex memory operations, + instructions that might throw an exception, instruction dependencies, + etc. This is done by clearing the vector of detected stores, while + keeping the removed stores in another vector. By doing so, we can check + if any of the removed stores operated on the load's address range, when + reaching a subsequent store that operates on the same address range, + as this would lead to incorrect values on the register that keeps the + loaded value. 
*/ FOR_BB_INSNS (bb, insn) { if (!NONDEBUG_INSN_P (insn)) @@ -452,6 +484,10 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb) if (!set || insn_could_throw_p (insn)) { + unsigned int i; + store_fwd_info *it; + FOR_EACH_VEC_ELT (store_exprs, i, it) + store_exprs_del.safe_push (it->store_mem); store_exprs.truncate (0); continue; } @@ -475,6 +511,10 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb) || (load_mem && (!MEM_SIZE_KNOWN_P (load_mem) || !MEM_SIZE (load_mem).is_constant ()))) { + unsigned int i; + store_fwd_info *it; + FOR_EACH_VEC_ELT (store_exprs, i, it) + store_exprs_del.safe_push (it->store_mem); store_exprs.truncate (0); continue; } @@ -526,6 +566,7 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb) it->remove = true; removed_count++; remove_rest = true; + store_exprs_del.safe_push (it->store_mem); } } } @@ -565,23 +606,46 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb) it->remove = true; removed_count++; remove_rest = true; + forwardings.truncate (0); } else if (is_store_forwarding (store_mem, load_mem, &off_val)) { + unsigned int j; + rtx *del_it; + bool same_range_as_removed = false; + + /* Check if another store in the load's address range has + been deleted due to a constraint violation. In this case + we can't forward any other stores that operate in this + range, as it would lead to partial update of the register + that holds the loaded value. */ + FOR_EACH_VEC_ELT (store_exprs_del, j, del_it) + { + rtx del_store_mem = *del_it; + same_range_as_removed + = is_store_forwarding (del_store_mem, load_mem, NULL); + if (same_range_as_removed) + break; + } + /* Check if moving this store after the load is legal. 
*/ bool write_dep = false; - for (unsigned int j = store_exprs.length () - 1; j != i; j--) + if (!same_range_as_removed) { - if (!store_exprs[j].forwarded - && output_dependence (store_mem, - store_exprs[j].store_mem)) + unsigned int j = store_exprs.length () - 1; + for (; j != i; j--) { - write_dep = true; - break; + if (!store_exprs[j].forwarded + && output_dependence (store_mem, + store_exprs[j].store_mem)) + { + write_dep = true; + break; + } } } - if (!write_dep) + if (!same_range_as_removed && !write_dep) { it->forwarded = true; it->offset = off_val; @@ -601,6 +665,7 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb) it->remove = true; removed_count++; remove_rest = true; + forwardings.truncate (0); } } @@ -608,9 +673,12 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb) process_store_forwarding (forwardings, insn, load_mem); } + /* Abort in case that we encounter a memory read/write that is not a + simple store/load, as we can't make safe assumptions about the + side-effects of this. 
*/ if ((writes_mem && !is_simple_store) || (reads_mem && !is_simple_load)) - store_exprs.truncate (0); + return; if (removed_count) { diff --git a/gcc/calls.cc b/gcc/calls.cc index e16190c..2711c4e 100644 --- a/gcc/calls.cc +++ b/gcc/calls.cc @@ -2589,7 +2589,8 @@ can_implement_as_sibling_call_p (tree exp, return false; } - if (TYPE_VOLATILE (TREE_TYPE (TREE_TYPE (addr)))) + if (TYPE_VOLATILE (TREE_TYPE (TREE_TYPE (addr))) + && !CALL_EXPR_MUST_TAIL_CALL (exp)) { maybe_complain_about_tail_call (exp, _("volatile function type")); return false; diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index 82037a3..da3cb9f 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -1606,8 +1606,9 @@ bool riscv_ext_is_subset (struct cl_target_option *opts, struct cl_target_option *subset) { - for (const auto &[ext_name, ext_info] : riscv_ext_infos) + for (const auto &riscv_ext_info : riscv_ext_infos) { + const auto &ext_info = riscv_ext_info.second; if (ext_info.check_opts (opts) && !ext_info.check_opts (subset)) return false; } diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index e946e8d..38c307c 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -1031,6 +1031,7 @@ rtx aarch64_pfalse_reg (machine_mode); bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *); rtx aarch64_sve_packed_pred (machine_mode); rtx aarch64_sve_fp_pred (machine_mode, rtx *); +rtx aarch64_sve_emit_masked_fp_pred (machine_mode, rtx); void aarch64_emit_load_store_through_mode (rtx, rtx, machine_mode); bool aarch64_expand_maskloadstore (rtx *, machine_mode); void aarch64_emit_sve_pred_move (rtx, rtx, rtx); diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def b/gcc/config/aarch64/aarch64-sve-builtins-sme.def index 8e6aadc..117b70e 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sme.def +++ 
b/gcc/config/aarch64/aarch64-sve-builtins-sme.def @@ -92,7 +92,8 @@ DEF_SME_FUNCTION (svstr_zt, str_zt, none, none) DEF_SME_FUNCTION (svzero_zt, inherent_zt, none, none) #undef REQUIRED_EXTENSIONS -#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2 && AARCH64_FL_FAMINMAX) +#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2 \ + | AARCH64_FL_FAMINMAX) DEF_SME_FUNCTION_GS (svamin, binary_opt_single_n, all_float, x24, none) DEF_SME_FUNCTION_GS (svamax, binary_opt_single_n, all_float, x24, none) #undef REQUIRED_EXTENSIONS diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index 2b627a9..01833a8 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -4004,7 +4004,8 @@ rtx function_expander::get_reg_target () { machine_mode target_mode = result_mode (); - if (!possible_target || GET_MODE (possible_target) != target_mode) + if (!possible_target + || !register_operand (possible_target, target_mode)) possible_target = gen_reg_rtx (target_mode); return possible_target; } diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index b252eef..80a3288 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -5605,18 +5605,21 @@ ;; Predicated floating-point operations with merging. 
(define_expand "@cond_<optab><mode>" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "<sve_pred_fp_rhs1_operand>") - (match_operand:SVE_FULL_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")] + (match_operand:SVE_F_B16B16 2 "<sve_pred_fp_rhs1_operand>") + (match_operand:SVE_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")] SVE_COND_FP_BINARY) - (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>)" + { + operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]); + } ) ;; Predicated floating-point operations, merging with the first input. @@ -5644,14 +5647,14 @@ ) (define_insn "*cond_<optab><mode>_2_strict" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand")] SVE_COND_FP_BINARY) (match_dup 2)] UNSPEC_SEL))] @@ -5687,14 +5690,14 @@ ) (define_insn "*cond_<optab><mode>_2_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 
"aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")] SVE_COND_FP_BINARY_I1) (match_dup 2)] UNSPEC_SEL))] @@ -5730,14 +5733,14 @@ ) (define_insn "*cond_<optab><mode>_3_strict" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand")] SVE_COND_FP_BINARY) (match_dup 3)] UNSPEC_SEL))] @@ -5794,16 +5797,16 @@ ) (define_insn_and_rewrite "*cond_<optab><mode>_any_strict" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand")] SVE_COND_FP_BINARY) - (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>) @@ -5868,16 +5871,16 @@ ) (define_insn_and_rewrite 
"*cond_<optab><mode>_any_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")] SVE_COND_FP_BINARY_I1) - (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" {@ [ cons: =0 , 1 , 2 , 4 ] @@ -5953,14 +5956,14 @@ ) (define_insn "*cond_add<mode>_2_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")] UNSPEC_COND_FADD) (match_dup 2)] UNSPEC_SEL))] @@ -6015,16 +6018,16 @@ ) (define_insn_and_rewrite "*cond_add<mode>_any_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 
"aarch64_sve_float_arith_with_sub_immediate")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")] UNSPEC_COND_FADD) - (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" {@ [ cons: =0 , 1 , 2 , 3 , 4 ] @@ -6266,14 +6269,14 @@ ) (define_insn "*cond_sub<mode>_3_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") - (match_operand:SVE_FULL_F 3 "register_operand")] + (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate") + (match_operand:SVE_F 3 "register_operand")] UNSPEC_COND_FSUB) (match_dup 3)] UNSPEC_SEL))] @@ -6323,16 +6326,16 @@ ) (define_insn_and_rewrite "*cond_sub<mode>_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") - (match_operand:SVE_FULL_F 3 "register_operand")] + (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate") + (match_operand:SVE_F 3 "register_operand")] UNSPEC_COND_FSUB) - (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])" {@ [ cons: =0 , 1 , 3 , 4 ] @@ -6913,7 +6916,7 @@ ;; Predicate AND. 
We can reuse one of the inputs as the GP. ;; Doubling the second operand is the preferred implementation ;; of the MOV alias, so we use that instead of %1/z, %1, %2. -(define_insn "and<mode>3" +(define_insn "@and<mode>3" [(set (match_operand:PRED_ALL 0 "register_operand") (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand") (match_operand:PRED_ALL 2 "register_operand")))] @@ -7595,29 +7598,29 @@ ;; Unpredicated floating-point ternary operations. (define_expand "<optab><mode>4" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 [(match_dup 4) - (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F_B16B16 1 "register_operand") - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] + (match_dup 5) + (match_operand:SVE_F_B16B16 1 "register_operand") + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand")] SVE_COND_FP_TERNARY))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>)" { - operands[4] = aarch64_ptrue_reg (<VPRED>mode); + operands[4] = aarch64_sve_fp_pred (<MODE>mode, &operands[5]); } ) ;; Predicated floating-point ternary operations. 
(define_insn "@aarch64_pred_<optab><mode>" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") (match_operand:SI 5 "aarch64_sve_gp_strictness") - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>)" {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx , is_rev ] @@ -7631,17 +7634,17 @@ ;; Predicated floating-point ternary operations with merging. (define_expand "@cond_<optab><mode>" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY) - (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>)" { @@ -7649,20 +7652,22 @@ second of the two. 
*/ if (rtx_equal_p (operands[3], operands[5])) std::swap (operands[2], operands[3]); + + operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]); }) ;; Predicated floating-point ternary operations, merging with the ;; first input. (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + (unspec:SVE_F [(match_operand 5) (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "register_operand") - (match_operand:SVE_FULL_F 4 "register_operand")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "register_operand") + (match_operand:SVE_F 4 "register_operand")] SVE_COND_FP_TERNARY) (match_dup 2)] UNSPEC_SEL))] @@ -7678,15 +7683,15 @@ ) (define_insn "*cond_<optab><mode>_2_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "register_operand") - (match_operand:SVE_FULL_F 4 "register_operand")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "register_operand") + (match_operand:SVE_F 4 "register_operand")] SVE_COND_FP_TERNARY) (match_dup 2)] UNSPEC_SEL))] @@ -7700,15 +7705,15 @@ ;; Predicated floating-point ternary operations, merging with the ;; third input. 
(define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + (unspec:SVE_F_B16B16 [(match_operand 5) (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY) (match_dup 4)] UNSPEC_SEL))] @@ -7724,15 +7729,15 @@ ) (define_insn "*cond_<optab><mode>_4_strict" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY) (match_dup 4)] UNSPEC_SEL))] @@ -7746,17 +7751,17 @@ ;; Predicated floating-point ternary operations, merging with an ;; independent value. 
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + (unspec:SVE_F_B16B16 [(match_operand 6) (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY) - (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>) @@ -7792,17 +7797,17 @@ ) (define_insn_and_rewrite "*cond_<optab><mode>_any_strict" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY) - (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>) @@ -8201,20 +8206,23 @@ ;; ;; For unpacked vectors, it doesn't really matter whether SEL uses 
the ;; the container size or the element size. If SEL used the container size, -;; it would ignore undefined bits of the predicate but would copy the -;; upper (undefined) bits of each container along with the defined bits. -;; If SEL used the element size, it would use undefined bits of the predicate -;; to select between undefined elements in each input vector. Thus the only -;; difference is whether the undefined bits in a container always come from -;; the same input as the defined bits, or whether the choice can vary -;; independently of the defined bits. +;; it would copy the upper (undefined) bits of each container along +;; with the corresponding defined bits. If SEL used the element size, +;; it would use separate predicate bits to select between the undefined +;; elements in each input vector; these separate predicate bits might +;; themselves be undefined, depending on the mode of the predicate. +;; +;; Thus the only difference is whether the undefined bits in a container +;; always come from the same input as the defined bits, or whether the +;; choice can vary independently of the defined bits. ;; ;; For the other instructions, using the element size is more natural, ;; so we do that for SEL as well. +;; (define_insn "*vcond_mask_<mode><vpred>" [(set (match_operand:SVE_ALL 0 "register_operand") (unspec:SVE_ALL - [(match_operand:<VPRED> 3 "register_operand") + [(match_operand:<VPRED> 3 "aarch64_predicate_operand") (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm") (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index cb1699a..5502d0b 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -3933,6 +3933,33 @@ aarch64_sve_fp_pred (machine_mode data_mode, rtx *strictness) return aarch64_ptrue_reg (aarch64_sve_pred_mode (data_mode)); } +/* PRED is a predicate that governs an operation on DATA_MODE.
If DATA_MODE + is a partial vector mode, and if exceptions must be suppressed for its + undefined elements, convert PRED from a container-level predicate to + an element-level predicate and ensure that the undefined elements + are inactive. Make no changes otherwise. + + Return the resultant predicate. */ +rtx +aarch64_sve_emit_masked_fp_pred (machine_mode data_mode, rtx pred) +{ + unsigned int vec_flags = aarch64_classify_vector_mode (data_mode); + if (flag_trapping_math && (vec_flags & VEC_PARTIAL)) + { + /* Generate an element-level mask. */ + rtx mask = aarch64_sve_packed_pred (data_mode); + machine_mode pmode = GET_MODE (mask); + + /* Apply the existing predicate. */ + rtx dst = gen_reg_rtx (pmode); + emit_insn (gen_and3 (pmode, dst, mask, + gen_lowpart (pmode, pred))); + return dst; + } + + return pred; +} + /* Emit a comparison CMP between OP0 and OP1, both of which have mode DATA_MODE, and return the result in a predicate of mode PRED_MODE. Use TARGET as the target register if nonnull and convenient. */ @@ -31964,9 +31991,43 @@ aarch64_test_sysreg_encoding_clashes (void) static void aarch64_test_sve_folding () { + aarch64_target_switcher switcher (AARCH64_FL_SVE); + tree res = fold_unary (BIT_NOT_EXPR, ssizetype, ssize_int (poly_int64 (1, 1))); ASSERT_TRUE (operand_equal_p (res, ssize_int (poly_int64 (-2, -1)))); + + auto build_v16bi = [](bool a, bool b) + { + rtx_vector_builder builder (VNx16BImode, 2, 1); + builder.quick_push (a ? const1_rtx : const0_rtx); + builder.quick_push (b ? 
const1_rtx : const0_rtx); + return builder.build (); + }; + rtx v16bi_10 = build_v16bi (1, 0); + rtx v16bi_01 = build_v16bi (0, 1); + + for (auto mode : { VNx8BImode, VNx4BImode, VNx2BImode }) + { + rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1); + rtx subreg = lowpart_subreg (VNx16BImode, reg, mode); + rtx and1 = simplify_gen_binary (AND, VNx16BImode, subreg, v16bi_10); + ASSERT_EQ (lowpart_subreg (mode, and1, VNx16BImode), reg); + rtx and0 = simplify_gen_binary (AND, VNx16BImode, subreg, v16bi_01); + ASSERT_EQ (lowpart_subreg (mode, and0, VNx16BImode), CONST0_RTX (mode)); + + rtx ior1 = simplify_gen_binary (IOR, VNx16BImode, subreg, v16bi_10); + ASSERT_EQ (lowpart_subreg (mode, ior1, VNx16BImode), CONSTM1_RTX (mode)); + rtx ior0 = simplify_gen_binary (IOR, VNx16BImode, subreg, v16bi_01); + ASSERT_EQ (lowpart_subreg (mode, ior0, VNx16BImode), reg); + + rtx xor1 = simplify_gen_binary (XOR, VNx16BImode, subreg, v16bi_10); + ASSERT_RTX_EQ (lowpart_subreg (mode, xor1, VNx16BImode), + lowpart_subreg (mode, gen_rtx_NOT (VNx16BImode, subreg), + VNx16BImode)); + rtx xor0 = simplify_gen_binary (XOR, VNx16BImode, subreg, v16bi_01); + ASSERT_EQ (lowpart_subreg (mode, xor0, VNx16BImode), reg); + } } /* Run all target-specific selftests. */ diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 557568c..5ffeb23 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -54,6 +54,7 @@ #include "gimple.h" #include "cgraph.h" #include "case-cfn-macros.h" +#include "opts.h" /* This file should be included last. */ #include "target-def.h" @@ -183,6 +184,11 @@ gcn_option_override (void) if (flag_sram_ecc == HSACO_ATTR_DEFAULT) flag_sram_ecc = gcn_devices[gcn_arch].sramecc_default; + + /* TODO: This seems to produce tighter loops, but the testsuite expects it + to be set to '2', so I'll leave it default for now.
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set, + param_vect_partial_vector_usage, 1); */ } /* }}} */ @@ -5789,6 +5795,16 @@ gcn_libc_has_function (enum function_class fn_class, return bsd_libc_has_function (fn_class, type); } +/* Implement TARGET_VECTORIZE_PREFER_GATHER_SCATTER. */ + +static bool +gcn_prefer_gather_scatter (machine_mode ARG_UNUSED (mode), + int ARG_UNUSED (scale), + unsigned int ARG_UNUSED (group_size)) +{ + return true; +} + /* }}} */ /* {{{ md_reorg pass. */ @@ -8140,6 +8156,8 @@ gcn_dwarf_register_span (rtx rtl) gcn_vectorize_builtin_vectorized_function #undef TARGET_VECTORIZE_GET_MASK_MODE #define TARGET_VECTORIZE_GET_MASK_MODE gcn_vectorize_get_mask_mode +#undef TARGET_VECTORIZE_PREFER_GATHER_SCATTER +#define TARGET_VECTORIZE_PREFER_GATHER_SCATTER gcn_prefer_gather_scatter #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE gcn_vectorize_preferred_simd_mode #undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def index 2fedbeb..c2db305 100644 --- a/gcc/config/i386/i386-modes.def +++ b/gcc/config/i386/i386-modes.def @@ -91,7 +91,6 @@ VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF V2TF */ VECTOR_MODES (FLOAT, 64); /* V32HF V16SF V8DF V4TF */ VECTOR_MODES (FLOAT, 128); /* V64HF V32SF V16DF V8TF */ -VECTOR_MODES (FLOAT, 256); /* V128HF V64SF V32DF V16TF */ VECTOR_MODE (FLOAT, HF, 2); /* V2HF */ VECTOR_MODE (FLOAT, BF, 2); /* V2BF */ VECTOR_MODE (FLOAT, HF, 6); /* V6HF */ @@ -102,7 +101,6 @@ VECTOR_MODE (INT, QI, 2); /* V2QI */ VECTOR_MODE (INT, QI, 12); /* V12QI */ VECTOR_MODE (INT, QI, 14); /* V14QI */ VECTOR_MODE (INT, HI, 6); /* V6HI */ -VECTOR_MODE (INT, SI, 64); /* V64SI */ INT_MODE (OI, 32); INT_MODE (XI, 64); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 0f0acae..613f2b2 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -21513,8 
+21513,7 @@ ix86_hard_regno_nregs (unsigned int regno, machine_mode mode) /* Register pair for mask registers. */ if (mode == P2QImode || mode == P2HImode) return 2; - if (mode == V64SFmode || mode == V64SImode) - return 4; + return 1; } @@ -25233,20 +25232,14 @@ asm_preferred_eh_data_format (int code, int global) return DW_EH_PE_absptr; } -/* Implement targetm.vectorize.builtin_vectorization_cost. */ +/* Worker for ix86_builtin_vectorization_cost and the fallback calls + from ix86_vector_costs::add_stmt_cost. */ static int -ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, - tree vectype, int) +ix86_default_vector_cost (enum vect_cost_for_stmt type_of_cost, + machine_mode mode) { - bool fp = false; - machine_mode mode = TImode; + bool fp = FLOAT_MODE_P (mode); int index; - if (vectype != NULL) - { - fp = FLOAT_TYPE_P (vectype); - mode = TYPE_MODE (vectype); - } - switch (type_of_cost) { case scalar_stmt: @@ -25305,14 +25298,14 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, COSTS_N_INSNS (ix86_cost->gather_static + ix86_cost->gather_per_elt - * TYPE_VECTOR_SUBPARTS (vectype)) / 2); + * GET_MODE_NUNITS (mode)) / 2); case vector_scatter_store: return ix86_vec_cost (mode, COSTS_N_INSNS (ix86_cost->scatter_static + ix86_cost->scatter_per_elt - * TYPE_VECTOR_SUBPARTS (vectype)) / 2); + * GET_MODE_NUNITS (mode)) / 2); case cond_branch_taken: return ix86_cost->cond_taken_branch_cost; @@ -25330,7 +25323,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, case vec_construct: { - int n = TYPE_VECTOR_SUBPARTS (vectype); + int n = GET_MODE_NUNITS (mode); /* N - 1 element inserts into an SSE vector, the possible GPR -> XMM move is accounted for in add_stmt_cost. */ if (GET_MODE_BITSIZE (mode) <= 128) @@ -25358,6 +25351,17 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, } } +/* Implement targetm.vectorize.builtin_vectorization_cost. 
*/ +static int +ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + tree vectype, int) +{ + machine_mode mode = TImode; + if (vectype != NULL) + mode = TYPE_MODE (vectype); + return ix86_default_vector_cost (type_of_cost, mode); +} + /* This function returns the calling abi specific va_list type node. It returns the FNDECL specific va_list type. */ @@ -25811,7 +25815,7 @@ ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar) unsigned ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, stmt_vec_info stmt_info, slp_tree node, - tree vectype, int misalign, + tree vectype, int, vect_cost_model_location where) { unsigned retval = 0; @@ -26160,14 +26164,14 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, || (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER))))) { - stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign); + stmt_cost = ix86_default_vector_cost (kind, mode); stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1); } else if ((kind == vec_construct || kind == scalar_to_vec) && node && SLP_TREE_DEF_TYPE (node) == vect_external_def) { - stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign); + stmt_cost = ix86_default_vector_cost (kind, mode); unsigned i; tree op; FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op) @@ -26231,7 +26235,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, TREE_VISITED (op) = 0; } if (stmt_cost == -1) - stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign); + stmt_cost = ix86_default_vector_cost (kind, mode); if (kind == vec_perm && vectype && GET_MODE_SIZE (TYPE_MODE (vectype)) == 32) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index eb52699..a50475b 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2968,7 +2968,8 @@ (match_operand:SWI248 1 "const_int_operand"))] "optimize_insn_for_size_p () && optimize_size > 1 && operands[1] != const0_rtx 
- && operands[1] != constm1_rtx + && (operands[1] != constm1_rtx + || (<MODE>mode == DImode && LEGACY_INT_REG_P (operands[0]))) && IN_RANGE (INTVAL (operands[1]), -128, 127) && !ix86_red_zone_used && REGNO (operands[0]) != SP_REG" diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index d88c3d6..ec74f93 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -21729,6 +21729,19 @@ (const_string "orig"))) (set_attr "mode" "TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")]) +;; Eliminate redundancy caused by +;; /* Special case TImode to 128-bit vector conversions via V2DI. */ +;; in ix86_expand_vector_move + +(define_split + [(set (match_operand:V2DI 0 "register_operand") + (vec_concat:V2DI + (subreg:DI (match_operand:TI 1 "register_operand") 0) + (subreg:DI (match_dup 1) 8)))] + "TARGET_SSE2 && ix86_pre_reload_split ()" + [(set (match_dup 0) + (subreg:V2DI (match_dup 1) 0))]) + (define_insn "*vec_concatv2di_0" [(set (match_operand:V2DI 0 "register_operand" "=v,v ,x") (vec_concat:V2DI diff --git a/gcc/config/riscv/gen-riscv-mcpu-texi.cc b/gcc/config/riscv/gen-riscv-mcpu-texi.cc new file mode 100644 index 0000000..9681438 --- /dev/null +++ b/gcc/config/riscv/gen-riscv-mcpu-texi.cc @@ -0,0 +1,43 @@ +#include <string> +#include <vector> +#include <stdio.h> + +int +main () +{ + puts ("@c Copyright (C) 2025 Free Software Foundation, Inc."); + puts ("@c This is part of the GCC manual."); + puts ("@c For copying conditions, see the file gcc/doc/include/fdl.texi."); + puts (""); + puts ("@c This file is generated automatically using"); + puts ("@c gcc/config/riscv/gen-riscv-mcpu-texi.cc from:"); + puts ("@c gcc/config/riscv/riscv-cores.def"); + puts (""); + puts ("@c Please *DO NOT* edit manually."); + puts (""); + puts ("@samp{Core Name}"); + puts (""); + puts ("@opindex mcpu"); + puts ("@item -mcpu=@var{processor-string}"); + puts ("Use architecture of and optimize the output for the given processor, specified"); + puts ("by particular CPU name. 
Permissible values for this option are:"); + puts (""); + puts (""); + + std::vector<std::string> coreNames; + +#define RISCV_CORE(CORE_NAME, ARCH, MICRO_ARCH) \ + coreNames.push_back (CORE_NAME); +#include "riscv-cores.def" +#undef RISCV_CORE + + for (size_t i = 0; i < coreNames.size(); ++i) { + if (i == coreNames.size() - 1) { + printf("@samp{%s}.\n", coreNames[i].c_str()); + } else { + printf("@samp{%s},\n\n", coreNames[i].c_str()); + } + } + + return 0; +} diff --git a/gcc/config/riscv/gen-riscv-mtune-texi.cc b/gcc/config/riscv/gen-riscv-mtune-texi.cc new file mode 100644 index 0000000..1bdfe2a --- /dev/null +++ b/gcc/config/riscv/gen-riscv-mtune-texi.cc @@ -0,0 +1,41 @@ +#include <string> +#include <vector> +#include <stdio.h> + +int +main () +{ + puts ("@c Copyright (C) 2025 Free Software Foundation, Inc."); + puts ("@c This is part of the GCC manual."); + puts ("@c For copying conditions, see the file gcc/doc/include/fdl.texi."); + puts (""); + puts ("@c This file is generated automatically using"); + puts ("@c gcc/config/riscv/gen-riscv-mtune-texi.cc from:"); + puts ("@c gcc/config/riscv/riscv-cores.def"); + puts (""); + puts ("@c Please *DO NOT* edit manually."); + puts (""); + puts ("@samp{Tune Name}"); + puts (""); + puts ("@opindex mtune"); + puts ("@item -mtune=@var{processor-string}"); + puts ("Optimize the output for the given processor, specified by microarchitecture or"); + puts ("particular CPU name. 
Permissible values for this option are:"); + puts (""); + puts (""); + + std::vector<std::string> tuneNames; + +#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO) \ + tuneNames.push_back (TUNE_NAME); +#include "riscv-cores.def" +#undef RISCV_TUNE + + for (size_t i = 0; i < tuneNames.size(); ++i) { + printf("@samp{%s},\n\n", tuneNames[i].c_str()); + } + + puts ("and all valid options for @option{-mcpu=}."); + + return 0; +} diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv index 7aac56a..a7eaa8b 100644 --- a/gcc/config/riscv/t-riscv +++ b/gcc/config/riscv/t-riscv @@ -229,8 +229,41 @@ s-riscv-ext.texi: build/gen-riscv-ext-texi$(build_exeext) $(SHELL) $(srcdir)/../move-if-change tmp-riscv-ext.texi $(srcdir)/doc/riscv-ext.texi $(STAMP) s-riscv-ext.texi -# Run `riscv-regen' after you changed or added anything from riscv-ext*.def +RISCV_CORES_DEFS = \ + $(srcdir)/config/riscv/riscv-cores.def + +build/gen-riscv-mtune-texi.o: $(srcdir)/config/riscv/gen-riscv-mtune-texi.cc \ + $(RISCV_CORES_DEFS) + $(CXX_FOR_BUILD) $(CXXFLAGS_FOR_BUILD) -c $< -o $@ + +build/gen-riscv-mcpu-texi.o: $(srcdir)/config/riscv/gen-riscv-mcpu-texi.cc \ + $(RISCV_CORES_DEFS) + $(CXX_FOR_BUILD) $(CXXFLAGS_FOR_BUILD) -c $< -o $@ + +build/gen-riscv-mtune-texi$(build_exeext): build/gen-riscv-mtune-texi.o + $(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ $< + +build/gen-riscv-mcpu-texi$(build_exeext): build/gen-riscv-mcpu-texi.o + $(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ $< + +$(srcdir)/doc/riscv-mtune.texi: $(RISCV_CORES_DEFS) +$(srcdir)/doc/riscv-mtune.texi: s-riscv-mtune.texi ; @true + +$(srcdir)/doc/riscv-mcpu.texi: $(RISCV_CORES_DEFS) +$(srcdir)/doc/riscv-mcpu.texi: s-riscv-mcpu.texi ; @true + +s-riscv-mtune.texi: build/gen-riscv-mtune-texi$(build_exeext) + $(RUN_GEN) build/gen-riscv-mtune-texi$(build_exeext) > tmp-riscv-mtune.texi + $(SHELL) $(srcdir)/../move-if-change tmp-riscv-mtune.texi $(srcdir)/doc/riscv-mtune.texi + $(STAMP) 
s-riscv-mtune.texi + +s-riscv-mcpu.texi: build/gen-riscv-mcpu-texi$(build_exeext) + $(RUN_GEN) build/gen-riscv-mcpu-texi$(build_exeext) > tmp-riscv-mcpu.texi + $(SHELL) $(srcdir)/../move-if-change tmp-riscv-mcpu.texi $(srcdir)/doc/riscv-mcpu.texi + $(STAMP) s-riscv-mcpu.texi + +# Run `riscv-regen' after you changed or added anything from riscv-ext*.def and riscv-cores*.def .PHONY: riscv-regen -riscv-regen: s-riscv-ext.texi s-riscv-ext.opt +riscv-regen: s-riscv-ext.texi s-riscv-ext.opt s-riscv-mtune.texi s-riscv-mcpu.texi diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h index d760a7e..6becad1 100644 --- a/gcc/config/s390/s390-protos.h +++ b/gcc/config/s390/s390-protos.h @@ -128,6 +128,8 @@ extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx); extern void s390_expand_vec_init (rtx, rtx); extern rtx s390_expand_merge_perm_const (machine_mode, bool); extern void s390_expand_merge (rtx, rtx, rtx, bool); +extern void s390_expand_int_spaceship (rtx, rtx, rtx, rtx); +extern void s390_expand_fp_spaceship (rtx, rtx, rtx, rtx); extern rtx s390_build_signbit_mask (machine_mode); extern rtx s390_return_addr_rtx (int, rtx); extern rtx s390_back_chain_rtx (void); diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index abe551c..012b6db 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -8213,6 +8213,167 @@ s390_expand_atomic (machine_mode mode, enum rtx_code code, NULL_RTX, 1, OPTAB_DIRECT), 1); } +/* Expand integer op0 = op1 <=> op2, i.e., + op0 = op1 == op2 ? 0 : op1 < op2 ? -1 : 1. + + Signedness is specified by op3. If op3 equals 1, then perform an unsigned + comparison, and if op3 equals -1, then perform a signed comparison. + + For integer comparisons we strive for a sequence like + CR[L] ; LHI ; LOCHIL ; LOCHIH + where the first three instructions fit into a group. 
*/ + +void +s390_expand_int_spaceship (rtx op0, rtx op1, rtx op2, rtx op3) +{ + gcc_assert (op3 == const1_rtx || op3 == constm1_rtx); + + rtx cc, cond_lt, cond_gt; + machine_mode cc_mode; + machine_mode mode = GET_MODE (op1); + + /* Prior to VXE3, emulate a 128-bit comparison by breaking it up into three + comparisons. First test the high halves. If they are equal, then test + the low halves. Finally, test for equality. Depending on the results + make use of LOCs. */ + if (mode == TImode && !TARGET_VXE3) + { + gcc_assert (TARGET_VX); + op1 + = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op1, TImode, 0)); + op2 + = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op2, TImode, 0)); + rtx lab = gen_label_rtx (); + rtx ccz = gen_rtx_REG (CCZmode, CC_REGNUM); + /* Compare high halves for equality. + VEC[L]G op1, op2 sets + CC1 if high(op1) < high(op2) + and + CC2 if high(op1) > high(op2). */ + machine_mode cc_mode = op3 == const1_rtx ? CCUmode : CCSmode; + rtx lane0 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); + emit_insn (gen_rtx_SET ( + gen_rtx_REG (cc_mode, CC_REGNUM), + gen_rtx_COMPARE (cc_mode, + gen_rtx_VEC_SELECT (DImode, op1, lane0), + gen_rtx_VEC_SELECT (DImode, op2, lane0)))); + s390_emit_jump (lab, gen_rtx_NE (CCZmode, ccz, const0_rtx)); + /* At this point we know that the high halves are equal. + VCHLGS op2, op1 sets CC1 if low(op1) < low(op2) */ + emit_insn (gen_rtx_PARALLEL ( + VOIDmode, + gen_rtvec (2, + gen_rtx_SET (gen_rtx_REG (CCVIHUmode, CC_REGNUM), + gen_rtx_COMPARE (CCVIHUmode, op2, op1)), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode))))); + emit_label (lab); + emit_insn (gen_rtx_SET (op0, const1_rtx)); + emit_insn ( + gen_movsicc (op0, + gen_rtx_LTU (CCUmode, gen_rtx_REG (CCUmode, CC_REGNUM), + const0_rtx), + constm1_rtx, op0)); + /* Deal with the case where both halves are equal.
*/ + emit_insn (gen_rtx_PARALLEL ( + VOIDmode, + gen_rtvec (2, + gen_rtx_SET (gen_rtx_REG (CCVEQmode, CC_REGNUM), + gen_rtx_COMPARE (CCVEQmode, op1, op2)), + gen_rtx_SET (gen_reg_rtx (V2DImode), + gen_rtx_EQ (V2DImode, op1, op2))))); + emit_insn (gen_movsicc (op0, gen_rtx_EQ (CCZmode, ccz, const0_rtx), + const0_rtx, op0)); + return; + } + + if (mode == QImode || mode == HImode) + { + rtx_code extend = op3 == const1_rtx ? ZERO_EXTEND : SIGN_EXTEND; + op1 = simplify_gen_unary (extend, SImode, op1, mode); + op1 = force_reg (SImode, op1); + op2 = simplify_gen_unary (extend, SImode, op2, mode); + op2 = force_reg (SImode, op2); + mode = SImode; + } + + if (op3 == const1_rtx) + { + cc_mode = CCUmode; + cc = gen_rtx_REG (cc_mode, CC_REGNUM); + cond_lt = gen_rtx_LTU (mode, cc, const0_rtx); + cond_gt = gen_rtx_GTU (mode, cc, const0_rtx); + } + else + { + cc_mode = CCSmode; + cc = gen_rtx_REG (cc_mode, CC_REGNUM); + cond_lt = gen_rtx_LT (mode, cc, const0_rtx); + cond_gt = gen_rtx_GT (mode, cc, const0_rtx); + } + + emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (cc_mode, op1, op2))); + emit_move_insn (op0, const0_rtx); + emit_insn (gen_movsicc (op0, cond_lt, constm1_rtx, op0)); + emit_insn (gen_movsicc (op0, cond_gt, const1_rtx, op0)); +} + +/* Expand floating-point op0 = op1 <=> op2, i.e., + op0 = op1 == op2 ? 0 : op1 < op2 ? -1 : op1 > op2 ? 1 : 2. + + If op3 equals const0_rtx, then we are interested in the compare only (see + test spaceship-fp-4.c). Otherwise, op3 is a CONST_INT different than + const1_rtx and constm1_rtx which is used in order to set op0 for unordered. + + Emit a branch-only solution, i.e., let if-convert fold the branches into + LOCs if applicable. This has the benefit that the solution is also + applicable if we are only interested in the compare, i.e., if op3 equals + const0_rtx. 
+ */ + +void +s390_expand_fp_spaceship (rtx op0, rtx op1, rtx op2, rtx op3) +{ + gcc_assert (op3 != const1_rtx && op3 != constm1_rtx); + + machine_mode mode = GET_MODE (op1); + machine_mode cc_mode = s390_select_ccmode (LTGT, op1, op2); + rtx cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM); + rtx cond_unordered = gen_rtx_UNORDERED (mode, cc_reg, const0_rtx); + rtx cond_eq = gen_rtx_EQ (mode, cc_reg, const0_rtx); + rtx cond_gt = gen_rtx_GT (mode, cc_reg, const0_rtx); + rtx_insn *insn; + rtx l_unordered = gen_label_rtx (); + rtx l_eq = gen_label_rtx (); + rtx l_gt = gen_label_rtx (); + rtx l_end = gen_label_rtx (); + + s390_emit_compare (VOIDmode, LTGT, op1, op2); + if (!flag_finite_math_only) + { + insn = s390_emit_jump (l_unordered, cond_unordered); + add_reg_br_prob_note (insn, profile_probability::very_unlikely ()); + } + insn = s390_emit_jump (l_eq, cond_eq); + add_reg_br_prob_note (insn, profile_probability::unlikely ()); + insn = s390_emit_jump (l_gt, cond_gt); + add_reg_br_prob_note (insn, profile_probability::even ()); + emit_move_insn (op0, constm1_rtx); + emit_jump (l_end); + emit_label (l_eq); + emit_move_insn (op0, const0_rtx); + emit_jump (l_end); + emit_label (l_gt); + emit_move_insn (op0, const1_rtx); + if (!flag_finite_math_only) + { + emit_jump (l_end); + emit_label (l_unordered); + rtx unord_val = op3 == const0_rtx ? const2_rtx : op3; + emit_move_insn (op0, unord_val); + } + emit_label (l_end); +} + /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL. We need to emit DTP-relative relocations. */ diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 1edbfde..8cc48b0 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -1527,6 +1527,27 @@ operands[0] = SET_DEST (PATTERN (curr_insn)); }) +; Restrict spaceship optab to z13 or later since there we have +; LOAD HALFWORD IMMEDIATE ON CONDITION. 
+ +(define_mode_iterator SPACESHIP_INT [(TI "TARGET_VX") DI SI HI QI]) +(define_expand "spaceship<mode>4" + [(match_operand:SI 0 "register_operand") + (match_operand:SPACESHIP_INT 1 "register_operand") + (match_operand:SPACESHIP_INT 2 "register_operand") + (match_operand:SI 3 "const_int_operand")] + "TARGET_Z13 && TARGET_64BIT" + "s390_expand_int_spaceship (operands[0], operands[1], operands[2], operands[3]); DONE;") + +(define_mode_iterator SPACESHIP_BFP [TF DF SF]) +(define_expand "spaceship<mode>4" + [(match_operand:SI 0 "register_operand") + (match_operand:SPACESHIP_BFP 1 "register_operand") + (match_operand:SPACESHIP_BFP 2 "register_operand") + (match_operand:SI 3 "const_int_operand")] + "TARGET_Z13 && TARGET_64BIT && TARGET_HARD_FLOAT" + "s390_expand_fp_spaceship (operands[0], operands[1], operands[2], operands[3]); DONE;") + ; (TF|DF|SF|TD|DD|SD) instructions diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index 9e9cd9b..eb66427 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -2921,7 +2921,7 @@ static size_t cp_parser_skip_std_attribute_spec_seq static size_t cp_parser_skip_attributes_opt (cp_parser *, size_t); static bool cp_parser_extension_opt - (cp_parser *, int *); + (cp_parser *, int *, int *); static void cp_parser_label_declaration (cp_parser *); @@ -9504,11 +9504,12 @@ cp_parser_unary_expression (cp_parser *parser, cp_id_kind * pidk, case RID_EXTENSION: { /* The saved value of the PEDANTIC flag. */ - int saved_pedantic; + int saved_pedantic, saved_long_long; tree expr; /* Save away the PEDANTIC flag. */ - cp_parser_extension_opt (parser, &saved_pedantic); + cp_parser_extension_opt (parser, &saved_pedantic, + &saved_long_long); /* Also suppress -Wconditionally-supported. */ diagnostic_push_diagnostics (global_dc, input_location); diagnostic_classify_diagnostic @@ -9519,6 +9520,7 @@ cp_parser_unary_expression (cp_parser *parser, cp_id_kind * pidk, /* Restore the PEDANTIC flag. 
*/ diagnostic_pop_diagnostics (global_dc, input_location); pedantic = saved_pedantic; + warn_long_long = saved_long_long; return expr; } @@ -16047,15 +16049,16 @@ cp_parser_declaration_seq_opt (cp_parser* parser) static void cp_parser_declaration (cp_parser* parser, tree prefix_attrs) { - int saved_pedantic; + int saved_pedantic, saved_long_long; /* Check for the `__extension__' keyword. */ - if (cp_parser_extension_opt (parser, &saved_pedantic)) + if (cp_parser_extension_opt (parser, &saved_pedantic, &saved_long_long)) { /* Parse the qualified declaration. */ cp_parser_declaration (parser, prefix_attrs); /* Restore the PEDANTIC flag. */ pedantic = saved_pedantic; + warn_long_long = saved_long_long; return; } @@ -16323,15 +16326,16 @@ static void cp_parser_block_declaration (cp_parser *parser, bool statement_p) { - int saved_pedantic; + int saved_pedantic, saved_long_long; /* Check for the `__extension__' keyword. */ - if (cp_parser_extension_opt (parser, &saved_pedantic)) + if (cp_parser_extension_opt (parser, &saved_pedantic, &saved_long_long)) { /* Parse the qualified declaration. */ cp_parser_block_declaration (parser, statement_p); /* Restore the PEDANTIC flag. */ pedantic = saved_pedantic; + warn_long_long = saved_long_long; return; } @@ -28869,16 +28873,17 @@ cp_parser_member_declaration (cp_parser* parser) cp_token *token = NULL; cp_token *decl_spec_token_start = NULL; cp_token *initializer_token_start = NULL; - int saved_pedantic; + int saved_pedantic, saved_long_long; bool saved_colon_corrects_to_scope_p = parser->colon_corrects_to_scope_p; /* Check for the `__extension__' keyword. */ - if (cp_parser_extension_opt (parser, &saved_pedantic)) + if (cp_parser_extension_opt (parser, &saved_pedantic, &saved_long_long)) { /* Recurse. */ cp_parser_member_declaration (parser); /* Restore the old value of the PEDANTIC flag. 
*/ pedantic = saved_pedantic; + warn_long_long = saved_long_long; return; } @@ -32020,13 +32025,16 @@ cp_parser_skip_attributes_opt (cp_parser *parser, size_t n) present, and FALSE otherwise. *SAVED_PEDANTIC is set to the current value of the PEDANTIC flag, regardless of whether or not the `__extension__' keyword is present. The caller is responsible - for restoring the value of the PEDANTIC flag. */ + for restoring the value of the PEDANTIC flag. Similarly *SAVED_LONG_LONG + for warn_long_long flag. */ static bool -cp_parser_extension_opt (cp_parser* parser, int* saved_pedantic) +cp_parser_extension_opt (cp_parser *parser, int *saved_pedantic, + int *saved_long_long) { /* Save the old value of the PEDANTIC flag. */ *saved_pedantic = pedantic; + *saved_long_long = warn_long_long; if (cp_lexer_next_token_is_keyword (parser->lexer, RID_EXTENSION)) { @@ -32035,6 +32043,8 @@ cp_parser_extension_opt (cp_parser* parser, int* saved_pedantic) /* We're not being pedantic while the `__extension__' keyword is in effect. */ pedantic = 0; + /* And we don't want -Wlong-long warning. 
*/ + warn_long_long = 0; return true; } diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 71ae764..0b7a05c 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -6952,14 +6952,22 @@ convert_nontype_argument_function (tree type, tree expr, { auto_diagnostic_group d; location_t loc = cp_expr_loc_or_input_loc (expr); - error_at (loc, "%qE is not a valid template argument for type %qT", - expr, type); - if (TYPE_PTR_P (type)) - inform (loc, "it must be the address of a function " - "with external linkage"); + tree c; + if (cxx_dialect >= cxx17 + && (c = cxx_constant_value (fn), + c == error_mark_node)) + ; else - inform (loc, "it must be the name of a function with " - "external linkage"); + { + error_at (loc, "%qE is not a valid template argument for " + "type %qT", expr, type); + if (TYPE_PTR_P (type)) + inform (loc, "it must be the address of a function " + "with external linkage"); + else + inform (loc, "it must be the name of a function with " + "external linkage"); + } } return NULL_TREE; } @@ -7402,22 +7410,22 @@ invalid_tparm_referent_p (tree type, tree expr, tsubst_flags_t complain) /* Null pointer values are OK in C++11. 
*/; else { - if (VAR_P (expr)) - { - if (complain & tf_error) - error ("%qD is not a valid template argument " - "because %qD is a variable, not the address of " - "a variable", expr, expr); - return true; - } + tree c; + if (!(complain & tf_error)) + ; + else if (cxx_dialect >= cxx17 + && (c = cxx_constant_value (expr), + c == error_mark_node)) + ; + else if (VAR_P (expr)) + error ("%qD is not a valid template argument " + "because %qD is a variable, not the address of " + "a variable", expr, expr); else - { - if (complain & tf_error) - error ("%qE is not a valid template argument for %qT " - "because it is not the address of a variable", - expr, type); - return true; - } + error ("%qE is not a valid template argument for %qT " + "because it is not the address of a variable", + expr, type); + return true; } } return false; diff --git a/gcc/cprop.cc b/gcc/cprop.cc index bc72e64..dfe3462 100644 --- a/gcc/cprop.cc +++ b/gcc/cprop.cc @@ -1525,6 +1525,7 @@ static bool bypass_block (basic_block bb, rtx_insn *setcc, rtx_insn *jump) { rtx_insn *insn; + rtx setcc_src, setcc_dest; rtx note; edge e, edest; bool change; @@ -1533,7 +1534,19 @@ bypass_block (basic_block bb, rtx_insn *setcc, rtx_insn *jump) unsigned i; edge_iterator ei; - insn = (setcc != NULL) ? setcc : jump; + if (setcc != NULL) + { + rtx set = single_set (setcc); + setcc_dest = SET_DEST (set); + setcc_src = SET_SRC (set); + insn = setcc; + } + else + { + setcc_dest = NULL; + setcc_src = NULL; + insn = jump; + } /* Determine set of register uses in INSN. 
*/ reg_use_count = 0; @@ -1608,9 +1621,7 @@ bypass_block (basic_block bb, rtx_insn *setcc, rtx_insn *jump) src = SET_SRC (pc_set (jump)); if (setcc != NULL) - src = simplify_replace_rtx (src, - SET_DEST (PATTERN (setcc)), - SET_SRC (PATTERN (setcc))); + src = simplify_replace_rtx (src, setcc_dest, setcc_src); new_rtx = simplify_replace_rtx (src, reg_used, set->src); @@ -1716,10 +1727,11 @@ bypass_conditional_jumps (void) { if (setcc) break; - if (GET_CODE (PATTERN (insn)) != SET) + rtx singleset = single_set (insn); + if (singleset == NULL_RTX) break; - dest = SET_DEST (PATTERN (insn)); + dest = SET_DEST (singleset); if (REG_P (dest)) setcc = insn; else diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 0980230..e442a9c 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -31370,31 +31370,14 @@ When the RISC-V specifications define an extension as depending on other extensions, GCC will implicitly add the dependent extensions to the enabled extension set if they weren't added explicitly. -@opindex mcpu -@item -mcpu=@var{processor-string} -Use architecture of and optimize the output for the given processor, specified -by particular CPU name. -Permissible values for this option are: @samp{mips-p8700}, @samp{sifive-e20}, -@samp{sifive-e21}, @samp{sifive-e24}, @samp{sifive-e31}, @samp{sifive-e34}, -@samp{sifive-e76}, @samp{sifive-s21}, @samp{sifive-s51}, @samp{sifive-s54}, -@samp{sifive-s76}, @samp{sifive-u54}, @samp{sifive-u74}, @samp{sifive-x280}, -@samp{sifive-xp450}, @samp{sifive-x670}, @samp{thead-c906}, @samp{tt-ascalon-d8}, -@samp{xiangshan-nanhu}, @samp{xiangshan-kunminghu}, @samp{xt-c908}, @samp{xt-c908v}, -@samp{xt-c910}, @samp{xt-c910v2}, @samp{xt-c920}, @samp{xt-c920v2}. +@include riscv-mcpu.texi Note that @option{-mcpu} does not override @option{-march} or @option{-mtune}. -@opindex mtune -@item -mtune=@var{processor-string} -Optimize the output for the given processor, specified by microarchitecture or -particular CPU name. 
Permissible values for this option are: -@samp{generic-ooo}, @samp{mips-p8700}, @samp{rocket}, @samp{sifive-3-series}, -@samp{sifive-5-series}, @samp{sifive-7-series}, @samp{size}, -@samp{sifive-p400-series}, @samp{sifive-p600-series}, and all valid options for -@option{-mcpu=}. +@include riscv-mtune.texi When @option{-mtune=} is not specified, use the setting from @option{-mcpu}, -the default is @samp{rocket} if both are not specified. +the default is @samp{generic} if both are not specified. The @samp{size} choice is not intended for use by end-users. This is used when @option{-Os} is specified. It overrides the instruction cost info diff --git a/gcc/doc/riscv-mcpu.texi b/gcc/doc/riscv-mcpu.texi new file mode 100644 index 0000000..6753e51 --- /dev/null +++ b/gcc/doc/riscv-mcpu.texi @@ -0,0 +1,69 @@ +@c Copyright (C) 2025 Free Software Foundation, Inc. +@c This is part of the GCC manual. +@c For copying conditions, see the file gcc/doc/include/fdl.texi. + +@c This file is generated automatically using +@c gcc/config/riscv/gen-riscv-mcpu-texi.cc from: +@c gcc/config/riscv/riscv-cores.def + +@c Please *DO NOT* edit manually. + +@samp{Core Name} + +@opindex mcpu +@item -mcpu=@var{processor-string} +Use architecture of and optimize the output for the given processor, specified +by particular CPU name. Permissible values for this option are: + + +@samp{sifive-e20}, + +@samp{sifive-e21}, + +@samp{sifive-e24}, + +@samp{sifive-e31}, + +@samp{sifive-e34}, + +@samp{sifive-e76}, + +@samp{sifive-s21}, + +@samp{sifive-s51}, + +@samp{sifive-s54}, + +@samp{sifive-s76}, + +@samp{sifive-u54}, + +@samp{sifive-u74}, + +@samp{sifive-x280}, + +@samp{sifive-p450}, + +@samp{sifive-p670}, + +@samp{thead-c906}, + +@samp{xt-c908}, + +@samp{xt-c908v}, + +@samp{xt-c910}, + +@samp{xt-c910v2}, + +@samp{xt-c920}, + +@samp{xt-c920v2}, + +@samp{tt-ascalon-d8}, + +@samp{xiangshan-nanhu}, + +@samp{xiangshan-kunminghu}, + +@samp{mips-p8700}. 
diff --git a/gcc/doc/riscv-mtune.texi b/gcc/doc/riscv-mtune.texi new file mode 100644 index 0000000..a2a4d3e --- /dev/null +++ b/gcc/doc/riscv-mtune.texi @@ -0,0 +1,59 @@ +@c Copyright (C) 2025 Free Software Foundation, Inc. +@c This is part of the GCC manual. +@c For copying conditions, see the file gcc/doc/include/fdl.texi. + +@c This file is generated automatically using +@c gcc/config/riscv/gen-riscv-mtune-texi.cc from: +@c gcc/config/riscv/riscv-cores.def + +@c Please *DO NOT* edit manually. + +@samp{Tune Name} + +@opindex mtune +@item -mtune=@var{processor-string} +Optimize the output for the given processor, specified by microarchitecture or +particular CPU name. Permissible values for this option are: + + +@samp{generic}, + +@samp{rocket}, + +@samp{sifive-3-series}, + +@samp{sifive-5-series}, + +@samp{sifive-7-series}, + +@samp{sifive-p400-series}, + +@samp{sifive-p600-series}, + +@samp{tt-ascalon-d8}, + +@samp{thead-c906}, + +@samp{xt-c908}, + +@samp{xt-c908v}, + +@samp{xt-c910}, + +@samp{xt-c910v2}, + +@samp{xt-c920}, + +@samp{xt-c920v2}, + +@samp{xiangshan-nanhu}, + +@samp{xiangshan-kunminghu}, + +@samp{generic-ooo}, + +@samp{size}, + +@samp{mips-p8700}, + +and all valid options for @option{-mcpu=}. diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 928578b..215552c 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -6513,6 +6513,15 @@ The default is @code{NULL_TREE} which means to not vectorize scatter stores. @end deftypefn +@deftypefn {Target Hook} bool TARGET_VECTORIZE_PREFER_GATHER_SCATTER (machine_mode @var{mode}, int @var{scale}, unsigned int @var{group_size}) +This hook returns TRUE if gather loads or scatter stores are cheaper on +this target than a sequence of elementwise loads or stores. The @var{mode} +and @var{scale} correspond to the @code{gather_load} and +@code{scatter_store} instruction patterns. The @var{group_size} is the +number of scalar elements in each scalar loop iteration that are to be +combined into the vector. 
+@end deftypefn + @deftypefn {Target Hook} int TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN (struct cgraph_node *@var{}, struct cgraph_simd_clone *@var{}, @var{tree}, @var{int}, @var{bool}) This hook should set @var{vecsize_mangle}, @var{vecsize_int}, @var{vecsize_float} fields in @var{simd_clone} structure pointed by @var{clone_info} argument and also diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index eccc4d8..b03ad4c 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4311,6 +4311,8 @@ address; but often a machine-dependent strategy can generate better code. @hook TARGET_VECTORIZE_BUILTIN_SCATTER +@hook TARGET_VECTORIZE_PREFER_GATHER_SCATTER + @hook TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN @hook TARGET_SIMD_CLONE_ADJUST diff --git a/gcc/fortran/check.cc b/gcc/fortran/check.cc index 838d523..8626526 100644 --- a/gcc/fortran/check.cc +++ b/gcc/fortran/check.cc @@ -5559,6 +5559,27 @@ gfc_check_scan (gfc_expr *x, gfc_expr *y, gfc_expr *z, gfc_expr *kind) return true; } +bool +gfc_check_split (gfc_expr *string, gfc_expr *set, gfc_expr *pos, gfc_expr *back) +{ + if (!type_check (string, 0, BT_CHARACTER)) + return false; + + if (!type_check (set, 1, BT_CHARACTER)) + return false; + + if (!type_check (pos, 2, BT_INTEGER) || !scalar_check (pos, 2)) + return false; + + if (back != NULL + && (!type_check (back, 3, BT_LOGICAL) || !scalar_check (back, 3))) + return false; + + if (!same_type_check (string, 0, set, 1)) + return false; + + return true; +} bool gfc_check_secnds (gfc_expr *r) diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 85feb18..d9dcd1b 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -729,6 +729,8 @@ enum gfc_isym_id GFC_ISYM_COSPI, GFC_ISYM_SINPI, GFC_ISYM_TANPI, + + GFC_ISYM_SPLIT, }; enum init_local_logical diff --git a/gcc/fortran/intrinsic.cc b/gcc/fortran/intrinsic.cc index 9e07627..c99a7a8 100644 --- a/gcc/fortran/intrinsic.cc +++ b/gcc/fortran/intrinsic.cc @@ -3933,6 +3933,14 @@ 
add_subroutines (void) pt, BT_INTEGER, di, OPTIONAL, INTENT_IN, gt, BT_INTEGER, di, OPTIONAL, INTENT_OUT); + add_sym_4s ("split", GFC_ISYM_SPLIT, CLASS_PURE, + BT_UNKNOWN, 0, GFC_STD_F2023, + gfc_check_split, NULL, gfc_resolve_split, + "string", BT_CHARACTER, dc, REQUIRED, INTENT_IN, + "set", BT_CHARACTER, dc, REQUIRED, INTENT_IN, + "pos", BT_INTEGER, di, REQUIRED, INTENT_INOUT, + "back", BT_LOGICAL, dl, OPTIONAL, INTENT_IN); + /* The following subroutines are part of ISO_C_BINDING. */ add_sym_3s ("c_f_pointer", GFC_ISYM_C_F_POINTER, CLASS_IMPURE, BT_UNKNOWN, 0, diff --git a/gcc/fortran/intrinsic.h b/gcc/fortran/intrinsic.h index fd54588..8a0ab93 100644 --- a/gcc/fortran/intrinsic.h +++ b/gcc/fortran/intrinsic.h @@ -215,6 +215,7 @@ bool gfc_check_mvbits (gfc_expr *, gfc_expr *, gfc_expr *, gfc_expr *, bool gfc_check_random_init (gfc_expr *, gfc_expr *); bool gfc_check_random_number (gfc_expr *); bool gfc_check_random_seed (gfc_expr *, gfc_expr *, gfc_expr *); +bool gfc_check_split (gfc_expr *, gfc_expr *, gfc_expr *, gfc_expr *); bool gfc_check_dtime_etime_sub (gfc_expr *, gfc_expr *); bool gfc_check_fgetputc_sub (gfc_expr *, gfc_expr *, gfc_expr *); bool gfc_check_fgetput_sub (gfc_expr *, gfc_expr *); @@ -693,6 +694,7 @@ void gfc_resolve_link_sub (gfc_code *); void gfc_resolve_symlnk_sub (gfc_code *); void gfc_resolve_signal_sub (gfc_code *); void gfc_resolve_sleep_sub (gfc_code *); +void gfc_resolve_split (gfc_code *); void gfc_resolve_stat_sub (gfc_code *); void gfc_resolve_system_clock (gfc_code *); void gfc_resolve_system_sub (gfc_code *); diff --git a/gcc/fortran/intrinsic.texi b/gcc/fortran/intrinsic.texi index 3103da3..a24b234 100644 --- a/gcc/fortran/intrinsic.texi +++ b/gcc/fortran/intrinsic.texi @@ -313,6 +313,7 @@ Some basic guidelines for editing this document: * @code{SIZEOF}: SIZEOF, Determine the size in bytes of an expression * @code{SLEEP}: SLEEP, Sleep for the specified number of seconds * @code{SPACING}: SPACING, Smallest distance between two 
numbers of a given type +* @code{SPLIT}: SPLIT, Parse a string into tokens, one at a time. * @code{SPREAD}: SPREAD, Add a dimension to an array * @code{SQRT}: SQRT, Square-root function * @code{SRAND}: SRAND, Reinitialize the random number generator @@ -14203,6 +14204,69 @@ Fortran 90 and later +@node SPLIT +@section @code{SPLIT} --- Parse a string into tokens, one at a time +@fnindex SPLIT +@cindex string, split + +@table @asis +@item @emph{Synopsis}: +@code{CALL SPLIT(STRING, SET, POS [, BACK])} + +@item @emph{Description}: +Updates the integer @var{POS} to the position of the next (or previous) +separator in @var{STRING}. + +If @var{BACK} is absent or is present with the value false, @var{POS} is +assigned the position of the leftmost token delimiter in @var{STRING} whose +position is greater than @var{POS}, or if there is no such character, it is +assigned a value one greater than the length of @var{STRING}. This identifies +a token with starting position one greater than the value of @var{POS} on +invocation, and ending position one less than the value of @var{POS} on return. + +If @var{BACK} is present with the value true, @var{POS} is assigned the +position of the rightmost token delimiter in @var{STRING} whose position is +less than @var{POS}, or if there is no such character, it is assigned the value +zero. This identifies a token with ending position one less than the value of +@var{POS} on invocation, and starting position one greater than the value of +@var{POS} on return. + +@item @emph{Class}: +Subroutine + +@item @emph{Arguments}: +@multitable @columnfractions .15 .70 +@item @var{STRING} @tab Shall be of type @code{CHARACTER}. +@item @var{SET} @tab Shall be of type @code{CHARACTER}. +@item @var{POS} @tab Shall be of type @code{INTEGER}. +@item @var{BACK} @tab (Optional) Shall be of type @code{LOGICAL}.
+@end multitable + +@item @emph{Example}: +@smallexample +character(len=:), allocatable :: input +character(len=2) :: set = ', ' +integer :: p +input = "one,last example" +p = 0 +do + if (p > len(input)) exit + istart = p + 1 + call split(input, set, p) + iend = p - 1 + print '(t7, a)', input(istart:iend) +end do +@end smallexample + +@item @emph{Standard}: +Fortran 2023 + +@item @emph{See also}: +@ref{SCAN} +@end table + + + @node SPREAD @section @code{SPREAD} --- Add a dimension to an array @fnindex SPREAD diff --git a/gcc/fortran/iresolve.cc b/gcc/fortran/iresolve.cc index 1001309..da354ab 100644 --- a/gcc/fortran/iresolve.cc +++ b/gcc/fortran/iresolve.cc @@ -3863,6 +3863,19 @@ gfc_resolve_sleep_sub (gfc_code *c) c->resolved_sym = gfc_get_intrinsic_sub_symbol (name); } +void +gfc_resolve_split (gfc_code *c) +{ + const char *name; + gfc_expr *string; + + string = c->ext.actual->expr; + if (string->ts.type == BT_CHARACTER && string->ts.kind == 4) + name = "__split_char4"; + else + name = "__split"; + c->resolved_sym = gfc_get_intrinsic_sub_symbol (name); +} /* G77 compatibility function srand(). 
*/ diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index 6b759d1..0f7637d 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -6296,8 +6296,8 @@ static tree gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset, gfc_expr ** lower, gfc_expr ** upper, stmtblock_t * pblock, stmtblock_t * descriptor_block, tree * overflow, - tree expr3_elem_size, tree *nelems, gfc_expr *expr3, - tree expr3_desc, bool e3_has_nodescriptor, gfc_expr *expr, + tree expr3_elem_size, gfc_expr *expr3, tree expr3_desc, + bool e3_has_nodescriptor, gfc_expr *expr, tree *element_size, bool explicit_ts) { tree type; @@ -6573,7 +6573,6 @@ gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset, if (rank == 0) return *element_size; - *nelems = gfc_evaluate_now (stride, pblock); stride = fold_convert (size_type_node, stride); /* First check for overflow. Since an array of type character can @@ -6662,9 +6661,8 @@ retrieve_last_ref (gfc_ref **ref_in, gfc_ref **prev_ref_in) bool gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, tree errlen, tree label_finish, tree expr3_elem_size, - tree *nelems, gfc_expr *expr3, tree e3_arr_desc, - bool e3_has_nodescriptor, gfc_omp_namelist *omp_alloc, - bool explicit_ts) + gfc_expr *expr3, tree e3_arr_desc, bool e3_has_nodescriptor, + gfc_omp_namelist *omp_alloc, bool explicit_ts) { tree tmp; tree pointer; @@ -6795,7 +6793,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, coarray ? ref->u.ar.as->corank : 0, &offset, lower, upper, &se->pre, &set_descriptor_block, &overflow, - expr3_elem_size, nelems, expr3, e3_arr_desc, + expr3_elem_size, expr3, e3_arr_desc, e3_has_nodescriptor, expr, &element_size, explicit_ts); diff --git a/gcc/fortran/trans-array.h b/gcc/fortran/trans-array.h index 1bb3294..29098fd 100644 --- a/gcc/fortran/trans-array.h +++ b/gcc/fortran/trans-array.h @@ -20,9 +20,8 @@ along with GCC; see the file COPYING3. 
If not see /* Generate code to initialize and allocate an array. Statements are added to se, which should contain an expression for the array descriptor. */ -bool gfc_array_allocate (gfc_se *, gfc_expr *, tree, tree, tree, tree, - tree, tree *, gfc_expr *, tree, bool, - gfc_omp_namelist *, bool); +bool gfc_array_allocate (gfc_se *, gfc_expr *, tree, tree, tree, tree, tree, + gfc_expr *, tree, bool, gfc_omp_namelist *, bool); /* Allow the bounds of a loop to be set from a callee's array spec. */ void gfc_set_loop_bounds_from_array_spec (gfc_interface_mapping *, diff --git a/gcc/fortran/trans-decl.cc b/gcc/fortran/trans-decl.cc index d5acdca..741acc0 100644 --- a/gcc/fortran/trans-decl.cc +++ b/gcc/fortran/trans-decl.cc @@ -197,6 +197,7 @@ tree gfor_fndecl_string_scan; tree gfor_fndecl_string_verify; tree gfor_fndecl_string_trim; tree gfor_fndecl_string_minmax; +tree gfor_fndecl_string_split; tree gfor_fndecl_adjustl; tree gfor_fndecl_adjustr; tree gfor_fndecl_select_string; @@ -208,6 +209,7 @@ tree gfor_fndecl_string_scan_char4; tree gfor_fndecl_string_verify_char4; tree gfor_fndecl_string_trim_char4; tree gfor_fndecl_string_minmax_char4; +tree gfor_fndecl_string_split_char4; tree gfor_fndecl_adjustl_char4; tree gfor_fndecl_adjustr_char4; tree gfor_fndecl_select_string_char4; @@ -3569,6 +3571,12 @@ gfc_build_intrinsic_function_decls (void) build_pointer_type (pchar1_type_node), integer_type_node, integer_type_node); + gfor_fndecl_string_split = gfc_build_library_function_decl_with_spec ( + get_identifier (PREFIX ("string_split")), ". . R . R . . ", + gfc_charlen_type_node, 6, gfc_charlen_type_node, pchar1_type_node, + gfc_charlen_type_node, pchar1_type_node, gfc_charlen_type_node, + gfc_logical4_type_node); + gfor_fndecl_adjustl = gfc_build_library_function_decl_with_spec ( get_identifier (PREFIX("adjustl")), ". W . 
R ", void_type_node, 3, pchar1_type_node, gfc_charlen_type_node, @@ -3641,6 +3649,12 @@ gfc_build_intrinsic_function_decls (void) build_pointer_type (pchar4_type_node), integer_type_node, integer_type_node); + gfor_fndecl_string_split_char4 = gfc_build_library_function_decl_with_spec ( + get_identifier (PREFIX ("string_split_char4")), ". . R . R . . ", + gfc_charlen_type_node, 6, gfc_charlen_type_node, pchar4_type_node, + gfc_charlen_type_node, pchar4_type_node, gfc_charlen_type_node, + gfc_logical4_type_node); + gfor_fndecl_adjustl_char4 = gfc_build_library_function_decl_with_spec ( get_identifier (PREFIX("adjustl_char4")), ". W . R ", void_type_node, 3, pchar4_type_node, gfc_charlen_type_node, diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index be98427..f68ceb1 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -3466,6 +3466,74 @@ else return gfc_finish_block (&block); } +static tree +conv_intrinsic_split (gfc_code *code) +{ + stmtblock_t block, post_block; + gfc_se se; + gfc_expr *string_expr, *set_expr, *pos_expr, *back_expr; + tree string, string_len; + tree set, set_len; + tree pos, pos_for_call; + tree back; + tree fndecl, call; + + string_expr = code->ext.actual->expr; + set_expr = code->ext.actual->next->expr; + pos_expr = code->ext.actual->next->next->expr; + back_expr = code->ext.actual->next->next->next->expr; + + gfc_start_block (&block); + gfc_init_block (&post_block); + + gfc_init_se (&se, NULL); + gfc_conv_expr (&se, string_expr); + gfc_conv_string_parameter (&se); + gfc_add_block_to_block (&block, &se.pre); + gfc_add_block_to_block (&post_block, &se.post); + string = se.expr; + string_len = se.string_length; + + gfc_init_se (&se, NULL); + gfc_conv_expr (&se, set_expr); + gfc_conv_string_parameter (&se); + gfc_add_block_to_block (&block, &se.pre); + gfc_add_block_to_block (&post_block, &se.post); + set = se.expr; + set_len = se.string_length; + + gfc_init_se (&se, NULL); + gfc_conv_expr 
(&se, pos_expr); + gfc_add_block_to_block (&block, &se.pre); + gfc_add_block_to_block (&post_block, &se.post); + pos = se.expr; + pos_for_call = fold_convert (gfc_charlen_type_node, pos); + + if (back_expr) + { + gfc_init_se (&se, NULL); + gfc_conv_expr (&se, back_expr); + gfc_add_block_to_block (&block, &se.pre); + gfc_add_block_to_block (&post_block, &se.post); + back = se.expr; + } + else + back = logical_false_node; + + if (string_expr->ts.kind == 1) + fndecl = gfor_fndecl_string_split; + else if (string_expr->ts.kind == 4) + fndecl = gfor_fndecl_string_split_char4; + else + gcc_unreachable (); + + call = build_call_expr_loc (input_location, fndecl, 6, string_len, string, + set_len, set, pos_for_call, back); + gfc_add_modify (&block, pos, fold_convert (TREE_TYPE (pos), call)); + + gfc_add_block_to_block (&block, &post_block); + return gfc_finish_block (&block); +} /* Return a character string containing the tty name. */ @@ -13261,6 +13329,10 @@ gfc_conv_intrinsic_subroutine (gfc_code *code) res = conv_intrinsic_system_clock (code); break; + case GFC_ISYM_SPLIT: + res = conv_intrinsic_split (code); + break; + default: res = NULL_TREE; break; diff --git a/gcc/fortran/trans-stmt.cc b/gcc/fortran/trans-stmt.cc index f105401..b4ddf75 100644 --- a/gcc/fortran/trans-stmt.cc +++ b/gcc/fortran/trans-stmt.cc @@ -6710,7 +6710,6 @@ gfc_trans_allocate (gfc_code * code, gfc_omp_namelist *omp_allocate) stmtblock_t block; stmtblock_t post; stmtblock_t final_block; - tree nelems; bool upoly_expr, tmp_expr3_len_flag = false, al_len_needs_set, is_coarray; bool needs_caf_sync, caf_refs_comp; bool e3_has_nodescriptor = false; @@ -7242,7 +7241,6 @@ gfc_trans_allocate (gfc_code * code, gfc_omp_namelist *omp_allocate) to handle the complete array allocation. Only the element size needs to be provided, which is done most of the time by the pre-evaluation step. 
*/ - nelems = NULL_TREE; if (expr3_len && (code->expr3->ts.type == BT_CHARACTER || code->expr3->ts.type == BT_CLASS)) { @@ -7313,9 +7311,8 @@ gfc_trans_allocate (gfc_code * code, gfc_omp_namelist *omp_allocate) } - if (!gfc_array_allocate (&se, expr, stat, errmsg, errlen, - label_finish, tmp, &nelems, - e3rhs ? e3rhs : code->expr3, + if (!gfc_array_allocate (&se, expr, stat, errmsg, errlen, label_finish, + tmp, e3rhs ? e3rhs : code->expr3, e3_is == E3_DESC ? expr3 : NULL_TREE, e3_has_nodescriptor, omp_alloc_item, code->ext.alloc.ts.type != BT_UNKNOWN)) diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h index 461b0cd..40680e9 100644 --- a/gcc/fortran/trans.h +++ b/gcc/fortran/trans.h @@ -961,6 +961,7 @@ extern GTY(()) tree gfor_fndecl_string_scan; extern GTY(()) tree gfor_fndecl_string_verify; extern GTY(()) tree gfor_fndecl_string_trim; extern GTY(()) tree gfor_fndecl_string_minmax; +extern GTY(()) tree gfor_fndecl_string_split; extern GTY(()) tree gfor_fndecl_adjustl; extern GTY(()) tree gfor_fndecl_adjustr; extern GTY(()) tree gfor_fndecl_select_string; @@ -972,6 +973,7 @@ extern GTY(()) tree gfor_fndecl_string_scan_char4; extern GTY(()) tree gfor_fndecl_string_verify_char4; extern GTY(()) tree gfor_fndecl_string_trim_char4; extern GTY(()) tree gfor_fndecl_string_minmax_char4; +extern GTY(()) tree gfor_fndecl_string_split_char4; extern GTY(()) tree gfor_fndecl_adjustl_char4; extern GTY(()) tree gfor_fndecl_adjustr_char4; extern GTY(()) tree gfor_fndecl_select_string_char4; diff --git a/gcc/gcov-io.cc b/gcc/gcov-io.cc index f39b4bd..dd3fc88 100644 --- a/gcc/gcov-io.cc +++ b/gcc/gcov-io.cc @@ -69,7 +69,7 @@ gcov_position (void) /* Return nonzero if the error flag is set. */ /* We need to expose this function when compiling for gcov-tool. 
*/ -#ifndef IN_GCOV_TOOL +#if !defined (IN_GCOV_TOOL) && !defined (IN_GCC) static inline #endif int diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h index f3e3a1c..313c15c 100644 --- a/gcc/gcov-io.h +++ b/gcc/gcov-io.h @@ -387,6 +387,7 @@ char *mangle_path (char const *base); /* Available outside gcov */ GCOV_LINKAGE void gcov_write (const void *, unsigned) ATTRIBUTE_HIDDEN; GCOV_LINKAGE void gcov_write_unsigned (gcov_unsigned_t) ATTRIBUTE_HIDDEN; +GCOV_LINKAGE int gcov_is_error (void); #endif #if !IN_GCOV && !IN_LIBGCOV diff --git a/gcc/hooks.cc b/gcc/hooks.cc index 951825d..76cb5931 100644 --- a/gcc/hooks.cc +++ b/gcc/hooks.cc @@ -117,6 +117,13 @@ hook_bool_mode_const_rtx_true (machine_mode, const_rtx) return true; } +/* Generic hook that takes (machine_mode, int, unsigned) and returns false. */ +bool +hook_bool_mode_int_unsigned_false (machine_mode, int, unsigned) +{ + return false; +} + /* Generic hook that takes (machine_mode, rtx) and returns false. */ bool hook_bool_mode_rtx_false (machine_mode, rtx) diff --git a/gcc/hooks.h b/gcc/hooks.h index c0663bf..e95bd11 100644 --- a/gcc/hooks.h +++ b/gcc/hooks.h @@ -36,6 +36,7 @@ extern bool hook_bool_mode_true (machine_mode); extern bool hook_bool_mode_mode_true (machine_mode, machine_mode); extern bool hook_bool_mode_const_rtx_false (machine_mode, const_rtx); extern bool hook_bool_mode_const_rtx_true (machine_mode, const_rtx); +extern bool hook_bool_mode_int_unsigned_false (machine_mode, int, unsigned); extern bool hook_bool_mode_rtx_false (machine_mode, rtx); extern bool hook_bool_mode_rtx_true (machine_mode, rtx); extern bool hook_bool_const_rtx_insn_const_rtx_insn_true (const rtx_insn *, diff --git a/gcc/m2/ChangeLog b/gcc/m2/ChangeLog index 6babeb9..2406b95 100644 --- a/gcc/m2/ChangeLog +++ b/gcc/m2/ChangeLog @@ -1,3 +1,36 @@ +2025-07-29 Gaius Mulley <gaiusmod2@gmail.com> + + * gm2-compiler/M2GenGCC.mod (FoldBecomes): Remove all + local variables. + (CodeIndrX): Remove length. + Remove newstr. 
+ * gm2-compiler/M2Range.mod (FoldTypeIndrX): Remove desType. + +2025-07-29 Gaius Mulley <gaiusmod2@gmail.com> + + PR modula2/121289 + * gm2-compiler/M2Students.def (CheckVariableAgainstKeyword): New + parameter tok. + * gm2-compiler/M2Students.mod (CheckVariableAgainstKeyword): New + parameter tok. + Pass tok to PerformVariableKeywordCheck. + (PerformVariableKeywordCheck): New parameter tok. + Pass tok to MetaErrorStringT0. + * gm2-compiler/P2SymBuild.mod (BuildVariable): Pass tok to + CheckVariableAgainstKeyword. + * gm2-libs-iso/LowLong.mod (except): Replace with ... + (exceptSrc): ... this. + * gm2-libs-iso/LowReal.mod (except): Replace with ... + (exceptSrc): ... this. + * gm2-libs-iso/LowShort.mod (except): Replace with ... + (exceptSrc): ... this. + * gm2-libs-iso/Processes.mod (Wait): Replace from with fromCor. + * gm2-libs-iso/RndFile.mod (EndPos): Replace end with endP. + * gm2-libs/SCmdArgs.mod (GetArg): Replace start with startPos. + Replace end with endPos. + (NArg): Replace start with startPos. + Replace end with endPos. 
+ 2025-07-25 David Malcolm <dmalcolm@redhat.com> * gm2-gcc/m2linemap.cc: Update usage of "diagnostic_info" to diff --git a/gcc/m2/gm2-compiler/M2GenGCC.mod b/gcc/m2/gm2-compiler/M2GenGCC.mod index 4a9ced3..2507c53 100644 --- a/gcc/m2/gm2-compiler/M2GenGCC.mod +++ b/gcc/m2/gm2-compiler/M2GenGCC.mod @@ -2903,9 +2903,6 @@ END CheckStop ; *) PROCEDURE FoldBecomes (p: WalkAction; bb: BasicBlock; quad: CARDINAL) ; -VAR - op : QuadOperator ; - des, op2, expr: CARDINAL ; BEGIN IF DeclaredOperandsBecomes (p, quad) THEN @@ -8154,8 +8151,6 @@ VAR rightpos, typepos, indrxpos : CARDINAL ; - length, - newstr : tree ; location : location_t ; BEGIN GetQuadOtok (quad, indrxpos, op, left, type, right, diff --git a/gcc/m2/gm2-compiler/M2Range.mod b/gcc/m2/gm2-compiler/M2Range.mod index dcac2ba..f1516d3 100644 --- a/gcc/m2/gm2-compiler/M2Range.mod +++ b/gcc/m2/gm2-compiler/M2Range.mod @@ -1869,14 +1869,12 @@ END FoldTypeAssign ; PROCEDURE FoldTypeIndrX (q: CARDINAL; tokenNo: CARDINAL; des, expr: CARDINAL; r: CARDINAL) ; VAR - desType, exprType: CARDINAL ; BEGIN (* Need to skip over a variable or temporary in des and expr so long as expr is not a procedure. In the case of des = *expr, both expr and des will be variables due to the property of indirection. *) - desType := GetType (des) ; IF IsProcedure (expr) THEN (* Must not GetType for a procedure as it gives the return type. *) diff --git a/gcc/m2/gm2-compiler/M2Students.def b/gcc/m2/gm2-compiler/M2Students.def index 7d67a0a..a3ecdcd 100644 --- a/gcc/m2/gm2-compiler/M2Students.def +++ b/gcc/m2/gm2-compiler/M2Students.def @@ -39,7 +39,7 @@ EXPORT QUALIFIED StudentVariableCheck, CheckVariableAgainstKeyword ; as a keyword except for its case. 
*) -PROCEDURE CheckVariableAgainstKeyword (name: Name) ; +PROCEDURE CheckVariableAgainstKeyword (tok: CARDINAL; name: Name) ; (* diff --git a/gcc/m2/gm2-compiler/M2Students.mod b/gcc/m2/gm2-compiler/M2Students.mod index e539eb0..3df160a 100644 --- a/gcc/m2/gm2-compiler/M2Students.mod +++ b/gcc/m2/gm2-compiler/M2Students.mod @@ -25,7 +25,7 @@ IMPLEMENTATION MODULE M2Students ; FROM SymbolTable IMPORT FinalSymbol, IsVar, IsProcedure, IsModule, GetMainModule, IsType, NulSym, IsRecord, GetSymName, GetNth, GetNthProcedure, GetDeclaredMod, NoOfParam ; FROM NameKey IMPORT GetKey, WriteKey, MakeKey, IsSameExcludingCase, NulName, makekey, KeyToCharStar ; -FROM M2MetaError IMPORT MetaErrorString0, MetaError2 ; +FROM M2MetaError IMPORT MetaErrorStringT0, MetaError2 ; FROM Lists IMPORT List, InitList, IsItemInList, IncludeItemIntoList ; FROM M2Reserved IMPORT IsReserved, toktype ; FROM DynamicStrings IMPORT String, InitString, KillString, ToUpper, InitStringCharStar, string, Mark, ToUpper, Dup ; @@ -78,11 +78,11 @@ END IsNotADuplicateName ; as a keyword except for its case. *) -PROCEDURE CheckVariableAgainstKeyword (name: Name) ; +PROCEDURE CheckVariableAgainstKeyword (tok: CARDINAL; name: Name) ; BEGIN IF StyleChecking THEN - PerformVariableKeywordCheck (name) + PerformVariableKeywordCheck (tok, name) END END CheckVariableAgainstKeyword ; @@ -91,7 +91,7 @@ END CheckVariableAgainstKeyword ; PerformVariableKeywordCheck - performs the check and constructs the metaerror notes if appropriate. 
*) -PROCEDURE PerformVariableKeywordCheck (name: Name) ; +PROCEDURE PerformVariableKeywordCheck (tok: CARDINAL; name: Name) ; VAR upper : Name ; token : toktype ; @@ -105,9 +105,11 @@ BEGIN THEN IF IsNotADuplicateName (name) THEN - MetaErrorString0 (Sprintf2 (Mark (InitString ('either the identifier has the same name as a keyword or alternatively a keyword has the wrong case ({%%K%s} and {!%%O:{%%K%s}})')), - upperS, orig)) ; - MetaErrorString0 (Sprintf1 (Mark (InitString ('the symbol name {!%%O:{%%K%s}} is legal as an identifier, however as such it might cause confusion and is considered bad programming practice')), orig)) + MetaErrorStringT0 (tok, + Sprintf2 (Mark (InitString ('either the identifier has the same name as a keyword or alternatively a keyword has the wrong case ({%%K%s} and {!%%O:{%%K%s}})')), + upperS, orig)) ; + MetaErrorStringT0 (tok, + Sprintf1 (Mark (InitString ('the symbol name {!%%O:{%%K%s}} is legal as an identifier, however as such it might cause confusion and is considered bad programming practice')), orig)) END END ; upperS := KillString (upperS) ; diff --git a/gcc/m2/gm2-compiler/P2SymBuild.mod b/gcc/m2/gm2-compiler/P2SymBuild.mod index 3bb3e47..54e624f 100644 --- a/gcc/m2/gm2-compiler/P2SymBuild.mod +++ b/gcc/m2/gm2-compiler/P2SymBuild.mod @@ -1179,8 +1179,8 @@ BEGIN PopT (n) ; i := 1 ; WHILE i <= n DO - CheckVariableAgainstKeyword (OperandT (n+1-i)) ; tok := OperandTok (n+1-i) ; + CheckVariableAgainstKeyword (tok, OperandT (n+1-i)) ; Var := MakeVar (tok, OperandT (n+1-i)) ; AtAddress := OperandA (n+1-i) ; IF AtAddress # NulSym diff --git a/gcc/m2/gm2-libs-iso/LowLong.mod b/gcc/m2/gm2-libs-iso/LowLong.mod index 92c7d91..f611923 100644 --- a/gcc/m2/gm2-libs-iso/LowLong.mod +++ b/gcc/m2/gm2-libs-iso/LowLong.mod @@ -182,7 +182,7 @@ BEGIN IF n<0 THEN (* exception raised *) - RAISE(except, ORD(badparam), + RAISE(exceptSrc, ORD(badparam), 'LowLong.trunc: cannot truncate to a negative number of digits') ; RETURN x ELSE @@ -230,7 +230,7 @@ 
BEGIN IF n<0 THEN (* exception raised *) - RAISE(except, ORD(badparam), + RAISE(exceptSrc, ORD(badparam), 'LowLong.round: cannot round to a negative number of digits') ; RETURN x ELSE @@ -287,12 +287,12 @@ END currentMode ; PROCEDURE IsLowException () : BOOLEAN ; BEGIN - RETURN( IsExceptionalExecution() AND IsCurrentSource(except) ) + RETURN( IsExceptionalExecution () AND IsCurrentSource (exceptSrc) ) END IsLowException ; VAR - except: ExceptionSource ; + exceptSrc: ExceptionSource ; BEGIN - AllocateSource(except) + AllocateSource (exceptSrc) END LowLong. diff --git a/gcc/m2/gm2-libs-iso/LowReal.mod b/gcc/m2/gm2-libs-iso/LowReal.mod index 580f36b..6d9ea00 100644 --- a/gcc/m2/gm2-libs-iso/LowReal.mod +++ b/gcc/m2/gm2-libs-iso/LowReal.mod @@ -183,8 +183,8 @@ BEGIN IF n<0 THEN (* exception raised *) - RAISE(except, ORD(badparam), - 'LowReal.trunc: cannot truncate to a negative number of digits') ; + RAISE (exceptSrc, ORD(badparam), + 'LowReal.trunc: cannot truncate to a negative number of digits') ; RETURN x ELSE r := dtoa(x, maxsignificant, 100, point, sign) ; @@ -231,8 +231,8 @@ BEGIN IF n<0 THEN (* exception raised *) - RAISE(except, ORD(badparam), - 'LowReal.round: cannot round to a negative number of digits') ; + RAISE (exceptSrc, ORD(badparam), + 'LowReal.round: cannot round to a negative number of digits') ; RETURN x ELSE s := RealToFloatString(x, n) ; @@ -288,12 +288,12 @@ END currentMode ; PROCEDURE IsLowException () : BOOLEAN ; BEGIN - RETURN( IsExceptionalExecution() AND IsCurrentSource(except) ) + RETURN( IsExceptionalExecution () AND IsCurrentSource (exceptSrc) ) END IsLowException ; VAR - except: ExceptionSource ; + exceptSrc: ExceptionSource ; BEGIN - AllocateSource(except) + AllocateSource (exceptSrc) END LowReal. 
diff --git a/gcc/m2/gm2-libs-iso/LowShort.mod b/gcc/m2/gm2-libs-iso/LowShort.mod index 8531a88..62e4887 100644 --- a/gcc/m2/gm2-libs-iso/LowShort.mod +++ b/gcc/m2/gm2-libs-iso/LowShort.mod @@ -183,8 +183,8 @@ BEGIN IF n<0 THEN (* exception raised *) - RAISE(except, ORD(badparam), - 'LowLong.trunc: cannot truncate to a negative number of digits') ; + RAISE (exceptSrc, ORD(badparam), + 'LowLong.trunc: cannot truncate to a negative number of digits') ; RETURN x ELSE r := dtoa(x, maxsignificant, 100, point, sign) ; @@ -231,8 +231,8 @@ BEGIN IF n<0 THEN (* exception raised *) - RAISE(except, ORD(badparam), - 'LowLong.round: cannot round to a negative number of digits') ; + RAISE (exceptSrc, ORD(badparam), + 'LowLong.round: cannot round to a negative number of digits') ; RETURN x ELSE s := RealToFloatString(x, n) ; @@ -288,12 +288,12 @@ END currentMode ; PROCEDURE IsLowException () : BOOLEAN ; BEGIN - RETURN( IsExceptionalExecution() AND IsCurrentSource(except) ) + RETURN( IsExceptionalExecution () AND IsCurrentSource (exceptSrc) ) END IsLowException ; VAR - except: ExceptionSource ; + exceptSrc: ExceptionSource ; BEGIN - AllocateSource(except) + AllocateSource (exceptSrc) END LowShort. 
diff --git a/gcc/m2/gm2-libs-iso/Processes.mod b/gcc/m2/gm2-libs-iso/Processes.mod index 8ef22c0..b0c1b69 100644 --- a/gcc/m2/gm2-libs-iso/Processes.mod +++ b/gcc/m2/gm2-libs-iso/Processes.mod @@ -441,7 +441,7 @@ PROCEDURE Wait ; VAR calling, best : ProcessId ; - from : COROUTINE ; + fromCor: COROUTINE ; BEGIN IF debugging THEN @@ -451,17 +451,17 @@ BEGIN OnWaitingQueue (calling) ; best := chooseProcess () ; currentId := best ; - from := calling^.context ; + fromCor := calling^.context ; IF debugging THEN displayProcesses ("Wait about to perform IOTRANSFER") END ; - IOTRANSFER (from, currentId^.context) ; + IOTRANSFER (fromCor, currentId^.context) ; IF debugging THEN displayProcesses ("Wait after IOTRANSFER") END ; - currentId^.context := from ; + currentId^.context := fromCor ; currentId := calling ; OnReadyQueue (calling) ; IF debugging diff --git a/gcc/m2/gm2-libs-iso/RndFile.mod b/gcc/m2/gm2-libs-iso/RndFile.mod index e04cd8f..0a2264a 100644 --- a/gcc/m2/gm2-libs-iso/RndFile.mod +++ b/gcc/m2/gm2-libs-iso/RndFile.mod @@ -398,9 +398,9 @@ PROCEDURE EndPos (cid: ChanId): FilePos; position after which there have been no writes. 
*) VAR - d : DeviceTablePtr ; - end, - old: FilePos ; + d : DeviceTablePtr ; + endP, + old : FilePos ; BEGIN IF IsRndFile(cid) THEN @@ -410,9 +410,9 @@ BEGIN old := CurrentPos(cid) ; FIO.SetPositionFromEnd(RTio.GetFile(cid), 0) ; checkErrno(dev, d) ; - end := CurrentPos(cid) ; + endP := CurrentPos(cid) ; FIO.SetPositionFromBeginning(RTio.GetFile(cid), old) ; - RETURN( end ) + RETURN( endP ) END ELSE RAISEdevException(cid, did, IOChan.wrongDevice, diff --git a/gcc/m2/gm2-libs/SCmdArgs.mod b/gcc/m2/gm2-libs/SCmdArgs.mod index ed76fc4..8443d5f 100644 --- a/gcc/m2/gm2-libs/SCmdArgs.mod +++ b/gcc/m2/gm2-libs/SCmdArgs.mod @@ -132,26 +132,27 @@ PROCEDURE GetArg (CmdLine: String; VAR i : CARDINAL ; sn, - start, end: INTEGER ; + startPos, + endPos : INTEGER ; ch : CHAR ; BEGIN i := 0 ; - start := 0 ; - end := Length(CmdLine) ; + startPos := 0 ; + endPos := Length(CmdLine) ; WHILE i<n DO - start := skipWhite(CmdLine, start, end) ; - sn := skipNextArg(CmdLine, start, end) ; - IF sn<end + startPos := skipWhite(CmdLine, startPos, endPos) ; + sn := skipNextArg(CmdLine, startPos, endPos) ; + IF sn<endPos THEN - start := sn ; + startPos := sn ; INC(i) ELSE RETURN( FALSE ) END END ; - start := skipWhite(CmdLine, start, end) ; - sn := skipNextArg(CmdLine, start, end) ; - Argi := Slice(CmdLine, start, sn) ; + startPos := skipWhite(CmdLine, startPos, endPos) ; + sn := skipNextArg(CmdLine, startPos, endPos) ; + Argi := Slice(CmdLine, startPos, sn) ; RETURN( TRUE ) END GetArg ; @@ -165,17 +166,18 @@ PROCEDURE Narg (CmdLine: String) : CARDINAL ; VAR n : CARDINAL ; s, - start, end: INTEGER ; + startPos, + endPos : INTEGER ; BEGIN n := 0 ; - start := 0 ; - end := Length(CmdLine) ; + startPos := 0 ; + endPos := Length(CmdLine) ; LOOP - start := skipWhite(CmdLine, start, end) ; - s := skipNextArg(CmdLine, start, end) ; - IF s<end + startPos := skipWhite(CmdLine, startPos, endPos) ; + s := skipNextArg(CmdLine, startPos, endPos) ; + IF s<endPos THEN - start := s ; + startPos := s ; INC(n) ELSE 
RETURN( n ) diff --git a/gcc/match.pd b/gcc/match.pd index 4903552..82e6e29 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3595,22 +3595,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) return (T)x; } while WT is uint128_t, T is uint8_t, uint16_t, uint32_t or uint64_t. */ - (convert@4 (min (widen_mult:c@3 (convert@5 (convert @0)) - (convert@6 (convert @1))) + (convert (min (widen_mult:c@3 (convert@4 (convert @0)) + (convert@5 (convert @1))) INTEGER_CST@2)) - (if (types_match (type, @0, @1) && types_match (type, @4)) + (if (types_match (type, @0, @1)) (with { unsigned prec = TYPE_PRECISION (type); unsigned widen_prec = TYPE_PRECISION (TREE_TYPE (@3)); + unsigned cvt4_prec = TYPE_PRECISION (TREE_TYPE (@4)); unsigned cvt5_prec = TYPE_PRECISION (TREE_TYPE (@5)); - unsigned cvt6_prec = TYPE_PRECISION (TREE_TYPE (@6)); wide_int c2 = wi::to_wide (@2); wide_int max = wi::mask (prec, false, widen_prec); bool c2_is_max_p = wi::eq_p (c2, max); - bool widen_mult_p = cvt5_prec == cvt6_prec && widen_prec == cvt6_prec * 2; + bool widen_mult_p = cvt4_prec == cvt5_prec && widen_prec == cvt5_prec * 2; } (if (widen_prec > prec && c2_is_max_p && widen_mult_p))))) + (match (unsigned_integer_sat_mul @0 @1) + (convert (min (mult:c@3 (convert @0) (convert @1)) INTEGER_CST@2)) + (if (types_match (type, @0, @1)) + (with + { + unsigned prec = TYPE_PRECISION (type); + unsigned widen_prec = TYPE_PRECISION (TREE_TYPE (@3)); + wide_int c2 = wi::to_wide (@2); + wide_int max = wi::mask (prec, false, widen_prec); + bool c2_is_max_p = wi::eq_p (c2, max); + } + (if (widen_prec > prec && c2_is_max_p))))) ) /* The boundary condition for case 10: IMM = 1: diff --git a/gcc/optc-save-gen.awk b/gcc/optc-save-gen.awk index a3d7e5a..31756ec 100644 --- a/gcc/optc-save-gen.awk +++ b/gcc/optc-save-gen.awk @@ -1313,6 +1313,12 @@ for (i = 0; i < n_opts; i++) { # offloading is enabled. 
if (flag_set_p("Target", flags[i])) var_target_opt[n_opt_val] = 1; + + # These options should not be passed from host to target, but + # are not actually target specific. + if (flag_set_p("NoOffload", flags[i])) + var_target_opt[n_opt_val] = 2; + n_opt_val++; } } @@ -1393,7 +1399,7 @@ for (i = 0; i < n_opt_val; i++) { # Do not stream out target-specific opts if offloading is # enabled. if (var_target_opt[i]) - print " if (!lto_stream_offload_p)" + print " if (!lto_stream_offload_p) {" # If applicable, encode the streamed value. if (var_opt_optimize_init[i]) { print " if (" var_opt_optimize_init[i] " > (" var_opt_val_type[i] ") 10)"; @@ -1403,6 +1409,8 @@ for (i = 0; i < n_opt_val; i++) { } else { print " bp_pack_var_len_" sgn " (bp, ptr->" name");"; } + if (var_target_opt[i]) + print "}" } } print " for (size_t i = 0; i < ARRAY_SIZE (ptr->explicit_mask); i++)"; @@ -1418,10 +1426,14 @@ print " struct cl_optimization *ptr ATTRIBUTE_UNUSED)" print "{"; for (i = 0; i < n_opt_val; i++) { name = var_opt_val[i] - if (var_target_opt[i]) { + if (var_target_opt[i] == 1) { print "#ifdef ACCEL_COMPILER" print "#error accel compiler cannot define Optimization attribute for target-specific option " name; print "#else" + } else if (var_target_opt[i] == 2) { + print "#ifdef ACCEL_COMPILER" + print " ptr->" name " = global_options." name ";" + print "#else" } otype = var_opt_val_type[i]; if (otype ~ "^const char \\**$") { @@ -1489,6 +1501,9 @@ for (i = 0; i < n_opts; i++) { if (flag_set_p("Warning", flags[i])) continue; + if (flag_set_p("NoOffload", flags[i])) + continue; + if (name in checked_options) continue; checked_options[name]++ diff --git a/gcc/output.h b/gcc/output.h index 0c329ff..51c2d36 100644 --- a/gcc/output.h +++ b/gcc/output.h @@ -545,6 +545,9 @@ extern GTY(()) section *bss_noswitch_section; extern GTY(()) section *in_section; extern GTY(()) bool in_cold_section_p; +/* MAX bit alignment for mergable sections. 
*/ +#define MAX_ALIGN_MERGABLE 256 + extern section *get_unnamed_section (unsigned int, void (*) (const char *), const char *); extern section *get_section (const char *, unsigned int, tree, @@ -557,6 +560,9 @@ extern rtx get_section_anchor (struct object_block *, HOST_WIDE_INT, extern section *mergeable_constant_section (machine_mode, unsigned HOST_WIDE_INT, unsigned int); +extern section *mergeable_constant_section (unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, + unsigned int); extern section *function_section (tree); extern section *unlikely_text_section (void); extern section *current_function_section (void); diff --git a/gcc/params.opt b/gcc/params.opt index c7d5fd4..ac1b2c7 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -1226,7 +1226,7 @@ Common Joined UInteger Var(param_use_canonical_types) Init(1) IntegerRange(0, 1) Whether to use canonical types. -param=vect-epilogues-nomask= -Common Joined UInteger Var(param_vect_epilogues_nomask) Init(1) IntegerRange(0, 1) Param Optimization +Common Joined UInteger Var(param_vect_epilogues_nomask) Init(1) IntegerRange(0, 1) Param Optimization NoOffload Enable loop epilogue vectorization using smaller vector size. -param=vect-max-layout-candidates= @@ -1246,11 +1246,11 @@ Common Joined UInteger Var(param_vect_max_version_for_alignment_checks) Init(6) Bound on number of runtime checks inserted by the vectorizer's loop versioning for alignment check. -param=vect-partial-vector-usage= -Common Joined UInteger Var(param_vect_partial_vector_usage) Init(2) IntegerRange(0, 2) Param Optimization +Common Joined UInteger Var(param_vect_partial_vector_usage) Init(2) IntegerRange(0, 2) Param Optimization NoOffload Controls how loop vectorizer uses partial vectors. 0 means never, 1 means only for loops whose need to iterate can be removed, 2 means for all loops. The default value is 2. 
-param=vect-inner-loop-cost-factor= -Common Joined UInteger Var(param_vect_inner_loop_cost_factor) Init(50) IntegerRange(1, 10000) Param Optimization +Common Joined UInteger Var(param_vect_inner_loop_cost_factor) Init(50) IntegerRange(1, 10000) Param Optimization NoOffload The maximum factor which the loop vectorizer applies to the cost of statements in an inner loop relative to the loop being vectorized. -param=vect-induction-float= diff --git a/gcc/predict.cc b/gcc/predict.cc index 872f54d..5639d81 100644 --- a/gcc/predict.cc +++ b/gcc/predict.cc @@ -245,7 +245,10 @@ unlikely_executed_edge_p (edge e) { return (e->src->count == profile_count::zero () || e->probability == profile_probability::never ()) - || (e->flags & (EDGE_EH | EDGE_FAKE)); + || (e->flags & EDGE_FAKE) + /* If we read profile and know EH edge is executed, trust it. + Otherwise we consider EH edges never executed. */ + || ((e->flags & EDGE_EH) && !e->probability.reliable_p ()); } /* Return true if edge E of function FUN is probably never executed. */ @@ -830,6 +833,26 @@ unlikely_executed_stmt_p (gimple *stmt) { if (!is_gimple_call (stmt)) return false; + + /* Those calls are inserted by optimizers when code is known to be + unreachable or undefined. */ + if (gimple_call_builtin_p (stmt, BUILT_IN_UNREACHABLE) + || gimple_call_builtin_p (stmt, BUILT_IN_UNREACHABLE_TRAP) + || gimple_call_builtin_p (stmt, BUILT_IN_TRAP)) + return false; + + /* Checks below do not need to be fully reliable. Cold attribute may be + misplaced by user and in the presence of comdat we may result in call to + function with 0 profile having non-zero profile. + + We later detect that profile is lost and will drop the profile of the + comdat. + + So if we think profile count is reliable, do not try to apply these + heuristics. 
*/ + if (gimple_bb (stmt)->count.reliable_p () + && gimple_bb (stmt)->count.nonzero_p ()) + return gimple_bb (stmt)->count == profile_count::zero (); /* NORETURN attribute alone is not strong enough: exit() may be quite likely executed once during program run. */ if (gimple_call_fntype (stmt) @@ -3269,7 +3292,8 @@ tree_estimate_probability (bool dry_run) calculate_dominance_info (CDI_POST_DOMINATORS); /* Decide which edges are known to be unlikely. This improves later branch prediction. */ - determine_unlikely_bbs (); + if (!dry_run) + determine_unlikely_bbs (); bb_predictions = new hash_map<const_basic_block, edge_prediction *>; ssa_expected_value = new hash_map<int_hash<unsigned, 0>, expected_value>; diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc index cbe61b4..c723a07 100644 --- a/gcc/simplify-rtx.cc +++ b/gcc/simplify-rtx.cc @@ -8344,6 +8344,15 @@ simplify_context::simplify_subreg (machine_mode outermode, rtx op, return simplify_gen_binary (GET_CODE (op), outermode, op0, op1); } + /* Attempt to simplify WORD_MODE SUBREGs of unary bitwise expression. */ + if (outermode == word_mode && GET_CODE (op) == NOT + && SCALAR_INT_MODE_P (innermode)) + { + rtx op0 = simplify_subreg (outermode, XEXP (op, 0), innermode, byte); + if (op0) + return simplify_gen_unary (GET_CODE (op), outermode, op0, outermode); + } + scalar_int_mode int_outermode, int_innermode; if (is_a <scalar_int_mode> (outermode, &int_outermode) && is_a <scalar_int_mode> (innermode, &int_innermode) @@ -8394,9 +8403,45 @@ simplify_context::simplify_subreg (machine_mode outermode, rtx op, && VECTOR_MODE_P (innermode) && known_eq (GET_MODE_NUNITS (outermode), GET_MODE_NUNITS (innermode)) && known_eq (GET_MODE_UNIT_SIZE (outermode), - GET_MODE_UNIT_SIZE (innermode))) + GET_MODE_UNIT_SIZE (innermode))) return simplify_gen_relational (GET_CODE (op), outermode, innermode, XEXP (op, 0), XEXP (op, 1)); + + /* Distribute non-paradoxical subregs through logic ops in cases where one term + disappears. 
+ + (subreg:M1 (and:M2 X C1)) -> (subreg:M1 X) + (subreg:M1 (ior:M2 X C1)) -> (subreg:M1 C1) + (subreg:M1 (xor:M2 X C1)) -> (subreg:M1 (not:M2 X)) + + if M2 is no smaller than M1 and (subreg:M1 C1) is all-ones. + + (subreg:M1 (and:M2 X C2)) -> (subreg:M1 C2) + (subreg:M1 (ior/xor:M2 X C2)) -> (subreg:M1 X) + + if M2 is no smaller than M1 and (subreg:M1 C2) is zero. */ + if (known_ge (innersize, outersize) + && GET_MODE_CLASS (outermode) == GET_MODE_CLASS (innermode) + && (GET_CODE (op) == AND || GET_CODE (op) == IOR || GET_CODE (op) == XOR) + && CONSTANT_P (XEXP (op, 1))) + { + rtx op1_subreg = simplify_subreg (outermode, XEXP (op, 1), innermode, byte); + if (op1_subreg == CONSTM1_RTX (outermode)) + { + if (GET_CODE (op) == IOR) + return op1_subreg; + rtx op0 = XEXP (op, 0); + if (GET_CODE (op) == XOR) + op0 = simplify_gen_unary (NOT, innermode, op0, innermode); + return simplify_gen_subreg (outermode, op0, innermode, byte); + } + + if (op1_subreg == CONST0_RTX (outermode)) + return (GET_CODE (op) == AND + ? op1_subreg + : simplify_gen_subreg (outermode, XEXP (op, 0), innermode, byte)); + } + return NULL_RTX; } @@ -8668,6 +8713,43 @@ test_scalar_int_ext_ops (machine_mode bmode, machine_mode smode) lowpart_subreg (bmode, sreg, smode), bmode), sreg); + + /* Test extensions, followed by logic ops, followed by truncations. 
*/ + rtx bsubreg = lowpart_subreg (bmode, sreg, smode); + rtx smask = gen_int_mode (GET_MODE_MASK (smode), bmode); + rtx inv_smask = gen_int_mode (~GET_MODE_MASK (smode), bmode); + ASSERT_RTX_EQ (lowpart_subreg (smode, + simplify_gen_binary (AND, bmode, + bsubreg, smask), + bmode), + sreg); + ASSERT_RTX_EQ (lowpart_subreg (smode, + simplify_gen_binary (AND, bmode, + bsubreg, inv_smask), + bmode), + const0_rtx); + ASSERT_RTX_EQ (lowpart_subreg (smode, + simplify_gen_binary (IOR, bmode, + bsubreg, smask), + bmode), + constm1_rtx); + ASSERT_RTX_EQ (lowpart_subreg (smode, + simplify_gen_binary (IOR, bmode, + bsubreg, inv_smask), + bmode), + sreg); + ASSERT_RTX_EQ (lowpart_subreg (smode, + simplify_gen_binary (XOR, bmode, + bsubreg, smask), + bmode), + lowpart_subreg (smode, + gen_rtx_NOT (bmode, bsubreg), + bmode)); + ASSERT_RTX_EQ (lowpart_subreg (smode, + simplify_gen_binary (XOR, bmode, + bsubreg, inv_smask), + bmode), + sreg); } /* Verify more simplifications of integer extension/truncation. diff --git a/gcc/symtab.cc b/gcc/symtab.cc index 652f66a..20dfe09 100644 --- a/gcc/symtab.cc +++ b/gcc/symtab.cc @@ -303,6 +303,11 @@ symbol_table::change_decl_assembler_name (tree decl, tree name) warning (0, "%qD renamed after being referenced in assembly", decl); SET_DECL_ASSEMBLER_NAME (decl, name); + if (DECL_RTL_SET_P (decl)) + { + SET_DECL_RTL (decl, NULL); + make_decl_rtl (decl); + } if (alias) { gcc_assert (!IDENTIFIER_INTERNAL_P (name)); diff --git a/gcc/target.def b/gcc/target.def index 427dc40..5dd8f25 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -2060,6 +2060,20 @@ all zeros. GCC can then try to branch around the instruction instead.", (unsigned ifn), default_empty_mask_is_expensive) +/* Prefer gather/scatter loads/stores to e.g. elementwise accesses if\n\ +we cannot use a contiguous access. 
*/ +DEFHOOK +(prefer_gather_scatter, + "This hook returns TRUE if gather loads or scatter stores are cheaper on\n\ +this target than a sequence of elementwise loads or stores. The @var{mode}\n\ +and @var{scale} correspond to the @code{gather_load} and\n\ +@code{scatter_store} instruction patterns. The @var{group_size} is the\n\ +number of scalar elements in each scalar loop iteration that are to be\n\ +combined into the vector.", + bool, + (machine_mode mode, int scale, unsigned int group_size), + hook_bool_mode_int_unsigned_false) + /* Target builtin that implements vector gather operation. */ DEFHOOK (builtin_gather, diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6d62009..0280d3b 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,150 @@ +2025-07-29 Andrew Pinski <quic_apinski@quicinc.com> + + PR testsuite/121215 + * lib/profopt.exp (profopt-execute): Call cleanup-after-saved-dg-test + if returning early for the -fauto-profile case failing case. + +2025-07-29 Spencer Abson <spencer.abson@arm.com> + + * g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C: New test. + * gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c: Likewise. + * gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c: Likewise. + * gcc.target/aarch64/sve/unpacked_cond_fadd_2.c: Likewise. + * gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c: Likewise. + * gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c: Likewise. + * gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c: Likewise. + * gcc.target/aarch64/sve/unpacked_cond_fmul_2.c: Likewise. + * gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c: Likewise. + +2025-07-29 H.J. Lu <hjl.tools@gmail.com> + + PR target/121208 + * gcc.target/i386/pr121208-1a.c (dg-options): Add -mno-80387. + * gcc.target/i386/pr121208-1b.c (dg-options): Likewise. + +2025-07-29 Juergen Christ <jchrist@linux.ibm.com> + + PR testsuite/121286 + PR testsuite/121288 + * gcc.dg/vect/pr112325.c: Adjust parameters for s390. 
+ * gcc.dg/vect/pr117888-1.c: Ditto. + +2025-07-29 Richard Sandiford <richard.sandiford@arm.com> + + * gcc.target/aarch64/saturating_arithmetic_1.c: Allow w0 and w1 + to be duplicated in either order. + * gcc.target/aarch64/saturating_arithmetic_2.c: Likewise. + +2025-07-29 Richard Sandiford <richard.sandiford@arm.com> + + * gcc.target/aarch64/cmpbr.c: Support both operand orders + for 8-bit and 16-bit comparisons. + +2025-07-29 Konstantinos Eleftheriou <konstantinos.eleftheriou@vrull.eu> + + PR rtl-optimization/120660 + * gcc.dg/pr120660.c: New test. + +2025-07-29 Konstantinos Eleftheriou <konstantinos.eleftheriou@vrull.eu> + + PR rtl-optimization/119795 + * gcc.target/i386/pr119795.c: New test. + +2025-07-29 Pan Li <pan2.li@intel.com> + + * gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u64.c: Add rv64 + target for run. + * gcc.target/riscv/sat/sat_u_mul-run-1-u32-from-u64.c: Ditto. + * gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u64.c: Ditto. + * gcc.target/riscv/sat/sat_u_mul-1-u16-from-u32.c: New test. + * gcc.target/riscv/sat/sat_u_mul-1-u8-from-u16.c: New test. + * gcc.target/riscv/sat/sat_u_mul-1-u8-from-u32.c: New test. + * gcc.target/riscv/sat/sat_u_mul-2-u16-from-u64.c: New test. + * gcc.target/riscv/sat/sat_u_mul-2-u32-from-u64.c: New test. + * gcc.target/riscv/sat/sat_u_mul-2-u8-from-u64.c: New test. + * gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u32.c: New test. + * gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u16.c: New test. + * gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u32.c: New test. + +2025-07-29 Richard Biener <rguenther@suse.de> + + PR tree-optimization/120687 + * gcc.dg/vect/pr120687-3.c: New testcase. + +2025-07-29 Nathaniel Shead <nathanieloshead@gmail.com> + + PR testsuite/121285 + * g++.dg/modules/class-11_a.H: Make static_asserts valid for + C++14. + +2025-07-29 Richard Biener <rguenther@suse.de> + + PR tree-optimization/120687 + * gcc.dg/vect/pr120687-1.c: New testcase. + * gcc.dg/vect/pr120687-2.c: Likewise. 
+ +2025-07-29 Gaius Mulley <gaiusmod2@gmail.com> + + PR modula2/121289 + * gm2/warnings/style/fail/badvarname.mod: New test. + * gm2/warnings/style/fail/warnings-style-fail.exp: New test. + +2025-07-29 Christophe Lyon <christophe.lyon@linaro.org> + + * gcc.dg/pr116906-1.c: Add 'dg-do run'. + * gcc.dg/pr116906-2.c: Likewise. + * gcc.dg/pr78185.c: Likewise. + +2025-07-29 Jakub Jelinek <jakub@redhat.com> + + PR middle-end/121159 + * c-c++-common/pr121159.c: New test. + * gcc.dg/plugin/must-tail-call-2.c (test_5): Don't expect an error. + +2025-07-29 Andrew Pinski <quic_apinski@quicinc.com> + + PR middle-end/120523 + * gcc.dg/tree-ssa/cswtch-7.c: New test. + +2025-07-28 Andrew Pinski <quic_apinski@quicinc.com> + + PR tree-optimization/121236 + * gcc.dg/torture/pr121236-1.c: New test. + +2025-07-28 H.J. Lu <hjl.tools@gmail.com> + + PR target/121208 + * gcc.target/i386/pr121208-1a.c: New test. + * gcc.target/i386/pr121208-1b.c: Likewise. + * gcc.target/i386/pr121208-2a.c: Likewise. + * gcc.target/i386/pr121208-2b.c: Likewise. + * gcc.target/i386/pr121208-3a.c: Likewise. + * gcc.target/i386/pr121208-3b.c: Likewise. + +2025-07-28 Thomas Schwinge <tschwinge@baylibre.com> + + * gcc.target/nvptx/march-map=sm_100.c: New. + * gcc.target/nvptx/march-map=sm_100a.c: Likewise. + * gcc.target/nvptx/march-map=sm_100f.c: Likewise. + * gcc.target/nvptx/march-map=sm_101.c: Likewise. + * gcc.target/nvptx/march-map=sm_101a.c: Likewise. + * gcc.target/nvptx/march-map=sm_101f.c: Likewise. + * gcc.target/nvptx/march-map=sm_103.c: Likewise. + * gcc.target/nvptx/march-map=sm_103a.c: Likewise. + * gcc.target/nvptx/march-map=sm_103f.c: Likewise. + * gcc.target/nvptx/march-map=sm_120.c: Likewise. + * gcc.target/nvptx/march-map=sm_120a.c: Likewise. + * gcc.target/nvptx/march-map=sm_120f.c: Likewise. + * gcc.target/nvptx/march-map=sm_121.c: Likewise. + * gcc.target/nvptx/march-map=sm_121a.c: Likewise. + * gcc.target/nvptx/march-map=sm_121f.c: Likewise. 
+ +2025-07-28 Richard Biener <rguenther@suse.de> + + PR tree-optimization/121256 + * gcc.dg/vect/vect-recurr-pr121256.c: New testcase. + * gcc.dg/vect/vect-recurr-pr121256-2.c: Likewise. + 2025-07-27 Mikael Morin <mikael@gcc.gnu.org> PR fortran/121185 diff --git a/gcc/testsuite/c-c++-common/pr121159.c b/gcc/testsuite/c-c++-common/pr121159.c new file mode 100644 index 0000000..c8c5d67 --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr121159.c @@ -0,0 +1,17 @@ +/* PR middle-end/121159 */ +/* { dg-do compile { target musttail } } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times "foo \\\(\[^\n\r]*\\\); \\\[tail call\\\] \\\[must tail call\\\]" 1 "optimized" } } */ + +[[noreturn, gnu::noipa]] void +foo (void) +{ + for (;;) + ; +} + +void +bar (void) +{ + [[gnu::musttail]] return foo (); +} diff --git a/gcc/testsuite/g++.dg/cpp/if-comma-1.C b/gcc/testsuite/g++.dg/cpp/if-comma-1.C new file mode 100644 index 0000000..0daaff9 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp/if-comma-1.C @@ -0,0 +1,42 @@ +// PR c++/120778 +// { dg-do preprocess } +// { dg-options "-pedantic-errors" } + +#if (1, 2) +#define M1 1 +#else +#error +#endif +#if 1 ? 2, 3 : 4 +#define M2 2 +#else +#error +#endif +#if 0 ? 
2, 0 : 1 +#define M3 3 +#else +#error +#endif +#if 0 || (1, 2) +#define M4 4 +#else +#error +#endif +#if 1 || (1, 2) +#define M5 5 +#else +#error +#endif +#if (1, 2) && 1 +#define M6 6 +#else +#error +#endif +#if 1 && (1, 2) +#define M7 7 +#else +#error +#endif +#if M1 + M2 + M3 + M4 + M5 + M6 + M7 != 28 +#error +#endif diff --git a/gcc/testsuite/g++.dg/cpp1z/nontype8.C b/gcc/testsuite/g++.dg/cpp1z/nontype8.C new file mode 100644 index 0000000..b81e85b --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/nontype8.C @@ -0,0 +1,12 @@ +// Test that the diagnostic mentions lack of constexpr +// { dg-do compile { target c++17 } } + +template <auto f> void g() {} +void x() +{ + using fp = void (*)(); + fp f = nullptr; // { dg-message "constexpr" } + g<f>(); // { dg-error "" } + int *p = nullptr; // { dg-message "constexpr" } + g<p>(); // { dg-error "" } +} diff --git a/gcc/testsuite/g++.dg/modules/class-11_a.H b/gcc/testsuite/g++.dg/modules/class-11_a.H index f7bbf9d..799dbdd 100644 --- a/gcc/testsuite/g++.dg/modules/class-11_a.H +++ b/gcc/testsuite/g++.dg/modules/class-11_a.H @@ -20,7 +20,7 @@ struct pr106381 { struct L1 : pr106381 { char x; // { dg-warning "offset" "" { target c++14 } } }; -static_assert(sizeof(L1) == sizeof(pr106381)); +static_assert(sizeof(L1) == sizeof(pr106381), ""); struct pr120012 { @@ -33,4 +33,4 @@ struct pr120012 { struct L2 : pr120012 { unsigned char y; // { dg-warning "offset" "" { target c++20 } } }; -static_assert(sizeof(L2) > sizeof(pr120012)); +static_assert(sizeof(L2) > sizeof(pr120012), ""); diff --git a/gcc/testsuite/g++.dg/tc1/dr49.C b/gcc/testsuite/g++.dg/tc1/dr49.C index 753d96b..6ddea6b 100644 --- a/gcc/testsuite/g++.dg/tc1/dr49.C +++ b/gcc/testsuite/g++.dg/tc1/dr49.C @@ -10,8 +10,8 @@ template struct R<&p>; // OK template struct S<&p>; // OK due to parameter adjustment int *ptr; -template struct R<ptr>; // { dg-error "argument" } -template struct S<ptr>; // { dg-error "argument" } +template struct R<ptr>; // { dg-error "template 
argument|constant expression" } +template struct S<ptr>; // { dg-error "template argument|constant expression" } int v[5]; template struct R<v>; // OK due to implicit argument conversion diff --git a/gcc/testsuite/g++.dg/template/func2.C b/gcc/testsuite/g++.dg/template/func2.C index 0116f23..360f430 100644 --- a/gcc/testsuite/g++.dg/template/func2.C +++ b/gcc/testsuite/g++.dg/template/func2.C @@ -4,8 +4,7 @@ typedef void (*fptr)(); fptr zeroptr = 0; template<typename T, fptr F> struct foo { }; template<typename T> struct foo<T,zeroptr> { }; -// { dg-error "not a valid template argument" "not valid" { target *-*-* } .-1 } -// { dg-message "must be the address" "must be the address " { target *-*-* } .-2 } +// { dg-error "template argument|constant expression" "not valid" { target *-*-* } .-1 } // The rest is needed to trigger the ICE in 4.0 to 4.3: void f() { } diff --git a/gcc/testsuite/g++.dg/tree-prof/eh1.C b/gcc/testsuite/g++.dg/tree-prof/eh1.C new file mode 100644 index 0000000..10a3596 --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-prof/eh1.C @@ -0,0 +1,34 @@ +/* { dg-options "-O3 -fdump-ipa-profile-details -fno-inline -fdump-tree-fixup_cfg3-details -fdump-tree-optimized-details" } */ +char a[10000]; +char b[10000]; +int sz = 1000; + +__attribute__((noipa)) + void test2 () +{ + throw (sz); +} +void +test () +{ + try + { + test2 (); + } + catch (int v) + { + __builtin_memcpy (b, a, v); + } +} +int +main () +{ + for (int i = 0; i < 100000; i++) + test (); +} +/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Average value sum:100000000" 2 "profile" } } */ +/* 1 zero count for resx block. */ +/* { dg-final-use-not-autofdo { scan-tree-dump-times "count: 0" 1 "fixup_cfg3" } } */ +/* 2 zero count for resx block and return block since return gets duplicated by tracer. 
*/ +/* { dg-final-use-not-autofdo { scan-tree-dump-times "count: 0" 2 "optimized" } } */ +/* { dg-final-use-not-autofdo { scan-tree-dump-times "Average value sum:100000000" 1 "optimized" } } */ diff --git a/gcc/testsuite/g++.dg/warn/pr121133-1.C b/gcc/testsuite/g++.dg/warn/pr121133-1.C new file mode 100644 index 0000000..6d6e13b --- /dev/null +++ b/gcc/testsuite/g++.dg/warn/pr121133-1.C @@ -0,0 +1,16 @@ +// PR c++/121133 +// { dg-do compile } +// { dg-options "-std=c++98 -Wno-long-long -pedantic-errors" } + +__extension__ typedef long long L; +__extension__ long long a; +struct S { + __extension__ long long b; +}; + +void +foo () +{ + __extension__ long long c; + c = c + (__extension__ (long long) 1); +} diff --git a/gcc/testsuite/g++.dg/warn/pr121133-2.C b/gcc/testsuite/g++.dg/warn/pr121133-2.C new file mode 100644 index 0000000..cd97a76 --- /dev/null +++ b/gcc/testsuite/g++.dg/warn/pr121133-2.C @@ -0,0 +1,5 @@ +// PR c++/121133 +// { dg-do compile } +// { dg-options "-std=c++98 -pedantic-errors" } + +#include "pr121133-1.C" diff --git a/gcc/testsuite/g++.dg/warn/pr121133-3.C b/gcc/testsuite/g++.dg/warn/pr121133-3.C new file mode 100644 index 0000000..9ffd407 --- /dev/null +++ b/gcc/testsuite/g++.dg/warn/pr121133-3.C @@ -0,0 +1,5 @@ +// PR c++/121133 +// { dg-do compile { target c++11 } } +// { dg-options "-pedantic-errors" } + +#include "pr121133-1.C" diff --git a/gcc/testsuite/g++.dg/warn/pr121133-4.C b/gcc/testsuite/g++.dg/warn/pr121133-4.C new file mode 100644 index 0000000..76885ba --- /dev/null +++ b/gcc/testsuite/g++.dg/warn/pr121133-4.C @@ -0,0 +1,5 @@ +// PR c++/121133 +// { dg-do compile { target c++11 } } +// { dg-options "-pedantic-errors -Wlong-long" } + +#include "pr121133-1.C" diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C new file mode 100644 index 0000000..02880ef --- /dev/null +++ 
b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C @@ -0,0 +1,18 @@ +/* { dg-do compile }*/ +/* { dg-options "-O -ffinite-math-only -fno-signed-zeros -msve-vector-bits=2048 " } */ + +#include "unpacked_cond_binary_bf16_1.C" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 15 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 15 } } */ +/* { dg-final { scan-assembler-times {\tand} 30 } } */ + +/* { dg-final { scan-assembler-times {\tbfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tbfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tbfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ + +/* { dg-final { scan-assembler-times {\tbfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tbfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ + +// There's no BFSUBR. +/* { dg-final { scan-assembler-times {\tsel\t} 2 } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C new file mode 100644 index 0000000..95cd698 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C @@ -0,0 +1,35 @@ +/* { dg-do compile }*/ +/* { dg-options "-O2 -fno-trapping-math -msve-vector-bits=2048 " } */ + +#include <stdint.h> +#pragma GCC target "arch=armv9-a+sve-b16b16" + +#define COND_BFMLA(TYPE, PRED_TYPE, MERGE) \ + TYPE test_bfmla_##TYPE##_##MERGE (TYPE a, TYPE b, TYPE c, PRED_TYPE p) \ + {return p ? a * b + c : MERGE; } + +#define COND_BFMLS(TYPE, PRED_TYPE, MERGE) \ + TYPE test_bfmls_##TYPE##_##MERGE (TYPE a, TYPE b, TYPE c, PRED_TYPE p) \ + {return p ? 
a * -b + c : MERGE; } + +#define TEST_OP(TYPE, PRED_TYPE, T) \ + T (TYPE, PRED_TYPE, c) \ + T (TYPE, PRED_TYPE, 0) + +#define TEST(TYPE, PTYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__ ((vector_size (SIZE))); \ + typedef PTYPE PTYPE##SIZE __attribute__ ((vector_size (SIZE))); \ + TEST_OP (TYPE##SIZE, PTYPE##SIZE, COND_BFMLA) \ + TEST_OP (TYPE##SIZE, PTYPE##SIZE, COND_BFMLS) + +TEST (__bf16, uint16_t, 128) + +TEST (__bf16, uint16_t, 64) + +/* { dg-final { scan-assembler-times {\tptrue} 8 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C new file mode 100644 index 0000000..c0d7c50 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048" } */ + +#include "unpacked_cond_ternary_bf16_1.C" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 4 } } */ +/* { dg-final { scan-assembler-times {\tand} 8 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_1.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_1.C new file 
mode 100644 index 0000000..19bfe95 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_1.C @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048" } */ + +#define BFMLA(TYPE) \ + TYPE test_bfmla_##TYPE (TYPE a, TYPE b, TYPE c) \ + { return a * b + c; } + +#define BFMLS(TYPE) \ + TYPE test_bfmls_##TYPE (TYPE a, TYPE b, TYPE c) \ + { return a * -b + c; } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + BFMLA (TYPE##SIZE) \ + BFMLS (TYPE##SIZE) + +#pragma GCC target "arch=armv9-a+sve-b16b16" + +TEST_TYPE (__bf16, 128) + +TEST_TYPE (__bf16, 64) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_2.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_2.C new file mode 100644 index 0000000..ef37400 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_2.C @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -fno-trapping-math" } */ + +#include "unpacked_ternary_bf16_1.C" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 4 } } */ + +/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.dg/plugin/must-tail-call-2.c b/gcc/testsuite/gcc.dg/plugin/must-tail-call-2.c index d51d15c..6f65f4a 100644 --- 
a/gcc/testsuite/gcc.dg/plugin/must-tail-call-2.c +++ b/gcc/testsuite/gcc.dg/plugin/must-tail-call-2.c @@ -55,5 +55,5 @@ volatile fn_ptr_t fn_ptr; void test_5 (void) { - fn_ptr (); /* { dg-error "cannot tail-call: " } */ + fn_ptr (); } diff --git a/gcc/testsuite/gcc.dg/pr116906-1.c b/gcc/testsuite/gcc.dg/pr116906-1.c index 7187507..ee60ad6 100644 --- a/gcc/testsuite/gcc.dg/pr116906-1.c +++ b/gcc/testsuite/gcc.dg/pr116906-1.c @@ -1,3 +1,4 @@ +/* { dg-do run } */ /* { dg-require-effective-target alarm } */ /* { dg-require-effective-target signal } */ /* { dg-options "-O2" } */ diff --git a/gcc/testsuite/gcc.dg/pr116906-2.c b/gcc/testsuite/gcc.dg/pr116906-2.c index 41a352b..4172ec3 100644 --- a/gcc/testsuite/gcc.dg/pr116906-2.c +++ b/gcc/testsuite/gcc.dg/pr116906-2.c @@ -1,3 +1,4 @@ +/* { dg-do run } */ /* { dg-require-effective-target alarm } */ /* { dg-require-effective-target signal } */ /* { dg-options "-O2 -fno-tree-ch" } */ diff --git a/gcc/testsuite/gcc.dg/pr120660.c b/gcc/testsuite/gcc.dg/pr120660.c new file mode 100644 index 0000000..6e8c5e8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr120660.c @@ -0,0 +1,19 @@ +/* { dg-do run } */ +/* { dg-options "-O -favoid-store-forwarding" } */ + +int c; + +short +foo (short s) +{ + __builtin_memset (&s, c, 1); + return s; +} + +int +main () +{ + short x = foo (0x1111); + if (x != 0x1100 && x != 0x0011) + __builtin_abort(); +} diff --git a/gcc/testsuite/gcc.dg/pr78185.c b/gcc/testsuite/gcc.dg/pr78185.c index ada8b1b..4c3af4f 100644 --- a/gcc/testsuite/gcc.dg/pr78185.c +++ b/gcc/testsuite/gcc.dg/pr78185.c @@ -1,3 +1,4 @@ +/* { dg-do run } */ /* { dg-require-effective-target alarm } */ /* { dg-require-effective-target signal } */ /* { dg-options "-O" } */ diff --git a/gcc/testsuite/gcc.dg/torture/pr121295-1.c b/gcc/testsuite/gcc.dg/torture/pr121295-1.c new file mode 100644 index 0000000..7825c6e --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr121295-1.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-additional-options 
" -fno-tree-copy-prop -fno-tree-pre -fno-code-hoisting" } */ + +/* PR tree-optimization/121295 */ + + +int a, b, c; +int main() { + int *d = &a; + while (b) + b = (*d &= 10) <= 0 || (*d = c); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cswtch-7.c b/gcc/testsuite/gcc.dg/tree-ssa/cswtch-7.c new file mode 100644 index 0000000..7b797807 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/cswtch-7.c @@ -0,0 +1,48 @@ +/* PR tree-optimization/120523 */ +/* PR tree-optimization/120451 */ +/* { dg-do compile { target elf } } */ +/* { dg-options "-O2" } */ + +void foo (int, int); + +__attribute__((noinline, noclone)) void +f1 (int v, int w) +{ + int i, j; + if (w) + { + i = 129; + j = i - 1; + goto lab; + } + switch (v) + { + case 170: + j = 7; + i = 27; + break; + case 171: + i = 8; + j = 122; + break; + case 172: + i = 21; + j = -19; + break; + case 173: + i = 18; + j = 17; + break; + case 174: + i = 33; + j = 55; + break; + default: + __builtin_abort (); + } + + lab: + foo (i, j); +} + +/* { dg-final { scan-assembler ".rodata.cst32" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr112325.c b/gcc/testsuite/gcc.dg/vect/pr112325.c index 8689fbf..d380595 100644 --- a/gcc/testsuite/gcc.dg/vect/pr112325.c +++ b/gcc/testsuite/gcc.dg/vect/pr112325.c @@ -5,6 +5,7 @@ /* { dg-additional-options "-mavx2" { target x86_64-*-* i?86-*-* } } */ /* { dg-additional-options "--param max-completely-peeled-insns=200" { target powerpc64*-*-* } } */ /* { dg-additional-options "-mlsx" { target loongarch64-*-* } } */ +/* { dg-additional-options "--param max-completely-peeled-insns=200 --param min-vect-loop-bound=0" { target s390*-*-* } } */ typedef unsigned short ggml_fp16_t; static float table_f32_f16[1 << 16]; diff --git a/gcc/testsuite/gcc.dg/vect/pr117888-1.c b/gcc/testsuite/gcc.dg/vect/pr117888-1.c index 0b31fcd..884aed2 100644 --- a/gcc/testsuite/gcc.dg/vect/pr117888-1.c +++ b/gcc/testsuite/gcc.dg/vect/pr117888-1.c @@ -5,6 +5,7 @@ /* { dg-additional-options "-mavx2" { target
x86_64-*-* i?86-*-* } } */ /* { dg-additional-options "--param max-completely-peeled-insns=200" { target powerpc64*-*-* } } */ /* { dg-additional-options "-mlsx" { target loongarch64-*-* } } */ +/* { dg-additional-options "--param max-completely-peeled-insns=200 --param min-vect-loop-bound=0" { target s390*-*-* } } */ typedef unsigned short ggml_fp16_t; static float table_f32_f16[1 << 16]; diff --git a/gcc/testsuite/gcc.dg/vect/pr120687-1.c b/gcc/testsuite/gcc.dg/vect/pr120687-1.c new file mode 100644 index 0000000..ce9cf63 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr120687-1.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ + +unsigned +frd (unsigned *p, unsigned *lastone) +{ + unsigned sum = 0; + for (; p <= lastone; p += 16) + sum += p[0] + p[1] + p[2] + p[3] + p[4] + p[5] + p[6] + p[7] + + p[8] + p[9] + p[10] + p[11] + p[12] + p[13] + p[14] + p[15]; + return sum; +} + +/* { dg-final { scan-tree-dump "reduction: detected reduction chain" "vect" } } */ +/* { dg-final { scan-tree-dump-not "SLP discovery of reduction chain failed" "vect" } } */ +/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr120687-2.c b/gcc/testsuite/gcc.dg/vect/pr120687-2.c new file mode 100644 index 0000000..dfc6dc7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr120687-2.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_float } */ +/* { dg-additional-options "-ffast-math" } */ + +float +frd (float *p, float *lastone) +{ + float sum = 0; + for (; p <= lastone; p += 16) + sum += p[0] + p[1] + p[2] + p[3] + p[4] + p[5] + p[6] + p[7] + + p[8] + p[9] + p[10] + p[11] + p[12] + p[13] + p[14] + p[15]; + return sum; +} + +/* { dg-final { scan-tree-dump "reduction: detected reduction chain" "vect" } } */ +/* { dg-final { scan-tree-dump-not "SLP discovery of reduction chain failed" "vect" } } */ +/* { dg-final { scan-tree-dump "optimized: loop vectorized" 
"vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr120687-3.c b/gcc/testsuite/gcc.dg/vect/pr120687-3.c new file mode 100644 index 0000000..f20a66a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr120687-3.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_double } */ +/* { dg-additional-options "-ffast-math" } */ + +float +frd (float *p, float *lastone) +{ + float sum = 0; + for (; p <= lastone; p += 2) + sum += p[0] + p[1]; + return sum; +} + +/* { dg-final { scan-tree-dump "reduction: detected reduction chain" "vect" } } */ +/* { dg-final { scan-tree-dump-not "SLP discovery of reduction chain failed" "vect" } } */ +/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c new file mode 100644 index 0000000..e6b071c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c @@ -0,0 +1,62 @@ +/* PR tree-optimization/121190 */ +/* { dg-options "-O3" } */ +/* { dg-additional-options "-march=znver2" { target x86_64-*-* i?86-*-* } } */ +/* { dg-require-effective-target mmap } */ +/* { dg-require-effective-target vect_early_break } */ + +#include <stdint.h> +#include <string.h> +#include <stdio.h> +#include <sys/mman.h> +#include <unistd.h> +#include "tree-vect.h" + +#define MAX_COMPARE 5000 + +__attribute__((noipa)) +int diff (uint64_t *restrict p, uint64_t *restrict q) +{ + int i = 0; + while (i < MAX_COMPARE) { + if (*(p + i) != *(q + i)) + return i; + i++; + } + return -1; +} + +int main () +{ + check_vect (); + + long pgsz = sysconf (_SC_PAGESIZE); + if (pgsz == -1) { + fprintf (stderr, "sysconf failed\n"); + return 0; + } + + /* Allocate 2 consecutive pages of memory and let p1 and p2 point to the + beginning of each. 
*/ + void *mem = mmap (NULL, pgsz * 2, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mem == MAP_FAILED) { + fprintf (stderr, "mmap failed\n"); + return 0; + } + uint64_t *p1 = (uint64_t *) mem; + uint64_t *p2 = (uint64_t *) mem + pgsz / sizeof (uint64_t); + + /* Fill the first page with zeros, except for its last 64 bits. */ + memset (p1, 0, pgsz); + *(p2 - 1) = -1; + + /* Make the 2nd page inaccessible. */ + mprotect (p2, pgsz, PROT_NONE); + + /* Calls to diff should not read the 2nd page. */ + for (int i = 1; i <= 20; i++) { + if (diff (p2 - i, p1) != i - 1) + __builtin_abort (); + } +} + diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c new file mode 100644 index 0000000..8cb62bf --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c @@ -0,0 +1,54 @@ +/* PR tree-optimization/121020 */ +/* { dg-options "-O3 --vect-cost-model=unlimited" } */ +/* { dg-additional-options "-march=znver2" { target x86_64-*-* i?86-*-* } } */ +/* { dg-require-effective-target mmap } */ +/* { dg-require-effective-target vect_early_break } */ + +#include <stdint.h> +#include <stdio.h> +#include <sys/mman.h> +#include <unistd.h> +#include "tree-vect.h" + +__attribute__((noipa)) +bool equal (uint64_t *restrict p, uint64_t *restrict q, int length) +{ + for (int i = 0; i < length; i++) { + if (*(p + i) != *(q + i)) + return false; + } + return true; +} + +int main () +{ + check_vect (); + + long pgsz = sysconf (_SC_PAGESIZE); + if (pgsz == -1) { + fprintf (stderr, "sysconf failed\n"); + return 0; + } + + /* Allocate a whole page of memory. */ + void *mem = mmap (NULL, pgsz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mem == MAP_FAILED) { + fprintf (stderr, "mmap failed\n"); + return 0; + } + uint64_t *p1 = (uint64_t *) mem; + uint64_t *p2 = (uint64_t *) mem + 32; + + /* The first 16 elements pointed to by p1 and p2 are the same.
*/ + for (int i = 0; i < 32; i++) { + *(p1 + i) = 0; + *(p2 + i) = (i < 16 ? 0 : -1); + } + + /* All calls to equal should return true. */ + for (int len = 0; len < 16; len++) { + if (!equal (p1 + 1, p2 + 1, len)) + __builtin_abort(); + } +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c index 86a632f..6abfcd6 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c @@ -18,4 +18,4 @@ int main1 (short X) } } -/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target { ! "x86_64-*-* i?86-*-*" } } } } */ +/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target { ! "x86_64-*-* i?86-*-* arm*-*-*" } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-pr121130.c b/gcc/testsuite/gcc.dg/vect/vect-simd-pr121130.c new file mode 100644 index 0000000..c882ded --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-pr121130.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ + +int n2; + +__attribute__((simd)) char +w7(void) +{ + short int xb = n2; + xb = w7() < 1; + return xb; +} diff --git a/gcc/testsuite/gcc.target/aarch64/cmpbr.c b/gcc/testsuite/gcc.target/aarch64/cmpbr.c index a86af9d..34630f9 100644 --- a/gcc/testsuite/gcc.target/aarch64/cmpbr.c +++ b/gcc/testsuite/gcc.target/aarch64/cmpbr.c @@ -121,7 +121,7 @@ FAR_BRANCH(u64, 42); /* ** u8_x0_eq_x1: -** cbbeq w1, w0, .L([0-9]+) +** cbbeq (?:w1, w0|w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -129,7 +129,7 @@ FAR_BRANCH(u64, 42); /* ** u8_x0_ne_x1: -** cbbne w1, w0, .L([0-9]+) +** cbbne (?:w1, w0|w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -137,7 +137,7 @@ FAR_BRANCH(u64, 42); /* ** u8_x0_ult_x1: -** cbbhi w1, w0, .L([0-9]+) +** (?:cbbhi w1, w0|cbblo w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -145,7 +145,7 @@ FAR_BRANCH(u64, 42); /* ** u8_x0_ule_x1: -** cbbhs w1, w0, .L([0-9]+) +** (?:cbbhs w1, w0|cbbls w0, w1), 
.L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -153,7 +153,7 @@ FAR_BRANCH(u64, 42); /* ** u8_x0_ugt_x1: -** cbblo w1, w0, .L([0-9]+) +** (?:cbblo w1, w0|cbbhi w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -161,7 +161,7 @@ FAR_BRANCH(u64, 42); /* ** u8_x0_uge_x1: -** cbbls w1, w0, .L([0-9]+) +** (?:cbbls w1, w0|cbbhs w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -169,7 +169,7 @@ FAR_BRANCH(u64, 42); /* ** i8_x0_slt_x1: -** cbbgt w1, w0, .L([0-9]+) +** (?:cbbgt w1, w0|cbblt w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -177,7 +177,7 @@ FAR_BRANCH(u64, 42); /* ** i8_x0_sle_x1: -** cbbge w1, w0, .L([0-9]+) +** (?:cbbge w1, w0|cbble w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -185,7 +185,7 @@ FAR_BRANCH(u64, 42); /* ** i8_x0_sgt_x1: -** cbblt w1, w0, .L([0-9]+) +** (?:cbblt w1, w0|cbbgt w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -193,7 +193,7 @@ FAR_BRANCH(u64, 42); /* ** i8_x0_sge_x1: -** cbble w1, w0, .L([0-9]+) +** (?:cbble w1, w0|cbbge w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -201,7 +201,7 @@ FAR_BRANCH(u64, 42); /* ** u16_x0_eq_x1: -** cbheq w1, w0, .L([0-9]+) +** cbheq (?:w1, w0|w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -209,7 +209,7 @@ FAR_BRANCH(u64, 42); /* ** u16_x0_ne_x1: -** cbhne w0|w1, w1|w0, .L([0-9]+) +** cbhne (?:w1, w0|w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -217,7 +217,7 @@ FAR_BRANCH(u64, 42); /* ** u16_x0_ult_x1: -** cbhhi w1, w0, .L([0-9]+) +** (?:cbhhi w1, w0|cbhlo w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -225,7 +225,7 @@ FAR_BRANCH(u64, 42); /* ** u16_x0_ule_x1: -** cbhhs w1, w0, .L([0-9]+) +** (?:cbhhs w1, w0|cbhls w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -233,7 +233,7 @@ FAR_BRANCH(u64, 42); /* ** u16_x0_ugt_x1: -** cbhlo w1, w0, .L([0-9]+) +** (?:cbhlo w1, w0|cbhhi w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -241,7 +241,7 @@ FAR_BRANCH(u64, 42); /* ** u16_x0_uge_x1: -** cbhls w1, 
w0, .L([0-9]+) +** (?:cbhls w1, w0|cbhhs w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -249,7 +249,7 @@ FAR_BRANCH(u64, 42); /* ** i16_x0_slt_x1: -** cbhgt w1, w0, .L([0-9]+) +** (?:cbhgt w1, w0|cbhlt w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -257,7 +257,7 @@ FAR_BRANCH(u64, 42); /* ** i16_x0_sle_x1: -** cbhge w1, w0, .L([0-9]+) +** (?:cbhge w1, w0|cbhle w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -265,7 +265,7 @@ FAR_BRANCH(u64, 42); /* ** i16_x0_sgt_x1: -** cbhlt w1, w0, .L([0-9]+) +** (?:cbhlt w1, w0|cbhgt w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -273,7 +273,7 @@ FAR_BRANCH(u64, 42); /* ** i16_x0_sge_x1: -** cbhle w1, w0, .L([0-9]+) +** (?:cbhle w1, w0|cbhge w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken diff --git a/gcc/testsuite/gcc.target/aarch64/pr121300.c b/gcc/testsuite/gcc.target/aarch64/pr121300.c new file mode 100644 index 0000000..5f2cd9a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr121300.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-S -O3 -march=armv9-a+sme2" } */ + +#include <arm_sme.h> + +svfloat16x2_t test (svfloat16x2_t zd, svfloat16x2_t zm) __arm_streaming +{ + return svamin_f16_x2 (zd, zm); // { dg-error "ACLE function .svamin_f16_x2. requires ISA extension .faminmax." 
} +} diff --git a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_1.c b/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_1.c index acd2e11..8fc1569 100644 --- a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_1.c +++ b/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_1.c @@ -4,24 +4,24 @@ /* ** uadd: -** dup v([0-9]+).8b, w1 -** dup v([0-9]+).8b, w0 +** dup v([0-9]+).8b, w[01] +** dup v([0-9]+).8b, w[01] ** uqadd b([0-9]+), (?:b\2, b\1|b\1, b\2) ** umov w0, v\3.b\[0\] ** ret */ /* ** uadd2: -** dup v([0-9]+).8b, w1 -** dup v([0-9]+).8b, w0 +** dup v([0-9]+).8b, w[01] +** dup v([0-9]+).8b, w[01] ** uqadd b([0-9]+), (?:b\2, b\1|b\1, b\2) ** umov w0, v\3.b\[0\] ** ret */ /* ** usub: { xfail *-*-* } -** dup v([0-9]+).8b, w1 -** dup v([0-9]+).8b, w0 +** dup v([0-9]+).8b, w[01] +** dup v([0-9]+).8b, w[01] ** uqsub b([0-9]+), b\1, b\2 ** umov w0, v\3.b\[0\] ** ret diff --git a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_2.c b/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_2.c index 86c88f8..dd0fefa 100644 --- a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_2.c +++ b/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_2.c @@ -4,16 +4,16 @@ /* ** uadd: -** dup v([0-9]+).4h, w1 -** dup v([0-9]+).4h, w0 +** dup v([0-9]+).4h, w[01] +** dup v([0-9]+).4h, w[01] ** uqadd h([0-9]+), (?:h\2, h\1|h\1, h\2) ** umov w0, v\3.h\[0\] ** ret */ /* ** uadd2: -** dup v([0-9]+).4h, w1 -** dup v([0-9]+).4h, w0 +** dup v([0-9]+).4h, w[01] +** dup v([0-9]+).4h, w[01] ** uqadd h([0-9]+), (?:h\2, h\1|h\1, h\2) ** umov w0, v\3.h\[0\] ** ret diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c new file mode 100644 index 0000000..f84ded5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include 
"unpacked_cond_builtin_fmax_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */ +/* { dg-final { scan-assembler-times {\tand} 21 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c new file mode 100644 index 0000000..bceddf9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_builtin_fmin_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */ +/* { dg-final { scan-assembler-times {\tand} 21 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 
13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c new file mode 100644 index 0000000..e59864b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fadd_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 11 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 22 } } */ +/* { dg-final { scan-assembler-times {\tand} 33 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 19 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 19 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 19 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 5 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 10 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c new file mode 100644 index 0000000..1ca3dbf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fdiv_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 6 } } */ +/* { dg-final { scan-assembler-times {\tand} 9 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 7 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 7 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 7 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { 
scan-assembler-times {\tfdivr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c new file mode 100644 index 0000000..282f3ed --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-signed-zeros -ffinite-math-only" } */ + +#include "unpacked_cond_fmaxnm_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */ +/* { dg-final { scan-assembler-times {\tand} 21 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, 
z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c new file mode 100644 index 0000000..8226a6f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-signed-zeros -ffinite-math-only" } */ + +#include "unpacked_cond_fminnm_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */ +/* { dg-final { scan-assembler-times {\tand} 21 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c new file mode 100644 index 0000000..cae9242 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */ + +#include <stdint.h> + +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) +#define FNMLA(SUFF) -FMLA (SUFF) +#define FNMLS(SUFF) -FMLS (SUFF) + +#define a_i a[i] +#define b_i b[i] +#define c_i c[i] + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \ + void \ + f_##TYPE0##_##TYPE1##_##MERGE (TYPE0 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c, \ + TYPE1 *__restrict p) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + out[i] = p[i] ? FN : MERGE; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, 0) + +TEST_ALL (FMLA (f16), _Float16, uint64_t, 32) + +TEST_ALL (FMLA (f16), _Float16, uint32_t, 64) + +TEST_ALL (FMLA (f32), float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, 
p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c new file mode 100644 index 0000000..72e04a4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fmla_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */ +/* { dg-final { scan-assembler-times {\tand} 12 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c new file mode 100644 index 0000000..db0f818 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { 
dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */ + +#include <stdint.h> + +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) +#define FNMLA(SUFF) -FMLA (SUFF) +#define FNMLS(SUFF) -FMLS (SUFF) + +#define a_i a[i] +#define b_i b[i] +#define c_i c[i] + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \ + void \ + f_##TYPE0##_##TYPE1##_##MERGE (TYPE0 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c, \ + TYPE1 *__restrict p) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + out[i] = p[i] ? FN : MERGE; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, 0) + +TEST_ALL (FMLS (f16), _Float16, uint64_t, 32) + +TEST_ALL (FMLS (f16), _Float16, uint32_t, 64) + +TEST_ALL (FMLS (f32), float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c new file mode 100644 index 0000000..3012052 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fmls_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */ +/* { dg-final { scan-assembler-times {\tand} 12 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c new file mode 100644 index 0000000..21713f5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fmul_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 5 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 10 } } */ +/* { dg-final { scan-assembler-times 
{\tand} 15 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 10 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 10 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 10 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c new file mode 100644 index 0000000..07bab63 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */ + +#include <stdint.h> + +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) +#define FNMLA(SUFF) -FMLA (SUFF) +#define FNMLS(SUFF) -FMLS (SUFF) + +#define a_i a[i] +#define b_i b[i] +#define c_i c[i] + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \ + void \ + f_##TYPE0##_##TYPE1##_##MERGE (TYPE0 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c, \ + TYPE1 *__restrict p) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + out[i] = p[i] ? 
FN : MERGE; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, 0) + +TEST_ALL (FNMLA (f16), _Float16, uint64_t, 32) + +TEST_ALL (FNMLA (f16), _Float16, uint32_t, 64) + +TEST_ALL (FNMLA (f32), float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c new file mode 100644 index 0000000..daef4e49 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fnmla_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */ +/* { dg-final { scan-assembler-times {\tand} 12 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 
12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c new file mode 100644 index 0000000..5526378 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */ + +#include <stdint.h> + +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) +#define FNMLA(SUFF) -FMLA (SUFF) +#define FNMLS(SUFF) -FMLS (SUFF) + +#define a_i a[i] +#define b_i b[i] +#define c_i c[i] + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \ + void \ + f_##TYPE0##_##TYPE1##_##MERGE (TYPE0 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c, \ + TYPE1 *__restrict p) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + out[i] = p[i] ? 
FN : MERGE; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, 0) + +TEST_ALL (FNMLS (f16), _Float16, uint64_t, 32) + +TEST_ALL (FNMLS (f16), _Float16, uint32_t, 64) + +TEST_ALL (FNMLS (f32), float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c new file mode 100644 index 0000000..8a8f348 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fnmls_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */ +/* { dg-final { scan-assembler-times {\tand} 12 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 
} } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c new file mode 100644 index 0000000..cd7a0e1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fsubr_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */ +/* { dg-final { scan-assembler-times {\tand} 21 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */ +/* { dg-final { 
scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_1.c new file mode 100644 index 0000000..312bccc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_1.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include <stdint.h> + +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) +#define FNMLA(SUFF) -FMLA (SUFF) +#define FNMLS(SUFF) -FMLS (SUFF) + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c, \ + TYPE0 *__restrict d) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN > d[i]) \ + out[i] = 3; \ + } + +TEST_FN (FMLA (f16), _Float16, uint64_t, 32) + +TEST_FN (FMLA (f16), _Float16, uint32_t, 64) + +TEST_FN (FMLA (f32), float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */ + +/* { 
dg-final { scan-assembler-times {\t(fmla|fmad)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fmla|fmad)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_2.c new file mode 100644 index 0000000..ca3f94d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */ + +#include "unpacked_fmla_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */ + +/* { dg-final { scan-assembler-times {\t(fmla|fmad)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fmla|fmad)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_1.c new file mode 100644 index 0000000..f7cbfb3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_1.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include <stdint.h> + +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) +#define FNMLA(SUFF) -FMLA (SUFF) +#define FNMLS(SUFF) -FMLS (SUFF) + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c, \ 
+ TYPE0 *__restrict d) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN > d[i]) \ + out[i] = 3; \ + } + +TEST_FN (FMLS (f16), _Float16, uint64_t, 32) + +TEST_FN (FMLS (f16), _Float16, uint32_t, 64) + +TEST_FN (FMLS (f32), float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */ + +/* { dg-final { scan-assembler-times {\t(fmls|fmsb)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fmls|fmsb)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_2.c new file mode 100644 index 0000000..387dbec --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */ + +#include "unpacked_fmls_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */ + +/* { dg-final { scan-assembler-times {\t(fmls|fmsb)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fmls|fmsb)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_1.c new file 
mode 100644 index 0000000..bf13ff5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_1.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include <stdint.h> + +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) +#define FNMLA(SUFF) -FMLA (SUFF) +#define FNMLS(SUFF) -FMLS (SUFF) + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c, \ + TYPE0 *__restrict d) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN > d[i]) \ + out[i] = 3; \ + } + +TEST_FN (FNMLA (f16), _Float16, uint64_t, 32) + +TEST_FN (FNMLA (f16), _Float16, uint32_t, 64) + +TEST_FN (FNMLA (f32), float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */ + +/* { dg-final { scan-assembler-times {\t(fnmla|fnmad)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fnmla|fnmad)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_2.c new file mode 100644 index 0000000..64130ba --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */ + +#include "unpacked_fnmla_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { 
dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */ + +/* { dg-final { scan-assembler-times {\t(fnmla|fnmad)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fnmla|fnmad)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_1.c new file mode 100644 index 0000000..399920a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_1.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include <stdint.h> + +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) +#define FNMLA(SUFF) -FMLA (SUFF) +#define FNMLS(SUFF) -FMLS (SUFF) + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c, \ + TYPE0 *__restrict d) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN > d[i]) \ + out[i] = 3; \ + } + +TEST_FN (FNMLS (f16), _Float16, uint64_t, 32) + +TEST_FN (FNMLS (f16), _Float16, uint32_t, 64) + +TEST_FN (FNMLS (f32), float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */ + +/* { dg-final { scan-assembler-times {\t(fnmls|fnmsb)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { 
scan-assembler-times {\t(fnmls|fnmsb)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_2.c new file mode 100644 index 0000000..59fb7f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */ + +#include "unpacked_fnmls_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */ + +/* { dg-final { scan-assembler-times {\t(fnmls|fnmsb)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fnmls|fnmsb)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr119795.c b/gcc/testsuite/gcc.target/i386/pr119795.c new file mode 100644 index 0000000..03c91cc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr119795.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ +/* { dg-options "-O -fschedule-insns -favoid-store-forwarding" } */ + +unsigned a, b, c; + +void +foo (_BitInt(2) b2, unsigned _BitInt(255) by, unsigned _BitInt(5) b5, + unsigned _BitInt(256) *ret) +{ + unsigned _BitInt(255) bx = b2; + by += 0x80000000000000000000000000000000wb; + __builtin_memmove (&b, &c, 3); + unsigned d = b; + unsigned e = __builtin_stdc_rotate_right (0x1uwb % b5, a); + unsigned _BitInt(256) r = by + bx + d + e; + *ret = r; +} + +int +main () +{ + unsigned _BitInt(256) x; + foo (0, -1, 2, &x); + if (x != 0x80000000000000000000000000000000wb) + __builtin_abort(); +}
\ No newline at end of file diff --git a/gcc/testsuite/gcc.target/i386/pr120427-5.c b/gcc/testsuite/gcc.target/i386/pr120427-5.c new file mode 100644 index 0000000..7199aef --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr120427-5.c @@ -0,0 +1,10 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-Oz" } */ + +long long +func1 (void) +{ + return -1; +} +/* { dg-final { scan-assembler-times "pushq\[ \\t\]+\\\$-1" 1 } } */ +/* { dg-final { scan-assembler-times "popq\[ \\t\]+%rax" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr121208-1a.c b/gcc/testsuite/gcc.target/i386/pr121208-1a.c index ac851cb..cb8bd0b 100644 --- a/gcc/testsuite/gcc.target/i386/pr121208-1a.c +++ b/gcc/testsuite/gcc.target/i386/pr121208-1a.c @@ -1,5 +1,5 @@ /* { dg-do compile { target *-*-linux* } } */ -/* { dg-options "-O2 -fPIC -mtls-dialect=gnu" } */ +/* { dg-options "-O2 -fPIC -mno-80387 -mtls-dialect=gnu" } */ extern __thread int bar; extern void func (void); diff --git a/gcc/testsuite/gcc.target/i386/pr121208-1b.c b/gcc/testsuite/gcc.target/i386/pr121208-1b.c index b97ac71..037e9a0 100644 --- a/gcc/testsuite/gcc.target/i386/pr121208-1b.c +++ b/gcc/testsuite/gcc.target/i386/pr121208-1b.c @@ -1,4 +1,4 @@ /* { dg-do compile { target *-*-linux* } } */ -/* { dg-options "-O2 -fPIC -mtls-dialect=gnu2" } */ +/* { dg-options "-O2 -fPIC -mno-80387 -mtls-dialect=gnu2" } */ #include "pr121208-1a.c" diff --git a/gcc/testsuite/gcc.target/i386/pr121274.c b/gcc/testsuite/gcc.target/i386/pr121274.c new file mode 100644 index 0000000..16760cf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121274.c @@ -0,0 +1,24 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-march=x86-64-v4 -O2" } */ +/* { dg-final { scan-assembler-not "vpextrq" } } */ +/* { dg-final { scan-assembler-not "vpinsrq" } } */ + +typedef int v16si __attribute__((vector_size(64))); +typedef int v4si __attribute__((vector_size(16))); + +v4si f(v16si x) +{ + return __builtin_shufflevector(x, x, 0, 1, 2, 3); +} + +v4si g(v16si x) +{ +return __builtin_shufflevector(x, x, 4, 5, 6, 7); +} + +v4si f1(__int128 *x) +{ + __int128 t = *x; + asm("":"+x"(t)); + return (v4si)t; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c index e7b1ef0..8e7a788 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c @@ -19,4 +19,7 @@ TEST_BINARY_VX_UNSIGNED_0(T) /* { dg-final { scan-assembler-times {vminu.vx} 2 } } */ /* { dg-final { scan-assembler-times {vsaddu.vx} 1 } } */ /* { dg-final { scan-assembler-times {vssubu.vx} 1 } } */ -/* { dg-final { scan-assembler-times {vaaddu.vx} 1 { target { no-opts "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2" "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4" } } } } */ +/* { dg-final { scan-assembler-times {vaaddu.vx} 2 { target { no-opts { + "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2" + "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4" + } } } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c index 559887e..d213c18 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c @@ -19,4 +19,4 @@ TEST_BINARY_VX_UNSIGNED_0(T) /* { dg-final { scan-assembler-times {vminu.vx} 2 } } */ /* { dg-final { scan-assembler-times {vsaddu.vx} 1 } } */ /* { dg-final { scan-assembler-times {vssubu.vx} 1 } } */ -/* { dg-final { scan-assembler-times {vaaddu.vx} 1 } } */ +/* { dg-final { scan-assembler-times 
{vaaddu.vx} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c index c851f23..3ecfce6 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c index b7805c1..7ce1fe8 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X4) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X4) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ @@ -31,5 +32,6 @@ DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_B /* { dg-final { scan-assembler {vremu.vx} } } */ /* { dg-final { scan-assembler {vmaxu.vx} } } */ /* { dg-final { scan-assembler {vminu.vx} } } */ +/* { dg-final { scan-assembler {vsaddu.vx} } } */ /* { 
dg-final { scan-assembler {vssubu.vx} } } */ /* { dg-final { scan-assembler {vaaddu.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c index 8295dc2..c84a30c 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ @@ -33,4 +34,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_B /* { dg-final { scan-assembler {vminu.vx} } } */ /* { dg-final { scan-assembler-not {vsaddu.vx} } } */ /* { dg-final { scan-assembler-not {vssubu.vx} } } */ -/* { dg-final { scan-assembler {vaaddu.vx} { target { no-opts "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2" "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4" } } } } */ +/* { dg-final { scan-assembler {vaaddu.vx} { target { no-opts { + "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2" + "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4" + } } } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c index d214da9..9f3d7df 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8) 
DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c index b7c7ad4..5497b5a 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c index dd9c845..3a8e85f 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X4) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X4) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { 
scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c index 1fda062..060d591 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c index 725a55b..86a6c45 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c index 3a215ea..f51e7a1 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c @@ -20,6 +20,7 @@ 
DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c index ac4d100..79b7477 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X4) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X4) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c index 5eb0ed6..ac5fd69 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY) 
+DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY) /* { dg-final { scan-assembler-not {vadd.vx} } } */ /* { dg-final { scan-assembler-not {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c index 8b404b6..84aa06b 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h index b7c0f79..de48ebd 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h @@ -363,14 +363,30 @@ DEF_AVG_FLOOR(int8_t, int16_t) DEF_AVG_FLOOR(int16_t, int32_t) DEF_AVG_FLOOR(int32_t, int64_t) +#define DEF_AVG_CEIL(NT, WT) \ +NT \ +test_##NT##_avg_ceil(NT x, NT y) \ +{ \ + return (NT)(((WT)x + (WT)y + 1) >> 1); \ +} + +DEF_AVG_CEIL(uint8_t, uint16_t) +DEF_AVG_CEIL(uint16_t, uint32_t) +DEF_AVG_CEIL(uint32_t, uint64_t) + #ifdef HAS_INT128 DEF_AVG_FLOOR(uint64_t, uint128_t) DEF_AVG_FLOOR(int64_t, int128_t) + + DEF_AVG_CEIL(uint64_t, uint128_t) #endif #define AVG_FLOOR_FUNC(T) test_##T##_avg_floor #define AVG_FLOOR_FUNC_WRAP(T) AVG_FLOOR_FUNC(T) +#define AVG_CEIL_FUNC(T) test_##T##_avg_ceil +#define AVG_CEIL_FUNC_WRAP(T) AVG_CEIL_FUNC(T) + #define 
TEST_BINARY_VX_SIGNED_0(T) \ DEF_VX_BINARY_CASE_0_WRAP(T, +, add) \ DEF_VX_BINARY_CASE_0_WRAP(T, -, sub) \ @@ -405,5 +421,6 @@ DEF_AVG_FLOOR(int32_t, int64_t) DEF_VX_BINARY_CASE_2_WRAP(T, SAT_U_ADD_FUNC(T), sat_add) \ DEF_VX_BINARY_CASE_2_WRAP(T, SAT_U_SUB_FUNC(T), sat_sub) \ DEF_VX_BINARY_CASE_2_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor) \ + DEF_VX_BINARY_CASE_2_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil) \ #endif diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h index 6847309..5024ae7 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h @@ -5298,4 +5298,200 @@ int64_t TEST_BINARY_DATA(int64_t, avg_floor)[][3][N] = }, }; +uint8_t TEST_BINARY_DATA(uint8_t, avg_ceil)[][3][N] = +{ + { + { 0 }, + { + 2, 2, 2, 2, + 1, 1, 1, 1, + 0, 0, 0, 0, + 4, 4, 4, 4, + }, + { + 1, 1, 1, 1, + 1, 1, 1, 1, + 0, 0, 0, 0, + 2, 2, 2, 2, + }, + }, + { + { 127 }, + { + 127, 127, 127, 127, + 128, 128, 128, 128, + 255, 255, 255, 255, + 1, 1, 1, 1, + }, + { + 127, 127, 127, 127, + 128, 128, 128, 128, + 191, 191, 191, 191, + 64, 64, 64, 64, + }, + }, + { + { 255 }, + { + 0, 0, 0, 0, + 255, 255, 255, 255, + 254, 254, 254, 254, + 1, 1, 1, 1, + }, + { + 128, 128, 128, 128, + 255, 255, 255, 255, + 255, 255, 255, 255, + 128, 128, 128, 128, + }, + }, +}; + +uint16_t TEST_BINARY_DATA(uint16_t, avg_ceil)[][3][N] = +{ + { + { 0 }, + { + 2, 2, 2, 2, + 1, 1, 1, 1, + 0, 0, 0, 0, + 4, 4, 4, 4, + }, + { + 1, 1, 1, 1, + 1, 1, 1, 1, + 0, 0, 0, 0, + 2, 2, 2, 2, + }, + }, + { + { 32767 }, + { + 32767, 32767, 32767, 32767, + 32768, 32768, 32768, 32768, + 65535, 65535, 65535, 65535, + 1, 1, 1, 1, + }, + { + 32767, 32767, 32767, 32767, + 32768, 32768, 32768, 32768, + 49151, 49151, 49151, 49151, + 16384, 16384, 16384, 16384, + }, + }, + { + { 65535 }, + { + 0, 0, 0, 0, + 65535, 65535, 65535, 65535, + 65534, 65534, 65534, 
65534, + 1, 1, 1, 1, + }, + { + 32768, 32768, 32768, 32768, + 65535, 65535, 65535, 65535, + 65535, 65535, 65535, 65535, + 32768, 32768, 32768, 32768, + }, + }, +}; + +uint32_t TEST_BINARY_DATA(uint32_t, avg_ceil)[][3][N] = +{ + { + { 0 }, + { + 2, 2, 2, 2, + 1, 1, 1, 1, + 0, 0, 0, 0, + 4, 4, 4, 4, + }, + { + 1, 1, 1, 1, + 1, 1, 1, 1, + 0, 0, 0, 0, + 2, 2, 2, 2, + }, + }, + { + { 2147483647 }, + { + 2147483647, 2147483647, 2147483647, 2147483647, + 2147483648, 2147483648, 2147483648, 2147483648, + 4294967295, 4294967295, 4294967295, 4294967295, + 1, 1, 1, 1, + }, + { + 2147483647, 2147483647, 2147483647, 2147483647, + 2147483648, 2147483648, 2147483648, 2147483648, + 3221225471, 3221225471, 3221225471, 3221225471, + 1073741824, 1073741824, 1073741824, 1073741824, + }, + }, + { + { 4294967295 }, + { + 0, 0, 0, 0, + 4294967295, 4294967295, 4294967295, 4294967295, + 4294967294, 4294967294, 4294967294, 4294967294, + 1, 1, 1, 1, + }, + { + 2147483648, 2147483648, 2147483648, 2147483648, + 4294967295, 4294967295, 4294967295, 4294967295, + 4294967295, 4294967295, 4294967295, 4294967295, + 2147483648, 2147483648, 2147483648, 2147483648, + }, + }, +}; + +uint64_t TEST_BINARY_DATA(uint64_t, avg_ceil)[][3][N] = +{ + { + { 0 }, + { + 2, 2, 2, 2, + 1, 1, 1, 1, + 0, 0, 0, 0, + 4, 4, 4, 4, + }, + { + 1, 1, 1, 1, + 1, 1, 1, 1, + 0, 0, 0, 0, + 2, 2, 2, 2, + }, + }, + { + { 9223372036854775807ull }, + { + 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull, + 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, + 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, + 1, 1, 1, 1, + }, + { + 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull, + 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, + 13835058055282163711ull, 13835058055282163711ull, 
13835058055282163711ull, 13835058055282163711ull, + 4611686018427387904ull, 4611686018427387904ull, 4611686018427387904ull, 4611686018427387904ull, + }, + }, + { + { 18446744073709551615ull }, + { + 0, 0, 0, 0, + 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, + 18446744073709551614ull, 18446744073709551614ull, 18446744073709551614ull, 18446744073709551614ull, + 1, 1, 1, 1, + }, + { + 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, + 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, + 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, + 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, + }, + }, +}; + #endif diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u16.c new file mode 100644 index 0000000..6297672 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u16.c @@ -0,0 +1,17 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */ + +#include "vx_binary.h" +#include "vx_binary_data.h" + +#define T uint16_t +#define NAME avg_ceil +#define FUNC AVG_CEIL_FUNC_WRAP(T) +#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME) + +DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, NAME) + +#define TEST_RUN(T, NAME, out, in, x, n) \ + RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n) + +#include "vx_binary_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u32.c new file mode 100644 index 0000000..30db24b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u32.c @@ -0,0 +1,17 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { 
dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */ + +#include "vx_binary.h" +#include "vx_binary_data.h" + +#define T uint32_t +#define NAME avg_ceil +#define FUNC AVG_CEIL_FUNC_WRAP(T) +#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME) + +DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, NAME) + +#define TEST_RUN(T, NAME, out, in, x, n) \ + RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n) + +#include "vx_binary_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u64.c new file mode 100644 index 0000000..db3c911 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u64.c @@ -0,0 +1,17 @@ +/* { dg-do run { target { riscv_v && rv64 } } } */ +/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */ + +#include "vx_binary.h" +#include "vx_binary_data.h" + +#define T uint64_t +#define NAME avg_ceil +#define FUNC AVG_CEIL_FUNC_WRAP(T) +#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME) + +DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, NAME) + +#define TEST_RUN(T, NAME, out, in, x, n) \ + RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n) + +#include "vx_binary_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u8.c new file mode 100644 index 0000000..a7755f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u8.c @@ -0,0 +1,17 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */ + +#include "vx_binary.h" +#include "vx_binary_data.h" + +#define T uint8_t +#define NAME avg_ceil +#define FUNC AVG_CEIL_FUNC_WRAP(T) +#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME) + +DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, NAME) + +#define TEST_RUN(T, NAME, out, in, x, n) \ + RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n) + +#include "vx_binary_run.h" diff --git 
a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u16-from-u32.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u16-from-u32.c new file mode 100644 index 0000000..7409232 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u16-from-u32.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -fdump-tree-optimized" } */ + +#include "sat_arith.h" + +#define NT uint16_t +#define WT uint32_t + +DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT) + +/* { dg-final { scan-tree-dump-times ".SAT_MUL" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u8-from-u16.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u8-from-u16.c new file mode 100644 index 0000000..ec79e5d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u8-from-u16.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -fdump-tree-optimized" } */ + +#include "sat_arith.h" + +#define NT uint8_t +#define WT uint16_t + +DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT) + +/* { dg-final { scan-tree-dump-times ".SAT_MUL" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u8-from-u32.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u8-from-u32.c new file mode 100644 index 0000000..eb95184 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u8-from-u32.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -fdump-tree-optimized" } */ + +#include "sat_arith.h" + +#define NT uint8_t +#define WT uint32_t + +DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT) + +/* { dg-final { scan-tree-dump-times ".SAT_MUL" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u16-from-u64.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u16-from-u64.c new file mode 100644 index 0000000..b1d33a9 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u16-from-u64.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc 
-mabi=lp64d -fdump-tree-optimized" } */ + +#include "sat_arith.h" + +#define NT uint16_t +#define WT uint64_t + +DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT) + +/* { dg-final { scan-tree-dump-times ".SAT_MUL" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u32-from-u64.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u32-from-u64.c new file mode 100644 index 0000000..af5ffecf --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u32-from-u64.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -fdump-tree-optimized" } */ + +#include "sat_arith.h" + +#define NT uint32_t +#define WT uint64_t + +DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT) + +/* { dg-final { scan-tree-dump-times ".SAT_MUL" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u8-from-u64.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u8-from-u64.c new file mode 100644 index 0000000..d65cab0 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u8-from-u64.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -fdump-tree-optimized" } */ + +#include "sat_arith.h" + +#define NT uint8_t +#define WT uint64_t + +DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT) + +/* { dg-final { scan-tree-dump-times ".SAT_MUL" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u32.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u32.c new file mode 100644 index 0000000..e212391 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u32.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { rv32 || rv64 } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "sat_arith.h" +#include "sat_arith_data.h" + +#define NT uint16_t +#define WT uint32_t +#define NAME usmul +#define DATA TEST_BINARY_DATA_WRAP(NT, NAME) +#define T TEST_BINARY_STRUCT_DECL_WRAP(NT, NAME) +#define RUN_BINARY(x, y) RUN_SAT_U_MUL_FMT_1_WRAP(NT, WT, x, y) + 
+DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT) + +#include "scalar_sat_binary_run_xxx.h" diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u64.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u64.c index 065afb8..79d3fb3 100644 --- a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u64.c +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u64.c @@ -1,4 +1,4 @@ -/* { dg-do run { target { rv32 } } } */ +/* { dg-do run { target { rv32 || rv64 } } } */ /* { dg-additional-options "-std=c99" } */ #include "sat_arith.h" diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u32-from-u64.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u32-from-u64.c index 062bbc9..ad63db3 100644 --- a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u32-from-u64.c +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u32-from-u64.c @@ -1,4 +1,4 @@ -/* { dg-do run { target { rv32 } } } */ +/* { dg-do run { target { rv32 || rv64 } } } */ /* { dg-additional-options "-std=c99" } */ #include "sat_arith.h" diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u16.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u16.c new file mode 100644 index 0000000..f5a0ab5 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u16.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { rv32 || rv64 } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "sat_arith.h" +#include "sat_arith_data.h" + +#define NT uint8_t +#define WT uint16_t +#define NAME usmul +#define DATA TEST_BINARY_DATA_WRAP(NT, NAME) +#define T TEST_BINARY_STRUCT_DECL_WRAP(NT, NAME) +#define RUN_BINARY(x, y) RUN_SAT_U_MUL_FMT_1_WRAP(NT, WT, x, y) + +DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT) + +#include "scalar_sat_binary_run_xxx.h" diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u32.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u32.c new file mode 100644 index 0000000..32074a4 --- 
/dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u32.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { rv32 || rv64 } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "sat_arith.h" +#include "sat_arith_data.h" + +#define NT uint8_t +#define WT uint32_t +#define NAME usmul +#define DATA TEST_BINARY_DATA_WRAP(NT, NAME) +#define T TEST_BINARY_STRUCT_DECL_WRAP(NT, NAME) +#define RUN_BINARY(x, y) RUN_SAT_U_MUL_FMT_1_WRAP(NT, WT, x, y) + +DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT) + +#include "scalar_sat_binary_run_xxx.h" diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u64.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u64.c index e6f632b..16ca905 100644 --- a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u64.c +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u64.c @@ -1,4 +1,4 @@ -/* { dg-do run { target { rv32 } } } */ +/* { dg-do run { target { rv32 || rv64 } } } */ /* { dg-additional-options "-std=c99" } */ #include "sat_arith.h" diff --git a/gcc/testsuite/gcc.target/s390/spaceship-fp-1.c b/gcc/testsuite/gcc.target/s390/spaceship-fp-1.c new file mode 100644 index 0000000..56c3d77 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/spaceship-fp-1.c @@ -0,0 +1,23 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -mzarch -march=z13 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 2\)} 3 optimized } } */ +/* { dg-final { scan-assembler-times {\tk[edx]br\t} 3 } } */ +/* { dg-final { scan-assembler-not {\tbrc} } } */ +/* { dg-final { scan-assembler-not {\tc[edx]br\t} } } */ + +#define TEST(T, U) \ + int test_##U (T x, T y) \ + { \ + if (x == y) \ + return 0; \ + else if (x < y) \ + return -1; \ + else if (x > y) \ + return 1; \ + else \ + return 2; \ + } + +TEST (float, float) +TEST (double, double) +TEST (long double, longdouble) diff --git a/gcc/testsuite/gcc.target/s390/spaceship-fp-2.c 
b/gcc/testsuite/gcc.target/s390/spaceship-fp-2.c new file mode 100644 index 0000000..0c6e6b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/spaceship-fp-2.c @@ -0,0 +1,23 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -mzarch -march=z13 -ffinite-math-only -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 2\)} 3 optimized } } */ +/* { dg-final { scan-assembler-times {\tc[edx]br\t} 3 } } */ +/* { dg-final { scan-assembler-not {\tbrc} } } */ +/* { dg-final { scan-assembler-not {\tk[edx]br\t} } } */ + +#define TEST(T, U) \ + int test_##U (T x, T y) \ + { \ + if (x == y) \ + return 0; \ + else if (x < y) \ + return -1; \ + else if (x > y) \ + return 1; \ + else \ + return 2; \ + } + +TEST (float, float) +TEST (double, double) +TEST (long double, longdouble) diff --git a/gcc/testsuite/gcc.target/s390/spaceship-fp-3.c b/gcc/testsuite/gcc.target/s390/spaceship-fp-3.c new file mode 100644 index 0000000..2f567d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/spaceship-fp-3.c @@ -0,0 +1,23 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -mzarch -march=z13 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 42\)} 3 optimized } } */ +/* { dg-final { scan-assembler-times {\tk[edx]br\t} 3 } } */ +/* { dg-final { scan-assembler-not {\tbrc} } } */ +/* { dg-final { scan-assembler-not {\tc[edx]br\t} } } */ + +#define TEST(T, U) \ + int test_##U (T x, T y) \ + { \ + if (x == y) \ + return 0; \ + else if (x < y) \ + return -1; \ + else if (x > y) \ + return 1; \ + else \ + return 42; \ + } + +TEST (float, float) +TEST (double, double) +TEST (long double, longdouble) diff --git a/gcc/testsuite/gcc.target/s390/spaceship-fp-4.c b/gcc/testsuite/gcc.target/s390/spaceship-fp-4.c new file mode 100644 index 0000000..4531ecb --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/spaceship-fp-4.c @@ -0,0 +1,53 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-options 
"-O2 -mzarch -march=z13 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 0\)} 3 optimized } } */ +/* { dg-final { scan-assembler-times {\tk[edx]br\t} 3 } } */ +/* { dg-final { scan-assembler-not {\tloc} } } */ +/* { dg-final { scan-assembler-not {\tbrc} } } */ +/* { dg-final { scan-assembler-not {\tc[edx]br\t} } } */ + +/* By time of writing this we emit + + kebr %f0,%f2 + jo .L2 + je .L3 + jnh .L10 + jg f3@PLT +.L10: + jg f2@PLT +.L3: + jg f1@PLT +.L2: + jg f4@PLT + + which is not optimal. Instead we could fold the conditional branch with the + unconditional into something along the lines + + kebr %f0,%f2 + jo f4@PLT + je f1@PLT + jnh f2@PLT + jg f3@PLT +*/ + +void f1 (void); +void f2 (void); +void f3 (void); +void f4 (void); + +#define TEST(T, U) \ + void test_##U (T x, T y) \ + { \ + if (x == y) \ + f1 (); \ + else if (x < y) \ + f2 (); \ + else if (x > y) \ + f3 (); \ + else \ + f4 (); \ + } + +TEST (float, float) +TEST (double, double) +TEST (long double, longdouble) diff --git a/gcc/testsuite/gcc.target/s390/spaceship-int-1.c b/gcc/testsuite/gcc.target/s390/spaceship-int-1.c new file mode 100644 index 0000000..8ca2677 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/spaceship-int-1.c @@ -0,0 +1,30 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -mzarch -march=z13 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, -1\)} 4 optimized } } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 1\)} 5 optimized } } */ +/* { dg-final { scan-assembler-times {\tlhi} 9 } } */ +/* { dg-final { scan-assembler-times {\tloc} 18 } } */ + +#define TEST(T, U) \ + int test_##U (T x, T y) \ + { \ + if (x == y) \ + return 0; \ + else if (x < y) \ + return -1; \ + else \ + return 1; \ + } + +TEST(signed char, schar) +TEST(unsigned char, uchar) +TEST(char, char) + +TEST(short, sshort) +TEST(unsigned short, ushort) + +TEST(int, sint) +TEST(unsigned int, uint) 
+ +TEST(long, slong) +TEST(unsigned long, ulong) diff --git a/gcc/testsuite/gcc.target/s390/spaceship-int-2.c b/gcc/testsuite/gcc.target/s390/spaceship-int-2.c new file mode 100644 index 0000000..5f7975c --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/spaceship-int-2.c @@ -0,0 +1,24 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2 -mzarch -march=z13 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, -1\)} 1 optimized } } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 1\)} 1 optimized } } */ +/* { dg-final { scan-assembler-times {\tvecg} 1 } } */ +/* { dg-final { scan-assembler-times {\tveclg} 1 } } */ +/* { dg-final { scan-assembler-times {\tvchlgs} 2 } } */ +/* { dg-final { scan-assembler-times {\tvceqgs} 2 } } */ +/* { dg-final { scan-assembler-times {\tlhi} 2 } } */ +/* { dg-final { scan-assembler-times {\tloc} 4 } } */ + +#define TEST(T, U) \ + int test_##U (T x, T y) \ + { \ + if (x == y) \ + return 0; \ + else if (x < y) \ + return -1; \ + else \ + return 1; \ + } + +TEST(__int128, sint128) +TEST(unsigned __int128, uint128) diff --git a/gcc/testsuite/gcc.target/s390/spaceship-int-3.c b/gcc/testsuite/gcc.target/s390/spaceship-int-3.c new file mode 100644 index 0000000..46b0e4a --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/spaceship-int-3.c @@ -0,0 +1,21 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2 -march=z17 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, -1\)} 1 optimized } } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 1\)} 1 optimized } } */ +/* { dg-final { scan-assembler-times {\tvecq\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tveclq\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tloc} 4 } } */ + +#define TEST(T, U) \ + int test_##U (T x, T y) \ + { \ + if (x == y) \ + return 0; \ + else if (x < y) \ + return -1; \ + else \ + return 1; \ + } + 
+TEST(__int128, sint128) +TEST(unsigned __int128, uint128) diff --git a/gcc/testsuite/gfortran.dg/split_1.f90 b/gcc/testsuite/gfortran.dg/split_1.f90 new file mode 100644 index 0000000..21659b0 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/split_1.f90 @@ -0,0 +1,28 @@ +! { dg-do run } +program b + character(len=:), allocatable :: input + character(len=2) :: set = ', ' + integer :: p + input = " one,last example," + p = 0 + + call split(input, set, p) + if (p /= 1) STOP 1 + call split(input, set, p) + if (p /= 5) STOP 2 + call split(input, set, p) + if (p /= 10) STOP 3 + call split(input, set, p) + if (p /= 18) STOP 4 + call split(input, set, p) + if (p /= 19) STOP 5 + + call split(input, set, p, .true.) + if (p /= 18) STOP 6 + call split(input, set, p, .true.) + if (p /= 10) STOP 7 + call split(input, set, p, .true.) + if (p /= 5) STOP 8 + call split(input, set, p, .true.) + if (p /= 1) STOP 9 +end program b diff --git a/gcc/testsuite/gfortran.dg/split_2.f90 b/gcc/testsuite/gfortran.dg/split_2.f90 new file mode 100644 index 0000000..9afb30b --- /dev/null +++ b/gcc/testsuite/gfortran.dg/split_2.f90 @@ -0,0 +1,22 @@ +! { dg-do run } +program b + integer, parameter :: ucs4 = selected_char_kind('ISO_10646') + character(kind=ucs4, len=:), allocatable :: input, set + integer :: p = 0 + + input = char(int(z'4f60'), ucs4) // char(int(z'597d'), ucs4) // char(int(z'4f60'), ucs4) // char(int(z'4e16'), ucs4) + set = char(int(z'597d'), ucs4) // char(int(z'4e16'), ucs4) + + call split(input, set, p) + if (p /= 2) stop 1 + call split(input, set, p) + if (p /= 4) stop 2 + call split(input, set, p) + if (p /= 5) stop 3 + call split(input, set, p, .true.) + if (p /= 4) stop 4 + call split(input, set, p, .true.) + if (p /= 2) stop 5 + call split(input, set, p, .true.) 
+ if (p /= 0) stop 6 +end program b diff --git a/gcc/testsuite/gfortran.dg/split_3.f90 b/gcc/testsuite/gfortran.dg/split_3.f90 new file mode 100644 index 0000000..bec3fdc --- /dev/null +++ b/gcc/testsuite/gfortran.dg/split_3.f90 @@ -0,0 +1,11 @@ +! { dg-do run } +! { dg-shouldfail "Fortran runtime error" } + +program b + character(len=:), allocatable :: input + character(len=2) :: set = ', ' + integer :: p + input = " one,last example," + p = -1 + call split(input, set, p) +end program b diff --git a/gcc/testsuite/gfortran.dg/split_4.f90 b/gcc/testsuite/gfortran.dg/split_4.f90 new file mode 100644 index 0000000..a3c27bb --- /dev/null +++ b/gcc/testsuite/gfortran.dg/split_4.f90 @@ -0,0 +1,11 @@ +! { dg-do run } +! { dg-shouldfail "Fortran runtime error" } + +program b + character(len=:), allocatable :: input + character(len=2) :: set = ', ' + integer :: p + input = " one,last example," + p = 0 + call split(input, set, p, .true.) +end program b diff --git a/gcc/testsuite/gm2/warnings/style/fail/badvarname.mod b/gcc/testsuite/gm2/warnings/style/fail/badvarname.mod new file mode 100644 index 0000000..e589b0d --- /dev/null +++ b/gcc/testsuite/gm2/warnings/style/fail/badvarname.mod @@ -0,0 +1,14 @@ +MODULE badvarname ; + + +PROCEDURE Foo ; +VAR + end: CARDINAL ; +BEGIN + end := 1 +END Foo ; + + +BEGIN + Foo +END badvarname. diff --git a/gcc/testsuite/gm2/warnings/style/fail/warnings-style-fail.exp b/gcc/testsuite/gm2/warnings/style/fail/warnings-style-fail.exp new file mode 100644 index 0000000..f44ed80 --- /dev/null +++ b/gcc/testsuite/gm2/warnings/style/fail/warnings-style-fail.exp @@ -0,0 +1,44 @@ +# Expect driver script for GCC Regression Tests +# Copyright (C) 2025 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# This file was written by Gaius Mulley (gaius.mulley@southwales.ac.uk) +# for GNU Modula-2. + +if $tracelevel then { + strace $tracelevel +} + +# load support procs +load_lib gm2-torture.exp + +gm2_init_pim "${srcdir}/gm2/warnings/style/fail" + +global TORTURE_OPTIONS + +set old_options $TORTURE_OPTIONS +set TORTURE_OPTIONS { { -O0 -g -Werror=style } } + +foreach testcase [lsort [glob -nocomplain $srcdir/$subdir/*.mod]] { + # If we're only testing specific files and this isn't one of them, skip it. + if ![runtest_file_p $runtests $testcase] then { + continue + } + + gm2-torture-fail $testcase +} + +set TORTURE_OPTIONS $old_options diff --git a/gcc/testsuite/lib/profopt.exp b/gcc/testsuite/lib/profopt.exp index b4d244b..81d86c6 100644 --- a/gcc/testsuite/lib/profopt.exp +++ b/gcc/testsuite/lib/profopt.exp @@ -382,6 +382,7 @@ proc profopt-execute { src } { unsupported "$testcase" unset testname_with_flags verbose "$src not supported on this target, skipping it" 3 + cleanup-after-saved-dg-test return } @@ -458,6 +459,7 @@ proc profopt-execute { src } { unsupported "$testcase -fauto-profile: cannot run create_gcov" unset testname_with_flags set status "fail" + cleanup-after-saved-dg-test return } set status [remote_wait "" 300] diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc index a8b800b..b7ce072 100644 --- a/gcc/tree-if-conv.cc +++ b/gcc/tree-if-conv.cc @@ -1755,7 +1755,7 @@ strip_nop_cond_scalar_reduction (bool has_nop, tree op) EXTENDED is true if PHI has > 2 arguments. 
*/ static bool -is_cond_scalar_reduction (basic_block bb, tree phi_res, gimple **reduc, tree arg_0, tree arg_1, +is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1, tree *op0, tree *op1, bool extended, bool* has_nop, gimple **nop_reduc) { @@ -1763,6 +1763,7 @@ is_cond_scalar_reduction (basic_block bb, tree phi_res, gimple **reduc, tree arg gimple *stmt; gimple *header_phi = NULL; enum tree_code reduction_op; + basic_block bb = gimple_bb (phi); class loop *loop = bb->loop_father; edge latch_e = loop_latch_edge (loop); imm_use_iterator imm_iter; @@ -1790,7 +1791,7 @@ is_cond_scalar_reduction (basic_block bb, tree phi_res, gimple **reduc, tree arg if (gimple_bb (header_phi) != loop->header) return false; - if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != phi_res) + if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi)) return false; if (gimple_code (stmt) != GIMPLE_ASSIGN @@ -1888,7 +1889,7 @@ is_cond_scalar_reduction (basic_block bb, tree phi_res, gimple **reduc, tree arg continue; if (use_stmt == SSA_NAME_DEF_STMT (r_op1)) continue; - if (use_stmt != SSA_NAME_DEF_STMT (phi_res)) + if (use_stmt != phi) return false; } } @@ -2198,8 +2199,8 @@ commutative: and *RES to the new values if the factoring happened. Loops until all of the factoring is completed. */ -static bool -factor_out_operators (gimple_stmt_iterator *pgsi, tree *res, gimple_stmt_iterator *gsi, +static void +factor_out_operators (tree *res, gimple_stmt_iterator *gsi, tree *arg0, tree *arg1, gphi *phi) { gimple_match_op arg0_op, arg1_op; @@ -2207,28 +2208,28 @@ factor_out_operators (gimple_stmt_iterator *pgsi, tree *res, gimple_stmt_iterato again: if (TREE_CODE (*arg0) != SSA_NAME || TREE_CODE (*arg1) != SSA_NAME) - return repeated; + return; if (operand_equal_p (*arg0, *arg1)) - return repeated; + return; /* If either args have > 1 use, then this transformation actually increases the number of expressions evaluated at runtime. */ if (repeated ? 
(!has_zero_uses (*arg0) || !has_zero_uses (*arg1)) : (!has_single_use (*arg0) || !has_single_use (*arg1))) - return repeated; + return; gimple *arg0_def_stmt = SSA_NAME_DEF_STMT (*arg0); if (!gimple_extract_op (arg0_def_stmt, &arg0_op)) - return repeated; + return; /* At this point there should be no ssa names occuring in abnormals. */ gcc_assert (!arg0_op.operands_occurs_in_abnormal_phi ()); gimple *arg1_def_stmt = SSA_NAME_DEF_STMT (*arg1); if (!gimple_extract_op (arg1_def_stmt, &arg1_op)) - return repeated; + return; /* At this point there should be no ssa names occuring in abnormals. */ gcc_assert (!arg1_op.operands_occurs_in_abnormal_phi ()); @@ -2237,15 +2238,15 @@ again: or the number operands. */ if (arg1_op.code != arg0_op.code || arg1_op.num_ops != arg0_op.num_ops) - return repeated; + return; tree new_arg0, new_arg1; int opnum = find_different_opnum (arg0_op, arg1_op, &new_arg0, &new_arg1); if (opnum == -1) - return repeated; + return; if (!types_compatible_p (TREE_TYPE (new_arg0), TREE_TYPE (new_arg1))) - return repeated; + return; tree new_res = make_ssa_name (TREE_TYPE (new_arg0), NULL); /* Create the operation stmt if possible and insert it. */ @@ -2261,7 +2262,7 @@ again: if (!result) { release_ssa_name (new_res); - return repeated; + return; } gsi_insert_seq_before (gsi, seq, GSI_CONTINUE_LINKING); @@ -2276,10 +2277,6 @@ again: fprintf (dump_file, ".\n"); } - /* Remove the phi and move to the next phi arg if needed. */ - if (!repeated) - remove_phi_node (pgsi, false); - /* Remove the old operation(s) that has single use. */ gimple_stmt_iterator gsi_for_def; @@ -2294,6 +2291,13 @@ again: *arg0 = new_arg0; *arg1 = new_arg1; *res = new_res; + + /* Update the phi node too. */ + gimple_phi_set_result (phi, new_res); + gimple_phi_arg (phi, 0)->def = new_arg0; + gimple_phi_arg (phi, 1)->def = new_arg1; + update_stmt (phi); + repeated = true; goto again; } @@ -2403,9 +2407,8 @@ cmp_arg_entry (const void *p1, const void *p2, void * /* data.
*/) vectorization. */ -static bool -predicate_scalar_phi (gimple_stmt_iterator *phi_gsi, gphi *phi, - gimple_stmt_iterator *gsi, bool loop_versioned) +static void +predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi, bool loop_versioned) { gimple *new_stmt = NULL, *reduc, *nop_reduc; tree rhs, res, arg0, arg1, op0, op1, scev; @@ -2415,11 +2418,10 @@ predicate_scalar_phi (gimple_stmt_iterator *phi_gsi, gphi *phi, basic_block bb; unsigned int i; bool has_nop; - bool removed_phi = false; res = gimple_phi_result (phi); if (virtual_operand_p (res)) - return removed_phi; + return; if ((rhs = degenerate_phi_result (phi)) || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father, @@ -2436,7 +2438,7 @@ predicate_scalar_phi (gimple_stmt_iterator *phi_gsi, gphi *phi, new_stmt = gimple_build_assign (res, rhs); gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); update_stmt (new_stmt); - return removed_phi; + return; } bb = gimple_bb (phi); @@ -2482,13 +2484,9 @@ predicate_scalar_phi (gimple_stmt_iterator *phi_gsi, gphi *phi, /* Factor out operand if possible. This can only be done easily for PHI with 2 elements. */ - if (factor_out_operators (phi_gsi, &res, gsi, &arg0, &arg1, phi)) - { - phi = nullptr; - removed_phi = true; - } + factor_out_operators (&res, gsi, &arg0, &arg1, phi); - if (is_cond_scalar_reduction (bb, res, &reduc, arg0, arg1, + if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1, &op0, &op1, false, &has_nop, &nop_reduc)) { @@ -2517,7 +2515,7 @@ predicate_scalar_phi (gimple_stmt_iterator *phi_gsi, gphi *phi, fprintf (dump_file, "new phi replacement stmt\n"); print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM); } - return removed_phi; + return; } /* Create hashmap for PHI node which contain vector of argument indexes @@ -2585,7 +2583,7 @@ predicate_scalar_phi (gimple_stmt_iterator *phi_gsi, gphi *phi, /* Gimplify the condition to a valid cond-expr conditonal operand. 
*/ cond = force_gimple_operand_gsi (gsi, unshare_expr (cond), true, NULL_TREE, true, GSI_SAME_STMT); - if (!(is_cond_scalar_reduction (bb, res, &reduc, arg0 , arg1, + if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1, &op0, &op1, true, &has_nop, &nop_reduc))) rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond), swap ? arg1 : arg0, @@ -2615,7 +2613,6 @@ predicate_scalar_phi (gimple_stmt_iterator *phi_gsi, gphi *phi, fprintf (dump_file, "new extended phi replacement stmt\n"); print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM); } - return removed_phi; } /* Replaces in LOOP all the scalar phi nodes other than those in the @@ -2652,8 +2649,8 @@ predicate_all_scalar_phis (class loop *loop, bool loop_versioned) gsi_next (&phi_gsi); else { - if (!predicate_scalar_phi (&phi_gsi, phi, &gsi, loop_versioned)) - remove_phi_node (&phi_gsi, false); + predicate_scalar_phi (phi, &gsi, loop_versioned); + remove_phi_node (&phi_gsi, false); } } } diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc index 3c38f3d..c140f76 100644 --- a/gcc/tree-ssa-reassoc.cc +++ b/gcc/tree-ssa-reassoc.cc @@ -7167,9 +7167,10 @@ reassociate_bb (basic_block bb) /* If the target support FMA, rank_ops_for_fma will detect if the chain has fmas and rearrange the ops if so. */ - if (direct_internal_fn_supported_p (IFN_FMA, - TREE_TYPE (lhs), - opt_type) + if (!reassoc_insert_powi_p + && direct_internal_fn_supported_p (IFN_FMA, + TREE_TYPE (lhs), + opt_type) && (rhs_code == PLUS_EXPR || rhs_code == MINUS_EXPR)) { mult_num = rank_ops_for_fma (&ops); @@ -7200,7 +7201,8 @@ reassociate_bb (basic_block bb) to make sure the ones that get the double binary op are chosen wisely. */ int len = ops.length (); - if (len >= 3 + if (!reassoc_insert_powi_p + && len >= 3 && (!has_fma /* width > 1 means ranking ops results in better parallelism. 
Check current value to avoid diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc index d088287..04b357f 100644 --- a/gcc/tree-switch-conversion.cc +++ b/gcc/tree-switch-conversion.cc @@ -55,6 +55,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #include "hwint.h" #include "internal-fn.h" #include "diagnostic-core.h" +#include "output.h" /* ??? For lang_hooks.types.type_for_mode, but is there a word_mode type in the GIMPLE type system that is language-independent? */ @@ -1033,6 +1034,16 @@ switch_conversion::build_one_array (int num, tree arr_index_type, /* The decl is mergable since we don't take the address ever and just reading from it. */ DECL_MERGEABLE (decl) = 1; + + /* Increase the alignments as needed. */ + if (tree_to_uhwi (DECL_SIZE (decl)) > DECL_ALIGN (decl)) + { + unsigned HOST_WIDE_INT s = tree_to_uhwi (DECL_SIZE (decl)); + /* Only support up to the max supported for merging. */ + if (s <= MAX_ALIGN_MERGABLE) + SET_DECL_ALIGN (decl, HOST_WIDE_INT_1U << ceil_log2 (s)); + } + if (offloading_function_p (cfun->decl)) DECL_ATTRIBUTES (decl) = tree_cons (get_identifier ("omp declare target"), NULL_TREE, diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index 3bf2852..da700cd 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -2918,12 +2918,14 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) 2) there is at least one unsupported misaligned data ref with an unknown misalignment, and 3) all misaligned data refs with a known misalignment are supported, and - 4) the number of runtime alignment checks is within reason. */ + 4) the number of runtime alignment checks is within reason. + 5) the vectorization factor is a constant. 
*/ do_versioning = (optimize_loop_nest_for_speed_p (loop) && !loop->inner /* FORNOW */ - && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP); + && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP) + && LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (); if (do_versioning) { @@ -2964,17 +2966,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) break; } - /* At present we don't support versioning for alignment - with variable VF, since there's no guarantee that the - VF is a power of two. We could relax this if we added - a way of enforcing a power-of-two size. */ - unsigned HOST_WIDE_INT size; - if (!GET_MODE_SIZE (TYPE_MODE (vectype)).is_constant (&size)) - { - do_versioning = false; - break; - } - /* Forcing alignment in the first iteration is no good if we don't keep it across iterations. For now, just disable versioning in this case. @@ -2993,7 +2984,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) Construct the mask needed for this test. For example, GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the mask must be 15 = 0xf. */ - int mask = size - 1; + gcc_assert (DR_TARGET_ALIGNMENT (dr_info).is_constant ()); + int mask = DR_TARGET_ALIGNMENT (dr_info).to_constant () - 1; /* FORNOW: use the same mask to test all potentially unaligned references in the loop. 
*/ @@ -4542,7 +4534,6 @@ vect_describe_gather_scatter_call (stmt_vec_info stmt_info, (call, internal_fn_alias_ptr_index (info->ifn)); info->offset = gimple_call_arg (call, internal_fn_offset_index (info->ifn)); - info->offset_dt = vect_unknown_def_type; info->offset_vectype = NULL_TREE; info->scale = TREE_INT_CST_LOW (gimple_call_arg (call, internal_fn_scale_index (info->ifn))); @@ -4872,7 +4863,6 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, get_object_alignment (DR_REF (dr))); info->offset = off; - info->offset_dt = vect_unknown_def_type; info->offset_vectype = offset_vectype; info->scale = scale; info->element_type = TREE_TYPE (vectype); diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index 2d01a4b..7fcbc1a 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -3295,7 +3295,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo), bound_prolog + bound_epilog) - : (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo) + : (!LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING (loop_vinfo) || vect_epilogues)); /* Epilog loop must be executed if the number of iterations for epilog diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index cb27d16..a9c7105 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -4950,6 +4950,9 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size, max_tree_size, &limit, force_single_lane)) { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "SLP discovery of reduction chain failed\n"); /* Dissolve reduction chain group. 
*/ stmt_vec_info vinfo = first_element; stmt_vec_info last = NULL; diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 9edc4a8..88a12a1 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -1676,7 +1676,6 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, tree vectype, get_object_alignment (DR_REF (dr))); gs_info->element_type = TREE_TYPE (vectype); gs_info->offset = fold_convert (offset_type, step); - gs_info->offset_dt = vect_constant_def; gs_info->scale = scale; gs_info->memory_type = memory_type; return true; @@ -1703,19 +1702,32 @@ static bool vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info, tree vectype, loop_vec_info loop_vinfo, bool masked_p, gather_scatter_info *gs_info, - vec<int> *elsvals) + vec<int> *elsvals, + unsigned int group_size, + bool single_element_p) { if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, elsvals) || gs_info->ifn == IFN_LAST) - return vect_truncate_gather_scatter_offset (stmt_info, vectype, loop_vinfo, - masked_p, gs_info, elsvals); + { + if (!vect_truncate_gather_scatter_offset (stmt_info, vectype, loop_vinfo, + masked_p, gs_info, elsvals)) + return false; + } + else + { + tree old_offset_type = TREE_TYPE (gs_info->offset); + tree new_offset_type = TREE_TYPE (gs_info->offset_vectype); - tree old_offset_type = TREE_TYPE (gs_info->offset); - tree new_offset_type = TREE_TYPE (gs_info->offset_vectype); + gcc_assert (TYPE_PRECISION (new_offset_type) + >= TYPE_PRECISION (old_offset_type)); + gs_info->offset = fold_convert (new_offset_type, gs_info->offset); + } - gcc_assert (TYPE_PRECISION (new_offset_type) - >= TYPE_PRECISION (old_offset_type)); - gs_info->offset = fold_convert (new_offset_type, gs_info->offset); + if (!single_element_p + && !targetm.vectorize.prefer_gather_scatter (TYPE_MODE (vectype), + gs_info->scale, + group_size)) + return false; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -1977,7 +1989,49 @@ get_group_load_store_type 
(vec_info *vinfo, stmt_vec_info stmt_info, separated by the stride, until we have a complete vector. Fall back to scalar accesses if that isn't possible. */ *memory_access_type = VMAT_STRIDED_SLP; - else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info)) + else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) + { + *memory_access_type = VMAT_GATHER_SCATTER; + if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, + elsvals)) + gcc_unreachable (); + slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0]; + tree offset_vectype = SLP_TREE_VECTYPE (offset_node); + gs_info->offset_vectype = offset_vectype; + /* When using internal functions, we rely on pattern recognition + to convert the type of the offset to the type that the target + requires, with the result being a call to an internal function. + If that failed for some reason (e.g. because another pattern + took priority), just handle cases in which the offset already + has the right type. */ + if (GATHER_SCATTER_IFN_P (*gs_info) + && !is_gimple_call (stmt_info->stmt) + && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset), + offset_vectype)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "%s offset requires a conversion\n", + vls_type == VLS_LOAD ? 
"gather" : "scatter"); + return false; + } + else if (GATHER_SCATTER_EMULATED_P (*gs_info)) + { + if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant () + || !TYPE_VECTOR_SUBPARTS (offset_vectype).is_constant () + || VECTOR_BOOLEAN_TYPE_P (offset_vectype) + || !constant_multiple_p (TYPE_VECTOR_SUBPARTS (offset_vectype), + TYPE_VECTOR_SUBPARTS (vectype))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "unsupported vector types for emulated " + "gather.\n"); + return false; + } + } + } + else { int cmp = compare_step_with_zero (vinfo, stmt_info); if (cmp < 0) @@ -2221,64 +2275,12 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, if ((*memory_access_type == VMAT_ELEMENTWISE || *memory_access_type == VMAT_STRIDED_SLP) && !STMT_VINFO_GATHER_SCATTER_P (stmt_info) - && single_element_p && SLP_TREE_LANES (slp_node) == 1 && loop_vinfo && vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo, - masked_p, gs_info, elsvals)) + masked_p, gs_info, elsvals, + group_size, single_element_p)) *memory_access_type = VMAT_GATHER_SCATTER; - else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) - { - tree offset; - slp_tree offset_node; - *memory_access_type = VMAT_GATHER_SCATTER; - if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, - elsvals)) - gcc_unreachable (); - /* When using internal functions, we rely on pattern recognition - to convert the type of the offset to the type that the target - requires, with the result being a call to an internal function. - If that failed for some reason (e.g. because another pattern - took priority), just handle cases in which the offset already - has the right type. 
*/ - else if (GATHER_SCATTER_IFN_P (*gs_info) - && !is_gimple_call (stmt_info->stmt) - && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset), - TREE_TYPE (gs_info->offset_vectype))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "%s offset requires a conversion\n", - vls_type == VLS_LOAD ? "gather" : "scatter"); - return false; - } - else if (!vect_is_simple_use (vinfo, slp_node, 0, &offset, &offset_node, - &gs_info->offset_dt, - &gs_info->offset_vectype)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "%s index use not simple.\n", - vls_type == VLS_LOAD ? "gather" : "scatter"); - return false; - } - else if (GATHER_SCATTER_EMULATED_P (*gs_info)) - { - if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant () - || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant () - || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype) - || !constant_multiple_p (TYPE_VECTOR_SUBPARTS - (gs_info->offset_vectype), - TYPE_VECTOR_SUBPARTS (vectype))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "unsupported vector types for emulated " - "gather.\n"); - return false; - } - } - } if (*memory_access_type == VMAT_CONTIGUOUS_DOWN || *memory_access_type == VMAT_CONTIGUOUS_REVERSE) @@ -3312,7 +3314,13 @@ vectorizable_call (vec_info *vinfo, int mask_opno = -1; if (internal_fn_p (cfn)) - mask_opno = internal_fn_mask_index (as_internal_fn (cfn)); + { + /* We can only handle direct internal masked calls here, + vectorizable_simd_clone_call is for the rest. 
*/ + if (cfn == CFN_MASK_CALL) + return false; + mask_opno = internal_fn_mask_index (as_internal_fn (cfn)); + } for (i = 0; i < nargs; i++) { @@ -10211,29 +10219,6 @@ vectorizable_load (vec_info *vinfo, tree bump; tree vec_offset = NULL_TREE; - if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) - { - aggr_type = NULL_TREE; - bump = NULL_TREE; - } - else if (memory_access_type == VMAT_GATHER_SCATTER) - { - aggr_type = elem_type; - if (!costing_p) - vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo, - gsi, &gs_info, - &bump, &vec_offset, loop_lens); - } - else - { - if (memory_access_type == VMAT_LOAD_STORE_LANES) - aggr_type = build_array_type_nelts (elem_type, group_size * nunits); - else - aggr_type = vectype; - if (!costing_p) - bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type, - memory_access_type, loop_lens); - } auto_vec<tree> vec_offsets; auto_vec<tree> vec_masks; @@ -10248,6 +10233,11 @@ vectorizable_load (vec_info *vinfo, gcc_assert (alignment_support_scheme == dr_aligned || alignment_support_scheme == dr_unaligned_supported); + aggr_type = build_array_type_nelts (elem_type, group_size * nunits); + if (!costing_p) + bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type, + memory_access_type, loop_lens); + unsigned int inside_cost = 0, prologue_cost = 0; /* For costing some adjacent vector loads, we'd like to cost with the total number of them once instead of cost each one by one. */ @@ -10409,21 +10399,32 @@ vectorizable_load (vec_info *vinfo, { gcc_assert (!grouped_load && !slp_perm); - unsigned int inside_cost = 0, prologue_cost = 0; - /* 1. Create the vector or array pointer update chain. 
*/ - if (!costing_p) + if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) { - if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) + aggr_type = NULL_TREE; + bump = NULL_TREE; + if (!costing_p) vect_get_gather_scatter_ops (loop, slp_node, &gs_info, &dataref_ptr, &vec_offsets); - else - dataref_ptr - = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type, - at_loop, offset, &dummy, gsi, - &ptr_incr, false, bump); + } + else + { + aggr_type = elem_type; + if (!costing_p) + { + vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo, + gsi, &gs_info, + &bump, &vec_offset, loop_lens); + dataref_ptr + = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type, + at_loop, offset, &dummy, gsi, + &ptr_incr, false, bump); + } } + unsigned int inside_cost = 0, prologue_cost = 0; + gimple *new_stmt = NULL; for (i = 0; i < vec_num; i++) { @@ -10744,6 +10745,11 @@ vectorizable_load (vec_info *vinfo, return true; } + aggr_type = vectype; + if (!costing_p) + bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type, + memory_access_type, loop_lens); + poly_uint64 group_elt = 0; unsigned int inside_cost = 0, prologue_cost = 0; /* For costing some adjacent vector loads, we'd like to cost with diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 4a1e4fc..e1900279 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1197,6 +1197,10 @@ public: || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L) \ || LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L)) +#define LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING(L) \ + ((L)->may_misalign_stmts.length () > 0 \ + && !LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (L)) + #define LOOP_VINFO_NITERS_KNOWN_P(L) \ (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0) @@ -1561,9 +1565,6 @@ struct gather_scatter_info { being added to the base. */ int scale; - /* The definition type for the vectorized offset. */ - enum vect_def_type offset_dt; - /* The type of the vectorized offset. 
*/ tree offset_vectype; diff --git a/gcc/varasm.cc b/gcc/varasm.cc index 8266282..000ad9e 100644 --- a/gcc/varasm.cc +++ b/gcc/varasm.cc @@ -871,7 +871,7 @@ mergeable_string_section (tree decl ATTRIBUTE_UNUSED, if (HAVE_GAS_SHF_MERGE && flag_merge_constants && TREE_CODE (decl) == STRING_CST && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE - && align <= 256 + && align <= MAX_ALIGN_MERGABLE && (len = int_size_in_bytes (TREE_TYPE (decl))) > 0 && TREE_STRING_LENGTH (decl) == len) { @@ -885,7 +885,7 @@ mergeable_string_section (tree decl ATTRIBUTE_UNUSED, mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (TREE_TYPE (decl))); modesize = GET_MODE_BITSIZE (mode); - if (modesize >= 8 && modesize <= 256 + if (modesize >= 8 && modesize <= MAX_ALIGN_MERGABLE && (modesize & (modesize - 1)) == 0) { if (align < modesize) @@ -919,16 +919,14 @@ mergeable_string_section (tree decl ATTRIBUTE_UNUSED, /* Return the section to use for constant merging. */ section * -mergeable_constant_section (machine_mode mode ATTRIBUTE_UNUSED, - unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED, - unsigned int flags ATTRIBUTE_UNUSED) +mergeable_constant_section (unsigned HOST_WIDE_INT size_bits, + unsigned HOST_WIDE_INT align, + unsigned int flags) { if (HAVE_GAS_SHF_MERGE && flag_merge_constants - && mode != VOIDmode - && mode != BLKmode - && known_le (GET_MODE_BITSIZE (mode), align) + && size_bits <= align && align >= 8 - && align <= 256 + && align <= MAX_ALIGN_MERGABLE && (align & (align - 1)) == 0) { const char *prefix = function_mergeable_rodata_prefix (); @@ -940,6 +938,38 @@ mergeable_constant_section (machine_mode mode ATTRIBUTE_UNUSED, } return readonly_data_section; } + + +/* Return the section to use for constant merging. Like the above + but the size stored as a tree. 
*/ +static section * +mergeable_constant_section (tree size_bits, + unsigned HOST_WIDE_INT align, + unsigned int flags) +{ + if (!size_bits || !tree_fits_uhwi_p (size_bits)) + return readonly_data_section; + return mergeable_constant_section (tree_to_uhwi (size_bits), align, flags); +} + + +/* Return the section to use for constant merging. Like the above + but given a mode rather than the size. */ + +section * +mergeable_constant_section (machine_mode mode, + unsigned HOST_WIDE_INT align, + unsigned int flags) +{ + /* If the mode is unknown (BLK or VOID), then return a non mergable section. */ + if (mode == BLKmode || mode == VOIDmode) + return readonly_data_section; + unsigned HOST_WIDE_INT size; + if (!GET_MODE_BITSIZE (mode).is_constant (&size)) + return readonly_data_section; + return mergeable_constant_section (size, align, flags); +} + /* Given NAME, a putative register name, discard any customary prefixes. */ @@ -7453,7 +7483,7 @@ default_elf_select_section (tree decl, int reloc, case SECCAT_RODATA_MERGE_STR_INIT: return mergeable_string_section (DECL_INITIAL (decl), align, 0); case SECCAT_RODATA_MERGE_CONST: - return mergeable_constant_section (DECL_MODE (decl), align, 0); + return mergeable_constant_section (DECL_SIZE (decl), align, 0); case SECCAT_SRODATA: sname = ".sdata2"; break; @@ -2514,6 +2514,10 @@ public: return false; if (lhs.size () != rhs.size ()) return false; + /* Case where either is a NULL pointer and therefore, as both are valid, + both are empty slices with length 0. */ + if (lhs.size () == 0) + return true; return memcmp (lhs.begin (), rhs.begin (), lhs.size ()) == 0; } |