diff options
Diffstat (limited to 'gcc')
354 files changed, 8137 insertions, 1298 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 1672582..ee6f906 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,723 @@ +2025-08-01 Richard Biener <rguenther@suse.de> + + PR tree-optimization/121350 + * tree-vect-stmts.cc (vectorizable_store): Pass down SLP + node when costing scalar stores in vect_body. + +2025-08-01 Richard Biener <rguenther@suse.de> + + PR tree-optimization/121349 + * tree-vect-stmts.cc (check_load_store_for_partial_vectors): + Get full SLP mask, reduce to uniform scalar_mask for further + processing if possible. + (vect_check_scalar_mask): Remove scalar mask output, remove + code conditional on slp_mask. + (vectorizable_call): Adjust. + (check_scan_store): Get and check SLP mask. + (vectorizable_store): Eliminate scalar mask variable. + (vectorizable_load): Likewise. + +2025-08-01 Gerald Pfeifer <gerald@pfeifer.com> + + * doc/install.texi (Prerequisites): mdocml.bsd.lv is now + mandoc.bsd.lv. + +2025-08-01 Richard Biener <rguenther@suse.de> + + * tree-vect-stmts.cc (get_group_load_store_type): Remove, + inline into ... + (get_load_store_type): ... this. Remove ncopies parameter. + (vectorizable_load): Adjust. + (vectorizable_store): Likewise. + +2025-08-01 Richard Biener <rguenther@suse.de> + + * tree-vect-stmts.cc (get_group_load_store_type): Remove + checks performed at SLP build time. + (vect_check_store_rhs): Remove scalar RHS output. + (vectorizable_store): Remove uses of scalar RHS. + +2025-08-01 Richard Biener <rguenther@suse.de> + + * tree-vectorizer.h (VMAT_UNINITIALIZED): New + vect_memory_access_type. + * tree-vect-slp.cc (_slp_tree::_slp_tree): Use it. + +2025-08-01 Richard Biener <rguenther@suse.de> + + PR tree-optimization/121338 + * tree-ssa-loop-ivopts.cc (avg_loop_niter): Return an + unsigned. + (adjust_setup_cost): When niters is so large the division + result is one or zero avoid it. + (create_new_ivs): Adjust. + +2025-08-01 Richard Biener <rguenther@suse.de> + + * tree-vectorizer.h (vect_simd_clone_data): New. + (_slp_tree::simd_clone_info): Remove. + (SLP_TREE_SIMD_CLONE_INFO): Likewise. + * tree-vect-slp.cc (_slp_tree::_slp_tree): Adjust. + (_slp_tree::~_slp_tree): Likewise. + * tree-vect-stmts.cc (vectorizable_simd_clone_call): Use + tyupe specific data to store SLP_TREE_SIMD_CLONE_INFO. + +2025-08-01 Richard Biener <rguenther@suse.de> + + * tree-vect-slp.cc (_slp_tree::_slp_tree): Adjust. + (_slp_tree::~_slp_tree): Likewise. + * tree-vectorizer.h (vect_data): New base class. + (_slp_tree::u): Remove. + (_slp_tree::data): Add pointer to vect_data. + (_slp_tree::get_data): New helper template. + +2025-08-01 Jakub Jelinek <jakub@redhat.com> + + PR middle-end/121322 + * gimple-ssa-store-merging.cc (find_bswap_or_nop): Return NULL if + count is 0. + +2025-07-31 Georg-Johann Lay <avr@gjlay.de> + + * config/avr/avr.opt.urls (-mfuse-move2): Add url. + +2025-07-31 Georg-Johann Lay <avr@gjlay.de> + + * config/avr/avr.cc (avr_output_addr_vec) <labl>: Asm out its .type. + +2025-07-31 Georg-Johann Lay <avr@gjlay.de> + + PR rtl-optimization/121340 + * config/avr/avr.opt (-mfuse-move2): New option. + * config/avr/avr-passes.def (avr_pass_2moves): Insert after combine. + * config/avr/avr-passes.cc (make_avr_pass_2moves): New function. + (pass_data avr_pass_data_2moves): New static variable. + (avr_pass_2moves): New rtl_opt_pass. + * config/avr/avr-protos.h (make_avr_pass_2moves): New proto. + * common/config/avr/avr-common.cc + (default_options avr_option_optimization_table) <-mfuse-move2>: + Set for -O1 and higher. + * doc/invoke.texi (AVR Options) <-mfuse-move2>: Document. + +2025-07-31 Tamar Christina <tamar.christina@arm.com> + + PR tree-optimization/120805 + * tree-vect-loop-manip.cc (vect_gen_vector_loop_niters): Skip setting + bounds on epilogues. + +2025-07-31 Wilco Dijkstra <wilco.dijkstra@arm.com> + + * common/config/aarch64/cpuinfo.h: Remove unused features, add FEAT_CSSC + and FEAT_MOPS. + * config/aarch64/aarch64-option-extensions.def: Remove FMV support + for RPRES, use PULL rather than AES, add FMV support for CSSC and MOPS. + +2025-07-31 Wilco Dijkstra <wilco.dijkstra@arm.com> + + * config/aarch64/tuning_models/generic_armv9_a.h + (generic_armv9_a_addrcost_table): Use zero cost for himode. + +2025-07-31 Richard Biener <rguenther@suse.de> + + * tree-vect-stmts.cc (get_group_load_store_type): Properly + compare the scalar type of the gather/scatter offset to + the offset vector component type. + +2025-07-31 Richard Biener <rguenther@suse.de> + + * gimple-fold.h (fold_stmt_inplace): Add valueization hook + argument, defaulted to no_follow_ssa_edges. + * gimple-fold.cc (fold_stmt_inplace): Adjust. + +2025-07-31 Artemiy Granat <a.granat@ispras.ru> + + * config/i386/i386-options.cc (ix86_handle_cconv_attribute): + Fix typo. + +2025-07-31 Artemiy Granat <a.granat@ispras.ru> + + * config/i386/i386-options.cc (ix86_handle_cconv_attribute): + Handle simultaneous use of regparm and thiscall attributes in + case when regparm is set before thiscall. + +2025-07-31 Artemiy Granat <a.granat@ispras.ru> + + * config/i386/i386-options.cc (ix86_handle_cconv_attribute): + Fix comments which state that combination of stdcall and fastcall + attributes is valid but redundant. + +2025-07-31 Artemiy Granat <a.granat@ispras.ru> + + * config/i386/i386-options.cc (ix86_handle_cconv_attribute): + Move 64-bit mode check before regparm handling. + +2025-07-31 Richard Biener <rguenther@suse.de> + + PR tree-optimization/121320 + * tree-ssa-sccvn.cc (ao_ref_init_from_vn_reference): Convert + op->off to poly_offset_int before multiplying by + BITS_PER_UNIT. + +2025-07-31 Richard Biener <rguenther@suse.de> + + PR tree-optimization/121323 + * tree-ssa-alias.cc (ao_ref_init_from_ptr_and_range): Check + the pointer offset fits in a HWI when represented in bits. + +2025-07-31 Yury Khrustalev <yury.khrustalev@arm.com> + + * config/aarch64/aarch64.cc (build_ifunc_arg_type): + Add new fields _hwcap3 and _hwcap4. + +2025-07-31 Kishan Parmar <kishan@linux.ibm.com> + + PR target/118890 + * config/rs6000/rs6000.cc (can_be_rotated_to_negative_lis): Avoid left + shift of negative value and guard shift count. + (can_be_built_by_li_and_rldic): Likewise. + (rs6000_emit_set_long_const): Likewise. + * config/rs6000/rs6000.md (splitter for plus into two 16-bit parts): Fix + UB from overflow in addition. + +2025-07-31 Richard Biener <rguenther@suse.de> + + * config/aarch64/aarch64.cc (aarch64_detect_vector_stmt_subtype): + Check for node before dereferencing. + (aarch64_vector_costs::add_stmt_cost): Likewise. + +2025-07-31 Spencer Abson <spencer.abson@arm.com> + + PR target/121028 + * config/aarch64/aarch64-sme.md (aarch64_smstart_sm): Use the .inst + directive if !TARGET_SME. + (aarch64_smstop_sm): Likewise. + +2025-07-31 Richard Biener <rguenther@suse.de> + + * tree-vectorizer.h (_stmt_vec_info::memory_access_type): Remove. + (STMT_VINFO_MEMORY_ACCESS_TYPE): Likewise. + (vect_mem_access_type): Likewise. + * tree-vect-stmts.cc (vectorizable_store): Do not set + STMT_VINFO_MEMORY_ACCESS_TYPE. Fix SLP_TREE_MEMORY_ACCESS_TYPE + usage. + * tree-vect-loop.cc (update_epilogue_loop_vinfo): Remove + checking of memory access type. + * config/riscv/riscv-vector-costs.cc (costs::compute_local_live_ranges): + Use SLP_TREE_MEMORY_ACCESS_TYPE. + (costs::need_additional_vector_vars_p): Likewise. + (segment_loadstore_group_size): Get SLP node as argument, + use SLP_TREE_MEMORY_ACCESS_TYPE. + (costs::adjust_stmt_cost): Pass down SLP node. + * config/aarch64/aarch64.cc (aarch64_ld234_st234_vectors): Use + SLP_TREE_MEMORY_ACCESS_TYPE instead of vect_mem_access_type. + (aarch64_detect_vector_stmt_subtype): Likewise. + (aarch64_vector_costs::count_ops): Likewise. + (aarch64_vector_costs::add_stmt_cost): Likewise. + +2025-07-31 Richard Biener <rguenther@suse.de> + + * tree-vect-loop.cc (vect_transform_loop): Do not verify DRs + have not been modified for epilogue loops. + (update_epilogue_loop_vinfo): Do not copy modified DRs to + the originals. + +2025-07-31 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/121264 + * machmode.h (get_best_mode): Change type of first 2 arguments + from int to HOST_WIDE_INT. + * stor-layout.cc (get_best_mode): Likewise. + +2025-07-31 Jakub Jelinek <jakub@redhat.com> + + * gimple-ssa-store-merging.cc (find_bswap_or_nop): Fix comment typos, + hanlde -> handle. + * config/i386/i386.cc (ix86_gimple_fold_builtin, ix86_rtx_costs): + Likewise. + * config/i386/i386-features.cc (remove_partial_avx_dependency): + Likewise. + +2025-07-31 Richard Biener <rguenther@suse.de> + + * tree-vect-stmts.cc (check_scan_store): Remove redundant + slp_node check. Disallow epilogue vectorization. + +2025-07-31 Richard Biener <rguenther@suse.de> + + * tree-vectorizer.h (vector_costs::costing_for_scalar): New + accessor. + (add_stmt_cost): For scalar costing force vectype to NULL. + Verify we do not pass in a SLP node. + +2025-07-31 Kito Cheng <kito.cheng@sifive.com> + + PR target/121312 + * config/riscv/arch-canonicalize: Add H extension to the + canonical order. + +2025-07-30 Andrew Pinski <quic_apinski@quicinc.com> + + PR rtl-optimization/121308 + * simplify-rtx.cc (simplify_context::simplify_subreg): Handle + subreg of `not` with word_mode to make it symmetric with the + other bitwise operators. + +2025-07-30 Andrew Pinski <quic_apinski@quicinc.com> + + PR tree-optimization/121236 + PR tree-optimization/121295 + * tree-if-conv.cc (factor_out_operators): Change the phi node + to the new result and args. + +2025-07-30 Andrew Pinski <quic_apinski@quicinc.com> + + Revert: + 2025-07-28 Andrew Pinski <quic_apinski@quicinc.com> + + PR tree-optimization/121236 + * tree-if-conv.cc (is_cond_scalar_reduction): Instead of phi argument, + pass bb and res of the phi. + (factor_out_operators): Add iterator for the phi. Remove the phi + if this is the first time. Return if we had removed the phi. + (predicate_scalar_phi): Add the phi iterator argument. + Update call to is_cond_scalar_reduction. + Update call to factor_out_operators and set the return value to true + when factor_out_operators returns true. + (predicate_all_scalar_phis): Don't remove the phi if predicate_scalar_phi + already removed it. + +2025-07-30 Jan Hubicka <jh@suse.cz> + + * auto-profile.cc (string_table::read): Check gcov_is_error. + (read_profile): Likewise. + * gcov-io.cc (gcov_is_error): Export for gcc linkage. + * gcov-io.h (gcov_is_error): Declare. + +2025-07-30 Richard Biener <rguenther@suse.de> + + * config/i386/i386.cc (ix86_default_vector_cost): Split + out from ... + (ix86_builtin_vectorization_cost): ... this and use + mode instead of vectype as argument. + (ix86_vector_costs::add_stmt_cost): Call + ix86_default_vector_cost instead of ix86_builtin_vectorization_cost. + +2025-07-30 Stefan Schulze Frielinghaus <stefansf@gcc.gnu.org> + + PR target/117015 + * config/s390/s390-protos.h (s390_expand_int_spaceship): New + function. + (s390_expand_fp_spaceship): New function. + * config/s390/s390.cc (s390_expand_int_spaceship): New function. + (s390_expand_fp_spaceship): New function. + * config/s390/s390.md (spaceship<mode>4): New expander. + +2025-07-30 Stefan Schulze Frielinghaus <stefansf@gcc.gnu.org> + + * cprop.cc (bypass_block): Extract single set. + (bypass_conditional_jumps): Ditto. + +2025-07-30 H.J. Lu <hjl.tools@gmail.com> + + PR target/120427 + * config/i386/i386.md (peephole2): Transform "movq $-1,reg" to + "pushq $-1; popq reg" for -Oz if reg is a legacy integer register. + +2025-07-30 Jan Hubicka <jh@suse.cz> + + * auto-profile.cc (function_instance::match): Disable warning + about bogus locations since dwarf does not represent enough + info to output them correctly in all cases. + (add_scale): Use nonzero_p instead of orig.force_nonzero () == orig. + (afdo_adjust_guessed_profile): Add missing newline in dump + file. + +2025-07-30 Jan Hubicka <jh@suse.cz> + + * symtab.cc (symbol_table::change_decl_assembler_name): Recompute DECL_RTL + in case it is already computed. + +2025-07-30 Jan Hubicka <jh@suse.cz> + + * predict.cc (unlikely_executed_edge_p): Ignore EDGE_EH if profile + is reliable. + (unlikely_executed_stmt_p): special case builtin_trap/unreachable and + ignore other heuristics for reliable profiles. + (tree_estimate_probability): Disable unlikely bb detection when + doing dry run + +2025-07-30 Andrew Stubbs <ams@baylibre.com> + Julian Brown <julian@codesourcery.com> + + * doc/tm.texi.in (TARGET_VECTORIZE_PREFER_GATHER_SCATTER): Add + documentation hook. + * doc/tm.texi: Regenerate. + * target.def (prefer_gather_scatter): Add target hook under vectorizer. + * hooks.cc (hook_bool_mode_int_unsigned_false): New function. + * hooks.h (hook_bool_mode_int_unsigned_false): New prototype. + * tree-vect-stmts.cc (vect_use_strided_gather_scatters_p): Add + parameters group_size and single_element_p, and rework to use + targetm.vectorize.prefer_gather_scatter. + (get_group_load_store_type): Move some of the condition into + vect_use_strided_gather_scatters_p. + * config/gcn/gcn.cc (gcn_prefer_gather_scatter): New function. + (TARGET_VECTORIZE_PREFER_GATHER_SCATTER): Define hook. + +2025-07-30 Andrew Stubbs <ams@baylibre.com> + + * config/gcn/gcn.cc (gcn_option_override): Add note to set default for + param_vect_partial_vector_usage to "1". + * optc-save-gen.awk: Don't pass through options marked "NoOffload". + * params.opt (-param=vect-epilogues-nomask): Add NoOffload. + (-param=vect-partial-vector-usage): Likewise. + (-param=vect-inner-loop-cost-factor): Likewise. + +2025-07-30 Richard Biener <rguenther@suse.de> + + PR tree-optimization/121130 + * tree-vect-stmts.cc (vectorizable_call): Bail out for + .MASK_CALL. + +2025-07-30 Pengfei Li <Pengfei.Li2@arm.com> + + PR tree-optimization/121020 + * tree-vect-loop-manip.cc (vect_do_peeling): Update the + condition of omitting the skip-vector check. + * tree-vectorizer.h (LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING): + Add a helper macro. + +2025-07-30 Pengfei Li <Pengfei.Li2@arm.com> + + PR tree-optimization/121190 + * tree-vect-data-refs.cc (vect_enhance_data_refs_alignment): + Increase alignment requirement for speculative loads. + +2025-07-30 Alfie Richards <alfie.richards@arm.com> + + PR target/121300 + * config/aarch64/aarch64-sve-builtins-sme.def (svamin/svamax): Fix + arch gating. + +2025-07-30 Richard Biener <rguenther@suse.de> + + * tree-vect-stmts.cc (get_group_load_store_type): + Process STMT_VINFO_GATHER_SCATTER before reading + memory_access_type. + +2025-07-30 Spencer Abson <spencer.abson@arm.com> + + * config/aarch64/aarch64-sve.md (@cond_<optab><mode>): Extend + to support partial FP modes. + (*cond_<optab><mode>_2_strict): Extend from SVE_FULL_F to SVE_F, + use aarch64_predicate_operand. + (*cond_<optab><mode>_4_strict): Extend from SVE_FULL_F_B16B16 to + SVE_F_B16B16, use aarch64_predicate_operand. + (*cond_<optab><mode>_any_strict): Likewise. + +2025-07-30 Spencer Abson <spencer.abson@arm.com> + + * config/aarch64/aarch64-sve.md (*cond_<optab><mode>_2_relaxed): + Extend from SVE_FULL_F to SVE_F. + (*cond_<optab><mode>_4_relaxed): Extend from SVE_FULL_F_B16B16 + to SVE_F_B16B16. + (*cond_<optab><mode>_any_relaxed): Likewise. + +2025-07-30 Spencer Abson <spencer.abson@arm.com> + + * config/aarch64/aarch64-sve.md (<optab><mode>4): Extend from + SVE_FULL_F_B16B16 to SVE_F_B16B16. Use aarch64_sve_fp_pred instead + of aarch64_ptrue_reg. + (@aarch64_pred_<optab><mode>): Extend from SVE_FULL_F_B16B16 to + SVE_F_B16B16. Use aarch64_predicate_operand. + +2025-07-30 liuhongt <hongtao.liu@intel.com> + + * config/i386/i386-modes.def: Remove VECTOR_MODES(FLOAT, 256) + and VECTOR_MODE (INT, SI, 64). + * config/i386/i386.cc (ix86_hard_regno_nregs): Remove related + code for V64SF/V64SImode. + +2025-07-30 liuhongt <hongtao.liu@intel.com> + + PR target/121274 + * config/i386/sse.md (*vec_concatv2di_0): Add a splitter + before it. + +2025-07-30 Andrew Pinski <quic_apinski@quicinc.com> + + PR rtl-optimization/121302 + * simplify-rtx.cc (simplify_context::simplify_subreg): Use + byte instead of 0 when calling simplify_subreg. + +2025-07-29 Spencer Abson <spencer.abson@arm.com> + + * config/aarch64/aarch64-protos.h (aarch64_sve_emit_masked_fp_pred): + Declare. + * config/aarch64/aarch64-sve.md (and<mode>3): Change this to... + (@and<mode>3): ...this, so that we can use gen_and3. + (@cond_<optab><mode>): Extend from SVE_FULL_F_B16B16 to SVE_F_B16B16, + use aarch64_predicate_operand. + (*cond_<optab><mode>_2_strict): Likewise. + (*cond_<optab><mode>_3_strict): Likewise. + (*cond_<optab><mode>_any_strict): Likwise. + (*cond_<optab><mode>_2_const_strict): Extend from SVE_FULL_F to SVE_F, + use aarch64_predicate_operand. + (*cond_<optab><mode>_any_const_strict): Likewise. + (*cond_sub<mode>_3_const_strict): Likwise. + (*cond_sub<mode>_const_strict): Likewise. + (*vcond_mask_<mode><vpred>): Use aarch64_predicate_operand, and update + the comment here. + * config/aarch64/aarch64.cc (aarch64_sve_emit_masked_fp_pred): New + function. Helper to mask the predicate in conditional expanders. + +2025-07-29 Dongyan Chen <chendongyan@isrc.iscas.ac.cn> + + * Makefile.in: Add riscv-mcpu.texi and riscv-mtune.texi to the list + of files to be processed by the Texinfo generator. + * config/riscv/t-riscv: Add rule for generating riscv-mcpu.texi + and riscv-mtune.texi. + * doc/invoke.texi: Replace hand‑written extension table with + `@include riscv-mcpu.texi` and `@include riscv-mtune.texi` to + pull in auto‑generated entries. + * config/riscv/gen-riscv-mcpu-texi.cc: New file. + * config/riscv/gen-riscv-mtune-texi.cc: New file. + * doc/riscv-mcpu.texi: New file. + * doc/riscv-mtune.texi: New file. + +2025-07-29 Richard Sandiford <richard.sandiford@arm.com> + + * simplify-rtx.cc (simplify_context::simplify_subreg): Distribute + lowpart subregs through AND/IOR/XOR, if doing so eliminates one + of the terms. + (test_scalar_int_ext_ops): Add some tests of the above for integers. + * config/aarch64/aarch64.cc (aarch64_test_sve_folding): Likewise + add tests for predicate modes. + +2025-07-29 Richard Sandiford <richard.sandiford@arm.com> + + * config/aarch64/aarch64-sve-builtins.cc + (function_expander::get_reg_target): Check whether the target + is a valid register_operand. + +2025-07-29 Konstantinos Eleftheriou <konstantinos.eleftheriou@vrull.eu> + + PR rtl-optimization/120660 + * avoid-store-forwarding.cc (process_store_forwarding): + Fix instruction generation when haveing multiple stores with + base offset. + +2025-07-29 Christoph Müllner <christoph.muellner@vrull.eu> + + * common/config/riscv/riscv-common.cc (riscv_ext_is_subset): + Remove use of structured binding to fix compiler warning. + +2025-07-29 Konstantinos Eleftheriou <konstantinos.eleftheriou@vrull.eu> + + PR rtl-optimization/119795 + * avoid-store-forwarding.cc + (store_forwarding_analyzer::avoid_store_forwarding): Skip + transformations for stores that operate on the same address + range as deleted ones. + +2025-07-29 Pan Li <pan2.li@intel.com> + + * match.pd: Add mul based unsigned SAT_MUL. + +2025-07-29 Richard Biener <rguenther@suse.de> + + PR tree-optimization/120687 + * tree-ssa-reassoc.cc (reassociate_bb): Do not disturb + the sorted operand order in the early pass. + * tree-vect-slp.cc (vect_analyze_slp): Dump when a detected + reduction chain fails SLP discovery. + +2025-07-29 Alfie Richards <alfie.richards@arm.com> + + PR middle-end/121261 + * vec.h: Add null ptr check. + +2025-07-29 Jakub Jelinek <jakub@redhat.com> + + PR middle-end/121159 + * calls.cc (can_implement_as_sibling_call_p): Don't reject declared + noreturn functions in musttail calls. + +2025-07-29 Andrew Pinski <quic_apinski@quicinc.com> + + * output.h (MAX_ALIGN_MERGABLE): New define. + * tree-switch-conversion.cc (switch_conversion::build_one_array): + Use MAX_ALIGN_MERGABLE instead of 256. + * varasm.cc (mergeable_string_section): Likewise + (mergeable_constant_section): Likewise + +2025-07-29 Andrew Pinski <quic_apinski@quicinc.com> + + PR middle-end/120523 + * output.h (mergeable_constant_section): New declaration taking + unsigned HOST_WIDE_INT for the size. + * tree-switch-conversion.cc (switch_conversion::build_one_array): + Increase the alignment of CSWTCH for sizes less than 32bytes. + * varasm.cc (mergeable_constant_section): Split out twice. + One that takes the size in unsigned HOST_WIDE_INT and the + other size in a tree. + (default_elf_select_section): Pass DECL_SIZE instead of + DECL_MODE to mergeable_constant_section. + +2025-07-29 Richard Biener <rguenther@suse.de> + + * tree-vect-stmts.cc (vectorizable_load): Un-factor VMAT + specific code to their handling blocks. + +2025-07-29 Richard Biener <rguenther@suse.de> + + * tree-vectorizer.h (gather_scatter_info::offset_dt): Remove. + * tree-vect-data-refs.cc (vect_describe_gather_scatter_call): + Do not set it. + (vect_check_gather_scatter): Likewise. + * tree-vect-stmts.cc (vect_truncate_gather_scatter_offset): + Likewise. + (get_group_load_store_type): Use the vector type of the offset + SLP child. Do not re-check vect_is_simple_use validated by + SLP build. + +2025-07-28 Georg-Johann Lay <avr@gjlay.de> + + PR target/121277 + * config/avr/avr.cc (avr_addr_space_convert): When converting + from generic AS to __flashx, don't set bit 23. + (avr_convert_to_type): Don't -Waddr-space-convert when NULL + is converted to __flashx or to __flash. + +2025-07-28 Andrew Pinski <quic_apinski@quicinc.com> + + PR tree-optimization/121236 + * tree-if-conv.cc (is_cond_scalar_reduction): Instead of phi argument, + pass bb and res of the phi. + (factor_out_operators): Add iterator for the phi. Remove the phi + if this is the first time. Return if we had removed the phi. + (predicate_scalar_phi): Add the phi iterator argument. + Update call to is_cond_scalar_reduction. + Update call to factor_out_operators and set the return value to true + when factor_out_operators returns true. + (predicate_all_scalar_phis): Don't remove the phi if predicate_scalar_phi + already removed it. + +2025-07-28 H.J. Lu <hjl.tools@gmail.com> + + PR target/121208 + * config/i386/i386.cc (ix86_tls_get_addr): Issue an error for + -mtls-dialect=gnu with no_caller_saved_registers attribute and + suggest -mtls-dialect=gnu2. + +2025-07-28 Mikael Pettersson <mikpelinux@gmail.com> + + PR other/121260 + * diagnostics/changes.cc: Correct nesting of namespaces + and #if CHECKING_P blocks. + * diagnostics/context.cc: Likewise. + * diagnostics/html-sink.cc: Likewise. + * diagnostics/output-spec.cc: Likewise. + * diagnostics/sarif-sink.cc: Likewise. + +2025-07-28 Tobias Burnus <tburnus@baylibre.com> + + * config/nvptx/nvptx.opt (march-map=): Add sm_100{,f,a}, + sm_101{,f,a}, sm_103{,a,f}, sm_120{,a,f} and sm_121{,f,a}. + +2025-07-28 Tobias Burnus <tburnus@baylibre.com> + + * config/gcn/gcn.md (atomic_load, atomic_store, atomic_exchange): + Fix CDNA3 L2 cache write-back before atomic instructions. + +2025-07-28 Richard Biener <rguenther@suse.de> + + * tree-vect-stmts.cc (check_load_store_for_partial_vectors): + Make *gs_info const. + (vect_build_one_gather_load_call): Likewise. + (vect_build_one_scatter_store_call): Likewise. + (vect_get_gather_scatter_ops): Likewise. + (vect_get_strided_load_store_ops): Likewise. + +2025-07-28 Tobias Burnus <tburnus@baylibre.com> + + * config/gcn/gcn.md (define_attr "vcmp"): Add with values + vcmp/vcmpx/no. + (*movbi, cstoredi4.., cstore<mode>4): Set it. + * config/gcn/gcn-valu.md (vec_cmp<mode>...): Likewise. + * config/gcn/gcn.cc (gcn_cmpx_insn_p): Remove. + (gcn_md_reorg): Add two new conditions for MI300. + +2025-07-28 Tobias Burnus <tburnus@baylibre.com> + + * config/gcn/gcn-opts.h (enum hsaco_attr_type): Add comment + about 'sc0'. + * config/gcn/gcn.cc (gcn_md_reorg): Use gen_nops instead of gen_nop. + (print_operand_address): Document 'R' and 'V' in the + pre-function comment as well. + * config/gcn/gcn.md (nops): Add. + +2025-07-28 Richard Biener <rguenther@suse.de> + + PR tree-optimization/121256 + * tree-vect-loop.cc (vectorizable_recurr): Build a correct + initialization vector for SLP_TREE_LANES > 1. + +2025-07-28 Richard Biener <rguenther@suse.de> + + * tree-vectorizer.h (_slp_tree::type): Add. + (_slp_tree::u): Likewise. + (_stmt_vec_info::type): Remove. + (STMT_VINFO_TYPE): Likewise. + (SLP_TREE_TYPE): New. + * tree-vectorizer.cc (vec_info::new_stmt_vec_info): Do not + initialize type. + * tree-vect-slp.cc (_slp_tree::_slp_tree): Initialize type. + (vect_slp_analyze_node_operations): Adjust. + (vect_schedule_slp_node): Likewise. + * tree-vect-patterns.cc (vect_init_pattern_stmt): Do not + copy STMT_VINFO_TYPE. + * tree-vect-loop.cc: Set SLP_TREE_TYPE instead of + STMT_VINFO_TYPE everywhere. + (vect_create_loop_vinfo): Do not set STMT_VINFO_TYPE on + loop conditions. + * tree-vect-stmts.cc: Set SLP_TREE_TYPE instead of + STMT_VINFO_TYPE everywhere. + (vect_analyze_stmt): Adjust. + (vect_transform_stmt): Likewise. + * config/aarch64/aarch64.cc (aarch64_vector_costs::count_ops): + Access SLP_TREE_TYPE instead of STMT_VINFO_TYPE. + * config/i386/i386.cc (ix86_vector_costs::add_stmt_cost): + Remove non-SLP element-wise load/store matching. + * config/rs6000/rs6000.cc + (rs6000_cost_data::update_target_cost_per_stmt): Pass in + the SLP node. Use that to get at the memory access + kind and type. + (rs6000_cost_data::add_stmt_cost): Pass down SLP node. + * config/riscv/riscv-vector-costs.cc (variable_vectorized_p): + Use SLP_TREE_TYPE. + (costs::need_additional_vector_vars_p): Likewise. + (costs::update_local_live_ranges): Likewise. + +2025-07-28 Jennifer Schmitz <jschmitz@nvidia.com> + Dhruv Chawla <dhruvc@nvidia.com> + + * config/aarch64/aarch64-cores.def (olympus): Use olympus tuning + model. + * config/aarch64/aarch64.cc: Include olympus.h. + * config/aarch64/tuning_models/olympus.h: New file. + +2025-07-28 Lulu Cheng <chenglulu@loongson.cn> + + * config/loongarch/loongarch.h + (CASE_VECTOR_SHORTEN_MODE): Delete. + +2025-07-28 Takayuki 'January June' Suwa <jjsuwa_sys3175@yahoo.co.jp> + + * config/xtensa/xtensa.cc (xtensa_is_insn_L32R_p): + Re-rewrite to more accurately capture insns that could be L32R machine + instructions wherever possible, and add comments that help understand + the intent of the process. + 2025-07-27 Pan Li <pan2.li@intel.com> * config/riscv/riscv-v.cc (expand_vx_binary_vxrm_vec_vec_dup): diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 8ad65ad..cb7ccb3 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20250728 +20250802 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 7314a3b..d7d5cbe 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -3720,7 +3720,7 @@ TEXI_GCC_FILES = gcc.texi gcc-common.texi gcc-vers.texi frontends.texi \ contribute.texi compat.texi funding.texi gnu.texi gpl_v3.texi \ fdl.texi contrib.texi cppenv.texi cppopts.texi avr-mmcu.texi \ implement-c.texi implement-cxx.texi gcov-tool.texi gcov-dump.texi \ - lto-dump.texi riscv-ext.texi + lto-dump.texi riscv-ext.texi riscv-mcpu.texi riscv-mtune.texi # we explicitly use $(srcdir)/doc/tm.texi here to avoid confusion with # the generated tm.texi; the latter might have a more recent timestamp, diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog index 6f6a782..85464e3 100644 --- a/gcc/ada/ChangeLog +++ b/gcc/ada/ChangeLog @@ -1,3 +1,206 @@ +2025-07-31 Eric Botcazou <ebotcazou@gcc.gnu.org> + + Revert: + 2025-07-31 Nicolas Boulenguez <nicolas@debian.org> + + PR ada/114065 + * Makefile.rtl (GNATRTL_NONTASKING_OBJS): Add g-c_time$(objext) and + s-c_time$(objext). + (Aarch64/Android): Do not use s-osinte__android.adb. + (SPARC/Solaris): Do not use s-osprim__solaris.adb. + (x86/Solaris): Likewise. + (LynxOS178): Do not use s-parame__posix2008.ads. + (RTEMS): Likewise. + (x32/Linux): Likewise, as well as s-linux__x32.ads. Replace + s-osprim__x32.adb with s-osprim__posix.adb. + (LIBGNAT_OBJS): Remove cal.o. + * cal.c: Delete. + * doc/gnat_rm/the_gnat_library.rst (GNAT.C_Time): New entry. + (GNAT.Calendar): Do not mention the obsolete conversion functions. + * impunit.adb (Non_Imp_File_Names_95): Add g-c_time. + * libgnarl/a-exetim__posix.adb: Add with clause for System.C_Time + (Clock): Use type and functions from System.C_Time. + * libgnarl/s-linux.ads: Remove with clause for System.Parameters. + Remove declarations of C time types. + * libgnarl/s-linux__alpha.ads: Likewise. + * libgnarl/s-linux__android-aarch64.ads: Likewise. + * libgnarl/s-linux__android-arm.ads: Likewise. + * libgnarl/s-linux__hppa.ads: Likewise. + * libgnarl/s-linux__loongarch.ads: Likewise. + * libgnarl/s-linux__mips.ads: Likewise. + * libgnarl/s-linux__riscv.ads: Likewise. + * libgnarl/s-linux__sparc.ads: Likewise. + * libgnarl/s-osinte__aix.ads: Likewise. + * libgnarl/s-osinte__android.ads: Likewise. + * libgnarl/s-osinte__cheribsd.ads: Likewise. + * libgnarl/s-osinte__darwin.ads: Likewise. + * libgnarl/s-osinte__dragonfly.ads: Likewise. + * libgnarl/s-osinte__freebsd.ads: Likewise. + * libgnarl/s-osinte__gnu.ads: Likewise. + * libgnarl/s-osinte__hpux.ads: Likewise. + * libgnarl/s-osinte__kfreebsd-gnu.ads: Likewise. + * libgnarl/s-osinte__linux.ads: Likewise. + * libgnarl/s-osinte__lynxos178e.ads: Likewise. + * libgnarl/s-osinte__qnx.ads: Likewise. + * libgnarl/s-osinte__rtems.ads: Likewise. + * libgnarl/s-osinte__solaris.ads: Likewise. + * libgnarl/s-osinte__vxworks.ads: Likewise. + * libgnarl/s-qnx.ads: Likewise. + * libgnarl/s-linux__x32.ads: Delete. + * libgnarl/s-osinte__darwin.adb (To_Duration): Remove. + (To_Timespec): Likewise. + * libgnarl/s-osinte__aix.adb: Likewise. + * libgnarl/s-osinte__dragonfly.adb: Likewise. + * libgnarl/s-osinte__freebsd.adb: Likewise. + * libgnarl/s-osinte__gnu.adb: Likewise. + * libgnarl/s-osinte__lynxos178.adb: Likewise. + * libgnarl/s-osinte__posix.adb: Likewise. + * libgnarl/s-osinte__qnx.adb: Likewise. + * libgnarl/s-osinte__rtems.adb: Likewise. + * libgnarl/s-osinte__solaris.adb: Likewise. + * libgnarl/s-osinte__vxworks.adb: Likewise. + * libgnarl/s-osinte__x32.adb: Likewise. + * libgnarl/s-taprop__solaris.adb: Add with clause for System.C_Time. + (Monotonic_Clock): Use type and functions from System.C_Time. + (RT_Resolution): Likewise. + (Timed_Sleep): Likewise. + (Timed_Delay): Likewise. + * libgnarl/s-taprop__vxworks.adb: Likewise. + * libgnarl/s-tpopmo.adb: Likewise. + * libgnarl/s-osinte__android.adb: Delete. + * libgnat/g-c_time.ads: New file. + * libgnat/g-calend.adb: Delegate to System.C_Time. + * libgnat/g-calend.ads: Likewise. + * libgnat/g-socket.adb: Likewise. + * libgnat/g-socthi.adb: Likewise. + * libgnat/g-socthi__vxworks.adb: Likewise. + * libgnat/g-sothco.ads: Likewise. + * libgnat/g-spogwa.adb: Likewise. + * libgnat/s-c_time.adb: New file. + * libgnat/s-c_time.ads: Likewise. + * libgnat/s-optide.adb: Import nanosleep here. + * libgnat/s-os_lib.ads (time_t): Remove. + (To_Ada): Adjust. + (To_C): Likewise. + * libgnat/s-os_lib.adb: Likewise. + * libgnat/s-osprim__darwin.adb: Delegate to System.C_Time. + * libgnat/s-osprim__posix.adb: Likewise. + * libgnat/s-osprim__posix2008.adb: Likewise. + * libgnat/s-osprim__rtems.adb: Likewise. + * libgnat/s-osprim__unix.adb: Likewise. + * libgnat/s-osprim__solaris.adb: Delete. + * libgnat/s-osprim__x32.adb: Likewise. + * libgnat/s-parame.ads (time_t_bits): Remove. + * libgnat/s-parame__hpux.ads: Likewise. + * libgnat/s-parame__vxworks.ads: Likewise. + * libgnat/s-parame__posix2008.ads: Delete. + * s-oscons-tmplt.c (SIZEOF_tv_nsec): New constant. + +2025-07-31 Eric Botcazou <ebotcazou@gcc.gnu.org> + + PR ada/120440 + * gcc-interface/Makefile.in (GNATLINK_OBJS): Add s-excmac.o. + (GNATMAKE_OBJS): Likewise. + +2025-07-31 Nicolas Boulenguez <nicolas@debian.org> + + PR ada/114065 + * Makefile.rtl (GNATRTL_NONTASKING_OBJS): Add g-c_time$(objext) and + s-c_time$(objext). + (Aarch64/Android): Do not use s-osinte__android.adb. + (SPARC/Solaris): Do not use s-osprim__solaris.adb. + (x86/Solaris): Likewise. + (LynxOS178): Do not use s-parame__posix2008.ads. + (RTEMS): Likewise. + (x32/Linux): Likewise, as well as s-linux__x32.ads. Replace + s-osprim__x32.adb with s-osprim__posix.adb. + (LIBGNAT_OBJS): Remove cal.o. + * cal.c: Delete. + * doc/gnat_rm/the_gnat_library.rst (GNAT.C_Time): New entry. + (GNAT.Calendar): Do not mention the obsolete conversion functions. + * impunit.adb (Non_Imp_File_Names_95): Add g-c_time. + * libgnarl/a-exetim__posix.adb: Add with clause for System.C_Time + (Clock): Use type and functions from System.C_Time. + * libgnarl/s-linux.ads: Remove with clause for System.Parameters. + Remove declarations of C time types. + * libgnarl/s-linux__alpha.ads: Likewise. + * libgnarl/s-linux__android-aarch64.ads: Likewise. + * libgnarl/s-linux__android-arm.ads: Likewise. + * libgnarl/s-linux__hppa.ads: Likewise. + * libgnarl/s-linux__loongarch.ads: Likewise. + * libgnarl/s-linux__mips.ads: Likewise. + * libgnarl/s-linux__riscv.ads: Likewise. + * libgnarl/s-linux__sparc.ads: Likewise. + * libgnarl/s-osinte__aix.ads: Likewise. + * libgnarl/s-osinte__android.ads: Likewise. + * libgnarl/s-osinte__cheribsd.ads: Likewise. + * libgnarl/s-osinte__darwin.ads: Likewise. + * libgnarl/s-osinte__dragonfly.ads: Likewise. + * libgnarl/s-osinte__freebsd.ads: Likewise. + * libgnarl/s-osinte__gnu.ads: Likewise. + * libgnarl/s-osinte__hpux.ads: Likewise. + * libgnarl/s-osinte__kfreebsd-gnu.ads: Likewise. + * libgnarl/s-osinte__linux.ads: Likewise. + * libgnarl/s-osinte__lynxos178e.ads: Likewise. + * libgnarl/s-osinte__qnx.ads: Likewise. + * libgnarl/s-osinte__rtems.ads: Likewise. + * libgnarl/s-osinte__solaris.ads: Likewise. + * libgnarl/s-osinte__vxworks.ads: Likewise. + * libgnarl/s-qnx.ads: Likewise. + * libgnarl/s-linux__x32.ads: Delete. + * libgnarl/s-osinte__darwin.adb (To_Duration): Remove. + (To_Timespec): Likewise. + * libgnarl/s-osinte__aix.adb: Likewise. + * libgnarl/s-osinte__dragonfly.adb: Likewise. + * libgnarl/s-osinte__freebsd.adb: Likewise. + * libgnarl/s-osinte__gnu.adb: Likewise. + * libgnarl/s-osinte__lynxos178.adb: Likewise. + * libgnarl/s-osinte__posix.adb: Likewise. + * libgnarl/s-osinte__qnx.adb: Likewise. + * libgnarl/s-osinte__rtems.adb: Likewise. + * libgnarl/s-osinte__solaris.adb: Likewise. + * libgnarl/s-osinte__vxworks.adb: Likewise. + * libgnarl/s-osinte__x32.adb: Likewise. + * libgnarl/s-taprop__solaris.adb: Add with clause for System.C_Time. + (Monotonic_Clock): Use type and functions from System.C_Time. + (RT_Resolution): Likewise. + (Timed_Sleep): Likewise. + (Timed_Delay): Likewise. + * libgnarl/s-taprop__vxworks.adb: Likewise. + * libgnarl/s-tpopmo.adb: Likewise. + * libgnarl/s-osinte__android.adb: Delete. + * libgnat/g-c_time.ads: New file. + * libgnat/g-calend.adb: Delegate to System.C_Time. + * libgnat/g-calend.ads: Likewise. + * libgnat/g-socket.adb: Likewise. + * libgnat/g-socthi.adb: Likewise. + * libgnat/g-socthi__vxworks.adb: Likewise. + * libgnat/g-sothco.ads: Likewise. + * libgnat/g-spogwa.adb: Likewise. + * libgnat/s-c_time.adb: New file. + * libgnat/s-c_time.ads: Likewise. + * libgnat/s-optide.adb: Import nanosleep here. + * libgnat/s-os_lib.ads (time_t): Remove. + (To_Ada): Adjust. + (To_C): Likewise. + * libgnat/s-os_lib.adb: Likewise. + * libgnat/s-osprim__darwin.adb: Delegate to System.C_Time. + * libgnat/s-osprim__posix.adb: Likewise. + * libgnat/s-osprim__posix2008.adb: Likewise. + * libgnat/s-osprim__rtems.adb: Likewise. + * libgnat/s-osprim__unix.adb: Likewise. + * libgnat/s-osprim__solaris.adb: Delete. + * libgnat/s-osprim__x32.adb: Likewise. + * libgnat/s-parame.ads (time_t_bits): Remove. + * libgnat/s-parame__hpux.ads: Likewise. + * libgnat/s-parame__vxworks.ads: Likewise. + * libgnat/s-parame__posix2008.ads: Delete. + * s-oscons-tmplt.c (SIZEOF_tv_nsec): New constant. + +2025-07-28 Marc Poulhiès <poulhies@adacore.com> + + * gcc-interface/trans.cc (gnat_to_gnu): Fix typo in comment. + 2025-07-25 David Malcolm <dmalcolm@redhat.com> * gcc-interface/misc.cc: Make diff --git a/gcc/ada/gcc-interface/Makefile.in b/gcc/ada/gcc-interface/Makefile.in index 8615b59..d456ac1 100644 --- a/gcc/ada/gcc-interface/Makefile.in +++ b/gcc/ada/gcc-interface/Makefile.in @@ -314,16 +314,16 @@ Makefile: ../config.status $(srcdir)/ada/gcc-interface/Makefile.in $(srcdir)/ada GNATLINK_OBJS = gnatlink.o \ a-except.o ali.o alloc.o butil.o casing.o csets.o debug.o fmap.o fname.o \ gnatvsn.o hostparm.o indepsw.o interfac.o i-c.o i-cstrin.o namet.o opt.o \ - osint.o output.o rident.o s-exctab.o s-secsta.o s-stalib.o s-stoele.o \ - sdefault.o snames.o stylesw.o switch.o system.o table.o targparm.o \ - types.o validsw.o widechar.o + osint.o output.o rident.o s-excmac.o s-exctab.o s-secsta.o s-stalib.o \ + s-stoele.o sdefault.o snames.o stylesw.o switch.o system.o table.o \ + targparm.o types.o validsw.o widechar.o GNATMAKE_OBJS = a-except.o ali.o ali-util.o aspects.o s-casuti.o alloc.o \ atree.o binderr.o butil.o casing.o csets.o debug.o elists.o einfo.o errout.o \ erroutc.o errutil.o err_vars.o fmap.o fname.o fname-uf.o fname-sf.o \ gnatmake.o gnatvsn.o hostparm.o interfac.o i-c.o i-cstrin.o krunch.o lib.o \ make.o makeusg.o make_util.o namet.o nlists.o opt.o osint.o osint-m.o \ - output.o restrict.o rident.o s-exctab.o s-cautns.o \ + output.o restrict.o rident.o s-cautns.o s-excmac.o s-exctab.o \ s-secsta.o s-stalib.o s-stoele.o scans.o scng.o sdefault.o sfn_scan.o \ s-purexc.o s-htable.o scil_ll.o sem_aux.o sinfo.o sinput.o sinput-c.o \ snames.o stand.o stringt.o styleg.o stylesw.o system.o validsw.o \ diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc index d1954b4..7ff9526 100644 --- a/gcc/auto-profile.cc +++ b/gcc/auto-profile.cc @@ -885,6 +885,8 @@ string_table::read () { vector_.quick_push (xstrdup (gcov_read_string ())); map_[vector_.last ()] = i; + if (gcov_is_error ()) + return false; } return true; } @@ -1629,7 +1631,13 @@ function_instance::match (cgraph_node *node, if (iter->first != end_location && iter->first != start_location && (iter->first & 65535) != zero_location - && iter->first) + && iter->first + /* FIXME: dwarf5 does not represent inline stack of debug + statements and consequently create_gcov is sometimes + mixing up statements from other functions. Do not warn + user about this until this problem is solved. + We still write info into dump file. */ + && 0) { if (!warned) warned = warning_at (DECL_SOURCE_LOCATION (node->decl), @@ -2741,14 +2749,22 @@ read_profile (void) /* autofdo_source_profile. */ afdo_source_profile = autofdo_source_profile::create (); - if (afdo_source_profile == NULL) + if (afdo_source_profile == NULL + || gcov_is_error ()) { error ("cannot read function profile from %s", auto_profile_file); + delete afdo_source_profile; + afdo_source_profile = NULL; return; } /* autofdo_module_profile. */ fake_read_autofdo_module_profile (); + if (gcov_is_error ()) + { + error ("cannot read module profile from %s", auto_profile_file); + return; + } } /* From AutoFDO profiles, find values inside STMT for that we want to measure @@ -3425,7 +3441,7 @@ add_scale (vec <scale> *scales, profile_count annotated, profile_count orig) annotated.dump (dump_file); fprintf (dump_file, "\n"); } - if (orig.force_nonzero () == orig) + if (orig.nonzero_p ()) { sreal scale = annotated.guessed_local () @@ -3672,7 +3688,7 @@ afdo_adjust_guessed_profile (bb_set *annotated_bb) { if (dump_file) fprintf (dump_file, - " Can not determine count from the boundary; giving up"); + " Can not determine count from the boundary; giving up\n"); continue; } gcc_checking_assert (scales.length ()); diff --git a/gcc/avoid-store-forwarding.cc b/gcc/avoid-store-forwarding.cc index 785efd2..1de6fd6 100644 --- a/gcc/avoid-store-forwarding.cc +++ b/gcc/avoid-store-forwarding.cc @@ -231,20 +231,39 @@ process_store_forwarding (vec<store_fwd_info> &stores, rtx_insn *load_insn, int move_to_front = -1; int total_cost = 0; + int base_offset_index = -1; + + /* Find the last store that has the same offset the load, in the case that + we're eliminating the load. We will try to use it as a base register + to avoid bit inserts (see second loop below). We want the last one, as + it will be wider and we don't want to overwrite the base register if + there are many of them. */ + if (load_elim) + { + FOR_EACH_VEC_ELT_REVERSE (stores, i, it) + { + const bool has_base_offset + = known_eq (poly_uint64 (it->offset), + subreg_size_lowpart_offset (MEM_SIZE (it->store_mem), + load_size)); + if (has_base_offset) + { + base_offset_index = i; + break; + } + } + } /* Check if we can emit bit insert instructions for all forwarded stores. */ FOR_EACH_VEC_ELT (stores, i, it) { it->mov_reg = gen_reg_rtx (GET_MODE (it->store_mem)); rtx_insn *insns = NULL; - const bool has_base_offset - = known_eq (poly_uint64 (it->offset), - subreg_size_lowpart_offset (MEM_SIZE (it->store_mem), - load_size)); - - /* If we're eliminating the load then find the store with zero offset - and use it as the base register to avoid a bit insert if possible. */ - if (load_elim && has_base_offset) + + /* Check if this is a store with base offset, if we're eliminating the + load, and use it as the base register to avoid a bit insert if + possible. Load elimination is implied by base_offset_index != -1. */ + if (i == (unsigned) base_offset_index) { start_sequence (); @@ -437,9 +456,22 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb) return; auto_vec<store_fwd_info, 8> store_exprs; + auto_vec<rtx> store_exprs_del; rtx_insn *insn; unsigned int insn_cnt = 0; + /* We are iterating over the basic block's instructions detecting store + instructions. Upon reaching a load instruction, we check if any of the + previously detected stores could result in store forwarding. In that + case, we try to reorder the load and store instructions. + We skip this transformation when we encounter complex memory operations, + instructions that might throw an exception, instruction dependencies, + etc. This is done by clearing the vector of detected stores, while + keeping the removed stores in another vector. By doing so, we can check + if any of the removed stores operated on the load's address range, when + reaching a subsequent store that operates on the same address range, + as this would lead to incorrect values on the register that keeps the + loaded value. */ FOR_BB_INSNS (bb, insn) { if (!NONDEBUG_INSN_P (insn)) @@ -452,6 +484,10 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb) if (!set || insn_could_throw_p (insn)) { + unsigned int i; + store_fwd_info *it; + FOR_EACH_VEC_ELT (store_exprs, i, it) + store_exprs_del.safe_push (it->store_mem); store_exprs.truncate (0); continue; } @@ -475,6 +511,10 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb) || (load_mem && (!MEM_SIZE_KNOWN_P (load_mem) || !MEM_SIZE (load_mem).is_constant ()))) { + unsigned int i; + store_fwd_info *it; + FOR_EACH_VEC_ELT (store_exprs, i, it) + store_exprs_del.safe_push (it->store_mem); store_exprs.truncate (0); continue; } @@ -526,6 +566,7 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb) it->remove = true; removed_count++; remove_rest = true; + store_exprs_del.safe_push (it->store_mem); } } } @@ -565,23 +606,46 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb) it->remove = true; removed_count++; remove_rest = true; + forwardings.truncate (0); } else if (is_store_forwarding (store_mem, load_mem, &off_val)) { + unsigned int j; + rtx *del_it; + bool same_range_as_removed = false; + + /* Check if another store in the load's address range has + been deleted due to a constraint violation. In this case + we can't forward any other stores that operate in this + range, as it would lead to partial update of the register + that holds the loaded value. */ + FOR_EACH_VEC_ELT (store_exprs_del, j, del_it) + { + rtx del_store_mem = *del_it; + same_range_as_removed + = is_store_forwarding (del_store_mem, load_mem, NULL); + if (same_range_as_removed) + break; + } + /* Check if moving this store after the load is legal. */ bool write_dep = false; - for (unsigned int j = store_exprs.length () - 1; j != i; j--) + if (!same_range_as_removed) { - if (!store_exprs[j].forwarded - && output_dependence (store_mem, - store_exprs[j].store_mem)) + unsigned int j = store_exprs.length () - 1; + for (; j != i; j--) { - write_dep = true; - break; + if (!store_exprs[j].forwarded + && output_dependence (store_mem, + store_exprs[j].store_mem)) + { + write_dep = true; + break; + } } } - if (!write_dep) + if (!same_range_as_removed && !write_dep) { it->forwarded = true; it->offset = off_val; @@ -601,6 +665,7 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb) it->remove = true; removed_count++; remove_rest = true; + forwardings.truncate (0); } } @@ -608,9 +673,12 @@ store_forwarding_analyzer::avoid_store_forwarding (basic_block bb) process_store_forwarding (forwardings, insn, load_mem); } + /* Abort in case that we encounter a memory read/write that is not a + simple store/load, as we can't make safe assumptions about the + side-effects of this. */ if ((writes_mem && !is_simple_store) || (reads_mem && !is_simple_load)) - store_exprs.truncate (0); + return; if (removed_count) { diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc index 1f4a0df..a0d832b 100644 --- a/gcc/c-family/c-attribs.cc +++ b/gcc/c-family/c-attribs.cc @@ -4120,10 +4120,11 @@ handle_argspec_attribute (tree *, tree, tree args, int, bool *) { /* Verify the attribute has one or two arguments and their kind. */ gcc_assert (args && TREE_CODE (TREE_VALUE (args)) == STRING_CST); - for (tree next = TREE_CHAIN (args); next; next = TREE_CHAIN (next)) + if (TREE_CHAIN (args)) { - tree val = TREE_VALUE (next); - gcc_assert (DECL_P (val) || EXPR_P (val)); + tree val = TREE_VALUE (TREE_CHAIN (args)); + gcc_assert (!TREE_CHAIN (TREE_CHAIN (args))); + gcc_assert (TYPE_P (val)); } return NULL_TREE; } @@ -5736,6 +5737,71 @@ handle_access_attribute (tree node[3], tree name, tree args, int flags, return NULL_TREE; } + +/* This function builds a string which is concatenated to SPEC and returns + list of variably bounds corresponding to an array/VLA parameter with + type TYPE. The string consists of one dollar symbol for each specified + variable bound, one asterisk for each unspecified variable bound, + a space for an array of unknown size (only possibly for the outermost), + and a zero for a zero-sized array. + + The chainof variable bounds starts with the most significant bound. + For example, the TYPE T[2][m][3][n] will produce "$$" and (m, (n, nil)). */ + +static tree +build_arg_spec (tree type, std::string *spec) +{ + while (POINTER_TYPE_P (type)) + type = TREE_TYPE (type); + + if (TREE_CODE (type) != ARRAY_TYPE) + return NULL_TREE; + + tree list = build_arg_spec (TREE_TYPE (type), spec); + + if (!COMPLETE_TYPE_P (type)) + { + (*spec) += ' '; + return list; + } + + tree mval = TYPE_MAX_VALUE (TYPE_DOMAIN (type)); + + if (!mval) + { + (*spec) += '0'; + return list; + } + + if (TREE_CODE (mval) == COMPOUND_EXPR + && integer_zerop (TREE_OPERAND (mval, 0)) + && integer_zerop (TREE_OPERAND (mval, 1))) + { + (*spec) += '*'; + return list; + } + + if (TREE_CODE (mval) == INTEGER_CST) + return list; + + /* A variable bound. */ + (*spec) += '$'; + + mval = array_type_nelts_top (type); + + /* Remove NOP_EXPR and SAVE_EXPR to uncover possible PARM_DECLS. */ + if (TREE_CODE (mval) == NOP_EXPR) + mval = TREE_OPERAND (mval, 0); + if (TREE_CODE (mval) == SAVE_EXPR) + { + mval = TREE_OPERAND (mval, 0); + if (TREE_CODE (mval) == NOP_EXPR) + mval = TREE_OPERAND (mval, 0); + } + + return tree_cons (NULL_TREE, mval, list); +} + /* Extract attribute "arg spec" from each FNDECL argument that has it, build a single attribute access corresponding to all the arguments, and return the result. SKIP_VOIDPTR set to ignore void* parameters @@ -5812,15 +5878,16 @@ build_attr_access_from_parms (tree parms, bool skip_voidptr) argspec = TREE_VALUE (argspec); /* The attribute arg spec string. */ - tree str = TREE_VALUE (argspec); - const char *s = TREE_STRING_POINTER (str); + const char *s = TREE_STRING_POINTER (TREE_VALUE (argspec)); + bool static_p = s && (0 == strcmp("static", s)); /* Collect the list of nonnull arguments which use "[static ..]". */ - if (s != NULL && s[0] == '[' && s[1] == 's') + if (static_p) nnlist = tree_cons (NULL_TREE, build_int_cst (integer_type_node, argpos + 1), nnlist); - /* Create the attribute access string from the arg spec string, + tree argvbs; + /* Create the attribute access string from the arg spec data, optionally followed by position of the VLA bound argument if it is one. */ { @@ -5831,21 +5898,52 @@ build_attr_access_from_parms (tree parms, bool skip_voidptr) specend = 1; } - /* Format the access string in place. */ - int len = snprintf (NULL, 0, "%c%u%s", - attr_access::mode_chars[access_deferred], - argpos, s); - spec.resize (specend + len + 1); - sprintf (&spec[specend], "%c%u%s", - attr_access::mode_chars[access_deferred], - argpos, s); + spec += attr_access::mode_chars[access_deferred]; + spec += std::to_string (argpos); + spec += '['; + tree type = TREE_VALUE (TREE_CHAIN (argspec)); + argvbs = build_arg_spec (type, &spec); + + /* Postprocess the string to bring it in the format expected + by the code handling the access attribute. First, we + add 's' if the array was declared as [static ...]. */ + if (static_p) + { + size_t send = spec.length(); + + if (spec[send - 1] == '[') + { + spec += 's'; + } + else + { + /* If there is a symbol, we need to swap the order. */ + spec += spec[send - 1]; + spec[send - 1] = 's'; + } + } + + /* If the outermost bound is an integer constant, we need to write + the size if it is constant. */ + if (type && TYPE_DOMAIN (type)) + { + tree mval = TYPE_MAX_VALUE (TYPE_DOMAIN (type)); + if (mval && TREE_CODE (mval) == INTEGER_CST) + { + char buf[40]; + unsigned HOST_WIDE_INT n = tree_to_uhwi (mval) + 1; + sprintf (buf, HOST_WIDE_INT_PRINT_UNSIGNED, n); + spec += buf; + } + } + spec += ']'; + /* Trim the trailing NUL. */ - spec.resize (specend + len); + spec.resize (spec.length ()); } /* The (optional) list of expressions denoting the VLA bounds N in ARGTYPE <arg>[Ni]...[Nj]...[Nk]. */ - tree argvbs = TREE_CHAIN (argspec); if (argvbs) { spec += ','; diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc index 4bef438..7850365 100644 --- a/gcc/c/c-decl.cc +++ b/gcc/c/c-decl.cc @@ -6208,184 +6208,7 @@ grokparm (const struct c_parm *parm, tree *expr) return decl; } -/* Return attribute "arg spec" corresponding to an array/VLA parameter - described by PARM, concatenated onto attributes ATTRS. - The spec consists of one dollar symbol for each specified variable - bound, one asterisk for each unspecified variable bound, followed - by at most one specification of the most significant bound of - an ordinary array parameter. For ordinary arrays the specification - is either the constant bound itself, or the space character for - an array with an unspecified bound (the [] form). Finally, a chain - of specified variable bounds is appended to the spec, starting with - the most significant bound. For example, the PARM T a[2][m][3][n] - will produce __attribute__((arg spec ("[$$2]", m, n)). - For T a typedef for an array with variable bounds, the bounds are - included in the specification in the expected order. - No "arg spec" is created for parameters of pointer types, making - a distinction between T(*)[N] (or, equivalently, T[][N]) and - the T[M][N] form, all of which have the same type and are represented - the same, but only the last of which gets an "arg spec" describing - the most significant bound M. */ -static tree -get_parm_array_spec (const struct c_parm *parm, tree attrs) -{ - /* The attribute specification string, minor bound first. */ - std::string spec; - - /* A list of VLA variable bounds, major first, or null if unspecified - or not a VLA. */ - tree vbchain = NULL_TREE; - /* True for a pointer parameter. */ - bool pointer = false; - /* True for an ordinary array with an unpecified bound. */ - bool nobound = false; - - /* Create a string representation for the bounds of the array/VLA. */ - for (c_declarator *pd = parm->declarator, *next; pd; pd = next) - { - next = pd->declarator; - while (next && next->kind == cdk_attrs) - next = next->declarator; - - /* Remember if a pointer has been seen to avoid storing the constant - bound. */ - if (pd->kind == cdk_pointer) - pointer = true; - - if ((pd->kind == cdk_pointer || pd->kind == cdk_function) - && (!next || next->kind == cdk_id)) - { - /* Do nothing for the common case of a pointer. The fact that - the parameter is one can be deduced from the absence of - an arg spec for it. */ - return attrs; - } - - if (pd->kind == cdk_id) - { - if (pointer - || !parm->specs->type - || TREE_CODE (parm->specs->type) != ARRAY_TYPE - || !TYPE_DOMAIN (parm->specs->type) - || !TYPE_MAX_VALUE (TYPE_DOMAIN (parm->specs->type))) - continue; - - tree max = TYPE_MAX_VALUE (TYPE_DOMAIN (parm->specs->type)); - if (!vbchain - && TREE_CODE (max) == INTEGER_CST) - { - /* Extract the upper bound from a parameter of an array type - unless the parameter is an ordinary array of unspecified - bound in which case a next iteration of the loop will - exit. */ - if (spec.empty () || spec.end ()[-1] != ' ') - { - if (!tree_fits_shwi_p (max)) - continue; - - /* The upper bound is the value of the largest valid - index. */ - HOST_WIDE_INT n = tree_to_shwi (max) + 1; - char buf[40]; - sprintf (buf, HOST_WIDE_INT_PRINT_UNSIGNED, n); - spec += buf; - } - continue; - } - - /* For a VLA typedef, create a list of its variable bounds and - append it in the expected order to VBCHAIN. */ - tree tpbnds = NULL_TREE; - for (tree type = parm->specs->type; TREE_CODE (type) == ARRAY_TYPE; - type = TREE_TYPE (type)) - { - tree nelts_minus_one = array_type_nelts_minus_one (type); - if (error_operand_p (nelts_minus_one)) - return attrs; - if (TREE_CODE (nelts_minus_one) != INTEGER_CST) - { - /* Each variable VLA bound is represented by the dollar - sign. */ - spec += "$"; - tpbnds = tree_cons (NULL_TREE, nelts_minus_one, tpbnds); - } - } - tpbnds = nreverse (tpbnds); - vbchain = chainon (vbchain, tpbnds); - continue; - } - - if (pd->kind != cdk_array) - continue; - - if (pd->u.array.vla_unspec_p) - { - /* Each unspecified bound is represented by a star. There - can be any number of these in a declaration (but none in - a definition). */ - spec += '*'; - continue; - } - - tree nelts = pd->u.array.dimen; - if (!nelts) - { - /* Ordinary array of unspecified size. There can be at most - one for the most significant bound. Exit on the next - iteration which determines whether or not PARM is declared - as a pointer or an array. */ - nobound = true; - continue; - } - - if (pd->u.array.static_p) - spec += 's'; - - if (!INTEGRAL_TYPE_P (TREE_TYPE (nelts))) - /* Avoid invalid NELTS. */ - return attrs; - - STRIP_NOPS (nelts); - nelts = c_fully_fold (nelts, false, nullptr); - if (TREE_CODE (nelts) == INTEGER_CST) - { - /* Skip all constant bounds except the most significant one. - The interior ones are included in the array type. */ - if (next && (next->kind == cdk_array || next->kind == cdk_pointer)) - continue; - - if (!tree_fits_uhwi_p (nelts)) - /* Bail completely on invalid bounds. */ - return attrs; - - char buf[40]; - unsigned HOST_WIDE_INT n = tree_to_uhwi (nelts); - sprintf (buf, HOST_WIDE_INT_PRINT_UNSIGNED, n); - spec += buf; - break; - } - - /* Each variable VLA bound is represented by a dollar sign. */ - spec += "$"; - vbchain = tree_cons (NULL_TREE, nelts, vbchain); - } - - if (spec.empty () && !nobound) - return attrs; - - spec.insert (0, "["); - if (nobound) - /* Ordinary array of unspecified bound is represented by a space. - It must be last in the spec. */ - spec += ' '; - spec += ']'; - - tree acsstr = build_string (spec.length () + 1, spec.c_str ()); - tree args = tree_cons (NULL_TREE, acsstr, vbchain); - tree name = get_identifier ("arg spec"); - return tree_cons (name, args, attrs); -} /* Given a parsed parameter declaration, decode it into a PARM_DECL and push that on the current scope. EXPR is a pointer to an @@ -6401,7 +6224,6 @@ push_parm_decl (const struct c_parm *parm, tree *expr) if (decl && DECL_P (decl)) DECL_SOURCE_LOCATION (decl) = parm->loc; - attrs = get_parm_array_spec (parm, attrs); decl_attributes (&decl, attrs, 0); decl = pushdecl (decl); @@ -6775,6 +6597,25 @@ add_decl_expr (location_t loc, tree type, tree *expr, bool set_name_p) } } + +/* Add attribute "arg spec" to ATTRS corresponding to an array/VLA parameter + declared with type TYPE. The attribute has two arguments. The first is + a string that encodes the presence of the static keyword. The second is + the declared type of the array before adjustment, i.e. as an array type + including the outermost bound. */ + +static tree +build_arg_spec_attribute (tree type, bool static_p, tree attrs) +{ + tree vbchain = tree_cons (NULL_TREE, type, NULL_TREE); + tree acsstr = static_p ? build_string (7, "static") : + build_string (1, ""); + tree args = tree_cons (NULL_TREE, acsstr, vbchain); + tree name = get_identifier ("arg spec"); + return tree_cons (name, args, attrs); +} + + /* Given declspecs and a declarator, determine the name and type of the object declared and construct a ..._DECL node for it. @@ -6834,6 +6675,7 @@ grokdeclarator (const struct c_declarator *declarator, bool funcdef_flag = false; bool funcdef_syntax = false; bool size_varies = false; + bool size_error = false; tree decl_attr = declspecs->decl_attr; int array_ptr_quals = TYPE_UNQUALIFIED; tree array_ptr_attrs = NULL_TREE; @@ -7326,6 +7168,7 @@ grokdeclarator (const struct c_declarator *declarator, "size of unnamed array has non-integer type"); size = integer_one_node; size_int_const = true; + size_error = true; } /* This can happen with enum forward declaration. */ else if (!COMPLETE_TYPE_P (TREE_TYPE (size))) @@ -7338,6 +7181,7 @@ grokdeclarator (const struct c_declarator *declarator, "type"); size = integer_one_node; size_int_const = true; + size_error = true; } size = c_fully_fold (size, false, &size_maybe_const); @@ -7363,6 +7207,7 @@ grokdeclarator (const struct c_declarator *declarator, error_at (loc, "size of unnamed array is negative"); size = integer_one_node; size_int_const = true; + size_error = true; } /* Handle a size folded to an integer constant but not an integer constant expression. */ @@ -7978,6 +7823,10 @@ grokdeclarator (const struct c_declarator *declarator, if (TREE_CODE (type) == ARRAY_TYPE) { + if (!size_error) + *decl_attrs = build_arg_spec_attribute (type, array_parm_static, + *decl_attrs); + /* Transfer const-ness of array into that of type pointed to. */ type = TREE_TYPE (type); if (orig_qual_type != NULL_TREE) diff --git a/gcc/calls.cc b/gcc/calls.cc index e16190c..2711c4e 100644 --- a/gcc/calls.cc +++ b/gcc/calls.cc @@ -2589,7 +2589,8 @@ can_implement_as_sibling_call_p (tree exp, return false; } - if (TYPE_VOLATILE (TREE_TYPE (TREE_TYPE (addr)))) + if (TYPE_VOLATILE (TREE_TYPE (TREE_TYPE (addr))) + && !CALL_EXPR_MUST_TAIL_CALL (exp)) { maybe_complain_about_tail_call (exp, _("volatile function type")); return false; diff --git a/gcc/cobol/ChangeLog b/gcc/cobol/ChangeLog index b286f5d..918371d 100644 --- a/gcc/cobol/ChangeLog +++ b/gcc/cobol/ChangeLog @@ -1,3 +1,19 @@ +2025-08-01 Robert Dubner <rdubner@symas.com> + + PR cobol/119324 + * cbldiag.h (location_dump): Inline suppression of knownConditionTrueFalse. + * genapi.cc (parser_statement_begin): Combine two if() statements. + * genutil.cc (get_binary_value): File-level suppression of duplicateBreak. + * symbols.cc (symbol_elem_cmp): File-level suppression of duplicateBreak. + +2025-07-31 Robert Dubner <rdubner@symas.com> + + PR cobol/120244 + * genapi.cc (get_level_88_domain): Increase array size for final byte. + (psa_FldLiteralA): Use correct length in build_string_literal call. + * scan.l: Use a loop instead of std:transform to avoid EOF overrun. + * scan_ante.h (binary_integer_usage): Use a variable-length buffer. + 2025-07-25 David Malcolm <dmalcolm@redhat.com> * util.cc: Update for diagnostic_t becoming diff --git a/gcc/cobol/cbldiag.h b/gcc/cobol/cbldiag.h index 39f1369..dd16190 100644 --- a/gcc/cobol/cbldiag.h +++ b/gcc/cobol/cbldiag.h @@ -122,7 +122,7 @@ static void location_dump( const char func[], int line, const char tag[], const LOC& loc) { extern int yy_flex_debug; // cppcheck-suppress shadowVariable if( yy_flex_debug ) { - const char *detail = gcobol_getenv("update_location"); + const char *detail = gcobol_getenv("update_location"); // cppcheck-suppress knownConditionTrueFalse if( detail ) { fprintf(stderr, "%s:%d: %s location (%d,%d) to (%d,%d)\n", func, line, tag, diff --git a/gcc/cobol/genapi.cc b/gcc/cobol/genapi.cc index 666802e..c9d2da4 100644 --- a/gcc/cobol/genapi.cc +++ b/gcc/cobol/genapi.cc @@ -531,6 +531,14 @@ get_level_88_domain(size_t parent_capacity, cbl_field_t *var, size_t &returned_s free(stream); domain += 1; } + + if( returned_size >= retval_capacity) + { + retval_capacity *= 2; + retval = static_cast<char *>(xrealloc(retval, retval_capacity)); + } + + gcc_assert(returned_size < retval_capacity); retval[returned_size++] = '\0'; return retval; } @@ -1190,12 +1198,9 @@ parser_statement_begin( const cbl_name_t statement_name, if( exception_processing ) { store_location_stuff(statement_name); - } - - if( exception_processing ) - { set_exception_environment(ecs, dcls); } + sv_is_i_o = false; } @@ -16765,9 +16770,9 @@ psa_FldLiteralA(struct cbl_field_t *field ) vs_file_static); actually_create_the_static_field( field, - build_string_literal(field->data.capacity+1, + build_string_literal(field->data.capacity, buffer), - field->data.capacity+1, + field->data.capacity, field->data.initial, NULL_TREE, field->var_decl_node); diff --git a/gcc/cobol/genutil.cc b/gcc/cobol/genutil.cc index 7895ea8..a5f69a0 100644 --- a/gcc/cobol/genutil.cc +++ b/gcc/cobol/genutil.cc @@ -27,6 +27,9 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + +// cppcheck-suppress-file duplicateBreak + #include "cobol-system.h" #include "coretypes.h" #include "tree.h" @@ -1267,7 +1270,7 @@ get_binary_value( tree value, cbl_field_type_str(field->type) ); cbl_internal_error("%s", err); abort(); - // break; // break not needed after abort(); + break; } } diff --git a/gcc/cobol/parse.y b/gcc/cobol/parse.y index 59cc64d..fae96ed 100644 --- a/gcc/cobol/parse.y +++ b/gcc/cobol/parse.y @@ -10336,8 +10336,8 @@ intrinsic: function_udf if( p != NULL ) { auto loc = symbol_field_location(field_index(p->field)); error_msg(loc, "FUNCTION %qs has " - "inconsistent parameter type %ld (%qs)", - keyword_str($1), (long)(p - args.data()), name_of(p->field) ); + "inconsistent parameter type %td (%qs)", + keyword_str($1), p - args.data(), name_of(p->field) ); YYERROR; } $$ = is_numeric(args[0].field)? diff --git a/gcc/cobol/scan.l b/gcc/cobol/scan.l index 2da38d8..ba4c044 100644 --- a/gcc/cobol/scan.l +++ b/gcc/cobol/scan.l @@ -1673,16 +1673,17 @@ B-SHIFT-RC p += 2; while( ISSPACE(*p) ) p++; cbl_name_t name2; - std::transform( p, p + sizeof(name2), name2, - []( char ch ) { - switch(ch) { - case '-': - case '_': return ch; - default: - if( ISALNUM(ch) ) return ch; - } - return '\0'; - } ); + const char *pend = p + sizeof(name2); + char *pout = name2; + while( p < pend ) { + char ch = *p++; + if( ISALNUM(ch) || ch == '-' || ch == '_' ) { + *pout++ = ch; + } else { + *pout++ = '\0'; + break; + } + } symbol_elem_t *e = symbol_file(PROGRAM, name2); /* * For NAME IN FILENAME, we want the parser to handle it. diff --git a/gcc/cobol/scan_ante.h b/gcc/cobol/scan_ante.h index 19ceb2b..31093a6 100644 --- a/gcc/cobol/scan_ante.h +++ b/gcc/cobol/scan_ante.h @@ -149,7 +149,7 @@ numstr_of( const char string[], radix_t radix = decimal_e ) { } auto nx = std::count_if(input, p, fisdigit); if( 36 < nx ) { - error_msg(yylloc, "significand of %s has more than 36 digits (%ld)", input, (long)nx); + error_msg(yylloc, "significand of %s has more than 36 digits (%td)", input, nx); return NO_CONDITION; } @@ -609,7 +609,9 @@ static const std::map <std::string, bint_t > binary_integers { static int binary_integer_usage( const char name[]) { - cbl_name_t uname = {}; + // uname can't be cbl_name_t, because at this point name[] might have more + // than sizeof(cbl_name_t) characters. The length check comes later. + char *uname = xstrdup(name); std::transform(name, name + strlen(name), uname, ftoupper); dbgmsg("%s:%d: checking %s in %zu keyword_aliases", @@ -628,6 +630,7 @@ binary_integer_usage( const char name[]) { yylval.computational.signable = p->second.signable; dbgmsg("%s:%d: %s has type %d", __func__, __LINE__, uname, p->second.type ); + free(uname); return p->second.token; } diff --git a/gcc/cobol/symbols.cc b/gcc/cobol/symbols.cc index 7d6a955..f2cd1b5 100644 --- a/gcc/cobol/symbols.cc +++ b/gcc/cobol/symbols.cc @@ -28,6 +28,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +// cppcheck-suppress-file duplicateBreak + #include "config.h" #include <fstream> // Before cobol-system because it uses poisoned functions #include "cobol-system.h" @@ -500,13 +502,13 @@ symbol_elem_cmp( const void *K, const void *E ) } return strcasecmp(key.name, elem.name); } - // break; // This break not needed if all options do a return. + break; case SymSpecial: return special_pair_cmp(k->elem.special, e->elem.special)? 0 : 1; - // break; // This break not needed after return. + break; case SymAlphabet: return strcasecmp(k->elem.alphabet.name, e->elem.alphabet.name); - // break; // This break not needed after return. + break; case SymFile: // If the key is global, so must be the found element. if( (cbl_file_of(k)->attr & global_e) == global_e && @@ -514,7 +516,7 @@ symbol_elem_cmp( const void *K, const void *E ) return 1; } return strcasecmp(k->elem.file.name, e->elem.file.name); - // break; // This break not needed after return. + break; } assert(k->type == SymField); diff --git a/gcc/cobol/util.cc b/gcc/cobol/util.cc index 2a7bf2b..aed9483 100644 --- a/gcc/cobol/util.cc +++ b/gcc/cobol/util.cc @@ -1049,8 +1049,8 @@ cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const { return TOUPPER(ch) == 'E'; } ); if( !has_exponent && data.precision() < pend - p ) { - error_msg(loc, "%s cannot represent VALUE %qs exactly (max %c%ld)", - name, data.initial, '.', (long)(pend - p)); + error_msg(loc, "%s cannot represent VALUE %qs exactly (max %c%td)", + name, data.initial, '.', pend - p); } } } diff --git a/gcc/common/config/aarch64/cpuinfo.h b/gcc/common/config/aarch64/cpuinfo.h index cd3c2b2..d329d86 100644 --- a/gcc/common/config/aarch64/cpuinfo.h +++ b/gcc/common/config/aarch64/cpuinfo.h @@ -39,10 +39,10 @@ enum CPUFeatures { FEAT_FP, FEAT_SIMD, FEAT_CRC, - FEAT_SHA1, + FEAT_CSSC, FEAT_SHA2, FEAT_SHA3, - FEAT_AES, + FEAT_unused5, FEAT_PMULL, FEAT_FP16, FEAT_DIT, @@ -53,30 +53,30 @@ enum CPUFeatures { FEAT_RCPC, FEAT_RCPC2, FEAT_FRINTTS, - FEAT_DGH, + FEAT_unused6, FEAT_I8MM, FEAT_BF16, - FEAT_EBF16, - FEAT_RPRES, + FEAT_unused7, + FEAT_unused8, FEAT_SVE, - FEAT_SVE_BF16, - FEAT_SVE_EBF16, - FEAT_SVE_I8MM, + FEAT_unused9, + FEAT_unused10, + FEAT_unused11, FEAT_SVE_F32MM, FEAT_SVE_F64MM, FEAT_SVE2, - FEAT_SVE_AES, + FEAT_unused12, FEAT_SVE_PMULL128, FEAT_SVE_BITPERM, FEAT_SVE_SHA3, FEAT_SVE_SM4, FEAT_SME, - FEAT_MEMTAG, + FEAT_unused13, FEAT_MEMTAG2, - FEAT_MEMTAG3, + FEAT_unused14, FEAT_SB, FEAT_unused1, - FEAT_SSBS, + FEAT_unused15, FEAT_SSBS2, FEAT_BTI, FEAT_unused2, @@ -87,6 +87,7 @@ enum CPUFeatures { FEAT_SME_I64, FEAT_SME2, FEAT_RCPC3, + FEAT_MOPS, FEAT_MAX, FEAT_EXT = 62, /* Reserved to indicate presence of additional features field in __aarch64_cpu_features. */ diff --git a/gcc/common/config/avr/avr-common.cc b/gcc/common/config/avr/avr-common.cc index 203a965..d8b982c 100644 --- a/gcc/common/config/avr/avr-common.cc +++ b/gcc/common/config/avr/avr-common.cc @@ -38,6 +38,7 @@ static const struct default_options avr_option_optimization_table[] = { OPT_LEVELS_1_PLUS, OPT_mmain_is_OS_task, NULL, 1 }, { OPT_LEVELS_1_PLUS, OPT_mfuse_add_, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_mfuse_add_, NULL, 2 }, + { OPT_LEVELS_1_PLUS, OPT_mfuse_move2, NULL, 1 }, { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_mfuse_move_, NULL, 3 }, { OPT_LEVELS_2_PLUS, OPT_mfuse_move_, NULL, 23 }, { OPT_LEVELS_2_PLUS, OPT_msplit_bit_shift, NULL, 1 }, diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index 82037a3..da3cb9f 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -1606,8 +1606,9 @@ bool riscv_ext_is_subset (struct cl_target_option *opts, struct cl_target_option *subset) { - for (const auto &[ext_name, ext_info] : riscv_ext_infos) + for (const auto &riscv_ext_info : riscv_ext_infos) { + const auto &ext_info = riscv_ext_info.second; if (ext_info.check_opts (opts) && !ext_info.check_opts (subset)) return false; } diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 1c3e697..db88df0 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -128,7 +128,9 @@ AARCH64_OPT_FMV_EXTENSION("sha2", SHA2, (SIMD), (), (), "sha1 sha2") AARCH64_FMV_FEATURE("sha3", SHA3, (SHA3)) -AARCH64_OPT_FMV_EXTENSION("aes", AES, (SIMD), (), (), "aes") +AARCH64_OPT_EXTENSION("aes", AES, (SIMD), (), (), "aes") + +AARCH64_FMV_FEATURE("aes", PMULL, (AES)) /* +nocrypto disables AES, SHA2 and SM4, and anything that depends on them (such as SHA3 and the SVE2 crypto extensions). */ @@ -171,8 +173,6 @@ AARCH64_OPT_FMV_EXTENSION("i8mm", I8MM, (SIMD), (), (), "i8mm") instructions. */ AARCH64_OPT_FMV_EXTENSION("bf16", BF16, (FP), (SIMD), (), "bf16") -AARCH64_FMV_FEATURE("rpres", RPRES, ()) - AARCH64_OPT_FMV_EXTENSION("sve", SVE, (SIMD, F16, FCMA), (), (), "sve") /* This specifically does not imply +sve. */ @@ -190,7 +190,7 @@ AARCH64_OPT_FMV_EXTENSION("sve2", SVE2, (SVE), (), (), "sve2") AARCH64_OPT_EXTENSION("sve2-aes", SVE2_AES, (SVE2, AES), (), (), "sveaes") -AARCH64_FMV_FEATURE("sve2-aes", SVE_AES, (SVE2_AES)) +AARCH64_FMV_FEATURE("sve2-aes", SVE_PMULL128, (SVE2_AES)) AARCH64_OPT_EXTENSION("sve2-bitperm", SVE2_BITPERM, (SVE2), (), (), "svebitperm") @@ -245,9 +245,9 @@ AARCH64_OPT_EXTENSION("sme-b16b16", SME_B16B16, (SME2, SVE_B16B16), (), (), "sme AARCH64_OPT_EXTENSION("sme-f16f16", SME_F16F16, (SME2), (), (), "smef16f16") -AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "mops") +AARCH64_OPT_FMV_EXTENSION("mops", MOPS, (), (), (), "mops") -AARCH64_OPT_EXTENSION("cssc", CSSC, (), (), (), "cssc") +AARCH64_OPT_FMV_EXTENSION("cssc", CSSC, (), (), (), "cssc") AARCH64_OPT_EXTENSION("cmpbr", CMPBR, (), (), (), "cmpbr") diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index e946e8d..38c307c 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -1031,6 +1031,7 @@ rtx aarch64_pfalse_reg (machine_mode); bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *); rtx aarch64_sve_packed_pred (machine_mode); rtx aarch64_sve_fp_pred (machine_mode, rtx *); +rtx aarch64_sve_emit_masked_fp_pred (machine_mode, rtx); void aarch64_emit_load_store_through_mode (rtx, rtx, machine_mode); bool aarch64_expand_maskloadstore (rtx *, machine_mode); void aarch64_emit_sve_pred_move (rtx, rtx, rtx); diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md index 6b3f439..6b1a747 100644 --- a/gcc/config/aarch64/aarch64-sme.md +++ b/gcc/config/aarch64/aarch64-sme.md @@ -62,6 +62,10 @@ ;; (b) they are sometimes used conditionally, particularly in streaming- ;; compatible code. ;; +;; To prevent the latter from upsetting the assembler, we emit the literal +;; encodings of "SMSTART SM" and "SMSTOP SM" when compiling without +;; TARGET_SME. +;; ;; ========================================================================= ;; ------------------------------------------------------------------------- @@ -161,7 +165,9 @@ (clobber (reg:VNx16BI P14_REGNUM)) (clobber (reg:VNx16BI P15_REGNUM))] "" - "smstart\tsm" + { + return TARGET_SME ? "smstart\tsm" : ".inst 0xd503437f // smstart sm"; + } ) ;; Turn off streaming mode. This clobbers all SVE state. @@ -196,7 +202,9 @@ (clobber (reg:VNx16BI P14_REGNUM)) (clobber (reg:VNx16BI P15_REGNUM))] "" - "smstop\tsm" + { + return TARGET_SME ? "smstop\tsm" : ".inst 0xd503427f // smstop sm"; + } ) ;; ------------------------------------------------------------------------- diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def b/gcc/config/aarch64/aarch64-sve-builtins-sme.def index 8e6aadc..117b70e 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins-sme.def +++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.def @@ -92,7 +92,8 @@ DEF_SME_FUNCTION (svstr_zt, str_zt, none, none) DEF_SME_FUNCTION (svzero_zt, inherent_zt, none, none) #undef REQUIRED_EXTENSIONS -#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2 && AARCH64_FL_FAMINMAX) +#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2 \ + | AARCH64_FL_FAMINMAX) DEF_SME_FUNCTION_GS (svamin, binary_opt_single_n, all_float, x24, none) DEF_SME_FUNCTION_GS (svamax, binary_opt_single_n, all_float, x24, none) #undef REQUIRED_EXTENSIONS diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index 2b627a9..01833a8 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -4004,7 +4004,8 @@ rtx function_expander::get_reg_target () { machine_mode target_mode = result_mode (); - if (!possible_target || GET_MODE (possible_target) != target_mode) + if (!possible_target + || !register_operand (possible_target, target_mode)) possible_target = gen_reg_rtx (target_mode); return possible_target; } diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index b252eef..80a3288 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -5605,18 +5605,21 @@ ;; Predicated floating-point operations with merging. (define_expand "@cond_<optab><mode>" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "<sve_pred_fp_rhs1_operand>") - (match_operand:SVE_FULL_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")] + (match_operand:SVE_F_B16B16 2 "<sve_pred_fp_rhs1_operand>") + (match_operand:SVE_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")] SVE_COND_FP_BINARY) - (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>)" + { + operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]); + } ) ;; Predicated floating-point operations, merging with the first input. @@ -5644,14 +5647,14 @@ ) (define_insn "*cond_<optab><mode>_2_strict" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand")] SVE_COND_FP_BINARY) (match_dup 2)] UNSPEC_SEL))] @@ -5687,14 +5690,14 @@ ) (define_insn "*cond_<optab><mode>_2_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")] SVE_COND_FP_BINARY_I1) (match_dup 2)] UNSPEC_SEL))] @@ -5730,14 +5733,14 @@ ) (define_insn "*cond_<optab><mode>_3_strict" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand")] SVE_COND_FP_BINARY) (match_dup 3)] UNSPEC_SEL))] @@ -5794,16 +5797,16 @@ ) (define_insn_and_rewrite "*cond_<optab><mode>_any_strict" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand")] SVE_COND_FP_BINARY) - (match_operand:SVE_FULL_F_B16B16 4 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>) @@ -5868,16 +5871,16 @@ ) (define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")] SVE_COND_FP_BINARY_I1) - (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" {@ [ cons: =0 , 1 , 2 , 4 ] @@ -5953,14 +5956,14 @@ ) (define_insn "*cond_add<mode>_2_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")] UNSPEC_COND_FADD) (match_dup 2)] UNSPEC_SEL))] @@ -6015,16 +6018,16 @@ ) (define_insn_and_rewrite "*cond_add<mode>_any_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")] UNSPEC_COND_FADD) - (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" {@ [ cons: =0 , 1 , 2 , 3 , 4 ] @@ -6266,14 +6269,14 @@ ) (define_insn "*cond_sub<mode>_3_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") - (match_operand:SVE_FULL_F 3 "register_operand")] + (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate") + (match_operand:SVE_F 3 "register_operand")] UNSPEC_COND_FSUB) (match_dup 3)] UNSPEC_SEL))] @@ -6323,16 +6326,16 @@ ) (define_insn_and_rewrite "*cond_sub<mode>_const_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") - (match_operand:SVE_FULL_F 3 "register_operand")] + (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate") + (match_operand:SVE_F 3 "register_operand")] UNSPEC_COND_FSUB) - (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])" {@ [ cons: =0 , 1 , 3 , 4 ] @@ -6913,7 +6916,7 @@ ;; Predicate AND. We can reuse one of the inputs as the GP. ;; Doubling the second operand is the preferred implementation ;; of the MOV alias, so we use that instead of %1/z, %1, %2. -(define_insn "and<mode>3" +(define_insn "@and<mode>3" [(set (match_operand:PRED_ALL 0 "register_operand") (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand") (match_operand:PRED_ALL 2 "register_operand")))] @@ -7595,29 +7598,29 @@ ;; Unpredicated floating-point ternary operations. (define_expand "<optab><mode>4" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 [(match_dup 4) - (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F_B16B16 1 "register_operand") - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand")] + (match_dup 5) + (match_operand:SVE_F_B16B16 1 "register_operand") + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand")] SVE_COND_FP_TERNARY))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>)" { - operands[4] = aarch64_ptrue_reg (<VPRED>mode); + operands[4] = aarch64_sve_fp_pred (<MODE>mode, &operands[5]); } ) ;; Predicated floating-point ternary operations. (define_insn "@aarch64_pred_<optab><mode>" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") (match_operand:SI 5 "aarch64_sve_gp_strictness") - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>)" {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx , is_rev ] @@ -7631,17 +7634,17 @@ ;; Predicated floating-point ternary operations with merging. (define_expand "@cond_<optab><mode>" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY) - (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>)" { @@ -7649,20 +7652,22 @@ second of the two. */ if (rtx_equal_p (operands[3], operands[5])) std::swap (operands[2], operands[3]); + + operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]); }) ;; Predicated floating-point ternary operations, merging with the ;; first input. (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + (unspec:SVE_F [(match_operand 5) (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "register_operand") - (match_operand:SVE_FULL_F 4 "register_operand")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "register_operand") + (match_operand:SVE_F 4 "register_operand")] SVE_COND_FP_TERNARY) (match_dup 2)] UNSPEC_SEL))] @@ -7678,15 +7683,15 @@ ) (define_insn "*cond_<optab><mode>_2_strict" - [(set (match_operand:SVE_FULL_F 0 "register_operand") - (unspec:SVE_FULL_F - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F + [(set (match_operand:SVE_F 0 "register_operand") + (unspec:SVE_F + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F 2 "register_operand") - (match_operand:SVE_FULL_F 3 "register_operand") - (match_operand:SVE_FULL_F 4 "register_operand")] + (match_operand:SVE_F 2 "register_operand") + (match_operand:SVE_F 3 "register_operand") + (match_operand:SVE_F 4 "register_operand")] SVE_COND_FP_TERNARY) (match_dup 2)] UNSPEC_SEL))] @@ -7700,15 +7705,15 @@ ;; Predicated floating-point ternary operations, merging with the ;; third input. (define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + (unspec:SVE_F_B16B16 [(match_operand 5) (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY) (match_dup 4)] UNSPEC_SEL))] @@ -7724,15 +7729,15 @@ ) (define_insn "*cond_<optab><mode>_4_strict" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY) (match_dup 4)] UNSPEC_SEL))] @@ -7746,17 +7751,17 @@ ;; Predicated floating-point ternary operations, merging with an ;; independent value. (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + (unspec:SVE_F_B16B16 [(match_operand 6) (const_int SVE_RELAXED_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY) - (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>) @@ -7792,17 +7797,17 @@ ) (define_insn_and_rewrite "*cond_<optab><mode>_any_strict" - [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand") - (unspec:SVE_FULL_F_B16B16 - [(match_operand:<VPRED> 1 "register_operand") - (unspec:SVE_FULL_F_B16B16 + [(set (match_operand:SVE_F_B16B16 0 "register_operand") + (unspec:SVE_F_B16B16 + [(match_operand:<VPRED> 1 "aarch64_predicate_operand") + (unspec:SVE_F_B16B16 [(match_dup 1) (const_int SVE_STRICT_GP) - (match_operand:SVE_FULL_F_B16B16 2 "register_operand") - (match_operand:SVE_FULL_F_B16B16 3 "register_operand") - (match_operand:SVE_FULL_F_B16B16 4 "register_operand")] + (match_operand:SVE_F_B16B16 2 "register_operand") + (match_operand:SVE_F_B16B16 3 "register_operand") + (match_operand:SVE_F_B16B16 4 "register_operand")] SVE_COND_FP_TERNARY) - (match_operand:SVE_FULL_F_B16B16 5 "aarch64_simd_reg_or_zero")] + (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] "TARGET_SVE && (<supports_bf16> || !<is_bf16>) @@ -8201,20 +8206,23 @@ ;; ;; For unpacked vectors, it doesn't really matter whether SEL uses the ;; the container size or the element size. If SEL used the container size, -;; it would ignore undefined bits of the predicate but would copy the -;; upper (undefined) bits of each container along with the defined bits. -;; If SEL used the element size, it would use undefined bits of the predicate -;; to select between undefined elements in each input vector. Thus the only -;; difference is whether the undefined bits in a container always come from -;; the same input as the defined bits, or whether the choice can vary -;; independently of the defined bits. +;; it would would copy the upper (undefined) bits of each container along +;; with the corresponding defined bits. If SEL used the element size, +;; it would use separate predicate bits to select between the undefined +;; elements in each input vector; these seperate predicate bits might +;; themselves be undefined, depending on the mode of the predicate. +;; +;; Thus the only difference is whether the undefined bits in a container +;; always come from the same input as the defined bits, or whether the +;; choice can vary independently of the defined bits. ;; ;; For the other instructions, using the element size is more natural, ;; so we do that for SEL as well. +;; (define_insn "*vcond_mask_<mode><vpred>" [(set (match_operand:SVE_ALL 0 "register_operand") (unspec:SVE_ALL - [(match_operand:<VPRED> 3 "register_operand") + [(match_operand:<VPRED> 3 "aarch64_predicate_operand") (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm") (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")] UNSPEC_SEL))] diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index cb1699a..f4a2062 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -3933,6 +3933,33 @@ aarch64_sve_fp_pred (machine_mode data_mode, rtx *strictness) return aarch64_ptrue_reg (aarch64_sve_pred_mode (data_mode)); } +/* PRED is a predicate that governs an operation on DATA_MODE. If DATA_MODE + is a partial vector mode, and if exceptions must be suppressed for its + undefined elements, convert PRED from a container-level predicate to + an element-level predicate and ensure that the undefined elements + are inactive. Make no changes otherwise. + + Return the resultant predicate. */ +rtx +aarch64_sve_emit_masked_fp_pred (machine_mode data_mode, rtx pred) +{ + unsigned int vec_flags = aarch64_classify_vector_mode (data_mode); + if (flag_trapping_math && (vec_flags & VEC_PARTIAL)) + { + /* Generate an element-level mask. */ + rtx mask = aarch64_sve_packed_pred (data_mode); + machine_mode pmode = GET_MODE (mask); + + /* Apply the existing predicate. */ + rtx dst = gen_reg_rtx (pmode); + emit_insn (gen_and3 (pmode, dst, mask, + gen_lowpart (pmode, pred))); + return dst; + } + + return pred; +} + /* Emit a comparison CMP between OP0 and OP1, both of which have mode DATA_MODE, and return the result in a predicate of mode PRED_MODE. Use TARGET as the target register if nonnull and convenient. */ @@ -17166,8 +17193,8 @@ aarch64_ld234_st234_vectors (vect_cost_for_stmt kind, stmt_vec_info stmt_info, && STMT_VINFO_DATA_REF (stmt_info)) { stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); - if (stmt_info - && vect_mem_access_type (stmt_info, node) == VMAT_LOAD_STORE_LANES) + if (node + && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_LOAD_STORE_LANES) return DR_GROUP_SIZE (stmt_info); } return 0; @@ -17438,8 +17465,9 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind, for each element. We therefore need to divide the full-instruction cost by the number of elements in the vector. */ if (kind == scalar_load + && node && sve_costs - && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER) + && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER) { unsigned int nunits = vect_nunits_for_cost (vectype); /* Test for VNx2 modes, which have 64-bit containers. */ @@ -17451,8 +17479,9 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind, /* Detect cases in which a scalar_store is really storing one element in a scatter operation. */ if (kind == scalar_store + && node && sve_costs - && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER) + && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER) return sve_costs->scatter_store_elt_cost; /* Detect cases in which vec_to_scalar represents an in-loop reduction. */ @@ -17708,7 +17737,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind, if (stmt_info && kind == vec_to_scalar && (m_vec_flags & VEC_ADVSIMD) - && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER) + && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER) { auto dr = STMT_VINFO_DATA_REF (stmt_info); tree dr_ref = DR_REF (dr); @@ -17823,7 +17852,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind, if (stmt_info && sve_issue && (kind == scalar_load || kind == scalar_store) - && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER) + && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER) { unsigned int pairs = CEIL (count, 2); ops->pred_ops += sve_issue->gather_scatter_pair_pred_ops * pairs; @@ -17978,9 +18007,10 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, /* Check if we've seen an SVE gather/scatter operation and which size. */ if (kind == scalar_load + && node && vectype && aarch64_sve_mode_p (TYPE_MODE (vectype)) - && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER) + && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER) { const sve_vec_cost *sve_costs = aarch64_tune_params.vec_costs->sve; if (sve_costs) @@ -20482,6 +20512,8 @@ aarch64_compare_version_priority (tree decl1, tree decl2) unsigned long _size; // Size of the struct, so it can grow. unsigned long _hwcap; unsigned long _hwcap2; + unsigned long _hwcap3; + unsigned long _hwcap4; } */ @@ -20498,14 +20530,24 @@ build_ifunc_arg_type () tree field3 = build_decl (UNKNOWN_LOCATION, FIELD_DECL, get_identifier ("_hwcap2"), long_unsigned_type_node); + tree field4 = build_decl (UNKNOWN_LOCATION, FIELD_DECL, + get_identifier ("_hwcap3"), + long_unsigned_type_node); + tree field5 = build_decl (UNKNOWN_LOCATION, FIELD_DECL, + get_identifier ("_hwcap4"), + long_unsigned_type_node); DECL_FIELD_CONTEXT (field1) = ifunc_arg_type; DECL_FIELD_CONTEXT (field2) = ifunc_arg_type; DECL_FIELD_CONTEXT (field3) = ifunc_arg_type; + DECL_FIELD_CONTEXT (field4) = ifunc_arg_type; + DECL_FIELD_CONTEXT (field5) = ifunc_arg_type; TYPE_FIELDS (ifunc_arg_type) = field1; DECL_CHAIN (field1) = field2; DECL_CHAIN (field2) = field3; + DECL_CHAIN (field3) = field4; + DECL_CHAIN (field4) = field5; layout_type (ifunc_arg_type); @@ -31964,9 +32006,43 @@ aarch64_test_sysreg_encoding_clashes (void) static void aarch64_test_sve_folding () { + aarch64_target_switcher switcher (AARCH64_FL_SVE); + tree res = fold_unary (BIT_NOT_EXPR, ssizetype, ssize_int (poly_int64 (1, 1))); ASSERT_TRUE (operand_equal_p (res, ssize_int (poly_int64 (-2, -1)))); + + auto build_v16bi = [](bool a, bool b) + { + rtx_vector_builder builder (VNx16BImode, 2, 1); + builder.quick_push (a ? const1_rtx : const0_rtx); + builder.quick_push (b ? const1_rtx : const0_rtx); + return builder.build (); + }; + rtx v16bi_10 = build_v16bi (1, 0); + rtx v16bi_01 = build_v16bi (0, 1); + + for (auto mode : { VNx8BImode, VNx4BImode, VNx2BImode }) + { + rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1); + rtx subreg = lowpart_subreg (VNx16BImode, reg, mode); + rtx and1 = simplify_gen_binary (AND, VNx16BImode, subreg, v16bi_10); + ASSERT_EQ (lowpart_subreg (mode, and1, VNx16BImode), reg); + rtx and0 = simplify_gen_binary (AND, VNx16BImode, subreg, v16bi_01); + ASSERT_EQ (lowpart_subreg (mode, and0, VNx16BImode), CONST0_RTX (mode)); + + rtx ior1 = simplify_gen_binary (IOR, VNx16BImode, subreg, v16bi_10); + ASSERT_EQ (lowpart_subreg (mode, ior1, VNx16BImode), CONSTM1_RTX (mode)); + rtx ior0 = simplify_gen_binary (IOR, VNx16BImode, subreg, v16bi_01); + ASSERT_EQ (lowpart_subreg (mode, ior0, VNx16BImode), reg); + + rtx xor1 = simplify_gen_binary (XOR, VNx16BImode, subreg, v16bi_10); + ASSERT_RTX_EQ (lowpart_subreg (mode, xor1, VNx16BImode), + lowpart_subreg (mode, gen_rtx_NOT (VNx16BImode, subreg), + VNx16BImode)); + rtx xor0 = simplify_gen_binary (XOR, VNx16BImode, subreg, v16bi_01); + ASSERT_EQ (lowpart_subreg (mode, xor0, VNx16BImode), reg); + } } /* Run all target-specific selftests. */ diff --git a/gcc/config/aarch64/tuning_models/generic_armv9_a.h b/gcc/config/aarch64/tuning_models/generic_armv9_a.h index f76a250..9eb1a20 100644 --- a/gcc/config/aarch64/tuning_models/generic_armv9_a.h +++ b/gcc/config/aarch64/tuning_models/generic_armv9_a.h @@ -26,7 +26,7 @@ static const struct cpu_addrcost_table generic_armv9_a_addrcost_table = { { - 1, /* hi */ + 0, /* hi */ 0, /* si */ 0, /* di */ 1, /* ti */ diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc index 6a88a27..69df6d2 100644 --- a/gcc/config/avr/avr-passes.cc +++ b/gcc/config/avr/avr-passes.cc @@ -4843,6 +4843,137 @@ avr_pass_fuse_add::execute1 (function *func) ////////////////////////////////////////////////////////////////////////////// +// Fuse 2 move insns after combine. + +static const pass_data avr_pass_data_2moves = +{ + RTL_PASS, // type + "", // name (will be patched) + OPTGROUP_NONE, // optinfo_flags + TV_DF_SCAN, // tv_id + 0, // properties_required + 0, // properties_provided + 0, // properties_destroyed + 0, // todo_flags_start + 0 // todo_flags_finish +}; + +class avr_pass_2moves : public rtl_opt_pass +{ +public: + avr_pass_2moves (gcc::context *ctxt, const char *name) + : rtl_opt_pass (avr_pass_data_2moves, ctxt) + { + this->name = name; + } + + unsigned int execute (function *func) final override + { + if (optimize && avropt_fuse_move2) + { + bool changed = false; + basic_block bb; + + FOR_EACH_BB_FN (bb, func) + { + changed |= optimize_2moves_bb (bb); + } + + if (changed) + { + df_note_add_problem (); + df_analyze (); + } + } + + return 0; + } + + bool optimize_2moves (rtx_insn *, rtx_insn *); + bool optimize_2moves_bb (basic_block); +}; // avr_pass_2moves + +bool +avr_pass_2moves::optimize_2moves_bb (basic_block bb) +{ + bool changed = false; + rtx_insn *insn1 = nullptr; + rtx_insn *insn2 = nullptr; + rtx_insn *curr; + + FOR_BB_INSNS (bb, curr) + { + if (insn1 && INSN_P (insn1) + && insn2 && INSN_P (insn2)) + changed |= optimize_2moves (insn1, insn2); + + insn1 = insn2; + insn2 = curr; + } + + return changed; +} + +bool +avr_pass_2moves::optimize_2moves (rtx_insn *insn1, rtx_insn *insn2) +{ + bool good = false; + bool bad = false; + rtx set1, dest1, src1; + rtx set2, dest2, src2; + + if ((set1 = single_set (insn1)) + && (set2 = single_set (insn2)) + && (src1 = SET_SRC (set1)) + && REG_P (src2 = SET_SRC (set2)) + && REG_P (dest1 = SET_DEST (set1)) + && REG_P (dest2 = SET_DEST (set2)) + && rtx_equal_p (dest1, src2) + // Now we have: + // insn1: dest1 = src1 + // insn2: dest2 = dest1 + && REGNO (dest1) >= FIRST_PSEUDO_REGISTER + // Paranoia. + && GET_CODE (PATTERN (insn1)) != PARALLEL + && GET_CODE (PATTERN (insn2)) != PARALLEL + && (rtx_equal_p (dest2, src1) + || !reg_overlap_mentioned_p (dest2, src1))) + { + avr_dump ("\n;; Found 2moves:\n%r\n%r\n", insn1, insn2); + avr_dump (";; reg %d: insn uses uids:", REGNO (dest1)); + + // Go check that dest1 is used exactly once, namely by insn2. + + df_ref use = DF_REG_USE_CHAIN (REGNO (dest1)); + for (; use; use = DF_REF_NEXT_REG (use)) + { + rtx_insn *user = DF_REF_INSN (use); + avr_dump (" %d", INSN_UID (user)); + good |= INSN_UID (user) == INSN_UID (insn2); + bad |= INSN_UID (user) != INSN_UID (insn2); + } + avr_dump (".\n"); + + if (good && !bad + // Propagate src1 to insn2: + // insn1: # Deleted + // insn2: dest2 = src1 + && validate_change (insn2, &SET_SRC (set2), src1, false)) + { + SET_INSN_DELETED (insn1); + return true; + } + } + + if (good && !bad) + avr_dump (";; Failed\n"); + + return false; +} + + + +////////////////////////////////////////////////////////////////////////////// // Split insns with nonzero_bits() after combine. static const pass_data avr_pass_data_split_nzb = @@ -5704,6 +5835,14 @@ make_avr_pass_casesi (gcc::context *ctxt) return new avr_pass_casesi (ctxt, "avr-casesi"); } +// Optimize 2 consecutive moves after combine. + +rtl_opt_pass * +make_avr_pass_2moves (gcc::context *ctxt) +{ + return new avr_pass_2moves (ctxt, "avr-2moves"); +} + rtl_opt_pass * make_avr_pass_split_nzb (gcc::context *ctxt) { diff --git a/gcc/config/avr/avr-passes.def b/gcc/config/avr/avr-passes.def index eb60a93..d668c7f 100644 --- a/gcc/config/avr/avr-passes.def +++ b/gcc/config/avr/avr-passes.def @@ -74,6 +74,14 @@ INSERT_PASS_BEFORE (pass_free_cfg, 1, avr_pass_recompute_notes); INSERT_PASS_AFTER (pass_expand, 1, avr_pass_casesi); +/* Insn combine may come up with superfluous reg-reg moves, where the combine + people say that these are no problem since reg-alloc is supposed to optimize + them. The issue is that the lower-subreg pass sitting between combine and + reg-alloc may split such moves, coming up with a zoo of subregs which are + only handled poorly by the register allocator. */ + +INSERT_PASS_AFTER (pass_combine, 1, avr_pass_2moves); + /* Some combine insns have nonzero_bits() in their condition, though insns should not use such stuff in their condition. Therefore, we split such insn into something without nonzero_bits() in their condition right after diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h index ca30136..37911e7 100644 --- a/gcc/config/avr/avr-protos.h +++ b/gcc/config/avr/avr-protos.h @@ -208,6 +208,7 @@ extern rtl_opt_pass *make_avr_pass_casesi (gcc::context *); extern rtl_opt_pass *make_avr_pass_ifelse (gcc::context *); extern rtl_opt_pass *make_avr_pass_split_nzb (gcc::context *); extern rtl_opt_pass *make_avr_pass_split_after_peephole2 (gcc::context *); +extern rtl_opt_pass *make_avr_pass_2moves (gcc::context *); #ifdef RTX_CODE extern bool avr_casei_sequence_check_operands (rtx *xop); extern bool avr_split_fake_addressing_move (rtx_insn *insn, rtx *operands); diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index c469297..1fb59b6 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -14418,6 +14418,13 @@ avr_output_addr_vec (rtx_insn *labl, rtx table) // Output the label that precedes the table. ASM_OUTPUT_ALIGN (stream, 1); + + char s_labl[40]; + targetm.asm_out.generate_internal_label (s_labl, "L", + CODE_LABEL_NUMBER (labl)); + ASM_OUTPUT_TYPE_DIRECTIVE (stream, s_labl, + AVR_HAVE_JMP_CALL ? "object" : "function"); + targetm.asm_out.internal_label (stream, "L", CODE_LABEL_NUMBER (labl)); // Output the table's content. @@ -14984,10 +14991,11 @@ avr_addr_space_convert (rtx src, tree type_old, tree type_new) /* Linearize memory: RAM has bit 23 set. When as_new = __flashx then this is basically UB since __flashx mistreats RAM addresses, but there - is no way to bail out. (Though -Waddr-space-convert will tell.) */ + is no way to bail out. (Though -Waddr-space-convert will tell.) + ...but PR121277 is confusing, in particular when NULL is coming in. */ int msb = ADDR_SPACE_GENERIC_P (as_old) - ? 0x80 + ? as_new == ADDR_SPACE_MEMX ? 0x80 : 0x00 : avr_addrspace[as_old].segment; src = force_reg (Pmode, src); @@ -15085,10 +15093,16 @@ avr_convert_to_type (tree type, tree expr) const char *name_old = avr_addrspace[as_old].name; const char *name_new = avr_addrspace[as_new].name; - warning (OPT_Waddr_space_convert, - "conversion from address space %qs to address space %qs", - ADDR_SPACE_GENERIC_P (as_old) ? "generic" : name_old, - ADDR_SPACE_GENERIC_P (as_new) ? "generic" : name_new); + // Be relaxed when NULL is used, and when 0x0 stands for + // address 0x0. + bool nowarn = (expr == null_pointer_node + && (as_new == ADDR_SPACE_FLASHX + || as_new == ADDR_SPACE_FLASH)); + if (!nowarn) + warning (OPT_Waddr_space_convert, + "conversion from address space %qs to address space %qs", + ADDR_SPACE_GENERIC_P (as_old) ? "generic" : name_old, + ADDR_SPACE_GENERIC_P (as_new) ? "generic" : name_new); return fold_build1_loc (loc, ADDR_SPACE_CONVERT_EXPR, type, expr); } diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt index 9883119..7f6f18c 100644 --- a/gcc/config/avr/avr.opt +++ b/gcc/config/avr/avr.opt @@ -164,6 +164,10 @@ mfuse-move= Target Joined RejectNegative UInteger Var(avropt_fuse_move) Init(0) Optimization IntegerRange(0, 23) -mfuse-move=<0,23> Optimization. Run a post-reload pass that tweaks move instructions. +mfuse-move2 +Target Var(avropt_fuse_move2) Init(0) Optimization +Optimization. Fuse some move insns after insn combine. + mabsdata Target Mask(ABSDATA) Assume that all data in static storage can be accessed by LDS / STS instructions. This option is only useful for reduced Tiny devices like ATtiny40. diff --git a/gcc/config/avr/avr.opt.urls b/gcc/config/avr/avr.opt.urls index 662fdee..87c26b2 100644 --- a/gcc/config/avr/avr.opt.urls +++ b/gcc/config/avr/avr.opt.urls @@ -92,6 +92,9 @@ UrlSuffix(gcc/AVR-Options.html#index-mfuse-move) mfuse-move= UrlSuffix(gcc/AVR-Options.html#index-mfuse-move) +mfuse-move2 +UrlSuffix(gcc/AVR-Options.html#index-mfuse-move2) + mabsdata UrlSuffix(gcc/AVR-Options.html#index-mabsdata) diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h index fe68678..0287400 100644 --- a/gcc/config/gcn/gcn-opts.h +++ b/gcc/config/gcn/gcn-opts.h @@ -92,6 +92,8 @@ enum hsaco_attr_type /* Whether to use the 'globally coherent' (glc) or the 'scope' (sc0) flag for non-scalar memory operations. The string starts on purpose with a space. Note: for scalar memory operations (i.e. 's_...'), 'glc' is still used. + Note: on atomics, glc/sc0 denotes whether the pre-op operation should + be used. CDNA3 also uses 'nt' instead of 'slc' and 'sc1' instead of 'scc'; however, there is no non-scalar user so far. */ #define TARGET_GLC_NAME (TARGET_CDNA3 ? " sc0" : " glc") diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index 0994329..a34d2e3 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -3938,6 +3938,7 @@ v_cmpx%E1\t%2, %3 v_cmpx%E1\t%2, %3" [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc") + (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmp,vcmpx,vcmpx") (set_attr "length" "4,8,4,8,8,8,4,8") (set_attr "rdna" "*,*,no,no,*,*,yes,yes")]) @@ -3992,6 +3993,7 @@ v_cmpx%E1\t%2, %3 v_cmpx%E1\t%2, %3" [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc") + (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmp,vcmpx,vcmpx") (set_attr "length" "4,8,4,8,8,8,4,8") (set_attr "rdna" "*,*,no,no,*,*,yes,yes")]) @@ -4050,6 +4052,7 @@ v_cmpx%E1\t%2, %3 v_cmpx%E1\t%2, %3" [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc") + (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmpx,vcmpx") (set_attr "length" "4,8,4,8,8,4,8") (set_attr "rdna" "*,*,no,no,*,yes,yes")]) @@ -4073,6 +4076,7 @@ v_cmpx%E1\t%2, %3 v_cmpx%E1\t%2, %3" [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc") + (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmpx,vcmpx") (set_attr "length" "4,8,4,8,8,4,8") (set_attr "rdna" "*,*,no,no,*,yes,yes")]) diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 8959118..5ffeb23 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -54,6 +54,7 @@ #include "gimple.h" #include "cgraph.h" #include "case-cfn-macros.h" +#include "opts.h" /* This file should be included last. */ #include "target-def.h" @@ -183,6 +184,11 @@ gcn_option_override (void) if (flag_sram_ecc == HSACO_ATTR_DEFAULT) flag_sram_ecc = gcn_devices[gcn_arch].sramecc_default; + + /* TODO: This seems to produce tighter loops, but the testsuites expects it + to be set to '2', so I'll leave it default for now. + SET_OPTION_IF_UNSET (&global_options, &global_options_set, + param_vect_partial_vector_usage, 1); */ } /* }}} */ @@ -5789,45 +5795,19 @@ gcn_libc_has_function (enum function_class fn_class, return bsd_libc_has_function (fn_class, type); } -/* }}} */ -/* {{{ md_reorg pass. */ - -/* Identify V_CMPX from the "type" attribute; - note: this will also match 'v_cmp %E1 vcc'. */ +/* Implement TARGET_VECTORIZE_PREFER_GATHER_SCATTER. */ static bool -gcn_cmpx_insn_p (attr_type type) +gcn_prefer_gather_scatter (machine_mode ARG_UNUSED (mode), + int ARG_UNUSED (scale), + unsigned int ARG_UNUSED (group_size)) { - switch (type) - { - case TYPE_VOPC: - return true; - case TYPE_MUBUF: - case TYPE_MTBUF: - case TYPE_FLAT: - case TYPE_VOP3P_MAI: - case TYPE_UNKNOWN: - case TYPE_SOP1: - case TYPE_SOP2: - case TYPE_SOPK: - case TYPE_SOPC: - case TYPE_SOPP: - case TYPE_SMEM: - case TYPE_DS: - case TYPE_VOP2: - case TYPE_VOP1: - case TYPE_VOP3A: - case TYPE_VOP3B: - case TYPE_VOP_SDWA: - case TYPE_VOP_DPP: - case TYPE_MULT: - case TYPE_VMULT: - return false; - } - gcc_unreachable (); - return false; + return true; } +/* }}} */ +/* {{{ md_reorg pass. */ + /* Identify VMEM instructions from their "type" attribute. */ static bool @@ -6356,19 +6336,59 @@ gcn_md_reorg (void) reg_class_contents[(int)VCC_CONDITIONAL_REG]))) nops_rqd = ivccwait - prev_insn->age; + /* NOTE: The following condition for adding wait state exists, but + GCC does not access the special registers using their SGPR#. + Thus, no action is required here. The following wait-state + condition exists at least for VEGA/gfx900+ to CDNA3: + Mixed use of VCC: alias vs. SGPR# - v_readlane, + v_readfirstlane, v_cmp, v_add_*i/u, v_sub_*i/u, v_div_*scale + followed by VALU reads VCC as constant requires 1 wait state. + (As carry-in, it requires none.) + [VCC can be accessed by name or logical SGPR that holds it.] */ + + /* Testing indicates that CDNA3 requires an s_nop between + e.g. 'v_cmp_eq_u64 vcc, v[4:5], v[8:9]' and 'v_mov_b32 v0, vcc_lo'. + Thus: add it between v_cmp writing VCC and VALU read of VCC. */ + if (TARGET_CDNA3_NOPS + && (prev_insn->age + nops_rqd) < 1 + && iunit == UNIT_VECTOR + && (hard_reg_set_intersect_p + (depregs, reg_class_contents[(int)VCC_CONDITIONAL_REG])) + && get_attr_vcmp (prev_insn->insn) == VCMP_VCMP) + nops_rqd = 1 - prev_insn->age; + + /* CDNA3: VALU writes SGPR/VCC: v_readlane, v_readfirstlane, v_cmp, + v_add_*i/u, v_sub_*i/u, v_div_*scale - followed by: + - VALU reads SGPR as constant requires 1 waite state + - VALU reads SGPR as carry-in requires no waite state + - v_readlane/v_writelane reads SGPR as lane select requires 4 wait + states. */ + if (TARGET_CDNA3_NOPS + && (prev_insn->age + nops_rqd) < 4 + && iunit == UNIT_VECTOR + && prev_insn->unit == UNIT_VECTOR + && hard_reg_set_intersect_p + (depregs, reg_class_contents[(int) SGPR_SRC_REGS])) + { + if (get_attr_laneselect (insn) != LANESELECT_NO) + nops_rqd = 4 - prev_insn->age; + else if ((prev_insn->age + nops_rqd) < 1) + nops_rqd = 1 - prev_insn->age; + } + /* CDNA3: v_cmpx followed by - V_readlane, v_readfirstlane, v_writelane requires 4 wait states - VALU reads EXEC as constant requires 2 wait states - other VALU requires no wait state */ if (TARGET_CDNA3_NOPS && (prev_insn->age + nops_rqd) < 4 - && gcn_cmpx_insn_p (prev_insn->type) + && get_attr_vcmp (prev_insn->insn) == VCMP_VCMPX && get_attr_laneselect (insn) != LANESELECT_NO) nops_rqd = 4 - prev_insn->age; else if (TARGET_CDNA3_NOPS && (prev_insn->age + nops_rqd) < 2 && iunit == UNIT_VECTOR - && gcn_cmpx_insn_p (prev_insn->type) + && get_attr_vcmp (prev_insn->insn) == VCMP_VCMPX && TEST_HARD_REG_BIT (ireads, EXECZ_REG)) nops_rqd = 2 - prev_insn->age; @@ -6436,8 +6456,8 @@ gcn_md_reorg (void) } /* Insert the required number of NOPs. */ - for (int i = nops_rqd; i > 0; i--) - emit_insn_after (gen_nop (), last_insn); + if (nops_rqd > 0) + emit_insn_after (gen_nops (GEN_INT (nops_rqd-1)), last_insn); /* Age the previous instructions. We can also ignore writes to registers subsequently overwritten. */ @@ -7283,6 +7303,11 @@ print_operand_address (FILE *file, rtx mem) H - print second part of a multi-reg value (high-part of 2-reg value) J - print third part of a multi-reg value K - print fourth part of a multi-reg value + R Print a scalar register number as an integer. Temporary hack. + V - Print a vector register number as an integer. Temporary hack. + + Additionally, the standard builtin c, n, a, and l exist; see gccint's + "Output Templates and Operand Substitution" for details. */ void @@ -8131,6 +8156,8 @@ gcn_dwarf_register_span (rtx rtl) gcn_vectorize_builtin_vectorized_function #undef TARGET_VECTORIZE_GET_MASK_MODE #define TARGET_VECTORIZE_GET_MASK_MODE gcn_vectorize_get_mask_mode +#undef TARGET_VECTORIZE_PREFER_GATHER_SCATTER +#define TARGET_VECTORIZE_PREFER_GATHER_SCATTER gcn_prefer_gather_scatter #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE gcn_vectorize_preferred_simd_mode #undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index fad42e6..4130cf6 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -324,6 +324,11 @@ "store,storex34,load,atomic,atomicwait,cmpswapx2,no" (const_string "no")) +; Identify v_cmp and v_cmpx instructions for "Manually Inserted Wait State" +; handling. + +(define_attr "vcmp" "vcmp,vcmpx,no" (const_string "no")) + ; Identify instructions that require "Manually Inserted Wait State" if ; a previous instruction writes to VCC. The number gives the number of NOPs. @@ -424,6 +429,15 @@ "s_nop\t0x0" [(set_attr "type" "sopp")]) +; Variant of 'nop' that accepts a count argument. +; s_nop accepts 0x0 to 0xf for 1 to 16 nops; however, +; as %0 prints decimals, only 0 to 9 (= 1 to 10 nops) can be used. +(define_insn "nops" + [(match_operand 0 "const_int_operand")] + "" + "s_nop\t0x%0" + [(set_attr "type" "sopp")]) + ; FIXME: What should the value of the immediate be? Zero is disallowed, so ; pick 1 for now. (define_insn "trap" @@ -566,6 +580,7 @@ [(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,smem,flat,flat, flat,flat,flat,flat") (set_attr "flatmemaccess" "*,*,*,*,*,*,*,*,*,load,load,store,load,load,store") + (set_attr "vcmp" "*,*,*,*,vcmp,*,*,*,*,*,*,*,*,*,*") (set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*,*,*,*") (set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12,12,12,12") (set_attr "xnack" "*,*,*,*,*,*,off,on,*,off,on,*,off,on,*") @@ -1089,6 +1104,7 @@ s_cmp%D1\t%2, %3 v_cmp%E1\tvcc, %2, %3" [(set_attr "type" "sopc,vopc") + (set_attr "vcmp" "vcmp") (set_attr "length" "8")]) (define_insn "cstoredi4_vector" @@ -1099,6 +1115,7 @@ "" "v_cmp%E1\tvcc, %2, %3" [(set_attr "type" "vopc") + (set_attr "vcmp" "vcmp") (set_attr "length" "8")]) (define_expand "cbranchdi4" @@ -1125,6 +1142,7 @@ "" "v_cmp%E1\tvcc, %2, %3" [(set_attr "type" "vopc") + (set_attr "vcmp" "vcmp") (set_attr "length" "8")]) (define_expand "cbranch<mode>4" @@ -2165,7 +2183,7 @@ ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_load%o0\t%0, %A1%O1 %G1\;" "s_waitcnt\t0\;buffer_gl1_inv\;buffer_gl0_inv" : TARGET_TARGET_SC_CACHE - ? "buffer_inv sc1\;flat_load%o0\t%0, %A1%O1 %G1\;" + ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;flat_load%o0\t%0, %A1%O1 %G1\;" "s_waitcnt\t0\;buffer_inv sc1" : "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 %G1\;" "s_waitcnt\t0\;buffer_wbinvl1_vol"); @@ -2177,7 +2195,7 @@ ? "buffer_gl1_inv\;buffer_gl0_inv\;global_load%o0\t%0, %A1%O1 %G1\;" "s_waitcnt\tvmcnt(0)\;buffer_gl1_inv\;buffer_gl0_inv" : TARGET_TARGET_SC_CACHE - ? "buffer_inv sc1\;global_load%o0\t%0, %A1%O1 %G1\;" + ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;global_load%o0\t%0, %A1%O1 %G1\;" "s_waitcnt\tvmcnt(0)\;buffer_inv sc1" : "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 %G1\;" "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"); @@ -2224,7 +2242,7 @@ : TARGET_WBINVL1_CACHE ? "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 %G1" : TARGET_TARGET_SC_CACHE - ? "buffer_inv sc1\;flat_store%o1\t%A0, %1%O0 %G1" + ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;flat_store%o1\t%A0, %1%O0 %G1" : "error: cache architectire unspecified"); case 2: return (TARGET_GLn_CACHE @@ -2232,7 +2250,7 @@ : TARGET_WBINVL1_CACHE ? "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 %G1" : TARGET_TARGET_SC_CACHE - ? "buffer_inv sc1\;global_store%o1\t%A0, %1%O0 %G1" + ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;global_store%o1\t%A0, %1%O0 %G1" : "error: cache architecture unspecified"); } break; @@ -2252,7 +2270,8 @@ ? "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 %G1\;" "s_waitcnt\t0\;buffer_wbinvl1_vol" : TARGET_TARGET_SC_CACHE - ? "buffer_inv sc1\;flat_store%o1\t%A0, %1%O0 %G1\;" + ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;" + "flat_store%o1\t%A0, %1%O0 %G1\;" "s_waitcnt\t0\;buffer_inv sc1" : "error: cache architecture unspecified"); case 2: @@ -2263,7 +2282,8 @@ ? "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 %G1\;" "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol" : TARGET_TARGET_SC_CACHE - ? "buffer_inv sc1\;global_store%o1\t%A0, %1%O0 %G1\;" + ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;" + "global_store%o1\t%A0, %1%O0 %G1\;" "s_waitcnt\tvmcnt(0)\;buffer_inv sc1" : "error: cache architecture unspecified"); } @@ -2347,7 +2367,7 @@ ? "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;" "s_waitcnt\t0" : TARGET_TARGET_SC_CACHE - ? "buffer_inv sc1\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;" + ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;" "s_waitcnt\t0" : "error: cache architecture unspecified"); case 2: @@ -2360,7 +2380,7 @@ "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;" "s_waitcnt\tvmcnt(0)" : TARGET_TARGET_SC_CACHE - ? "buffer_inv sc1\;" + ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;" "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;" "s_waitcnt\tvmcnt(0)" : "error: cache architecture unspecified"); @@ -2382,7 +2402,7 @@ ? "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;" "s_waitcnt\t0\;buffer_wbinvl1_vol" : TARGET_TARGET_SC_CACHE - ? "buffer_inv sc1\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;" + ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;" "s_waitcnt\t0\;buffer_inv sc1" : "error: cache architecture unspecified"); case 2: @@ -2395,7 +2415,7 @@ "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;" "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol" : TARGET_TARGET_SC_CACHE - ? "buffer_inv sc1\;" + ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;" "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;" "s_waitcnt\tvmcnt(0)\;buffer_inv sc1" : "error: cache architecture unspecified"); diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index c131577..53e86c8 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -3226,7 +3226,7 @@ remove_partial_avx_dependency (void) break; } - /* Only hanlde conversion here. */ + /* Only handle conversion here. */ machine_mode src_mode = convert_p ? GET_MODE (XEXP (src, 0)) : VOIDmode; switch (src_mode) diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def index 2fedbeb..c2db305 100644 --- a/gcc/config/i386/i386-modes.def +++ b/gcc/config/i386/i386-modes.def @@ -91,7 +91,6 @@ VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF V2TF */ VECTOR_MODES (FLOAT, 64); /* V32HF V16SF V8DF V4TF */ VECTOR_MODES (FLOAT, 128); /* V64HF V32SF V16DF V8TF */ -VECTOR_MODES (FLOAT, 256); /* V128HF V64SF V32DF V16TF */ VECTOR_MODE (FLOAT, HF, 2); /* V2HF */ VECTOR_MODE (FLOAT, BF, 2); /* V2BF */ VECTOR_MODE (FLOAT, HF, 6); /* V6HF */ @@ -102,7 +101,6 @@ VECTOR_MODE (INT, QI, 2); /* V2QI */ VECTOR_MODE (INT, QI, 12); /* V12QI */ VECTOR_MODE (INT, QI, 14); /* V14QI */ VECTOR_MODE (INT, HI, 6); /* V6HI */ -VECTOR_MODE (INT, SI, 64); /* V64SI */ INT_MODE (OI, 32); INT_MODE (XI, 64); diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index ca6bb83..09a35ef 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -3615,6 +3615,18 @@ ix86_handle_cconv_attribute (tree *node, tree name, tree args, int, return NULL_TREE; } + if (TARGET_64BIT) + { + /* Do not warn when emulating the MS ABI. */ + if ((TREE_CODE (*node) != FUNCTION_TYPE + && TREE_CODE (*node) != METHOD_TYPE) + || ix86_function_type_abi (*node) != MS_ABI) + warning (OPT_Wattributes, "%qE attribute ignored", + name); + *no_add_attrs = true; + return NULL_TREE; + } + /* Can combine regparm with all attributes but fastcall, and thiscall. */ if (is_attribute_p ("regparm", name)) { @@ -3627,7 +3639,7 @@ ix86_handle_cconv_attribute (tree *node, tree name, tree args, int, if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) { - error ("regparam and thiscall attributes are not compatible"); + error ("regparm and thiscall attributes are not compatible"); } cst = TREE_VALUE (args); @@ -3648,19 +3660,7 @@ ix86_handle_cconv_attribute (tree *node, tree name, tree args, int, return NULL_TREE; } - if (TARGET_64BIT) - { - /* Do not warn when emulating the MS ABI. */ - if ((TREE_CODE (*node) != FUNCTION_TYPE - && TREE_CODE (*node) != METHOD_TYPE) - || ix86_function_type_abi (*node) != MS_ABI) - warning (OPT_Wattributes, "%qE attribute ignored", - name); - *no_add_attrs = true; - return NULL_TREE; - } - - /* Can combine fastcall with stdcall (redundant) and sseregparm. */ + /* Can combine fastcall with sseregparm. */ if (is_attribute_p ("fastcall", name)) { if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) @@ -3681,8 +3681,7 @@ ix86_handle_cconv_attribute (tree *node, tree name, tree args, int, } } - /* Can combine stdcall with fastcall (redundant), regparm and - sseregparm. */ + /* Can combine stdcall with regparm and sseregparm. */ else if (is_attribute_p ("stdcall", name)) { if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) @@ -3732,6 +3731,10 @@ ix86_handle_cconv_attribute (tree *node, tree name, tree args, int, { error ("cdecl and thiscall attributes are not compatible"); } + if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node))) + { + error ("regparm and thiscall attributes are not compatible"); + } } /* Can combine sseregparm with all attributes. */ diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 590cdf1..65e04d3 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -12442,6 +12442,28 @@ static GTY(()) rtx ix86_tls_symbol; static rtx ix86_tls_get_addr (void) { + if (cfun->machine->call_saved_registers + == TYPE_NO_CALLER_SAVED_REGISTERS) + { + /* __tls_get_addr doesn't preserve vector registers. When a + function with no_caller_saved_registers attribute calls + __tls_get_addr, YMM and ZMM registers will be clobbered. + Issue an error and suggest -mtls-dialect=gnu2 in this case. */ + if (cfun->machine->func_type == TYPE_NORMAL) + error (G_("%<-mtls-dialect=gnu2%> must be used with a function" + " with the %<no_caller_saved_registers%> attribute")); + else + error (cfun->machine->func_type == TYPE_EXCEPTION + ? G_("%<-mtls-dialect=gnu2%> must be used with an" + " exception service routine") + : G_("%<-mtls-dialect=gnu2%> must be used with an" + " interrupt service routine")); + /* Don't issue the same error twice. */ + cfun->machine->func_type = TYPE_NORMAL; + cfun->machine->call_saved_registers + = TYPE_DEFAULT_CALL_SAVED_REGISTERS; + } + if (!ix86_tls_symbol) { const char *sym @@ -20007,7 +20029,7 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) tree utype, ures, vce; utype = unsigned_type_for (TREE_TYPE (arg0)); /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR - instead of ABS_EXPR to hanlde overflow case(TYPE_MIN). */ + instead of ABS_EXPR to handle overflow case(TYPE_MIN). */ ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0); gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); loc = gimple_location (stmt); @@ -21491,8 +21513,7 @@ ix86_hard_regno_nregs (unsigned int regno, machine_mode mode) /* Register pair for mask registers. */ if (mode == P2QImode || mode == P2HImode) return 2; - if (mode == V64SFmode || mode == V64SImode) - return 4; + return 1; } @@ -23132,7 +23153,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, So current solution is make constant disp as cheap as possible. */ if (GET_CODE (addr) == PLUS && x86_64_immediate_operand (XEXP (addr, 1), Pmode) - /* Only hanlde (reg + disp) since other forms of addr are mostly LEA, + /* Only handle (reg + disp) since other forms of addr are mostly LEA, there's no additional cost for the plus of disp. */ && register_operand (XEXP (addr, 0), Pmode)) { @@ -25211,20 +25232,14 @@ asm_preferred_eh_data_format (int code, int global) return DW_EH_PE_absptr; } -/* Implement targetm.vectorize.builtin_vectorization_cost. */ +/* Worker for ix86_builtin_vectorization_cost and the fallback calls + from ix86_vector_costs::add_stmt_cost. */ static int -ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, - tree vectype, int) +ix86_default_vector_cost (enum vect_cost_for_stmt type_of_cost, + machine_mode mode) { - bool fp = false; - machine_mode mode = TImode; + bool fp = FLOAT_MODE_P (mode); int index; - if (vectype != NULL) - { - fp = FLOAT_TYPE_P (vectype); - mode = TYPE_MODE (vectype); - } - switch (type_of_cost) { case scalar_stmt: @@ -25283,14 +25298,14 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, COSTS_N_INSNS (ix86_cost->gather_static + ix86_cost->gather_per_elt - * TYPE_VECTOR_SUBPARTS (vectype)) / 2); + * GET_MODE_NUNITS (mode)) / 2); case vector_scatter_store: return ix86_vec_cost (mode, COSTS_N_INSNS (ix86_cost->scatter_static + ix86_cost->scatter_per_elt - * TYPE_VECTOR_SUBPARTS (vectype)) / 2); + * GET_MODE_NUNITS (mode)) / 2); case cond_branch_taken: return ix86_cost->cond_taken_branch_cost; @@ -25308,7 +25323,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, case vec_construct: { - int n = TYPE_VECTOR_SUBPARTS (vectype); + int n = GET_MODE_NUNITS (mode); /* N - 1 element inserts into an SSE vector, the possible GPR -> XMM move is accounted for in add_stmt_cost. */ if (GET_MODE_BITSIZE (mode) <= 128) @@ -25336,6 +25351,17 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, } } +/* Implement targetm.vectorize.builtin_vectorization_cost. */ +static int +ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, + tree vectype, int) +{ + machine_mode mode = TImode; + if (vectype != NULL) + mode = TYPE_MODE (vectype); + return ix86_default_vector_cost (type_of_cost, mode); +} + /* This function returns the calling abi specific va_list type node. It returns the FNDECL specific va_list type. */ @@ -25789,7 +25815,7 @@ ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar) unsigned ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, stmt_vec_info stmt_info, slp_tree node, - tree vectype, int misalign, + tree vectype, int, vect_cost_model_location where) { unsigned retval = 0; @@ -26138,14 +26164,14 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, || (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER))))) { - stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign); + stmt_cost = ix86_default_vector_cost (kind, mode); stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1); } else if ((kind == vec_construct || kind == scalar_to_vec) && node && SLP_TREE_DEF_TYPE (node) == vect_external_def) { - stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign); + stmt_cost = ix86_default_vector_cost (kind, mode); unsigned i; tree op; FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op) @@ -26209,7 +26235,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, TREE_VISITED (op) = 0; } if (stmt_cost == -1) - stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign); + stmt_cost = ix86_default_vector_cost (kind, mode); if (kind == vec_perm && vectype && GET_MODE_SIZE (TYPE_MODE (vectype)) == 32) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index eb52699..a50475b 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2968,7 +2968,8 @@ (match_operand:SWI248 1 "const_int_operand"))] "optimize_insn_for_size_p () && optimize_size > 1 && operands[1] != const0_rtx - && operands[1] != constm1_rtx + && (operands[1] != constm1_rtx + || (<MODE>mode == DImode && LEGACY_INT_REG_P (operands[0]))) && IN_RANGE (INTVAL (operands[1]), -128, 127) && !ix86_red_zone_used && REGNO (operands[0]) != SP_REG" diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index d88c3d6..ec74f93 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -21729,6 +21729,19 @@ (const_string "orig"))) (set_attr "mode" "TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")]) +;; Eliminate redundancy caused by +;; /* Special case TImode to 128-bit vector conversions via V2DI. */ +;; in ix86_expand_vector_move + +(define_split + [(set (match_operand:V2DI 0 "register_operand") + (vec_concat:V2DI + (subreg:DI (match_operand:TI 1 "register_operand") 0) + (subreg:DI (match_dup 1) 8)))] + "TARGET_SSE2 && ix86_pre_reload_split ()" + [(set (match_dup 0) + (subreg:V2DI (match_dup 1) 0))]) + (define_insn "*vec_concatv2di_0" [(set (match_operand:V2DI 0 "register_operand" "=v,v ,x") (vec_concat:V2DI diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt index d326ca4..9796839 100644 --- a/gcc/config/nvptx/nvptx.opt +++ b/gcc/config/nvptx/nvptx.opt @@ -120,6 +120,51 @@ Target RejectNegative Alias(misa=,sm_89) march-map=sm_90a Target RejectNegative Alias(misa=,sm_89) +march-map=sm_100 +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_100f +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_100a +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_101 +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_101f +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_101a +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_103 +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_103f +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_103a +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_120 +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_120f +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_120a +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_121 +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_121f +Target RejectNegative Alias(misa=,sm_89) + +march-map=sm_121a +Target RejectNegative Alias(misa=,sm_89) + Enum Name(ptx_version) Type(enum ptx_version) Known PTX ISA versions (for use with the -mptx= option): diff --git a/gcc/config/riscv/arch-canonicalize b/gcc/config/riscv/arch-canonicalize index fd55255..34dad45 100755 --- a/gcc/config/riscv/arch-canonicalize +++ b/gcc/config/riscv/arch-canonicalize @@ -32,7 +32,7 @@ import itertools from functools import reduce SUPPORTED_ISA_SPEC = ["2.2", "20190608", "20191213"] -CANONICAL_ORDER = "imafdgqlcbkjtpvn" +CANONICAL_ORDER = "imafdqlcbkjtpvnh" LONG_EXT_PREFIXES = ['z', 's', 'h', 'x'] # diff --git a/gcc/config/riscv/gen-riscv-mcpu-texi.cc b/gcc/config/riscv/gen-riscv-mcpu-texi.cc new file mode 100644 index 0000000..9681438 --- /dev/null +++ b/gcc/config/riscv/gen-riscv-mcpu-texi.cc @@ -0,0 +1,43 @@ +#include <string> +#include <vector> +#include <stdio.h> + +int +main () +{ + puts ("@c Copyright (C) 2025 Free Software Foundation, Inc."); + puts ("@c This is part of the GCC manual."); + puts ("@c For copying conditions, see the file gcc/doc/include/fdl.texi."); + puts (""); + puts ("@c This file is generated automatically using"); + puts ("@c gcc/config/riscv/gen-riscv-mcpu-texi.cc from:"); + puts ("@c gcc/config/riscv/riscv-cores.def"); + puts (""); + puts ("@c Please *DO NOT* edit manually."); + puts (""); + puts ("@samp{Core Name}"); + puts (""); + puts ("@opindex mcpu"); + puts ("@item -mcpu=@var{processor-string}"); + puts ("Use architecture of and optimize the output for the given processor, specified"); + puts ("by particular CPU name. Permissible values for this option are:"); + puts (""); + puts (""); + + std::vector<std::string> coreNames; + +#define RISCV_CORE(CORE_NAME, ARCH, MICRO_ARCH) \ + coreNames.push_back (CORE_NAME); +#include "riscv-cores.def" +#undef RISCV_CORE + + for (size_t i = 0; i < coreNames.size(); ++i) { + if (i == coreNames.size() - 1) { + printf("@samp{%s}.\n", coreNames[i].c_str()); + } else { + printf("@samp{%s},\n\n", coreNames[i].c_str()); + } + } + + return 0; +} diff --git a/gcc/config/riscv/gen-riscv-mtune-texi.cc b/gcc/config/riscv/gen-riscv-mtune-texi.cc new file mode 100644 index 0000000..1bdfe2a --- /dev/null +++ b/gcc/config/riscv/gen-riscv-mtune-texi.cc @@ -0,0 +1,41 @@ +#include <string> +#include <vector> +#include <stdio.h> + +int +main () +{ + puts ("@c Copyright (C) 2025 Free Software Foundation, Inc."); + puts ("@c This is part of the GCC manual."); + puts ("@c For copying conditions, see the file gcc/doc/include/fdl.texi."); + puts (""); + puts ("@c This file is generated automatically using"); + puts ("@c gcc/config/riscv/gen-riscv-mtune-texi.cc from:"); + puts ("@c gcc/config/riscv/riscv-cores.def"); + puts (""); + puts ("@c Please *DO NOT* edit manually."); + puts (""); + puts ("@samp{Tune Name}"); + puts (""); + puts ("@opindex mtune"); + puts ("@item -mtune=@var{processor-string}"); + puts ("Optimize the output for the given processor, specified by microarchitecture or"); + puts ("particular CPU name. Permissible values for this option are:"); + puts (""); + puts (""); + + std::vector<std::string> tuneNames; + +#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO) \ + tuneNames.push_back (TUNE_NAME); +#include "riscv-cores.def" +#undef RISCV_TUNE + + for (size_t i = 0; i < tuneNames.size(); ++i) { + printf("@samp{%s},\n\n", tuneNames[i].c_str()); + } + + puts ("and all valid options for @option{-mcpu=}."); + + return 0; +} diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc index 1c6bc25..44ef44a 100644 --- a/gcc/config/riscv/riscv-vector-costs.cc +++ b/gcc/config/riscv/riscv-vector-costs.cc @@ -400,7 +400,7 @@ costs::compute_local_live_ranges ( pair &live_range = live_ranges->get_or_insert (lhs, &existed_p); gcc_assert (!existed_p); - if (STMT_VINFO_MEMORY_ACCESS_TYPE (program_point.stmt_info) + if (SLP_TREE_MEMORY_ACCESS_TYPE (*node) == VMAT_LOAD_STORE_LANES) point = get_first_lane_point (program_points, program_point.stmt_info); @@ -418,8 +418,7 @@ costs::compute_local_live_ranges ( bool existed_p = false; pair &live_range = live_ranges->get_or_insert (var, &existed_p); - if (STMT_VINFO_MEMORY_ACCESS_TYPE ( - program_point.stmt_info) + if (SLP_TREE_MEMORY_ACCESS_TYPE (*node) == VMAT_LOAD_STORE_LANES) point = get_last_lane_point (program_points, program_point.stmt_info); @@ -608,7 +607,7 @@ costs::need_additional_vector_vars_p (stmt_vec_info stmt_info, if (type == load_vec_info_type || type == store_vec_info_type) { if (STMT_VINFO_GATHER_SCATTER_P (stmt_info) - && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER) + && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER) return true; machine_mode mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)); @@ -1086,7 +1085,7 @@ costs::better_main_loop_than_p (const vector_costs *uncast_other) const load/store. */ static int segment_loadstore_group_size (enum vect_cost_for_stmt kind, - stmt_vec_info stmt_info) + stmt_vec_info stmt_info, slp_tree node) { if (stmt_info && (kind == vector_load || kind == vector_store) @@ -1094,7 +1093,7 @@ segment_loadstore_group_size (enum vect_cost_for_stmt kind, { stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); if (stmt_info - && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_LOAD_STORE_LANES) + && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_LOAD_STORE_LANES) return DR_GROUP_SIZE (stmt_info); } return 0; @@ -1108,7 +1107,7 @@ segment_loadstore_group_size (enum vect_cost_for_stmt kind, unsigned costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop, stmt_vec_info stmt_info, - slp_tree, tree vectype, int stmt_cost) + slp_tree node, tree vectype, int stmt_cost) { const cpu_vector_cost *costs = get_vector_costs (); switch (kind) @@ -1131,7 +1130,8 @@ costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop, each vector in the group. Here we additionally add permute costs for each. */ /* TODO: Indexed and ordered/unordered cost. */ - int group_size = segment_loadstore_group_size (kind, stmt_info); + int group_size = segment_loadstore_group_size (kind, stmt_info, + node); if (group_size > 1) { switch (group_size) diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv index 7aac56a..a7eaa8b 100644 --- a/gcc/config/riscv/t-riscv +++ b/gcc/config/riscv/t-riscv @@ -229,8 +229,41 @@ s-riscv-ext.texi: build/gen-riscv-ext-texi$(build_exeext) $(SHELL) $(srcdir)/../move-if-change tmp-riscv-ext.texi $(srcdir)/doc/riscv-ext.texi $(STAMP) s-riscv-ext.texi -# Run `riscv-regen' after you changed or added anything from riscv-ext*.def +RISCV_CORES_DEFS = \ + $(srcdir)/config/riscv/riscv-cores.def + +build/gen-riscv-mtune-texi.o: $(srcdir)/config/riscv/gen-riscv-mtune-texi.cc \ + $(RISCV_CORES_DEFS) + $(CXX_FOR_BUILD) $(CXXFLAGS_FOR_BUILD) -c $< -o $@ + +build/gen-riscv-mcpu-texi.o: $(srcdir)/config/riscv/gen-riscv-mcpu-texi.cc \ + $(RISCV_CORES_DEFS) + $(CXX_FOR_BUILD) $(CXXFLAGS_FOR_BUILD) -c $< -o $@ + +build/gen-riscv-mtune-texi$(build_exeext): build/gen-riscv-mtune-texi.o + $(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ $< + +build/gen-riscv-mcpu-texi$(build_exeext): build/gen-riscv-mcpu-texi.o + $(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ $< + +$(srcdir)/doc/riscv-mtune.texi: $(RISCV_CORES_DEFS) +$(srcdir)/doc/riscv-mtune.texi: s-riscv-mtune.texi ; @true + +$(srcdir)/doc/riscv-mcpu.texi: $(RISCV_CORES_DEFS) +$(srcdir)/doc/riscv-mcpu.texi: s-riscv-mcpu.texi ; @true + +s-riscv-mtune.texi: build/gen-riscv-mtune-texi$(build_exeext) + $(RUN_GEN) build/gen-riscv-mtune-texi$(build_exeext) > tmp-riscv-mtune.texi + $(SHELL) $(srcdir)/../move-if-change tmp-riscv-mtune.texi $(srcdir)/doc/riscv-mtune.texi + $(STAMP) s-riscv-mtune.texi + +s-riscv-mcpu.texi: build/gen-riscv-mcpu-texi$(build_exeext) + $(RUN_GEN) build/gen-riscv-mcpu-texi$(build_exeext) > tmp-riscv-mcpu.texi + $(SHELL) $(srcdir)/../move-if-change tmp-riscv-mcpu.texi $(srcdir)/doc/riscv-mcpu.texi + $(STAMP) s-riscv-mcpu.texi + +# Run `riscv-regen' after you changed or added anything from riscv-ext*.def and riscv-cores*.def .PHONY: riscv-regen -riscv-regen: s-riscv-ext.texi s-riscv-ext.opt +riscv-regen: s-riscv-ext.texi s-riscv-ext.opt s-riscv-mtune.texi s-riscv-mcpu.texi diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 1c60695..764b499 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -10320,15 +10320,18 @@ can_be_rotated_to_negative_lis (HOST_WIDE_INT c, int *rot) /* case b. xx0..01..1xx: some of 15 x's (and some of 16 0's) are rotated over the highest bit. */ - int pos_one = clz_hwi ((c << 16) >> 16); - middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_one)); - int middle_ones = clz_hwi (~(c << pos_one)); - if (middle_zeros >= 16 && middle_ones >= 33) + unsigned HOST_WIDE_INT uc = c; + int pos_one = clz_hwi ((HOST_WIDE_INT) (uc << 16) >> 16); + if (pos_one != 0) { - *rot = pos_one; - return true; + middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_one)); + int middle_ones = clz_hwi (~(uc << pos_one)); + if (middle_zeros >= 16 && middle_ones >= 33) + { + *rot = pos_one; + return true; + } } - return false; } @@ -10445,7 +10448,8 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask) if (lz >= HOST_BITS_PER_WIDE_INT) return false; - int middle_ones = clz_hwi (~(c << lz)); + unsigned HOST_WIDE_INT uc = c; + int middle_ones = clz_hwi (~(uc << lz)); if (tz + lz + middle_ones >= ones && (tz - lz) < HOST_BITS_PER_WIDE_INT && tz < HOST_BITS_PER_WIDE_INT) @@ -10479,7 +10483,7 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask) if (!IN_RANGE (pos_first_1, 1, HOST_BITS_PER_WIDE_INT-1)) return false; - middle_ones = clz_hwi (~c << pos_first_1); + middle_ones = clz_hwi ((~(unsigned HOST_WIDE_INT) c) << pos_first_1); middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_first_1)); if (pos_first_1 < HOST_BITS_PER_WIDE_INT && middle_ones + middle_zeros < HOST_BITS_PER_WIDE_INT @@ -10581,7 +10585,8 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns) { /* li/lis; rldicX */ unsigned HOST_WIDE_INT imm = (c | ~mask); - imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift)); + if (shift != 0) + imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift)); count_or_emit_insn (temp, GEN_INT (imm)); if (shift != 0) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 9c718ca..e31ee40 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -1969,7 +1969,7 @@ [(set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3))) (set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4)))] { - HOST_WIDE_INT val = INTVAL (operands[2]); + unsigned HOST_WIDE_INT val = UINTVAL (operands[2]); HOST_WIDE_INT low = sext_hwi (val, 16); HOST_WIDE_INT rest = trunc_int_for_mode (val - low, <MODE>mode); diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h index d760a7e..6becad1 100644 --- a/gcc/config/s390/s390-protos.h +++ b/gcc/config/s390/s390-protos.h @@ -128,6 +128,8 @@ extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx); extern void s390_expand_vec_init (rtx, rtx); extern rtx s390_expand_merge_perm_const (machine_mode, bool); extern void s390_expand_merge (rtx, rtx, rtx, bool); +extern void s390_expand_int_spaceship (rtx, rtx, rtx, rtx); +extern void s390_expand_fp_spaceship (rtx, rtx, rtx, rtx); extern rtx s390_build_signbit_mask (machine_mode); extern rtx s390_return_addr_rtx (int, rtx); extern rtx s390_back_chain_rtx (void); diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index abe551c..012b6db 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -8213,6 +8213,167 @@ s390_expand_atomic (machine_mode mode, enum rtx_code code, NULL_RTX, 1, OPTAB_DIRECT), 1); } +/* Expand integer op0 = op1 <=> op2, i.e., + op0 = op1 == op2 ? 0 : op1 < op2 ? -1 : 1. + + Signedness is specified by op3. If op3 equals 1, then perform an unsigned + comparison, and if op3 equals -1, then perform a signed comparison. + + For integer comparisons we strive for a sequence like + CR[L] ; LHI ; LOCHIL ; LOCHIH + where the first three instructions fit into a group. */ + +void +s390_expand_int_spaceship (rtx op0, rtx op1, rtx op2, rtx op3) +{ + gcc_assert (op3 == const1_rtx || op3 == constm1_rtx); + + rtx cc, cond_lt, cond_gt; + machine_mode cc_mode; + machine_mode mode = GET_MODE (op1); + + /* Prior VXE3 emulate a 128-bit comparison by breaking it up into three + comparisons. First test the high halfs. In case they equal, then test + the low halfs. Finally, test for equality. Depending on the results + make use of LOCs. */ + if (mode == TImode && !TARGET_VXE3) + { + gcc_assert (TARGET_VX); + op1 + = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op1, TImode, 0)); + op2 + = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op2, TImode, 0)); + rtx lab = gen_label_rtx (); + rtx ccz = gen_rtx_REG (CCZmode, CC_REGNUM); + /* Compare high halfs for equality. + VEC[L]G op1, op2 sets + CC1 if high(op1) < high(op2) + and + CC2 if high(op1) > high(op2). */ + machine_mode cc_mode = op3 == const1_rtx ? CCUmode : CCSmode; + rtx lane0 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); + emit_insn (gen_rtx_SET ( + gen_rtx_REG (cc_mode, CC_REGNUM), + gen_rtx_COMPARE (cc_mode, + gen_rtx_VEC_SELECT (DImode, op1, lane0), + gen_rtx_VEC_SELECT (DImode, op2, lane0)))); + s390_emit_jump (lab, gen_rtx_NE (CCZmode, ccz, const0_rtx)); + /* At this point we know that the high halfs equal. + VCHLGS op2, op1 sets CC1 if low(op1) < low(op2) */ + emit_insn (gen_rtx_PARALLEL ( + VOIDmode, + gen_rtvec (2, + gen_rtx_SET (gen_rtx_REG (CCVIHUmode, CC_REGNUM), + gen_rtx_COMPARE (CCVIHUmode, op2, op1)), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode))))); + emit_label (lab); + emit_insn (gen_rtx_SET (op0, const1_rtx)); + emit_insn ( + gen_movsicc (op0, + gen_rtx_LTU (CCUmode, gen_rtx_REG (CCUmode, CC_REGNUM), + const0_rtx), + constm1_rtx, op0)); + /* Deal with the case where both halfs equal. */ + emit_insn (gen_rtx_PARALLEL ( + VOIDmode, + gen_rtvec (2, + gen_rtx_SET (gen_rtx_REG (CCVEQmode, CC_REGNUM), + gen_rtx_COMPARE (CCVEQmode, op1, op2)), + gen_rtx_SET (gen_reg_rtx (V2DImode), + gen_rtx_EQ (V2DImode, op1, op2))))); + emit_insn (gen_movsicc (op0, gen_rtx_EQ (CCZmode, ccz, const0_rtx), + const0_rtx, op0)); + return; + } + + if (mode == QImode || mode == HImode) + { + rtx_code extend = op3 == const1_rtx ? ZERO_EXTEND : SIGN_EXTEND; + op1 = simplify_gen_unary (extend, SImode, op1, mode); + op1 = force_reg (SImode, op1); + op2 = simplify_gen_unary (extend, SImode, op2, mode); + op2 = force_reg (SImode, op2); + mode = SImode; + } + + if (op3 == const1_rtx) + { + cc_mode = CCUmode; + cc = gen_rtx_REG (cc_mode, CC_REGNUM); + cond_lt = gen_rtx_LTU (mode, cc, const0_rtx); + cond_gt = gen_rtx_GTU (mode, cc, const0_rtx); + } + else + { + cc_mode = CCSmode; + cc = gen_rtx_REG (cc_mode, CC_REGNUM); + cond_lt = gen_rtx_LT (mode, cc, const0_rtx); + cond_gt = gen_rtx_GT (mode, cc, const0_rtx); + } + + emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (cc_mode, op1, op2))); + emit_move_insn (op0, const0_rtx); + emit_insn (gen_movsicc (op0, cond_lt, constm1_rtx, op0)); + emit_insn (gen_movsicc (op0, cond_gt, const1_rtx, op0)); +} + +/* Expand floating-point op0 = op1 <=> op2, i.e., + op0 = op1 == op2 ? 0 : op1 < op2 ? -1 : op1 > op2 ? 1 : 2. + + If op3 equals const0_rtx, then we are interested in the compare only (see + test spaceship-fp-4.c). Otherwise, op3 is a CONST_INT different than + const1_rtx and constm1_rtx which is used in order to set op0 for unordered. + + Emit a branch-only solution, i.e., let if-convert fold the branches into + LOCs if applicable. This has the benefit that the solution is also + applicable if we are only interested in the compare, i.e., if op3 equals + const0_rtx. + */ + +void +s390_expand_fp_spaceship (rtx op0, rtx op1, rtx op2, rtx op3) +{ + gcc_assert (op3 != const1_rtx && op3 != constm1_rtx); + + machine_mode mode = GET_MODE (op1); + machine_mode cc_mode = s390_select_ccmode (LTGT, op1, op2); + rtx cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM); + rtx cond_unordered = gen_rtx_UNORDERED (mode, cc_reg, const0_rtx); + rtx cond_eq = gen_rtx_EQ (mode, cc_reg, const0_rtx); + rtx cond_gt = gen_rtx_GT (mode, cc_reg, const0_rtx); + rtx_insn *insn; + rtx l_unordered = gen_label_rtx (); + rtx l_eq = gen_label_rtx (); + rtx l_gt = gen_label_rtx (); + rtx l_end = gen_label_rtx (); + + s390_emit_compare (VOIDmode, LTGT, op1, op2); + if (!flag_finite_math_only) + { + insn = s390_emit_jump (l_unordered, cond_unordered); + add_reg_br_prob_note (insn, profile_probability::very_unlikely ()); + } + insn = s390_emit_jump (l_eq, cond_eq); + add_reg_br_prob_note (insn, profile_probability::unlikely ()); + insn = s390_emit_jump (l_gt, cond_gt); + add_reg_br_prob_note (insn, profile_probability::even ()); + emit_move_insn (op0, constm1_rtx); + emit_jump (l_end); + emit_label (l_eq); + emit_move_insn (op0, const0_rtx); + emit_jump (l_end); + emit_label (l_gt); + emit_move_insn (op0, const1_rtx); + if (!flag_finite_math_only) + { + emit_jump (l_end); + emit_label (l_unordered); + rtx unord_val = op3 == const0_rtx ? const2_rtx : op3; + emit_move_insn (op0, unord_val); + } + emit_label (l_end); +} + /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL. We need to emit DTP-relative relocations. */ diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 1edbfde..8cc48b0 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -1527,6 +1527,27 @@ operands[0] = SET_DEST (PATTERN (curr_insn)); }) +; Restrict spaceship optab to z13 or later since there we have +; LOAD HALFWORD IMMEDIATE ON CONDITION. + +(define_mode_iterator SPACESHIP_INT [(TI "TARGET_VX") DI SI HI QI]) +(define_expand "spaceship<mode>4" + [(match_operand:SI 0 "register_operand") + (match_operand:SPACESHIP_INT 1 "register_operand") + (match_operand:SPACESHIP_INT 2 "register_operand") + (match_operand:SI 3 "const_int_operand")] + "TARGET_Z13 && TARGET_64BIT" + "s390_expand_int_spaceship (operands[0], operands[1], operands[2], operands[3]); DONE;") + +(define_mode_iterator SPACESHIP_BFP [TF DF SF]) +(define_expand "spaceship<mode>4" + [(match_operand:SI 0 "register_operand") + (match_operand:SPACESHIP_BFP 1 "register_operand") + (match_operand:SPACESHIP_BFP 2 "register_operand") + (match_operand:SI 3 "const_int_operand")] + "TARGET_Z13 && TARGET_64BIT && TARGET_HARD_FLOAT" + "s390_expand_fp_spaceship (operands[0], operands[1], operands[2], operands[3]); DONE;") + ; (TF|DF|SF|TD|DD|SD) instructions diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 08ec840..3f76afd 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,76 @@ +2025-08-01 Nathaniel Shead <nathanieloshead@gmail.com> + + PR c++/108080 + * module.cc (trees_out::core_vals): Warn when streaming + target/optimize node; adjust comments. + (trees_in::core_vals): Don't stream a target/optimize node. + +2025-08-01 Nathaniel Shead <nathanieloshead@gmail.com> + + PR c++/121238 + * module.cc (trees_in::fn_parms_fini): Merge properties for + definitions. + +2025-07-31 Jason Merrill <jason@redhat.com> + + PR c++/120800 + * constexpr.cc (cxx_eval_vec_init_1): Suppress access control. + +2025-07-31 Marek Polacek <polacek@redhat.com> + + PR c++/120775 + * constexpr.cc (cxx_eval_outermost_constant_expr): Use + extract_call_expr. + * cp-tree.h (CONSTEVAL_BLOCK_P, LAMBDA_EXPR_CONSTEVAL_BLOCK_P): Define. + (finish_static_assert): Adjust declaration. + (current_nonlambda_function): Likewise. + * lambda.cc (current_nonlambda_function): New parameter. Only keep + iterating if the function represents a consteval block. + * parser.cc (cp_parser_lambda_expression): New parameter for + consteval blocks. Use it. Set LAMBDA_EXPR_CONSTEVAL_BLOCK_P. + (cp_parser_lambda_declarator_opt): Likewise. + (build_empty_string): New. + (cp_parser_next_tokens_are_consteval_block_p): New. + (cp_parser_consteval_block): New. + (cp_parser_block_declaration): Handle consteval blocks. + (cp_parser_static_assert): Use build_empty_string. + (cp_parser_member_declaration): Handle consteval blocks. + * pt.cc (tsubst_stmt): Adjust a call to finish_static_assert. + * semantics.cc (finish_fname): Warn for consteval blocks. + (finish_static_assert): New parameter for consteval blocks. Set + CONSTEVAL_BLOCK_P. Evaluate consteval blocks specially. + +2025-07-30 Nathaniel Shead <nathanieloshead@gmail.com> + + PR c++/121291 + * constraint.cc (diagnose_trait_expr): Remove assumption about + failures returning error_mark_node. + * except.cc (explain_not_noexcept): Allow expr not being + noexcept. + * method.cc (build_invoke): Adjust comment. + (is_trivially_xible): Always note non-trivial components if expr + is not null or error_mark_node. + (is_nothrow_xible): Likewise for non-noexcept components. + (is_nothrow_convertible): Likewise. + +2025-07-30 Jason Merrill <jason@redhat.com> + + * pt.cc (convert_nontype_argument_function): Check + cxx_constant_value on failure. + (invalid_tparm_referent_p): Likewise. + +2025-07-30 Jakub Jelinek <jakub@redhat.com> + + PR c++/121133 + * parser.cc (cp_parser_unary_expression): Adjust + cp_parser_extension_opt caller and restore warn_long_long. + (cp_parser_declaration): Likewise. + (cp_parser_block_declaration): Likewise. + (cp_parser_member_declaration): Likewise. + (cp_parser_extension_opt): Add SAVED_LONG_LONG argument, + save previous warn_long_long state into it and clear it + for __extension__. + 2025-07-27 Nathaniel Shead <nathanieloshead@gmail.com> * cp-tree.h (struct lang_type): Add comment mentioning modules. diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc index f92beb1..be24ae2 100644 --- a/gcc/cp/constexpr.cc +++ b/gcc/cp/constexpr.cc @@ -6598,6 +6598,9 @@ cxx_eval_vec_init_1 (const constexpr_ctx *ctx, tree atype, tree init, return cxx_eval_bare_aggregate (ctx, init, lval, non_constant_p, overflow_p, jump_target); + /* We already checked access when building the VEC_INIT_EXPR. */ + deferring_access_check_sentinel acs (dk_deferred); + /* For the default constructor, build up a call to the default constructor of the element type. We only need to handle class types here, as for a constructor to be constexpr, all members must be @@ -10329,11 +10332,14 @@ cxx_eval_outermost_constant_expr (tree t, bool allow_non_constant, { if (cxx_dialect < cxx20) return t; - if (TREE_CODE (t) != CALL_EXPR && TREE_CODE (t) != AGGR_INIT_EXPR) + /* We could have a COMPOUND_EXPR here coming from + keep_unused_object_arg. */ + tree x = extract_call_expr (t); + if (x == NULL_TREE || x == error_mark_node) return t; /* Calls to immediate functions returning void need to be evaluated. */ - tree fndecl = cp_get_callee_fndecl_nofold (t); + tree fndecl = cp_get_callee_fndecl_nofold (x); if (fndecl == NULL_TREE || !DECL_IMMEDIATE_FUNCTION_P (fndecl)) return t; else diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc index d4a83e4..cbdfafc 100644 --- a/gcc/cp/constraint.cc +++ b/gcc/cp/constraint.cc @@ -3176,8 +3176,7 @@ diagnose_trait_expr (location_t loc, tree expr, tree args) inform (loc, "%qT is not invocable, because", t1); else inform (loc, "%qT is not invocable by %qT, because", t1, t2); - tree call = build_invoke (t1, t2, tf_error); - gcc_assert (call == error_mark_node); + build_invoke (t1, t2, tf_error); } break; case CPTK_IS_LAYOUT_COMPATIBLE: diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index 0ac3ecb..fb8e0d8 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -453,6 +453,8 @@ extern GTY(()) tree cp_global_trees[CPTI_MAX]; RETURN_EXPR_LOCAL_ADDR_P (in RETURN_EXPR) PACK_INDEX_PARENTHESIZED_P (in PACK_INDEX_*) MUST_NOT_THROW_NOEXCEPT_P (in MUST_NOT_THROW_EXPR) + CONSTEVAL_BLOCK_P (in STATIC_ASSERT) + LAMBDA_EXPR_CONSTEVAL_BLOCK_P (in LAMBDA_EXPR) 1: IDENTIFIER_KIND_BIT_1 (in IDENTIFIER_NODE) TI_PENDING_TEMPLATE_FLAG. TEMPLATE_PARMS_FOR_INLINE. @@ -1433,6 +1435,10 @@ struct GTY (()) tree_deferred_noexcept { #define STATIC_ASSERT_SOURCE_LOCATION(NODE) \ (((struct tree_static_assert *)STATIC_ASSERT_CHECK (NODE))->location) +/* True if this static assert represents a C++26 consteval block. */ +#define CONSTEVAL_BLOCK_P(NODE) \ + TREE_LANG_FLAG_0 (STATIC_ASSERT_CHECK (NODE)) + struct GTY (()) tree_static_assert { struct tree_base base; tree condition; @@ -1547,6 +1553,10 @@ enum cp_lambda_default_capture_mode_type { #define LAMBDA_EXPR_THIS_CAPTURE(NODE) \ (((struct tree_lambda_expr *)LAMBDA_EXPR_CHECK (NODE))->this_capture) +/* True iff this lambda was created for a consteval block. */ +#define LAMBDA_EXPR_CONSTEVAL_BLOCK_P(NODE) \ + TREE_LANG_FLAG_0 (LAMBDA_EXPR_CHECK (NODE)) + /* True iff uses of a const variable capture were optimized away. */ #define LAMBDA_EXPR_CAPTURE_OPTIMIZED(NODE) \ TREE_LANG_FLAG_2 (LAMBDA_EXPR_CHECK (NODE)) @@ -8243,7 +8253,7 @@ extern bool cxx_omp_create_clause_info (tree, tree, bool, bool, bool, bool); extern tree baselink_for_fns (tree); extern void finish_static_assert (tree, tree, location_t, - bool, bool); + bool, bool, bool = false); extern tree finish_decltype_type (tree, bool, tsubst_flags_t); extern tree fold_builtin_is_corresponding_member (location_t, int, tree *); extern tree fold_builtin_is_pointer_inverconvertible_with_class (location_t, int, tree *); @@ -8268,7 +8278,7 @@ extern void register_capture_members (tree); extern tree lambda_expr_this_capture (tree, int); extern void maybe_generic_this_capture (tree, tree); extern tree maybe_resolve_dummy (tree, bool); -extern tree current_nonlambda_function (void); +extern tree current_nonlambda_function (bool = false); extern tree nonlambda_method_basetype (void); extern tree current_nonlambda_scope (bool = false); extern tree current_lambda_expr (void); diff --git a/gcc/cp/except.cc b/gcc/cp/except.cc index 2c1ef4c..204769f 100644 --- a/gcc/cp/except.cc +++ b/gcc/cp/except.cc @@ -1218,13 +1218,15 @@ expr_noexcept_p (tree expr, tsubst_flags_t complain) return true; } -/* Explain why EXPR is not noexcept. */ +/* If EXPR is not noexcept, explain why. */ -void explain_not_noexcept (tree expr) +void +explain_not_noexcept (tree expr) { tree fn = cp_walk_tree_without_duplicates (&expr, check_noexcept_r, 0); - gcc_assert (fn); - if (DECL_P (fn)) + if (!fn) + /* The call was noexcept, nothing to do. */; + else if (DECL_P (fn)) inform (DECL_SOURCE_LOCATION (fn), "%qD is not %<noexcept%>", fn); else inform (location_of (fn), "%qT is not %<noexcept%>", TREE_TYPE (fn)); diff --git a/gcc/cp/lambda.cc b/gcc/cp/lambda.cc index ecf55eb..c798967 100644 --- a/gcc/cp/lambda.cc +++ b/gcc/cp/lambda.cc @@ -1038,13 +1038,19 @@ maybe_generic_this_capture (tree object, tree fns) } } -/* Returns the innermost non-lambda function. */ +/* Returns the innermost non-lambda function. If ONLY_SKIP_CONSTEVAL_BLOCK_P, + we only skip lambda functions that represent consteval blocks. */ tree -current_nonlambda_function (void) +current_nonlambda_function (bool only_skip_consteval_block_p/*=false*/) { tree fn = current_function_decl; - while (fn && LAMBDA_FUNCTION_P (fn)) + tree lam; + while (fn && LAMBDA_FUNCTION_P (fn) + && (!only_skip_consteval_block_p + /* Only keep going if FN represents a consteval block. */ + || ((lam = CLASSTYPE_LAMBDA_EXPR (CP_DECL_CONTEXT (fn))) + && LAMBDA_EXPR_CONSTEVAL_BLOCK_P (lam)))) fn = decl_function_context (fn); return fn; } diff --git a/gcc/cp/method.cc b/gcc/cp/method.cc index 62f8d80..397e496 100644 --- a/gcc/cp/method.cc +++ b/gcc/cp/method.cc @@ -1952,7 +1952,8 @@ build_trait_object (tree type, tsubst_flags_t complain) } /* [func.require] Build an expression of INVOKE(FN_TYPE, ARG_TYPES...). If the - given is not invocable, returns error_mark_node. */ + given is not invocable, returns error_mark_node, unless COMPLAIN includes + tf_error. */ tree build_invoke (tree fn_type, const_tree arg_types, tsubst_flags_t complain) @@ -2460,21 +2461,13 @@ bool is_trivially_xible (enum tree_code code, tree to, tree from, bool explain/*=false*/) { - /* In some cases, when producing errors is_xible_helper may not return - error_mark_node, so check if it looks like we've already emitted any - diagnostics to ensure we don't do so multiple times. */ - int errs = errorcount + sorrycount; - tree expr = is_xible_helper (code, to, from, explain); if (expr == NULL_TREE || expr == error_mark_node) return false; tree nt = cp_walk_tree_without_duplicates (&expr, check_nontriv, NULL); - if (explain && errs == (errorcount + sorrycount)) - { - gcc_assert (nt); - inform (location_of (nt), "%qE is non-trivial", nt); - } + if (explain && nt) + inform (location_of (nt), "%qE is non-trivial", nt); return !nt; } @@ -2487,9 +2480,6 @@ bool is_nothrow_xible (enum tree_code code, tree to, tree from, bool explain/*=false*/) { - /* As with is_trivially_xible. */ - int errs = errorcount + sorrycount; - ++cp_noexcept_operand; tree expr = is_xible_helper (code, to, from, explain); --cp_noexcept_operand; @@ -2497,11 +2487,8 @@ is_nothrow_xible (enum tree_code code, tree to, tree from, return false; bool is_noexcept = expr_noexcept_p (expr, tf_none); - if (explain && errs == (errorcount + sorrycount)) - { - gcc_assert (!is_noexcept); - explain_not_noexcept (expr); - } + if (explain && !is_noexcept) + explain_not_noexcept (expr); return is_noexcept; } @@ -2601,12 +2588,10 @@ is_nothrow_convertible (tree from, tree to, bool explain/*=false*/) tree expr = is_convertible_helper (from, to, explain); if (expr == NULL_TREE || expr == error_mark_node) return false; + bool is_noexcept = expr_noexcept_p (expr, tf_none); - if (explain) - { - gcc_assert (!is_noexcept); - explain_not_noexcept (expr); - } + if (explain && !is_noexcept) + explain_not_noexcept (expr); return is_noexcept; } diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc index 2f6a8ab..9412f78 100644 --- a/gcc/cp/module.cc +++ b/gcc/cp/module.cc @@ -6560,8 +6560,14 @@ trees_out::core_vals (tree t) } WT (t->function_decl.personality); - WT (t->function_decl.function_specific_target); - WT (t->function_decl.function_specific_optimization); + /* Rather than streaming target/optimize nodes, we should reconstruct + them on stream-in from any attributes applied to the function. */ + if (streaming_p () && t->function_decl.function_specific_target) + warning_at (DECL_SOURCE_LOCATION (t), 0, + "%<target%> attribute currently unsupported in modules"); + if (streaming_p () && t->function_decl.function_specific_optimization) + warning_at (DECL_SOURCE_LOCATION (t), 0, + "%<optimize%> attribute currently unsupported in modules"); WT (t->function_decl.vindex); if (DECL_HAS_DEPENDENT_EXPLICIT_SPEC_P (t)) @@ -6651,11 +6657,12 @@ trees_out::core_vals (tree t) case TARGET_OPTION_NODE: // FIXME: Our representation for these two nodes is a cache of // the resulting set of options. Not a record of the options - // that got changed by a particular attribute or pragma. Should - // we record that, or should we record the diff from the command - // line options? The latter seems the right behaviour, but is - // (a) harder, and I guess could introduce strangeness if the - // importer has set some incompatible set of optimization flags? + // that got changed by a particular attribute or pragma. Instead + // of recording that, we probably should just rebuild the options + // on stream-in from the function attributes. This could introduce + // strangeness if the importer has some incompatible set of flags + // but we currently assume users "know what they're doing" in such + // a case anyway. gcc_unreachable (); break; @@ -7114,8 +7121,10 @@ trees_in::core_vals (tree t) } RT (t->function_decl.personality); - RT (t->function_decl.function_specific_target); - RT (t->function_decl.function_specific_optimization); + /* These properties are not streamed, and should be reconstructed + from any function attributes. */ + // t->function_decl.function_specific_target); + // t->function_decl.function_specific_optimization); RT (t->function_decl.vindex); if (DECL_HAS_DEPENDENT_EXPLICIT_SPEC_P (t)) @@ -7221,7 +7230,7 @@ trees_in::core_vals (tree t) case OPTIMIZATION_NODE: case TARGET_OPTION_NODE: - /* Not yet implemented, see trees_out::core_vals. */ + /* Not implemented, see trees_out::core_vals. */ gcc_unreachable (); break; @@ -11164,6 +11173,20 @@ trees_in::fn_parms_fini (int tag, tree fn, tree existing, bool is_defn) names of the parms from us. */ DECL_NAME (existing_parm) = DECL_NAME (parm); DECL_SOURCE_LOCATION (existing_parm) = DECL_SOURCE_LOCATION (parm); + + /* And some other flags important for codegen are only set + by the definition. */ + TREE_ADDRESSABLE (existing_parm) = TREE_ADDRESSABLE (parm); + DECL_BY_REFERENCE (existing_parm) = DECL_BY_REFERENCE (parm); + DECL_NONLOCAL (existing_parm) = DECL_NONLOCAL (parm); + DECL_ARG_TYPE (existing_parm) = DECL_ARG_TYPE (parm); + + /* Invisiref parms had their types adjusted by cp_genericize. */ + if (DECL_BY_REFERENCE (parm)) + { + TREE_TYPE (existing_parm) = TREE_TYPE (parm); + relayout_decl (existing_parm); + } } back_refs[~tag] = existing_parm; diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index 9e9cd9b..860f3f0 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -2576,11 +2576,11 @@ static cp_expr cp_parser_constant_expression static cp_expr cp_parser_builtin_offsetof (cp_parser *); static cp_expr cp_parser_lambda_expression - (cp_parser *); + (cp_parser *, bool = false); static void cp_parser_lambda_introducer (cp_parser *, tree); static bool cp_parser_lambda_declarator_opt - (cp_parser *, tree); + (cp_parser *, tree, bool = false); static void cp_parser_lambda_body (cp_parser *, tree); @@ -2921,7 +2921,7 @@ static size_t cp_parser_skip_std_attribute_spec_seq static size_t cp_parser_skip_attributes_opt (cp_parser *, size_t); static bool cp_parser_extension_opt - (cp_parser *, int *); + (cp_parser *, int *, int *); static void cp_parser_label_declaration (cp_parser *); @@ -9504,11 +9504,12 @@ cp_parser_unary_expression (cp_parser *parser, cp_id_kind * pidk, case RID_EXTENSION: { /* The saved value of the PEDANTIC flag. */ - int saved_pedantic; + int saved_pedantic, saved_long_long; tree expr; /* Save away the PEDANTIC flag. */ - cp_parser_extension_opt (parser, &saved_pedantic); + cp_parser_extension_opt (parser, &saved_pedantic, + &saved_long_long); /* Also suppress -Wconditionally-supported. */ diagnostic_push_diagnostics (global_dc, input_location); diagnostic_classify_diagnostic @@ -9519,6 +9520,7 @@ cp_parser_unary_expression (cp_parser *parser, cp_id_kind * pidk, /* Restore the PEDANTIC flag. */ diagnostic_pop_diagnostics (global_dc, input_location); pedantic = saved_pedantic; + warn_long_long = saved_long_long; return expr; } @@ -11742,10 +11744,14 @@ cp_parser_trait (cp_parser* parser, const cp_trait* trait) lambda-introducer < template-parameter-list > requires-clause [opt] lambda-declarator [opt] compound-statement + If CONSTEVAL_BLOCK_P is true, we are parsing a consteval block, which + is syntactic sugar for a consteval lambda. + Returns a representation of the expression. */ static cp_expr -cp_parser_lambda_expression (cp_parser* parser) +cp_parser_lambda_expression (cp_parser* parser, + bool consteval_block_p/*=false*/) { tree lambda_expr = build_lambda_expr (); tree type; @@ -11754,6 +11760,7 @@ cp_parser_lambda_expression (cp_parser* parser) cp_token_position start = 0; LAMBDA_EXPR_LOCATION (lambda_expr) = token->location; + LAMBDA_EXPR_CONSTEVAL_BLOCK_P (lambda_expr) = consteval_block_p; if (cxx_dialect >= cxx20) { @@ -11797,9 +11804,12 @@ cp_parser_lambda_expression (cp_parser* parser) it now. */ push_deferring_access_checks (dk_no_deferred); - cp_parser_lambda_introducer (parser, lambda_expr); - if (cp_parser_error_occurred (parser)) - return error_mark_node; + if (!consteval_block_p) + { + cp_parser_lambda_introducer (parser, lambda_expr); + if (cp_parser_error_occurred (parser)) + return error_mark_node; + } { /* OK, this is a bit tricksy. cp_parser_requires_expression sets @@ -11871,7 +11881,8 @@ cp_parser_lambda_expression (cp_parser* parser) if (cp_parser_start_tentative_firewall (parser)) start = token; - ok &= cp_parser_lambda_declarator_opt (parser, lambda_expr); + ok &= cp_parser_lambda_declarator_opt (parser, lambda_expr, + consteval_block_p); if (ok && cp_parser_error_occurred (parser)) ok = false; @@ -12254,10 +12265,13 @@ cp_parser_lambda_introducer (cp_parser* parser, tree lambda_expr) decl-specifier-seq [opt] noexcept-specifier [opt] attribute-specifier-seq [opt] trailing-return-type [opt] - LAMBDA_EXPR is the current representation of the lambda expression. */ + LAMBDA_EXPR is the current representation of the lambda expression. + If CONSTEVAL_BLOCK_P is true, we are parsing a consteval block, which + is syntactic sugar for a consteval lambda. */ static bool -cp_parser_lambda_declarator_opt (cp_parser* parser, tree lambda_expr) +cp_parser_lambda_declarator_opt (cp_parser* parser, tree lambda_expr, + bool consteval_block_p/*=false*/) { /* 5.1.1.4 of the standard says: If a lambda-expression does not include a lambda-declarator, it is as if @@ -12360,6 +12374,18 @@ cp_parser_lambda_declarator_opt (cp_parser* parser, tree lambda_expr) CP_PARSER_FLAGS_ONLY_MUTABLE_OR_CONSTEXPR, &lambda_specs, &declares_class_or_enum); + /* [dcl.pre] For a consteval-block-declaration D, the expression E + corresponding to D is: + [] -> void static consteval compound-statement () + Make it so. */ + if (consteval_block_p) + { + return_type = void_type_node; + lambda_specs.storage_class = sc_static; + set_and_check_decl_spec_loc (&lambda_specs, ds_consteval, + cp_lexer_peek_token (parser->lexer)); + } + if (omitted_parms_loc && lambda_specs.any_specifiers_p) { pedwarn (omitted_parms_loc, OPT_Wc__23_extensions, @@ -16047,15 +16073,16 @@ cp_parser_declaration_seq_opt (cp_parser* parser) static void cp_parser_declaration (cp_parser* parser, tree prefix_attrs) { - int saved_pedantic; + int saved_pedantic, saved_long_long; /* Check for the `__extension__' keyword. */ - if (cp_parser_extension_opt (parser, &saved_pedantic)) + if (cp_parser_extension_opt (parser, &saved_pedantic, &saved_long_long)) { /* Parse the qualified declaration. */ cp_parser_declaration (parser, prefix_attrs); /* Restore the PEDANTIC flag. */ pedantic = saved_pedantic; + warn_long_long = saved_long_long; return; } @@ -16297,6 +16324,56 @@ cp_parser_toplevel_declaration (cp_parser* parser) cp_parser_declaration (parser, NULL_TREE); } +/* Build an empty string for static_assert. */ + +static tree +build_empty_string () +{ + tree message = build_string (1, ""); + TREE_TYPE (message) = char_array_type_node; + fix_string_type (message); + return message; +} + +/* Return true iff the next tokens start a C++26 consteval block. */ + +static bool +cp_parser_next_tokens_are_consteval_block_p (cp_parser *parser) +{ + return (cxx_dialect >= cxx26 + && cp_lexer_next_token_is_keyword (parser->lexer, RID_CONSTEVAL) + && cp_lexer_nth_token_is (parser->lexer, 2, CPP_OPEN_BRACE)); +} + +/* Parse a consteval-block-declaration. + + consteval-block-declaration: + consteval compound-statement + + If MEMBER_P, this consteval block is a member declaration. */ + +static void +cp_parser_consteval_block (cp_parser *parser, bool member_p) +{ + const location_t loc = cp_lexer_peek_token (parser->lexer)->location; + /* Consume the 'consteval'. */ + cp_lexer_consume_token (parser->lexer); + + /* We know the next token is '{'. Let cp_parser_lambda_body handle it. */ + cp_expr lam = cp_parser_lambda_expression (parser, + /*consteval_block_p=*/true); + if (!cp_parser_error_occurred (parser)) + { + releasing_vec args; + tree call = finish_call_expr (lam, &args, + /*disallow_virtual=*/false, + /*koenig_p=*/false, + tf_warning_or_error); + finish_static_assert (call, build_empty_string (), loc, member_p, + /*show_expr_p=*/false, /*consteval_block_p=*/true); + } +} + /* Parse a block-declaration. block-declaration: @@ -16304,18 +16381,18 @@ cp_parser_toplevel_declaration (cp_parser* parser) asm-definition namespace-alias-definition using-declaration + using-enum-declaration using-directive + static_assert-declaration + consteval-block-declaration + alias-declaration + opaque-enum-declaration GNU Extension: block-declaration: __extension__ block-declaration - C++0x Extension: - - block-declaration: - static_assert-declaration - If STATEMENT_P is TRUE, then this block-declaration is occurring as part of a declaration-statement. */ @@ -16323,15 +16400,16 @@ static void cp_parser_block_declaration (cp_parser *parser, bool statement_p) { - int saved_pedantic; + int saved_pedantic, saved_long_long; /* Check for the `__extension__' keyword. */ - if (cp_parser_extension_opt (parser, &saved_pedantic)) + if (cp_parser_extension_opt (parser, &saved_pedantic, &saved_long_long)) { /* Parse the qualified declaration. */ cp_parser_block_declaration (parser, statement_p); /* Restore the PEDANTIC flag. */ pedantic = saved_pedantic; + warn_long_long = saved_long_long; return; } @@ -16392,6 +16470,8 @@ cp_parser_block_declaration (cp_parser *parser, /* If the next token is `static_assert' we have a static assertion. */ else if (token1->keyword == RID_STATIC_ASSERT) cp_parser_static_assert (parser, /*member_p=*/false); + else if (cp_parser_next_tokens_are_consteval_block_p (parser)) + cp_parser_consteval_block (parser, /*member_p=*/false); else { size_t attr_idx = cp_parser_skip_std_attribute_spec_seq (parser, 1); @@ -17695,9 +17775,7 @@ cp_parser_static_assert (cp_parser *parser, bool member_p) "only available with %<-std=c++17%> or %<-std=gnu++17%>"); /* Eat the ')' */ cp_lexer_consume_token (parser->lexer); - message = build_string (1, ""); - TREE_TYPE (message) = char_array_type_node; - fix_string_type (message); + message = build_empty_string (); } else { @@ -28827,12 +28905,20 @@ cp_parser_member_specification_opt (cp_parser* parser) /* Parse a member-declaration. member-declaration: - decl-specifier-seq [opt] member-declarator-list [opt] ; - function-definition ; [opt] - :: [opt] nested-name-specifier template [opt] unqualified-id ; + attribute-specifier-seq [opt] decl-specifier-seq [opt] + member-declarator-list [opt] ; + function-definition + friend-type-declaration using-declaration + using-enum-declaration + static_assert-declaration + consteval-block-declaration template-declaration + explicit-specialization + deduction-guide alias-declaration + opaque-enum-declaration + empty-declaration member-declarator-list: member-declarator @@ -28851,12 +28937,7 @@ cp_parser_member_specification_opt (cp_parser* parser) member-declarator: declarator attributes [opt] pure-specifier [opt] declarator attributes [opt] constant-initializer [opt] - identifier [opt] attributes [opt] : constant-expression - - C++0x Extensions: - - member-declaration: - static_assert-declaration */ + identifier [opt] attributes [opt] : constant-expression */ static void cp_parser_member_declaration (cp_parser* parser) @@ -28869,16 +28950,17 @@ cp_parser_member_declaration (cp_parser* parser) cp_token *token = NULL; cp_token *decl_spec_token_start = NULL; cp_token *initializer_token_start = NULL; - int saved_pedantic; + int saved_pedantic, saved_long_long; bool saved_colon_corrects_to_scope_p = parser->colon_corrects_to_scope_p; /* Check for the `__extension__' keyword. */ - if (cp_parser_extension_opt (parser, &saved_pedantic)) + if (cp_parser_extension_opt (parser, &saved_pedantic, &saved_long_long)) { /* Recurse. */ cp_parser_member_declaration (parser); /* Restore the old value of the PEDANTIC flag. */ pedantic = saved_pedantic; + warn_long_long = saved_long_long; return; } @@ -28955,6 +29037,12 @@ cp_parser_member_declaration (cp_parser* parser) return; } + if (cp_parser_next_tokens_are_consteval_block_p (parser)) + { + cp_parser_consteval_block (parser, /*member_p=*/true); + return; + } + parser->colon_corrects_to_scope_p = false; cp_omp_declare_simd_data odsd; @@ -32020,13 +32108,16 @@ cp_parser_skip_attributes_opt (cp_parser *parser, size_t n) present, and FALSE otherwise. *SAVED_PEDANTIC is set to the current value of the PEDANTIC flag, regardless of whether or not the `__extension__' keyword is present. The caller is responsible - for restoring the value of the PEDANTIC flag. */ + for restoring the value of the PEDANTIC flag. Similarly *SAVED_LONG_LONG + for warn_long_long flag. */ static bool -cp_parser_extension_opt (cp_parser* parser, int* saved_pedantic) +cp_parser_extension_opt (cp_parser *parser, int *saved_pedantic, + int *saved_long_long) { /* Save the old value of the PEDANTIC flag. */ *saved_pedantic = pedantic; + *saved_long_long = warn_long_long; if (cp_lexer_next_token_is_keyword (parser->lexer, RID_EXTENSION)) { @@ -32035,6 +32126,8 @@ cp_parser_extension_opt (cp_parser* parser, int* saved_pedantic) /* We're not being pedantic while the `__extension__' keyword is in effect. */ pedantic = 0; + /* And we don't want -Wlong-long warning. */ + warn_long_long = 0; return true; } diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 71ae764..acfeb81 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -6952,14 +6952,22 @@ convert_nontype_argument_function (tree type, tree expr, { auto_diagnostic_group d; location_t loc = cp_expr_loc_or_input_loc (expr); - error_at (loc, "%qE is not a valid template argument for type %qT", - expr, type); - if (TYPE_PTR_P (type)) - inform (loc, "it must be the address of a function " - "with external linkage"); + tree c; + if (cxx_dialect >= cxx17 + && (c = cxx_constant_value (fn), + c == error_mark_node)) + ; else - inform (loc, "it must be the name of a function with " - "external linkage"); + { + error_at (loc, "%qE is not a valid template argument for " + "type %qT", expr, type); + if (TYPE_PTR_P (type)) + inform (loc, "it must be the address of a function " + "with external linkage"); + else + inform (loc, "it must be the name of a function with " + "external linkage"); + } } return NULL_TREE; } @@ -7402,22 +7410,22 @@ invalid_tparm_referent_p (tree type, tree expr, tsubst_flags_t complain) /* Null pointer values are OK in C++11. */; else { - if (VAR_P (expr)) - { - if (complain & tf_error) - error ("%qD is not a valid template argument " - "because %qD is a variable, not the address of " - "a variable", expr, expr); - return true; - } + tree c; + if (!(complain & tf_error)) + ; + else if (cxx_dialect >= cxx17 + && (c = cxx_constant_value (expr), + c == error_mark_node)) + ; + else if (VAR_P (expr)) + error ("%qD is not a valid template argument " + "because %qD is a variable, not the address of " + "a variable", expr, expr); else - { - if (complain & tf_error) - error ("%qE is not a valid template argument for %qT " - "because it is not the address of a variable", - expr, type); - return true; - } + error ("%qE is not a valid template argument for %qT " + "because it is not the address of a variable", + expr, type); + return true; } } return false; @@ -19593,7 +19601,8 @@ tsubst_stmt (tree t, tree args, tsubst_flags_t complain, tree in_decl) finish_static_assert (condition, message, STATIC_ASSERT_SOURCE_LOCATION (t), - /*member_p=*/false, /*show_expr_p=*/true); + /*member_p=*/false, /*show_expr_p=*/true, + CONSTEVAL_BLOCK_P (t)); } break; diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index 52ecac4..be79b50 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -3992,9 +3992,15 @@ finish_compound_literal (tree type, tree compound_literal, tree finish_fname (tree id) { - tree decl; - - decl = fname_decl (input_location, C_RID_CODE (id), id); + tree decl = fname_decl (input_location, C_RID_CODE (id), id); + /* [expr.prim.lambda.closure]/16 "Unless the compound-statement is that + of a consteval-block-declaration, a variable __func__ is implicitly + defined...". We could be in a consteval block in a function, though, + and then we shouldn't warn. */ + if (current_function_decl + && !current_nonlambda_function (/*only_skip_consteval_block_p=*/true)) + pedwarn (input_location, 0, "%qD is not defined outside of function scope", + decl); if (processing_template_decl && current_function_decl && decl != error_mark_node) decl = DECL_NAME (decl); @@ -12598,11 +12604,14 @@ cexpr_str::extract (location_t location, const char * & msg, int &len) CONDITION and the message text MESSAGE. LOCATION is the location of the static assertion in the source code. When MEMBER_P, this static assertion is a member of a class. If SHOW_EXPR_P is true, - print the condition (because it was instantiation-dependent). */ + print the condition (because it was instantiation-dependent). + If CONSTEVAL_BLOCK_P is true, this static assertion represents + a consteval block. */ void finish_static_assert (tree condition, tree message, location_t location, - bool member_p, bool show_expr_p) + bool member_p, bool show_expr_p, + bool consteval_block_p/*=false*/) { tsubst_flags_t complain = tf_warning_or_error; @@ -12630,6 +12639,7 @@ finish_static_assert (tree condition, tree message, location_t location, STATIC_ASSERT_CONDITION (assertion) = orig_condition; STATIC_ASSERT_MESSAGE (assertion) = cstr.message; STATIC_ASSERT_SOURCE_LOCATION (assertion) = location; + CONSTEVAL_BLOCK_P (assertion) = consteval_block_p; if (member_p) maybe_add_class_template_decl_list (current_class_type, @@ -12641,6 +12651,13 @@ finish_static_assert (tree condition, tree message, location_t location, return; } + /* Evaluate the consteval { }. This must be done only once. */ + if (consteval_block_p) + { + cxx_constant_value (condition); + return; + } + /* Fold the expression and convert it to a boolean value. */ condition = contextual_conv_bool (condition, complain); condition = fold_non_dependent_expr (condition, complain, diff --git a/gcc/cprop.cc b/gcc/cprop.cc index bc72e64..dfe3462 100644 --- a/gcc/cprop.cc +++ b/gcc/cprop.cc @@ -1525,6 +1525,7 @@ static bool bypass_block (basic_block bb, rtx_insn *setcc, rtx_insn *jump) { rtx_insn *insn; + rtx setcc_src, setcc_dest; rtx note; edge e, edest; bool change; @@ -1533,7 +1534,19 @@ bypass_block (basic_block bb, rtx_insn *setcc, rtx_insn *jump) unsigned i; edge_iterator ei; - insn = (setcc != NULL) ? setcc : jump; + if (setcc != NULL) + { + rtx set = single_set (setcc); + setcc_dest = SET_DEST (set); + setcc_src = SET_SRC (set); + insn = setcc; + } + else + { + setcc_dest = NULL; + setcc_src = NULL; + insn = jump; + } /* Determine set of register uses in INSN. */ reg_use_count = 0; @@ -1608,9 +1621,7 @@ bypass_block (basic_block bb, rtx_insn *setcc, rtx_insn *jump) src = SET_SRC (pc_set (jump)); if (setcc != NULL) - src = simplify_replace_rtx (src, - SET_DEST (PATTERN (setcc)), - SET_SRC (PATTERN (setcc))); + src = simplify_replace_rtx (src, setcc_dest, setcc_src); new_rtx = simplify_replace_rtx (src, reg_used, set->src); @@ -1716,10 +1727,11 @@ bypass_conditional_jumps (void) { if (setcc) break; - if (GET_CODE (PATTERN (insn)) != SET) + rtx singleset = single_set (insn); + if (singleset == NULL_RTX) break; - dest = SET_DEST (PATTERN (insn)); + dest = SET_DEST (singleset); if (REG_P (dest)) setcc = insn; else diff --git a/gcc/diagnostics/changes.cc b/gcc/diagnostics/changes.cc index 290d602..e1caab0 100644 --- a/gcc/diagnostics/changes.cc +++ b/gcc/diagnostics/changes.cc @@ -1850,8 +1850,13 @@ run_all_tests () } } // namespace diagnostics::changes::selftest + +#endif /* CHECKING_P */ + } // namespace diagnostics::changes +#if CHECKING_P + namespace selftest { // diagnostics::selftest /* Run all of the selftests within this file. */ @@ -1863,6 +1868,7 @@ changes_cc_tests () } } // namespace selftest -} // namespace diagnostics #endif /* CHECKING_P */ + +} // namespace diagnostics diff --git a/gcc/diagnostics/context.cc b/gcc/diagnostics/context.cc index 0dbc148..85f7d2a 100644 --- a/gcc/diagnostics/context.cc +++ b/gcc/diagnostics/context.cc @@ -2130,10 +2130,11 @@ context_cc_tests () } } // namespace diagnostics::selftest -} // namespace diagnostics #endif /* #if CHECKING_P */ +} // namespace diagnostics + #if __GNUC__ >= 10 # pragma GCC diagnostic pop #endif diff --git a/gcc/diagnostics/html-sink.cc b/gcc/diagnostics/html-sink.cc index 07e7187..13d6309 100644 --- a/gcc/diagnostics/html-sink.cc +++ b/gcc/diagnostics/html-sink.cc @@ -1702,6 +1702,7 @@ html_sink_cc_tests () } } // namespace selftest -} // namespace diagnostics #endif /* CHECKING_P */ + +} // namespace diagnostics diff --git a/gcc/diagnostics/output-spec.cc b/gcc/diagnostics/output-spec.cc index 08128a9..83f128c 100644 --- a/gcc/diagnostics/output-spec.cc +++ b/gcc/diagnostics/output-spec.cc @@ -846,6 +846,7 @@ output_spec_cc_tests () } } // namespace diagnostics::selftest -} // namespace diagnostics #endif /* #if CHECKING_P */ + +} // namespace diagnostics diff --git a/gcc/diagnostics/sarif-sink.cc b/gcc/diagnostics/sarif-sink.cc index 05c0a8e..4738ae9 100644 --- a/gcc/diagnostics/sarif-sink.cc +++ b/gcc/diagnostics/sarif-sink.cc @@ -5072,6 +5072,7 @@ sarif_sink_cc_tests () } } // namespace diagnostics::selftest -} // namespace diagnostics #endif /* CHECKING_P */ + +} // namespace diagnostics diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi index 09ea87a..f4cba09 100644 --- a/gcc/doc/install.texi +++ b/gcc/doc/install.texi @@ -302,7 +302,7 @@ released on 2017-05-06. The gcobol documentation is maintained as manpages using troff mdoc. GNU groff is required to convert them to PDF format. Conversion to HTML is done with mandoc, available at -@uref{http://mdocml.bsd.lv/}. +@uref{https://mandoc.bsd.lv}. Because ISO COBOL defines strict requirements for numerical precision, gcobol requires hardware with 128-bit computation instructions. This diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 0980230..c1e708b 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -915,7 +915,7 @@ Objective-C and Objective-C++ Dialects}. @emph{AVR Options} (@ref{AVR Options}) @gccoptlist{-mmcu=@var{mcu} -mabsdata -maccumulate-args -mcvt -mbranch-cost=@var{cost} -mfuse-add=@var{level} -mfuse-move=@var{level} --mcall-prologues -mgas-isr-prologues -mint8 -mflmap +-mfuse-move2 -mcall-prologues -mgas-isr-prologues -mint8 -mflmap -mdouble=@var{bits} -mlong-double=@var{bits} -mno-call-main -mn_flash=@var{size} -mfract-convert-truncate -mno-interrupts -mmain-is-OS_task -mrelax -mrmw -mstrict-X -mtiny-stack @@ -25110,6 +25110,10 @@ Valid values for @var{level} are in the range @code{0} @dots{} @code{23} which is a 3:2:2:2 mixed radix value. Each digit controls some aspect of the optimization. +@opindex mfuse-move2 +@item -mfuse-move2 +Run a post combine optimization pass that tries to fuse move instructions. + @opindex mstrict-X @item -mstrict-X Use address register @code{X} in a way proposed by the hardware. This means @@ -31370,31 +31374,14 @@ When the RISC-V specifications define an extension as depending on other extensions, GCC will implicitly add the dependent extensions to the enabled extension set if they weren't added explicitly. -@opindex mcpu -@item -mcpu=@var{processor-string} -Use architecture of and optimize the output for the given processor, specified -by particular CPU name. -Permissible values for this option are: @samp{mips-p8700}, @samp{sifive-e20}, -@samp{sifive-e21}, @samp{sifive-e24}, @samp{sifive-e31}, @samp{sifive-e34}, -@samp{sifive-e76}, @samp{sifive-s21}, @samp{sifive-s51}, @samp{sifive-s54}, -@samp{sifive-s76}, @samp{sifive-u54}, @samp{sifive-u74}, @samp{sifive-x280}, -@samp{sifive-xp450}, @samp{sifive-x670}, @samp{thead-c906}, @samp{tt-ascalon-d8}, -@samp{xiangshan-nanhu}, @samp{xiangshan-kunminghu}, @samp{xt-c908}, @samp{xt-c908v}, -@samp{xt-c910}, @samp{xt-c910v2}, @samp{xt-c920}, @samp{xt-c920v2}. +@include riscv-mcpu.texi Note that @option{-mcpu} does not override @option{-march} or @option{-mtune}. -@opindex mtune -@item -mtune=@var{processor-string} -Optimize the output for the given processor, specified by microarchitecture or -particular CPU name. Permissible values for this option are: -@samp{generic-ooo}, @samp{mips-p8700}, @samp{rocket}, @samp{sifive-3-series}, -@samp{sifive-5-series}, @samp{sifive-7-series}, @samp{size}, -@samp{sifive-p400-series}, @samp{sifive-p600-series}, and all valid options for -@option{-mcpu=}. +@include riscv-mtune.texi When @option{-mtune=} is not specified, use the setting from @option{-mcpu}, -the default is @samp{rocket} if both are not specified. +the default is @samp{generic} if both are not specified. The @samp{size} choice is not intended for use by end-users. This is used when @option{-Os} is specified. It overrides the instruction cost info diff --git a/gcc/doc/riscv-mcpu.texi b/gcc/doc/riscv-mcpu.texi new file mode 100644 index 0000000..6753e51 --- /dev/null +++ b/gcc/doc/riscv-mcpu.texi @@ -0,0 +1,69 @@ +@c Copyright (C) 2025 Free Software Foundation, Inc. +@c This is part of the GCC manual. +@c For copying conditions, see the file gcc/doc/include/fdl.texi. + +@c This file is generated automatically using +@c gcc/config/riscv/gen-riscv-mcpu-texi.cc from: +@c gcc/config/riscv/riscv-cores.def + +@c Please *DO NOT* edit manually. + +@samp{Core Name} + +@opindex mcpu +@item -mcpu=@var{processor-string} +Use architecture of and optimize the output for the given processor, specified +by particular CPU name. Permissible values for this option are: + + +@samp{sifive-e20}, + +@samp{sifive-e21}, + +@samp{sifive-e24}, + +@samp{sifive-e31}, + +@samp{sifive-e34}, + +@samp{sifive-e76}, + +@samp{sifive-s21}, + +@samp{sifive-s51}, + +@samp{sifive-s54}, + +@samp{sifive-s76}, + +@samp{sifive-u54}, + +@samp{sifive-u74}, + +@samp{sifive-x280}, + +@samp{sifive-p450}, + +@samp{sifive-p670}, + +@samp{thead-c906}, + +@samp{xt-c908}, + +@samp{xt-c908v}, + +@samp{xt-c910}, + +@samp{xt-c910v2}, + +@samp{xt-c920}, + +@samp{xt-c920v2}, + +@samp{tt-ascalon-d8}, + +@samp{xiangshan-nanhu}, + +@samp{xiangshan-kunminghu}, + +@samp{mips-p8700}. diff --git a/gcc/doc/riscv-mtune.texi b/gcc/doc/riscv-mtune.texi new file mode 100644 index 0000000..a2a4d3e --- /dev/null +++ b/gcc/doc/riscv-mtune.texi @@ -0,0 +1,59 @@ +@c Copyright (C) 2025 Free Software Foundation, Inc. +@c This is part of the GCC manual. +@c For copying conditions, see the file gcc/doc/include/fdl.texi. + +@c This file is generated automatically using +@c gcc/config/riscv/gen-riscv-mtune-texi.cc from: +@c gcc/config/riscv/riscv-cores.def + +@c Please *DO NOT* edit manually. + +@samp{Tune Name} + +@opindex mtune +@item -mtune=@var{processor-string} +Optimize the output for the given processor, specified by microarchitecture or +particular CPU name. Permissible values for this option are: + + +@samp{generic}, + +@samp{rocket}, + +@samp{sifive-3-series}, + +@samp{sifive-5-series}, + +@samp{sifive-7-series}, + +@samp{sifive-p400-series}, + +@samp{sifive-p600-series}, + +@samp{tt-ascalon-d8}, + +@samp{thead-c906}, + +@samp{xt-c908}, + +@samp{xt-c908v}, + +@samp{xt-c910}, + +@samp{xt-c910v2}, + +@samp{xt-c920}, + +@samp{xt-c920v2}, + +@samp{xiangshan-nanhu}, + +@samp{xiangshan-kunminghu}, + +@samp{generic-ooo}, + +@samp{size}, + +@samp{mips-p8700}, + +and all valid options for @option{-mcpu=}. diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 928578b..215552c 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -6513,6 +6513,15 @@ The default is @code{NULL_TREE} which means to not vectorize scatter stores. @end deftypefn +@deftypefn {Target Hook} bool TARGET_VECTORIZE_PREFER_GATHER_SCATTER (machine_mode @var{mode}, int @var{scale}, unsigned int @var{group_size}) +This hook returns TRUE if gather loads or scatter stores are cheaper on +this target than a sequence of elementwise loads or stores. The @var{mode} +and @var{scale} correspond to the @code{gather_load} and +@code{scatter_store} instruction patterns. The @var{group_size} is the +number of scalar elements in each scalar loop iteration that are to be +combined into the vector. +@end deftypefn + @deftypefn {Target Hook} int TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN (struct cgraph_node *@var{}, struct cgraph_simd_clone *@var{}, @var{tree}, @var{int}, @var{bool}) This hook should set @var{vecsize_mangle}, @var{vecsize_int}, @var{vecsize_float} fields in @var{simd_clone} structure pointed by @var{clone_info} argument and also diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index eccc4d8..b03ad4c 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4311,6 +4311,8 @@ address; but often a machine-dependent strategy can generate better code. @hook TARGET_VECTORIZE_BUILTIN_SCATTER +@hook TARGET_VECTORIZE_PREFER_GATHER_SCATTER + @hook TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN @hook TARGET_SIMD_CLONE_ADJUST diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 52bd14c..d75d64f 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,53 @@ +2025-08-01 Mikael Morin <mikael@gcc.gnu.org> + + * trans-decl.cc (gfc_trans_deferred_vars): Fix closing brace in + a comment. + +2025-07-31 Mikael Morin <morin-mikael@orange.fr> + + PR fortran/121342 + * trans-expr.cc (gfc_conv_subref_array_arg): Remove offset + update. + (gfc_conv_procedure_call): For polymorphic functions, move the + scalarizer descriptor information... + * trans-array.cc (gfc_add_loop_ss_code): ... here, and evaluate + the bounds to fresh variables. + (get_class_info_from_ss): Remove offset update. + (gfc_conv_ss_startstride): Don't set a zero value for function + result upper bounds. + (late_set_loop_bounds): New. + (gfc_conv_loop_setup): If the bounds of a function result have + been set, and no other array provided loop bounds for a + dimension, use the function result bounds as loop bounds for + that dimension. + (gfc_set_delta): Don't skip delta setting for polymorphic + function results. + +2025-07-30 Mikael Morin <morin-mikael@orange.fr> + + * trans-array.cc (gfc_array_init_size): Remove the nelems + argument. + (gfc_array_allocate): Update caller. Remove the nelems + argument. + * trans-stmt.cc (gfc_trans_allocate): Update caller. Remove the + nelems variable. + * trans-array.h (gfc_array_allocate): Update prototype. + +2025-07-30 Yuao Ma <c8ef@outlook.com> + + * check.cc (gfc_check_split): Argument check for SPLIT. + * gfortran.h (enum gfc_isym_id): Define GFC_ISYM_SPLIT. + * intrinsic.cc (add_subroutines): Register SPLIT intrinsic. + * intrinsic.h (gfc_check_split): New decl. + (gfc_resolve_split): Ditto. + * intrinsic.texi: SPLIT documentation. + * iresolve.cc (gfc_resolve_split): Add resolved_sym for SPLIT. + * trans-decl.cc (gfc_build_intrinsic_function_decls): Add decl for + SPLIT in libgfortran. + * trans-intrinsic.cc (conv_intrinsic_split): SPLIT codegen. + (gfc_conv_intrinsic_subroutine): Handle SPLIT case. + * trans.h (GTY): Declare gfor_fndecl_string_split{, _char4}. + 2025-07-27 Mikael Morin <mikael@gcc.gnu.org> PR fortran/121185 diff --git a/gcc/fortran/check.cc b/gcc/fortran/check.cc index 838d523..8626526 100644 --- a/gcc/fortran/check.cc +++ b/gcc/fortran/check.cc @@ -5559,6 +5559,27 @@ gfc_check_scan (gfc_expr *x, gfc_expr *y, gfc_expr *z, gfc_expr *kind) return true; } +bool +gfc_check_split (gfc_expr *string, gfc_expr *set, gfc_expr *pos, gfc_expr *back) +{ + if (!type_check (string, 0, BT_CHARACTER)) + return false; + + if (!type_check (set, 1, BT_CHARACTER)) + return false; + + if (!type_check (pos, 2, BT_INTEGER) || !scalar_check (pos, 2)) + return false; + + if (back != NULL + && (!type_check (back, 3, BT_LOGICAL) || !scalar_check (back, 3))) + return false; + + if (!same_type_check (string, 0, set, 1)) + return false; + + return true; +} bool gfc_check_secnds (gfc_expr *r) diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index 85feb18..d9dcd1b 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -729,6 +729,8 @@ enum gfc_isym_id GFC_ISYM_COSPI, GFC_ISYM_SINPI, GFC_ISYM_TANPI, + + GFC_ISYM_SPLIT, }; enum init_local_logical diff --git a/gcc/fortran/intrinsic.cc b/gcc/fortran/intrinsic.cc index 9e07627..c99a7a8 100644 --- a/gcc/fortran/intrinsic.cc +++ b/gcc/fortran/intrinsic.cc @@ -3933,6 +3933,14 @@ add_subroutines (void) pt, BT_INTEGER, di, OPTIONAL, INTENT_IN, gt, BT_INTEGER, di, OPTIONAL, INTENT_OUT); + add_sym_4s ("split", GFC_ISYM_SPLIT, CLASS_PURE, + BT_UNKNOWN, 0, GFC_STD_F2023, + gfc_check_split, NULL, gfc_resolve_split, + "string", BT_CHARACTER, dc, REQUIRED, INTENT_IN, + "set", BT_CHARACTER, dc, REQUIRED, INTENT_IN, + "pos", BT_INTEGER, di, REQUIRED, INTENT_INOUT, + "back", BT_LOGICAL, dl, OPTIONAL, INTENT_IN); + /* The following subroutines are part of ISO_C_BINDING. */ add_sym_3s ("c_f_pointer", GFC_ISYM_C_F_POINTER, CLASS_IMPURE, BT_UNKNOWN, 0, diff --git a/gcc/fortran/intrinsic.h b/gcc/fortran/intrinsic.h index fd54588..8a0ab93 100644 --- a/gcc/fortran/intrinsic.h +++ b/gcc/fortran/intrinsic.h @@ -215,6 +215,7 @@ bool gfc_check_mvbits (gfc_expr *, gfc_expr *, gfc_expr *, gfc_expr *, bool gfc_check_random_init (gfc_expr *, gfc_expr *); bool gfc_check_random_number (gfc_expr *); bool gfc_check_random_seed (gfc_expr *, gfc_expr *, gfc_expr *); +bool gfc_check_split (gfc_expr *, gfc_expr *, gfc_expr *, gfc_expr *); bool gfc_check_dtime_etime_sub (gfc_expr *, gfc_expr *); bool gfc_check_fgetputc_sub (gfc_expr *, gfc_expr *, gfc_expr *); bool gfc_check_fgetput_sub (gfc_expr *, gfc_expr *); @@ -693,6 +694,7 @@ void gfc_resolve_link_sub (gfc_code *); void gfc_resolve_symlnk_sub (gfc_code *); void gfc_resolve_signal_sub (gfc_code *); void gfc_resolve_sleep_sub (gfc_code *); +void gfc_resolve_split (gfc_code *); void gfc_resolve_stat_sub (gfc_code *); void gfc_resolve_system_clock (gfc_code *); void gfc_resolve_system_sub (gfc_code *); diff --git a/gcc/fortran/intrinsic.texi b/gcc/fortran/intrinsic.texi index 3103da3..a24b234 100644 --- a/gcc/fortran/intrinsic.texi +++ b/gcc/fortran/intrinsic.texi @@ -313,6 +313,7 @@ Some basic guidelines for editing this document: * @code{SIZEOF}: SIZEOF, Determine the size in bytes of an expression * @code{SLEEP}: SLEEP, Sleep for the specified number of seconds * @code{SPACING}: SPACING, Smallest distance between two numbers of a given type +* @code{SPLIT}: SPLIT, Parse a string into tokens, one at a time. * @code{SPREAD}: SPREAD, Add a dimension to an array * @code{SQRT}: SQRT, Square-root function * @code{SRAND}: SRAND, Reinitialize the random number generator @@ -14203,6 +14204,69 @@ Fortran 90 and later +@node SPLIT +@section @code{SPLIT} --- Parse a string into tokens, one at a time +@fnindex SPLIT +@cindex string, split + +@table @asis +@item @emph{Synopsis}: +@code{RESULT = SPLIT(STRING, SET, POS [, BACK])} + +@item @emph{Description}: +Updates the integer @var{POS} to the position of the next (or previous) +separator in @var{STRING}. + +If @var{BACK} is absent or is present with the value false, @var{POS} is +assigned the position of the leftmost token delimiter in @var{STRING} whose +position is greater than @var{POS}, or if there is no such character, it is +assigned a value one greater than the length of @var{STRING}. This identifies +a token with starting position one greater than the value of @var{POS} on +invocation, and ending position one less than the value of @var{POS} on return. + +If @var{BACK} is present with the value true, @var{POS} is assigned the +position of the rightmost token delimiter in @var{STRING} whose position is +less than @var{POS}, or if there is no such character, it is assigned the value +zero. This identifies a token with ending position one less than the value of +@var{POS} on invocation, and starting position one greater than the value of +@var{POS} on return. + +@item @emph{Class}: +Subroutine + +@item @emph{Arguments}: +@multitable @columnfractions .15 .70 +@item @var{STRING} @tab Shall be of type @code{CHARACTER}. +@item @var{SET} @tab Shall be of type @code{CHARACTER}. +@item @var{POS} @tab Shall be of type @code{INTEGER}. +@item @var{BACK} @tab (Optional) Shall be of type @code{LOGICAL}. +@end multitable + +@item @emph{Example}: +@smallexample +character(len=:), allocatable :: input +character(len=2) :: set = ', ' +integer :: p +input = "one,last example" +p = 0 +do + if (p > len(input)) exit + istart = p + 1 + call split(input, set, p) + iend = p - 1 + print '(t7, a)', input(istart:iend) +end do +@end smallexample + +@item @emph{Standard}: +Fortran 2023 + +@item @emph{See also}: +@ref{SCAN} +@end table + + + @node SPREAD @section @code{SPREAD} --- Add a dimension to an array @fnindex SPREAD diff --git a/gcc/fortran/iresolve.cc b/gcc/fortran/iresolve.cc index 1001309..da354ab 100644 --- a/gcc/fortran/iresolve.cc +++ b/gcc/fortran/iresolve.cc @@ -3863,6 +3863,19 @@ gfc_resolve_sleep_sub (gfc_code *c) c->resolved_sym = gfc_get_intrinsic_sub_symbol (name); } +void +gfc_resolve_split (gfc_code *c) +{ + const char *name; + gfc_expr *string; + + string = c->ext.actual->expr; + if (string->ts.type == BT_CHARACTER && string->ts.kind == 4) + name = "__split_char4"; + else + name = "__split"; + c->resolved_sym = gfc_get_intrinsic_sub_symbol (name); +} /* G77 compatibility function srand(). */ diff --git a/gcc/fortran/trans-array.cc b/gcc/fortran/trans-array.cc index 6b759d1..990aaaf 100644 --- a/gcc/fortran/trans-array.cc +++ b/gcc/fortran/trans-array.cc @@ -1426,12 +1426,6 @@ get_class_info_from_ss (stmtblock_t * pre, gfc_ss *ss, tree *eltype, tmp2 = gfc_class_len_get (class_expr); gfc_add_modify (pre, tmp, tmp2); } - - if (rhs_function) - { - tmp = gfc_class_data_get (class_expr); - gfc_conv_descriptor_offset_set (pre, tmp, gfc_index_zero_node); - } } else if (rhs_ss->info->data.array.descriptor) { @@ -3372,18 +3366,51 @@ gfc_add_loop_ss_code (gfc_loopinfo * loop, gfc_ss * ss, bool subscript, break; case GFC_SS_FUNCTION: - /* Array function return value. We call the function and save its - result in a temporary for use inside the loop. */ - gfc_init_se (&se, NULL); - se.loop = loop; - se.ss = ss; - if (gfc_is_class_array_function (expr)) - expr->must_finalize = 1; - gfc_conv_expr (&se, expr); - gfc_add_block_to_block (&outer_loop->pre, &se.pre); - gfc_add_block_to_block (&outer_loop->post, &se.post); - gfc_add_block_to_block (&outer_loop->post, &se.finalblock); - ss_info->string_length = se.string_length; + { + /* Array function return value. We call the function and save its + result in a temporary for use inside the loop. */ + gfc_init_se (&se, NULL); + se.loop = loop; + se.ss = ss; + bool class_func = gfc_is_class_array_function (expr); + if (class_func) + expr->must_finalize = 1; + gfc_conv_expr (&se, expr); + gfc_add_block_to_block (&outer_loop->pre, &se.pre); + if (class_func + && se.expr + && GFC_CLASS_TYPE_P (TREE_TYPE (se.expr))) + { + tree tmp = gfc_class_data_get (se.expr); + info->descriptor = tmp; + info->data = gfc_conv_descriptor_data_get (tmp); + info->offset = gfc_conv_descriptor_offset_get (tmp); + for (gfc_ss *s = ss; s; s = s->parent) + for (int n = 0; n < s->dimen; n++) + { + int dim = s->dim[n]; + tree tree_dim = gfc_rank_cst[dim]; + + tree start; + start = gfc_conv_descriptor_lbound_get (tmp, tree_dim); + start = gfc_evaluate_now (start, &outer_loop->pre); + info->start[dim] = start; + + tree end; + end = gfc_conv_descriptor_ubound_get (tmp, tree_dim); + end = gfc_evaluate_now (end, &outer_loop->pre); + info->end[dim] = end; + + tree stride; + stride = gfc_conv_descriptor_stride_get (tmp, tree_dim); + stride = gfc_evaluate_now (stride, &outer_loop->pre); + info->stride[dim] = stride; + } + } + gfc_add_block_to_block (&outer_loop->post, &se.post); + gfc_add_block_to_block (&outer_loop->post, &se.finalblock); + ss_info->string_length = se.string_length; + } break; case GFC_SS_CONSTRUCTOR: @@ -5383,7 +5410,8 @@ done: int dim = ss->dim[n]; info->start[dim] = gfc_index_zero_node; - info->end[dim] = gfc_index_zero_node; + if (ss_info->type != GFC_SS_FUNCTION) + info->end[dim] = gfc_index_zero_node; info->stride[dim] = gfc_index_one_node; } break; @@ -6068,6 +6096,46 @@ set_loop_bounds (gfc_loopinfo *loop) } +/* Last attempt to set the loop bounds, in case they depend on an allocatable + function result. */ + +static void +late_set_loop_bounds (gfc_loopinfo *loop) +{ + int n, dim; + gfc_array_info *info; + gfc_ss **loopspec; + + loopspec = loop->specloop; + + for (n = 0; n < loop->dimen; n++) + { + /* Set the extents of this range. */ + if (loop->from[n] == NULL_TREE + || loop->to[n] == NULL_TREE) + { + /* We should have found the scalarization loop specifier. If not, + that's bad news. */ + gcc_assert (loopspec[n]); + + info = &loopspec[n]->info->data.array; + dim = loopspec[n]->dim[n]; + + if (loopspec[n]->info->type == GFC_SS_FUNCTION + && info->start[dim] + && info->end[dim]) + { + loop->from[n] = info->start[dim]; + loop->to[n] = info->end[dim]; + } + } + } + + for (loop = loop->nested; loop; loop = loop->next) + late_set_loop_bounds (loop); +} + + /* Initialize the scalarization loop. Creates the loop variables. Determines the range of the loop variables. Creates a temporary if required. Also generates code for scalar expressions which have been @@ -6086,6 +6154,8 @@ gfc_conv_loop_setup (gfc_loopinfo * loop, locus * where) allocating the temporary. */ gfc_add_loop_ss_code (loop, loop->ss, false, where); + late_set_loop_bounds (loop); + tmp_ss = loop->temp_ss; /* If we want a temporary then create it. */ if (tmp_ss != NULL) @@ -6142,9 +6212,11 @@ gfc_set_delta (gfc_loopinfo *loop) gfc_ss_type ss_type; ss_type = ss->info->type; - if (ss_type != GFC_SS_SECTION - && ss_type != GFC_SS_COMPONENT - && ss_type != GFC_SS_CONSTRUCTOR) + if (!(ss_type == GFC_SS_SECTION + || ss_type == GFC_SS_COMPONENT + || ss_type == GFC_SS_CONSTRUCTOR + || (ss_type == GFC_SS_FUNCTION + && gfc_is_class_array_function (ss->info->expr)))) continue; info = &ss->info->data.array; @@ -6296,8 +6368,8 @@ static tree gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset, gfc_expr ** lower, gfc_expr ** upper, stmtblock_t * pblock, stmtblock_t * descriptor_block, tree * overflow, - tree expr3_elem_size, tree *nelems, gfc_expr *expr3, - tree expr3_desc, bool e3_has_nodescriptor, gfc_expr *expr, + tree expr3_elem_size, gfc_expr *expr3, tree expr3_desc, + bool e3_has_nodescriptor, gfc_expr *expr, tree *element_size, bool explicit_ts) { tree type; @@ -6573,7 +6645,6 @@ gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset, if (rank == 0) return *element_size; - *nelems = gfc_evaluate_now (stride, pblock); stride = fold_convert (size_type_node, stride); /* First check for overflow. Since an array of type character can @@ -6662,9 +6733,8 @@ retrieve_last_ref (gfc_ref **ref_in, gfc_ref **prev_ref_in) bool gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, tree errlen, tree label_finish, tree expr3_elem_size, - tree *nelems, gfc_expr *expr3, tree e3_arr_desc, - bool e3_has_nodescriptor, gfc_omp_namelist *omp_alloc, - bool explicit_ts) + gfc_expr *expr3, tree e3_arr_desc, bool e3_has_nodescriptor, + gfc_omp_namelist *omp_alloc, bool explicit_ts) { tree tmp; tree pointer; @@ -6795,7 +6865,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, coarray ? ref->u.ar.as->corank : 0, &offset, lower, upper, &se->pre, &set_descriptor_block, &overflow, - expr3_elem_size, nelems, expr3, e3_arr_desc, + expr3_elem_size, expr3, e3_arr_desc, e3_has_nodescriptor, expr, &element_size, explicit_ts); diff --git a/gcc/fortran/trans-array.h b/gcc/fortran/trans-array.h index 1bb3294..29098fd 100644 --- a/gcc/fortran/trans-array.h +++ b/gcc/fortran/trans-array.h @@ -20,9 +20,8 @@ along with GCC; see the file COPYING3. If not see /* Generate code to initialize and allocate an array. Statements are added to se, which should contain an expression for the array descriptor. */ -bool gfc_array_allocate (gfc_se *, gfc_expr *, tree, tree, tree, tree, - tree, tree *, gfc_expr *, tree, bool, - gfc_omp_namelist *, bool); +bool gfc_array_allocate (gfc_se *, gfc_expr *, tree, tree, tree, tree, tree, + gfc_expr *, tree, bool, gfc_omp_namelist *, bool); /* Allow the bounds of a loop to be set from a callee's array spec. */ void gfc_set_loop_bounds_from_array_spec (gfc_interface_mapping *, diff --git a/gcc/fortran/trans-decl.cc b/gcc/fortran/trans-decl.cc index d5acdca..3b49b18 100644 --- a/gcc/fortran/trans-decl.cc +++ b/gcc/fortran/trans-decl.cc @@ -197,6 +197,7 @@ tree gfor_fndecl_string_scan; tree gfor_fndecl_string_verify; tree gfor_fndecl_string_trim; tree gfor_fndecl_string_minmax; +tree gfor_fndecl_string_split; tree gfor_fndecl_adjustl; tree gfor_fndecl_adjustr; tree gfor_fndecl_select_string; @@ -208,6 +209,7 @@ tree gfor_fndecl_string_scan_char4; tree gfor_fndecl_string_verify_char4; tree gfor_fndecl_string_trim_char4; tree gfor_fndecl_string_minmax_char4; +tree gfor_fndecl_string_split_char4; tree gfor_fndecl_adjustl_char4; tree gfor_fndecl_adjustr_char4; tree gfor_fndecl_select_string_char4; @@ -3569,6 +3571,12 @@ gfc_build_intrinsic_function_decls (void) build_pointer_type (pchar1_type_node), integer_type_node, integer_type_node); + gfor_fndecl_string_split = gfc_build_library_function_decl_with_spec ( + get_identifier (PREFIX ("string_split")), ". . R . R . . ", + gfc_charlen_type_node, 6, gfc_charlen_type_node, pchar1_type_node, + gfc_charlen_type_node, pchar1_type_node, gfc_charlen_type_node, + gfc_logical4_type_node); + gfor_fndecl_adjustl = gfc_build_library_function_decl_with_spec ( get_identifier (PREFIX("adjustl")), ". W . R ", void_type_node, 3, pchar1_type_node, gfc_charlen_type_node, @@ -3641,6 +3649,12 @@ gfc_build_intrinsic_function_decls (void) build_pointer_type (pchar4_type_node), integer_type_node, integer_type_node); + gfor_fndecl_string_split_char4 = gfc_build_library_function_decl_with_spec ( + get_identifier (PREFIX ("string_split_char4")), ". . R . R . . ", + gfc_charlen_type_node, 6, gfc_charlen_type_node, pchar4_type_node, + gfc_charlen_type_node, pchar4_type_node, gfc_charlen_type_node, + gfc_logical4_type_node); + gfor_fndecl_adjustl_char4 = gfc_build_library_function_decl_with_spec ( get_identifier (PREFIX("adjustl_char4")), ". W . R ", void_type_node, 3, pchar4_type_node, gfc_charlen_type_node, @@ -5326,7 +5340,7 @@ gfc_trans_deferred_vars (gfc_symbol * proc_sym, gfc_wrapped_block * block) continue; /* 'omp allocate( {purpose: allocator, value: align}, {purpose: init-stmtlist, value: cleanup-stmtlist}, - {purpose: size-var, value: last-size-expr}} + {purpose: size-var, value: last-size-expr} ) where init-stmt/cleanup-stmt is the STATEMENT list to find the try-final block; last-size-expr is to find the location after which to add the code and 'size-var' is for the proper size, cf. diff --git a/gcc/fortran/trans-expr.cc b/gcc/fortran/trans-expr.cc index 0db7ba3..ec24084 100644 --- a/gcc/fortran/trans-expr.cc +++ b/gcc/fortran/trans-expr.cc @@ -5485,16 +5485,6 @@ gfc_conv_subref_array_arg (gfc_se *se, gfc_expr * expr, int g77, /* Translate the expression. */ gfc_conv_expr (&rse, expr); - /* Reset the offset for the function call since the loop - is zero based on the data pointer. Note that the temp - comes first in the loop chain since it is added second. */ - if (gfc_is_class_array_function (expr)) - { - tmp = loop.ss->loop_chain->info->data.array.descriptor; - gfc_conv_descriptor_offset_set (&loop.pre, tmp, - gfc_index_zero_node); - } - gfc_conv_tmp_array_ref (&lse); if (intent != INTENT_OUT) @@ -8864,28 +8854,9 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym, && se->expr && GFC_CLASS_TYPE_P (TREE_TYPE (se->expr)) && expr->must_finalize) { - int n; - if (se->ss && se->ss->loop) - { - gfc_add_block_to_block (&se->ss->loop->pre, &se->pre); - se->expr = gfc_evaluate_now (se->expr, &se->ss->loop->pre); - tmp = gfc_class_data_get (se->expr); - info->descriptor = tmp; - info->data = gfc_conv_descriptor_data_get (tmp); - info->offset = gfc_conv_descriptor_offset_get (tmp); - for (n = 0; n < se->ss->loop->dimen; n++) - { - tree dim = gfc_rank_cst[n]; - se->ss->loop->to[n] = gfc_conv_descriptor_ubound_get (tmp, dim); - se->ss->loop->from[n] = gfc_conv_descriptor_lbound_get (tmp, dim); - } - } - else - { - /* TODO Eliminate the doubling of temporaries. This - one is necessary to ensure no memory leakage. */ - se->expr = gfc_evaluate_now (se->expr, &se->pre); - } + /* TODO Eliminate the doubling of temporaries. This + one is necessary to ensure no memory leakage. */ + se->expr = gfc_evaluate_now (se->expr, &se->pre); /* Finalize the result, if necessary. */ attr = expr->value.function.esym diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index be98427..f68ceb1 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -3466,6 +3466,74 @@ else return gfc_finish_block (&block); } +static tree +conv_intrinsic_split (gfc_code *code) +{ + stmtblock_t block, post_block; + gfc_se se; + gfc_expr *string_expr, *set_expr, *pos_expr, *back_expr; + tree string, string_len; + tree set, set_len; + tree pos, pos_for_call; + tree back; + tree fndecl, call; + + string_expr = code->ext.actual->expr; + set_expr = code->ext.actual->next->expr; + pos_expr = code->ext.actual->next->next->expr; + back_expr = code->ext.actual->next->next->next->expr; + + gfc_start_block (&block); + gfc_init_block (&post_block); + + gfc_init_se (&se, NULL); + gfc_conv_expr (&se, string_expr); + gfc_conv_string_parameter (&se); + gfc_add_block_to_block (&block, &se.pre); + gfc_add_block_to_block (&post_block, &se.post); + string = se.expr; + string_len = se.string_length; + + gfc_init_se (&se, NULL); + gfc_conv_expr (&se, set_expr); + gfc_conv_string_parameter (&se); + gfc_add_block_to_block (&block, &se.pre); + gfc_add_block_to_block (&post_block, &se.post); + set = se.expr; + set_len = se.string_length; + + gfc_init_se (&se, NULL); + gfc_conv_expr (&se, pos_expr); + gfc_add_block_to_block (&block, &se.pre); + gfc_add_block_to_block (&post_block, &se.post); + pos = se.expr; + pos_for_call = fold_convert (gfc_charlen_type_node, pos); + + if (back_expr) + { + gfc_init_se (&se, NULL); + gfc_conv_expr (&se, back_expr); + gfc_add_block_to_block (&block, &se.pre); + gfc_add_block_to_block (&post_block, &se.post); + back = se.expr; + } + else + back = logical_false_node; + + if (string_expr->ts.kind == 1) + fndecl = gfor_fndecl_string_split; + else if (string_expr->ts.kind == 4) + fndecl = gfor_fndecl_string_split_char4; + else + gcc_unreachable (); + + call = build_call_expr_loc (input_location, fndecl, 6, string_len, string, + set_len, set, pos_for_call, back); + gfc_add_modify (&block, pos, fold_convert (TREE_TYPE (pos), call)); + + gfc_add_block_to_block (&block, &post_block); + return gfc_finish_block (&block); +} /* Return a character string containing the tty name. */ @@ -13261,6 +13329,10 @@ gfc_conv_intrinsic_subroutine (gfc_code *code) res = conv_intrinsic_system_clock (code); break; + case GFC_ISYM_SPLIT: + res = conv_intrinsic_split (code); + break; + default: res = NULL_TREE; break; diff --git a/gcc/fortran/trans-stmt.cc b/gcc/fortran/trans-stmt.cc index f105401..b4ddf75 100644 --- a/gcc/fortran/trans-stmt.cc +++ b/gcc/fortran/trans-stmt.cc @@ -6710,7 +6710,6 @@ gfc_trans_allocate (gfc_code * code, gfc_omp_namelist *omp_allocate) stmtblock_t block; stmtblock_t post; stmtblock_t final_block; - tree nelems; bool upoly_expr, tmp_expr3_len_flag = false, al_len_needs_set, is_coarray; bool needs_caf_sync, caf_refs_comp; bool e3_has_nodescriptor = false; @@ -7242,7 +7241,6 @@ gfc_trans_allocate (gfc_code * code, gfc_omp_namelist *omp_allocate) to handle the complete array allocation. Only the element size needs to be provided, which is done most of the time by the pre-evaluation step. */ - nelems = NULL_TREE; if (expr3_len && (code->expr3->ts.type == BT_CHARACTER || code->expr3->ts.type == BT_CLASS)) { @@ -7313,9 +7311,8 @@ gfc_trans_allocate (gfc_code * code, gfc_omp_namelist *omp_allocate) } - if (!gfc_array_allocate (&se, expr, stat, errmsg, errlen, - label_finish, tmp, &nelems, - e3rhs ? e3rhs : code->expr3, + if (!gfc_array_allocate (&se, expr, stat, errmsg, errlen, label_finish, + tmp, e3rhs ? e3rhs : code->expr3, e3_is == E3_DESC ? expr3 : NULL_TREE, e3_has_nodescriptor, omp_alloc_item, code->ext.alloc.ts.type != BT_UNKNOWN)) diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h index 461b0cd..40680e9 100644 --- a/gcc/fortran/trans.h +++ b/gcc/fortran/trans.h @@ -961,6 +961,7 @@ extern GTY(()) tree gfor_fndecl_string_scan; extern GTY(()) tree gfor_fndecl_string_verify; extern GTY(()) tree gfor_fndecl_string_trim; extern GTY(()) tree gfor_fndecl_string_minmax; +extern GTY(()) tree gfor_fndecl_string_split; extern GTY(()) tree gfor_fndecl_adjustl; extern GTY(()) tree gfor_fndecl_adjustr; extern GTY(()) tree gfor_fndecl_select_string; @@ -972,6 +973,7 @@ extern GTY(()) tree gfor_fndecl_string_scan_char4; extern GTY(()) tree gfor_fndecl_string_verify_char4; extern GTY(()) tree gfor_fndecl_string_trim_char4; extern GTY(()) tree gfor_fndecl_string_minmax_char4; +extern GTY(()) tree gfor_fndecl_string_split_char4; extern GTY(()) tree gfor_fndecl_adjustl_char4; extern GTY(()) tree gfor_fndecl_adjustr_char4; extern GTY(()) tree gfor_fndecl_select_string_char4; diff --git a/gcc/gcov-io.cc b/gcc/gcov-io.cc index f39b4bd..dd3fc88 100644 --- a/gcc/gcov-io.cc +++ b/gcc/gcov-io.cc @@ -69,7 +69,7 @@ gcov_position (void) /* Return nonzero if the error flag is set. */ /* We need to expose this function when compiling for gcov-tool. */ -#ifndef IN_GCOV_TOOL +#if !defined (IN_GCOV_TOOL) && !defined (IN_GCC) static inline #endif int diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h index f3e3a1c..313c15c 100644 --- a/gcc/gcov-io.h +++ b/gcc/gcov-io.h @@ -387,6 +387,7 @@ char *mangle_path (char const *base); /* Available outside gcov */ GCOV_LINKAGE void gcov_write (const void *, unsigned) ATTRIBUTE_HIDDEN; GCOV_LINKAGE void gcov_write_unsigned (gcov_unsigned_t) ATTRIBUTE_HIDDEN; +GCOV_LINKAGE int gcov_is_error (void); #endif #if !IN_GCOV && !IN_LIBGCOV diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc index 49e3440..85319b3 100644 --- a/gcc/gimple-fold.cc +++ b/gcc/gimple-fold.cc @@ -6762,10 +6762,10 @@ fold_stmt (gimple_stmt_iterator *gsi, tree (*valueize) (tree), bitmap dce_bitmap which can produce *&x = 0. */ bool -fold_stmt_inplace (gimple_stmt_iterator *gsi) +fold_stmt_inplace (gimple_stmt_iterator *gsi, tree (*valueize) (tree)) { gimple *stmt = gsi_stmt (*gsi); - bool changed = fold_stmt_1 (gsi, true, no_follow_ssa_edges); + bool changed = fold_stmt_1 (gsi, true, valueize); gcc_assert (gsi_stmt (*gsi) == stmt); return changed; } diff --git a/gcc/gimple-fold.h b/gcc/gimple-fold.h index e3cf1f6..b678502 100644 --- a/gcc/gimple-fold.h +++ b/gcc/gimple-fold.h @@ -28,9 +28,12 @@ struct c_strlen_data; extern bool get_range_strlen (tree, c_strlen_data *, unsigned eltsize); extern void gimplify_and_update_call_from_tree (gimple_stmt_iterator *, tree); extern bool update_gimple_call (gimple_stmt_iterator *, tree, int, ...); +extern tree no_follow_ssa_edges (tree); +extern tree follow_single_use_edges (tree); +extern tree follow_all_ssa_edges (tree); extern bool fold_stmt (gimple_stmt_iterator *, bitmap = nullptr); extern bool fold_stmt (gimple_stmt_iterator *, tree (*) (tree), bitmap = nullptr); -extern bool fold_stmt_inplace (gimple_stmt_iterator *); +extern bool fold_stmt_inplace (gimple_stmt_iterator *, tree (*) (tree) = no_follow_ssa_edges); extern tree maybe_fold_and_comparisons (tree, enum tree_code, tree, tree, enum tree_code, tree, tree, basic_block = nullptr); @@ -39,9 +42,6 @@ extern tree maybe_fold_or_comparisons (tree, enum tree_code, tree, tree, basic_block = nullptr); extern bool optimize_atomic_compare_exchange_p (gimple *); extern void fold_builtin_atomic_compare_exchange (gimple_stmt_iterator *); -extern tree no_follow_ssa_edges (tree); -extern tree follow_single_use_edges (tree); -extern tree follow_all_ssa_edges (tree); extern tree gimple_fold_stmt_to_constant_1 (gimple *, tree (*) (tree), tree (*) (tree) = no_follow_ssa_edges); extern tree gimple_fold_stmt_to_constant (gimple *, tree (*) (tree)); diff --git a/gcc/gimple-ssa-store-merging.cc b/gcc/gimple-ssa-store-merging.cc index ce56d97..d8075ca 100644 --- a/gcc/gimple-ssa-store-merging.cc +++ b/gcc/gimple-ssa-store-merging.cc @@ -1033,7 +1033,7 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap, source when rsize < range. */ if (n->range == orig_range /* There're case like 0x300000200 for uint32->uint64 cast, - Don't hanlde this. */ + Don't handle this. */ && n->range == TYPE_PRECISION (n->type) && ((orig_range == 32 && optab_handler (rotl_optab, SImode) != CODE_FOR_nothing) @@ -1043,7 +1043,7 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap, { uint64_t range = (orig_range / BITS_PER_UNIT) * BITS_PER_MARKER; uint64_t count = (tmp_n & MARKER_MASK) * BITS_PER_MARKER; - /* .i.e. hanlde 0x203040506070800 when lower byte is zero. */ + /* .i.e. handle 0x203040506070800 when lower byte is zero. */ if (!count) { for (uint64_t i = 1; i != range / BITS_PER_MARKER; i++) @@ -1055,6 +1055,8 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap, if (count <= range / BITS_PER_MARKER) { count = (count + i) * BITS_PER_MARKER % range; + if (!count) + return NULL; break; } else diff --git a/gcc/hooks.cc b/gcc/hooks.cc index 951825d..76cb5931 100644 --- a/gcc/hooks.cc +++ b/gcc/hooks.cc @@ -117,6 +117,13 @@ hook_bool_mode_const_rtx_true (machine_mode, const_rtx) return true; } +/* Generic hook that takes (machine_mode, int, unsigned) and returns false. */ +bool +hook_bool_mode_int_unsigned_false (machine_mode, int, unsigned) +{ + return false; +} + /* Generic hook that takes (machine_mode, rtx) and returns false. */ bool hook_bool_mode_rtx_false (machine_mode, rtx) diff --git a/gcc/hooks.h b/gcc/hooks.h index c0663bf..e95bd11 100644 --- a/gcc/hooks.h +++ b/gcc/hooks.h @@ -36,6 +36,7 @@ extern bool hook_bool_mode_true (machine_mode); extern bool hook_bool_mode_mode_true (machine_mode, machine_mode); extern bool hook_bool_mode_const_rtx_false (machine_mode, const_rtx); extern bool hook_bool_mode_const_rtx_true (machine_mode, const_rtx); +extern bool hook_bool_mode_int_unsigned_false (machine_mode, int, unsigned); extern bool hook_bool_mode_rtx_false (machine_mode, rtx); extern bool hook_bool_mode_rtx_true (machine_mode, rtx); extern bool hook_bool_const_rtx_insn_const_rtx_insn_true (const rtx_insn *, diff --git a/gcc/m2/ChangeLog b/gcc/m2/ChangeLog index 6babeb9..ad10605 100644 --- a/gcc/m2/ChangeLog +++ b/gcc/m2/ChangeLog @@ -1,3 +1,57 @@ +2025-08-01 Gaius Mulley <gaiusmod2@gmail.com> + + PR modula2/121354 + * gm2-compiler/M2GenGCC.mod (FoldHigh): Rewrite. + (IsUnboundedArray): New procedure function. + +2025-07-31 Gaius Mulley <gaiusmod2@gmail.com> + + PR modula2/121314 + * mc-boot/GFormatStrings.cc (PerformFormatString): Rebuilt. + * mc-boot/GM2EXCEPTION.cc (M2EXCEPTION_M2Exception): Rebuilt. + * mc-boot/GSFIO.cc (SFIO_GetFileName): Rebuilt. + * mc-boot/GSFIO.h (SFIO_GetFileName): Rebuilt. + * mc-boot/Gdecl.cc: Rebuilt. + * mc-boot/GmcFileName.h: Rebuilt. + * mc/decl.mod (getStringChar): New procedure function. + (getStringContents): Call getStringChar. + (addQuotes): New procedure function. + (foldBinary): Call addQuotes to add delimiting quotes + to the new string. + +2025-07-29 Gaius Mulley <gaiusmod2@gmail.com> + + * gm2-compiler/M2GenGCC.mod (FoldBecomes): Remove all + local variables. + (CodeIndrX): Remove length. + Remove newstr. + * gm2-compiler/M2Range.mod (FoldTypeIndrX): Remove desType. + +2025-07-29 Gaius Mulley <gaiusmod2@gmail.com> + + PR modula2/121289 + * gm2-compiler/M2Students.def (CheckVariableAgainstKeyword): New + parameter tok. + * gm2-compiler/M2Students.mod (CheckVariableAgainstKeyword): New + parameter tok. + Pass tok to PerformVariableKeywordCheck. + (PerformVariableKeywordCheck): New parameter tok. + Pass tok to MetaErrorStringT0. + * gm2-compiler/P2SymBuild.mod (BuildVariable): Pass tok to + CheckVariableAgainstKeyword. + * gm2-libs-iso/LowLong.mod (except): Replace with ... + (exceptSrc): ... this. + * gm2-libs-iso/LowReal.mod (except): Replace with ... + (exceptSrc): ... this. + * gm2-libs-iso/LowShort.mod (except): Replace with ... + (exceptSrc): ... this. + * gm2-libs-iso/Processes.mod (Wait): Replace from with fromCor. + * gm2-libs-iso/RndFile.mod (EndPos): Replace end with endP. + * gm2-libs/SCmdArgs.mod (GetArg): Replace start with startPos. + Replace end with endPos. + (NArg): Replace start with startPos. + Replace end with endPos. + 2025-07-25 David Malcolm <dmalcolm@redhat.com> * gm2-gcc/m2linemap.cc: Update usage of "diagnostic_info" to diff --git a/gcc/m2/gm2-compiler/M2GenGCC.mod b/gcc/m2/gm2-compiler/M2GenGCC.mod index 4a9ced3..2440b2a 100644 --- a/gcc/m2/gm2-compiler/M2GenGCC.mod +++ b/gcc/m2/gm2-compiler/M2GenGCC.mod @@ -2903,9 +2903,6 @@ END CheckStop ; *) PROCEDURE FoldBecomes (p: WalkAction; bb: BasicBlock; quad: CARDINAL) ; -VAR - op : QuadOperator ; - des, op2, expr: CARDINAL ; BEGIN IF DeclaredOperandsBecomes (p, quad) THEN @@ -6442,37 +6439,52 @@ END ResolveHigh ; (* + IsUnboundedArray - return TRUE if symbol is an unbounded array. +*) + +PROCEDURE IsUnboundedArray (sym: CARDINAL) : BOOLEAN ; +BEGIN + IF IsParameter (sym) OR IsVar (sym) + THEN + RETURN IsUnbounded (GetType (sym)) + END ; + RETURN FALSE +END IsUnboundedArray ; + + +(* FoldHigh - if the array is not dynamic then we should be able to remove the HighOp quadruple and assign op1 with - the known compile time HIGH(op3). + the known compile time HIGH(array). *) PROCEDURE FoldHigh (tokenno: CARDINAL; p: WalkAction; - quad: CARDINAL; op1, dim, op3: CARDINAL) ; + quad: CARDINAL; op1, dim, array: CARDINAL) ; VAR t : tree ; location: location_t ; BEGIN - (* firstly ensure that any constant literal is declared *) - TryDeclareConstant(tokenno, op3) ; - location := TokenToLocation(tokenno) ; - IF GccKnowsAbout(op3) AND CompletelyResolved(op3) + (* Firstly ensure that any constant literal is declared. *) + TryDeclareConstant (tokenno, array) ; + location := TokenToLocation (tokenno) ; + IF (NOT IsUnboundedArray (array)) AND + GccKnowsAbout (array) AND CompletelyResolved (array) THEN - t := ResolveHigh(tokenno, dim, op3) ; - (* fine, we can take advantage of this and fold constants *) - IF IsConst(op1) AND (t#tree(NIL)) + t := ResolveHigh (tokenno, dim, array) ; + (* We can take advantage of this and fold constants. *) + IF IsConst (op1) AND (t # tree (NIL)) THEN - PutConst(op1, Cardinal) ; - AddModGcc(op1, - DeclareKnownConstant(location, GetCardinalType(), - ToCardinal(location, t))) ; - p(op1) ; + PutConst (op1, Cardinal) ; + AddModGcc (op1, + DeclareKnownConstant (location, GetCardinalType (), + ToCardinal (location, t))) ; + p (op1) ; NoChange := FALSE ; - SubQuad(quad) + SubQuad (quad) ELSE - (* we can still fold the expression, but not the assignment, however, we will - not do this here but in CodeHigh - *) + (* We can still fold the expression but not the assignment, + we will not do this here but in CodeHigh when the result + can be stored. *) END END END FoldHigh ; @@ -8154,8 +8166,6 @@ VAR rightpos, typepos, indrxpos : CARDINAL ; - length, - newstr : tree ; location : location_t ; BEGIN GetQuadOtok (quad, indrxpos, op, left, type, right, diff --git a/gcc/m2/gm2-compiler/M2Range.mod b/gcc/m2/gm2-compiler/M2Range.mod index dcac2ba..f1516d3 100644 --- a/gcc/m2/gm2-compiler/M2Range.mod +++ b/gcc/m2/gm2-compiler/M2Range.mod @@ -1869,14 +1869,12 @@ END FoldTypeAssign ; PROCEDURE FoldTypeIndrX (q: CARDINAL; tokenNo: CARDINAL; des, expr: CARDINAL; r: CARDINAL) ; VAR - desType, exprType: CARDINAL ; BEGIN (* Need to skip over a variable or temporary in des and expr so long as expr is not a procedure. In the case of des = *expr, both expr and des will be variables due to the property of indirection. *) - desType := GetType (des) ; IF IsProcedure (expr) THEN (* Must not GetType for a procedure as it gives the return type. *) diff --git a/gcc/m2/gm2-compiler/M2Students.def b/gcc/m2/gm2-compiler/M2Students.def index 7d67a0a..a3ecdcd 100644 --- a/gcc/m2/gm2-compiler/M2Students.def +++ b/gcc/m2/gm2-compiler/M2Students.def @@ -39,7 +39,7 @@ EXPORT QUALIFIED StudentVariableCheck, CheckVariableAgainstKeyword ; as a keyword except for its case. *) -PROCEDURE CheckVariableAgainstKeyword (name: Name) ; +PROCEDURE CheckVariableAgainstKeyword (tok: CARDINAL; name: Name) ; (* diff --git a/gcc/m2/gm2-compiler/M2Students.mod b/gcc/m2/gm2-compiler/M2Students.mod index e539eb0..3df160a 100644 --- a/gcc/m2/gm2-compiler/M2Students.mod +++ b/gcc/m2/gm2-compiler/M2Students.mod @@ -25,7 +25,7 @@ IMPLEMENTATION MODULE M2Students ; FROM SymbolTable IMPORT FinalSymbol, IsVar, IsProcedure, IsModule, GetMainModule, IsType, NulSym, IsRecord, GetSymName, GetNth, GetNthProcedure, GetDeclaredMod, NoOfParam ; FROM NameKey IMPORT GetKey, WriteKey, MakeKey, IsSameExcludingCase, NulName, makekey, KeyToCharStar ; -FROM M2MetaError IMPORT MetaErrorString0, MetaError2 ; +FROM M2MetaError IMPORT MetaErrorStringT0, MetaError2 ; FROM Lists IMPORT List, InitList, IsItemInList, IncludeItemIntoList ; FROM M2Reserved IMPORT IsReserved, toktype ; FROM DynamicStrings IMPORT String, InitString, KillString, ToUpper, InitStringCharStar, string, Mark, ToUpper, Dup ; @@ -78,11 +78,11 @@ END IsNotADuplicateName ; as a keyword except for its case. *) -PROCEDURE CheckVariableAgainstKeyword (name: Name) ; +PROCEDURE CheckVariableAgainstKeyword (tok: CARDINAL; name: Name) ; BEGIN IF StyleChecking THEN - PerformVariableKeywordCheck (name) + PerformVariableKeywordCheck (tok, name) END END CheckVariableAgainstKeyword ; @@ -91,7 +91,7 @@ END CheckVariableAgainstKeyword ; PerformVariableKeywordCheck - performs the check and constructs the metaerror notes if appropriate. *) -PROCEDURE PerformVariableKeywordCheck (name: Name) ; +PROCEDURE PerformVariableKeywordCheck (tok: CARDINAL; name: Name) ; VAR upper : Name ; token : toktype ; @@ -105,9 +105,11 @@ BEGIN THEN IF IsNotADuplicateName (name) THEN - MetaErrorString0 (Sprintf2 (Mark (InitString ('either the identifier has the same name as a keyword or alternatively a keyword has the wrong case ({%%K%s} and {!%%O:{%%K%s}})')), - upperS, orig)) ; - MetaErrorString0 (Sprintf1 (Mark (InitString ('the symbol name {!%%O:{%%K%s}} is legal as an identifier, however as such it might cause confusion and is considered bad programming practice')), orig)) + MetaErrorStringT0 (tok, + Sprintf2 (Mark (InitString ('either the identifier has the same name as a keyword or alternatively a keyword has the wrong case ({%%K%s} and {!%%O:{%%K%s}})')), + upperS, orig)) ; + MetaErrorStringT0 (tok, + Sprintf1 (Mark (InitString ('the symbol name {!%%O:{%%K%s}} is legal as an identifier, however as such it might cause confusion and is considered bad programming practice')), orig)) END END ; upperS := KillString (upperS) ; diff --git a/gcc/m2/gm2-compiler/P2SymBuild.mod b/gcc/m2/gm2-compiler/P2SymBuild.mod index 3bb3e47..54e624f 100644 --- a/gcc/m2/gm2-compiler/P2SymBuild.mod +++ b/gcc/m2/gm2-compiler/P2SymBuild.mod @@ -1179,8 +1179,8 @@ BEGIN PopT (n) ; i := 1 ; WHILE i <= n DO - CheckVariableAgainstKeyword (OperandT (n+1-i)) ; tok := OperandTok (n+1-i) ; + CheckVariableAgainstKeyword (tok, OperandT (n+1-i)) ; Var := MakeVar (tok, OperandT (n+1-i)) ; AtAddress := OperandA (n+1-i) ; IF AtAddress # NulSym diff --git a/gcc/m2/gm2-libs-iso/LowLong.mod b/gcc/m2/gm2-libs-iso/LowLong.mod index 92c7d91..f611923 100644 --- a/gcc/m2/gm2-libs-iso/LowLong.mod +++ b/gcc/m2/gm2-libs-iso/LowLong.mod @@ -182,7 +182,7 @@ BEGIN IF n<0 THEN (* exception raised *) - RAISE(except, ORD(badparam), + RAISE(exceptSrc, ORD(badparam), 'LowLong.trunc: cannot truncate to a negative number of digits') ; RETURN x ELSE @@ -230,7 +230,7 @@ BEGIN IF n<0 THEN (* exception raised *) - RAISE(except, ORD(badparam), + RAISE(exceptSrc, ORD(badparam), 'LowLong.round: cannot round to a negative number of digits') ; RETURN x ELSE @@ -287,12 +287,12 @@ END currentMode ; PROCEDURE IsLowException () : BOOLEAN ; BEGIN - RETURN( IsExceptionalExecution() AND IsCurrentSource(except) ) + RETURN( IsExceptionalExecution () AND IsCurrentSource (exceptSrc) ) END IsLowException ; VAR - except: ExceptionSource ; + exceptSrc: ExceptionSource ; BEGIN - AllocateSource(except) + AllocateSource (exceptSrc) END LowLong. diff --git a/gcc/m2/gm2-libs-iso/LowReal.mod b/gcc/m2/gm2-libs-iso/LowReal.mod index 580f36b..6d9ea00 100644 --- a/gcc/m2/gm2-libs-iso/LowReal.mod +++ b/gcc/m2/gm2-libs-iso/LowReal.mod @@ -183,8 +183,8 @@ BEGIN IF n<0 THEN (* exception raised *) - RAISE(except, ORD(badparam), - 'LowReal.trunc: cannot truncate to a negative number of digits') ; + RAISE (exceptSrc, ORD(badparam), + 'LowReal.trunc: cannot truncate to a negative number of digits') ; RETURN x ELSE r := dtoa(x, maxsignificant, 100, point, sign) ; @@ -231,8 +231,8 @@ BEGIN IF n<0 THEN (* exception raised *) - RAISE(except, ORD(badparam), - 'LowReal.round: cannot round to a negative number of digits') ; + RAISE (exceptSrc, ORD(badparam), + 'LowReal.round: cannot round to a negative number of digits') ; RETURN x ELSE s := RealToFloatString(x, n) ; @@ -288,12 +288,12 @@ END currentMode ; PROCEDURE IsLowException () : BOOLEAN ; BEGIN - RETURN( IsExceptionalExecution() AND IsCurrentSource(except) ) + RETURN( IsExceptionalExecution () AND IsCurrentSource (exceptSrc) ) END IsLowException ; VAR - except: ExceptionSource ; + exceptSrc: ExceptionSource ; BEGIN - AllocateSource(except) + AllocateSource (exceptSrc) END LowReal. diff --git a/gcc/m2/gm2-libs-iso/LowShort.mod b/gcc/m2/gm2-libs-iso/LowShort.mod index 8531a88..62e4887 100644 --- a/gcc/m2/gm2-libs-iso/LowShort.mod +++ b/gcc/m2/gm2-libs-iso/LowShort.mod @@ -183,8 +183,8 @@ BEGIN IF n<0 THEN (* exception raised *) - RAISE(except, ORD(badparam), - 'LowLong.trunc: cannot truncate to a negative number of digits') ; + RAISE (exceptSrc, ORD(badparam), + 'LowLong.trunc: cannot truncate to a negative number of digits') ; RETURN x ELSE r := dtoa(x, maxsignificant, 100, point, sign) ; @@ -231,8 +231,8 @@ BEGIN IF n<0 THEN (* exception raised *) - RAISE(except, ORD(badparam), - 'LowLong.round: cannot round to a negative number of digits') ; + RAISE (exceptSrc, ORD(badparam), + 'LowLong.round: cannot round to a negative number of digits') ; RETURN x ELSE s := RealToFloatString(x, n) ; @@ -288,12 +288,12 @@ END currentMode ; PROCEDURE IsLowException () : BOOLEAN ; BEGIN - RETURN( IsExceptionalExecution() AND IsCurrentSource(except) ) + RETURN( IsExceptionalExecution () AND IsCurrentSource (exceptSrc) ) END IsLowException ; VAR - except: ExceptionSource ; + exceptSrc: ExceptionSource ; BEGIN - AllocateSource(except) + AllocateSource (exceptSrc) END LowShort. diff --git a/gcc/m2/gm2-libs-iso/Processes.mod b/gcc/m2/gm2-libs-iso/Processes.mod index 8ef22c0..b0c1b69 100644 --- a/gcc/m2/gm2-libs-iso/Processes.mod +++ b/gcc/m2/gm2-libs-iso/Processes.mod @@ -441,7 +441,7 @@ PROCEDURE Wait ; VAR calling, best : ProcessId ; - from : COROUTINE ; + fromCor: COROUTINE ; BEGIN IF debugging THEN @@ -451,17 +451,17 @@ BEGIN OnWaitingQueue (calling) ; best := chooseProcess () ; currentId := best ; - from := calling^.context ; + fromCor := calling^.context ; IF debugging THEN displayProcesses ("Wait about to perform IOTRANSFER") END ; - IOTRANSFER (from, currentId^.context) ; + IOTRANSFER (fromCor, currentId^.context) ; IF debugging THEN displayProcesses ("Wait after IOTRANSFER") END ; - currentId^.context := from ; + currentId^.context := fromCor ; currentId := calling ; OnReadyQueue (calling) ; IF debugging diff --git a/gcc/m2/gm2-libs-iso/RndFile.mod b/gcc/m2/gm2-libs-iso/RndFile.mod index e04cd8f..0a2264a 100644 --- a/gcc/m2/gm2-libs-iso/RndFile.mod +++ b/gcc/m2/gm2-libs-iso/RndFile.mod @@ -398,9 +398,9 @@ PROCEDURE EndPos (cid: ChanId): FilePos; position after which there have been no writes. *) VAR - d : DeviceTablePtr ; - end, - old: FilePos ; + d : DeviceTablePtr ; + endP, + old : FilePos ; BEGIN IF IsRndFile(cid) THEN @@ -410,9 +410,9 @@ BEGIN old := CurrentPos(cid) ; FIO.SetPositionFromEnd(RTio.GetFile(cid), 0) ; checkErrno(dev, d) ; - end := CurrentPos(cid) ; + endP := CurrentPos(cid) ; FIO.SetPositionFromBeginning(RTio.GetFile(cid), old) ; - RETURN( end ) + RETURN( endP ) END ELSE RAISEdevException(cid, did, IOChan.wrongDevice, diff --git a/gcc/m2/gm2-libs/SCmdArgs.mod b/gcc/m2/gm2-libs/SCmdArgs.mod index ed76fc4..8443d5f 100644 --- a/gcc/m2/gm2-libs/SCmdArgs.mod +++ b/gcc/m2/gm2-libs/SCmdArgs.mod @@ -132,26 +132,27 @@ PROCEDURE GetArg (CmdLine: String; VAR i : CARDINAL ; sn, - start, end: INTEGER ; + startPos, + endPos : INTEGER ; ch : CHAR ; BEGIN i := 0 ; - start := 0 ; - end := Length(CmdLine) ; + startPos := 0 ; + endPos := Length(CmdLine) ; WHILE i<n DO - start := skipWhite(CmdLine, start, end) ; - sn := skipNextArg(CmdLine, start, end) ; - IF sn<end + startPos := skipWhite(CmdLine, startPos, endPos) ; + sn := skipNextArg(CmdLine, startPos, endPos) ; + IF sn<endPos THEN - start := sn ; + startPos := sn ; INC(i) ELSE RETURN( FALSE ) END END ; - start := skipWhite(CmdLine, start, end) ; - sn := skipNextArg(CmdLine, start, end) ; - Argi := Slice(CmdLine, start, sn) ; + startPos := skipWhite(CmdLine, startPos, endPos) ; + sn := skipNextArg(CmdLine, startPos, endPos) ; + Argi := Slice(CmdLine, startPos, sn) ; RETURN( TRUE ) END GetArg ; @@ -165,17 +166,18 @@ PROCEDURE Narg (CmdLine: String) : CARDINAL ; VAR n : CARDINAL ; s, - start, end: INTEGER ; + startPos, + endPos : INTEGER ; BEGIN n := 0 ; - start := 0 ; - end := Length(CmdLine) ; + startPos := 0 ; + endPos := Length(CmdLine) ; LOOP - start := skipWhite(CmdLine, start, end) ; - s := skipNextArg(CmdLine, start, end) ; - IF s<end + startPos := skipWhite(CmdLine, startPos, endPos) ; + s := skipNextArg(CmdLine, startPos, endPos) ; + IF s<endPos THEN - start := s ; + startPos := s ; INC(n) ELSE RETURN( n ) diff --git a/gcc/m2/mc-boot/GFormatStrings.cc b/gcc/m2/mc-boot/GFormatStrings.cc index f4c4fd6..ad7e7d8 100644 --- a/gcc/m2/mc-boot/GFormatStrings.cc +++ b/gcc/m2/mc-boot/GFormatStrings.cc @@ -464,7 +464,7 @@ static DynamicStrings_String PerformFormatString (DynamicStrings_String fmt, int /* avoid dangling else. */ afterperc += 1; Cast ((unsigned char *) &u, (sizeof (u)-1), (const unsigned char *) w, _w_high); - in = DynamicStrings_ConCat (in, DynamicStrings_Slice (fmt, (*startpos), nextperc)); + in = Copy (fmt, in, (*startpos), nextperc); in = DynamicStrings_ConCat (in, StringConvert_CardinalToString (u, static_cast<unsigned int> (width), leader, 16, true)); (*startpos) = afterperc; DSdbExit (static_cast<DynamicStrings_String> (NULL)); @@ -475,7 +475,7 @@ static DynamicStrings_String PerformFormatString (DynamicStrings_String fmt, int /* avoid dangling else. */ afterperc += 1; Cast ((unsigned char *) &u, (sizeof (u)-1), (const unsigned char *) w, _w_high); - in = DynamicStrings_ConCat (in, DynamicStrings_Slice (fmt, (*startpos), nextperc)); + in = Copy (fmt, in, (*startpos), nextperc); in = DynamicStrings_ConCat (in, StringConvert_CardinalToString (u, static_cast<unsigned int> (width), leader, 10, false)); (*startpos) = afterperc; DSdbExit (static_cast<DynamicStrings_String> (NULL)); diff --git a/gcc/m2/mc-boot/GM2EXCEPTION.cc b/gcc/m2/mc-boot/GM2EXCEPTION.cc index 62d47f0..6baff3c 100644 --- a/gcc/m2/mc-boot/GM2EXCEPTION.cc +++ b/gcc/m2/mc-boot/GM2EXCEPTION.cc @@ -34,7 +34,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see typedef struct { PROC_t proc; } PROC; # endif -# include "Gmcrts.h" #define _M2EXCEPTION_C #include "GM2EXCEPTION.h" @@ -51,18 +50,19 @@ extern "C" M2EXCEPTION_M2Exceptions M2EXCEPTION_M2Exception (void) /* If the program or coroutine is in the exception state then return the enumeration value representing the exception cause. If it is not in the exception state then - raises and exception (exException). */ + raises an exException exception. */ e = RTExceptions_GetExceptionBlock (); n = RTExceptions_GetNumber (e); if (n == (UINT_MAX)) { RTExceptions_Raise ( ((unsigned int) (M2EXCEPTION_exException)), const_cast<void*> (static_cast<const void*>("../../gcc/m2/gm2-libs/M2EXCEPTION.mod")), 47, 6, const_cast<void*> (static_cast<const void*>("M2Exception")), const_cast<void*> (static_cast<const void*>("current coroutine is not in the exceptional execution state"))); + return M2EXCEPTION_exException; } else { return (M2EXCEPTION_M2Exceptions) (n); } - ReturnException ("../../gcc/m2/gm2-libs/M2EXCEPTION.def", 25, 1); + /* static analysis guarentees a RETURN statement will be used before here. */ __builtin_unreachable (); } diff --git a/gcc/m2/mc-boot/GSFIO.cc b/gcc/m2/mc-boot/GSFIO.cc index 6ae0d5e..f8c13d3 100644 --- a/gcc/m2/mc-boot/GSFIO.cc +++ b/gcc/m2/mc-boot/GSFIO.cc @@ -99,6 +99,13 @@ extern "C" DynamicStrings_String SFIO_WriteS (FIO_File file, DynamicStrings_Stri extern "C" DynamicStrings_String SFIO_ReadS (FIO_File file); +/* + GetFileName - return a new string containing the name of the file. + The string should be killed by the caller. +*/ + +extern "C" DynamicStrings_String SFIO_GetFileName (FIO_File file); + /* Exists - returns TRUE if a file named, fname exists for reading. @@ -207,6 +214,19 @@ extern "C" DynamicStrings_String SFIO_ReadS (FIO_File file) __builtin_unreachable (); } + +/* + GetFileName - return a new string containing the name of the file. + The string should be killed by the caller. +*/ + +extern "C" DynamicStrings_String SFIO_GetFileName (FIO_File file) +{ + return DynamicStrings_InitStringCharStar (FIO_getFileName (file)); + /* static analysis guarentees a RETURN statement will be used before here. */ + __builtin_unreachable (); +} + extern "C" void _M2_SFIO_init (__attribute__((unused)) int argc, __attribute__((unused)) char *argv[], __attribute__((unused)) char *envp[]) { } diff --git a/gcc/m2/mc-boot/GSFIO.h b/gcc/m2/mc-boot/GSFIO.h index 42ffc48..93c8099 100644 --- a/gcc/m2/mc-boot/GSFIO.h +++ b/gcc/m2/mc-boot/GSFIO.h @@ -103,6 +103,13 @@ EXTERN DynamicStrings_String SFIO_WriteS (FIO_File file, DynamicStrings_String s */ EXTERN DynamicStrings_String SFIO_ReadS (FIO_File file); + +/* + GetFileName - return a new string containing the name of the file. + The string should be killed by the caller. +*/ + +EXTERN DynamicStrings_String SFIO_GetFileName (FIO_File file); # ifdef __cplusplus } # endif diff --git a/gcc/m2/mc-boot/Gdecl.cc b/gcc/m2/mc-boot/Gdecl.cc index ae03483..94ea098 100644 --- a/gcc/m2/mc-boot/Gdecl.cc +++ b/gcc/m2/mc-boot/Gdecl.cc @@ -2550,6 +2550,14 @@ static bool isLeafString (decl_node__opaque n); static DynamicStrings_String getLiteralStringContents (decl_node__opaque n); /* + getStringChar - if the string is delimited by single + or double quotes then strip both + quotes from the string. +*/ + +static DynamicStrings_String getStringChar (decl_node__opaque n); + +/* getStringContents - return the string contents of a constant, literal, string or a constexp node. */ @@ -2569,7 +2577,13 @@ static nameKey_Name addNames (decl_node__opaque a, decl_node__opaque b); static decl_node__opaque resolveString (decl_node__opaque n); /* - foldBinary - + addQuotes - adds delimiter quote char to string. +*/ + +static DynamicStrings_String addQuotes (DynamicStrings_String s, char quote); + +/* + foldBinary - attempt to fold binary + for string constants. */ static decl_node__opaque foldBinary (decl_nodeT k, decl_node__opaque l, decl_node__opaque r, decl_node__opaque res); @@ -7590,6 +7604,32 @@ static DynamicStrings_String getLiteralStringContents (decl_node__opaque n) /* + getStringChar - if the string is delimited by single + or double quotes then strip both + quotes from the string. +*/ + +static DynamicStrings_String getStringChar (decl_node__opaque n) +{ + DynamicStrings_String s; + + s = getString (n); + if (((DynamicStrings_char (s, 0)) == '\'') && ((DynamicStrings_char (s, -1)) == '\'')) + { + s = DynamicStrings_Slice (s, 1, -1); + } + else if (((DynamicStrings_char (s, 0)) == '"') && ((DynamicStrings_char (s, -1)) == '"')) + { + /* avoid dangling else. */ + s = DynamicStrings_Slice (s, 1, -1); + } + return s; + /* static analysis guarentees a RETURN statement will be used before here. */ + __builtin_unreachable (); +} + + +/* getStringContents - return the string contents of a constant, literal, string or a constexp node. */ @@ -7608,7 +7648,7 @@ static DynamicStrings_String getStringContents (decl_node__opaque n) else if (isString (n)) { /* avoid dangling else. */ - return getString (n); + return getStringChar (n); } else if (isConstExp (n)) { @@ -7672,11 +7712,29 @@ static decl_node__opaque resolveString (decl_node__opaque n) /* - foldBinary - + addQuotes - adds delimiter quote char to string. +*/ + +static DynamicStrings_String addQuotes (DynamicStrings_String s, char quote) +{ + DynamicStrings_String qs; + + s = DynamicStrings_ConCatChar (s, quote); + qs = DynamicStrings_InitStringChar (quote); + qs = DynamicStrings_ConCat (qs, DynamicStrings_Mark (s)); + return qs; + /* static analysis guarentees a RETURN statement will be used before here. */ + __builtin_unreachable (); +} + + +/* + foldBinary - attempt to fold binary + for string constants. */ static decl_node__opaque foldBinary (decl_nodeT k, decl_node__opaque l, decl_node__opaque r, decl_node__opaque res) { + char qc; decl_node__opaque n; DynamicStrings_String ls; DynamicStrings_String rs; @@ -7686,7 +7744,12 @@ static decl_node__opaque foldBinary (decl_nodeT k, decl_node__opaque l, decl_nod { ls = getStringContents (l); rs = getStringContents (r); + qc = '\''; + /* Add unquoted contents. */ ls = DynamicStrings_Add (ls, rs); + /* Add quote. */ + ls = addQuotes (ls, qc); + /* Build new string. */ n = static_cast<decl_node__opaque> (decl_makeString (nameKey_makekey (DynamicStrings_string (ls)))); ls = DynamicStrings_KillString (ls); rs = DynamicStrings_KillString (rs); @@ -22789,7 +22852,7 @@ static decl_node__opaque doDupExpr (decl_node__opaque n) break; case decl_length: - M2RTS_HALT (-1); + M2RTS_HALT (-1); /* length should have been converted into unary. */ __builtin_unreachable (); break; diff --git a/gcc/m2/mc-boot/GmcFileName.h b/gcc/m2/mc-boot/GmcFileName.h index 11f1512..6c7ec75 100644 --- a/gcc/m2/mc-boot/GmcFileName.h +++ b/gcc/m2/mc-boot/GmcFileName.h @@ -50,7 +50,7 @@ extern "C" { given a module and an extension. This file name length will be operating system specific. String, Extension, is concatenated onto - Module and thus it is safe to `Mark' the extension + Module and thus it is safe to Mark the extension for garbage collection. */ diff --git a/gcc/m2/mc/decl.mod b/gcc/m2/mc/decl.mod index 342487e..197ca5e 100644 --- a/gcc/m2/mc/decl.mod +++ b/gcc/m2/mc/decl.mod @@ -4643,6 +4643,28 @@ END getLiteralStringContents ; (* + getStringChar - if the string is delimited by single + or double quotes then strip both + quotes from the string. +*) + +PROCEDURE getStringChar (n: node) : String ; +VAR + s: String ; +BEGIN + s := getString (n) ; + IF (DynamicStrings.char (s, 0) = "'") AND (DynamicStrings.char (s, -1) = "'") + THEN + s := DynamicStrings.Slice (s, 1, -1) + ELSIF (DynamicStrings.char (s, 0) = '"') AND (DynamicStrings.char (s, -1) = '"') + THEN + s := DynamicStrings.Slice (s, 1, -1) + END ; + RETURN s +END getStringChar ; + + +(* getStringContents - return the string contents of a constant, literal, string or a constexp node. *) @@ -4657,7 +4679,7 @@ BEGIN RETURN getLiteralStringContents (n) ELSIF isString (n) THEN - RETURN getString (n) + RETURN getStringChar (n) ELSIF isConstExp (n) THEN RETURN getStringContents (n^.unaryF.arg) @@ -4709,11 +4731,27 @@ END resolveString ; (* - foldBinary - + addQuotes - adds delimiter quote char to string. +*) + +PROCEDURE addQuotes (s: String; quote: CHAR) : String ; +VAR + qs: String ; +BEGIN + s := DynamicStrings.ConCatChar (s, quote) ; + qs := DynamicStrings.InitStringChar (quote) ; + qs := DynamicStrings.ConCat (qs, DynamicStrings.Mark (s)) ; + RETURN qs +END addQuotes ; + + +(* + foldBinary - attempt to fold binary + for string constants. *) PROCEDURE foldBinary (k: nodeT; l, r: node; res: node) : node ; VAR + qc: CHAR ; n : node ; ls, rs: String ; @@ -4723,7 +4761,12 @@ BEGIN THEN ls := getStringContents (l) ; rs := getStringContents (r) ; + qc := "'" ; + (* Add unquoted contents. *) ls := DynamicStrings.Add (ls, rs) ; + (* Add quote. *) + ls := addQuotes (ls, qc) ; + (* Build new string. *) n := makeString (makekey (DynamicStrings.string (ls))) ; ls := DynamicStrings.KillString (ls) ; rs := DynamicStrings.KillString (rs) diff --git a/gcc/machmode.h b/gcc/machmode.h index 467681d9..2f2d349 100644 --- a/gcc/machmode.h +++ b/gcc/machmode.h @@ -958,7 +958,8 @@ private: /* Find the best mode to use to access a bit field. */ -extern bool get_best_mode (int, int, poly_uint64, poly_uint64, unsigned int, +extern bool get_best_mode (HOST_WIDE_INT, HOST_WIDE_INT, + poly_uint64, poly_uint64, unsigned int, unsigned HOST_WIDE_INT, bool, scalar_int_mode *); /* Determine alignment, 1<=result<=BIGGEST_ALIGNMENT. */ diff --git a/gcc/match.pd b/gcc/match.pd index 4903552..82e6e29 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -3595,22 +3595,34 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) return (T)x; } while WT is uint128_t, T is uint8_t, uint16_t, uint32_t or uint64_t. */ - (convert@4 (min (widen_mult:c@3 (convert@5 (convert @0)) - (convert@6 (convert @1))) + (convert (min (widen_mult:c@3 (convert@4 (convert @0)) + (convert@5 (convert @1))) INTEGER_CST@2)) - (if (types_match (type, @0, @1) && types_match (type, @4)) + (if (types_match (type, @0, @1)) (with { unsigned prec = TYPE_PRECISION (type); unsigned widen_prec = TYPE_PRECISION (TREE_TYPE (@3)); + unsigned cvt4_prec = TYPE_PRECISION (TREE_TYPE (@4)); unsigned cvt5_prec = TYPE_PRECISION (TREE_TYPE (@5)); - unsigned cvt6_prec = TYPE_PRECISION (TREE_TYPE (@6)); wide_int c2 = wi::to_wide (@2); wide_int max = wi::mask (prec, false, widen_prec); bool c2_is_max_p = wi::eq_p (c2, max); - bool widen_mult_p = cvt5_prec == cvt6_prec && widen_prec == cvt6_prec * 2; + bool widen_mult_p = cvt4_prec == cvt5_prec && widen_prec == cvt5_prec * 2; } (if (widen_prec > prec && c2_is_max_p && widen_mult_p))))) + (match (unsigned_integer_sat_mul @0 @1) + (convert (min (mult:c@3 (convert @0) (convert @1)) INTEGER_CST@2)) + (if (types_match (type, @0, @1)) + (with + { + unsigned prec = TYPE_PRECISION (type); + unsigned widen_prec = TYPE_PRECISION (TREE_TYPE (@3)); + wide_int c2 = wi::to_wide (@2); + wide_int max = wi::mask (prec, false, widen_prec); + bool c2_is_max_p = wi::eq_p (c2, max); + } + (if (widen_prec > prec && c2_is_max_p))))) ) /* The boundary condition for case 10: IMM = 1: diff --git a/gcc/optc-save-gen.awk b/gcc/optc-save-gen.awk index a3d7e5a..31756ec 100644 --- a/gcc/optc-save-gen.awk +++ b/gcc/optc-save-gen.awk @@ -1313,6 +1313,12 @@ for (i = 0; i < n_opts; i++) { # offloading is enabled. if (flag_set_p("Target", flags[i])) var_target_opt[n_opt_val] = 1; + + # These options should not be passed from host to target, but + # are not actually target specific. + if (flag_set_p("NoOffload", flags[i])) + var_target_opt[n_opt_val] = 2; + n_opt_val++; } } @@ -1393,7 +1399,7 @@ for (i = 0; i < n_opt_val; i++) { # Do not stream out target-specific opts if offloading is # enabled. if (var_target_opt[i]) - print " if (!lto_stream_offload_p)" + print " if (!lto_stream_offload_p) {" # If applicable, encode the streamed value. if (var_opt_optimize_init[i]) { print " if (" var_opt_optimize_init[i] " > (" var_opt_val_type[i] ") 10)"; @@ -1403,6 +1409,8 @@ for (i = 0; i < n_opt_val; i++) { } else { print " bp_pack_var_len_" sgn " (bp, ptr->" name");"; } + if (var_target_opt[i]) + print "}" } } print " for (size_t i = 0; i < ARRAY_SIZE (ptr->explicit_mask); i++)"; @@ -1418,10 +1426,14 @@ print " struct cl_optimization *ptr ATTRIBUTE_UNUSED)" print "{"; for (i = 0; i < n_opt_val; i++) { name = var_opt_val[i] - if (var_target_opt[i]) { + if (var_target_opt[i] == 1) { print "#ifdef ACCEL_COMPILER" print "#error accel compiler cannot define Optimization attribute for target-specific option " name; print "#else" + } else if (var_target_opt[i] == 2) { + print "#ifdef ACCEL_COMPILER" + print " ptr->" name " = global_options." name ";" + print "#else" } otype = var_opt_val_type[i]; if (otype ~ "^const char \\**$") { @@ -1489,6 +1501,9 @@ for (i = 0; i < n_opts; i++) { if (flag_set_p("Warning", flags[i])) continue; + if (flag_set_p("NoOffload", flags[i])) + continue; + if (name in checked_options) continue; checked_options[name]++ diff --git a/gcc/output.h b/gcc/output.h index 0c329ff..51c2d36 100644 --- a/gcc/output.h +++ b/gcc/output.h @@ -545,6 +545,9 @@ extern GTY(()) section *bss_noswitch_section; extern GTY(()) section *in_section; extern GTY(()) bool in_cold_section_p; +/* MAX bit alignment for mergable sections. */ +#define MAX_ALIGN_MERGABLE 256 + extern section *get_unnamed_section (unsigned int, void (*) (const char *), const char *); extern section *get_section (const char *, unsigned int, tree, @@ -557,6 +560,9 @@ extern rtx get_section_anchor (struct object_block *, HOST_WIDE_INT, extern section *mergeable_constant_section (machine_mode, unsigned HOST_WIDE_INT, unsigned int); +extern section *mergeable_constant_section (unsigned HOST_WIDE_INT, + unsigned HOST_WIDE_INT, + unsigned int); extern section *function_section (tree); extern section *unlikely_text_section (void); extern section *current_function_section (void); diff --git a/gcc/params.opt b/gcc/params.opt index c7d5fd4..ac1b2c7 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -1226,7 +1226,7 @@ Common Joined UInteger Var(param_use_canonical_types) Init(1) IntegerRange(0, 1) Whether to use canonical types. -param=vect-epilogues-nomask= -Common Joined UInteger Var(param_vect_epilogues_nomask) Init(1) IntegerRange(0, 1) Param Optimization +Common Joined UInteger Var(param_vect_epilogues_nomask) Init(1) IntegerRange(0, 1) Param Optimization NoOffload Enable loop epilogue vectorization using smaller vector size. -param=vect-max-layout-candidates= @@ -1246,11 +1246,11 @@ Common Joined UInteger Var(param_vect_max_version_for_alignment_checks) Init(6) Bound on number of runtime checks inserted by the vectorizer's loop versioning for alignment check. -param=vect-partial-vector-usage= -Common Joined UInteger Var(param_vect_partial_vector_usage) Init(2) IntegerRange(0, 2) Param Optimization +Common Joined UInteger Var(param_vect_partial_vector_usage) Init(2) IntegerRange(0, 2) Param Optimization NoOffload Controls how loop vectorizer uses partial vectors. 0 means never, 1 means only for loops whose need to iterate can be removed, 2 means for all loops. The default value is 2. -param=vect-inner-loop-cost-factor= -Common Joined UInteger Var(param_vect_inner_loop_cost_factor) Init(50) IntegerRange(1, 10000) Param Optimization +Common Joined UInteger Var(param_vect_inner_loop_cost_factor) Init(50) IntegerRange(1, 10000) Param Optimization NoOffload The maximum factor which the loop vectorizer applies to the cost of statements in an inner loop relative to the loop being vectorized. -param=vect-induction-float= diff --git a/gcc/predict.cc b/gcc/predict.cc index 872f54d..5639d81 100644 --- a/gcc/predict.cc +++ b/gcc/predict.cc @@ -245,7 +245,10 @@ unlikely_executed_edge_p (edge e) { return (e->src->count == profile_count::zero () || e->probability == profile_probability::never ()) - || (e->flags & (EDGE_EH | EDGE_FAKE)); + || (e->flags & EDGE_FAKE) + /* If we read profile and know EH edge is executed, trust it. + Otherwise we consider EH edges never executed. */ + || ((e->flags & EDGE_EH) && !e->probability.reliable_p ()); } /* Return true if edge E of function FUN is probably never executed. */ @@ -830,6 +833,26 @@ unlikely_executed_stmt_p (gimple *stmt) { if (!is_gimple_call (stmt)) return false; + + /* Those calls are inserted by optimizers when code is known to be + unreachable or undefined. */ + if (gimple_call_builtin_p (stmt, BUILT_IN_UNREACHABLE) + || gimple_call_builtin_p (stmt, BUILT_IN_UNREACHABLE_TRAP) + || gimple_call_builtin_p (stmt, BUILT_IN_TRAP)) + return false; + + /* Checks below do not need to be fully reliable. Cold attribute may be + misplaced by user and in the presence of comdat we may result in call to + function with 0 profile having non-zero profile. + + We later detect that profile is lost and will drop the profile of the + comdat. + + So if we think profile count is reliable, do not try to apply these + heuristics. */ + if (gimple_bb (stmt)->count.reliable_p () + && gimple_bb (stmt)->count.nonzero_p ()) + return gimple_bb (stmt)->count == profile_count::zero (); /* NORETURN attribute alone is not strong enough: exit() may be quite likely executed once during program run. */ if (gimple_call_fntype (stmt) @@ -3269,7 +3292,8 @@ tree_estimate_probability (bool dry_run) calculate_dominance_info (CDI_POST_DOMINATORS); /* Decide which edges are known to be unlikely. This improves later branch prediction. */ - determine_unlikely_bbs (); + if (!dry_run) + determine_unlikely_bbs (); bb_predictions = new hash_map<const_basic_block, edge_prediction *>; ssa_expected_value = new hash_map<int_hash<unsigned, 0>, expected_value>; diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc index cbe61b4..c723a07 100644 --- a/gcc/simplify-rtx.cc +++ b/gcc/simplify-rtx.cc @@ -8344,6 +8344,15 @@ simplify_context::simplify_subreg (machine_mode outermode, rtx op, return simplify_gen_binary (GET_CODE (op), outermode, op0, op1); } + /* Attempt to simplify WORD_MODE SUBREGs of unary bitwise expression. */ + if (outermode == word_mode && GET_CODE (op) == NOT + && SCALAR_INT_MODE_P (innermode)) + { + rtx op0 = simplify_subreg (outermode, XEXP (op, 0), innermode, byte); + if (op0) + return simplify_gen_unary (GET_CODE (op), outermode, op0, outermode); + } + scalar_int_mode int_outermode, int_innermode; if (is_a <scalar_int_mode> (outermode, &int_outermode) && is_a <scalar_int_mode> (innermode, &int_innermode) @@ -8394,9 +8403,45 @@ simplify_context::simplify_subreg (machine_mode outermode, rtx op, && VECTOR_MODE_P (innermode) && known_eq (GET_MODE_NUNITS (outermode), GET_MODE_NUNITS (innermode)) && known_eq (GET_MODE_UNIT_SIZE (outermode), - GET_MODE_UNIT_SIZE (innermode))) + GET_MODE_UNIT_SIZE (innermode))) return simplify_gen_relational (GET_CODE (op), outermode, innermode, XEXP (op, 0), XEXP (op, 1)); + + /* Distribute non-paradoxical subregs through logic ops in cases where one term + disappears. + + (subreg:M1 (and:M2 X C1)) -> (subreg:M1 X) + (subreg:M1 (ior:M2 X C1)) -> (subreg:M1 C1) + (subreg:M1 (xor:M2 X C1)) -> (subreg:M1 (not:M2 X)) + + if M2 is no smaller than M1 and (subreg:M1 C1) is all-ones. + + (subreg:M1 (and:M2 X C2)) -> (subreg:M1 C2) + (subreg:M1 (ior/xor:M2 X C2)) -> (subreg:M1 X) + + if M2 is no smaller than M1 and (subreg:M1 C2) is zero. */ + if (known_ge (innersize, outersize) + && GET_MODE_CLASS (outermode) == GET_MODE_CLASS (innermode) + && (GET_CODE (op) == AND || GET_CODE (op) == IOR || GET_CODE (op) == XOR) + && CONSTANT_P (XEXP (op, 1))) + { + rtx op1_subreg = simplify_subreg (outermode, XEXP (op, 1), innermode, byte); + if (op1_subreg == CONSTM1_RTX (outermode)) + { + if (GET_CODE (op) == IOR) + return op1_subreg; + rtx op0 = XEXP (op, 0); + if (GET_CODE (op) == XOR) + op0 = simplify_gen_unary (NOT, innermode, op0, innermode); + return simplify_gen_subreg (outermode, op0, innermode, byte); + } + + if (op1_subreg == CONST0_RTX (outermode)) + return (GET_CODE (op) == AND + ? op1_subreg + : simplify_gen_subreg (outermode, XEXP (op, 0), innermode, byte)); + } + return NULL_RTX; } @@ -8668,6 +8713,43 @@ test_scalar_int_ext_ops (machine_mode bmode, machine_mode smode) lowpart_subreg (bmode, sreg, smode), bmode), sreg); + + /* Test extensions, followed by logic ops, followed by truncations. */ + rtx bsubreg = lowpart_subreg (bmode, sreg, smode); + rtx smask = gen_int_mode (GET_MODE_MASK (smode), bmode); + rtx inv_smask = gen_int_mode (~GET_MODE_MASK (smode), bmode); + ASSERT_RTX_EQ (lowpart_subreg (smode, + simplify_gen_binary (AND, bmode, + bsubreg, smask), + bmode), + sreg); + ASSERT_RTX_EQ (lowpart_subreg (smode, + simplify_gen_binary (AND, bmode, + bsubreg, inv_smask), + bmode), + const0_rtx); + ASSERT_RTX_EQ (lowpart_subreg (smode, + simplify_gen_binary (IOR, bmode, + bsubreg, smask), + bmode), + constm1_rtx); + ASSERT_RTX_EQ (lowpart_subreg (smode, + simplify_gen_binary (IOR, bmode, + bsubreg, inv_smask), + bmode), + sreg); + ASSERT_RTX_EQ (lowpart_subreg (smode, + simplify_gen_binary (XOR, bmode, + bsubreg, smask), + bmode), + lowpart_subreg (smode, + gen_rtx_NOT (bmode, bsubreg), + bmode)); + ASSERT_RTX_EQ (lowpart_subreg (smode, + simplify_gen_binary (XOR, bmode, + bsubreg, inv_smask), + bmode), + sreg); } /* Verify more simplifications of integer extension/truncation. diff --git a/gcc/stor-layout.cc b/gcc/stor-layout.cc index 12071c9..63e830a 100644 --- a/gcc/stor-layout.cc +++ b/gcc/stor-layout.cc @@ -3167,7 +3167,7 @@ bit_field_mode_iterator::prefer_smaller_modes () decide which of the above modes should be used. */ bool -get_best_mode (int bitsize, int bitpos, +get_best_mode (HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos, poly_uint64 bitregion_start, poly_uint64 bitregion_end, unsigned int align, unsigned HOST_WIDE_INT largest_mode_bitsize, bool volatilep, diff --git a/gcc/symtab.cc b/gcc/symtab.cc index 652f66a..20dfe09 100644 --- a/gcc/symtab.cc +++ b/gcc/symtab.cc @@ -303,6 +303,11 @@ symbol_table::change_decl_assembler_name (tree decl, tree name) warning (0, "%qD renamed after being referenced in assembly", decl); SET_DECL_ASSEMBLER_NAME (decl, name); + if (DECL_RTL_SET_P (decl)) + { + SET_DECL_RTL (decl, NULL); + make_decl_rtl (decl); + } if (alias) { gcc_assert (!IDENTIFIER_INTERNAL_P (name)); diff --git a/gcc/target.def b/gcc/target.def index 427dc40..5dd8f25 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -2060,6 +2060,20 @@ all zeros. GCC can then try to branch around the instruction instead.", (unsigned ifn), default_empty_mask_is_expensive) +/* Prefer gather/scatter loads/stores to e.g. elementwise accesses if\n\ +we cannot use a contiguous access. */ +DEFHOOK +(prefer_gather_scatter, + "This hook returns TRUE if gather loads or scatter stores are cheaper on\n\ +this target than a sequence of elementwise loads or stores. The @var{mode}\n\ +and @var{scale} correspond to the @code{gather_load} and\n\ +@code{scatter_store} instruction patterns. The @var{group_size} is the\n\ +number of scalar elements in each scalar loop iteration that are to be\n\ +combined into the vector.", + bool, + (machine_mode mode, int scale, unsigned int group_size), + hook_bool_mode_int_unsigned_false) + /* Target builtin that implements vector gather operation. */ DEFHOOK (builtin_gather, diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6d62009..0ec05ad 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,451 @@ +2025-08-01 Artemiy Granat <a.granat@ispras.ru> + + * gcc.target/i386/attributes-error.c: Change incorrect + sseregparm,fastcall combination to cdecl,fastcall. + +2025-08-01 Jakub Jelinek <jakub@redhat.com> + + PR middle-end/121322 + * gcc.dg/pr121322.c: New test. + +2025-08-01 Nathaniel Shead <nathanieloshead@gmail.com> + + PR c++/108080 + * g++.dg/modules/pr108080.H: New test. + +2025-08-01 Nathaniel Shead <nathanieloshead@gmail.com> + + PR c++/121238 + * g++.dg/modules/merge-19.h: New test. + * g++.dg/modules/merge-19_a.H: New test. + * g++.dg/modules/merge-19_b.C: New test. + +2025-07-31 Gaius Mulley <gaiusmod2@gmail.com> + + PR modula2/121314 + * gm2/errors/fail/badindrtype.mod: New test. + * gm2/errors/fail/badindrtype2.mod: New test. + +2025-07-31 Mikael Morin <morin-mikael@orange.fr> + + PR fortran/121342 + * gfortran.dg/class_elemental_1.f90: New test. + +2025-07-31 Jason Merrill <jason@redhat.com> + + PR c++/120800 + * g++.dg/cpp0x/constexpr-array30.C: New test. + +2025-07-31 Marek Polacek <polacek@redhat.com> + + PR c++/120775 + * g++.dg/cpp26/consteval-block1.C: New test. + * g++.dg/cpp26/consteval-block2.C: New test. + * g++.dg/cpp26/consteval-block3.C: New test. + * g++.dg/cpp26/consteval-block4.C: New test. + * g++.dg/cpp26/consteval-block5.C: New test. + * g++.dg/cpp26/consteval-block6.C: New test. + * g++.dg/cpp26/consteval-block7.C: New test. + * g++.dg/cpp26/consteval-block8.C: New test. + +2025-07-31 Pan Li <pan2.li@intel.com> + + * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c: Add asm check + for signed avg ceil. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i8.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i8.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i16.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i32.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i64.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i8.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i16.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i32.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i64.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i8.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h: Add test + helper macros. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h: Add + test data for run test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-i16.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-i32.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-i64.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-i8.c: New test. + +2025-07-31 Artemiy Granat <a.granat@ispras.ru> + + * gcc.target/i386/attributes-error.c: Add more attributes + combinations. + +2025-07-31 Artemiy Granat <a.granat@ispras.ru> + + * g++.dg/abi/regparm1.C: Require ia32 target. + * gcc.target/i386/20020224-1.c: Likewise. + * gcc.target/i386/pr103785.c: Use regparm attribute only if + not in 64-bit mode. + * gcc.target/i386/pr36533.c: Likewise. + * gcc.target/i386/pr59099.c: Likewise. + * gcc.target/i386/sibcall-8.c: Likewise. + * gcc.target/i386/sw-1.c: Likewise. + * gcc.target/i386/pr15184-2.c: Fix invalid comment. + * gcc.target/i386/attributes-ignore.c: New test. + +2025-07-31 Yury Khrustalev <yury.khrustalev@arm.com> + + * g++.target/aarch64/mv-cpu-features.C: new test. + +2025-07-31 Yury Khrustalev <yury.khrustalev@arm.com> + + * gcc.target/aarch64/ifunc-resolver.in: add core test functions. + * gcc.target/aarch64/ifunc-resolver-0.c: new test. + * gcc.target/aarch64/ifunc-resolver-1.c: ditto. + * gcc.target/aarch64/ifunc-resolver-2.c: ditto. + * gcc.target/aarch64/ifunc-resolver-3.c: ditto. + * gcc.target/aarch64/ifunc-resolver-4.c: as above. + +2025-07-31 Spencer Abson <spencer.abson@arm.com> + + PR target/121028 + * gcc.target/aarch64/sme/call_sm_switch_1.c: Tell check-function + -bodies not to ignore .inst directives, and replace the test for + "smstart sm" with one for it's encoding. + * gcc.target/aarch64/sme/call_sm_switch_11.c: Likewise. + * gcc.target/aarch64/sme/pr121028.c: New test. + +2025-07-31 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/121264 + * gcc.dg/tree-ssa/pr121264.c: New test. + +2025-07-31 Spencer Abson <spencer.abson@arm.com> + + * gcc.target/aarch64/sme2/acle-asm/amax_f16_x2.c: Gate do-assemble on + assembler support for +faminmax and +sme2. + * gcc.target/aarch64/sme2/acle-asm/amax_f16_x4.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/amax_f32_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/amax_f32_x4.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/amax_f64_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/amax_f64_x4.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/amin_f16_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/amin_f16_x4.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/amin_f32_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/amin_f32_x4.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/amin_f64_x2.c: Likewise. + * gcc.target/aarch64/sme2/acle-asm/amin_f64_x4.c: Likewise. + * lib/target-supports.exp: Split the extensions that require SME into + a separate set, and use armv9-a as their baseline. + +2025-07-31 Jakub Jelinek <jakub@redhat.com> + + * gcc.target/i386/apx-1.c (apx_hanlder): Rename to ... + (apx_handler): ... this. + * gcc.target/i386/uintr-2.c (UINTR_hanlder): Rename to ... + (UINTR_handler): ... this. + * gcc.target/i386/uintr-5.c (UINTR_hanlder): Rename to ... + (UINTR_handler): ... this. + +2025-07-30 Nathaniel Shead <nathanieloshead@gmail.com> + + PR c++/121291 + * g++.dg/ext/is_invocable7.C: New test. + * g++.dg/ext/is_nothrow_convertible5.C: New test. + +2025-07-30 Jason Merrill <jason@redhat.com> + + * g++.dg/tc1/dr49.C: Adjust diagnostic. + * g++.dg/template/func2.C: Likewise. + * g++.dg/cpp1z/nontype8.C: New test. + +2025-07-30 Andrew Pinski <quic_apinski@quicinc.com> + + PR tree-optimization/121236 + PR tree-optimization/121295 + * gcc.dg/torture/pr121236-1.c: New test. + * gcc.dg/torture/pr121295-1.c: New test. + +2025-07-30 Andrew Pinski <quic_apinski@quicinc.com> + + Revert: + 2025-07-30 Andrew Pinski <quic_apinski@quicinc.com> + + PR tree-optimization/121236 + * gcc.dg/torture/pr121236-1.c: New test. + +2025-07-30 Stefan Schulze Frielinghaus <stefansf@gcc.gnu.org> + + * gcc.target/s390/spaceship-fp-1.c: New test. + * gcc.target/s390/spaceship-fp-2.c: New test. + * gcc.target/s390/spaceship-fp-3.c: New test. + * gcc.target/s390/spaceship-fp-4.c: New test. + * gcc.target/s390/spaceship-int-1.c: New test. + * gcc.target/s390/spaceship-int-2.c: New test. + * gcc.target/s390/spaceship-int-3.c: New test. + +2025-07-30 H.J. Lu <hjl.tools@gmail.com> + + PR target/120427 + * gcc.target/i386/pr120427-5.c: New test. + +2025-07-30 Jan Hubicka <jh@suse.cz> + + * g++.dg/tree-prof/eh1.C: New test. + +2025-07-30 Richard Biener <rguenther@suse.de> + + PR tree-optimization/121130 + * gcc.dg/vect/vect-simd-pr121130.c: New testcase. + +2025-07-30 Jakub Jelinek <jakub@redhat.com> + + PR c++/121133 + * g++.dg/warn/pr121133-1.C: New test. + * g++.dg/warn/pr121133-2.C: New test. + * g++.dg/warn/pr121133-3.C: New test. + * g++.dg/warn/pr121133-4.C: New test. + +2025-07-30 Jakub Jelinek <jakub@redhat.com> + + PR c++/120778 + * g++.dg/cpp/if-comma-1.C: New test. + +2025-07-30 Pengfei Li <Pengfei.Li2@arm.com> + + PR tree-optimization/121020 + * gcc.dg/vect/vect-early-break_138-pr121020.c: New test. + +2025-07-30 Pengfei Li <Pengfei.Li2@arm.com> + + PR tree-optimization/121190 + * gcc.dg/vect/vect-early-break_52.c: Update an unsafe test. + * gcc.dg/vect/vect-early-break_137-pr121190.c: New test. + +2025-07-30 Alfie Richards <alfie.richards@arm.com> + + PR target/121300 + * gcc.target/aarch64/pr121300.c: New test. + +2025-07-30 Spencer Abson <spencer.abson@arm.com> + + * gcc.target/aarch64/sve/unpacked_cond_fmla_1.c: Add test cases + for merging with multiplcand. + * gcc.target/aarch64/sve/unpacked_cond_fmls_1.c: Likewise. + * gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c: Likewise. + * gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c: Likewise. + * gcc.target/aarch64/sve/unpacked_cond_fmla_2.c: New test. + * gcc.target/aarch64/sve/unpacked_cond_fmls_2.c: Likewise. + * gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c: Likewise.. + * gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c: Likewise. + * g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C: Likewise. + * g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C: Likewise. + +2025-07-30 Spencer Abson <spencer.abson@arm.com> + + * gcc.target/aarch64/sve/unpacked_cond_fmla_1.c: New test. + * gcc.target/aarch64/sve/unpacked_cond_fmls_1.c: Likewise. + * gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c: Likewise. + * gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c: Likewise. + +2025-07-30 Yuao Ma <c8ef@outlook.com> + + * gfortran.dg/split_1.f90: New test. + * gfortran.dg/split_2.f90: New test. + * gfortran.dg/split_3.f90: New test. + * gfortran.dg/split_4.f90: New test. + +2025-07-30 Spencer Abson <spencer.abson@arm.com> + + * g++.target/aarch64/sve/unpacked_ternary_bf16_1.C: New test. + * g++.target/aarch64/sve/unpacked_ternary_bf16_2.C: Likewise. + * gcc.target/aarch64/sve/unpacked_fmla_1.c: Likewise. + * gcc.target/aarch64/sve/unpacked_fmla_2.c: Likewise. + * gcc.target/aarch64/sve/unpacked_fmls_1.c: Likewise. + * gcc.target/aarch64/sve/unpacked_fmls_2.c: Likewise. + * gcc.target/aarch64/sve/unpacked_fnmla_1.c: Likeiwse. + * gcc.target/aarch64/sve/unpacked_fnmla_2.c: Likewise. + * gcc.target/aarch64/sve/unpacked_fnmls_1.c: Likewise. + * gcc.target/aarch64/sve/unpacked_fnmls_2.c: Likewise. + +2025-07-30 liuhongt <hongtao.liu@intel.com> + + * gcc.target/i386/pr121274.c: New test. + +2025-07-30 Pan Li <pan2.li@intel.com> + + * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c: Add asm check + for unsigned avg ceil. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c: Ditto. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h: Add test + helper macros. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h: Add + test data. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u16.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u32.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u64.c: New test. + * gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u8.c: New test. + +2025-07-29 Andrew Pinski <quic_apinski@quicinc.com> + + PR testsuite/121215 + * lib/profopt.exp (profopt-execute): Call cleanup-after-saved-dg-test + if returning early for the -fauto-profile case failing case. + +2025-07-29 Spencer Abson <spencer.abson@arm.com> + + * g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C: New test. + * gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c: Likewise. + * gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c: Likewise. + * gcc.target/aarch64/sve/unpacked_cond_fadd_2.c: Likewise. + * gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c: Likewise. + * gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c: Likewise. + * gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c: Likewise. + * gcc.target/aarch64/sve/unpacked_cond_fmul_2.c: Likewise. + * gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c: Likewise. + +2025-07-29 H.J. Lu <hjl.tools@gmail.com> + + PR target/121208 + * gcc.target/i386/pr121208-1a.c (dg-options): Add -mno-80387. + * gcc.target/i386/pr121208-1b.c (dg-options): Likewise. + +2025-07-29 Juergen Christ <jchrist@linux.ibm.com> + + PR testsuite/121286 + PR testsuite/121288 + * gcc.dg/vect/pr112325.c: Adjust parameters for s390. + * gcc.dg/vect/pr117888-1.c: Ditto. + +2025-07-29 Richard Sandiford <richard.sandiford@arm.com> + + * gcc.target/aarch64/saturating_arithmetic_1.c: Allow w0 and w1 + to be duplicated in either order. + * gcc.target/aarch64/saturating_arithmetic_2.c: Likewise. + +2025-07-29 Richard Sandiford <richard.sandiford@arm.com> + + * gcc.target/aarch64/cmpbr.c: Support both operand orders + for 8-bit and 16-bit comparisons. + +2025-07-29 Konstantinos Eleftheriou <konstantinos.eleftheriou@vrull.eu> + + PR rtl-optimization/120660 + * gcc.dg/pr120660.c: New test. + +2025-07-29 Konstantinos Eleftheriou <konstantinos.eleftheriou@vrull.eu> + + PR rtl-optimization/119795 + * gcc.target/i386/pr119795.c: New test. + +2025-07-29 Pan Li <pan2.li@intel.com> + + * gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u64.c: Add rv64 + target for run. + * gcc.target/riscv/sat/sat_u_mul-run-1-u32-from-u64.c: Ditto. + * gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u64.c: Ditto. + * gcc.target/riscv/sat/sat_u_mul-1-u16-from-u32.c: New test. + * gcc.target/riscv/sat/sat_u_mul-1-u8-from-u16.c: New test. + * gcc.target/riscv/sat/sat_u_mul-1-u8-from-u32.c: New test. + * gcc.target/riscv/sat/sat_u_mul-2-u16-from-u64.c: New test. + * gcc.target/riscv/sat/sat_u_mul-2-u32-from-u64.c: New test. + * gcc.target/riscv/sat/sat_u_mul-2-u8-from-u64.c: New test. + * gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u32.c: New test. + * gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u16.c: New test. + * gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u32.c: New test. + +2025-07-29 Richard Biener <rguenther@suse.de> + + PR tree-optimization/120687 + * gcc.dg/vect/pr120687-3.c: New testcase. + +2025-07-29 Nathaniel Shead <nathanieloshead@gmail.com> + + PR testsuite/121285 + * g++.dg/modules/class-11_a.H: Make static_asserts valid for + C++14. + +2025-07-29 Richard Biener <rguenther@suse.de> + + PR tree-optimization/120687 + * gcc.dg/vect/pr120687-1.c: New testcase. + * gcc.dg/vect/pr120687-2.c: Likewise. + +2025-07-29 Gaius Mulley <gaiusmod2@gmail.com> + + PR modula2/121289 + * gm2/warnings/style/fail/badvarname.mod: New test. + * gm2/warnings/style/fail/warnings-style-fail.exp: New test. + +2025-07-29 Christophe Lyon <christophe.lyon@linaro.org> + + * gcc.dg/pr116906-1.c: Add 'dg-do run'. + * gcc.dg/pr116906-2.c: Likewise. + * gcc.dg/pr78185.c: Likewise. + +2025-07-29 Jakub Jelinek <jakub@redhat.com> + + PR middle-end/121159 + * c-c++-common/pr121159.c: New test. + * gcc.dg/plugin/must-tail-call-2.c (test_5): Don't expect an error. + +2025-07-29 Andrew Pinski <quic_apinski@quicinc.com> + + PR middle-end/120523 + * gcc.dg/tree-ssa/cswtch-7.c: New test. + +2025-07-28 Andrew Pinski <quic_apinski@quicinc.com> + + PR tree-optimization/121236 + * gcc.dg/torture/pr121236-1.c: New test. + +2025-07-28 H.J. Lu <hjl.tools@gmail.com> + + PR target/121208 + * gcc.target/i386/pr121208-1a.c: New test. + * gcc.target/i386/pr121208-1b.c: Likewise. + * gcc.target/i386/pr121208-2a.c: Likewise. + * gcc.target/i386/pr121208-2b.c: Likewise. + * gcc.target/i386/pr121208-3a.c: Likewise. + * gcc.target/i386/pr121208-3b.c: Likewise. + +2025-07-28 Thomas Schwinge <tschwinge@baylibre.com> + + * gcc.target/nvptx/march-map=sm_100.c: New. + * gcc.target/nvptx/march-map=sm_100a.c: Likewise. + * gcc.target/nvptx/march-map=sm_100f.c: Likewise. + * gcc.target/nvptx/march-map=sm_101.c: Likewise. + * gcc.target/nvptx/march-map=sm_101a.c: Likewise. + * gcc.target/nvptx/march-map=sm_101f.c: Likewise. + * gcc.target/nvptx/march-map=sm_103.c: Likewise. + * gcc.target/nvptx/march-map=sm_103a.c: Likewise. + * gcc.target/nvptx/march-map=sm_103f.c: Likewise. + * gcc.target/nvptx/march-map=sm_120.c: Likewise. + * gcc.target/nvptx/march-map=sm_120a.c: Likewise. + * gcc.target/nvptx/march-map=sm_120f.c: Likewise. + * gcc.target/nvptx/march-map=sm_121.c: Likewise. + * gcc.target/nvptx/march-map=sm_121a.c: Likewise. + * gcc.target/nvptx/march-map=sm_121f.c: Likewise. + +2025-07-28 Richard Biener <rguenther@suse.de> + + PR tree-optimization/121256 + * gcc.dg/vect/vect-recurr-pr121256.c: New testcase. + * gcc.dg/vect/vect-recurr-pr121256-2.c: Likewise. + 2025-07-27 Mikael Morin <mikael@gcc.gnu.org> PR fortran/121185 diff --git a/gcc/testsuite/c-c++-common/pr121159.c b/gcc/testsuite/c-c++-common/pr121159.c new file mode 100644 index 0000000..c8c5d67 --- /dev/null +++ b/gcc/testsuite/c-c++-common/pr121159.c @@ -0,0 +1,17 @@ +/* PR middle-end/121159 */ +/* { dg-do compile { target musttail } } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times "foo \\\(\[^\n\r]*\\\); \\\[tail call\\\] \\\[must tail call\\\]" 1 "optimized" } } */ + +[[noreturn, gnu::noipa]] void +foo (void) +{ + for (;;) + ; +} + +void +bar (void) +{ + [[gnu::musttail]] return foo (); +} diff --git a/gcc/testsuite/g++.dg/abi/regparm1.C b/gcc/testsuite/g++.dg/abi/regparm1.C index c471046..3aae3dd 100644 --- a/gcc/testsuite/g++.dg/abi/regparm1.C +++ b/gcc/testsuite/g++.dg/abi/regparm1.C @@ -1,5 +1,5 @@ // PR c++/29911 (9381) -// { dg-do run { target i?86-*-* x86_64-*-* } } +// { dg-do run { target { { i?86-*-* x86_64-*-* } && ia32 } } } // { dg-require-effective-target c++11 } extern "C" int printf(const char *, ...); diff --git a/gcc/testsuite/g++.dg/cpp/if-comma-1.C b/gcc/testsuite/g++.dg/cpp/if-comma-1.C new file mode 100644 index 0000000..0daaff9 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp/if-comma-1.C @@ -0,0 +1,42 @@ +// PR c++/120778 +// { dg-do preprocess } +// { dg-options "-pedantic-errors" } + +#if (1, 2) +#define M1 1 +#else +#error +#endif +#if 1 ? 2, 3 : 4 +#define M2 2 +#else +#error +#endif +#if 0 ? 2, 0 : 1 +#define M3 3 +#else +#error +#endif +#if 0 || (1, 2) +#define M4 4 +#else +#error +#endif +#if 1 || (1, 2) +#define M5 5 +#else +#error +#endif +#if (1, 2) && 1 +#define M6 6 +#else +#error +#endif +#if 1 && (1, 2) +#define M7 7 +#else +#error +#endif +#if M1 + M2 + M3 + M4 + M5 + M6 + M7 != 28 +#error +#endif diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-array30.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-array30.C new file mode 100644 index 0000000..3f72407 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-array30.C @@ -0,0 +1,22 @@ +// PR c++/120800 +// { dg-do compile { target c++11 } } + +template<typename T> +struct Container +{ + T m_data[1] {}; +}; + +class Element +{ +private: + Element() = default; + +private: + bool m_bool1 { false }; + bool m_bool2; + + friend struct Container<Element>; +}; + +Container<Element> element; diff --git a/gcc/testsuite/g++.dg/cpp1z/nontype8.C b/gcc/testsuite/g++.dg/cpp1z/nontype8.C new file mode 100644 index 0000000..b81e85b --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/nontype8.C @@ -0,0 +1,12 @@ +// Test that the diagnostic mentions lack of constexpr +// { dg-do compile { target c++17 } } + +template <auto f> void g() {} +void x() +{ + using fp = void (*)(); + fp f = nullptr; // { dg-message "constexpr" } + g<f>(); // { dg-error "" } + int *p = nullptr; // { dg-message "constexpr" } + g<p>(); // { dg-error "" } +} diff --git a/gcc/testsuite/g++.dg/cpp26/consteval-block1.C b/gcc/testsuite/g++.dg/cpp26/consteval-block1.C new file mode 100644 index 0000000..9e2cf22 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp26/consteval-block1.C @@ -0,0 +1,82 @@ +// { dg-do compile { target c++26 } } +// Test consteval blocks, as specified by P2996. + +constexpr int fn () { return 42; } +struct M { + static consteval void foo () {} +}; + +consteval { } +consteval { fn (); } +consteval { M::foo (); } +consteval { auto x = fn (); return; } +consteval { + [](int i) { return i; }(5); +} +auto lam = [] { }; +consteval { lam (); } + +struct S { + consteval { } +}; + +struct S2 { + consteval { fn(); } +}; + +class C { + consteval { } +}; + +class C2 { + consteval { M::foo (); } +}; + +union U { + consteval { } +}; + +template<typename> +struct TS { + consteval { } +}; + +template<typename... Ts> +struct TS2 { + consteval { + (Ts::foo (), ...); + } +}; + +TS2<M> ts2; + +void +g () +{ + consteval { } +} + +template<typename> +void +tg () +{ + consteval { } +} + +void die (); +constexpr int +bar (int i) +{ + if (i != 42) + die (); + return 0; +} + +void +foo () +{ + constexpr int r = 42; + consteval { + bar (r); + } +} diff --git a/gcc/testsuite/g++.dg/cpp26/consteval-block2.C b/gcc/testsuite/g++.dg/cpp26/consteval-block2.C new file mode 100644 index 0000000..895fcb6 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp26/consteval-block2.C @@ -0,0 +1,49 @@ +// { dg-do compile { target c++26 } } +// Test consteval blocks, as specified by P2996. + +void fn (); + +consteval { fn (); } // { dg-error "call to non-.constexpr. function" } +consteval { return 42; } // { dg-error "return-statement with a value" } + +struct S { + consteval { + fn (); // { dg-error "call to non-.constexpr. function" } + } + consteval { + return 42; // { dg-error "return-statement with a value" } + } +}; + +template<typename T> +constexpr void foo (T t) { return t; } // { dg-error "return-statement with a value" } + +template<int N> +struct R { + consteval { foo (N); } +}; + +R<1> r; + +template<typename T> +constexpr void foo2 (T t) { return t; } // { dg-error "return-statement with a value" } + +template<int N> +void +f () +{ + consteval { foo2 (1); } +} + +constexpr int bar (int) { return 0; } + +void +g () +{ + f<1>(); + + int r = 42; + consteval { + bar (r); // { dg-error ".r. is not captured" } + } +} diff --git a/gcc/testsuite/g++.dg/cpp26/consteval-block3.C b/gcc/testsuite/g++.dg/cpp26/consteval-block3.C new file mode 100644 index 0000000..c1221c3 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp26/consteval-block3.C @@ -0,0 +1,41 @@ +// { dg-do compile { target c++26 } } +// Test consteval blocks, as specified by P2996. +// Test that we actually evaluate the consteval block. + +void bar () { } + +template<int N> +constexpr void +fn () +{ + if (N > 0) + bar (); // { dg-error "call to non-.constexpr. function" } +} + +template<int N> +struct S { + consteval { fn<N>(); } // { dg-error "called in a constant expression" } +}; + +S<1> s; + +template<int N> +constexpr void +fn2 () +{ + if (N > 0) + bar (); // { dg-error "call to non-.constexpr. function" } +} + +template<int N> +void +g () +{ + consteval { fn2<N>(); } // { dg-error "called in a constant expression" } +} + +void +f () +{ + g<1>(); +} diff --git a/gcc/testsuite/g++.dg/cpp26/consteval-block4.C b/gcc/testsuite/g++.dg/cpp26/consteval-block4.C new file mode 100644 index 0000000..be95e17 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp26/consteval-block4.C @@ -0,0 +1,41 @@ +// { dg-do compile { target c++26 } } +// Test consteval blocks, as specified by P2996. +// Test that we actually evaluate the consteval block. + +void bar () { } + +template<int N> +constexpr void +fn () +{ + if (N > 0) + bar (); +} + +template<int N> +struct S { + consteval { fn<N>(); } +}; + +S<0> s; + +template<int N> +constexpr void +fn2 () +{ + if (N > 0) + bar (); +} + +template<int N> +void +g () +{ + consteval { fn2<N>(); } +} + +void +f () +{ + g<0>(); +} diff --git a/gcc/testsuite/g++.dg/cpp26/consteval-block5.C b/gcc/testsuite/g++.dg/cpp26/consteval-block5.C new file mode 100644 index 0000000..462cebe --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp26/consteval-block5.C @@ -0,0 +1,70 @@ +// { dg-do compile { target c++26 } } +// Test consteval blocks, as specified by P2996. + +void bar () { } + +template<int N> +constexpr void +fn () +{ + if (N > 0) + bar (); +} + +template<typename> +struct S { + consteval { fn<1>(); } +}; + +template<> +struct S<int> { + consteval { fn<0>(); } +}; + +S<int> s1; + +template<typename T> +struct S<T*> { + consteval { fn<0>(); } +}; + +S<int *> s2; + +template<typename T, int N> +struct W { + consteval { T t; fn<N - 1>(); } +}; + +template<typename T> +struct W<T, 0> { + consteval { T t; fn<0>(); } +}; + +template<> +struct W<char, 0> { + consteval { fn<0>(); } +}; + +W<int, 0> w1; +W<int, 1> w2; +W<char, 0> w3; + +template<typename> +void +f () +{ + consteval { fn<1>(); } +} + +template<> +void +f<int> () +{ + consteval { fn<0>(); } +} + +void +g () +{ + f<int> (); +} diff --git a/gcc/testsuite/g++.dg/cpp26/consteval-block6.C b/gcc/testsuite/g++.dg/cpp26/consteval-block6.C new file mode 100644 index 0000000..ca90b3e --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp26/consteval-block6.C @@ -0,0 +1,108 @@ +// { dg-do compile { target c++26 } } +// Test consteval blocks, as specified by P2996. + +void die () {} + +template<int N> +constexpr void +fn () +{ + if (N > 0) + die (); +} + +template<int N> +void +fn2 () +{ + struct S { + consteval { + fn<N>(); + } + }; +} + +template<int N> +struct A { + struct B { + consteval { + fn<N>(); + } + }; + template<int M> + struct C { + consteval { + fn<N + M>(); + } + }; +}; + +template<int N> +struct D { + constexpr static int i = 0; + struct E { + consteval { + fn<i>(); + } + }; +}; + +A<0>::B b; +A<0>::C<0> c; +D<0>::E e; + +void +f () +{ + fn2<0>(); +} + +static constexpr int j = 0; +const int x = 0; + +consteval { + fn<j>(); + consteval { + fn<j + j>(); + consteval { + fn<j + j + j>(); + consteval { + fn<j + j + x>(); + consteval { + fn<j + x>(); + } + } + } + } +} + +struct R { constexpr R() {} }; + +template<int N> +constexpr auto X = N; + +consteval { + R{}; + constexpr auto x = 0; + fn<x>(); + fn<X<0>>(); + if consteval + { + fn<j>(); + } + else + { + die (); + } +} + +template<typename T> +struct G { + consteval { + using U = T[3]; + U arr{}; + int i = arr[2]; + } +}; + +G<int> g; diff --git a/gcc/testsuite/g++.dg/cpp26/consteval-block7.C b/gcc/testsuite/g++.dg/cpp26/consteval-block7.C new file mode 100644 index 0000000..231682f --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp26/consteval-block7.C @@ -0,0 +1,12 @@ +// { dg-do compile { target c++26 } } +// Test consteval blocks, as specified by P2996. + +consteval { + template <class T> // { dg-error "template declaration cannot appear at block scope" } + struct X { }; + + template <class T> // { dg-error "template declaration cannot appear at block scope" } + concept C = true; + + return; // OK +} diff --git a/gcc/testsuite/g++.dg/cpp26/consteval-block8.C b/gcc/testsuite/g++.dg/cpp26/consteval-block8.C new file mode 100644 index 0000000..ad164fd --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp26/consteval-block8.C @@ -0,0 +1,38 @@ +// { dg-do compile { target c++26 } } +// Test consteval blocks, as specified by P2996. + +/* __func__ won't be set. Make sure we warn. */ +consteval { __func__; } // { dg-error "outside of function scope" } +consteval { { __func__; } } // { dg-error "outside of function scope" } +consteval { []() mutable consteval -> void { __func__; } (); } // { dg-bogus "outside of function scope" } +consteval { []() mutable consteval -> void { consteval { __func__; } } (); } // { dg-bogus "outside of function scope" } + +auto l = []() -> void { + consteval { __func__; } // { dg-bogus "outside of function scope" } +}; + +struct F { + consteval { __func__; } // { dg-error "outside of function scope" } +}; +template<typename> +struct TF { + consteval { __func__; } // { dg-error "outside of function scope" } +}; + +void +g () +{ + consteval { __func__; } // { dg-bogus "outside of function scope" } + // Not a consteval-block-declaration. + []() mutable consteval -> void { __func__; } (); // { dg-bogus "outside of function scope" } +} + +template<typename> +void +f () +{ + consteval { __func__; } // { dg-bogus "outside of function scope" } + { consteval { __func__; } } // { dg-bogus "outside of function scope" } + __func__; // { dg-bogus "outside of function scope" } + []() mutable consteval -> void { __func__; } (); // { dg-bogus "outside of function scope" } +} diff --git a/gcc/testsuite/g++.dg/ext/is_invocable7.C b/gcc/testsuite/g++.dg/ext/is_invocable7.C new file mode 100644 index 0000000..5c852fc --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/is_invocable7.C @@ -0,0 +1,21 @@ +// PR c++/121291 +// { dg-do compile { target c++17 } } + +template <typename T> +constexpr bool is_invocable = __is_invocable(T); + +template <typename T> +constexpr bool is_nothrow_invocable = __is_nothrow_invocable(T); + +struct S { +private: + int operator()() noexcept; // { dg-message "here" } +}; + +static_assert(is_invocable<S>); // { dg-error "assert" } +// { dg-message "not invocable" "" { target *-*-* } .-1 } +// { dg-error "private within this context" "" { target *-*-* } .-2 } + +static_assert(is_nothrow_invocable<S>); // { dg-error "assert" } +// { dg-message "not nothrow invocable" "" { target *-*-* } .-1 } +// { dg-error "private within this context" "" { target *-*-* } .-2 } diff --git a/gcc/testsuite/g++.dg/ext/is_nothrow_convertible5.C b/gcc/testsuite/g++.dg/ext/is_nothrow_convertible5.C new file mode 100644 index 0000000..0ce8fb8 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/is_nothrow_convertible5.C @@ -0,0 +1,15 @@ +// PR c++/121291 +// { dg-do compile { target c++17 } } + +template <typename T, typename U> +constexpr bool is_nothrow_convertible = __is_nothrow_convertible(T, U); + +struct A {}; +struct B { +private: + operator A() noexcept; // { dg-message "here" } +}; + +static_assert(is_nothrow_convertible<B, A>); // { dg-error "assert" } +// { dg-message "not nothrow convertible" "" { target *-*-* } .-1 } +// { dg-error "private within this context" "" { target *-*-* } .-2 } diff --git a/gcc/testsuite/g++.dg/modules/class-11_a.H b/gcc/testsuite/g++.dg/modules/class-11_a.H index f7bbf9d..799dbdd 100644 --- a/gcc/testsuite/g++.dg/modules/class-11_a.H +++ b/gcc/testsuite/g++.dg/modules/class-11_a.H @@ -20,7 +20,7 @@ struct pr106381 { struct L1 : pr106381 { char x; // { dg-warning "offset" "" { target c++14 } } }; -static_assert(sizeof(L1) == sizeof(pr106381)); +static_assert(sizeof(L1) == sizeof(pr106381), ""); struct pr120012 { @@ -33,4 +33,4 @@ struct pr120012 { struct L2 : pr120012 { unsigned char y; // { dg-warning "offset" "" { target c++20 } } }; -static_assert(sizeof(L2) > sizeof(pr120012)); +static_assert(sizeof(L2) > sizeof(pr120012), ""); diff --git a/gcc/testsuite/g++.dg/modules/merge-19.h b/gcc/testsuite/g++.dg/modules/merge-19.h new file mode 100644 index 0000000..c3faadc --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/merge-19.h @@ -0,0 +1,21 @@ +// PR c++/121238 + +inline void inc(const char*& __first) { + ++__first; +} + +template <typename = void> +bool parse_integer(const char *first) { + const char *start = first; + inc(first); + return first != start; +} +template bool parse_integer<void>(const char*); + + +struct S { ~S() {} int x; }; +template <typename = void> +bool take_by_invisiref(S s) { + return s.x == 5; +} +template bool take_by_invisiref<void>(S); diff --git a/gcc/testsuite/g++.dg/modules/merge-19_a.H b/gcc/testsuite/g++.dg/modules/merge-19_a.H new file mode 100644 index 0000000..149a447 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/merge-19_a.H @@ -0,0 +1,5 @@ +// PR c++/121238 +// { dg-additional-options "-fmodule-header" } +// { dg-module-cmi {} } + +#include "merge-19.h" diff --git a/gcc/testsuite/g++.dg/modules/merge-19_b.C b/gcc/testsuite/g++.dg/modules/merge-19_b.C new file mode 100644 index 0000000..345e7fe --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/merge-19_b.C @@ -0,0 +1,16 @@ +// PR c++/121238 +// { dg-module-do run } +// { dg-additional-options "-fmodules -fno-module-lazy" } + +#include "merge-19.h" +import "merge-19_a.H"; + +int main() { + const char fmt[] = "5"; + if (!parse_integer<void>(fmt)) + __builtin_abort(); + + S s{ 5 }; + if (!take_by_invisiref(s)) + __builtin_abort(); +} diff --git a/gcc/testsuite/g++.dg/modules/pr108080.H b/gcc/testsuite/g++.dg/modules/pr108080.H new file mode 100644 index 0000000..b05d957 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/pr108080.H @@ -0,0 +1,5 @@ +// PR c++/108080 +// { dg-additional-options "-fmodules" } +// Give a diagnostic message rather than a crash for unsupported features. + +[[gnu::optimize("-O3")]] void foo(); // { dg-warning "optimize" } diff --git a/gcc/testsuite/g++.dg/tc1/dr49.C b/gcc/testsuite/g++.dg/tc1/dr49.C index 753d96b..6ddea6b 100644 --- a/gcc/testsuite/g++.dg/tc1/dr49.C +++ b/gcc/testsuite/g++.dg/tc1/dr49.C @@ -10,8 +10,8 @@ template struct R<&p>; // OK template struct S<&p>; // OK due to parameter adjustment int *ptr; -template struct R<ptr>; // { dg-error "argument" } -template struct S<ptr>; // { dg-error "argument" } +template struct R<ptr>; // { dg-error "template argument|constant expression" } +template struct S<ptr>; // { dg-error "template argument|constant expression" } int v[5]; template struct R<v>; // OK due to implicit argument conversion diff --git a/gcc/testsuite/g++.dg/template/func2.C b/gcc/testsuite/g++.dg/template/func2.C index 0116f23..360f430 100644 --- a/gcc/testsuite/g++.dg/template/func2.C +++ b/gcc/testsuite/g++.dg/template/func2.C @@ -4,8 +4,7 @@ typedef void (*fptr)(); fptr zeroptr = 0; template<typename T, fptr F> struct foo { }; template<typename T> struct foo<T,zeroptr> { }; -// { dg-error "not a valid template argument" "not valid" { target *-*-* } .-1 } -// { dg-message "must be the address" "must be the address " { target *-*-* } .-2 } +// { dg-error "template argument|constant expression" "not valid" { target *-*-* } .-1 } // The rest is needed to trigger the ICE in 4.0 to 4.3: void f() { } diff --git a/gcc/testsuite/g++.dg/tree-prof/eh1.C b/gcc/testsuite/g++.dg/tree-prof/eh1.C new file mode 100644 index 0000000..10a3596 --- /dev/null +++ b/gcc/testsuite/g++.dg/tree-prof/eh1.C @@ -0,0 +1,34 @@ +/* { dg-options "-O3 -fdump-ipa-profile-details -fno-inline -fdump-tree-fixup_cfg3-details -fdump-tree-optimized-details" } */ +char a[10000]; +char b[10000]; +int sz = 1000; + +__attribute__((noipa)) + void test2 () +{ + throw (sz); +} +void +test () +{ + try + { + test2 (); + } + catch (int v) + { + __builtin_memcpy (b, a, v); + } +} +int +main () +{ + for (int i = 0; i < 100000; i++) + test (); +} +/* { dg-final-use-not-autofdo { scan-ipa-dump-times "Average value sum:100000000" 2 "profile" } } */ +/* 1 zero count for resx block. */ +/* { dg-final-use-not-autofdo { scan-tree-dump-times "count: 0" 1 "fixup_cfg3" } } */ +/* 2 zero count for resx block and return block since return gets duplicated by tracer. */ +/* { dg-final-use-not-autofdo { scan-tree-dump-times "count: 0" 2 "optimized" } } */ +/* { dg-final-use-not-autofdo { scan-tree-dump-times "Average value sum:100000000" 1 "optimized" } } */ diff --git a/gcc/testsuite/g++.dg/warn/pr121133-1.C b/gcc/testsuite/g++.dg/warn/pr121133-1.C new file mode 100644 index 0000000..6d6e13b --- /dev/null +++ b/gcc/testsuite/g++.dg/warn/pr121133-1.C @@ -0,0 +1,16 @@ +// PR c++/121133 +// { dg-do compile } +// { dg-options "-std=c++98 -Wno-long-long -pedantic-errors" } + +__extension__ typedef long long L; +__extension__ long long a; +struct S { + __extension__ long long b; +}; + +void +foo () +{ + __extension__ long long c; + c = c + (__extension__ (long long) 1); +} diff --git a/gcc/testsuite/g++.dg/warn/pr121133-2.C b/gcc/testsuite/g++.dg/warn/pr121133-2.C new file mode 100644 index 0000000..cd97a76 --- /dev/null +++ b/gcc/testsuite/g++.dg/warn/pr121133-2.C @@ -0,0 +1,5 @@ +// PR c++/121133 +// { dg-do compile } +// { dg-options "-std=c++98 -pedantic-errors" } + +#include "pr121133-1.C" diff --git a/gcc/testsuite/g++.dg/warn/pr121133-3.C b/gcc/testsuite/g++.dg/warn/pr121133-3.C new file mode 100644 index 0000000..9ffd407 --- /dev/null +++ b/gcc/testsuite/g++.dg/warn/pr121133-3.C @@ -0,0 +1,5 @@ +// PR c++/121133 +// { dg-do compile { target c++11 } } +// { dg-options "-pedantic-errors" } + +#include "pr121133-1.C" diff --git a/gcc/testsuite/g++.dg/warn/pr121133-4.C b/gcc/testsuite/g++.dg/warn/pr121133-4.C new file mode 100644 index 0000000..76885ba --- /dev/null +++ b/gcc/testsuite/g++.dg/warn/pr121133-4.C @@ -0,0 +1,5 @@ +// PR c++/121133 +// { dg-do compile { target c++11 } } +// { dg-options "-pedantic-errors -Wlong-long" } + +#include "pr121133-1.C" diff --git a/gcc/testsuite/g++.target/aarch64/mv-cpu-features.C b/gcc/testsuite/g++.target/aarch64/mv-cpu-features.C new file mode 100644 index 0000000..ad6accd --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/mv-cpu-features.C @@ -0,0 +1,82 @@ +/* { dg-do run } */ +/* { dg-require-ifunc "" } */ +/* { dg-require-effective-target mmap } */ +/* { dg-options "-Wno-experimental-fmv-target" } */ + +#include <cstdint> +#include <sys/auxv.h> + +__attribute__((target_version ("default"))) +int foo () +{ + return 0; +} + +__attribute__((target_version ("rng"))) +int foo () +{ + return 1; +} + +__attribute__((target_version ("lse"))) +int foo () +{ + return 2; +} + +typedef struct { + uint64_t size; + uint64_t hwcap; + uint64_t hwcap2; + uint64_t hwcap3; + uint64_t hwcap4; +} ifunc_arg_t; + +int impl () +{ + return 0; +} + +#ifndef _IFUNC_ARG_HWCAP +#define _IFUNC_ARG_HWCAP (1ULL << 62) +#endif + +extern "C" void +__init_cpu_features_resolver (unsigned long hwcap, const ifunc_arg_t *arg); + +extern "C" void * +fun_resolver (uint64_t a0, const ifunc_arg_t *a1) +{ + ifunc_arg_t arg = {}; + arg.size = sizeof (ifunc_arg_t); + /* These flags determine that the implementation of foo () + that returns 2 will be selected. */ + arg.hwcap = HWCAP_ATOMICS; + arg.hwcap2 = HWCAP2_RNG; + __init_cpu_features_resolver (arg.hwcap | _IFUNC_ARG_HWCAP, &arg); + return (void *)(uintptr_t)impl; +} + +extern "C" int fun (void) __attribute__((ifunc ("fun_resolver"))); + +/* In this test we expect that the manual resolver for the fun () + function will be executed before the automatic resolver for the + FMV function foo (). This is because resolvers from the same TU + are executed according to the offset of corresponding relocations. + + Automatic resolver is generated in a dedicated section while the + manually written resolver will be put in the .text section which + will come first. + + The manual resolver above calls __init_cpu_features_resolver() + supplying synthetic ifunc_arg_t fields that will determine the + choice for the FMV implementation. + */ + +int main () +{ + int res = fun (); + if (res == 0 && foo () == 2) + return 0; + return 1; +} diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C new file mode 100644 index 0000000..02880ef --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_binary_bf16_2.C @@ -0,0 +1,18 @@ +/* { dg-do compile }*/ +/* { dg-options "-O -ffinite-math-only -fno-signed-zeros -msve-vector-bits=2048 " } */ + +#include "unpacked_cond_binary_bf16_1.C" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 15 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 15 } } */ +/* { dg-final { scan-assembler-times {\tand} 30 } } */ + +/* { dg-final { scan-assembler-times {\tbfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tbfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tbfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ + +/* { dg-final { scan-assembler-times {\tbfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tbfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ + +// There's no BFSUBR. +/* { dg-final { scan-assembler-times {\tsel\t} 2 } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C new file mode 100644 index 0000000..95cd698 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_1.C @@ -0,0 +1,35 @@ +/* { dg-do compile }*/ +/* { dg-options "-O2 -fno-trapping-math -msve-vector-bits=2048 " } */ + +#include <stdint.h> +#pragma GCC target "arch=armv9-a+sve-b16b16" + +#define COND_BFMLA(TYPE, PRED_TYPE, MERGE) \ + TYPE test_bfmla_##TYPE##_##MERGE (TYPE a, TYPE b, TYPE c, PRED_TYPE p) \ + {return p ? a * b + c : MERGE; } + +#define COND_BFMLS(TYPE, PRED_TYPE, MERGE) \ + TYPE test_bfmls_##TYPE##_##MERGE (TYPE a, TYPE b, TYPE c, PRED_TYPE p) \ + {return p ? a * -b + c : MERGE; } + +#define TEST_OP(TYPE, PRED_TYPE, T) \ + T (TYPE, PRED_TYPE, c) \ + T (TYPE, PRED_TYPE, 0) + +#define TEST(TYPE, PTYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__ ((vector_size (SIZE))); \ + typedef PTYPE PTYPE##SIZE __attribute__ ((vector_size (SIZE))); \ + TEST_OP (TYPE##SIZE, PTYPE##SIZE, COND_BFMLA) \ + TEST_OP (TYPE##SIZE, PTYPE##SIZE, COND_BFMLS) + +TEST (__bf16, uint16_t, 128) + +TEST (__bf16, uint16_t, 64) + +/* { dg-final { scan-assembler-times {\tptrue} 8 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C new file mode 100644 index 0000000..c0d7c50 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_cond_ternary_bf16_2.C @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048" } */ + +#include "unpacked_cond_ternary_bf16_1.C" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 4 } } */ +/* { dg-final { scan-assembler-times {\tand} 8 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_1.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_1.C new file mode 100644 index 0000000..19bfe95 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_1.C @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048" } */ + +#define BFMLA(TYPE) \ + TYPE test_bfmla_##TYPE (TYPE a, TYPE b, TYPE c) \ + { return a * b + c; } + +#define BFMLS(TYPE) \ + TYPE test_bfmls_##TYPE (TYPE a, TYPE b, TYPE c) \ + { return a * -b + c; } + +#define TEST_TYPE(TYPE, SIZE) \ + typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \ + BFMLA (TYPE##SIZE) \ + BFMLS (TYPE##SIZE) + +#pragma GCC target "arch=armv9-a+sve-b16b16" + +TEST_TYPE (__bf16, 128) + +TEST_TYPE (__bf16, 64) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 2 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_2.C b/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_2.C new file mode 100644 index 0000000..ef37400 --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/unpacked_ternary_bf16_2.C @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msve-vector-bits=2048 -fno-trapping-math" } */ + +#include "unpacked_ternary_bf16_1.C" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 4 } } */ + +/* { dg-final { scan-assembler-times {\tbfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tbfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.dg/Warray-parameter-11.c b/gcc/testsuite/gcc.dg/Warray-parameter-11.c index 8ca1b55..e05835c 100644 --- a/gcc/testsuite/gcc.dg/Warray-parameter-11.c +++ b/gcc/testsuite/gcc.dg/Warray-parameter-11.c @@ -9,7 +9,7 @@ typedef __INTPTR_TYPE__ intptr_t; void f0 (double[!copysign (~2, 3)]); void f1 (double[!copysign (~2, 3)]); -void f1 (double[1]); // { dg-warning "-Warray-parameter" } +void f1 (double[1]); // { dg-warning "-Wvla-parameter" } void f2 (int[(int)+1.0]); void f2 (int[(int)+1.1]); @@ -21,4 +21,4 @@ extern struct S *sp; void f3 (int[(intptr_t)((char*)sp->a - (char*)sp)]); void f3 (int[(intptr_t)((char*)&sp->a[0] - (char*)sp)]); -void f3 (int[(intptr_t)((char*)&sp->a[1] - (char*)sp)]); // { dg-warning "-Warray-parameter" } +void f3 (int[(intptr_t)((char*)&sp->a[1] - (char*)sp)]); // { dg-warning "-Wvla-parameter" } diff --git a/gcc/testsuite/gcc.dg/Warray-parameter.c b/gcc/testsuite/gcc.dg/Warray-parameter.c index 6c5195a..31879a8 100644 --- a/gcc/testsuite/gcc.dg/Warray-parameter.c +++ b/gcc/testsuite/gcc.dg/Warray-parameter.c @@ -118,8 +118,7 @@ typedef int IA2[2]; typedef int IA3[3]; // The message should differentiate between the [] form and *. -void f1IAx_A1 (IAx); // { dg-message "previously declared as 'int\\\[]'" "pr?????" { xfail *-*-* } } - // { dg-message "previously declared as 'int *\\\*'" "note" { target *-*-* } .-1 } +void f1IAx_A1 (IAx); // { dg-message "previously declared as 'int\\\[]'" } void f1IAx_A1 (IA1); // { dg-message "argument 1 of type 'int\\\[1]' with mismatched bound" } void f1IA1_A2 (IA1); // { dg-message "previously declared as 'int\\\[1]'" } diff --git a/gcc/testsuite/gcc.dg/plugin/must-tail-call-2.c b/gcc/testsuite/gcc.dg/plugin/must-tail-call-2.c index d51d15c..6f65f4a 100644 --- a/gcc/testsuite/gcc.dg/plugin/must-tail-call-2.c +++ b/gcc/testsuite/gcc.dg/plugin/must-tail-call-2.c @@ -55,5 +55,5 @@ volatile fn_ptr_t fn_ptr; void test_5 (void) { - fn_ptr (); /* { dg-error "cannot tail-call: " } */ + fn_ptr (); } diff --git a/gcc/testsuite/gcc.dg/pr116906-1.c b/gcc/testsuite/gcc.dg/pr116906-1.c index 7187507..ee60ad6 100644 --- a/gcc/testsuite/gcc.dg/pr116906-1.c +++ b/gcc/testsuite/gcc.dg/pr116906-1.c @@ -1,3 +1,4 @@ +/* { dg-do run } */ /* { dg-require-effective-target alarm } */ /* { dg-require-effective-target signal } */ /* { dg-options "-O2" } */ diff --git a/gcc/testsuite/gcc.dg/pr116906-2.c b/gcc/testsuite/gcc.dg/pr116906-2.c index 41a352b..4172ec3 100644 --- a/gcc/testsuite/gcc.dg/pr116906-2.c +++ b/gcc/testsuite/gcc.dg/pr116906-2.c @@ -1,3 +1,4 @@ +/* { dg-do run } */ /* { dg-require-effective-target alarm } */ /* { dg-require-effective-target signal } */ /* { dg-options "-O2 -fno-tree-ch" } */ diff --git a/gcc/testsuite/gcc.dg/pr120660.c b/gcc/testsuite/gcc.dg/pr120660.c new file mode 100644 index 0000000..6e8c5e8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr120660.c @@ -0,0 +1,19 @@ +/* { dg-do run } */ +/* { dg-options "-O -favoid-store-forwarding" } */ + +int c; + +short +foo (short s) +{ + __builtin_memset (&s, c, 1); + return s; +} + +int +main () +{ + short x = foo (0x1111); + if (x != 0x1100 && x != 0x0011) + __builtin_abort(); +} diff --git a/gcc/testsuite/gcc.dg/pr121322.c b/gcc/testsuite/gcc.dg/pr121322.c new file mode 100644 index 0000000..2fad5b5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr121322.c @@ -0,0 +1,14 @@ +/* PR middle-end/121322 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +unsigned long long +foo (unsigned long long *p) +{ + unsigned long long a = *p; + unsigned long long b = __builtin_bswap64 (a); + return ((b << 32) + | ((b >> 8) & 0xff000000ULL) + | ((b >> 24) & 0xff0000ULL) + | ((b >> 40) & 0xff00ULL)); +} diff --git a/gcc/testsuite/gcc.dg/pr78185.c b/gcc/testsuite/gcc.dg/pr78185.c index ada8b1b..4c3af4f 100644 --- a/gcc/testsuite/gcc.dg/pr78185.c +++ b/gcc/testsuite/gcc.dg/pr78185.c @@ -1,3 +1,4 @@ +/* { dg-do run } */ /* { dg-require-effective-target alarm } */ /* { dg-require-effective-target signal } */ /* { dg-options "-O" } */ diff --git a/gcc/testsuite/gcc.dg/torture/pr121236-1.c b/gcc/testsuite/gcc.dg/torture/pr121236-1.c new file mode 100644 index 0000000..2b397e3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr121236-1.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* PR tree-optimization/121236 */ + + +unsigned func_26(short *p_27, int gg, int p) { + unsigned l_184 = 0; + unsigned m = 0; + for (int g_59 = 0; g_59 < 10; g_59++) + { + if (gg) + l_184--; + else + { + m |= l_184 |= p; + (l_184)--; + } + } + return m; +} + diff --git a/gcc/testsuite/gcc.dg/torture/pr121295-1.c b/gcc/testsuite/gcc.dg/torture/pr121295-1.c new file mode 100644 index 0000000..7825c6e --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr121295-1.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-additional-options " -fno-tree-copy-prop -fno-tree-pre -fno-code-hoisting" */ + +/* PR tree-optimization/121295 */ + + +int a, b, c; +int main() { + int *d = &a; + while (b) + b = (*d &= 10) <= 0 || (*d = c); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cswtch-7.c b/gcc/testsuite/gcc.dg/tree-ssa/cswtch-7.c new file mode 100644 index 0000000..7b797807 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/cswtch-7.c @@ -0,0 +1,48 @@ +/* PR tree-optimization/120523 */ +/* PR tree-optimization/120451 */ +/* { dg-do compile { target elf } } */ +/* { dg-options "-O2" } */ + +void foo (int, int); + +__attribute__((noinline, noclone)) void +f1 (int v, int w) +{ + int i, j; + if (w) + { + i = 129; + j = i - 1; + goto lab; + } + switch (v) + { + case 170: + j = 7; + i = 27; + break; + case 171: + i = 8; + j = 122; + break; + case 172: + i = 21; + j = -19; + break; + case 173: + i = 18; + j = 17; + break; + case 174: + i = 33; + j = 55; + break; + default: + __builtin_abort (); + } + + lab: + foo (i, j); +} + +/* { dg-final { scan-assembler ".rodata.cst32" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr121264.c b/gcc/testsuite/gcc.dg/tree-ssa/pr121264.c new file mode 100644 index 0000000..bd5acc0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr121264.c @@ -0,0 +1,12 @@ +/* PR tree-optimization/121264 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump " \\\| " "optimized" } } */ + +struct A { char b; char c[0x20000010]; } a; + +int +foo () +{ + return a.c[0x20000000] || a.c[1]; +} diff --git a/gcc/testsuite/gcc.dg/vect/pr112325.c b/gcc/testsuite/gcc.dg/vect/pr112325.c index 8689fbf..d380595 100644 --- a/gcc/testsuite/gcc.dg/vect/pr112325.c +++ b/gcc/testsuite/gcc.dg/vect/pr112325.c @@ -5,6 +5,7 @@ /* { dg-additional-options "-mavx2" { target x86_64-*-* i?86-*-* } } */ /* { dg-additional-options "--param max-completely-peeled-insns=200" { target powerpc64*-*-* } } */ /* { dg-additional-options "-mlsx" { target loongarch64-*-* } } */ +/* { dg-additional-options "--param max-completely-peeled-insns=200 --param min-vect-loop-bound=0" { target s390*-*-* } } */ typedef unsigned short ggml_fp16_t; static float table_f32_f16[1 << 16]; diff --git a/gcc/testsuite/gcc.dg/vect/pr117888-1.c b/gcc/testsuite/gcc.dg/vect/pr117888-1.c index 0b31fcd..884aed2 100644 --- a/gcc/testsuite/gcc.dg/vect/pr117888-1.c +++ b/gcc/testsuite/gcc.dg/vect/pr117888-1.c @@ -5,6 +5,7 @@ /* { dg-additional-options "-mavx2" { target x86_64-*-* i?86-*-* } } */ /* { dg-additional-options "--param max-completely-peeled-insns=200" { target powerpc64*-*-* } } */ /* { dg-additional-options "-mlsx" { target loongarch64-*-* } } */ +/* { dg-additional-options "--param max-completely-peeled-insns=200 --param min-vect-loop-bound=0" { target s390*-*-* } } */ typedef unsigned short ggml_fp16_t; static float table_f32_f16[1 << 16]; diff --git a/gcc/testsuite/gcc.dg/vect/pr120687-1.c b/gcc/testsuite/gcc.dg/vect/pr120687-1.c new file mode 100644 index 0000000..ce9cf63 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr120687-1.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ + +unsigned +frd (unsigned *p, unsigned *lastone) +{ + unsigned sum = 0; + for (; p <= lastone; p += 16) + sum += p[0] + p[1] + p[2] + p[3] + p[4] + p[5] + p[6] + p[7] + + p[8] + p[9] + p[10] + p[11] + p[12] + p[13] + p[14] + p[15]; + return sum; +} + +/* { dg-final { scan-tree-dump "reduction: detected reduction chain" "vect" } } */ +/* { dg-final { scan-tree-dump-not "SLP discovery of reduction chain failed" "vect" } } */ +/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr120687-2.c b/gcc/testsuite/gcc.dg/vect/pr120687-2.c new file mode 100644 index 0000000..dfc6dc7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr120687-2.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_float } */ +/* { dg-additional-options "-ffast-math" } */ + +float +frd (float *p, float *lastone) +{ + float sum = 0; + for (; p <= lastone; p += 16) + sum += p[0] + p[1] + p[2] + p[3] + p[4] + p[5] + p[6] + p[7] + + p[8] + p[9] + p[10] + p[11] + p[12] + p[13] + p[14] + p[15]; + return sum; +} + +/* { dg-final { scan-tree-dump "reduction: detected reduction chain" "vect" } } */ +/* { dg-final { scan-tree-dump-not "SLP discovery of reduction chain failed" "vect" } } */ +/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr120687-3.c b/gcc/testsuite/gcc.dg/vect/pr120687-3.c new file mode 100644 index 0000000..f20a66a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr120687-3.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_double } */ +/* { dg-additional-options "-ffast-math" } */ + +float +frd (float *p, float *lastone) +{ + float sum = 0; + for (; p <= lastone; p += 2) + sum += p[0] + p[1]; + return sum; +} + +/* { dg-final { scan-tree-dump "reduction: detected reduction chain" "vect" } } */ +/* { dg-final { scan-tree-dump-not "SLP discovery of reduction chain failed" "vect" } } */ +/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c new file mode 100644 index 0000000..e6b071c --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c @@ -0,0 +1,62 @@ +/* PR tree-optimization/121190 */ +/* { dg-options "-O3" } */ +/* { dg-additional-options "-march=znver2" { target x86_64-*-* i?86-*-* } } */ +/* { dg-require-effective-target mmap } */ +/* { dg-require-effective-target vect_early_break } */ + +#include <stdint.h> +#include <string.h> +#include <stdio.h> +#include <sys/mman.h> +#include <unistd.h> +#include "tree-vect.h" + +#define MAX_COMPARE 5000 + +__attribute__((noipa)) +int diff (uint64_t *restrict p, uint64_t *restrict q) +{ + int i = 0; + while (i < MAX_COMPARE) { + if (*(p + i) != *(q + i)) + return i; + i++; + } + return -1; +} + +int main () +{ + check_vect (); + + long pgsz = sysconf (_SC_PAGESIZE); + if (pgsz == -1) { + fprintf (stderr, "sysconf failed\n"); + return 0; + } + + /* Allocate 2 consecutive pages of memory and let p1 and p2 point to the + beginning of each. */ + void *mem = mmap (NULL, pgsz * 2, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mem == MAP_FAILED) { + fprintf (stderr, "mmap failed\n"); + return 0; + } + uint64_t *p1 = (uint64_t *) mem; + uint64_t *p2 = (uint64_t *) mem + pgsz / sizeof (uint64_t); + + /* Fill the first page with zeros, except for its last 64 bits. */ + memset (p1, 0, pgsz); + *(p2 - 1) = -1; + + /* Make the 2nd page not accessable. */ + mprotect (p2, pgsz, PROT_NONE); + + /* Calls to diff should not read the 2nd page. */ + for (int i = 1; i <= 20; i++) { + if (diff (p2 - i, p1) != i - 1) + __builtin_abort (); + } +} + diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c new file mode 100644 index 0000000..8cb62bf --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c @@ -0,0 +1,54 @@ +/* PR tree-optimization/121020 */ +/* { dg-options "-O3 --vect-cost-model=unlimited" } */ +/* { dg-additional-options "-march=znver2" { target x86_64-*-* i?86-*-* } } */ +/* { dg-require-effective-target mmap } */ +/* { dg-require-effective-target vect_early_break } */ + +#include <stdint.h> +#include <stdio.h> +#include <sys/mman.h> +#include <unistd.h> +#include "tree-vect.h" + +__attribute__((noipa)) +bool equal (uint64_t *restrict p, uint64_t *restrict q, int length) +{ + for (int i = 0; i < length; i++) { + if (*(p + i) != *(q + i)) + return false; + } + return true; +} + +int main () +{ + check_vect (); + + long pgsz = sysconf (_SC_PAGESIZE); + if (pgsz == -1) { + fprintf (stderr, "sysconf failed\n"); + return 0; + } + + /* Allocate a whole page of memory. */ + void *mem = mmap (NULL, pgsz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mem == MAP_FAILED) { + fprintf (stderr, "mmap failed\n"); + return 0; + } + uint64_t *p1 = (uint64_t *) mem; + uint64_t *p2 = (uint64_t *) mem + 32; + + /* The first 16 elements pointed to by p1 and p2 are the same. */ + for (int i = 0; i < 32; i++) { + *(p1 + i) = 0; + *(p2 + i) = (i < 16 ? 0 : -1); + } + + /* All calls to equal should return true. */ + for (int len = 0; len < 16; len++) { + if (!equal (p1 + 1, p2 + 1, len)) + __builtin_abort(); + } +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c index 86a632f..6abfcd6 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c @@ -18,4 +18,4 @@ int main1 (short X) } } -/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target { ! "x86_64-*-* i?86-*-*" } } } } */ +/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target { ! "x86_64-*-* i?86-*-* arm*-*-*" } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-pr121130.c b/gcc/testsuite/gcc.dg/vect/vect-simd-pr121130.c new file mode 100644 index 0000000..c882ded --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-pr121130.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ + +int n2; + +__attribute__((simd)) char +w7(void) +{ + short int xb = n2; + xb = w7() < 1; + return xb; +} diff --git a/gcc/testsuite/gcc.target/aarch64/cmpbr.c b/gcc/testsuite/gcc.target/aarch64/cmpbr.c index a86af9d..34630f9 100644 --- a/gcc/testsuite/gcc.target/aarch64/cmpbr.c +++ b/gcc/testsuite/gcc.target/aarch64/cmpbr.c @@ -121,7 +121,7 @@ FAR_BRANCH(u64, 42); /* ** u8_x0_eq_x1: -** cbbeq w1, w0, .L([0-9]+) +** cbbeq (?:w1, w0|w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -129,7 +129,7 @@ FAR_BRANCH(u64, 42); /* ** u8_x0_ne_x1: -** cbbne w1, w0, .L([0-9]+) +** cbbne (?:w1, w0|w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -137,7 +137,7 @@ FAR_BRANCH(u64, 42); /* ** u8_x0_ult_x1: -** cbbhi w1, w0, .L([0-9]+) +** (?:cbbhi w1, w0|cbblo w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -145,7 +145,7 @@ FAR_BRANCH(u64, 42); /* ** u8_x0_ule_x1: -** cbbhs w1, w0, .L([0-9]+) +** (?:cbbhs w1, w0|cbbls w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -153,7 +153,7 @@ FAR_BRANCH(u64, 42); /* ** u8_x0_ugt_x1: -** cbblo w1, w0, .L([0-9]+) +** (?:cbblo w1, w0|cbbhi w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -161,7 +161,7 @@ FAR_BRANCH(u64, 42); /* ** u8_x0_uge_x1: -** cbbls w1, w0, .L([0-9]+) +** (?:cbbls w1, w0|cbbhs w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -169,7 +169,7 @@ FAR_BRANCH(u64, 42); /* ** i8_x0_slt_x1: -** cbbgt w1, w0, .L([0-9]+) +** (?:cbbgt w1, w0|cbblt w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -177,7 +177,7 @@ FAR_BRANCH(u64, 42); /* ** i8_x0_sle_x1: -** cbbge w1, w0, .L([0-9]+) +** (?:cbbge w1, w0|cbble w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -185,7 +185,7 @@ FAR_BRANCH(u64, 42); /* ** i8_x0_sgt_x1: -** cbblt w1, w0, .L([0-9]+) +** (?:cbblt w1, w0|cbbgt w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -193,7 +193,7 @@ FAR_BRANCH(u64, 42); /* ** i8_x0_sge_x1: -** cbble w1, w0, .L([0-9]+) +** (?:cbble w1, w0|cbbge w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -201,7 +201,7 @@ FAR_BRANCH(u64, 42); /* ** u16_x0_eq_x1: -** cbheq w1, w0, .L([0-9]+) +** cbheq (?:w1, w0|w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -209,7 +209,7 @@ FAR_BRANCH(u64, 42); /* ** u16_x0_ne_x1: -** cbhne w0|w1, w1|w0, .L([0-9]+) +** cbhne (?:w1, w0|w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -217,7 +217,7 @@ FAR_BRANCH(u64, 42); /* ** u16_x0_ult_x1: -** cbhhi w1, w0, .L([0-9]+) +** (?:cbhhi w1, w0|cbhlo w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -225,7 +225,7 @@ FAR_BRANCH(u64, 42); /* ** u16_x0_ule_x1: -** cbhhs w1, w0, .L([0-9]+) +** (?:cbhhs w1, w0|cbhls w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -233,7 +233,7 @@ FAR_BRANCH(u64, 42); /* ** u16_x0_ugt_x1: -** cbhlo w1, w0, .L([0-9]+) +** (?:cbhlo w1, w0|cbhhi w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -241,7 +241,7 @@ FAR_BRANCH(u64, 42); /* ** u16_x0_uge_x1: -** cbhls w1, w0, .L([0-9]+) +** (?:cbhls w1, w0|cbhhs w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -249,7 +249,7 @@ FAR_BRANCH(u64, 42); /* ** i16_x0_slt_x1: -** cbhgt w1, w0, .L([0-9]+) +** (?:cbhgt w1, w0|cbhlt w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -257,7 +257,7 @@ FAR_BRANCH(u64, 42); /* ** i16_x0_sle_x1: -** cbhge w1, w0, .L([0-9]+) +** (?:cbhge w1, w0|cbhle w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -265,7 +265,7 @@ FAR_BRANCH(u64, 42); /* ** i16_x0_sgt_x1: -** cbhlt w1, w0, .L([0-9]+) +** (?:cbhlt w1, w0|cbhgt w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken @@ -273,7 +273,7 @@ FAR_BRANCH(u64, 42); /* ** i16_x0_sge_x1: -** cbhle w1, w0, .L([0-9]+) +** (?:cbhle w1, w0|cbhge w0, w1), .L([0-9]+) ** b not_taken ** .L\1: ** b taken diff --git a/gcc/testsuite/gcc.target/aarch64/ifunc-resolver-0.c b/gcc/testsuite/gcc.target/aarch64/ifunc-resolver-0.c new file mode 100644 index 0000000..e544b04f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/ifunc-resolver-0.c @@ -0,0 +1,12 @@ +/* { dg-do run } */ +/* { dg-require-ifunc "" } */ +/* { dg-require-effective-target mmap } */ +/* { dg-options "-Wno-experimental-fmv-target" } */ + +#include <stdint.h> + +typedef struct { + uint64_t size; +} ifunc_arg_t; + +#include "ifunc-resolver.in" diff --git a/gcc/testsuite/gcc.target/aarch64/ifunc-resolver-1.c b/gcc/testsuite/gcc.target/aarch64/ifunc-resolver-1.c new file mode 100644 index 0000000..be70687 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/ifunc-resolver-1.c @@ -0,0 +1,13 @@ +/* { dg-do run } */ +/* { dg-require-ifunc "" } */ +/* { dg-require-effective-target mmap } */ +/* { dg-options "-Wno-experimental-fmv-target" } */ + +#include <stdint.h> + +typedef struct { + uint64_t size; + uint64_t hwcap; +} ifunc_arg_t; + +#include "ifunc-resolver.in" diff --git a/gcc/testsuite/gcc.target/aarch64/ifunc-resolver-2.c b/gcc/testsuite/gcc.target/aarch64/ifunc-resolver-2.c new file mode 100644 index 0000000..bf594d0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/ifunc-resolver-2.c @@ -0,0 +1,14 @@ +/* { dg-do run } */ +/* { dg-require-ifunc "" } */ +/* { dg-require-effective-target mmap } */ +/* { dg-options "-Wno-experimental-fmv-target" } */ + +#include <stdint.h> + +typedef struct { + uint64_t size; + uint64_t hwcap; + uint64_t hwcap2; +} ifunc_arg_t; + +#include "ifunc-resolver.in" diff --git a/gcc/testsuite/gcc.target/aarch64/ifunc-resolver-3.c b/gcc/testsuite/gcc.target/aarch64/ifunc-resolver-3.c new file mode 100644 index 0000000..f16d01b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/ifunc-resolver-3.c @@ -0,0 +1,15 @@ +/* { dg-do run } */ +/* { dg-require-ifunc "" } */ +/* { dg-require-effective-target mmap } */ +/* { dg-options "-Wno-experimental-fmv-target" } */ + +#include <stdint.h> + +typedef struct { + uint64_t size; + uint64_t hwcap; + uint64_t hwcap2; + uint64_t hwcap3; +} ifunc_arg_t; + +#include "ifunc-resolver.in" diff --git a/gcc/testsuite/gcc.target/aarch64/ifunc-resolver-4.c b/gcc/testsuite/gcc.target/aarch64/ifunc-resolver-4.c new file mode 100644 index 0000000..1b4ccbd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/ifunc-resolver-4.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-require-ifunc "" } */ +/* { dg-require-effective-target mmap } */ +/* { dg-options "-Wno-experimental-fmv-target" } */ + +#include <stdint.h> + +typedef struct { + uint64_t size; + uint64_t hwcap; + uint64_t hwcap2; + uint64_t hwcap3; + uint64_t hwcap4; +} ifunc_arg_t; + +#include "ifunc-resolver.in" diff --git a/gcc/testsuite/gcc.target/aarch64/ifunc-resolver.in b/gcc/testsuite/gcc.target/aarch64/ifunc-resolver.in new file mode 100644 index 0000000..ada0b33 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/ifunc-resolver.in @@ -0,0 +1,48 @@ +#include <unistd.h> +#include <string.h> +#include <sys/mman.h> + +/* Allocate memory buffer of size LEN with a protected page + following right after the buffer end so that any memory + accesses past the end of the buffer would trigger SEGFAUL. */ +void *allocate_mem (size_t len) +{ + size_t pagesize = sysconf (_SC_PAGESIZE); + char *m = mmap (NULL, pagesize * 2, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0); + mprotect (m + pagesize, pagesize, PROT_NONE); + m = m + pagesize - len; + memset(m, 0, len); + return m; +} + +int impl () +{ + return 0; +} + +#ifndef _IFUNC_ARG_HWCAP +#define _IFUNC_ARG_HWCAP (1ULL << 62) +#endif + +void +__init_cpu_features_resolver (unsigned long hwcap, const void *arg); + +static void * +fun_resolver (uint64_t a0, const uint64_t *a1) +{ + ifunc_arg_t *arg = allocate_mem (sizeof (ifunc_arg_t)); + arg->size = sizeof (ifunc_arg_t); + /* Call this function with synthetic ifunc_arg_t arg. */ + __init_cpu_features_resolver (_IFUNC_ARG_HWCAP, arg); + return (void *)(uintptr_t)impl; +} + +int fun (void) __attribute__ ((ifunc ("fun_resolver"))); + +int main (int argc, char *argv[]) +{ + return fun (); +} diff --git a/gcc/testsuite/gcc.target/aarch64/pr121300.c b/gcc/testsuite/gcc.target/aarch64/pr121300.c new file mode 100644 index 0000000..5f2cd9a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr121300.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-S -O3 -march=armv9-a+sme2" } */ + +#include <arm_sme.h> + +svfloat16x2_t test (svfloat16x2_t zd, svfloat16x2_t zm) __arm_streaming +{ + return svamin_f16_x2 (zd, zm); // { dg-error "ACLE function .svamin_f16_x2. requires ISA extension .faminmax." } +} diff --git a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_1.c b/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_1.c index acd2e11..8fc1569 100644 --- a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_1.c +++ b/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_1.c @@ -4,24 +4,24 @@ /* ** uadd: -** dup v([0-9]+).8b, w1 -** dup v([0-9]+).8b, w0 +** dup v([0-9]+).8b, w[01] +** dup v([0-9]+).8b, w[01] ** uqadd b([0-9]+), (?:b\2, b\1|b\1, b\2) ** umov w0, v\3.b\[0\] ** ret */ /* ** uadd2: -** dup v([0-9]+).8b, w1 -** dup v([0-9]+).8b, w0 +** dup v([0-9]+).8b, w[01] +** dup v([0-9]+).8b, w[01] ** uqadd b([0-9]+), (?:b\2, b\1|b\1, b\2) ** umov w0, v\3.b\[0\] ** ret */ /* ** usub: { xfail *-*-* } -** dup v([0-9]+).8b, w1 -** dup v([0-9]+).8b, w0 +** dup v([0-9]+).8b, w[01] +** dup v([0-9]+).8b, w[01] ** uqsub b([0-9]+), b\1, b\2 ** umov w0, v\3.b\[0\] ** ret diff --git a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_2.c b/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_2.c index 86c88f8..dd0fefa 100644 --- a/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_2.c +++ b/gcc/testsuite/gcc.target/aarch64/saturating_arithmetic_2.c @@ -4,16 +4,16 @@ /* ** uadd: -** dup v([0-9]+).4h, w1 -** dup v([0-9]+).4h, w0 +** dup v([0-9]+).4h, w[01] +** dup v([0-9]+).4h, w[01] ** uqadd h([0-9]+), (?:h\2, h\1|h\1, h\2) ** umov w0, v\3.h\[0\] ** ret */ /* ** uadd2: -** dup v([0-9]+).4h, w1 -** dup v([0-9]+).4h, w0 +** dup v([0-9]+).4h, w[01] +** dup v([0-9]+).4h, w[01] ** uqadd h([0-9]+), (?:h\2, h\1|h\1, h\2) ** umov w0, v\3.h\[0\] ** ret diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c index 98922aa..3a63da7 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_1.c @@ -1,5 +1,5 @@ // { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables" } -// { dg-final { check-function-bodies "**" "" } } +// { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {\t\.inst} } } void ns_callee (); void s_callee () [[arm::streaming]]; @@ -218,7 +218,7 @@ sc_caller_x1 (int *ptr, int a) [[arm::streaming_compatible]] ** bl ns_callee_stack ** ldr x16, \[x29, #?16\] ** tbz x16, 0, .* -** smstart sm +** .inst 0xd503437f // smstart sm ** ... */ void diff --git a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_11.c b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_11.c index ee6f987..c72d03f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_11.c +++ b/gcc/testsuite/gcc.target/aarch64/sme/call_sm_switch_11.c @@ -1,5 +1,6 @@ // { dg-options "-O -fomit-frame-pointer -fno-optimize-sibling-calls -funwind-tables -mtrack-speculation" } -// { dg-final { check-function-bodies "**" "" } } +// { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {\t\.inst} } } + void ns_callee (); void s_callee () [[arm::streaming]]; @@ -196,7 +197,7 @@ sc_caller_x1 (int *ptr, int a) [[arm::streaming_compatible]] ** tst x16, #?1 ** beq [^\n]* ** csel x15, x15, xzr, ne -** smstart sm +** .inst 0xd503437f // smstart sm ** ... */ void diff --git a/gcc/testsuite/gcc.target/aarch64/sme/pr121028.c b/gcc/testsuite/gcc.target/aarch64/sme/pr121028.c new file mode 100644 index 0000000..a6aa119 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sme/pr121028.c @@ -0,0 +1,46 @@ +// PR121028 +// { dg-do assemble { target aarch64_asm_sme_ok } } +// { dg-options "-O --save-temps" } +// { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {\t\.inst} } } + +void ns_callee (); + +/* +** sc_caller_sme: +** ... +** mrs x16, svcr +** str x16, \[x29, #?16\] +** ldr x16, \[x29, #?16\] +** tbz x16, 0, .* +** smstop sm +** bl ns_callee +** ldr x16, \[x29, #?16\] +** tbz x16, 0, .* +** smstart sm +** ... +*/ +void sc_caller_sme() __arm_streaming_compatible +{ + ns_callee (); +} + +#pragma GCC target "+nosme" + +/* +** sc_caller_nosme: +** ... +** bl __arm_sme_state +** str x0, \[x29, #?16\] +** ldr x16, \[x29, #?16\] +** tbz x16, 0, .* +** .inst 0xd503427f // smstop sm +** bl ns_callee +** ldr x16, \[x29, #?16\] +** tbz x16, 0, .* +** .inst 0xd503437f // smstart sm +** ... +*/ +void sc_caller_nosme() __arm_streaming_compatible +{ + ns_callee (); +} diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f16_x2.c index 90b5438..b9fd96a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f16_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f16_x2.c @@ -1,3 +1,5 @@ +/* { dg-do assemble { target { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } } */ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ #include "test_sme2_acle.h" diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f16_x4.c index d168ad7..70e2697 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f16_x4.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f16_x4.c @@ -1,3 +1,5 @@ +/* { dg-do assemble { target { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } } */ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ #include "test_sme2_acle.h" diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f32_x2.c index 618d50b9..cf57d1b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f32_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f32_x2.c @@ -1,3 +1,5 @@ +/* { dg-do assemble { target { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } } */ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ #include "test_sme2_acle.h" diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f32_x4.c index 981e78c..10d9175 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f32_x4.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f32_x4.c @@ -1,3 +1,5 @@ +/* { dg-do assemble { target { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } } */ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ #include "test_sme2_acle.h" diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f64_x2.c index e93a409..b7918ab 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f64_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f64_x2.c @@ -1,3 +1,5 @@ +/* { dg-do assemble { target { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } } */ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ #include "test_sme2_acle.h" diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f64_x4.c index 2db629e..153a37a 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f64_x4.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amax_f64_x4.c @@ -1,3 +1,5 @@ +/* { dg-do assemble { target { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } } */ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ #include "test_sme2_acle.h" diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f16_x2.c index 74604e1..bd6e13b 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f16_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f16_x2.c @@ -1,3 +1,5 @@ +/* { dg-do assemble { target { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } } */ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ #include "test_sme2_acle.h" diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f16_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f16_x4.c index bc3779b..9f71b1f 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f16_x4.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f16_x4.c @@ -1,3 +1,5 @@ +/* { dg-do assemble { target { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } } */ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ #include "test_sme2_acle.h" diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f32_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f32_x2.c index 43e3075..aaa6a2e 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f32_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f32_x2.c @@ -1,3 +1,5 @@ +/* { dg-do assemble { target { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } } */ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ #include "test_sme2_acle.h" diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f32_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f32_x4.c index 6bd20f8f..34c1098 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f32_x4.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f32_x4.c @@ -1,3 +1,5 @@ +/* { dg-do assemble { target { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } } */ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ #include "test_sme2_acle.h" diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f64_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f64_x2.c index 3bbef3f..e4138e0 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f64_x2.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f64_x2.c @@ -1,3 +1,5 @@ +/* { dg-do assemble { target { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } } */ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ #include "test_sme2_acle.h" diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f64_x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f64_x4.c index 6f4c9b7..8fbabe7 100644 --- a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f64_x4.c +++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/amin_f64_x4.c @@ -1,3 +1,5 @@ +/* { dg-do assemble { target { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } */ +/* { dg-do compile { target { ! { aarch64_asm_sme2_ok && aarch64_asm_faminmax_ok } } } } */ /* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ #include "test_sme2_acle.h" diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c new file mode 100644 index 0000000..f84ded5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmax_2.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_builtin_fmax_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */ +/* { dg-final { scan-assembler-times {\tand} 21 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c new file mode 100644 index 0000000..bceddf9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_builtin_fmin_2.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_builtin_fmin_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */ +/* { dg-final { scan-assembler-times {\tand} 21 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c new file mode 100644 index 0000000..e59864b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fadd_2.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fadd_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 11 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 22 } } */ +/* { dg-final { scan-assembler-times {\tand} 33 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 19 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 19 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 19 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 5 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 10 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c new file mode 100644 index 0000000..1ca3dbf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fdiv_2.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fdiv_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 3 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 6 } } */ +/* { dg-final { scan-assembler-times {\tand} 9 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 7 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 7 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 7 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfdiv\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c new file mode 100644 index 0000000..282f3ed --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmaxnm_2.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-signed-zeros -ffinite-math-only" } */ + +#include "unpacked_cond_fmaxnm_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */ +/* { dg-final { scan-assembler-times {\tand} 21 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c new file mode 100644 index 0000000..8226a6f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fminnm_2.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-signed-zeros -ffinite-math-only" } */ + +#include "unpacked_cond_fminnm_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */ +/* { dg-final { scan-assembler-times {\tand} 21 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.0\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.0\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c new file mode 100644 index 0000000..cae9242 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_1.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */ + +#include <stdint.h> + +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) +#define FNMLA(SUFF) -FMLA (SUFF) +#define FNMLS(SUFF) -FMLS (SUFF) + +#define a_i a[i] +#define b_i b[i] +#define c_i c[i] + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \ + void \ + f_##TYPE0##_##TYPE1##_##MERGE (TYPE0 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c, \ + TYPE1 *__restrict p) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + out[i] = p[i] ? FN : MERGE; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, 0) + +TEST_ALL (FMLA (f16), _Float16, uint64_t, 32) + +TEST_ALL (FMLA (f16), _Float16, uint32_t, 64) + +TEST_ALL (FMLA (f32), float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c new file mode 100644 index 0000000..72e04a4 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmla_2.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fmla_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */ +/* { dg-final { scan-assembler-times {\tand} 12 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c new file mode 100644 index 0000000..db0f818 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_1.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */ + +#include <stdint.h> + +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) +#define FNMLA(SUFF) -FMLA (SUFF) +#define FNMLS(SUFF) -FMLS (SUFF) + +#define a_i a[i] +#define b_i b[i] +#define c_i c[i] + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \ + void \ + f_##TYPE0##_##TYPE1##_##MERGE (TYPE0 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c, \ + TYPE1 *__restrict p) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + out[i] = p[i] ? FN : MERGE; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, 0) + +TEST_ALL (FMLS (f16), _Float16, uint64_t, 32) + +TEST_ALL (FMLS (f16), _Float16, uint32_t, 64) + +TEST_ALL (FMLS (f32), float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c new file mode 100644 index 0000000..3012052 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmls_2.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fmls_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */ +/* { dg-final { scan-assembler-times {\tand} 12 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c new file mode 100644 index 0000000..21713f5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fmul_2.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fmul_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 5 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 10 } } */ +/* { dg-final { scan-assembler-times {\tand} 15 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 10 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 10 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 10 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c new file mode 100644 index 0000000..07bab63 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_1.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */ + +#include <stdint.h> + +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) +#define FNMLA(SUFF) -FMLA (SUFF) +#define FNMLS(SUFF) -FMLS (SUFF) + +#define a_i a[i] +#define b_i b[i] +#define c_i c[i] + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \ + void \ + f_##TYPE0##_##TYPE1##_##MERGE (TYPE0 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c, \ + TYPE1 *__restrict p) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + out[i] = p[i] ? FN : MERGE; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, 0) + +TEST_ALL (FNMLA (f16), _Float16, uint64_t, 32) + +TEST_ALL (FNMLA (f16), _Float16, uint32_t, 64) + +TEST_ALL (FNMLA (f32), float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c new file mode 100644 index 0000000..daef4e49 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmla_2.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fnmla_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */ +/* { dg-final { scan-assembler-times {\tand} 12 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmad\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfnmla\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c new file mode 100644 index 0000000..5526378 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_1.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */ + +#include <stdint.h> + +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) +#define FNMLA(SUFF) -FMLA (SUFF) +#define FNMLS(SUFF) -FMLS (SUFF) + +#define a_i a[i] +#define b_i b[i] +#define c_i c[i] + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT, MERGE) \ + void \ + f_##TYPE0##_##TYPE1##_##MERGE (TYPE0 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c, \ + TYPE1 *__restrict p) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + out[i] = p[i] ? FN : MERGE; \ + } + +#define TEST_ALL(FN, TYPE0, TYPE1, COUNT) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, a_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, b_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, c_i) \ + TEST_FN (FN, TYPE0, TYPE1, COUNT, 0) + +TEST_ALL (FNMLS (f16), _Float16, uint64_t, 32) + +TEST_ALL (FNMLS (f16), _Float16, uint32_t, 64) + +TEST_ALL (FNMLS (f32), float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c new file mode 100644 index 0000000..8a8f348 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fnmls_2.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fnmls_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 8 } } */ +/* { dg-final { scan-assembler-times {\tand} 12 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 12 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 12 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfnmsb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfnmls\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c new file mode 100644 index 0000000..cd7a0e1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_cond_fsubr_2.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include "unpacked_cond_fsubr_1.c" + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s} 7 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d} 14 } } */ +/* { dg-final { scan-assembler-times {\tand} 21 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 13 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 13 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0.5\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1.0\n} 2 } } */ + +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0.5\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1.0\n} 4 } } */ + +/* { dg-final { scan-assembler-not {\tsel\t} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_1.c new file mode 100644 index 0000000..312bccc --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_1.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include <stdint.h> + +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) +#define FNMLA(SUFF) -FMLA (SUFF) +#define FNMLS(SUFF) -FMLS (SUFF) + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c, \ + TYPE0 *__restrict d) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN > d[i]) \ + out[i] = 3; \ + } + +TEST_FN (FMLA (f16), _Float16, uint64_t, 32) + +TEST_FN (FMLA (f16), _Float16, uint32_t, 64) + +TEST_FN (FMLA (f32), float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */ + +/* { dg-final { scan-assembler-times {\t(fmla|fmad)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fmla|fmad)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_2.c new file mode 100644 index 0000000..ca3f94d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmla_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */ + +#include "unpacked_fmla_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */ + +/* { dg-final { scan-assembler-times {\t(fmla|fmad)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fmla|fmad)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_1.c new file mode 100644 index 0000000..f7cbfb3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_1.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include <stdint.h> + +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) +#define FNMLA(SUFF) -FMLA (SUFF) +#define FNMLS(SUFF) -FMLS (SUFF) + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c, \ + TYPE0 *__restrict d) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN > d[i]) \ + out[i] = 3; \ + } + +TEST_FN (FMLS (f16), _Float16, uint64_t, 32) + +TEST_FN (FMLS (f16), _Float16, uint32_t, 64) + +TEST_FN (FMLS (f32), float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */ + +/* { dg-final { scan-assembler-times {\t(fmls|fmsb)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fmls|fmsb)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_2.c new file mode 100644 index 0000000..387dbec --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fmls_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */ + +#include "unpacked_fmls_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */ + +/* { dg-final { scan-assembler-times {\t(fmls|fmsb)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fmls|fmsb)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_1.c new file mode 100644 index 0000000..bf13ff5 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_1.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include <stdint.h> + +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) +#define FNMLA(SUFF) -FMLA (SUFF) +#define FNMLS(SUFF) -FMLS (SUFF) + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c, \ + TYPE0 *__restrict d) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN > d[i]) \ + out[i] = 3; \ + } + +TEST_FN (FNMLA (f16), _Float16, uint64_t, 32) + +TEST_FN (FNMLA (f16), _Float16, uint32_t, 64) + +TEST_FN (FNMLA (f32), float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */ + +/* { dg-final { scan-assembler-times {\t(fnmla|fnmad)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fnmla|fnmad)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_2.c new file mode 100644 index 0000000..64130ba --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmla_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */ + +#include "unpacked_fnmla_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */ + +/* { dg-final { scan-assembler-times {\t(fnmla|fnmad)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fnmla|fnmad)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_1.c new file mode 100644 index 0000000..399920a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_1.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048" } */ + +#include <stdint.h> + +#define FMLA(SUFF) __builtin_fma##SUFF (a[i], b[i], c[i]) +#define FMLS(SUFF) __builtin_fma##SUFF (a[i], -b[i], c[i]) +#define FNMLA(SUFF) -FMLA (SUFF) +#define FNMLS(SUFF) -FMLS (SUFF) + +#define TEST_FN(FN, TYPE0, TYPE1, COUNT) \ + void \ + f_##TYPE0##_##TYPE1 (TYPE1 *__restrict out, \ + TYPE0 *__restrict a, \ + TYPE0 *__restrict b, \ + TYPE0 *__restrict c, \ + TYPE0 *__restrict d) \ + { \ + for (unsigned int i = 0; i < COUNT; i++) \ + if (FN > d[i]) \ + out[i] = 3; \ + } + +TEST_FN (FNMLS (f16), _Float16, uint64_t, 32) + +TEST_FN (FNMLS (f16), _Float16, uint32_t, 64) + +TEST_FN (FNMLS (f32), float, uint64_t, 32) + +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s} 1 } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d} 2 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */ + +/* { dg-final { scan-assembler-times {\t(fnmls|fnmsb)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fnmls|fnmsb)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_2.c new file mode 100644 index 0000000..59fb7f9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fnmls_2.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=2048 -fno-trapping-math" } */ + +#include "unpacked_fnmls_1.c" + +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.s} } } */ +/* { dg-final { scan-assembler-not {\tptrue\tp[0-7]\.d} } } */ +/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b} 3 } } */ + +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 4 } } */ +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 4 } } */ + +/* { dg-final { scan-assembler-times {\t(fnmls|fnmsb)\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ +/* { dg-final { scan-assembler-times {\t(fnmls|fnmsb)\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/i386/20020224-1.c b/gcc/testsuite/gcc.target/i386/20020224-1.c index 2905719..769332b 100644 --- a/gcc/testsuite/gcc.target/i386/20020224-1.c +++ b/gcc/testsuite/gcc.target/i386/20020224-1.c @@ -4,6 +4,7 @@ while callee was actually not poping it up (as the hidden argument was passed in register). */ /* { dg-do run } */ +/* { dg-require-effective-target ia32 } */ /* { dg-options "-O2 -fomit-frame-pointer" } */ extern void abort (void); diff --git a/gcc/testsuite/gcc.target/i386/apx-1.c b/gcc/testsuite/gcc.target/i386/apx-1.c index 4e580ec..b118928 100644 --- a/gcc/testsuite/gcc.target/i386/apx-1.c +++ b/gcc/testsuite/gcc.target/i386/apx-1.c @@ -3,6 +3,6 @@ /* { dg-error "'-mapxf' is not supported for 32-bit code" "" { target ia32 } 0 } */ void -apx_hanlder () +apx_handler () { } diff --git a/gcc/testsuite/gcc.target/i386/attributes-error.c b/gcc/testsuite/gcc.target/i386/attributes-error.c index 405eda5..5d1c77d 100644 --- a/gcc/testsuite/gcc.target/i386/attributes-error.c +++ b/gcc/testsuite/gcc.target/i386/attributes-error.c @@ -1,12 +1,40 @@ +/* { dg-options "-msse2" } */ /* { dg-do compile } */ /* { dg-require-effective-target ia32 } */ -void foo1(int i, int j) __attribute__((fastcall, cdecl)); /* { dg-error "not compatible" } */ -void foo2(int i, int j) __attribute__((fastcall, stdcall)); /* { dg-error "not compatible" } */ +void foo1(int i, int j) __attribute__((cdecl, regparm(2))); +void foo2(int i, int j) __attribute__((stdcall, regparm(2))); void foo3(int i, int j) __attribute__((fastcall, regparm(2))); /* { dg-error "not compatible" } */ -void foo4(int i, int j) __attribute__((stdcall, cdecl)); /* { dg-error "not compatible" } */ -void foo5(int i, int j) __attribute__((stdcall, fastcall)); /* { dg-error "not compatible" } */ -void foo6(int i, int j) __attribute__((cdecl, fastcall)); /* { dg-error "not compatible" } */ -void foo7(int i, int j) __attribute__((cdecl, stdcall)); /* { dg-error "not compatible" } */ -void foo8(int i, int j) __attribute__((regparm(2), fastcall)); /* { dg-error "not compatible" } */ +void foo4(int i, int j) __attribute__((thiscall, regparm(2))); /* { dg-error "not compatible" } */ +void foo5(int i, int j) __attribute__((sseregparm, regparm(2))); + +void foo6(int i, int j) __attribute__((stdcall, fastcall)); /* { dg-error "not compatible" } */ +void foo7(int i, int j) __attribute__((regparm(2), fastcall)); /* { dg-error "not compatible" } */ +void foo8(int i, int j) __attribute__((cdecl, fastcall)); /* { dg-error "not compatible" } */ +void foo9(int i, int j) __attribute__((thiscall, fastcall)); /* { dg-error "not compatible" } */ +void foo10(int i, int j) __attribute__((sseregparm, fastcall)); + +void foo11(int i, int j) __attribute__((cdecl, stdcall)); /* { dg-error "not compatible" } */ +void foo12(int i, int j) __attribute__((fastcall, stdcall)); /* { dg-error "not compatible" } */ +void foo13(int i, int j) __attribute__((thiscall, stdcall)); /* { dg-error "not compatible" } */ +void foo14(int i, int j) __attribute__((regparm(2), stdcall)); +void foo15(int i, int j) __attribute__((sseregparm, stdcall)); + +void foo16(int i, int j) __attribute__((stdcall, cdecl)); /* { dg-error "not compatible" } */ +void foo17(int i, int j) __attribute__((fastcall, cdecl)); /* { dg-error "not compatible" } */ +void foo18(int i, int j) __attribute__((thiscall, cdecl)); /* { dg-error "not compatible" } */ +void foo19(int i, int j) __attribute__((regparm(2), cdecl)); +void foo20(int i, int j) __attribute__((sseregparm, cdecl)); + +void foo21(int i, int j) __attribute__((stdcall, thiscall)); /* { dg-error "not compatible" } */ +void foo22(int i, int j) __attribute__((fastcall, thiscall)); /* { dg-error "not compatible" } */ +void foo23(int i, int j) __attribute__((cdecl, thiscall)); /* { dg-error "not compatible" } */ +void foo24(int i, int j) __attribute__((regparm(2), thiscall)); /* { dg-error "not compatible" } */ +void foo25(int i, int j) __attribute__((sseregparm, thiscall)); + +void foo26(int i, int j) __attribute__((cdecl, sseregparm)); +void foo27(int i, int j) __attribute__((fastcall, sseregparm)); +void foo28(int i, int j) __attribute__((stdcall, sseregparm)); +void foo29(int i, int j) __attribute__((thiscall, sseregparm)); +void foo30(int i, int j) __attribute__((regparm(2), sseregparm)); diff --git a/gcc/testsuite/gcc.target/i386/attributes-ignore.c b/gcc/testsuite/gcc.target/i386/attributes-ignore.c new file mode 100644 index 0000000..93a3770 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/attributes-ignore.c @@ -0,0 +1,8 @@ +/* { dg-do compile { target { ! ia32 } } } */ + +void foo1(int i, int j) __attribute__((regparm(0))); /* { dg-warning "ignored" } */ +void foo2(int i, int j) __attribute__((stdcall)); /* { dg-warning "ignored" } */ +void foo3(int i, int j) __attribute__((fastcall)); /* { dg-warning "ignored" } */ +void foo4(int i, int j) __attribute__((cdecl)); /* { dg-warning "ignored" } */ +void foo5(int i, int j) __attribute__((thiscall)); /* { dg-warning "ignored" } */ +void foo6(int i, int j) __attribute__((sseregparm)); /* { dg-warning "ignored" } */ diff --git a/gcc/testsuite/gcc.target/i386/pr103785.c b/gcc/testsuite/gcc.target/i386/pr103785.c index 5503b96..49d6c56 100644 --- a/gcc/testsuite/gcc.target/i386/pr103785.c +++ b/gcc/testsuite/gcc.target/i386/pr103785.c @@ -11,7 +11,10 @@ struct wrapper_t struct wrapper_t **table; -__attribute__ ((weak, regparm (2))) +#ifndef __x86_64__ +__attribute__ ((regparm (2))) +#endif +__attribute__ ((weak)) void update (long k, long e) { diff --git a/gcc/testsuite/gcc.target/i386/pr119795.c b/gcc/testsuite/gcc.target/i386/pr119795.c new file mode 100644 index 0000000..03c91cc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr119795.c @@ -0,0 +1,26 @@ +/* { dg-do run } */ +/* { dg-options "-O -fschedule-insns -favoid-store-forwarding" } */ + +unsigned a, b, c; + +void +foo (_BitInt(2) b2, unsigned _BitInt(255) by, unsigned _BitInt(5) b5, + unsigned _BitInt(256) *ret) +{ + unsigned _BitInt(255) bx = b2; + by += 0x80000000000000000000000000000000wb; + __builtin_memmove (&b, &c, 3); + unsigned d = b; + unsigned e = __builtin_stdc_rotate_right (0x1uwb % b5, a); + unsigned _BitInt(256) r = by + bx + d + e; + *ret = r; +} + +int +main () +{ + unsigned _BitInt(256) x; + foo (0, -1, 2, &x); + if (x != 0x80000000000000000000000000000000wb) + __builtin_abort(); +}
\ No newline at end of file diff --git a/gcc/testsuite/gcc.target/i386/pr120427-5.c b/gcc/testsuite/gcc.target/i386/pr120427-5.c new file mode 100644 index 0000000..7199aef --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr120427-5.c @@ -0,0 +1,10 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-Oz" } */ + +long long +func1 (void) +{ + return -1; +} +/* { dg-final { scan-assembler-times "pushq\[ \\t\]+\\\$-1" 1 } } */ +/* { dg-final { scan-assembler-times "popq\[ \\t\]+%rax" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr121208-1a.c b/gcc/testsuite/gcc.target/i386/pr121208-1a.c new file mode 100644 index 0000000..cb8bd0b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121208-1a.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fPIC -mno-80387 -mtls-dialect=gnu" } */ + +extern __thread int bar; +extern void func (void); + +__attribute__((no_caller_saved_registers)) +void +foo (int error) +{ + bar = 1; /* { dg-error -mtls-dialect=gnu2 } */ + if (error == 0) + func (); + bar = 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr121208-1b.c b/gcc/testsuite/gcc.target/i386/pr121208-1b.c new file mode 100644 index 0000000..037e9a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121208-1b.c @@ -0,0 +1,4 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fPIC -mno-80387 -mtls-dialect=gnu2" } */ + +#include "pr121208-1a.c" diff --git a/gcc/testsuite/gcc.target/i386/pr121208-2a.c b/gcc/testsuite/gcc.target/i386/pr121208-2a.c new file mode 100644 index 0000000..c1891ae --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121208-2a.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fPIC -mtls-dialect=gnu" } */ + +typedef unsigned int uword_t __attribute__ ((mode (__word__))); +extern __thread int bar; +extern void func (void); + +__attribute__((target("general-regs-only"))) +__attribute__((interrupt)) +void +foo (void *frame, uword_t error) +{ + bar = 1; /* { dg-error -mtls-dialect=gnu2 } */ + if (error == 0) + func (); + bar = 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr121208-2b.c b/gcc/testsuite/gcc.target/i386/pr121208-2b.c new file mode 100644 index 0000000..269b120 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121208-2b.c @@ -0,0 +1,4 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fPIC -mtls-dialect=gnu2" } */ + +#include "pr121208-2a.c" diff --git a/gcc/testsuite/gcc.target/i386/pr121208-3a.c b/gcc/testsuite/gcc.target/i386/pr121208-3a.c new file mode 100644 index 0000000..26fe687 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121208-3a.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fPIC -mtls-dialect=gnu" } */ + +typedef unsigned int uword_t __attribute__ ((mode (__word__))); +extern __thread int bar; +extern void func (void); + +__attribute__((target("general-regs-only"))) +__attribute__((interrupt)) +void +foo (void *frame) +{ + bar = 1; /* { dg-error -mtls-dialect=gnu2 } */ + if (frame == 0) + func (); + bar = 0; +} diff --git a/gcc/testsuite/gcc.target/i386/pr121208-3b.c b/gcc/testsuite/gcc.target/i386/pr121208-3b.c new file mode 100644 index 0000000..b672d75 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121208-3b.c @@ -0,0 +1,4 @@ +/* { dg-do compile { target *-*-linux* } } */ +/* { dg-options "-O2 -fPIC -mtls-dialect=gnu2" } */ + +#include "pr121208-3a.c" diff --git a/gcc/testsuite/gcc.target/i386/pr121274.c b/gcc/testsuite/gcc.target/i386/pr121274.c new file mode 100644 index 0000000..16760cf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr121274.c @@ -0,0 +1,24 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-march=x86-64-v4 -O2" } */ +/* { dg-final { scan-assembler-not "vpextrq" } } */ +/* { dg-final { scan-assembler-not "vpinsrq" } } */ + +typedef int v16si __attribute__((vector_size(64))); +typedef int v4si __attribute__((vector_size(16))); + +v4si f(v16si x) +{ + return __builtin_shufflevector(x, x, 0, 1, 2, 3); +} + +v4si g(v16si x) +{ +return __builtin_shufflevector(x, x, 4, 5, 6, 7); +} + +v4si f1(__int128 *x) +{ + __int128 t = *x; + asm("":"+x"(t)); + return (v4si)t; +} diff --git a/gcc/testsuite/gcc.target/i386/pr15184-2.c b/gcc/testsuite/gcc.target/i386/pr15184-2.c index cb8201f..dd50c42 100644 --- a/gcc/testsuite/gcc.target/i386/pr15184-2.c +++ b/gcc/testsuite/gcc.target/i386/pr15184-2.c @@ -1,4 +1,4 @@ -/* PR 15184 second two tests +/* PR 15184 second two tests */ /* { dg-do compile { target ia32 } } */ /* { dg-options "-O2 -march=pentiumpro" } */ /* { dg-additional-options "-fno-PIE" { target ia32 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr36533.c b/gcc/testsuite/gcc.target/i386/pr36533.c index 8d71ece..8699d26 100644 --- a/gcc/testsuite/gcc.target/i386/pr36533.c +++ b/gcc/testsuite/gcc.target/i386/pr36533.c @@ -55,14 +55,22 @@ typedef struct S1 *s18; } S7; -__attribute__((regparm (3), noinline)) int +#ifndef __x86_64__ +__attribute__((regparm (3))) +#endif +__attribute__((noinline)) +int fn1 (const char *x, void *y, S1 *z) { asm volatile ("" : : : "memory"); return *x + (y != 0); } -__attribute__((regparm (3), noinline)) int +#ifndef __x86_64__ +__attribute__((regparm (3))) +#endif +__attribute__((noinline)) +int fn2 (const char *x, int y, S2 *z) { asm volatile ("" : : : "memory"); @@ -84,7 +92,11 @@ fn3 (S3 *p) return (S3 *) ((char *) p + fn4 (p->s9)); } -__attribute__((regparm (3), noinline)) int +#ifndef __x86_64__ +__attribute__((regparm (3))) +#endif +__attribute__((noinline)) +int fn5 (void) { asm volatile ("" : : : "memory"); @@ -116,7 +128,11 @@ fn6 (S3 *w, int x, S2 *y, S4 *z) return a; } -__attribute__((regparm (3), noinline)) unsigned int +#ifndef __x86_64__ +__attribute__((regparm (3))) +#endif +__attribute__((noinline)) +unsigned int test (void *u, S6 *v, S1 **w, S7 *x, S2 *y, S1 *z) { unsigned b = v->s17->s16; diff --git a/gcc/testsuite/gcc.target/i386/pr59099.c b/gcc/testsuite/gcc.target/i386/pr59099.c index cf4a8da..21dfbc2 100644 --- a/gcc/testsuite/gcc.target/i386/pr59099.c +++ b/gcc/testsuite/gcc.target/i386/pr59099.c @@ -13,10 +13,17 @@ struct s }; -void* f (struct s *, struct s *) __attribute__ ((noinline, regparm(1))); +void* f (struct s *, struct s *) +#ifndef __x86_64__ +__attribute__ ((regparm(1))) +#endif +__attribute__ ((noinline)) +; void* +#ifndef __x86_64__ __attribute__ ((regparm(1))) +#endif f (struct s *p, struct s *p2) { void *gp, *gp1; diff --git a/gcc/testsuite/gcc.target/i386/sibcall-8.c b/gcc/testsuite/gcc.target/i386/sibcall-8.c index 3ab3809..29ebfe5 100644 --- a/gcc/testsuite/gcc.target/i386/sibcall-8.c +++ b/gcc/testsuite/gcc.target/i386/sibcall-8.c @@ -1,23 +1,29 @@ /* { dg-do run } */ /* { dg-options "-O2" } */ +#ifndef __x86_64__ +#define REGPARM __attribute__((regparm(1))) +#else +#define REGPARM +#endif + extern void abort (void); -static int __attribute__((regparm(1))) +static int REGPARM bar(void *arg) { return arg != bar; } -static int __attribute__((noinline,noclone,regparm(1))) -foo(int (__attribute__((regparm(1))) **bar)(void*)) +static int __attribute__((noinline,noclone)) REGPARM +foo(int (REGPARM **bar)(void*)) { return (*bar)(*bar); } int main() { - int (__attribute__((regparm(1))) *p)(void*) = bar; + int (REGPARM *p)(void*) = bar; if (foo(&p)) abort(); return 0; diff --git a/gcc/testsuite/gcc.target/i386/sw-1.c b/gcc/testsuite/gcc.target/i386/sw-1.c index 14db3ce..025f0e1 100644 --- a/gcc/testsuite/gcc.target/i386/sw-1.c +++ b/gcc/testsuite/gcc.target/i386/sw-1.c @@ -7,7 +7,10 @@ int c; int x[2000]; -__attribute__((regparm(1))) void foo (int a, int b) +#ifndef __x86_64__ +__attribute__((regparm(1))) +#endif +void foo (int a, int b) { int t[200]; if (a == 0 || c == 0) diff --git a/gcc/testsuite/gcc.target/i386/uintr-2.c b/gcc/testsuite/gcc.target/i386/uintr-2.c index 0a83c66..a0d2514 100644 --- a/gcc/testsuite/gcc.target/i386/uintr-2.c +++ b/gcc/testsuite/gcc.target/i386/uintr-2.c @@ -15,6 +15,6 @@ foo (void *frame, uword_t uirrv) void __attribute__((interrupt)) -UINTR_hanlder (struct __uintr_frame *frame, uword_t uirrv) +UINTR_handler (struct __uintr_frame *frame, uword_t uirrv) { } diff --git a/gcc/testsuite/gcc.target/i386/uintr-5.c b/gcc/testsuite/gcc.target/i386/uintr-5.c index 49cb2ec..7c7c12f 100644 --- a/gcc/testsuite/gcc.target/i386/uintr-5.c +++ b/gcc/testsuite/gcc.target/i386/uintr-5.c @@ -7,6 +7,6 @@ typedef unsigned int uword_t __attribute__ ((mode (__word__))); void -UINTR_hanlder (struct __uintr_frame *frame, uword_t uirrv) +UINTR_handler (struct __uintr_frame *frame, uword_t uirrv) { } diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_100.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_100.c new file mode 100644 index 0000000..e759a11 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_100.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_100 -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_100a.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_100a.c new file mode 100644 index 0000000..153ed1e --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_100a.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_100a -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_100f.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_100f.c new file mode 100644 index 0000000..9bb9127 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_100f.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_100f -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_101.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_101.c new file mode 100644 index 0000000..06b3ceb --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_101.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_101 -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_101a.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_101a.c new file mode 100644 index 0000000..0cca3f3 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_101a.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_101a -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_101f.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_101f.c new file mode 100644 index 0000000..9548be5 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_101f.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_101f -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_103.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_103.c new file mode 100644 index 0000000..5731249 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_103.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_103 -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_103a.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_103a.c new file mode 100644 index 0000000..aea501e --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_103a.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_103a -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_103f.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_103f.c new file mode 100644 index 0000000..59d8987 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_103f.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_103f -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_120.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_120.c new file mode 100644 index 0000000..d28a671 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_120.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_120 -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_120a.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_120a.c new file mode 100644 index 0000000..613dd65 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_120a.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_120a -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_120f.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_120f.c new file mode 100644 index 0000000..1b23350 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_120f.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_120f -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_121.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_121.c new file mode 100644 index 0000000..240332b --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_121.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_121 -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_121a.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_121a.c new file mode 100644 index 0000000..1e7fb70 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_121a.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_121a -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/nvptx/march-map=sm_121f.c b/gcc/testsuite/gcc.target/nvptx/march-map=sm_121f.c new file mode 100644 index 0000000..2cbec51 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/march-map=sm_121f.c @@ -0,0 +1,19 @@ +/* { dg-do assemble } */ +/* { dg-options {-march-map=sm_121f -mptx=_} } */ +/* { dg-additional-options -save-temps } */ +/* { dg-final { scan-assembler-times {(?n)^ \.version 7\.8$} 1 } } */ +/* { dg-final { scan-assembler-times {(?n)^ \.target sm_89$} 1 } } */ + +#if __PTX_ISA_VERSION_MAJOR__ != 7 +#error wrong value for __PTX_ISA_VERSION_MAJOR__ +#endif + +#if __PTX_ISA_VERSION_MINOR__ != 8 +#error wrong value for __PTX_ISA_VERSION_MINOR__ +#endif + +#if __PTX_SM__ != 890 +#error wrong value for __PTX_SM__ +#endif + +int dummy; diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c index f84d7f5..4e1a575 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i16.c @@ -20,4 +20,4 @@ TEST_BINARY_VX_SIGNED_0(T) /* { dg-final { scan-assembler-times {vmin.vx} 2 } } */ /* { dg-final { scan-assembler-times {vsadd.vx} 1 } } */ /* { dg-final { scan-assembler-times {vssub.vx} 1 } } */ -/* { dg-final { scan-assembler-times {vaadd.vx} 1 } } */ +/* { dg-final { scan-assembler-times {vaadd.vx} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c index 70b6743..4c4f72d 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i32.c @@ -20,4 +20,4 @@ TEST_BINARY_VX_SIGNED_0(T) /* { dg-final { scan-assembler-times {vmin.vx} 2 } } */ /* { dg-final { scan-assembler-times {vsadd.vx} 1 } } */ /* { dg-final { scan-assembler-times {vssub.vx} 1 } } */ -/* { dg-final { scan-assembler-times {vaadd.vx} 1 } } */ +/* { dg-final { scan-assembler-times {vaadd.vx} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c index 986fa4c..abf62c2 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i64.c @@ -20,4 +20,7 @@ TEST_BINARY_VX_SIGNED_0(T) /* { dg-final { scan-assembler-times {vmin.vx} 2 } } */ /* { dg-final { scan-assembler-times {vsadd.vx} 1 } } */ /* { dg-final { scan-assembler-times {vssub.vx} 1 } } */ -/* { dg-final { scan-assembler-times {vaadd.vx} 1 { target { no-opts "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2" "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4" } } } } */ +/* { dg-final { scan-assembler-times {vaadd.vx} 2 { target { no-opts + "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2" + "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4" + } } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i8.c index c479295..7744bcb 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i8.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-i8.c @@ -20,4 +20,4 @@ TEST_BINARY_VX_SIGNED_0(T) /* { dg-final { scan-assembler-times {vmin.vx} 2 } } */ /* { dg-final { scan-assembler-times {vsadd.vx} 1 } } */ /* { dg-final { scan-assembler-times {vssub.vx} 1 } } */ -/* { dg-final { scan-assembler-times {vaadd.vx} 1 } } */ +/* { dg-final { scan-assembler-times {vaadd.vx} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c index e7b1ef0..8e7a788 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u64.c @@ -19,4 +19,7 @@ TEST_BINARY_VX_UNSIGNED_0(T) /* { dg-final { scan-assembler-times {vminu.vx} 2 } } */ /* { dg-final { scan-assembler-times {vsaddu.vx} 1 } } */ /* { dg-final { scan-assembler-times {vssubu.vx} 1 } } */ -/* { dg-final { scan-assembler-times {vaaddu.vx} 1 { target { no-opts "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2" "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4" } } } } */ +/* { dg-final { scan-assembler-times {vaaddu.vx} 2 { target { no-opts { + "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2" + "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4" + } } } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c index 559887e..d213c18 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-1-u8.c @@ -19,4 +19,4 @@ TEST_BINARY_VX_UNSIGNED_0(T) /* { dg-final { scan-assembler-times {vminu.vx} 2 } } */ /* { dg-final { scan-assembler-times {vsaddu.vx} 1 } } */ /* { dg-final { scan-assembler-times {vssubu.vx} 1 } } */ -/* { dg-final { scan-assembler-times {vaaddu.vx} 1 } } */ +/* { dg-final { scan-assembler-times {vaaddu.vx} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c index 86c8040..2ae4804 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i16.c @@ -21,6 +21,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c index e2d1613..88cfc72 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i32.c @@ -21,6 +21,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X4) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X4) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c index 06ffa15..6b29a72 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i64.c @@ -21,6 +21,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ @@ -35,4 +36,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_B /* { dg-final { scan-assembler {vmin.vx} } } */ /* { dg-final { scan-assembler-not {vsadd.vx} } } */ /* { dg-final { scan-assembler-not {vssub.vx} } } */ -/* { dg-final { scan-assembler {vaadd.vx} { target { no-opts "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2" "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4" } } } } */ +/* { dg-final { scan-assembler {vaadd.vx} { target { no-opts { + "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2" + "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4" + } } } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i8.c index cb086aa..f862eb7 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i8.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-i8.c @@ -21,6 +21,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c index c851f23..3ecfce6 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u16.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c index b7805c1..7ce1fe8 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u32.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X4) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X4) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ @@ -31,5 +32,6 @@ DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_B /* { dg-final { scan-assembler {vremu.vx} } } */ /* { dg-final { scan-assembler {vmaxu.vx} } } */ /* { dg-final { scan-assembler {vminu.vx} } } */ +/* { dg-final { scan-assembler {vsaddu.vx} } } */ /* { dg-final { scan-assembler {vssubu.vx} } } */ /* { dg-final { scan-assembler {vaaddu.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c index 8295dc2..c84a30c 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u64.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ @@ -33,4 +34,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_B /* { dg-final { scan-assembler {vminu.vx} } } */ /* { dg-final { scan-assembler-not {vsaddu.vx} } } */ /* { dg-final { scan-assembler-not {vssubu.vx} } } */ -/* { dg-final { scan-assembler {vaaddu.vx} { target { no-opts "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2" "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4" } } } } */ +/* { dg-final { scan-assembler {vaaddu.vx} { target { no-opts { + "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m2" + "-O3 -mrvv-vector-bits=zvl -mrvv-max-lmul=m4" + } } } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c index d214da9..9f3d7df 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-4-u8.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i16.c index 7c7bf09..df6872c 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i16.c @@ -21,6 +21,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8) /* { dg-final { scan-assembler-not {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i32.c index 6d161bd..05ed639 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i32.c @@ -21,6 +21,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X4) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X4) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i64.c index 0409012..6776b1f 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i64.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i64.c @@ -21,6 +21,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i8.c index ed437319..d3e2785 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i8.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-i8.c @@ -21,6 +21,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8) /* { dg-final { scan-assembler-not {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c index b7c7ad4..5497b5a 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u16.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c index dd9c845..3a8e85f 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u32.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X4) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X4) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c index 1fda062..060d591 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u64.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c index 725a55b..86a6c45 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-5-u8.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i16.c index 1e18342..0bfa2cb 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i16.c @@ -21,6 +21,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8) /* { dg-final { scan-assembler-not {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i32.c index fd6e47c..3e3acfc 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i32.c @@ -21,6 +21,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X4) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X4) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i64.c index 399d0f5..531c119 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i64.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i64.c @@ -21,6 +21,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY) /* { dg-final { scan-assembler-not {vadd.vx} } } */ /* { dg-final { scan-assembler-not {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i8.c index 98567a3..43246bb 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i8.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-i8.c @@ -21,6 +21,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_S_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8) /* { dg-final { scan-assembler-not {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c index 3a215ea..f51e7a1 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u16.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c index ac4d100..79b7477 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u32.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X4) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X4) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X4) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c index 5eb0ed6..ac5fd69 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u64.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY) /* { dg-final { scan-assembler-not {vadd.vx} } } */ /* { dg-final { scan-assembler-not {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c index 8b404b6..84aa06b 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx-6-u8.c @@ -20,6 +20,7 @@ DEF_VX_BINARY_CASE_3_WRAP(T, MIN_FUNC_1_WARP(T), min, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_ADD_FUNC_WRAP(T), sat_add, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, SAT_U_SUB_FUNC_WRAP(T), sat_sub, VX_BINARY_FUNC_BODY_X8) DEF_VX_BINARY_CASE_3_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor, VX_BINARY_FUNC_BODY_X8) +DEF_VX_BINARY_CASE_3_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil, VX_BINARY_FUNC_BODY_X8) /* { dg-final { scan-assembler {vadd.vx} } } */ /* { dg-final { scan-assembler {vsub.vx} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h index b7c0f79..4a9daff 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary.h @@ -363,14 +363,35 @@ DEF_AVG_FLOOR(int8_t, int16_t) DEF_AVG_FLOOR(int16_t, int32_t) DEF_AVG_FLOOR(int32_t, int64_t) +#define DEF_AVG_CEIL(NT, WT) \ +NT \ +test_##NT##_avg_ceil(NT x, NT y) \ +{ \ + return (NT)(((WT)x + (WT)y + 1) >> 1); \ +} + +DEF_AVG_CEIL(uint8_t, uint16_t) +DEF_AVG_CEIL(uint16_t, uint32_t) +DEF_AVG_CEIL(uint32_t, uint64_t) + +DEF_AVG_CEIL(int8_t, int16_t) +DEF_AVG_CEIL(int16_t, int32_t) +DEF_AVG_CEIL(int32_t, int64_t) + #ifdef HAS_INT128 DEF_AVG_FLOOR(uint64_t, uint128_t) DEF_AVG_FLOOR(int64_t, int128_t) + + DEF_AVG_CEIL(uint64_t, uint128_t) + DEF_AVG_CEIL(int64_t, int128_t) #endif #define AVG_FLOOR_FUNC(T) test_##T##_avg_floor #define AVG_FLOOR_FUNC_WRAP(T) AVG_FLOOR_FUNC(T) +#define AVG_CEIL_FUNC(T) test_##T##_avg_ceil +#define AVG_CEIL_FUNC_WRAP(T) AVG_CEIL_FUNC(T) + #define TEST_BINARY_VX_SIGNED_0(T) \ DEF_VX_BINARY_CASE_0_WRAP(T, +, add) \ DEF_VX_BINARY_CASE_0_WRAP(T, -, sub) \ @@ -388,6 +409,7 @@ DEF_AVG_FLOOR(int32_t, int64_t) DEF_VX_BINARY_CASE_2_WRAP(T, SAT_S_ADD_FUNC(T), sat_add) \ DEF_VX_BINARY_CASE_2_WRAP(T, SAT_S_SUB_FUNC(T), sat_sub) \ DEF_VX_BINARY_CASE_2_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor) \ + DEF_VX_BINARY_CASE_2_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil) \ #define TEST_BINARY_VX_UNSIGNED_0(T) \ DEF_VX_BINARY_CASE_0_WRAP(T, +, add) \ @@ -405,5 +427,6 @@ DEF_AVG_FLOOR(int32_t, int64_t) DEF_VX_BINARY_CASE_2_WRAP(T, SAT_U_ADD_FUNC(T), sat_add) \ DEF_VX_BINARY_CASE_2_WRAP(T, SAT_U_SUB_FUNC(T), sat_sub) \ DEF_VX_BINARY_CASE_2_WRAP(T, AVG_FLOOR_FUNC_WRAP(T), avg_floor) \ + DEF_VX_BINARY_CASE_2_WRAP(T, AVG_CEIL_FUNC_WRAP(T), avg_ceil) \ #endif diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h index 6847309..626347c 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_binary_data.h @@ -5298,4 +5298,396 @@ int64_t TEST_BINARY_DATA(int64_t, avg_floor)[][3][N] = }, }; +uint8_t TEST_BINARY_DATA(uint8_t, avg_ceil)[][3][N] = +{ + { + { 0 }, + { + 2, 2, 2, 2, + 1, 1, 1, 1, + 0, 0, 0, 0, + 4, 4, 4, 4, + }, + { + 1, 1, 1, 1, + 1, 1, 1, 1, + 0, 0, 0, 0, + 2, 2, 2, 2, + }, + }, + { + { 127 }, + { + 127, 127, 127, 127, + 128, 128, 128, 128, + 255, 255, 255, 255, + 1, 1, 1, 1, + }, + { + 127, 127, 127, 127, + 128, 128, 128, 128, + 191, 191, 191, 191, + 64, 64, 64, 64, + }, + }, + { + { 255 }, + { + 0, 0, 0, 0, + 255, 255, 255, 255, + 254, 254, 254, 254, + 1, 1, 1, 1, + }, + { + 128, 128, 128, 128, + 255, 255, 255, 255, + 255, 255, 255, 255, + 128, 128, 128, 128, + }, + }, +}; + +uint16_t TEST_BINARY_DATA(uint16_t, avg_ceil)[][3][N] = +{ + { + { 0 }, + { + 2, 2, 2, 2, + 1, 1, 1, 1, + 0, 0, 0, 0, + 4, 4, 4, 4, + }, + { + 1, 1, 1, 1, + 1, 1, 1, 1, + 0, 0, 0, 0, + 2, 2, 2, 2, + }, + }, + { + { 32767 }, + { + 32767, 32767, 32767, 32767, + 32768, 32768, 32768, 32768, + 65535, 65535, 65535, 65535, + 1, 1, 1, 1, + }, + { + 32767, 32767, 32767, 32767, + 32768, 32768, 32768, 32768, + 49151, 49151, 49151, 49151, + 16384, 16384, 16384, 16384, + }, + }, + { + { 65535 }, + { + 0, 0, 0, 0, + 65535, 65535, 65535, 65535, + 65534, 65534, 65534, 65534, + 1, 1, 1, 1, + }, + { + 32768, 32768, 32768, 32768, + 65535, 65535, 65535, 65535, + 65535, 65535, 65535, 65535, + 32768, 32768, 32768, 32768, + }, + }, +}; + +uint32_t TEST_BINARY_DATA(uint32_t, avg_ceil)[][3][N] = +{ + { + { 0 }, + { + 2, 2, 2, 2, + 1, 1, 1, 1, + 0, 0, 0, 0, + 4, 4, 4, 4, + }, + { + 1, 1, 1, 1, + 1, 1, 1, 1, + 0, 0, 0, 0, + 2, 2, 2, 2, + }, + }, + { + { 2147483647 }, + { + 2147483647, 2147483647, 2147483647, 2147483647, + 2147483648, 2147483648, 2147483648, 2147483648, + 4294967295, 4294967295, 4294967295, 4294967295, + 1, 1, 1, 1, + }, + { + 2147483647, 2147483647, 2147483647, 2147483647, + 2147483648, 2147483648, 2147483648, 2147483648, + 3221225471, 3221225471, 3221225471, 3221225471, + 1073741824, 1073741824, 1073741824, 1073741824, + }, + }, + { + { 4294967295 }, + { + 0, 0, 0, 0, + 4294967295, 4294967295, 4294967295, 4294967295, + 4294967294, 4294967294, 4294967294, 4294967294, + 1, 1, 1, 1, + }, + { + 2147483648, 2147483648, 2147483648, 2147483648, + 4294967295, 4294967295, 4294967295, 4294967295, + 4294967295, 4294967295, 4294967295, 4294967295, + 2147483648, 2147483648, 2147483648, 2147483648, + }, + }, +}; + +uint64_t TEST_BINARY_DATA(uint64_t, avg_ceil)[][3][N] = +{ + { + { 0 }, + { + 2, 2, 2, 2, + 1, 1, 1, 1, + 0, 0, 0, 0, + 4, 4, 4, 4, + }, + { + 1, 1, 1, 1, + 1, 1, 1, 1, + 0, 0, 0, 0, + 2, 2, 2, 2, + }, + }, + { + { 9223372036854775807ull }, + { + 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull, + 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, + 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, + 1, 1, 1, 1, + }, + { + 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull, + 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, + 13835058055282163711ull, 13835058055282163711ull, 13835058055282163711ull, 13835058055282163711ull, + 4611686018427387904ull, 4611686018427387904ull, 4611686018427387904ull, 4611686018427387904ull, + }, + }, + { + { 18446744073709551615ull }, + { + 0, 0, 0, 0, + 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, + 18446744073709551614ull, 18446744073709551614ull, 18446744073709551614ull, 18446744073709551614ull, + 1, 1, 1, 1, + }, + { + 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, + 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, + 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, 18446744073709551615ull, + 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, 9223372036854775808ull, + }, + }, +}; + +int8_t TEST_BINARY_DATA(int8_t, avg_ceil)[][3][N] = +{ + { + { 0 }, + { + 2, 2, 2, 2, + 1, 1, 1, 1, + 0, 0, 0, 0, + 4, 4, 4, 4, + }, + { + 1, 1, 1, 1, + 1, 1, 1, 1, + 0, 0, 0, 0, + 2, 2, 2, 2, + }, + }, + { + { 127 }, + { + 127, 127, 127, 127, + -128, -128, -128, -128, + -127, -127, -127, -127, + 1, 1, 1, 1, + }, + { + 127, 127, 127, 127, + 0, 0, 0, 0, + 0, 0, 0, 0, + 64, 64, 64, 64, + }, + }, + { + {-128 }, + { + 0, 0, 0, 0, + -128, -128, -128, -128, + 126, 126, 126, 126, + 127, 127, 127, 127, + }, + { + -64, -64, -64, -64, + -128, -128, -128, -128, + -1, -1, -1, -1, + 0, 0, 0, 0, + }, + }, +}; + +int16_t TEST_BINARY_DATA(int16_t, avg_ceil)[][3][N] = +{ + { + { 0 }, + { + 2, 2, 2, 2, + 1, 1, 1, 1, + 0, 0, 0, 0, + 4, 4, 4, 4, + }, + { + 1, 1, 1, 1, + 1, 1, 1, 1, + 0, 0, 0, 0, + 2, 2, 2, 2, + }, + }, + { + { 32767 }, + { + 32767, 32767, 32767, 32767, + -32768, -32768, -32768, -32768, + -32767, -32767, -32767, -32767, + 1, 1, 1, 1, + }, + { + 32767, 32767, 32767, 32767, + 0, 0, 0, 0, + 0, 0, 0, 0, + 16384, 16384, 16384, 16384, + }, + }, + { + {-32768 }, + { + 0, 0, 0, 0, + -32768, -32768, -32768, -32768, + 32766, 32766, 32766, 32766, + 32767, 32767, 32767, 32767, + }, + { + -16384, -16384, -16384, -16384, + -32768, -32768, -32768, -32768, + -1, -1, -1, -1, + 0, 0, 0, 0, + }, + }, +}; + +int32_t TEST_BINARY_DATA(int32_t, avg_ceil)[][3][N] = +{ + { + { 0 }, + { + 2, 2, 2, 2, + 1, 1, 1, 1, + 0, 0, 0, 0, + 4, 4, 4, 4, + }, + { + 1, 1, 1, 1, + 1, 1, 1, 1, + 0, 0, 0, 0, + 2, 2, 2, 2, + }, + }, + { + { 2147483647 }, + { + 2147483647, 2147483647, 2147483647, 2147483647, + -2147483648, -2147483648, -2147483648, -2147483648, + -2147483647, -2147483647, -2147483647, -2147483647, + 1, 1, 1, 1, + }, + { + 2147483647, 2147483647, 2147483647, 2147483647, + 0, 0, 0, 0, + 0, 0, 0, 0, + 1073741824, 1073741824, 1073741824, 1073741824, + }, + }, + { + {-2147483648 }, + { + 0, 0, 0, 0, + -2147483648, -2147483648, -2147483648, -2147483648, + 2147483646, 2147483646, 2147483646, 2147483646, + 2147483647, 2147483647, 2147483647, 2147483647, + }, + { + -1073741824, -1073741824, -1073741824, -1073741824, + -2147483648, -2147483648, -2147483648, -2147483648, + -1, -1, -1, -1, + 0, 0, 0, 0, + }, + }, +}; + +int64_t TEST_BINARY_DATA(int64_t, avg_ceil)[][3][N] = +{ + { + { 0 }, + { + 2, 2, 2, 2, + 1, 1, 1, 1, + 0, 0, 0, 0, + 4, 4, 4, 4, + }, + { + 1, 1, 1, 1, + 1, 1, 1, 1, + 0, 0, 0, 0, + 2, 2, 2, 2, + }, + }, + { + { 9223372036854775807ull }, + { + 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull, + -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, + -9223372036854775807ull, -9223372036854775807ull, -9223372036854775807ull, -9223372036854775807ull, + 1, 1, 1, 1, + }, + { + 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull, + 0, 0, 0, 0, + 0, 0, 0, 0, + 4611686018427387904ull, 4611686018427387904ull, 4611686018427387904ull, 4611686018427387904ull, + }, + }, + { + {-9223372036854775808ull }, + { + 0, 0, 0, 0, + -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, + 9223372036854775806ull, 9223372036854775806ull, 9223372036854775806ull, 9223372036854775806ull, + 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull, 9223372036854775807ull, + }, + { + -4611686018427387904ull, -4611686018427387904ull, -4611686018427387904ull, -4611686018427387904ull, + -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, -9223372036854775808ull, + -1, -1, -1, -1, + 0, 0, 0, 0, + }, + }, +}; + #endif diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-i16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-i16.c new file mode 100644 index 0000000..8def643 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-i16.c @@ -0,0 +1,17 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */ + +#include "vx_binary.h" +#include "vx_binary_data.h" + +#define T int16_t +#define NAME avg_ceil +#define FUNC AVG_CEIL_FUNC_WRAP(T) +#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME) + +DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, NAME) + +#define TEST_RUN(T, NAME, out, in, x, n) \ + RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n) + +#include "vx_binary_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-i32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-i32.c new file mode 100644 index 0000000..d9ca67d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-i32.c @@ -0,0 +1,17 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */ + +#include "vx_binary.h" +#include "vx_binary_data.h" + +#define T int32_t +#define NAME avg_ceil +#define FUNC AVG_CEIL_FUNC_WRAP(T) +#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME) + +DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, NAME) + +#define TEST_RUN(T, NAME, out, in, x, n) \ + RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n) + +#include "vx_binary_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-i64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-i64.c new file mode 100644 index 0000000..313109a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-i64.c @@ -0,0 +1,17 @@ +/* { dg-do run { target { riscv_v && rv64 } } } */ +/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */ + +#include "vx_binary.h" +#include "vx_binary_data.h" + +#define T int64_t +#define NAME avg_ceil +#define FUNC AVG_CEIL_FUNC_WRAP(T) +#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME) + +DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, NAME) + +#define TEST_RUN(T, NAME, out, in, x, n) \ + RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n) + +#include "vx_binary_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-i8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-i8.c new file mode 100644 index 0000000..47e4a5d7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-i8.c @@ -0,0 +1,17 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */ + +#include "vx_binary.h" +#include "vx_binary_data.h" + +#define T int8_t +#define NAME avg_ceil +#define FUNC AVG_CEIL_FUNC_WRAP(T) +#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME) + +DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, NAME) + +#define TEST_RUN(T, NAME, out, in, x, n) \ + RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n) + +#include "vx_binary_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u16.c new file mode 100644 index 0000000..6297672 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u16.c @@ -0,0 +1,17 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */ + +#include "vx_binary.h" +#include "vx_binary_data.h" + +#define T uint16_t +#define NAME avg_ceil +#define FUNC AVG_CEIL_FUNC_WRAP(T) +#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME) + +DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, NAME) + +#define TEST_RUN(T, NAME, out, in, x, n) \ + RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n) + +#include "vx_binary_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u32.c new file mode 100644 index 0000000..30db24b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u32.c @@ -0,0 +1,17 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */ + +#include "vx_binary.h" +#include "vx_binary_data.h" + +#define T uint32_t +#define NAME avg_ceil +#define FUNC AVG_CEIL_FUNC_WRAP(T) +#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME) + +DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, NAME) + +#define TEST_RUN(T, NAME, out, in, x, n) \ + RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n) + +#include "vx_binary_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u64.c new file mode 100644 index 0000000..db3c911 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u64.c @@ -0,0 +1,17 @@ +/* { dg-do run { target { riscv_v && rv64 } } } */ +/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */ + +#include "vx_binary.h" +#include "vx_binary_data.h" + +#define T uint64_t +#define NAME avg_ceil +#define FUNC AVG_CEIL_FUNC_WRAP(T) +#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME) + +DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, NAME) + +#define TEST_RUN(T, NAME, out, in, x, n) \ + RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n) + +#include "vx_binary_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u8.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u8.c new file mode 100644 index 0000000..a7755f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vx_vf/vx_vaadd-run-2-u8.c @@ -0,0 +1,17 @@ +/* { dg-do run { target { riscv_v } } } */ +/* { dg-additional-options "-std=c99 --param=gpr2vr-cost=0" } */ + +#include "vx_binary.h" +#include "vx_binary_data.h" + +#define T uint8_t +#define NAME avg_ceil +#define FUNC AVG_CEIL_FUNC_WRAP(T) +#define TEST_DATA TEST_BINARY_DATA_WRAP(T, NAME) + +DEF_VX_BINARY_CASE_2_WRAP(T, FUNC, NAME) + +#define TEST_RUN(T, NAME, out, in, x, n) \ + RUN_VX_BINARY_CASE_2_WRAP(T, NAME, FUNC, out, in, x, n) + +#include "vx_binary_run.h" diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u16-from-u32.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u16-from-u32.c new file mode 100644 index 0000000..7409232 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u16-from-u32.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -fdump-tree-optimized" } */ + +#include "sat_arith.h" + +#define NT uint16_t +#define WT uint32_t + +DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT) + +/* { dg-final { scan-tree-dump-times ".SAT_MUL" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u8-from-u16.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u8-from-u16.c new file mode 100644 index 0000000..ec79e5d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u8-from-u16.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -fdump-tree-optimized" } */ + +#include "sat_arith.h" + +#define NT uint8_t +#define WT uint16_t + +DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT) + +/* { dg-final { scan-tree-dump-times ".SAT_MUL" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u8-from-u32.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u8-from-u32.c new file mode 100644 index 0000000..eb95184 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-1-u8-from-u32.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -fdump-tree-optimized" } */ + +#include "sat_arith.h" + +#define NT uint8_t +#define WT uint32_t + +DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT) + +/* { dg-final { scan-tree-dump-times ".SAT_MUL" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u16-from-u64.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u16-from-u64.c new file mode 100644 index 0000000..b1d33a9 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u16-from-u64.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -fdump-tree-optimized" } */ + +#include "sat_arith.h" + +#define NT uint16_t +#define WT uint64_t + +DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT) + +/* { dg-final { scan-tree-dump-times ".SAT_MUL" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u32-from-u64.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u32-from-u64.c new file mode 100644 index 0000000..af5ffecf --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u32-from-u64.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -fdump-tree-optimized" } */ + +#include "sat_arith.h" + +#define NT uint32_t +#define WT uint64_t + +DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT) + +/* { dg-final { scan-tree-dump-times ".SAT_MUL" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u8-from-u64.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u8-from-u64.c new file mode 100644 index 0000000..d65cab0 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-2-u8-from-u64.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64d -fdump-tree-optimized" } */ + +#include "sat_arith.h" + +#define NT uint8_t +#define WT uint64_t + +DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT) + +/* { dg-final { scan-tree-dump-times ".SAT_MUL" 1 "optimized" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u32.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u32.c new file mode 100644 index 0000000..e212391 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u32.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { rv32 || rv64 } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "sat_arith.h" +#include "sat_arith_data.h" + +#define NT uint16_t +#define WT uint32_t +#define NAME usmul +#define DATA TEST_BINARY_DATA_WRAP(NT, NAME) +#define T TEST_BINARY_STRUCT_DECL_WRAP(NT, NAME) +#define RUN_BINARY(x, y) RUN_SAT_U_MUL_FMT_1_WRAP(NT, WT, x, y) + +DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT) + +#include "scalar_sat_binary_run_xxx.h" diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u64.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u64.c index 065afb8..79d3fb3 100644 --- a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u64.c +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u16-from-u64.c @@ -1,4 +1,4 @@ -/* { dg-do run { target { rv32 } } } */ +/* { dg-do run { target { rv32 || rv64 } } } */ /* { dg-additional-options "-std=c99" } */ #include "sat_arith.h" diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u32-from-u64.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u32-from-u64.c index 062bbc9..ad63db3 100644 --- a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u32-from-u64.c +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u32-from-u64.c @@ -1,4 +1,4 @@ -/* { dg-do run { target { rv32 } } } */ +/* { dg-do run { target { rv32 || rv64 } } } */ /* { dg-additional-options "-std=c99" } */ #include "sat_arith.h" diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u16.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u16.c new file mode 100644 index 0000000..f5a0ab5 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u16.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { rv32 || rv64 } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "sat_arith.h" +#include "sat_arith_data.h" + +#define NT uint8_t +#define WT uint16_t +#define NAME usmul +#define DATA TEST_BINARY_DATA_WRAP(NT, NAME) +#define T TEST_BINARY_STRUCT_DECL_WRAP(NT, NAME) +#define RUN_BINARY(x, y) RUN_SAT_U_MUL_FMT_1_WRAP(NT, WT, x, y) + +DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT) + +#include "scalar_sat_binary_run_xxx.h" diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u32.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u32.c new file mode 100644 index 0000000..32074a4 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u32.c @@ -0,0 +1,16 @@ +/* { dg-do run { target { rv32 || rv64 } } } */ +/* { dg-additional-options "-std=c99" } */ + +#include "sat_arith.h" +#include "sat_arith_data.h" + +#define NT uint8_t +#define WT uint32_t +#define NAME usmul +#define DATA TEST_BINARY_DATA_WRAP(NT, NAME) +#define T TEST_BINARY_STRUCT_DECL_WRAP(NT, NAME) +#define RUN_BINARY(x, y) RUN_SAT_U_MUL_FMT_1_WRAP(NT, WT, x, y) + +DEF_SAT_U_MUL_FMT_1_WRAP(NT, WT) + +#include "scalar_sat_binary_run_xxx.h" diff --git a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u64.c b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u64.c index e6f632b..16ca905 100644 --- a/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u64.c +++ b/gcc/testsuite/gcc.target/riscv/sat/sat_u_mul-run-1-u8-from-u64.c @@ -1,4 +1,4 @@ -/* { dg-do run { target { rv32 } } } */ +/* { dg-do run { target { rv32 || rv64 } } } */ /* { dg-additional-options "-std=c99" } */ #include "sat_arith.h" diff --git a/gcc/testsuite/gcc.target/s390/spaceship-fp-1.c b/gcc/testsuite/gcc.target/s390/spaceship-fp-1.c new file mode 100644 index 0000000..56c3d77 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/spaceship-fp-1.c @@ -0,0 +1,23 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -mzarch -march=z13 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 2\)} 3 optimized } } */ +/* { dg-final { scan-assembler-times {\tk[edx]br\t} 3 } } */ +/* { dg-final { scan-assembler-not {\tbrc} } } */ +/* { dg-final { scan-assembler-not {\tc[edx]br\t} } } */ + +#define TEST(T, U) \ + int test_##U (T x, T y) \ + { \ + if (x == y) \ + return 0; \ + else if (x < y) \ + return -1; \ + else if (x > y) \ + return 1; \ + else \ + return 2; \ + } + +TEST (float, float) +TEST (double, double) +TEST (long double, longdouble) diff --git a/gcc/testsuite/gcc.target/s390/spaceship-fp-2.c b/gcc/testsuite/gcc.target/s390/spaceship-fp-2.c new file mode 100644 index 0000000..0c6e6b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/spaceship-fp-2.c @@ -0,0 +1,23 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -mzarch -march=z13 -ffinite-math-only -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 2\)} 3 optimized } } */ +/* { dg-final { scan-assembler-times {\tc[edx]br\t} 3 } } */ +/* { dg-final { scan-assembler-not {\tbrc} } } */ +/* { dg-final { scan-assembler-not {\tk[edx]br\t} } } */ + +#define TEST(T, U) \ + int test_##U (T x, T y) \ + { \ + if (x == y) \ + return 0; \ + else if (x < y) \ + return -1; \ + else if (x > y) \ + return 1; \ + else \ + return 2; \ + } + +TEST (float, float) +TEST (double, double) +TEST (long double, longdouble) diff --git a/gcc/testsuite/gcc.target/s390/spaceship-fp-3.c b/gcc/testsuite/gcc.target/s390/spaceship-fp-3.c new file mode 100644 index 0000000..2f567d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/spaceship-fp-3.c @@ -0,0 +1,23 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -mzarch -march=z13 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 42\)} 3 optimized } } */ +/* { dg-final { scan-assembler-times {\tk[edx]br\t} 3 } } */ +/* { dg-final { scan-assembler-not {\tbrc} } } */ +/* { dg-final { scan-assembler-not {\tc[edx]br\t} } } */ + +#define TEST(T, U) \ + int test_##U (T x, T y) \ + { \ + if (x == y) \ + return 0; \ + else if (x < y) \ + return -1; \ + else if (x > y) \ + return 1; \ + else \ + return 42; \ + } + +TEST (float, float) +TEST (double, double) +TEST (long double, longdouble) diff --git a/gcc/testsuite/gcc.target/s390/spaceship-fp-4.c b/gcc/testsuite/gcc.target/s390/spaceship-fp-4.c new file mode 100644 index 0000000..4531ecb --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/spaceship-fp-4.c @@ -0,0 +1,53 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -mzarch -march=z13 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 0\)} 3 optimized } } */ +/* { dg-final { scan-assembler-times {\tk[edx]br\t} 3 } } */ +/* { dg-final { scan-assembler-not {\tloc} } } */ +/* { dg-final { scan-assembler-not {\tbrc} } } */ +/* { dg-final { scan-assembler-not {\tc[edx]br\t} } } */ + +/* By time of writing this we emit + + kebr %f0,%f2 + jo .L2 + je .L3 + jnh .L10 + jg f3@PLT +.L10: + jg f2@PLT +.L3: + jg f1@PLT +.L2: + jg f4@PLT + + which is not optimal. Instead we could fold the conditional branch with the + unconditional into something along the lines + + kebr %f0,%f2 + jo f4@PLT + je f1@PLT + jnh f2@PLT + jg f3@PLT +*/ + +void f1 (void); +void f2 (void); +void f3 (void); +void f4 (void); + +#define TEST(T, U) \ + void test_##U (T x, T y) \ + { \ + if (x == y) \ + f1 (); \ + else if (x < y) \ + f2 (); \ + else if (x > y) \ + f3 (); \ + else \ + f4 (); \ + } + +TEST (float, float) +TEST (double, double) +TEST (long double, longdouble) diff --git a/gcc/testsuite/gcc.target/s390/spaceship-int-1.c b/gcc/testsuite/gcc.target/s390/spaceship-int-1.c new file mode 100644 index 0000000..8ca2677 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/spaceship-int-1.c @@ -0,0 +1,30 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -mzarch -march=z13 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, -1\)} 4 optimized } } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 1\)} 5 optimized } } */ +/* { dg-final { scan-assembler-times {\tlhi} 9 } } */ +/* { dg-final { scan-assembler-times {\tloc} 18 } } */ + +#define TEST(T, U) \ + int test_##U (T x, T y) \ + { \ + if (x == y) \ + return 0; \ + else if (x < y) \ + return -1; \ + else \ + return 1; \ + } + +TEST(signed char, schar) +TEST(unsigned char, uchar) +TEST(char, char) + +TEST(short, sshort) +TEST(unsigned short, ushort) + +TEST(int, sint) +TEST(unsigned int, uint) + +TEST(long, slong) +TEST(unsigned long, ulong) diff --git a/gcc/testsuite/gcc.target/s390/spaceship-int-2.c b/gcc/testsuite/gcc.target/s390/spaceship-int-2.c new file mode 100644 index 0000000..5f7975c --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/spaceship-int-2.c @@ -0,0 +1,24 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2 -mzarch -march=z13 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, -1\)} 1 optimized } } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 1\)} 1 optimized } } */ +/* { dg-final { scan-assembler-times {\tvecg} 1 } } */ +/* { dg-final { scan-assembler-times {\tveclg} 1 } } */ +/* { dg-final { scan-assembler-times {\tvchlgs} 2 } } */ +/* { dg-final { scan-assembler-times {\tvceqgs} 2 } } */ +/* { dg-final { scan-assembler-times {\tlhi} 2 } } */ +/* { dg-final { scan-assembler-times {\tloc} 4 } } */ + +#define TEST(T, U) \ + int test_##U (T x, T y) \ + { \ + if (x == y) \ + return 0; \ + else if (x < y) \ + return -1; \ + else \ + return 1; \ + } + +TEST(__int128, sint128) +TEST(unsigned __int128, uint128) diff --git a/gcc/testsuite/gcc.target/s390/spaceship-int-3.c b/gcc/testsuite/gcc.target/s390/spaceship-int-3.c new file mode 100644 index 0000000..46b0e4a --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/spaceship-int-3.c @@ -0,0 +1,21 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2 -march=z17 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, -1\)} 1 optimized } } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 1\)} 1 optimized } } */ +/* { dg-final { scan-assembler-times {\tvecq\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tveclq\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tloc} 4 } } */ + +#define TEST(T, U) \ + int test_##U (T x, T y) \ + { \ + if (x == y) \ + return 0; \ + else if (x < y) \ + return -1; \ + else \ + return 1; \ + } + +TEST(__int128, sint128) +TEST(unsigned __int128, uint128) diff --git a/gcc/testsuite/gfortran.dg/class_elemental_1.f90 b/gcc/testsuite/gfortran.dg/class_elemental_1.f90 new file mode 100644 index 0000000..547ae98 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/class_elemental_1.f90 @@ -0,0 +1,35 @@ +! { dg-do run } +! +! PR fortran/121342 +! The polymorphic function result as actual argument used to force the loop +! bounds around the elemental call, altering access to the other arrays. + +program p + implicit none + type :: t + integer :: i + end type + type :: u + integer :: i, a + end type + type(u) :: accum(5) + integer :: a(3:7), k + a = [ (k*k, k=1,5) ] + call s(accum, f(), a) + ! print *, accum%i + ! print *, accum%a + if (any(accum%i /= accum%a)) error stop 1 +contains + elemental subroutine s(l, c, a) + type(u) , intent(out) :: l + class(t) , intent(in) :: c + integer , intent(in) :: a + l%i = c%i + l%a = a + end subroutine + function f() + class(t), allocatable :: f(:) + allocate(f(-1:3)) + f%i = [ (k*k, k=1,5) ] + end function +end program diff --git a/gcc/testsuite/gfortran.dg/split_1.f90 b/gcc/testsuite/gfortran.dg/split_1.f90 new file mode 100644 index 0000000..21659b0 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/split_1.f90 @@ -0,0 +1,28 @@ +! { dg-do run } +program b + character(len=:), allocatable :: input + character(len=2) :: set = ', ' + integer :: p + input = " one,last example," + p = 0 + + call split(input, set, p) + if (p /= 1) STOP 1 + call split(input, set, p) + if (p /= 5) STOP 2 + call split(input, set, p) + if (p /= 10) STOP 3 + call split(input, set, p) + if (p /= 18) STOP 4 + call split(input, set, p) + if (p /= 19) STOP 5 + + call split(input, set, p, .true.) + if (p /= 18) STOP 6 + call split(input, set, p, .true.) + if (p /= 10) STOP 7 + call split(input, set, p, .true.) + if (p /= 5) STOP 8 + call split(input, set, p, .true.) + if (p /= 1) STOP 9 +end program b diff --git a/gcc/testsuite/gfortran.dg/split_2.f90 b/gcc/testsuite/gfortran.dg/split_2.f90 new file mode 100644 index 0000000..9afb30b --- /dev/null +++ b/gcc/testsuite/gfortran.dg/split_2.f90 @@ -0,0 +1,22 @@ +! { dg-do run } +program b + integer, parameter :: ucs4 = selected_char_kind('ISO_10646') + character(kind=ucs4, len=:), allocatable :: input, set + integer :: p = 0 + + input = char(int(z'4f60'), ucs4) // char(int(z'597d'), ucs4) // char(int(z'4f60'), ucs4) // char(int(z'4e16'), ucs4) + set = char(int(z'597d'), ucs4) // char(int(z'4e16'), ucs4) + + call split(input, set, p) + if (p /= 2) stop 1 + call split(input, set, p) + if (p /= 4) stop 2 + call split(input, set, p) + if (p /= 5) stop 3 + call split(input, set, p, .true.) + if (p /= 4) stop 4 + call split(input, set, p, .true.) + if (p /= 2) stop 5 + call split(input, set, p, .true.) + if (p /= 0) stop 6 +end program b diff --git a/gcc/testsuite/gfortran.dg/split_3.f90 b/gcc/testsuite/gfortran.dg/split_3.f90 new file mode 100644 index 0000000..bec3fdc --- /dev/null +++ b/gcc/testsuite/gfortran.dg/split_3.f90 @@ -0,0 +1,11 @@ +! { dg-do run } +! { dg-shouldfail "Fortran runtime error" } + +program b + character(len=:), allocatable :: input + character(len=2) :: set = ', ' + integer :: p + input = " one,last example," + p = -1 + call split(input, set, p) +end program b diff --git a/gcc/testsuite/gfortran.dg/split_4.f90 b/gcc/testsuite/gfortran.dg/split_4.f90 new file mode 100644 index 0000000..a3c27bb --- /dev/null +++ b/gcc/testsuite/gfortran.dg/split_4.f90 @@ -0,0 +1,11 @@ +! { dg-do run } +! { dg-shouldfail "Fortran runtime error" } + +program b + character(len=:), allocatable :: input + character(len=2) :: set = ', ' + integer :: p + input = " one,last example," + p = 0 + call split(input, set, p, .true.) +end program b diff --git a/gcc/testsuite/gm2/errors/fail/badindrtype.mod b/gcc/testsuite/gm2/errors/fail/badindrtype.mod new file mode 100644 index 0000000..b393027 --- /dev/null +++ b/gcc/testsuite/gm2/errors/fail/badindrtype.mod @@ -0,0 +1,16 @@ +MODULE badindrtype ; + + +PROCEDURE init (VAR ch: CHAR) ; +VAR + c: CARDINAL ; +BEGIN + ch := c +END init ; + + +VAR + ch: CHAR ; +BEGIN + init (ch) +END badindrtype. diff --git a/gcc/testsuite/gm2/errors/fail/badindrtype2.mod b/gcc/testsuite/gm2/errors/fail/badindrtype2.mod new file mode 100644 index 0000000..a31303b --- /dev/null +++ b/gcc/testsuite/gm2/errors/fail/badindrtype2.mod @@ -0,0 +1,16 @@ +MODULE badindrtype2 ; + + +PROCEDURE init (VAR ch: CHAR) ; +VAR + c: CARDINAL ; +BEGIN + c := ch +END init ; + + +VAR + ch: CHAR ; +BEGIN + init (ch) +END badindrtype2. diff --git a/gcc/testsuite/gm2/warnings/style/fail/badvarname.mod b/gcc/testsuite/gm2/warnings/style/fail/badvarname.mod new file mode 100644 index 0000000..e589b0d --- /dev/null +++ b/gcc/testsuite/gm2/warnings/style/fail/badvarname.mod @@ -0,0 +1,14 @@ +MODULE badvarname ; + + +PROCEDURE Foo ; +VAR + end: CARDINAL ; +BEGIN + end := 1 +END Foo ; + + +BEGIN + Foo +END badvarname. diff --git a/gcc/testsuite/gm2/warnings/style/fail/warnings-style-fail.exp b/gcc/testsuite/gm2/warnings/style/fail/warnings-style-fail.exp new file mode 100644 index 0000000..f44ed80 --- /dev/null +++ b/gcc/testsuite/gm2/warnings/style/fail/warnings-style-fail.exp @@ -0,0 +1,44 @@ +# Expect driver script for GCC Regression Tests +# Copyright (C) 2025 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# This file was written by Gaius Mulley (gaius.mulley@southwales.ac.uk) +# for GNU Modula-2. + +if $tracelevel then { + strace $tracelevel +} + +# load support procs +load_lib gm2-torture.exp + +gm2_init_pim "${srcdir}/gm2/warnings/style/fail" + +global TORTURE_OPTIONS + +set old_options $TORTURE_OPTIONS +set TORTURE_OPTIONS { { -O0 -g -Werror=style } } + +foreach testcase [lsort [glob -nocomplain $srcdir/$subdir/*.mod]] { + # If we're only testing specific files and this isn't one of them, skip it. + if ![runtest_file_p $runtests $testcase] then { + continue + } + + gm2-torture-fail $testcase +} + +set TORTURE_OPTIONS $old_options diff --git a/gcc/testsuite/lib/profopt.exp b/gcc/testsuite/lib/profopt.exp index b4d244b..81d86c6 100644 --- a/gcc/testsuite/lib/profopt.exp +++ b/gcc/testsuite/lib/profopt.exp @@ -382,6 +382,7 @@ proc profopt-execute { src } { unsupported "$testcase" unset testname_with_flags verbose "$src not supported on this target, skipping it" 3 + cleanup-after-saved-dg-test return } @@ -458,6 +459,7 @@ proc profopt-execute { src } { unsupported "$testcase -fauto-profile: cannot run create_gcov" unset testname_with_flags set status "fail" + cleanup-after-saved-dg-test return } set status [remote_wait "" 300] diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index e375b1e..7435519 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -12522,10 +12522,16 @@ proc check_effective_target_aarch64_gas_has_build_attributes { } { # various architecture extensions via the .arch_extension pseudo-op. set exts { - "bf16" "cmpbr" "crc" "crypto" "dotprod" "f32mm" "f64mm" "fp" "fp8" - "fp8dot2" "fp8dot4" "fp8fma" "i8mm" "ls64" "lse" "lut" "sb" "simd" - "sme-b16b16" "sme-f16f16" "sme-i16i64" "sme" "sme2" "sme2p1" "ssve-fp8dot2" - "ssve-fp8dot4" "ssve-fp8fma" "sve-b16b16" "sve" "sve2" + "bf16" "cmpbr" "crc" "crypto" "dotprod" "f32mm" "f64mm" "faminmax" + "fp" "fp8" "fp8dot2" "fp8dot4" "fp8fma" "i8mm" "ls64" "lse" "lut" + "sb" "simd" "sve-b16b16" "sve" "sve2" +} + +# We don't support SME without SVE2, so we'll use armv9 as the base +# archiecture for SME and the features that require it. +set exts_sve2 { + "sme-b16b16" "sme-f16f16" "sme-i16i64" "sme" "sme2" "sme2p1" + "ssve-fp8dot2" "ssve-fp8dot4" "ssve-fp8fma" } foreach { aarch64_ext } $exts { @@ -12542,6 +12548,20 @@ foreach { aarch64_ext } $exts { }] } +foreach { aarch64_ext } $exts_sve2 { + eval [string map [list FUNC $aarch64_ext] { + proc check_effective_target_aarch64_asm_FUNC_ok { } { + if { [istarget aarch64*-*-*] } { + return [check_no_compiler_messages aarch64_FUNC_assembler object { + __asm__ (".arch_extension FUNC"); + } "-march=armv9-a+FUNC"] + } else { + return 0 + } + } + }] +} + proc check_effective_target_aarch64_asm_sve2p1_ok { } { if { [istarget aarch64*-*-*] } { return [check_no_compiler_messages aarch64_sve2p1_assembler object { diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc index ba25c19..b7ce072 100644 --- a/gcc/tree-if-conv.cc +++ b/gcc/tree-if-conv.cc @@ -2291,6 +2291,13 @@ again: *arg0 = new_arg0; *arg1 = new_arg1; *res = new_res; + + /* Update the phi node too. */ + gimple_phi_set_result (phi, new_res); + gimple_phi_arg (phi, 0)->def = new_arg0; + gimple_phi_arg (phi, 0)->def = new_arg1; + update_stmt (phi); + repeated = true; goto again; } diff --git a/gcc/tree-ssa-alias.cc b/gcc/tree-ssa-alias.cc index 4119343..9b028e0 100644 --- a/gcc/tree-ssa-alias.cc +++ b/gcc/tree-ssa-alias.cc @@ -901,7 +901,9 @@ ao_ref_init_from_ptr_and_range (ao_ref *ref, tree ptr, if (TREE_CODE (ptr) == ADDR_EXPR) { ref->base = get_addr_base_and_unit_offset (TREE_OPERAND (ptr, 0), &t); - if (ref->base) + if (ref->base + && coeffs_in_range_p (t, -HOST_WIDE_INT_MAX / BITS_PER_UNIT, + HOST_WIDE_INT_MAX / BITS_PER_UNIT)) ref->offset = BITS_PER_UNIT * t; else { diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc index 544a946..879668c 100644 --- a/gcc/tree-ssa-loop-ivopts.cc +++ b/gcc/tree-ssa-loop-ivopts.cc @@ -147,7 +147,7 @@ along with GCC; see the file COPYING3. If not see The average trip count is computed from profile data if it exists. */ -static inline HOST_WIDE_INT +static inline unsigned HOST_WIDE_INT avg_loop_niter (class loop *loop) { HOST_WIDE_INT niter = estimated_stmt_executions_int (loop); @@ -4213,7 +4213,9 @@ adjust_setup_cost (struct ivopts_data *data, int64_t cost, return cost; else if (optimize_loop_for_speed_p (data->current_loop)) { - int64_t niters = (int64_t) avg_loop_niter (data->current_loop); + uint64_t niters = avg_loop_niter (data->current_loop); + if (niters > (uint64_t) cost) + return (round_up_p && cost != 0) ? 1 : 0; return (cost + (round_up_p ? niters - 1 : 0)) / niters; } else @@ -7277,7 +7279,7 @@ create_new_ivs (struct ivopts_data *data, class iv_ca *set) if (data->loop_loc != UNKNOWN_LOCATION) fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc), LOCATION_LINE (data->loop_loc)); - fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters", + fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_UNSIGNED " avg niters", avg_loop_niter (data->current_loop)); fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands)); EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi) diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc index 3c38f3d..c140f76 100644 --- a/gcc/tree-ssa-reassoc.cc +++ b/gcc/tree-ssa-reassoc.cc @@ -7167,9 +7167,10 @@ reassociate_bb (basic_block bb) /* If the target support FMA, rank_ops_for_fma will detect if the chain has fmas and rearrange the ops if so. */ - if (direct_internal_fn_supported_p (IFN_FMA, - TREE_TYPE (lhs), - opt_type) + if (!reassoc_insert_powi_p + && direct_internal_fn_supported_p (IFN_FMA, + TREE_TYPE (lhs), + opt_type) && (rhs_code == PLUS_EXPR || rhs_code == MINUS_EXPR)) { mult_num = rank_ops_for_fma (&ops); @@ -7200,7 +7201,8 @@ reassociate_bb (basic_block bb) to make sure the ones that get the double binary op are chosen wisely. */ int len = ops.length (); - if (len >= 3 + if (!reassoc_insert_powi_p + && len >= 3 && (!has_fma /* width > 1 means ranking ops results in better parallelism. Check current value to avoid diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc index 45fb79c..a3117da 100644 --- a/gcc/tree-ssa-sccvn.cc +++ b/gcc/tree-ssa-sccvn.cc @@ -1219,7 +1219,7 @@ ao_ref_init_from_vn_reference (ao_ref *ref, offset = 0; } else - offset += pop->off * BITS_PER_UNIT; + offset += poly_offset_int (pop->off) * BITS_PER_UNIT; op0_p = NULL; break; } @@ -1270,7 +1270,7 @@ ao_ref_init_from_vn_reference (ao_ref *ref, if (maybe_eq (op->off, -1)) max_size = -1; else - offset += op->off * BITS_PER_UNIT; + offset += poly_offset_int (op->off) * BITS_PER_UNIT; break; case REALPART_EXPR: diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc index d088287..04b357f 100644 --- a/gcc/tree-switch-conversion.cc +++ b/gcc/tree-switch-conversion.cc @@ -55,6 +55,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA #include "hwint.h" #include "internal-fn.h" #include "diagnostic-core.h" +#include "output.h" /* ??? For lang_hooks.types.type_for_mode, but is there a word_mode type in the GIMPLE type system that is language-independent? */ @@ -1033,6 +1034,16 @@ switch_conversion::build_one_array (int num, tree arr_index_type, /* The decl is mergable since we don't take the address ever and just reading from it. */ DECL_MERGEABLE (decl) = 1; + + /* Increase the alignments as needed. */ + if (tree_to_uhwi (DECL_SIZE (decl)) > DECL_ALIGN (decl)) + { + unsigned HOST_WIDE_INT s = tree_to_uhwi (DECL_SIZE (decl)); + /* Only support up to the max supported for merging. */ + if (s <= MAX_ALIGN_MERGABLE) + SET_DECL_ALIGN (decl, HOST_WIDE_INT_1U << ceil_log2 (s)); + } + if (offloading_function_p (cfun->decl)) DECL_ATTRIBUTES (decl) = tree_cons (get_identifier ("omp declare target"), NULL_TREE, diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index 3bf2852..da700cd 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -2918,12 +2918,14 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) 2) there is at least one unsupported misaligned data ref with an unknown misalignment, and 3) all misaligned data refs with a known misalignment are supported, and - 4) the number of runtime alignment checks is within reason. */ + 4) the number of runtime alignment checks is within reason. + 5) the vectorization factor is a constant. */ do_versioning = (optimize_loop_nest_for_speed_p (loop) && !loop->inner /* FORNOW */ - && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP); + && loop_cost_model (loop) > VECT_COST_MODEL_CHEAP) + && LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (); if (do_versioning) { @@ -2964,17 +2966,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) break; } - /* At present we don't support versioning for alignment - with variable VF, since there's no guarantee that the - VF is a power of two. We could relax this if we added - a way of enforcing a power-of-two size. */ - unsigned HOST_WIDE_INT size; - if (!GET_MODE_SIZE (TYPE_MODE (vectype)).is_constant (&size)) - { - do_versioning = false; - break; - } - /* Forcing alignment in the first iteration is no good if we don't keep it across iterations. For now, just disable versioning in this case. @@ -2993,7 +2984,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) Construct the mask needed for this test. For example, GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the mask must be 15 = 0xf. */ - int mask = size - 1; + gcc_assert (DR_TARGET_ALIGNMENT (dr_info).is_constant ()); + int mask = DR_TARGET_ALIGNMENT (dr_info).to_constant () - 1; /* FORNOW: use the same mask to test all potentially unaligned references in the loop. */ @@ -4542,7 +4534,6 @@ vect_describe_gather_scatter_call (stmt_vec_info stmt_info, (call, internal_fn_alias_ptr_index (info->ifn)); info->offset = gimple_call_arg (call, internal_fn_offset_index (info->ifn)); - info->offset_dt = vect_unknown_def_type; info->offset_vectype = NULL_TREE; info->scale = TREE_INT_CST_LOW (gimple_call_arg (call, internal_fn_scale_index (info->ifn))); @@ -4872,7 +4863,6 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, get_object_alignment (DR_REF (dr))); info->offset = off; - info->offset_dt = vect_unknown_def_type; info->offset_vectype = offset_vectype; info->scale = scale; info->element_type = TREE_TYPE (vectype); diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index 2d01a4b..6c1b26a 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -2857,7 +2857,7 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters, we set range information to make niters analyzer's life easier. Note the number of latch iteration value can be TYPE_MAX_VALUE so we have to represent the vector niter TYPE_MAX_VALUE + 1 / vf. */ - if (stmts != NULL && const_vf > 0) + if (stmts != NULL && const_vf > 0 && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)) { if (niters_no_overflow && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)) @@ -3295,7 +3295,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo), bound_prolog + bound_epilog) - : (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo) + : (!LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING (loop_vinfo) || vect_epilogues)); /* Epilog loop must be executed if the number of iterations for epilog diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 80b5a0a..460de57 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -11292,11 +11292,9 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance) updated offset we set using ADVANCE. Instead we have to make sure the reference in the data references point to the corresponding copy of the original in the epilogue. Make sure to update both - gather/scatters recognized by dataref analysis and also other - refs that get_load_store_type classified as VMAT_GATHER_SCATTER. */ + gather/scatters recognized by dataref analysis. */ auto vstmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo); - if (STMT_VINFO_MEMORY_ACCESS_TYPE (vstmt_vinfo) == VMAT_GATHER_SCATTER - || STMT_VINFO_STRIDED_P (vstmt_vinfo) + if (STMT_VINFO_STRIDED_P (vstmt_vinfo) || STMT_VINFO_GATHER_SCATTER_P (vstmt_vinfo)) { /* ??? As we copy epilogues from the main loop incremental @@ -11318,9 +11316,6 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance) /* Remember the advancement made. */ LOOP_VINFO_DRS_ADVANCED_BY (epilogue_vinfo) = advance; - - epilogue_vinfo->shared->datarefs_copy.release (); - epilogue_vinfo->shared->save_datarefs (); } /* When vectorizing early break statements instructions that happen before @@ -11426,7 +11421,8 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) DUMP_VECT_SCOPE ("vec_transform_loop"); - loop_vinfo->shared->check_datarefs (); + if (! LOOP_VINFO_EPILOGUE_P (loop_vinfo)) + loop_vinfo->shared->check_datarefs (); /* Use the more conservative vectorization threshold. If the number of iterations is constant assume the cost check has been performed diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index cb27d16..ca14a2d 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -118,20 +118,19 @@ _slp_tree::_slp_tree () SLP_TREE_CHILDREN (this) = vNULL; SLP_TREE_LOAD_PERMUTATION (this) = vNULL; SLP_TREE_LANE_PERMUTATION (this) = vNULL; - SLP_TREE_SIMD_CLONE_INFO (this) = vNULL; SLP_TREE_DEF_TYPE (this) = vect_uninitialized_def; SLP_TREE_CODE (this) = ERROR_MARK; this->ldst_lanes = false; this->avoid_stlf_fail = false; SLP_TREE_VECTYPE (this) = NULL_TREE; SLP_TREE_REPRESENTATIVE (this) = NULL; - SLP_TREE_MEMORY_ACCESS_TYPE (this) = VMAT_INVARIANT; + SLP_TREE_MEMORY_ACCESS_TYPE (this) = VMAT_UNINITIALIZED; SLP_TREE_REF_COUNT (this) = 1; this->failed = NULL; this->max_nunits = 1; this->lanes = 0; SLP_TREE_TYPE (this) = undef_vec_info_type; - this->u.undef = NULL; + this->data = NULL; } /* Tear down a SLP node. */ @@ -150,9 +149,10 @@ _slp_tree::~_slp_tree () SLP_TREE_VEC_DEFS (this).release (); SLP_TREE_LOAD_PERMUTATION (this).release (); SLP_TREE_LANE_PERMUTATION (this).release (); - SLP_TREE_SIMD_CLONE_INFO (this).release (); if (this->failed) free (failed); + if (this->data) + delete this->data; } /* Push the single SSA definition in DEF to the vector of vector defs. */ @@ -4950,6 +4950,9 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size, max_tree_size, &limit, force_single_lane)) { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "SLP discovery of reduction chain failed\n"); /* Dissolve reduction chain group. */ stmt_vec_info vinfo = first_element; stmt_vec_info last = NULL; diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 20a4bb2..97222f6 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -1422,14 +1422,31 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype, int group_size, vect_memory_access_type memory_access_type, - gather_scatter_info *gs_info, - tree scalar_mask, + const gather_scatter_info *gs_info, + slp_tree mask_node, vec<int> *elsvals = nullptr) { /* Invariant loads need no special support. */ if (memory_access_type == VMAT_INVARIANT) return; + /* Figure whether the mask is uniform. scalar_mask is used to + populate the scalar_cond_masked_set. */ + tree scalar_mask = NULL_TREE; + if (mask_node) + for (unsigned i = 0; i < SLP_TREE_LANES (mask_node); ++i) + { + tree def = vect_get_slp_scalar_def (mask_node, i); + if (!def + || (scalar_mask && def != scalar_mask)) + { + scalar_mask = NULL; + break; + } + else + scalar_mask = def; + } + unsigned int nvectors = vect_get_num_copies (loop_vinfo, slp_node, vectype); vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); @@ -1515,7 +1532,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype, } /* We might load more scalars than we need for permuting SLP loads. - We checked in get_group_load_store_type that the extra elements + We checked in get_load_store_type that the extra elements don't leak into a new vector. */ auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits) { @@ -1676,7 +1693,6 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, tree vectype, get_object_alignment (DR_REF (dr))); gs_info->element_type = TREE_TYPE (vectype); gs_info->offset = fold_convert (offset_type, step); - gs_info->offset_dt = vect_constant_def; gs_info->scale = scale; gs_info->memory_type = memory_type; return true; @@ -1703,19 +1719,32 @@ static bool vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info, tree vectype, loop_vec_info loop_vinfo, bool masked_p, gather_scatter_info *gs_info, - vec<int> *elsvals) + vec<int> *elsvals, + unsigned int group_size, + bool single_element_p) { if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, elsvals) || gs_info->ifn == IFN_LAST) - return vect_truncate_gather_scatter_offset (stmt_info, vectype, loop_vinfo, - masked_p, gs_info, elsvals); + { + if (!vect_truncate_gather_scatter_offset (stmt_info, vectype, loop_vinfo, + masked_p, gs_info, elsvals)) + return false; + } + else + { + tree old_offset_type = TREE_TYPE (gs_info->offset); + tree new_offset_type = TREE_TYPE (gs_info->offset_vectype); - tree old_offset_type = TREE_TYPE (gs_info->offset); - tree new_offset_type = TREE_TYPE (gs_info->offset_vectype); + gcc_assert (TYPE_PRECISION (new_offset_type) + >= TYPE_PRECISION (old_offset_type)); + gs_info->offset = fold_convert (new_offset_type, gs_info->offset); + } - gcc_assert (TYPE_PRECISION (new_offset_type) - >= TYPE_PRECISION (old_offset_type)); - gs_info->offset = fold_convert (new_offset_type, gs_info->offset); + if (!single_element_p + && !targetm.vectorize.prefer_gather_scatter (TYPE_MODE (vectype), + gs_info->scale, + group_size)) + return false; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -1903,38 +1932,45 @@ vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype) return NULL_TREE; } -/* A subroutine of get_load_store_type, with a subset of the same - arguments. Handle the case where STMT_INFO is part of a grouped load - or store. +/* Analyze load or store SLP_NODE of type VLS_TYPE. Return true + if there is a memory access type that the vectorized form can use, + storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers + or scatters, fill in GS_INFO accordingly. In addition + *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if + the target does not support the alignment scheme. *MISALIGNMENT + is set according to the alignment of the access (including + DR_MISALIGNMENT_UNKNOWN when it is unknown). - For stores, the statements in the group are all consecutive - and there is no gap at the end. For loads, the statements in the - group might not be consecutive; there can be gaps between statements - as well as at the end. + MASKED_P is true if the statement is conditional on a vectorized mask. + VECTYPE is the vector type that the vectorized statements will use. - If we can use gather/scatter and ELSVALS is nonzero the supported - else values will be stored in the vector ELSVALS points to. -*/ + If ELSVALS is nonzero the supported else values will be stored in the + vector ELSVALS points to. */ static bool -get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, - tree vectype, slp_tree slp_node, - bool masked_p, vec_load_store_type vls_type, - vect_memory_access_type *memory_access_type, - poly_int64 *poffset, - dr_alignment_support *alignment_support_scheme, - int *misalignment, - gather_scatter_info *gs_info, - internal_fn *lanes_ifn, - vec<int> *elsvals) +get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, + tree vectype, slp_tree slp_node, + bool masked_p, vec_load_store_type vls_type, + vect_memory_access_type *memory_access_type, + poly_int64 *poffset, + dr_alignment_support *alignment_support_scheme, + int *misalignment, + gather_scatter_info *gs_info, + internal_fn *lanes_ifn, + vec<int> *elsvals = nullptr) { loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); + poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL; stmt_vec_info first_stmt_info; unsigned int group_size; unsigned HOST_WIDE_INT gap; bool single_element_p; poly_int64 neg_ldst_offset = 0; + + *misalignment = DR_MISALIGNMENT_UNKNOWN; + *poffset = 0; + if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) { first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); @@ -1951,7 +1987,6 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, single_element_p = true; } dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); - poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); /* True if the vectorized statements would access beyond the last statement in the group. */ @@ -1977,7 +2012,49 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, separated by the stride, until we have a complete vector. Fall back to scalar accesses if that isn't possible. */ *memory_access_type = VMAT_STRIDED_SLP; - else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info)) + else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) + { + *memory_access_type = VMAT_GATHER_SCATTER; + if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, + elsvals)) + gcc_unreachable (); + slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0]; + tree offset_vectype = SLP_TREE_VECTYPE (offset_node); + gs_info->offset_vectype = offset_vectype; + /* When using internal functions, we rely on pattern recognition + to convert the type of the offset to the type that the target + requires, with the result being a call to an internal function. + If that failed for some reason (e.g. because another pattern + took priority), just handle cases in which the offset already + has the right type. */ + if (GATHER_SCATTER_IFN_P (*gs_info) + && !is_gimple_call (stmt_info->stmt) + && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset), + TREE_TYPE (offset_vectype))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "%s offset requires a conversion\n", + vls_type == VLS_LOAD ? "gather" : "scatter"); + return false; + } + else if (GATHER_SCATTER_EMULATED_P (*gs_info)) + { + if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant () + || !TYPE_VECTOR_SUBPARTS (offset_vectype).is_constant () + || VECTOR_BOOLEAN_TYPE_P (offset_vectype) + || !constant_multiple_p (TYPE_VECTOR_SUBPARTS (offset_vectype), + TYPE_VECTOR_SUBPARTS (vectype))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "unsupported vector types for emulated " + "gather.\n"); + return false; + } + } + } + else { int cmp = compare_step_with_zero (vinfo, stmt_info); if (cmp < 0) @@ -2221,64 +2298,12 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, if ((*memory_access_type == VMAT_ELEMENTWISE || *memory_access_type == VMAT_STRIDED_SLP) && !STMT_VINFO_GATHER_SCATTER_P (stmt_info) - && single_element_p && SLP_TREE_LANES (slp_node) == 1 && loop_vinfo && vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo, - masked_p, gs_info, elsvals)) + masked_p, gs_info, elsvals, + group_size, single_element_p)) *memory_access_type = VMAT_GATHER_SCATTER; - else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) - { - tree offset; - slp_tree offset_node; - *memory_access_type = VMAT_GATHER_SCATTER; - if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, - elsvals)) - gcc_unreachable (); - /* When using internal functions, we rely on pattern recognition - to convert the type of the offset to the type that the target - requires, with the result being a call to an internal function. - If that failed for some reason (e.g. because another pattern - took priority), just handle cases in which the offset already - has the right type. */ - else if (GATHER_SCATTER_IFN_P (*gs_info) - && !is_gimple_call (stmt_info->stmt) - && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset), - TREE_TYPE (gs_info->offset_vectype))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "%s offset requires a conversion\n", - vls_type == VLS_LOAD ? "gather" : "scatter"); - return false; - } - else if (!vect_is_simple_use (vinfo, slp_node, 0, &offset, &offset_node, - &gs_info->offset_dt, - &gs_info->offset_vectype)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "%s index use not simple.\n", - vls_type == VLS_LOAD ? "gather" : "scatter"); - return false; - } - else if (GATHER_SCATTER_EMULATED_P (*gs_info)) - { - if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant () - || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant () - || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype) - || !constant_multiple_p (TYPE_VECTOR_SUBPARTS - (gs_info->offset_vectype), - TYPE_VECTOR_SUBPARTS (vectype))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "unsupported vector types for emulated " - "gather.\n"); - return false; - } - } - } if (*memory_access_type == VMAT_CONTIGUOUS_DOWN || *memory_access_type == VMAT_CONTIGUOUS_REVERSE) @@ -2306,26 +2331,6 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, *memory_access_type == VMAT_GATHER_SCATTER ? gs_info : nullptr); } - if (vls_type != VLS_LOAD && first_stmt_info == stmt_info) - { - /* STMT is the leader of the group. Check the operands of all the - stmts of the group. */ - stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info); - while (next_stmt_info) - { - tree op = vect_get_store_rhs (next_stmt_info); - enum vect_def_type dt; - if (!vect_is_simple_use (op, vinfo, &dt)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "use not simple.\n"); - return false; - } - next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); - } - } - if (overrun_p) { gcc_assert (can_overrun_p); @@ -2336,51 +2341,6 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true; } - return true; -} - -/* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true - if there is a memory access type that the vectorized form can use, - storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers - or scatters, fill in GS_INFO accordingly. In addition - *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if - the target does not support the alignment scheme. *MISALIGNMENT - is set according to the alignment of the access (including - DR_MISALIGNMENT_UNKNOWN when it is unknown). - - SLP says whether we're performing SLP rather than loop vectorization. - MASKED_P is true if the statement is conditional on a vectorized mask. - VECTYPE is the vector type that the vectorized statements will use. - NCOPIES is the number of vector statements that will be needed. - - If ELSVALS is nonzero the supported else values will be stored in the - vector ELSVALS points to. */ - -static bool -get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, - tree vectype, slp_tree slp_node, - bool masked_p, vec_load_store_type vls_type, - unsigned int, - vect_memory_access_type *memory_access_type, - poly_int64 *poffset, - dr_alignment_support *alignment_support_scheme, - int *misalignment, - gather_scatter_info *gs_info, - internal_fn *lanes_ifn, - vec<int> *elsvals = nullptr) -{ - loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); - poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); - *misalignment = DR_MISALIGNMENT_UNKNOWN; - *poffset = 0; - if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node, - masked_p, - vls_type, memory_access_type, poffset, - alignment_support_scheme, - misalignment, gs_info, lanes_ifn, - elsvals)) - return false; - if ((*memory_access_type == VMAT_ELEMENTWISE || *memory_access_type == VMAT_STRIDED_SLP) && !nunits.is_constant ()) @@ -2392,7 +2352,6 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, return false; } - /* Checks if all scalar iterations are known to be inbounds. */ bool inbounds = DR_SCALAR_KNOWN_BOUNDS (STMT_VINFO_DR_INFO (stmt_info)); @@ -2526,9 +2485,6 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, /* FIXME: At the moment the cost model seems to underestimate the cost of using elementwise accesses. This check preserves the traditional behavior until that can be fixed. */ - stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); - if (!first_stmt_info) - first_stmt_info = stmt_info; if (*memory_access_type == VMAT_ELEMENTWISE && !STMT_VINFO_STRIDED_P (first_stmt_info) && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info) @@ -2545,21 +2501,21 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing conditional operation STMT_INFO. When returning true, store the mask - in *MASK, the type of its definition in *MASK_DT_OUT, the type of the - vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding - to the mask in *MASK_NODE if MASK_NODE is not NULL. */ + in *MASK_NODE, the type of its definition in *MASK_DT_OUT and the type of + the vectorized mask in *MASK_VECTYPE_OUT. */ static bool vect_check_scalar_mask (vec_info *vinfo, slp_tree slp_node, unsigned mask_index, - tree *mask, slp_tree *mask_node, + slp_tree *mask_node, vect_def_type *mask_dt_out, tree *mask_vectype_out) { enum vect_def_type mask_dt; tree mask_vectype; slp_tree mask_node_1; + tree mask_; if (!vect_is_simple_use (vinfo, slp_node, mask_index, - mask, &mask_node_1, &mask_dt, &mask_vectype)) + &mask_, &mask_node_1, &mask_dt, &mask_vectype)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -2568,7 +2524,7 @@ vect_check_scalar_mask (vec_info *vinfo, } if ((mask_dt == vect_constant_def || mask_dt == vect_external_def) - && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask))) + && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask_))) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -2576,17 +2532,6 @@ vect_check_scalar_mask (vec_info *vinfo, return false; } - /* If the caller is not prepared for adjusting an external/constant - SLP mask vector type fail. */ - if (!mask_node - && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "SLP mask argument is not vectorized.\n"); - return false; - } - tree vectype = SLP_TREE_VECTYPE (slp_node); if (!mask_vectype) mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype), @@ -2614,11 +2559,11 @@ vect_check_scalar_mask (vec_info *vinfo, *mask_dt_out = mask_dt; *mask_vectype_out = mask_vectype; - if (mask_node) - *mask_node = mask_node_1; + *mask_node = mask_node_1; return true; } + /* Return true if stored value is suitable for vectorizing store statement STMT_INFO. When returning true, store the scalar stored in *RHS and *RHS_NODE, the type of the definition in *RHS_DT_OUT, @@ -2627,7 +2572,7 @@ vect_check_scalar_mask (vec_info *vinfo, static bool vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info, - slp_tree slp_node, tree *rhs, slp_tree *rhs_node, + slp_tree slp_node, slp_tree *rhs_node, vect_def_type *rhs_dt_out, tree *rhs_vectype_out, vec_load_store_type *vls_type_out) { @@ -2643,8 +2588,9 @@ vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info, enum vect_def_type rhs_dt; tree rhs_vectype; + tree rhs; if (!vect_is_simple_use (vinfo, slp_node, op_no, - rhs, rhs_node, &rhs_dt, &rhs_vectype)) + &rhs, rhs_node, &rhs_dt, &rhs_vectype)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -2655,7 +2601,7 @@ vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info, /* In the case this is a store from a constant make sure native_encode_expr can handle it. */ if (rhs_dt == vect_constant_def - && CONSTANT_CLASS_P (*rhs) && native_encode_expr (*rhs, NULL, 64) == 0) + && CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -2771,7 +2717,7 @@ static gimple * vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype, gimple_stmt_iterator *gsi, - gather_scatter_info *gs_info, + const gather_scatter_info *gs_info, tree ptr, tree offset, tree mask) { tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl)); @@ -2869,7 +2815,7 @@ vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info, static gimple * vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, - gather_scatter_info *gs_info, + const gather_scatter_info *gs_info, tree ptr, tree offset, tree oprnd, tree mask) { tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl)); @@ -2950,8 +2896,8 @@ vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info, containing loop. */ static void -vect_get_gather_scatter_ops (class loop *loop, - slp_tree slp_node, gather_scatter_info *gs_info, +vect_get_gather_scatter_ops (class loop *loop, slp_tree slp_node, + const gather_scatter_info *gs_info, tree *dataref_ptr, vec<tree> *vec_offset) { gimple_seq stmts = NULL; @@ -2979,7 +2925,7 @@ static void vect_get_strided_load_store_ops (stmt_vec_info stmt_info, tree vectype, loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi, - gather_scatter_info *gs_info, + const gather_scatter_info *gs_info, tree *dataref_bump, tree *vec_offset, vec_loop_lens *loop_lens) { @@ -3312,14 +3258,20 @@ vectorizable_call (vec_info *vinfo, int mask_opno = -1; if (internal_fn_p (cfn)) - mask_opno = internal_fn_mask_index (as_internal_fn (cfn)); + { + /* We can only handle direct internal masked calls here, + vectorizable_simd_clone_call is for the rest. */ + if (cfn == CFN_MASK_CALL) + return false; + mask_opno = internal_fn_mask_index (as_internal_fn (cfn)); + } for (i = 0; i < nargs; i++) { if ((int) i == mask_opno) { if (!vect_check_scalar_mask (vinfo, slp_node, mask_opno, - &op, &slp_op[i], &dt[i], &vectypes[i])) + &slp_op[i], &dt[i], &vectypes[i])) return false; continue; } @@ -3884,9 +3836,9 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, if (nargs == 0) return false; - vec<tree>& simd_clone_info = SLP_TREE_SIMD_CLONE_INFO (slp_node); - if (cost_vec) - simd_clone_info.truncate (0); + vect_simd_clone_data _data; + vect_simd_clone_data &data = slp_node->get_data (_data); + vec<tree>& simd_clone_info = data.simd_clone_info; arginfo.reserve (nargs, true); auto_vec<slp_tree> slp_op; slp_op.safe_grow_cleared (nargs); @@ -4283,6 +4235,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, } SLP_TREE_TYPE (slp_node) = call_simd_clone_vec_info_type; + slp_node->data = new vect_simd_clone_data (std::move (_data)); DUMP_VECT_SCOPE ("vectorizable_simd_clone_call"); /* vect_model_simple_cost (vinfo, 1, slp_node, cost_vec); */ return true; @@ -7082,7 +7035,8 @@ scan_store_can_perm_p (tree vectype, tree init, static bool check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype, - enum vect_def_type rhs_dt, slp_tree slp_node, tree mask, + enum vect_def_type rhs_dt, slp_tree slp_node, + slp_tree mask_node, vect_memory_access_type memory_access_type) { loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); @@ -7090,13 +7044,14 @@ check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype, tree ref_type; gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1); - if ((slp_node && SLP_TREE_LANES (slp_node) > 1) - || mask + if (SLP_TREE_LANES (slp_node) > 1 + || mask_node || memory_access_type != VMAT_CONTIGUOUS || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0)) || loop_vinfo == NULL || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) + || LOOP_VINFO_EPILOGUE_P (loop_vinfo) || STMT_VINFO_GROUPED_ACCESS (stmt_info) || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info)) || !integer_zerop (DR_INIT (dr_info->dr)) @@ -7787,7 +7742,7 @@ vectorizable_store (vec_info *vinfo, /* Is vectorizable store? */ - tree mask = NULL_TREE, mask_vectype = NULL_TREE; + tree mask_vectype = NULL_TREE; slp_tree mask_node = NULL; if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt)) { @@ -7820,7 +7775,7 @@ vectorizable_store (vec_info *vinfo, (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info)); if (mask_index >= 0 && !vect_check_scalar_mask (vinfo, slp_node, mask_index, - &mask, &mask_node, &mask_dt, + &mask_node, &mask_dt, &mask_vectype)) return false; } @@ -7851,10 +7806,9 @@ vectorizable_store (vec_info *vinfo, return false; } - tree op; slp_tree op_node; if (!vect_check_store_rhs (vinfo, stmt_info, slp_node, - &op, &op_node, &rhs_dt, &rhs_vectype, &vls_type)) + &op_node, &rhs_dt, &rhs_vectype, &vls_type)) return false; elem_type = TREE_TYPE (vectype); @@ -7868,8 +7822,8 @@ vectorizable_store (vec_info *vinfo, int misalignment; poly_int64 poffset; internal_fn lanes_ifn; - if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type, - 1, &memory_access_type, &poffset, + if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node, + vls_type, &memory_access_type, &poffset, &alignment_support_scheme, &misalignment, &gs_info, &lanes_ifn)) return false; @@ -7883,7 +7837,7 @@ vectorizable_store (vec_info *vinfo, return false; } - if (mask) + if (mask_node) { if (memory_access_type == VMAT_CONTIGUOUS) { @@ -7937,15 +7891,14 @@ vectorizable_store (vec_info *vinfo, if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && cost_vec) { - if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp_node, mask, - memory_access_type)) + if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp_node, + mask_node, memory_access_type)) return false; } bool costing_p = cost_vec; if (costing_p) /* transformation not required. */ { - STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type; SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type; if (loop_vinfo @@ -7953,10 +7906,10 @@ vectorizable_store (vec_info *vinfo, check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node, vls_type, group_size, memory_access_type, &gs_info, - mask); + mask_node); if (!vect_maybe_update_slp_op_vectype (op_node, vectype) - || (mask + || (mask_node && !vect_maybe_update_slp_op_vectype (mask_node, mask_vectype))) { @@ -7976,7 +7929,7 @@ vectorizable_store (vec_info *vinfo, SLP_TREE_TYPE (slp_node) = store_vec_info_type; } - gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (stmt_info)); + gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node)); /* Transform. */ @@ -8020,7 +7973,6 @@ vectorizable_store (vec_info *vinfo, gcc_assert (!STMT_VINFO_GROUPED_ACCESS (first_stmt_info) || (DR_GROUP_FIRST_ELEMENT (first_stmt_info) == first_stmt_info)); first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); - op = vect_get_store_rhs (first_stmt_info); ref_type = get_group_alias_ptr_type (first_stmt_info); @@ -8206,7 +8158,7 @@ vectorizable_store (vec_info *vinfo, unsigned int n_adjacent_stores = 0; running_off = offvar; if (!costing_p) - vect_get_vec_defs (vinfo, slp_node, op, &vec_oprnds); + vect_get_slp_defs (op_node, &vec_oprnds); unsigned int group_el = 0; unsigned HOST_WIDE_INT elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); @@ -8281,7 +8233,7 @@ vectorizable_store (vec_info *vinfo, else inside_cost += record_stmt_cost (cost_vec, n_adjacent_stores, - scalar_store, stmt_info, 0, vect_body); + scalar_store, slp_node, 0, vect_body); /* Only need vector extracting when there are more than one stores. */ if (nstores > 1) @@ -8321,7 +8273,7 @@ vectorizable_store (vec_info *vinfo, realignment. vect_supportable_dr_alignment always returns either dr_aligned or dr_unaligned_supported for masked operations. */ gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES - && !mask + && !mask_node && !loop_masks) || alignment_support_scheme == dr_aligned || alignment_support_scheme == dr_unaligned_supported); @@ -8356,7 +8308,7 @@ vectorizable_store (vec_info *vinfo, memory_access_type, loop_lens); } - if (mask && !costing_p) + if (mask_node && !costing_p) LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true; /* In case the vectorization factor (VF) is bigger than the number @@ -8396,7 +8348,7 @@ vectorizable_store (vec_info *vinfo, { if (!costing_p) { - if (mask) + if (mask_node) { vect_get_slp_defs (mask_node, &vec_masks); vec_mask = vec_masks[0]; @@ -8410,7 +8362,7 @@ vectorizable_store (vec_info *vinfo, else if (!costing_p) { gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)); - if (mask) + if (mask_node) vec_mask = vec_masks[j]; dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, stmt_info, bump); @@ -8545,7 +8497,7 @@ vectorizable_store (vec_info *vinfo, DR_CHAIN is of size 1. */ gcc_assert (group_size == 1); vect_get_slp_defs (op_node, gvec_oprnds[0]); - if (mask) + if (mask_node) vect_get_slp_defs (mask_node, &vec_masks); if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) @@ -8571,7 +8523,7 @@ vectorizable_store (vec_info *vinfo, if (!costing_p) { vec_oprnd = (*gvec_oprnds[0])[j]; - if (mask) + if (mask_node) vec_mask = vec_masks[j]; /* We should have catched mismatched types earlier. */ gcc_assert (useless_type_conversion_p (vectype, @@ -8871,10 +8823,13 @@ vectorizable_store (vec_info *vinfo, if (!costing_p) { /* Get vectorized arguments for SLP_NODE. */ - vect_get_vec_defs (vinfo, slp_node, op, &vec_oprnds, mask, &vec_masks); + vect_get_slp_defs (op_node, &vec_oprnds); vec_oprnd = vec_oprnds[0]; - if (mask) - vec_mask = vec_masks[0]; + if (mask_node) + { + vect_get_slp_defs (mask_node, &vec_masks); + vec_mask = vec_masks[0]; + } } /* We should have catched mismatched types earlier. */ @@ -8916,10 +8871,7 @@ vectorizable_store (vec_info *vinfo, else { tree perm_mask = perm_mask_for_reverse (vectype); - tree perm_dest - = vect_create_destination_var (vect_get_store_rhs (stmt_info), - vectype); - tree new_temp = make_ssa_name (perm_dest); + tree new_temp = make_ssa_name (vectype); /* Generate the permute statement. */ gimple *perm_stmt @@ -9305,12 +9257,12 @@ vectorizable_load (vec_info *vinfo, if (!STMT_VINFO_DATA_REF (stmt_info)) return false; - tree mask = NULL_TREE, mask_vectype = NULL_TREE; + tree mask_vectype = NULL_TREE; tree els = NULL_TREE; tree els_vectype = NULL_TREE; int mask_index = -1; int els_index = -1; - slp_tree slp_op = NULL; + slp_tree mask_node = NULL; slp_tree els_op = NULL; if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt)) { @@ -9349,7 +9301,7 @@ vectorizable_load (vec_info *vinfo, (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info)); if (mask_index >= 0 && !vect_check_scalar_mask (vinfo, slp_node, mask_index, - &mask, &slp_op, &mask_dt, &mask_vectype)) + &mask_node, &mask_dt, &mask_vectype)) return false; els_index = internal_fn_else_index (ifn); @@ -9432,8 +9384,8 @@ vectorizable_load (vec_info *vinfo, auto_vec<int> elsvals; int maskload_elsval = 0; bool need_zeroing = false; - if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD, - 1, &memory_access_type, &poffset, + if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node, + VLS_LOAD, &memory_access_type, &poffset, &alignment_support_scheme, &misalignment, &gs_info, &lanes_ifn, &elsvals)) return false; @@ -9447,7 +9399,7 @@ vectorizable_load (vec_info *vinfo, = TYPE_PRECISION (scalar_type) < GET_MODE_PRECISION (GET_MODE_INNER (mode)); /* ??? The following checks should really be part of - get_group_load_store_type. */ + get_load_store_type. */ if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists () && !((memory_access_type == VMAT_ELEMENTWISE || memory_access_type == VMAT_GATHER_SCATTER) @@ -9500,7 +9452,7 @@ vectorizable_load (vec_info *vinfo, return false; } - if (mask) + if (mask_node) { if (memory_access_type == VMAT_CONTIGUOUS) { @@ -9541,8 +9493,8 @@ vectorizable_load (vec_info *vinfo, if (costing_p) /* transformation not required. */ { - if (mask - && !vect_maybe_update_slp_op_vectype (slp_op, + if (mask_node + && !vect_maybe_update_slp_op_vectype (mask_node, mask_vectype)) { if (dump_enabled_p ()) @@ -9558,7 +9510,7 @@ vectorizable_load (vec_info *vinfo, check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node, VLS_LOAD, group_size, memory_access_type, &gs_info, - mask, &elsvals); + mask_node, &elsvals); if (dump_enabled_p () && memory_access_type != VMAT_ELEMENTWISE @@ -9582,7 +9534,7 @@ vectorizable_load (vec_info *vinfo, check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node, VLS_LOAD, group_size, memory_access_type, &gs_info, - mask, &elsvals); + mask_node, &elsvals); } /* If the type needs padding we must zero inactive elements. @@ -9617,7 +9569,7 @@ vectorizable_load (vec_info *vinfo, if (memory_access_type == VMAT_INVARIANT) { - gcc_assert (!grouped_load && !mask && !bb_vinfo); + gcc_assert (!grouped_load && !mask_node && !bb_vinfo); /* If we have versioned for aliasing or the loop doesn't have any data dependencies that would preclude this, then we are sure this is a loop invariant load and @@ -10092,7 +10044,7 @@ vectorizable_load (vec_info *vinfo, dr_aligned or dr_unaligned_supported for (non-length) masked operations. */ gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES - && !mask + && !mask_node && !loop_masks) || memory_access_type == VMAT_GATHER_SCATTER || alignment_support_scheme == dr_aligned @@ -10211,33 +10163,10 @@ vectorizable_load (vec_info *vinfo, tree bump; tree vec_offset = NULL_TREE; - if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) - { - aggr_type = NULL_TREE; - bump = NULL_TREE; - } - else if (memory_access_type == VMAT_GATHER_SCATTER) - { - aggr_type = elem_type; - if (!costing_p) - vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo, - gsi, &gs_info, - &bump, &vec_offset, loop_lens); - } - else - { - if (memory_access_type == VMAT_LOAD_STORE_LANES) - aggr_type = build_array_type_nelts (elem_type, group_size * nunits); - else - aggr_type = vectype; - if (!costing_p) - bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type, - memory_access_type, loop_lens); - } auto_vec<tree> vec_offsets; auto_vec<tree> vec_masks; - if (mask && !costing_p) + if (mask_node && !costing_p) vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index], &vec_masks); @@ -10248,6 +10177,11 @@ vectorizable_load (vec_info *vinfo, gcc_assert (alignment_support_scheme == dr_aligned || alignment_support_scheme == dr_unaligned_supported); + aggr_type = build_array_type_nelts (elem_type, group_size * nunits); + if (!costing_p) + bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type, + memory_access_type, loop_lens); + unsigned int inside_cost = 0, prologue_cost = 0; /* For costing some adjacent vector loads, we'd like to cost with the total number of them once instead of cost each one by one. */ @@ -10300,7 +10234,7 @@ vectorizable_load (vec_info *vinfo, dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi, stmt_info, bump); } - if (mask) + if (mask_node) vec_mask = vec_masks[j]; tree vec_array = create_vector_array (vectype, group_size); @@ -10409,20 +10343,31 @@ vectorizable_load (vec_info *vinfo, { gcc_assert (!grouped_load && !slp_perm); - unsigned int inside_cost = 0, prologue_cost = 0; - /* 1. Create the vector or array pointer update chain. */ - if (!costing_p) + if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) { - if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) + aggr_type = NULL_TREE; + bump = NULL_TREE; + if (!costing_p) vect_get_gather_scatter_ops (loop, slp_node, &gs_info, &dataref_ptr, &vec_offsets); - else - dataref_ptr - = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type, - at_loop, offset, &dummy, gsi, - &ptr_incr, false, bump); } + else + { + aggr_type = elem_type; + if (!costing_p) + { + vect_get_strided_load_store_ops (stmt_info, vectype, loop_vinfo, + gsi, &gs_info, + &bump, &vec_offset, loop_lens); + dataref_ptr + = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type, + at_loop, offset, &dummy, gsi, + &ptr_incr, false, bump); + } + } + + unsigned int inside_cost = 0, prologue_cost = 0; gimple *new_stmt = NULL; for (i = 0; i < vec_num; i++) @@ -10432,7 +10377,7 @@ vectorizable_load (vec_info *vinfo, tree bias = NULL_TREE; if (!costing_p) { - if (mask) + if (mask_node) vec_mask = vec_masks[i]; if (loop_masks) final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks, @@ -10744,6 +10689,11 @@ vectorizable_load (vec_info *vinfo, return true; } + aggr_type = vectype; + if (!costing_p) + bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type, + memory_access_type, loop_lens); + poly_uint64 group_elt = 0; unsigned int inside_cost = 0, prologue_cost = 0; /* For costing some adjacent vector loads, we'd like to cost with @@ -10823,7 +10773,7 @@ vectorizable_load (vec_info *vinfo, if (!costing_p) { - if (mask) + if (mask_node) vec_mask = vec_masks[i]; if (loop_masks) final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks, diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 4a1e4fc..0a75ee1 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -173,6 +173,8 @@ struct vect_scalar_ops_slice_hash : typed_noop_remove<vect_scalar_ops_slice> /* Describes how we're going to vectorize an individual load or store, or a group of loads or stores. */ enum vect_memory_access_type { + VMAT_UNINITIALIZED, + /* An access to an invariant address. This is used only for loads. */ VMAT_INVARIANT, @@ -239,6 +241,23 @@ typedef auto_vec<std::pair<unsigned, unsigned>, 16> auto_lane_permutation_t; typedef vec<unsigned> load_permutation_t; typedef auto_vec<unsigned, 16> auto_load_permutation_t; +struct vect_data { + virtual ~vect_data () = default; +}; + +/* Analysis data from vectorizable_simd_clone_call for + call_simd_clone_vec_info_type. */ +struct vect_simd_clone_data : vect_data { + virtual ~vect_simd_clone_data () = default; + vect_simd_clone_data () = default; + vect_simd_clone_data (vect_simd_clone_data &&other) = default; + + /* Selected SIMD clone's function info. First vector element + is SIMD clone's function decl, followed by a pair of trees (base + step) + for linear arguments (pair of NULLs for other arguments). */ + auto_vec<tree> simd_clone_info; +}; + /* A computation tree of an SLP instance. Each node corresponds to a group of stmts to be packed in a SIMD stmt. */ struct _slp_tree { @@ -267,11 +286,6 @@ struct _slp_tree { denotes the number of output lanes. */ lane_permutation_t lane_permutation; - /* Selected SIMD clone's function info. First vector element - is SIMD clone's function decl, followed by a pair of trees (base + step) - for linear arguments (pair of NULLs for other arguments). */ - vec<tree> simd_clone_info; - tree vectype; /* Vectorized defs. */ vec<tree> vec_defs; @@ -305,12 +319,13 @@ struct _slp_tree { for loop vectorization. */ vect_memory_access_type memory_access_type; - /* The kind of operation as determined by analysis and a tagged - union with kind specific data. */ + /* The kind of operation as determined by analysis and optional + kind specific data. */ enum stmt_vec_info_type type; - union { - void *undef; - } u; + vect_data *data; + + template <class T> + T& get_data (T& else_) { return data ? *static_cast <T *> (data) : else_; } /* If not NULL this is a cached failed SLP discovery attempt with the lanes that failed during SLP discovery as 'false'. This is @@ -390,7 +405,6 @@ public: #define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size #define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation #define SLP_TREE_LANE_PERMUTATION(S) (S)->lane_permutation -#define SLP_TREE_SIMD_CLONE_INFO(S) (S)->simd_clone_info #define SLP_TREE_DEF_TYPE(S) (S)->def_type #define SLP_TREE_VECTYPE(S) (S)->vectype #define SLP_TREE_REPRESENTATIVE(S) (S)->representative @@ -1197,6 +1211,10 @@ public: || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L) \ || LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L)) +#define LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING(L) \ + ((L)->may_misalign_stmts.length () > 0 \ + && !LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (L)) + #define LOOP_VINFO_NITERS_KNOWN_P(L) \ (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0) @@ -1434,10 +1452,6 @@ public: /* For both loads and stores. */ unsigned simd_lane_access_p : 3; - /* Classifies how the load or store is going to be implemented - for loop vectorization. */ - vect_memory_access_type memory_access_type; - /* For INTEGER_INDUC_COND_REDUCTION, the initial value to be used. */ tree induc_cond_initial_val; @@ -1561,9 +1575,6 @@ struct gather_scatter_info { being added to the base. */ int scale; - /* The definition type for the vectorized offset. */ - enum vect_def_type offset_dt; - /* The type of the vectorized offset. */ tree offset_vectype; @@ -1583,7 +1594,6 @@ struct gather_scatter_info { #define STMT_VINFO_DATA_REF(S) ((S)->dr_aux.dr + 0) #define STMT_VINFO_GATHER_SCATTER_P(S) (S)->gather_scatter_p #define STMT_VINFO_STRIDED_P(S) (S)->strided_p -#define STMT_VINFO_MEMORY_ACCESS_TYPE(S) (S)->memory_access_type #define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p #define STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL(S) (S)->induc_cond_initial_val #define STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT(S) (S)->reduc_epilogue_adjustment @@ -1717,6 +1727,7 @@ public: unsigned int total_cost () const; unsigned int suggested_unroll_factor () const; machine_mode suggested_epilogue_mode (int &masked) const; + bool costing_for_scalar () const { return m_costing_for_scalar; } protected: unsigned int record_stmt_cost (stmt_vec_info, vect_cost_model_location, @@ -1999,6 +2010,13 @@ add_stmt_cost (vector_costs *costs, int count, tree vectype, int misalign, enum vect_cost_model_location where) { + /* Even though a vector type might be set on stmt do not pass that on when + costing the scalar IL. A SLP node shouldn't have been recorded. */ + if (costs->costing_for_scalar ()) + { + vectype = NULL_TREE; + gcc_checking_assert (node == NULL); + } unsigned cost = costs->add_stmt_cost (count, kind, stmt_info, node, vectype, misalign, where); if (dump_file && (dump_flags & TDF_DETAILS)) @@ -2824,18 +2842,6 @@ vect_is_reduction (stmt_vec_info stmt_info) return STMT_VINFO_REDUC_IDX (stmt_info) >= 0; } -/* Returns the memory acccess type being used to vectorize the statement. If - SLP this is read from NODE, otherwise it's read from the STMT_VINFO. */ - -inline vect_memory_access_type -vect_mem_access_type (stmt_vec_info stmt_info, slp_tree node) -{ - if (node) - return SLP_TREE_MEMORY_ACCESS_TYPE (node); - else - return STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info); -} - /* If STMT_INFO describes a reduction, return the vect_reduction_type of the reduction it describes, otherwise return -1. */ inline int diff --git a/gcc/varasm.cc b/gcc/varasm.cc index 8266282..000ad9e 100644 --- a/gcc/varasm.cc +++ b/gcc/varasm.cc @@ -871,7 +871,7 @@ mergeable_string_section (tree decl ATTRIBUTE_UNUSED, if (HAVE_GAS_SHF_MERGE && flag_merge_constants && TREE_CODE (decl) == STRING_CST && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE - && align <= 256 + && align <= MAX_ALIGN_MERGABLE && (len = int_size_in_bytes (TREE_TYPE (decl))) > 0 && TREE_STRING_LENGTH (decl) == len) { @@ -885,7 +885,7 @@ mergeable_string_section (tree decl ATTRIBUTE_UNUSED, mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (TREE_TYPE (decl))); modesize = GET_MODE_BITSIZE (mode); - if (modesize >= 8 && modesize <= 256 + if (modesize >= 8 && modesize <= MAX_ALIGN_MERGABLE && (modesize & (modesize - 1)) == 0) { if (align < modesize) @@ -919,16 +919,14 @@ mergeable_string_section (tree decl ATTRIBUTE_UNUSED, /* Return the section to use for constant merging. */ section * -mergeable_constant_section (machine_mode mode ATTRIBUTE_UNUSED, - unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED, - unsigned int flags ATTRIBUTE_UNUSED) +mergeable_constant_section (unsigned HOST_WIDE_INT size_bits, + unsigned HOST_WIDE_INT align, + unsigned int flags) { if (HAVE_GAS_SHF_MERGE && flag_merge_constants - && mode != VOIDmode - && mode != BLKmode - && known_le (GET_MODE_BITSIZE (mode), align) + && size_bits <= align && align >= 8 - && align <= 256 + && align <= MAX_ALIGN_MERGABLE && (align & (align - 1)) == 0) { const char *prefix = function_mergeable_rodata_prefix (); @@ -940,6 +938,38 @@ mergeable_constant_section (machine_mode mode ATTRIBUTE_UNUSED, } return readonly_data_section; } + + +/* Return the section to use for constant merging. Like the above + but the size stored as a tree. */ +static section * +mergeable_constant_section (tree size_bits, + unsigned HOST_WIDE_INT align, + unsigned int flags) +{ + if (!size_bits || !tree_fits_uhwi_p (size_bits)) + return readonly_data_section; + return mergeable_constant_section (tree_to_uhwi (size_bits), align, flags); +} + + +/* Return the section to use for constant merging. Like the above + but given a mode rather than the size. */ + +section * +mergeable_constant_section (machine_mode mode, + unsigned HOST_WIDE_INT align, + unsigned int flags) +{ + /* If the mode is unknown (BLK or VOID), then return a non mergable section. */ + if (mode == BLKmode || mode == VOIDmode) + return readonly_data_section; + unsigned HOST_WIDE_INT size; + if (!GET_MODE_BITSIZE (mode).is_constant (&size)) + return readonly_data_section; + return mergeable_constant_section (size, align, flags); +} + /* Given NAME, a putative register name, discard any customary prefixes. */ @@ -7453,7 +7483,7 @@ default_elf_select_section (tree decl, int reloc, case SECCAT_RODATA_MERGE_STR_INIT: return mergeable_string_section (DECL_INITIAL (decl), align, 0); case SECCAT_RODATA_MERGE_CONST: - return mergeable_constant_section (DECL_MODE (decl), align, 0); + return mergeable_constant_section (DECL_SIZE (decl), align, 0); case SECCAT_SRODATA: sname = ".sdata2"; break; @@ -2514,6 +2514,10 @@ public: return false; if (lhs.size () != rhs.size ()) return false; + /* Case where either is a NULL pointer and therefore, as both are valid, + both are empty slices with length 0. */ + if (lhs.size () == 0) + return true; return memcmp (lhs.begin (), rhs.begin (), lhs.size ()) == 0; } |