diff options
Diffstat (limited to 'gcc')
120 files changed, 4663 insertions, 289 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f970c13..e082958 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,281 @@ +2025-04-15 Sandra Loosemore <sloosemore@baylibre.com> + + PR tree-optimization/71094 + * doc/invoke.texi (Optimize Options): Document that -fivopts is + enabled at -O1 and higher. Add blurb about -O0 causing GCC to + completely ignore most optimization options. + +2025-04-15 Iain Sandoe <iain@sandoe.co.uk> + + * configure: Regenerate. + * configure.ac: Recognise PROJECT:ld-mmmm.nn.aa as an identifier + for Darwin's static linker. + +2025-04-15 Iain Sandoe <iainsandoe@mini-05-seq.local> + + PR target/116827 + * ginclude/stddef.h: Undefine __PTRDIFF_T and __SIZE_T for module- + enabled c++ on Darwin/macOS platforms. + +2025-04-15 Kyrylo Tkachov <ktkachov@nvidia.com> + + * common.opt.urls: Regenerate. + +2025-04-15 Jan Hubicka <hubicka@ucw.cz> + + * config/i386/x86-tune-sched.cc (ix86_issue_rate): Set + to 4 for znver5. + +2025-04-15 Jan Hubicka <hubicka@ucw.cz> + + PR target/119298 + * config/i386/x86-tune-costs.h (znver5_cost): Set ADDSS cost to 3. + +2025-04-15 Vineet Gupta <vineetg@rivosinc.com> + + PR target/119533 + * config/riscv/riscv-vsetvl.cc (invalid_opt_bb_p): Check for + EDGE_ABNOMAL. + (pre_vsetvl::compute_lcm_local_properties): Initialize kill + bitmap. + Debug dump skipped edge. + +2025-04-15 Robin Dapp <rdapp@ventanamicro.com> + + PR target/119547 + * config/riscv/riscv-vsetvl.cc (pre_vsetvl::earliest_fuse_vsetvl_info): + Do not perform lift if block is not transparent. + +2025-04-15 Kyrylo Tkachov <ktkachov@nvidia.com> + + * Makefile.in (OBJS): Add ipa-locality-cloning.o. + * cgraph.h (set_new_clone_decl_and_node_flags): Declare prototype. + * cgraphclones.cc (set_new_clone_decl_and_node_flags): Remove static + qualifier. + * common.opt (fipa-reorder-for-locality): New flag. + (LTO_PARTITION_DEFAULT): Declare. + (flto-partition): Change default to LTO_PARTITION_DFEAULT. + * doc/invoke.texi: Document -fipa-reorder-for-locality. + * flag-types.h (enum lto_locality_cloning_model): Declare. + (lto_partitioning_model): Add LTO_PARTITION_DEFAULT. + * lto-cgraph.cc (lto_set_symtab_encoder_in_partition): Add dumping of + node and index. + * opts.cc (validate_ipa_reorder_locality_lto_partition): Define. + (finish_options): Handle LTO_PARTITION_DEFAULT. + * params.opt (lto_locality_cloning_model): New enum. + (lto-partition-locality-cloning): New param. + (lto-partition-locality-frequency-cutoff): Likewise. + (lto-partition-locality-size-cutoff): Likewise. + (lto-max-locality-partition): Likewise. + * passes.def: Register pass_ipa_locality_cloning. + * timevar.def (TV_IPA_LC): New timevar. + * tree-pass.h (make_pass_ipa_locality_cloning): Declare. + * ipa-locality-cloning.cc: New file. + * ipa-locality-cloning.h: New file. + +2025-04-15 Martin Jambor <mjambor@suse.cz> + Jakub Jelinek <jakub@redhat.com> + + PR ipa/119803 + * ipa-cp.cc (ipcp_bits_lattice::meet_with_1): Move m_value adjustmed + according to m_mask below the adjustment of the latter according to + cap_mask. Optimize the calculation of cap_mask a bit. + (ipcp_bits_lattice::meet_with): Optimize the calculation of cap_mask a + bit. + +2025-04-15 Jakub Jelinek <jakub@redhat.com> + + * ipa-cp.cc (ipcp_print_widest_int): Print values with all ones in + bits 128+ with "0xf..f" prefix instead of "all ones folled by ". + Simplify wide_int check for -1 or all ones above least significant + 128 bits. + +2025-04-15 Jakub Jelinek <jakub@redhat.com> + + PR sanitizer/119801 + * sanitizer.def (BUILT_IN_TSAN_FUNC_EXIT): Use BT_FN_VOID rather + than BT_FN_VOID_PTR. + * tree-tailcall.cc: Include attribs.h and asan.h. + (struct tailcall): Add has_tsan_func_exit member. + (empty_eh_cleanup): Add eh_has_tsan_func_exit argument, set what + it points to to 1 if there is exactly one __tsan_func_exit call + and ignore that call otherwise. Adjust recursive call. + (find_tail_calls): Add RETRY_TSAN_FUNC_EXIT argument, pass it + to recursive calls. When seeing __tsan_func_exit call with + RETRY_TSAN_FUNC_EXIT 0, set it to -1. If RETRY_TSAN_FUNC_EXIT + is 1, initially ignore __tsan_func_exit calls. Adjust + empty_eh_cleanup caller. When looking through stmts after the call, + ignore exactly one __tsan_func_exit call but remember it in + t->has_tsan_func_exit. Diagnose if EH cleanups didn't have + __tsan_func_exit and normal path did or vice versa. + (optimize_tail_call): Emit __tsan_func_exit before the tail call + or tail recursion. + (tree_optimize_tail_calls_1): Adjust find_tail_calls callers. If + find_tail_calls changes retry_tsan_func_exit to -1, set it to 1 + and call it again with otherwise the same arguments. + +2025-04-15 Sandra Loosemore <sloosemore@baylibre.com> + + PR ipa/113203 + * doc/extend.texi (Common Function Attributes): Explain how to + use always_inline in programs that have multiple translation + units, and that LTO inlining additionally needs optimization + enabled. + +2025-04-15 liuhongt <hongtao.liu@intel.com> + + PR target/108134 + * doc/extend.texi: Remove documents from r11-344-g0fec3f62b9bfc0. + +2025-04-15 Sandra Loosemore <sloosemore@baylibre.com> + + PR target/42683 + * doc/invoke.texi (x86 Options): Clarify that -march=pentiumpro + doesn't include MMX. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/118794 + * config/gcn/gcn.opt (-mfake-exceptions): Support. + * config/nvptx/nvptx.opt (-mfake-exceptions): Likewise. + * config/gcn/gcn.md (define_expand "exception_receiver"): Use it. + * config/nvptx/nvptx.md (define_expand "exception_receiver"): + Likewise. + * config/gcn/mkoffload.cc (main): Set it. + * config/nvptx/mkoffload.cc (main): Likewise. + * config/nvptx/nvptx.cc (nvptx_assemble_integer) + <in_section == exception_section>: Special handling for + 'SYMBOL_REF's. + * except.cc (expand_dw2_landing_pad_for_region): Don't generate + bogus code for (default) + '#define EH_RETURN_DATA_REGNO(N) INVALID_REGNUM'. + +2025-04-14 Jakub Jelinek <jakub@redhat.com> + + PR rtl-optimization/119785 + * expmed.cc (init_expmed): Always pass QImode rather than mode to + set_src_cost passed to set_zero_cost. + +2025-04-14 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/119718 + * tree-pretty-print.cc (dump_generic_node) <case CALL_EXPR>: Dump + also CALL_EXPR_MUST_TAIL_CALL flag. + * calls.cc (maybe_complain_about_tail_call): Emit error about + CALL_EXPR_MUST_TAIL_CALL only after emitting dump message, not before + it. + +2025-04-14 Arthur Cohen <arthur.cohen@embecosm.com> + + * doc/install.texi: Add requirements for building gccrs. + +2025-04-14 H.J. Lu <hjl.tools@gmail.com> + + PR target/119784 + * config/i386/i386.cc (ix86_using_red_zone): Don't use red-zone + with 32 GPRs and no caller-saved registers. + +2025-04-14 Martin Jambor <mjambor@suse.cz> + + PR ipa/118097 + * ipa-cp.cc (ipa_get_jf_arith_result): Require res_operand for + anything except NOP_EXPR or ADDR_EXPR, document it and remove the code + trying to deduce it. + (ipa_value_from_jfunc): Use the stored and streamed type of arithmetic + pass-through functions. + (ipa_agg_value_from_jfunc): Use the stored and streamed type of + arithmetic pass-through functions, convert to the type used to store + the value if necessary. + (get_val_across_arith_op): New parameter op_type, pass it to + ipa_get_jf_arith_result. + (propagate_vals_across_arith_jfunc): New parameter op_type, pass it to + get_val_across_arith_op. + (propagate_vals_across_pass_through): Use the stored and streamed type + of arithmetic pass-through functions. + (propagate_aggregate_lattice): Likewise. + (push_agg_values_for_index_from_edge): Use the stored and streamed + type of arithmetic pass-through functions, convert to the type used to + store the value if necessary. + +2025-04-14 Martin Jambor <mjambor@suse.cz> + + PR ipa/118785 + * ipa-cp.cc (ipa_vr_intersect_with_arith_jfunc): Use the stored + and streamed type of arithmetic pass-through functions. + +2025-04-14 Martin Jambor <mjambor@suse.cz> + + * ipa-cp.cc (ipcp_print_widest_int): Also add a truncated form of + dumping of widest ints which only have zeros in the lowest 128 bits. + Update the comment. + (ipcp_bits_lattice::print): Also dump the mask using + ipcp_print_widest_int. + (ipcp_store_vr_results): Likewise. + +2025-04-14 Martin Jambor <mjambor@suse.cz> + + PR ipa/119318 + * ipa-cp.cc (ipcp_bits_lattice::meet_with_1): Set all mask bits + not covered by precision to one. + (ipcp_bits_lattice::meet_with): Likewise. + (propagate_bits_across_jump_function): Use the stored operation + type to perform meet with other lattices. + +2025-04-14 Martin Jambor <mjambor@suse.cz> + + PR ipa/118097 + PR ipa/118785 + PR ipa/119318 + * lto-streamer.h (lto_variably_modified_type_p): Declare. + * ipa-prop.h (ipa_pass_through_data): New field op_type. + (ipa_get_jf_pass_through_op_type): New function. + * ipa-prop.cc: Include lto-streamer.h. + (ipa_dump_jump_function): Dump also pass-through + operation types, if any. Dump pass-through operands only if not NULL. + (ipa_set_jf_simple_pass_through): Set op_type accordingly. + (compute_complex_assign_jump_func): Set op_type of arithmetic + pass-through jump_functions. + (analyze_agg_content_value): Update lhs when walking assighment + copies. Set op_type of aggregate arithmetic pass-through + jump_functions. + (update_jump_functions_after_inlining): Also transfer the operation + type from the source arithmentic pass-through jump function to the + destination jump function. + (ipa_write_jump_function): Stream also the op_type when necessary. + (ipa_read_jump_function): Likewise. + (ipa_agg_pass_through_jf_equivalent_p): Also compare operation types. + * lto-streamer-out.cc (lto_variably_modified_type_p): Make public. + +2025-04-14 Richard Biener <rguenther@suse.de> + + PR tree-optimization/119757 + * tree-vect-slp.cc (vect_build_slp_tree_1): Record and compare + whether a stmt uses a maks. + +2025-04-14 Richard Biener <rguenther@suse.de> + + PR tree-optimization/119778 + * tree-inline.cc (copy_edges_for_bb): Mark calls that are + source of abnormal edges as altering control-flow. + +2025-04-14 Gaius Mulley <gaiusmod2@gmail.com> + + PR modula2/119779 + * doc/gm2.texi (Interface to assembly language): Use eax + rather than rax in both examples. + +2025-04-14 Jakub Jelinek <jakub@redhat.com> + + PR driver/119727 + * configure.ac (HOST_HAS_PERSONALITY_ADDR_NO_RANDOMIZE): New check. + * gcc.cc: Include sys/personality.h if + HOST_HAS_PERSONALITY_ADDR_NO_RANDOMIZE is defined. + (try_generate_repro): Call + personality (personality (0xffffffffU) | ADDR_NO_RANDOMIZE) + if HOST_HAS_PERSONALITY_ADDR_NO_RANDOMIZE is defined. + * config.in: Regenerate. + * configure: Regenerate. + 2025-04-13 Stefan Schulze Frielinghaus <stefansf@gcc.gnu.org> * config/s390/s390.cc: Add z17 scheduler description. diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 1e2b10c..c9d404d 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20250414 +20250416 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index ebfcd8a..55b4cd7 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1555,6 +1555,7 @@ OBJS = \ incpath.o \ init-regs.o \ internal-fn.o \ + ipa-locality-cloning.o \ ipa-cp.o \ ipa-sra.o \ ipa-devirt.o \ @@ -3026,6 +3027,7 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/ipa-param-manipulation.h $(srcdir)/ipa-sra.cc \ $(srcdir)/ipa-modref.h $(srcdir)/ipa-modref.cc \ $(srcdir)/ipa-modref-tree.h \ + $(srcdir)/ipa-locality-cloning.cc \ $(srcdir)/signop.h \ $(srcdir)/diagnostic-spec.h $(srcdir)/diagnostic-spec.cc \ $(srcdir)/dwarf2out.h \ diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index dec9f16..f75a0f6 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,9 @@ +2025-04-15 Qing Zhao <qing.zhao@oracle.com> + + PR c/119717 + * c-typeck.cc (build_access_with_size_for_counted_by): Fully fold the + parameters for call to .ACCESS_WITH_SIZE. + 2025-04-08 Martin Uecker <uecker@tugraz.at> PR c/119612 diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc index 3870e8a..55d896e 100644 --- a/gcc/c/c-typeck.cc +++ b/gcc/c/c-typeck.cc @@ -3013,12 +3013,16 @@ build_access_with_size_for_counted_by (location_t loc, tree ref, gcc_assert (c_flexible_array_member_type_p (TREE_TYPE (ref))); /* The result type of the call is a pointer to the flexible array type. */ tree result_type = c_build_pointer_type (TREE_TYPE (ref)); + tree first_param + = c_fully_fold (array_to_pointer_conversion (loc, ref), false, NULL); + tree second_param + = c_fully_fold (counted_by_ref, false, NULL); tree call = build_call_expr_internal_loc (loc, IFN_ACCESS_WITH_SIZE, result_type, 6, - array_to_pointer_conversion (loc, ref), - counted_by_ref, + first_param, + second_param, build_int_cst (integer_type_node, 1), build_int_cst (counted_by_type, 0), build_int_cst (integer_type_node, -1), diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 065fcc7..abde770 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -2627,6 +2627,7 @@ void tree_function_versioning (tree, tree, vec<ipa_replace_map *, va_gc> *, void dump_callgraph_transformation (const cgraph_node *original, const cgraph_node *clone, const char *suffix); +void set_new_clone_decl_and_node_flags (cgraph_node *new_node); /* In cgraphbuild.cc */ int compute_call_stmt_bb_frequency (tree, basic_block bb); void record_references_in_initializer (tree, bool); diff --git a/gcc/cgraphclones.cc b/gcc/cgraphclones.cc index 5332a43..e6223fa 100644 --- a/gcc/cgraphclones.cc +++ b/gcc/cgraphclones.cc @@ -158,7 +158,7 @@ cgraph_edge::clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid, /* Set flags of NEW_NODE and its decl. NEW_NODE is a newly created private clone or its thunk. */ -static void +void set_new_clone_decl_and_node_flags (cgraph_node *new_node) { DECL_EXTERNAL (new_node->decl) = 0; diff --git a/gcc/cobol/ChangeLog b/gcc/cobol/ChangeLog index 07c687c..59a1107 100644 --- a/gcc/cobol/ChangeLog +++ b/gcc/cobol/ChangeLog @@ -1,3 +1,33 @@ +2025-04-15 Richard Biener <rguenther@suse.de> + + PR cobol/119302 + * Make-lang.in (GCOBOLIO_INSTALL_NAME): Define. + Use $(GCOBOLIO_INSTALL_NAME) for gcobol.3 manpage source + upon install. + +2025-04-14 Jakub Jelinek <jakub@redhat.com> + + PR cobol/119776 + * lang.opt (fmax-errors): Remove. + * lang.opt.urls: Regenerate. + * cobol1.cc (cobol_langhook_handle_option) <case OPT_fmax_errors>: + Remove. + * gcobol.1: Document -fmax-errors=nerror rather than + -fmax-errors nerror. + +2025-04-14 Jakub Jelinek <jakub@redhat.com> + + PR cobol/119777 + * lang.opt (include): Remove Var(cobol_include). + * cobol1.cc (cobol_langhook_handle_option) <case OPT_include>: Use + arg instead of cobol_include. + +2025-04-14 Jakub Jelinek <jakub@redhat.com> + + PR cobol/119777 + * lang.opt (fsyntax-only): Remove. + * lang.opt.urls: Regenerate. + 2025-04-13 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE> Simon Sobisch <simonsobisch@gnu.org> diff --git a/gcc/cobol/Make-lang.in b/gcc/cobol/Make-lang.in index 422ebe2..9b74dd3 100644 --- a/gcc/cobol/Make-lang.in +++ b/gcc/cobol/Make-lang.in @@ -35,6 +35,7 @@ # - define the names for selecting the language in LANGUAGES. GCOBOL_INSTALL_NAME := $(shell echo gcobol|sed '$(program_transform_name)') +GCOBOLIO_INSTALL_NAME := $(shell echo gcobol-io|sed '$(program_transform_name)') GCOBOL_TARGET_INSTALL_NAME := $(target_noncanonical)-$(shell echo gcobol|sed '$(program_transform_name)') GCOBC_INSTALL_NAME := $(shell echo gcobc|sed '$(program_transform_name)') @@ -293,7 +294,7 @@ cobol.install-common: installdirs cobol.install-man: installdirs $(INSTALL_DATA) $(srcdir)/cobol/gcobol.1 $(DESTDIR)$(man1dir)/$(GCOBOL_INSTALL_NAME)$(man1ext) - $(INSTALL_DATA) $(srcdir)/cobol/gcobol.3 $(DESTDIR)$(man3dir)/ + $(INSTALL_DATA) $(srcdir)/cobol/gcobol.3 $(DESTDIR)$(man3dir)/$(GCOBOLIO_INSTALL_NAME)$(man3ext) cobol.install-info: @@ -342,8 +343,8 @@ cobol.uninstall: rm -rf $(DESTDIR)$(bindir)/$(GCOBOL_INSTALL_NAME)$(exeext) \ $(DESTDIR)$(bindir)/$(GCOBC_INSTALL_NAME) \ $(DESTDIR)$(datadir)/gcobol/ \ - $(DESTDIR)$(man1dir)/$(GCOBOL_INSTALL_NAME).1 \ - $(DESTDIR)$(man3dir)/gcobol.3 + $(DESTDIR)$(man1dir)/$(GCOBOL_INSTALL_NAME)$(man1ext) \ + $(DESTDIR)$(man3dir)/$(GCOBOLIO_INSTALL_NAME)$(man3ext) cobol.man: cobol.srcman: diff --git a/gcc/common.opt b/gcc/common.opt index 2c8fdde..88d987e 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2116,6 +2116,10 @@ fipa-modref Common Var(flag_ipa_modref) Optimization Perform interprocedural modref analysis. +fipa-reorder-for-locality +Common Var(flag_ipa_reorder_for_locality) Init(0) Optimization +Perform reordering and cloning of functions to maximize locality. + fipa-profile Common Var(flag_ipa_profile) Init(0) Optimization Perform interprocedural profile propagation. @@ -2275,6 +2279,9 @@ Enum Name(lto_partition_model) Type(enum lto_partition_model) UnknownError(unknown LTO partitioning model %qs) EnumValue +Enum(lto_partition_model) String(default) Value(LTO_PARTITION_DEFAULT) + +EnumValue Enum(lto_partition_model) String(none) Value(LTO_PARTITION_NONE) EnumValue @@ -2293,7 +2300,7 @@ EnumValue Enum(lto_partition_model) String(cache) Value(LTO_PARTITION_CACHE) flto-partition= -Common Joined RejectNegative Enum(lto_partition_model) Var(flag_lto_partition) Init(LTO_PARTITION_BALANCED) +Common Joined RejectNegative Enum(lto_partition_model) Var(flag_lto_partition) Init(LTO_PARTITION_DEFAULT) Specify the algorithm to partition symbols and vars at linktime. ; The initial value of -1 comes from Z_DEFAULT_COMPRESSION in zlib.h. diff --git a/gcc/common.opt.urls b/gcc/common.opt.urls index a4b14f5..8bd75b1 100644 --- a/gcc/common.opt.urls +++ b/gcc/common.opt.urls @@ -868,6 +868,9 @@ UrlSuffix(gcc/Optimize-Options.html#index-fipa-bit-cp) fipa-modref UrlSuffix(gcc/Optimize-Options.html#index-fipa-modref) +fipa-reorder-for-locality +UrlSuffix(gcc/Optimize-Options.html#index-fipa-reorder-for-locality) + fipa-profile UrlSuffix(gcc/Optimize-Options.html#index-fipa-profile) diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 3dbd659..d4af370 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -3133,9 +3133,9 @@ "TARGET_SVE" { rtx tmp = gen_reg_rtx (<MODE>mode); - emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1], - CONST1_RTX (<MODE>mode), - CONST0_RTX (<MODE>mode))); + emit_insn (gen_vcond_mask_<mode><vpred> (tmp, CONST1_RTX (<MODE>mode), + CONST0_RTX (<MODE>mode), + operands[1])); emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2])); DONE; } diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 4e80114..433ec97 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -31073,8 +31073,6 @@ aarch64_valid_sysreg_name_p (const char *regname) const sysreg_t *sysreg = aarch64_lookup_sysreg_map (regname); if (sysreg == NULL) return aarch64_is_implem_def_reg (regname); - if (sysreg->arch_reqs) - return bool (aarch64_isa_flags & sysreg->arch_reqs); return true; } @@ -31098,8 +31096,6 @@ aarch64_retrieve_sysreg (const char *regname, bool write_p, bool is128op) if ((write_p && (sysreg->properties & F_REG_READ)) || (!write_p && (sysreg->properties & F_REG_WRITE))) return NULL; - if ((~aarch64_isa_flags & sysreg->arch_reqs) != 0) - return NULL; return sysreg->encoding; } diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index 695656f..e0fb735 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -1018,7 +1018,9 @@ [(const_int 0)] "" { - sorry ("exception handling not supported"); + if (!fake_exceptions) + sorry ("exception handling not supported"); + DONE; }) ;; }}} diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt index 142b439..99d6aeb 100644 --- a/gcc/config/gcn/gcn.opt +++ b/gcc/config/gcn/gcn.opt @@ -101,3 +101,11 @@ Enum(gcn_preferred_vectorization_factor) String(32) Value(32) EnumValue Enum(gcn_preferred_vectorization_factor) String(64) Value(64) + +mfake-exceptions +Target Var(fake_exceptions) Init(0) Undocumented +; With '-mfake-exceptions' enabled, the user-visible behavior in presence of +; exception handling constructs changes such that the compile-time +; 'sorry, unimplemented: exception handling not supported' is skipped, code +; generation proceeds, and instead, exception handling constructs 'abort' at +; run time. (..., or don't, if they're in dead code.) diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc index f5b89c9..b284ff4 100644 --- a/gcc/config/gcn/mkoffload.cc +++ b/gcc/config/gcn/mkoffload.cc @@ -1160,6 +1160,9 @@ main (int argc, char **argv) obstack_ptr_grow (&cc_argv_obstack, "-xlto"); if (fopenmp) obstack_ptr_grow (&cc_argv_obstack, "-mgomp"); + /* The host code may contain exception handling constructs. + Handle these as good as we can. */ + obstack_ptr_grow (&cc_argv_obstack, "-mfake-exceptions"); for (int ix = 1; ix != argc; ix++) { diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index 7c8cb73..9477345 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -2120,7 +2120,7 @@ struct processor_costs znver5_cost = { COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ /* ADDSS has throughput 2 and latency 2 (in some cases when source is another addition). */ - COSTS_N_INSNS (2), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ /* MULSS has throughput 2 and latency 3. */ COSTS_N_INSNS (3), /* cost of MULSS instruction. */ COSTS_N_INSNS (3), /* cost of MULSD instruction. */ diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc index 685a83c..15d3d91 100644 --- a/gcc/config/i386/x86-tune-sched.cc +++ b/gcc/config/i386/x86-tune-sched.cc @@ -81,6 +81,14 @@ ix86_issue_rate (void) case PROCESSOR_YONGFENG: case PROCESSOR_SHIJIDADAO: case PROCESSOR_GENERIC: + /* For znver5 decoder can handle 4 or 8 instructions per cycle, + op cache 12 instruction/cycle, dispatch 8 instructions + integer rename 8 instructions and Fp 6 instructions. + + The scheduler, without understanding out of order nature of the CPU + is not going to be able to use more than 4 instructions since that + is limits of the decoders. */ + case PROCESSOR_ZNVER5: return 4; case PROCESSOR_ICELAKE_CLIENT: @@ -91,13 +99,6 @@ ix86_issue_rate (void) return 5; case PROCESSOR_SAPPHIRERAPIDS: - /* For znver5 decoder can handle 4 or 8 instructions per cycle, - op cache 12 instruction/cycle, dispatch 8 instructions - integer rename 8 instructions and Fp 6 instructions. - - The scheduler, without understanding out of order nature of the CPU - is unlikely going to be able to fill all of these. */ - case PROCESSOR_ZNVER5: return 6; default: diff --git a/gcc/config/nvptx/mkoffload.cc b/gcc/config/nvptx/mkoffload.cc index bdfe7f5..e7ec0ef 100644 --- a/gcc/config/nvptx/mkoffload.cc +++ b/gcc/config/nvptx/mkoffload.cc @@ -778,6 +778,9 @@ main (int argc, char **argv) } if (fopenmp) obstack_ptr_grow (&argv_obstack, "-mgomp"); + /* The host code may contain exception handling constructs. + Handle these as good as we can. */ + obstack_ptr_grow (&argv_obstack, "-mfake-exceptions"); for (int ix = 1; ix != argc; ix++) { diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index 87364bf..28da43c 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -2359,7 +2359,25 @@ nvptx_assemble_integer (rtx x, unsigned int size, int ARG_UNUSED (aligned_p)) { gcc_checking_assert (!init_frag.active); /* Just use the default machinery; it's not getting used, anyway. */ - return default_assemble_integer (x, size, aligned_p); + bool ok = default_assemble_integer (x, size, aligned_p); + /* ..., but a few cases need special handling. */ + switch (GET_CODE (x)) + { + case SYMBOL_REF: + /* The default machinery won't work: we don't define the necessary + operations; don't use them outside of this. */ + gcc_checking_assert (!ok); + { + /* Just emit something; it's not getting used, anyway. */ + const char *op = "\t.symbol_ref\t"; + ok = (assemble_integer_with_op (op, x), true); + } + break; + + default: + break; + } + return ok; } gcc_checking_assert (init_frag.active); diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 3201247..7c3bd69 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -1644,7 +1644,9 @@ [(const_int 0)] "" { - sorry ("exception handling not supported"); + if (!fake_exceptions) + sorry ("exception handling not supported"); + DONE; }) (define_expand "nonlocal_goto" diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt index 9be81ae..ce9fbc7 100644 --- a/gcc/config/nvptx/nvptx.opt +++ b/gcc/config/nvptx/nvptx.opt @@ -168,6 +168,14 @@ Target Var(nvptx_alias) Init(0) Undocumented mexperimental Target Var(nvptx_experimental) Init(0) Undocumented +mfake-exceptions +Target Var(fake_exceptions) Init(0) Undocumented +; With '-mfake-exceptions' enabled, the user-visible behavior in presence of +; exception handling constructs changes such that the compile-time +; 'sorry, unimplemented: exception handling not supported' is skipped, code +; generation proceeds, and instead, exception handling constructs 'abort' at +; run time. (..., or don't, if they're in dead code.) + mfake-ptx-alloca Target Var(nvptx_fake_ptx_alloca) Init(0) Undocumented ; With '-mfake-ptx-alloca' enabled, the user-visible behavior changes only diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 0ac2538..a8c9256 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -685,7 +685,7 @@ invalid_opt_bb_p (basic_block cfg_bb) /* We only do LCM optimizations on blocks that are post dominated by EXIT block, that is, we don't do LCM optimizations on infinite loop. */ FOR_EACH_EDGE (e, ei, cfg_bb->succs) - if (e->flags & EDGE_FAKE) + if ((e->flags & EDGE_FAKE) || (e->flags & EDGE_ABNORMAL)) return true; return false; @@ -2698,6 +2698,7 @@ pre_vsetvl::compute_lcm_local_properties () m_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); bitmap_vector_clear (m_avloc, last_basic_block_for_fn (cfun)); + bitmap_vector_clear (m_kill, last_basic_block_for_fn (cfun)); bitmap_vector_clear (m_antloc, last_basic_block_for_fn (cfun)); bitmap_vector_ones (m_transp, last_basic_block_for_fn (cfun)); @@ -2749,6 +2750,10 @@ pre_vsetvl::compute_lcm_local_properties () if (invalid_opt_bb_p (bb->cfg_bb ())) { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\n --- skipping bb %u due to weird edge", + bb->index ()); + bitmap_clear (m_antloc[bb_index]); bitmap_clear (m_transp[bb_index]); } @@ -3022,6 +3027,18 @@ pre_vsetvl::earliest_fuse_vsetvl_info (int iter) continue; } + /* We cannot lift a vsetvl into the source block if the block is + not transparent WRT to it. + This is too restrictive for blocks where a register's use only + feeds into vsetvls and no regular insns. One example is the + test rvv/vsetvl/avl_single-68.c which is currently XFAILed for + that reason. + In order to support this case we'd need to check the vsetvl's + AVL operand's uses in the source block and make sure they are + only used in other vsetvls. */ + if (!bitmap_bit_p (m_transp[eg->src->index], expr_index)) + continue; + if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, diff --git a/gcc/configure b/gcc/configure index 821f8b4..1696595 100755 --- a/gcc/configure +++ b/gcc/configure @@ -3948,7 +3948,7 @@ if test x"${DEFAULT_LINKER+set}" = x"set"; then as_fn_error $? "cannot execute: $DEFAULT_LINKER: check --with-ld or env. var. DEFAULT_LINKER" "$LINENO" 5 elif $DEFAULT_LINKER -v < /dev/null 2>&1 | grep GNU > /dev/null; then gnu_ld_flag=yes - elif $DEFAULT_LINKER -v < /dev/null 2>&1 | grep ld64- > /dev/null; then + elif $DEFAULT_LINKER -v < /dev/null 2>&1 | grep 'PROJECT:ld\(64\)*-' > /dev/null; then ld64_flag=yes fi @@ -32730,8 +32730,9 @@ $as_echo "$gcc_cv_ld64_major" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking linker version" >&5 $as_echo_n "checking linker version... " >&6; } if test x"${gcc_cv_ld64_version}" = x; then - gcc_cv_ld64_version=`$gcc_cv_ld -v 2>&1 | $EGREP 'ld64|dyld' \ - | sed -e 's/.*ld64-//' -e 's/.*dyld-//'| awk '{print $1}'` + gcc_cv_ld64_version=`$gcc_cv_ld -v 2>&1 | $EGREP 'ld64|dyld|PROJECT:ld' \ + | sed -e 's/.*ld64-//' -e 's/.*dyld-//' -e 's/.*PROJECT:ld-//' \ + | awk '{print $1}'` fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_ld64_version" >&5 $as_echo "$gcc_cv_ld64_version" >&6; } diff --git a/gcc/configure.ac b/gcc/configure.ac index 3d0a4e6..9f67e62 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -358,7 +358,7 @@ if test x"${DEFAULT_LINKER+set}" = x"set"; then AC_MSG_ERROR([cannot execute: $DEFAULT_LINKER: check --with-ld or env. var. DEFAULT_LINKER]) elif $DEFAULT_LINKER -v < /dev/null 2>&1 | grep GNU > /dev/null; then gnu_ld_flag=yes - elif $DEFAULT_LINKER -v < /dev/null 2>&1 | grep ld64- > /dev/null; then + elif $DEFAULT_LINKER -v < /dev/null 2>&1 | grep 'PROJECT:ld\(64\)*-' > /dev/null; then ld64_flag=yes fi AC_DEFINE_UNQUOTED(DEFAULT_LINKER,"$DEFAULT_LINKER", @@ -6418,8 +6418,9 @@ if test x"$ld64_flag" = x"yes"; then # If the version was not specified, try to find it. AC_MSG_CHECKING(linker version) if test x"${gcc_cv_ld64_version}" = x; then - gcc_cv_ld64_version=`$gcc_cv_ld -v 2>&1 | $EGREP 'ld64|dyld' \ - | sed -e 's/.*ld64-//' -e 's/.*dyld-//'| awk '{print $1}'` + gcc_cv_ld64_version=`$gcc_cv_ld -v 2>&1 | $EGREP 'ld64|dyld|PROJECT:ld' \ + | sed -e 's/.*ld64-//' -e 's/.*dyld-//' -e 's/.*PROJECT:ld-//' \ + | awk '{print $1}'` fi AC_MSG_RESULT($gcc_cv_ld64_version) diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index e9e80f8..aa2f076 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,35 @@ +2025-04-15 Nathaniel Shead <nathanieloshead@gmail.com> + + PR c++/119755 + * lambda.cc (prune_lambda_captures): Remove pruned capture from + function's BLOCK_VARS and BIND_EXPR_VARS. + +2025-04-15 Jason Merrill <jason@redhat.com> + + PR c++/111075 + * constexpr.cc (cxx_eval_call_expression): Allow trivial + call from a thunk. + +2025-04-15 Patrick Palka <ppalka@redhat.com> + + PR c++/119807 + PR c++/112288 + * pt.cc (tsubst_friend_function): Skip remapping an + existing specialization if it doesn't match the shape of + the new friend definition. + +2025-04-15 Jason Merrill <jason@redhat.com> + + PR c++/113835 + * constexpr.cc (cxx_eval_outermost_constant_expr): Bail out early + for std::vector(N). + +2025-04-14 Patrick Palka <ppalka@redhat.com> + + PR c++/99214 + * constraint.cc (satisfy_declaration_constraints): Pass the + original ARGS to push_tinst_level. + 2025-04-13 Patrick Palka <ppalka@redhat.com> PR c++/115639 diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc index 7e37582..4346b29 100644 --- a/gcc/cp/constexpr.cc +++ b/gcc/cp/constexpr.cc @@ -3103,6 +3103,9 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t, we can only get a trivial function here with -fno-elide-constructors. */ gcc_checking_assert (!trivial_fn_p (fun) || !flag_elide_constructors + /* Or it's a call from maybe_thunk_body (111075). */ + || (TREE_CODE (t) == CALL_EXPR ? CALL_FROM_THUNK_P (t) + : AGGR_INIT_FROM_THUNK_P (t)) /* We don't elide constructors when processing a noexcept-expression. */ || cp_noexcept_operand); @@ -9127,6 +9130,15 @@ cxx_eval_outermost_constant_expr (tree t, bool allow_non_constant, tree fndecl = cp_get_callee_fndecl_nofold (x); if (fndecl && DECL_IMMEDIATE_FUNCTION_P (fndecl)) is_consteval = true; + /* Don't try to evaluate a std::vector constructor taking an integer, it + will fail in the 'if (heap_var)' block below after doing all the work + (c++/113835). This will need adjustment if P3554 is accepted. Note + that evaluation of e.g. the vector default constructor can succeed, so + we don't shortcut all vector constructors. */ + if (fndecl && DECL_CONSTRUCTOR_P (fndecl) && allow_non_constant + && is_std_class (type, "vector") && call_expr_nargs (x) > 1 + && TREE_CODE (TREE_TYPE (get_nth_callarg (x, 1))) == INTEGER_TYPE) + return t; } if (AGGREGATE_TYPE_P (type) || VECTOR_TYPE_P (type)) { diff --git a/gcc/cp/lambda.cc b/gcc/cp/lambda.cc index f0a54b6..b2e0ecd 100644 --- a/gcc/cp/lambda.cc +++ b/gcc/cp/lambda.cc @@ -1858,6 +1858,13 @@ prune_lambda_captures (tree body) cp_walk_tree_without_duplicates (&body, mark_const_cap_r, &const_vars); + tree bind_expr = expr_single (DECL_SAVED_TREE (lambda_function (lam))); + if (bind_expr && TREE_CODE (bind_expr) == MUST_NOT_THROW_EXPR) + bind_expr = expr_single (TREE_OPERAND (bind_expr, 0)); + /* FIXME: We don't currently handle noexcept lambda captures correctly, + so bind_expr may not be set; see PR c++/119764. */ + gcc_assert (!bind_expr || TREE_CODE (bind_expr) == BIND_EXPR); + tree *fieldp = &TYPE_FIELDS (LAMBDA_EXPR_CLOSURE (lam)); for (tree *capp = &LAMBDA_EXPR_CAPTURE_LIST (lam); *capp; ) { @@ -1879,6 +1886,23 @@ prune_lambda_captures (tree body) fieldp = &DECL_CHAIN (*fieldp); *fieldp = DECL_CHAIN (*fieldp); + /* And out of the bindings for the function. */ + tree *blockp = &BLOCK_VARS (current_binding_level->blocks); + while (*blockp != DECL_EXPR_DECL (**use)) + blockp = &DECL_CHAIN (*blockp); + *blockp = DECL_CHAIN (*blockp); + + /* And maybe out of the vars declared in the containing + BIND_EXPR, if it's listed there. */ + if (bind_expr) + { + tree *bindp = &BIND_EXPR_VARS (bind_expr); + while (*bindp && *bindp != DECL_EXPR_DECL (**use)) + bindp = &DECL_CHAIN (*bindp); + if (*bindp) + *bindp = DECL_CHAIN (*bindp); + } + /* And remove the capture proxy declaration. */ **use = void_node; continue; diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index b7060b4..4349b19 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -11772,6 +11772,10 @@ tsubst_friend_function (tree decl, tree args) elt.args = DECL_TI_ARGS (spec); elt.spec = NULL_TREE; + if (TMPL_ARGS_HAVE_MULTIPLE_LEVELS (DECL_TI_ARGS (spec)) + && !is_specialization_of_friend (spec, new_template)) + continue; + decl_specializations->remove_elt (&elt); tree& spec_args = DECL_TI_ARGS (spec); diff --git a/gcc/d/ChangeLog b/gcc/d/ChangeLog index b0a4f12..b025453 100644 --- a/gcc/d/ChangeLog +++ b/gcc/d/ChangeLog @@ -1,3 +1,22 @@ +2025-04-15 Iain Buclaw <ibuclaw@gdcproject.org> + + PR d/119826 + * types.cc (TypeVisitor::visit (TypeEnum *)): Propagate flags of main + enum types to all forward-referenced variants. + +2025-04-15 Iain Buclaw <ibuclaw@gdcproject.org> + + PR d/119799 + * decl.cc (DeclVisitor::visit (VarDeclaration *)): Check front-end + type size before building the VAR_DECL. Allow C symbols to have a + size of `0'. + +2025-04-15 Iain Buclaw <ibuclaw@gdcproject.org> + + PR d/119817 + * imports.cc (ImportVisitor::visit (OverloadSet *)): Don't push + NULL_TREE to vector of import symbols. + 2025-04-12 Iain Buclaw <ibuclaw@gdcproject.org> PR d/109023 diff --git a/gcc/d/decl.cc b/gcc/d/decl.cc index 136f78b..9ddf7cf 100644 --- a/gcc/d/decl.cc +++ b/gcc/d/decl.cc @@ -791,6 +791,12 @@ public: } else if (d->isDataseg ()) { + /* When the front-end type size is invalid, an error has already been + given for the declaration or type. */ + dinteger_t size = dmd::size (d->type, d->loc); + if (size == SIZE_INVALID) + return; + tree decl = get_symbol_decl (d); /* Only need to build the VAR_DECL for extern declarations. */ @@ -804,9 +810,7 @@ public: return; /* How big a symbol can be should depend on back-end. */ - tree size = build_integer_cst (dmd::size (d->type, d->loc), - build_ctype (Type::tsize_t)); - if (!valid_constant_size_p (size)) + if (!valid_constant_size_p (build_integer_cst (size, size_type_node))) { error_at (make_location_t (d->loc), "size is too large"); return; @@ -835,8 +839,9 @@ public: } /* Frontend should have already caught this. */ - gcc_assert (!integer_zerop (size) - || d->type->toBasetype ()->isTypeSArray ()); + gcc_assert ((size != 0 && size != SIZE_INVALID) + || d->type->toBasetype ()->isTypeSArray () + || d->isCsymbol ()); d_finish_decl (decl); diff --git a/gcc/d/imports.cc b/gcc/d/imports.cc index 776caaf..16e4df6 100644 --- a/gcc/d/imports.cc +++ b/gcc/d/imports.cc @@ -182,7 +182,11 @@ public: vec_alloc (tset, d->a.length); for (size_t i = 0; i < d->a.length; i++) - vec_safe_push (tset, build_import_decl (d->a[i])); + { + tree overload = build_import_decl (d->a[i]); + if (overload != NULL_TREE) + vec_safe_push (tset, overload); + } this->result_ = build_tree_list_vec (tset); tset->truncate (0); diff --git a/gcc/d/types.cc b/gcc/d/types.cc index e43fa88..1c74840 100644 --- a/gcc/d/types.cc +++ b/gcc/d/types.cc @@ -1179,6 +1179,26 @@ public: layout_type (t->ctype); + /* Fix up all forward-referenced variants of this enum type. */ + for (tree v = TYPE_MAIN_VARIANT (t->ctype); v; + v = TYPE_NEXT_VARIANT (v)) + { + if (v == t->ctype) + continue; + + TYPE_VALUES (v) = TYPE_VALUES (t->ctype); + TYPE_LANG_SPECIFIC (v) = TYPE_LANG_SPECIFIC (t->ctype); + TYPE_MIN_VALUE (v) = TYPE_MIN_VALUE (t->ctype); + TYPE_MAX_VALUE (v) = TYPE_MAX_VALUE (t->ctype); + TYPE_UNSIGNED (v) = TYPE_UNSIGNED (t->ctype); + TYPE_SIZE (v) = TYPE_SIZE (t->ctype); + TYPE_SIZE_UNIT (v) = TYPE_SIZE_UNIT (t->ctype); + SET_TYPE_MODE (v, TYPE_MODE (t->ctype)); + TYPE_PRECISION (v) = TYPE_PRECISION (t->ctype); + SET_TYPE_ALIGN (v, TYPE_ALIGN (t->ctype)); + TYPE_USER_ALIGN (v) = TYPE_USER_ALIGN (t->ctype); + } + /* Complete forward-referenced fields of this enum type. */ finish_incomplete_fields (t->ctype); } diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index ae3357f..5bc2785 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -1933,6 +1933,13 @@ Note that if such a function is called indirectly the compiler may or may not inline it depending on optimization level and a failure to inline an indirect call may or may not be diagnosed. +If you need to use the inlined function in multiple translation units, +you should put the @code{always_inline} attribute on a function +definition in a header file that is included in all translation units +where the function is used. Link-time optimization can inline +functions across translation units, but only if an optimization level +that normally enables inlining is additionally specified. + @cindex @code{artificial} function attribute @item artificial This attribute is useful for small inline wrappers that if possible @@ -12299,15 +12306,6 @@ for the @samp{att} and @samp{intel} dialects of assembler: @item @code{%3} @tab @code{$.L3} @tab @code{OFFSET FLAT:.L3} -@item @code{%4} -@tab @code{$8} -@tab @code{8} -@item @code{%5} -@tab @code{%xmm0} -@tab @code{xmm0} -@item @code{%7} -@tab @code{$0} -@tab @code{0} @end multitable The table below shows the list of supported modifiers and their effects. @@ -12324,32 +12322,17 @@ The table below shows the list of supported modifiers and their effects. @tab @code{%b0} @tab @code{%al} @tab @code{al} -@item @code{B} -@tab print the opcode suffix of b. -@tab @code{%B0} -@tab @code{b} -@tab @item @code{c} @tab Require a constant operand and print the constant expression with no punctuation. @tab @code{%c1} @tab @code{2} @tab @code{2} -@item @code{d} -@tab print duplicated register operand for AVX instruction. -@tab @code{%d5} -@tab @code{%xmm0, %xmm0} -@tab @code{xmm0, xmm0} @item @code{E} @tab Print the address in Double Integer (DImode) mode (8 bytes) when the target is 64-bit. Otherwise mode is unspecified (VOIDmode). @tab @code{%E1} @tab @code{%(rax)} @tab @code{[rax]} -@item @code{g} -@tab Print the V16SFmode name of the register. -@tab @code{%g0} -@tab @code{%zmm0} -@tab @code{zmm0} @item @code{h} @tab Print the QImode name for a ``high'' register. @tab @code{%h0} @@ -12371,16 +12354,6 @@ high 8 bytes of SSE values. For a memref in (%rax), it generates @tab @code{%l3} @tab @code{.L3} @tab @code{.L3} -@item @code{L} -@tab print the opcode suffix of l. -@tab @code{%L0} -@tab @code{l} -@tab -@item @code{N} -@tab print maskz. -@tab @code{%N7} -@tab @code{@{z@}} -@tab @code{@{z@}} @item @code{p} @tab Print raw symbol name (without syntax-specific prefixes). @tab @code{%p2} @@ -12396,76 +12369,20 @@ issue the bare constant. See @code{p} above. @tab @code{%q0} @tab @code{%rax} @tab @code{rax} -@item @code{Q} -@tab print the opcode suffix of q. -@tab @code{%Q0} -@tab @code{q} -@tab -@item @code{R} -@tab print embedded rounding and sae. -@tab @code{%R4} -@tab @code{@{rn-sae@}, } -@tab @code{, @{rn-sae@}} -@item @code{r} -@tab print only sae. -@tab @code{%r4} -@tab @code{@{sae@}, } -@tab @code{, @{sae@}} -@item @code{s} -@tab print a shift double count, followed by the assemblers argument -delimiterprint the opcode suffix of s. -@tab @code{%s1} -@tab @code{$2, } -@tab @code{2, } -@item @code{S} -@tab print the opcode suffix of s. -@tab @code{%S0} -@tab @code{s} -@tab -@item @code{t} -@tab print the V8SFmode name of the register. -@tab @code{%t5} -@tab @code{%ymm0} -@tab @code{ymm0} -@item @code{T} -@tab print the opcode suffix of t. -@tab @code{%T0} -@tab @code{t} -@tab -@item @code{V} -@tab print naked full integer register name without %. -@tab @code{%V0} -@tab @code{eax} -@tab @code{eax} @item @code{w} @tab Print the HImode name of the register. @tab @code{%w0} @tab @code{%ax} @tab @code{ax} -@item @code{W} -@tab print the opcode suffix of w. -@tab @code{%W0} -@tab @code{w} -@tab -@item @code{x} -@tab print the V4SFmode name of the register. -@tab @code{%x5} -@tab @code{%xmm0} -@tab @code{xmm0} -@item @code{y} -@tab print "st(0)" instead of "st" as a register. -@tab @code{%y6} -@tab @code{%st(0)} -@tab @code{st(0)} @item @code{z} @tab Print the opcode suffix for the size of the current integer operand (one of @code{b}/@code{w}/@code{l}/@code{q}). @tab @code{%z0} @tab @code{l} @tab -@item @code{Z} -@tab Like @code{z}, with special suffixes for x87 instructions. @end multitable +@code{V} is a special modifier which prints the name of the full integer +register without @code{%}. @anchor{x86floatingpointasmoperands} @subsubsection x86 Floating-Point @code{asm} Operands diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 67155ee..0b6644b 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -593,7 +593,7 @@ Objective-C and Objective-C++ Dialects}. -finline-functions -finline-functions-called-once -finline-limit=@var{n} -finline-small-functions -fipa-modref -fipa-cp -fipa-cp-clone -fipa-bit-cp -fipa-vrp -fipa-pta -fipa-profile -fipa-pure-const --fipa-reference -fipa-reference-addressable +-fipa-reference -fipa-reference-addressable -fipa-reorder-for-locality -fipa-stack-alignment -fipa-icf -fira-algorithm=@var{algorithm} -flate-combine-instructions -flifetime-dse -flive-patching=@var{level} -fira-region=@var{region} -fira-hoist-pressure @@ -12746,6 +12746,7 @@ complexity than at @option{-O}. -fipa-pure-const -fipa-reference -fipa-reference-addressable +-fivopts -fmerge-constants -fmove-loop-invariants -fmove-loop-stores @@ -12854,6 +12855,13 @@ by @option{-O2} and also turns on the following optimization flags: Reduce compilation time and make debugging produce the expected results. This is the default. +At @option{-O0}, GCC completely disables most optimization passes; +they are not run even if you explicitly enable them on the command +line, or are listed by @option{-Q --help=optimizers} as being enabled by +default. Many optimizations performed by GCC depend on code analysis +or canonicalization passes that are enabled by @option{-O}, and it would +not be useful to run individual optimization passes in isolation. + @opindex Os @item -Os Optimize for size. @option{-Os} enables all @option{-O2} optimizations @@ -13871,6 +13879,21 @@ Enabled by default at @option{-O1} and higher. Discover read-only, write-only and non-addressable static variables. Enabled by default at @option{-O1} and higher. +@opindex fipa-reorder-for-locality +@item -fipa-reorder-for-locality +Group call chains close together in the binary layout to improve code +locality and minimize jump distances between frequently called functions. +Unlike @option{-freorder-functions} this pass considers the call +chains between functions and groups them together, rather than grouping all +hot/normal/cold/never-executed functions into separate sections. +Unlike @option{-fprofile-reorder-functions} it aims to improve code locality +throughout the runtime of the program rather than focusing on program startup. +This option is incompatible with an explicit +@option{-flto-partition=} option since it enforces a custom partitioning +scheme. +If using this option it is recommended to also use profile feedback, but this +option is not enabled by default otherwise. + @opindex fipa-stack-alignment @item -fipa-stack-alignment Reduce stack alignment on call sites if possible. @@ -14291,6 +14314,7 @@ Enabled by default at @option{-O1} and higher. @item -fivopts Perform induction variable optimizations (strength reduction, induction variable merging and induction variable elimination) on trees. +Enabled by default at @option{-O1} and higher. @opindex ftree-parallelize-loops @item -ftree-parallelize-loops=n @@ -14606,11 +14630,13 @@ Enabled for x86 at levels @option{-O2}, @option{-O3}, @option{-Os}. @opindex freorder-functions @item -freorder-functions Reorder functions in the object file in order to -improve code locality. This is implemented by using special -subsections @code{.text.hot} for most frequently executed functions and -@code{.text.unlikely} for unlikely executed functions. Reordering is done by -the linker so object file format must support named sections and linker must -place them in a reasonable way. +improve code locality. Unlike @option{-fipa-reorder-for-locality} this option +prioritises grouping all functions within a category +(hot/normal/cold/never-executed) together. +This is implemented by using special subsections @code{.text.hot} for most +frequently executed functions and @code{.text.unlikely} for unlikely executed +functions. Reordering is done by the linker so object file format must support +named sections and linker must place them in a reasonable way. This option isn't effective unless you either provide profile feedback (see @option{-fprofile-arcs} for details) or manually annotate functions with @@ -15635,7 +15661,8 @@ Enabled by @option{-fprofile-generate}, @option{-fprofile-use}, and @item -fprofile-reorder-functions Function reordering based on profile instrumentation collects first time of execution of a function and orders these functions -in ascending order. +in ascending order, aiming to optimize program startup through more +efficient loading of text segments. Enabled with @option{-fprofile-use}. @@ -34872,7 +34899,7 @@ Intel Lakemont MCU, based on Intel Pentium CPU. Intel Pentium MMX CPU, based on Pentium core with MMX instruction set support. @item pentiumpro -Intel Pentium Pro CPU@. +Intel Pentium Pro CPU with no MMX support. @item i686 When used with @option{-march}, the Pentium Pro diff --git a/gcc/except.cc b/gcc/except.cc index d5eb927..205811c 100644 --- a/gcc/except.cc +++ b/gcc/except.cc @@ -970,12 +970,26 @@ expand_dw2_landing_pad_for_region (eh_region region) { /* Nothing */ } if (region->exc_ptr_reg) - emit_move_insn (region->exc_ptr_reg, - gen_rtx_REG (ptr_mode, EH_RETURN_DATA_REGNO (0))); + { + rtx exc_ptr_reg; + if (EH_RETURN_DATA_REGNO (0) != INVALID_REGNUM) + exc_ptr_reg = gen_rtx_REG (ptr_mode, EH_RETURN_DATA_REGNO (0)); + else + /* The target must be doing something special. Submit a dummy. */ + exc_ptr_reg = constm1_rtx; + emit_move_insn (region->exc_ptr_reg, exc_ptr_reg); + } if (region->filter_reg) - emit_move_insn (region->filter_reg, - gen_rtx_REG (targetm.eh_return_filter_mode (), - EH_RETURN_DATA_REGNO (1))); + { + rtx filter_reg; + if (EH_RETURN_DATA_REGNO (1) != INVALID_REGNUM) + filter_reg = gen_rtx_REG (targetm.eh_return_filter_mode (), + EH_RETURN_DATA_REGNO (1)); + else + /* The target must be doing something special. Submit a dummy. */ + filter_reg = constm1_rtx; + emit_move_insn (region->filter_reg, filter_reg); + } } /* Expand the extra code needed at landing pads for dwarf2 unwinding. */ diff --git a/gcc/flag-types.h b/gcc/flag-types.h index 0127698..db57376 100644 --- a/gcc/flag-types.h +++ b/gcc/flag-types.h @@ -404,7 +404,15 @@ enum lto_partition_model { LTO_PARTITION_BALANCED = 2, LTO_PARTITION_1TO1 = 3, LTO_PARTITION_MAX = 4, - LTO_PARTITION_CACHE = 5 + LTO_PARTITION_CACHE = 5, + LTO_PARTITION_DEFAULT= 6 +}; + +/* flag_lto_locality_cloning initialization values. */ +enum lto_locality_cloning_model { + LTO_LOCALITY_NO_CLONING = 0, + LTO_LOCALITY_NON_INTERPOSABLE_CLONING = 1, + LTO_LOCALITY_MAXIMAL_CLONING = 2, }; /* flag_lto_linker_output initialization values. */ diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index a10d8df..55bff2e 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,32 @@ +2025-04-15 Tobias Burnus <tburnus@baylibre.com> + + * f95-lang.cc (LANG_HOOKS_OMP_DEEP_MAPPING, + LANG_HOOKS_OMP_DEEP_MAPPING_P, LANG_HOOKS_OMP_DEEP_MAPPING_CNT): + Define. + * openmp.cc (gfc_match_omp_clause_reduction): Fix location setting. + (resolve_omp_clauses): Permit allocatable components, reject + them and polymorphic variables in PRIVATE/FIRSTPRIVATE. + * trans-decl.cc (add_clause): Set clause location. + * trans-openmp.cc (gfc_has_alloc_comps): Add ptr_ok and + shallow_alloc_only Boolean arguments. + (gfc_omp_replace_alloc_by_to_mapping): New. + (gfc_omp_private_outer_ref, gfc_walk_alloc_comps, + gfc_omp_clause_default_ctor, gfc_omp_clause_copy_ctor, + gfc_omp_clause_assign_op, gfc_omp_clause_dtor): Update call to it. + (gfc_omp_finish_clause): Minor cleanups, improve location data, + handle allocatable components. + (gfc_omp_deep_mapping_map, gfc_omp_deep_mapping_item, + gfc_omp_deep_mapping_comps, gfc_omp_gen_simple_loop, + gfc_omp_get_array_size, gfc_omp_elmental_loop, + gfc_omp_deep_map_kind_p, gfc_omp_deep_mapping_int_p, + gfc_omp_deep_mapping_p, gfc_omp_deep_mapping_do, + gfc_omp_deep_mapping_cnt, gfc_omp_deep_mapping): New. + (gfc_trans_omp_array_section): Save array descriptor in case + deep-mapping lang hook will need it. + (gfc_trans_omp_clauses): Likewise; use better clause location data. + * trans.h (gfc_omp_deep_mapping_p, gfc_omp_deep_mapping_cnt, + gfc_omp_deep_mapping): Add function prototypes. + 2025-04-13 Thomas Koenig <tkoenig@gcc.gnu.org> PR fortran/119669 diff --git a/gcc/fortran/f95-lang.cc b/gcc/fortran/f95-lang.cc index 124d62f..1f09553 100644 --- a/gcc/fortran/f95-lang.cc +++ b/gcc/fortran/f95-lang.cc @@ -148,6 +148,9 @@ gfc_get_sarif_source_language (const char *) #undef LANG_HOOKS_OMP_CLAUSE_LINEAR_CTOR #undef LANG_HOOKS_OMP_CLAUSE_DTOR #undef LANG_HOOKS_OMP_FINISH_CLAUSE +#undef LANG_HOOKS_OMP_DEEP_MAPPING +#undef LANG_HOOKS_OMP_DEEP_MAPPING_P +#undef LANG_HOOKS_OMP_DEEP_MAPPING_CNT #undef LANG_HOOKS_OMP_ALLOCATABLE_P #undef LANG_HOOKS_OMP_SCALAR_TARGET_P #undef LANG_HOOKS_OMP_SCALAR_P @@ -188,6 +191,9 @@ gfc_get_sarif_source_language (const char *) #define LANG_HOOKS_OMP_CLAUSE_LINEAR_CTOR gfc_omp_clause_linear_ctor #define LANG_HOOKS_OMP_CLAUSE_DTOR gfc_omp_clause_dtor #define LANG_HOOKS_OMP_FINISH_CLAUSE gfc_omp_finish_clause +#define LANG_HOOKS_OMP_DEEP_MAPPING gfc_omp_deep_mapping +#define LANG_HOOKS_OMP_DEEP_MAPPING_P gfc_omp_deep_mapping_p +#define LANG_HOOKS_OMP_DEEP_MAPPING_CNT gfc_omp_deep_mapping_cnt #define LANG_HOOKS_OMP_ALLOCATABLE_P gfc_omp_allocatable_p #define LANG_HOOKS_OMP_SCALAR_P gfc_omp_scalar_p #define LANG_HOOKS_OMP_SCALAR_TARGET_P gfc_omp_scalar_target_p diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc index ded80b7..df82940 100644 --- a/gcc/fortran/openmp.cc +++ b/gcc/fortran/openmp.cc @@ -1588,7 +1588,7 @@ gfc_match_omp_clause_reduction (char pc, gfc_omp_clauses *c, bool openacc, { gfc_omp_namelist *p = gfc_get_omp_namelist (), **tl; p->sym = n->sym; - p->where = p->where; + p->where = n->where; p->u.map.op = OMP_MAP_ALWAYS_TOFROM; tl = &c->lists[OMP_LIST_MAP]; @@ -9681,22 +9681,6 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses, && n->sym->as->type == AS_ASSUMED_SIZE) gfc_error ("Assumed size array %qs in %s clause at %L", n->sym->name, name, &n->where); - if (!openacc - && list == OMP_LIST_MAP - && n->sym->ts.type == BT_DERIVED - && n->sym->ts.u.derived->attr.alloc_comp) - gfc_error ("List item %qs with allocatable components is not " - "permitted in map clause at %L", n->sym->name, - &n->where); - if (!openacc - && (list == OMP_LIST_MAP - || list == OMP_LIST_FROM - || list == OMP_LIST_TO) - && ((n->expr && n->expr->ts.type == BT_CLASS) - || (!n->expr && n->sym->ts.type == BT_CLASS))) - gfc_warning (OPT_Wopenmp, - "Mapping polymorphic list item at %L is " - "unspecified behavior", &n->where); if (list == OMP_LIST_MAP && !openacc) switch (code->op) { @@ -10008,9 +9992,11 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses, n->sym->name, name, &n->where); if (!openacc - && list == OMP_LIST_FIRSTPRIVATE - && ((n->expr && n->expr->ts.type == BT_CLASS) - || (!n->expr && n->sym->ts.type == BT_CLASS))) + && (list == OMP_LIST_PRIVATE + || list == OMP_LIST_FIRSTPRIVATE) + && ((n->sym->ts.type == BT_DERIVED + && n->sym->ts.u.derived->attr.alloc_comp) + || n->sym->ts.type == BT_CLASS)) switch (code->op) { case EXEC_OMP_TARGET: @@ -10025,9 +10011,19 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses, case EXEC_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD: case EXEC_OMP_TARGET_TEAMS_DISTRIBUTE_SIMD: case EXEC_OMP_TARGET_TEAMS_LOOP: - gfc_warning (OPT_Wopenmp, - "FIRSTPRIVATE with polymorphic list item at " - "%L is unspecified behavior", &n->where); + if (n->sym->ts.type == BT_DERIVED + && n->sym->ts.u.derived->attr.alloc_comp) + gfc_error ("Sorry, list item %qs at %L with allocatable" + " components is not yet supported in %s " + "clause", n->sym->name, &n->where, + list == OMP_LIST_PRIVATE ? "PRIVATE" + : "FIRSTPRIVATE"); + else + gfc_error ("Polymorphic list item %qs at %L in %s " + "clause has unspecified behavior and " + "unsupported", n->sym->name, &n->where, + list == OMP_LIST_PRIVATE ? "PRIVATE" + : "FIRSTPRIVATE"); break; default: break; diff --git a/gcc/fortran/trans-decl.cc b/gcc/fortran/trans-decl.cc index aea132d..ddc4960 100644 --- a/gcc/fortran/trans-decl.cc +++ b/gcc/fortran/trans-decl.cc @@ -6920,6 +6920,7 @@ add_clause (gfc_symbol *sym, gfc_omp_map_op map_op) n = gfc_get_omp_namelist (); n->sym = sym; + n->where = sym->declared_at; n->u.map.op = map_op; if (!module_oacc_clauses) diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc index 03d9432..0b8150f 100644 --- a/gcc/fortran/trans-openmp.cc +++ b/gcc/fortran/trans-openmp.cc @@ -25,6 +25,10 @@ along with GCC; see the file COPYING3. If not see #include "options.h" #include "tree.h" #include "gfortran.h" +#include "basic-block.h" +#include "tree-ssa.h" +#include "function.h" +#include "gimple.h" #include "gimple-expr.h" #include "trans.h" #include "stringpool.h" @@ -41,6 +45,8 @@ along with GCC; see the file COPYING3. If not see #include "omp-low.h" #include "memmodel.h" /* For MEMMODEL_ enums. */ #include "dependency.h" +#include "gimple-iterator.h" /* For gsi_iterator_update. */ +#include "gimplify-me.h" /* For force_gimple_operand. */ #undef GCC_DIAG_STYLE #define GCC_DIAG_STYLE __gcc_tdiag__ @@ -375,22 +381,28 @@ gfc_omp_report_decl (tree decl) return decl; } -/* Return true if TYPE has any allocatable components. */ +/* Return true if TYPE has any allocatable components; + if ptr_ok, the decl itself is permitted to have the POINTER attribute. + if shallow_alloc_only, returns only true if any of the fields is an + allocatable; called with true by gfc_omp_replace_alloc_by_to_mapping. */ static bool -gfc_has_alloc_comps (tree type, tree decl) +gfc_has_alloc_comps (tree type, tree decl, bool ptr_ok, + bool shallow_alloc_only=false) { tree field, ftype; if (POINTER_TYPE_P (type)) { - if (GFC_DECL_GET_SCALAR_ALLOCATABLE (decl)) + if (GFC_DECL_GET_SCALAR_ALLOCATABLE (decl) + || (ptr_ok && GFC_DECL_GET_SCALAR_POINTER (decl))) type = TREE_TYPE (type); else if (GFC_DECL_GET_SCALAR_POINTER (decl)) return false; } - if (GFC_DESCRIPTOR_TYPE_P (type) + if (!ptr_ok + && GFC_DESCRIPTOR_TYPE_P (type) && (GFC_TYPE_ARRAY_AKIND (type) == GFC_ARRAY_POINTER || GFC_TYPE_ARRAY_AKIND (type) == GFC_ARRAY_POINTER_CONT)) return false; @@ -409,12 +421,51 @@ gfc_has_alloc_comps (tree type, tree decl) if (GFC_DESCRIPTOR_TYPE_P (ftype) && GFC_TYPE_ARRAY_AKIND (ftype) == GFC_ARRAY_ALLOCATABLE) return true; - if (gfc_has_alloc_comps (ftype, field)) + if (!shallow_alloc_only + && gfc_has_alloc_comps (ftype, field, false)) return true; } return false; } +/* gfc_omp_replace_alloc_by_to_mapping is used with gfc_omp_deep_mapping... to + handle the following: + + For map(alloc: dt), the array descriptors of allocatable components should + be mapped as 'to'; this could be done by (A) adding 'map(to: dt%alloc_comp)' + for each component (and avoiding to increment the reference count). + Or (B) by just mapping all of 'dt' as 'to'. + + If 'dt' contains several allocatable components and not much other data, + (A) is more efficient. If 'dt' contains a large const-size array, (A) will + copy it to the device instead of only 'alloc'ating it. + + IMPLEMENTATION CHOICE: We do (A). It avoids the ref-count issue and it is + expected that, for real-world code, derived types with allocatable + components only have few other components and either no const-size arrays. + This copying is done irrespectively whether the allocatables are allocated. + + If users wanted to save memory, they have to use 'map(alloc:dt%comp)' as + also with 'map(alloc:dt)' all components get copied. + + For the copy to the device, only allocatable arrays are relevant as their + the bounds are required; the pointer is set separately (GOMP_MAP_ATTACH) + and the only setting required for scalars. However, when later copying out + of the device, an unallocated allocatable must remain unallocated/NULL on + the host; to achieve this we also must have it set to NULL on the device + to avoid issues with uninitialized memory being copied back for the pointer + address. If we could set the pointer to NULL, gfc_has_alloc_comps's + shallow_alloc_only could be restricted to return true only for arrays. + + We only need to return true if there are allocatable-array components. */ + +static bool +gfc_omp_replace_alloc_by_to_mapping (tree type, tree decl, bool ptr_ok) +{ + return gfc_has_alloc_comps (type, decl, ptr_ok, true); +} + + /* Return true if TYPE is polymorphic but not with pointer attribute. */ static bool @@ -487,7 +538,7 @@ gfc_omp_private_outer_ref (tree decl) if (GFC_DECL_GET_SCALAR_ALLOCATABLE (decl)) return true; - if (gfc_has_alloc_comps (type, decl)) + if (gfc_has_alloc_comps (type, decl, false)) return true; return false; @@ -627,7 +678,7 @@ gfc_walk_alloc_comps (tree decl, tree dest, tree var, { tree ftype = TREE_TYPE (field); tree declf, destf = NULL_TREE; - bool has_alloc_comps = gfc_has_alloc_comps (ftype, field); + bool has_alloc_comps = gfc_has_alloc_comps (ftype, field, false); if ((!GFC_DESCRIPTOR_TYPE_P (ftype) || GFC_TYPE_ARRAY_AKIND (ftype) != GFC_ARRAY_ALLOCATABLE) && !GFC_DECL_GET_SCALAR_ALLOCATABLE (field) @@ -751,7 +802,7 @@ gfc_omp_clause_default_ctor (tree clause, tree decl, tree outer) && (!GFC_DECL_GET_SCALAR_ALLOCATABLE (OMP_CLAUSE_DECL (clause)) || !POINTER_TYPE_P (type))) { - if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause))) + if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause), false)) { gcc_assert (outer); gfc_start_block (&block); @@ -804,7 +855,7 @@ gfc_omp_clause_default_ctor (tree clause, tree decl, tree outer) else gfc_add_modify (&cond_block, unshare_expr (decl), fold_convert (TREE_TYPE (decl), ptr)); - if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause))) + if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause), false)) { tree tem = gfc_walk_alloc_comps (outer, decl, OMP_CLAUSE_DECL (clause), @@ -945,7 +996,7 @@ gfc_omp_clause_copy_ctor (tree clause, tree dest, tree src) && (!GFC_DECL_GET_SCALAR_ALLOCATABLE (OMP_CLAUSE_DECL (clause)) || !POINTER_TYPE_P (type))) { - if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause))) + if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause), false)) { gfc_start_block (&block); gfc_add_modify (&block, dest, src); @@ -1004,7 +1055,7 @@ gfc_omp_clause_copy_ctor (tree clause, tree dest, tree src) builtin_decl_explicit (BUILT_IN_MEMCPY), 3, ptr, srcptr, size); gfc_add_expr_to_block (&cond_block, fold_convert (void_type_node, call)); - if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause))) + if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause), false)) { tree tem = gfc_walk_alloc_comps (src, dest, OMP_CLAUSE_DECL (clause), @@ -1049,7 +1100,7 @@ gfc_omp_clause_assign_op (tree clause, tree dest, tree src) && (!GFC_DECL_GET_SCALAR_ALLOCATABLE (OMP_CLAUSE_DECL (clause)) || !POINTER_TYPE_P (type))) { - if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause))) + if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause), false)) { gfc_start_block (&block); /* First dealloc any allocatable components in DEST. */ @@ -1071,7 +1122,7 @@ gfc_omp_clause_assign_op (tree clause, tree dest, tree src) gfc_start_block (&block); - if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause))) + if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause), false)) { then_b = gfc_walk_alloc_comps (dest, NULL_TREE, OMP_CLAUSE_DECL (clause), WALK_ALLOC_COMPS_DTOR); @@ -1186,7 +1237,7 @@ gfc_omp_clause_assign_op (tree clause, tree dest, tree src) builtin_decl_explicit (BUILT_IN_MEMCPY), 3, ptr, srcptr, size); gfc_add_expr_to_block (&cond_block, fold_convert (void_type_node, call)); - if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause))) + if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause), false)) { tree tem = gfc_walk_alloc_comps (src, dest, OMP_CLAUSE_DECL (clause), @@ -1438,7 +1489,7 @@ gfc_omp_clause_dtor (tree clause, tree decl) && (!GFC_DECL_GET_SCALAR_ALLOCATABLE (OMP_CLAUSE_DECL (clause)) || !POINTER_TYPE_P (type))) { - if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause))) + if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause), false)) return gfc_walk_alloc_comps (decl, NULL_TREE, OMP_CLAUSE_DECL (clause), WALK_ALLOC_COMPS_DTOR); @@ -1458,7 +1509,7 @@ gfc_omp_clause_dtor (tree clause, tree decl) tem = gfc_call_free (decl); tem = gfc_omp_unshare_expr (tem); - if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause))) + if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause), false)) { stmtblock_t block; tree then_b; @@ -1538,6 +1589,7 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) return; tree decl = OMP_CLAUSE_DECL (c); + location_t loc = OMP_CLAUSE_LOCATION (c); /* Assumed-size arrays can't be mapped implicitly, they have to be mapped explicitly using array sections. */ @@ -1553,13 +1605,9 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) return; } - if (!openacc && GFC_CLASS_TYPE_P (TREE_TYPE (decl))) - warning_at (OMP_CLAUSE_LOCATION (c), OPT_Wopenmp, - "Implicit mapping of polymorphic variable %qD is " - "unspecified behavior", decl); - tree c2 = NULL_TREE, c3 = NULL_TREE, c4 = NULL_TREE; tree present = gfc_omp_check_optional_argument (decl, true); + tree orig_decl = NULL_TREE; if (POINTER_TYPE_P (TREE_TYPE (decl))) { if (!gfc_omp_privatize_by_reference (decl) @@ -1568,7 +1616,7 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) && !GFC_DECL_CRAY_POINTEE (decl) && !GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (TREE_TYPE (decl)))) return; - tree orig_decl = decl; + orig_decl = decl; c4 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (c4, GOMP_MAP_POINTER); @@ -1579,16 +1627,16 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) && (GFC_DECL_GET_SCALAR_POINTER (orig_decl) || GFC_DECL_GET_SCALAR_ALLOCATABLE (orig_decl))) { - c2 = build_omp_clause (input_location, OMP_CLAUSE_MAP); + c2 = build_omp_clause (loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_POINTER); - OMP_CLAUSE_DECL (c2) = decl; + OMP_CLAUSE_DECL (c2) = unshare_expr (decl); OMP_CLAUSE_SIZE (c2) = size_int (0); stmtblock_t block; gfc_start_block (&block); - tree ptr = decl; - ptr = gfc_build_cond_assign_expr (&block, present, decl, - null_pointer_node); + tree ptr = gfc_build_cond_assign_expr (&block, present, + unshare_expr (decl), + null_pointer_node); gimplify_and_add (gfc_finish_block (&block), pre_p); ptr = build_fold_indirect_ref (ptr); OMP_CLAUSE_DECL (c) = ptr; @@ -1605,10 +1653,10 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) { c3 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (c3, GOMP_MAP_POINTER); - OMP_CLAUSE_DECL (c3) = unshare_expr (decl); + OMP_CLAUSE_DECL (c3) = decl; OMP_CLAUSE_SIZE (c3) = size_int (0); decl = build_fold_indirect_ref (decl); - OMP_CLAUSE_DECL (c) = decl; + OMP_CLAUSE_DECL (c) = unshare_expr (decl); } } if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl))) @@ -1634,7 +1682,7 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) gcc_assert (POINTER_TYPE_P (TREE_TYPE (ptr))); ptr = build_fold_indirect_ref (ptr); OMP_CLAUSE_DECL (c) = ptr; - c2 = build_omp_clause (input_location, OMP_CLAUSE_MAP); + c2 = build_omp_clause (loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_TO_PSET); if (present) { @@ -1651,7 +1699,7 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) : GOMP_MAP_POINTER); if (present) { - ptr = gfc_conv_descriptor_data_get (decl); + ptr = gfc_conv_descriptor_data_get (unshare_expr (decl)); ptr = gfc_build_addr_expr (NULL, ptr); ptr = gfc_build_cond_assign_expr (&block, present, ptr, null_pointer_node); @@ -1664,6 +1712,17 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) tree size = create_tmp_var (gfc_array_index_type); tree elemsz = TYPE_SIZE_UNIT (gfc_get_element_type (type)); elemsz = fold_convert (gfc_array_index_type, elemsz); + + if (orig_decl == NULL_TREE) + orig_decl = decl; + if (!openacc + && gfc_has_alloc_comps (type, orig_decl, true)) + { + /* Save array descriptor for use in gfc_omp_deep_mapping{,_p,_cnt}; + force evaluate to ensure that it is not gimplified + is a decl. */ + gfc_allocate_lang_decl (size); + GFC_DECL_SAVED_DESCRIPTOR (size) = orig_decl; + } enum gfc_array_kind akind = GFC_TYPE_ARRAY_AKIND (type); if (akind == GFC_ARRAY_ALLOCATABLE || akind == GFC_ARRAY_POINTER @@ -1692,14 +1751,14 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) else_b = gfc_finish_block (&cond_block); tem = gfc_conv_descriptor_data_get (unshare_expr (decl)); tem = fold_convert (pvoid_type_node, tem); - cond = fold_build2_loc (input_location, NE_EXPR, + cond = fold_build2_loc (loc, NE_EXPR, boolean_type_node, tem, null_pointer_node); if (present) { - cond = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR, + cond = fold_build2_loc (loc, TRUTH_ANDIF_EXPR, boolean_type_node, present, cond); } - gfc_add_expr_to_block (&block, build3_loc (input_location, COND_EXPR, + gfc_add_expr_to_block (&block, build3_loc (loc, COND_EXPR, void_type_node, cond, then_b, else_b)); } @@ -1739,11 +1798,30 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) tree stmt = gfc_finish_block (&block); gimplify_and_add (stmt, pre_p); } + else + { + if (OMP_CLAUSE_SIZE (c) == NULL_TREE) + OMP_CLAUSE_SIZE (c) + = DECL_P (decl) ? DECL_SIZE_UNIT (decl) + : TYPE_SIZE_UNIT (TREE_TYPE (decl)); + + tree type = TREE_TYPE (decl); + if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type))) + type = TREE_TYPE (type); + if (!openacc + && orig_decl != NULL_TREE + && gfc_has_alloc_comps (type, orig_decl, true)) + { + /* Save array descriptor for use in gfc_omp_deep_mapping{,_p,_cnt}; + force evaluate to ensure that it is not gimplified + is a decl. */ + tree size = create_tmp_var (TREE_TYPE (OMP_CLAUSE_SIZE (c))); + gfc_allocate_lang_decl (size); + GFC_DECL_SAVED_DESCRIPTOR (size) = orig_decl; + gimplify_assign (size, OMP_CLAUSE_SIZE (c), pre_p); + OMP_CLAUSE_SIZE (c) = size; + } + } tree last = c; - if (OMP_CLAUSE_SIZE (c) == NULL_TREE) - OMP_CLAUSE_SIZE (c) - = DECL_P (decl) ? DECL_SIZE_UNIT (decl) - : TYPE_SIZE_UNIT (TREE_TYPE (decl)); if (gimplify_expr (&OMP_CLAUSE_SIZE (c), pre_p, NULL, is_gimple_val, fb_rvalue) == GS_ERROR) OMP_CLAUSE_SIZE (c) = size_int (0); @@ -1767,6 +1845,715 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) } +/* map(<flag>: data [len: <size>]) + map(attach: &data [bias: <bias>]) + offset += 2; offset_data += 2 */ +static void +gfc_omp_deep_mapping_map (tree data, tree size, unsigned HOST_WIDE_INT tkind, + location_t loc, tree data_array, tree sizes_array, + tree kinds_array, tree offset_data, tree offset, + gimple_seq *seq, const gimple *ctx) +{ + tree one = build_int_cst (size_type_node, 1); + + STRIP_NOPS (data); + if (!POINTER_TYPE_P (TREE_TYPE (data))) + { + gcc_assert (TREE_CODE (data) == INDIRECT_REF); + data = TREE_OPERAND (data, 0); + } + + /* data_array[offset_data] = data; */ + tree tmp = build4 (ARRAY_REF, TREE_TYPE (TREE_TYPE (data_array)), + unshare_expr (data_array), offset_data, + NULL_TREE, NULL_TREE); + gimplify_assign (tmp, data, seq); + + /* offset_data++ */ + tmp = build2_loc (loc, PLUS_EXPR, size_type_node, offset_data, one); + gimplify_assign (offset_data, tmp, seq); + + /* data_array[offset_data] = &data; */ + tmp = build4 (ARRAY_REF, TREE_TYPE (TREE_TYPE (data_array)), + unshare_expr (data_array), + offset_data, NULL_TREE, NULL_TREE); + gimplify_assign (tmp, build_fold_addr_expr (data), seq); + + /* offset_data++ */ + tmp = build2_loc (loc, PLUS_EXPR, size_type_node, offset_data, one); + gimplify_assign (offset_data, tmp, seq); + + /* sizes_array[offset] = size */ + tmp = build2_loc (loc, MULT_EXPR, size_type_node, + TYPE_SIZE_UNIT (size_type_node), offset); + tmp = build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (sizes_array), + sizes_array, tmp); + gimple_seq seq2 = NULL; + tmp = force_gimple_operand (tmp, &seq2, true, NULL_TREE); + gimple_seq_add_seq (seq, seq2); + tmp = build_fold_indirect_ref_loc (loc, tmp); + gimplify_assign (tmp, size, seq); + + /* FIXME: tkind |= talign << talign_shift; */ + /* kinds_array[offset] = tkind. */ + tmp = build2_loc (loc, MULT_EXPR, size_type_node, + TYPE_SIZE_UNIT (short_unsigned_type_node), offset); + tmp = build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (kinds_array), + kinds_array, tmp); + seq2 = NULL; + tmp = force_gimple_operand (tmp, &seq2, true, NULL_TREE); + gimple_seq_add_seq (seq, seq2); + tmp = build_fold_indirect_ref_loc (loc, tmp); + gimplify_assign (tmp, build_int_cst (short_unsigned_type_node, tkind), seq); + + /* offset++ */ + tmp = build2_loc (loc, PLUS_EXPR, size_type_node, offset, one); + gimplify_assign (offset, tmp, seq); + + /* sizes_array[offset] = bias (= 0). */ + tmp = build2_loc (loc, MULT_EXPR, size_type_node, + TYPE_SIZE_UNIT (size_type_node), offset); + tmp = build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (sizes_array), + sizes_array, tmp); + seq2 = NULL; + tmp = force_gimple_operand (tmp, &seq2, true, NULL_TREE); + gimple_seq_add_seq (seq, seq2); + tmp = build_fold_indirect_ref_loc (loc, tmp); + gimplify_assign (tmp, build_zero_cst (size_type_node), seq); + + gcc_assert (gimple_code (ctx) == GIMPLE_OMP_TARGET); + tkind = (gimple_omp_target_kind (ctx) == GF_OMP_TARGET_KIND_EXIT_DATA + ? GOMP_MAP_DETACH : GOMP_MAP_ATTACH); + + /* kinds_array[offset] = tkind. */ + tmp = build2_loc (loc, MULT_EXPR, size_type_node, + TYPE_SIZE_UNIT (short_unsigned_type_node), offset); + tmp = build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (kinds_array), + kinds_array, tmp); + seq2 = NULL; + tmp = force_gimple_operand (tmp, &seq2, true, NULL_TREE); + gimple_seq_add_seq (seq, seq2); + tmp = build_fold_indirect_ref_loc (loc, tmp); + gimplify_assign (tmp, build_int_cst (short_unsigned_type_node, tkind), seq); + + /* offset++ */ + tmp = build2_loc (loc, PLUS_EXPR, size_type_node, offset, one); + gimplify_assign (offset, tmp, seq); +} + +static void gfc_omp_deep_mapping_item (bool, bool, bool, location_t, tree, + tree *, unsigned HOST_WIDE_INT, tree, + tree, tree, tree, tree, tree, + gimple_seq *, const gimple *, bool *); + +/* Map allocatable components. */ +static void +gfc_omp_deep_mapping_comps (bool is_cnt, location_t loc, tree decl, + tree *token, unsigned HOST_WIDE_INT tkind, + tree data_array, tree sizes_array, tree kinds_array, + tree offset_data, tree offset, tree num, + gimple_seq *seq, const gimple *ctx, + bool *poly_warned) +{ + tree type = TREE_TYPE (decl); + if (TREE_CODE (type) != RECORD_TYPE) + return; + for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + { + type = TREE_TYPE (field); + if (gfc_is_polymorphic_nonptr (type) + || GFC_DECL_GET_SCALAR_ALLOCATABLE (field) + || (GFC_DESCRIPTOR_TYPE_P (type) + && GFC_TYPE_ARRAY_AKIND (type) == GFC_ARRAY_ALLOCATABLE)) + { + tree tmp = fold_build3_loc (loc, COMPONENT_REF, TREE_TYPE (field), + decl, field, NULL_TREE); + gfc_omp_deep_mapping_item (is_cnt, true, true, loc, tmp, token, + tkind, data_array, sizes_array, + kinds_array, offset_data, offset, num, + seq, ctx, poly_warned); + } + else if (GFC_DECL_GET_SCALAR_POINTER (field) + || GFC_DESCRIPTOR_TYPE_P (type)) + continue; + else if (gfc_has_alloc_comps (TREE_TYPE (field), field, false)) + { + tree tmp = fold_build3_loc (loc, COMPONENT_REF, TREE_TYPE (field), + decl, field, NULL_TREE); + if (TREE_CODE (TREE_TYPE (tmp)) == ARRAY_TYPE) + gfc_omp_deep_mapping_item (is_cnt, false, false, loc, tmp, + token, tkind, data_array, sizes_array, + kinds_array, offset_data, offset, num, + seq, ctx, poly_warned); + else + gfc_omp_deep_mapping_comps (is_cnt, loc, tmp, token, tkind, + data_array, sizes_array, kinds_array, + offset_data, offset, num, seq, ctx, + poly_warned); + } + } +} + +static void +gfc_omp_gen_simple_loop (tree var, tree begin, tree end, enum tree_code cond, + tree step, location_t loc, gimple_seq *seq1, + gimple_seq *seq2) +{ + tree tmp; + + /* var = begin. */ + gimplify_assign (var, begin, seq1); + + /* Loop: for (var = begin; var <cond> end; var += step). */ + tree label_loop = create_artificial_label (loc); + tree label_cond = create_artificial_label (loc); + + gimplify_and_add (fold_build1_loc (loc, GOTO_EXPR, void_type_node, + label_cond), seq1); + gimple_seq_add_stmt (seq1, gimple_build_label (label_loop)); + + /* Everything above is seq1; place loop body here. */ + + /* End of loop body -> put into seq2. */ + tmp = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (var), var, step); + gimplify_assign (var, tmp, seq2); + gimple_seq_add_stmt (seq2, gimple_build_label (label_cond)); + tmp = fold_build2_loc (loc, cond, boolean_type_node, var, end); + tmp = build3_v (COND_EXPR, tmp, build1_v (GOTO_EXPR, label_loop), + build_empty_stmt (loc)); + gimplify_and_add (tmp, seq2); +} + +/* Return size variable with the size of an array. */ +static tree +gfc_omp_get_array_size (location_t loc, tree desc, gimple_seq *seq) +{ + tree tmp; + gimple_seq seq1 = NULL, seq2 = NULL; + tree size = build_decl (loc, VAR_DECL, create_tmp_var_name ("size"), + size_type_node); + tree extent = build_decl (loc, VAR_DECL, create_tmp_var_name ("extent"), + gfc_array_index_type); + tree idx = build_decl (loc, VAR_DECL, create_tmp_var_name ("idx"), + signed_char_type_node); + + tree begin = build_zero_cst (signed_char_type_node); + tree end; + if (GFC_TYPE_ARRAY_AKIND (TREE_TYPE (desc)) == GFC_ARRAY_ASSUMED_SHAPE_CONT + || GFC_TYPE_ARRAY_AKIND (TREE_TYPE (desc)) == GFC_ARRAY_ASSUMED_SHAPE) + end = gfc_conv_descriptor_rank (desc); + else + end = build_int_cst (signed_char_type_node, + GFC_TYPE_ARRAY_RANK (TREE_TYPE (desc))); + tree step = build_int_cst (signed_char_type_node, 1); + + /* size = 0 + for (idx = 0; idx < rank; idx++) + extent = gfc->dim[i].ubound - gfc->dim[i].lbound + 1 + if (extent < 0) extent = 0 + size *= extent. */ + gimplify_assign (size, build_int_cst (size_type_node, 1), seq); + + gfc_omp_gen_simple_loop (idx, begin, end, LT_EXPR, step, loc, &seq1, &seq2); + gimple_seq_add_seq (seq, seq1); + + tmp = fold_build2_loc (loc, MINUS_EXPR, gfc_array_index_type, + gfc_conv_descriptor_ubound_get (desc, idx), + gfc_conv_descriptor_lbound_get (desc, idx)); + tmp = fold_build2_loc (loc, PLUS_EXPR, gfc_array_index_type, + tmp, gfc_index_one_node); + gimplify_assign (extent, tmp, seq); + tmp = fold_build2_loc (loc, LT_EXPR, boolean_type_node, + extent, gfc_index_zero_node); + tmp = build3_v (COND_EXPR, tmp, + fold_build2_loc (loc, MODIFY_EXPR, + gfc_array_index_type, + extent, gfc_index_zero_node), + build_empty_stmt (loc)); + gimplify_and_add (tmp, seq); + /* size *= extent. */ + gimplify_assign (size, fold_build2_loc (loc, MULT_EXPR, size_type_node, size, + fold_convert (size_type_node, + extent)), seq); + gimple_seq_add_seq (seq, seq2); + return size; +} + +/* Generate loop to access every array element; takes addr of first element + (decl's data comp); returns loop code in seq1 + seq2 + and the pointer to the element as return value. */ +static tree +gfc_omp_elmental_loop (location_t loc, tree decl, tree size, tree elem_len, + gimple_seq *seq1, gimple_seq *seq2) +{ + tree idx = build_decl (loc, VAR_DECL, create_tmp_var_name ("idx"), + size_type_node); + tree begin = build_zero_cst (size_type_node); + tree end = size; + tree step = build_int_cst (size_type_node, 1); + tree ptr; + + gfc_omp_gen_simple_loop (idx, begin, end, LT_EXPR, step, loc, seq1, seq2); + + tree type = TREE_TYPE (decl); + if (POINTER_TYPE_P (type)) + { + type = TREE_TYPE (type); + gcc_assert (TREE_CODE (type) == ARRAY_TYPE); + decl = fold_convert (build_pointer_type (TREE_TYPE (type)), decl); + } + else + { + gcc_assert (TREE_CODE (type) == ARRAY_TYPE); + decl = build_fold_addr_expr_loc (loc, decl); + } + decl = fold_convert (build_pointer_type (TREE_TYPE (type)), decl); + tree tmp = build2_loc (loc, MULT_EXPR, size_type_node, idx, + fold_convert (size_type_node, elem_len)); + ptr = build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (decl), decl, tmp); + gimple_seq seq3 = NULL; + ptr = force_gimple_operand (ptr, &seq3, true, NULL_TREE); + gimple_seq_add_seq (seq1, seq3); + + return ptr; +} + + +/* If do_copy, copy data pointer and vptr (if applicable) as well. + Otherwise, only handle allocatable components. + do_copy == false can happen only with nonpolymorphic arguments + to a copy clause. + if (is_cnt) token ... offset is ignored and num is used, otherwise + num is NULL_TREE and unused. */ + +static void +gfc_omp_deep_mapping_item (bool is_cnt, bool do_copy, bool do_alloc_check, + location_t loc, tree decl, tree *token, + unsigned HOST_WIDE_INT tkind, tree data_array, + tree sizes_array, tree kinds_array, tree offset_data, + tree offset, tree num, gimple_seq *seq, + const gimple *ctx, bool *poly_warned) +{ + tree tmp; + tree type = TREE_TYPE (decl); + if (POINTER_TYPE_P (type)) + type = TREE_TYPE (type); + tree end_label = NULL_TREE; + tree size = NULL_TREE, elem_len = NULL_TREE; + + bool poly = gfc_is_polymorphic_nonptr (type); + if (poly && is_cnt && !*poly_warned) + { + if (gfc_is_unlimited_polymorphic_nonptr (type)) + error_at (loc, + "Mapping of unlimited polymorphic list item %qD is " + "unspecified behavior and unsupported", decl); + + else + warning_at (loc, OPT_Wopenmp, + "Mapping of polymorphic list item %qD is " + "unspecified behavior", decl); + *poly_warned = true; + } + if (do_alloc_check) + { + tree then_label = create_artificial_label (loc); + end_label = create_artificial_label (loc); + tmp = decl; + if (TREE_CODE (TREE_TYPE (tmp)) == REFERENCE_TYPE + || (POINTER_TYPE_P (TREE_TYPE (tmp)) + && (POINTER_TYPE_P (TREE_TYPE (TREE_TYPE (tmp))) + || GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (TREE_TYPE (tmp)))))) + tmp = build_fold_indirect_ref_loc (loc, tmp); + if (poly) + tmp = gfc_class_data_get (tmp); + if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (tmp))) + tmp = gfc_conv_descriptor_data_get (tmp); + gimple_seq seq2 = NULL; + tmp = force_gimple_operand (tmp, &seq2, true, NULL_TREE); + gimple_seq_add_seq (seq, seq2); + + gimple_seq_add_stmt (seq, + gimple_build_cond (NE_EXPR, tmp, null_pointer_node, + then_label, end_label)); + gimple_seq_add_stmt (seq, gimple_build_label (then_label)); + } + tree class_decl = decl; + if (poly) + { + decl = gfc_class_data_get (decl); + type = TREE_TYPE (decl); + } + if (POINTER_TYPE_P (TREE_TYPE (decl))) + { + decl = build_fold_indirect_ref (decl); + type = TREE_TYPE (decl); + } + + if (is_cnt && do_copy) + { + tree tmp = fold_build2_loc (loc, PLUS_EXPR, size_type_node, + num, build_int_cst (size_type_node, 1)); + gimplify_assign (num, tmp, seq); + } + else if (do_copy) + { + /* copy data pointer */ + tree bytesize; + if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl))) + { + /* TODO: Optimization: Shouldn't this be an expr. const, except for + deferred-length strings. (Cf. also below). */ + elem_len = (poly ? gfc_class_vtab_size_get (class_decl) + : gfc_conv_descriptor_elem_len (decl)); + tmp = (POINTER_TYPE_P (TREE_TYPE (decl)) + ? build_fold_indirect_ref (decl) : decl); + size = gfc_omp_get_array_size (loc, tmp, seq); + bytesize = fold_build2_loc (loc, MULT_EXPR, size_type_node, + fold_convert (size_type_node, size), + fold_convert (size_type_node, elem_len)); + tmp = gfc_conv_descriptor_data_get (decl); + } + else if (poly) + { + tmp = decl; + bytesize = fold_convert (size_type_node, + gfc_class_vtab_size_get (class_decl)); + } + else + { + tmp = decl; + bytesize = TYPE_SIZE_UNIT (TREE_TYPE (decl)); + } + unsigned HOST_WIDE_INT tkind2 = tkind; + if (!is_cnt + && (tkind == GOMP_MAP_ALLOC + || (tkind == GOMP_MAP_FROM + && (gimple_omp_target_kind (ctx) + != GF_OMP_TARGET_KIND_EXIT_DATA))) + && gfc_omp_replace_alloc_by_to_mapping (TREE_TYPE (decl), decl, true)) + tkind2 = tkind == GOMP_MAP_ALLOC ? GOMP_MAP_TO : GOMP_MAP_TOFROM; + + gfc_omp_deep_mapping_map (tmp, bytesize, tkind2, loc, data_array, + sizes_array, kinds_array, offset_data, + offset, seq, ctx); + } + + tmp = decl; + if (POINTER_TYPE_P (TREE_TYPE (decl))) + while (TREE_CODE (tmp) == COMPONENT_REF || TREE_CODE (tmp) == ARRAY_REF) + tmp = TREE_OPERAND (tmp, TREE_CODE (tmp) == COMPONENT_REF ? 1 : 0); + if (poly || gfc_has_alloc_comps (type, tmp, true)) + { + gimple_seq seq2 = NULL; + if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl))) + { + if (elem_len == NULL_TREE) + { + elem_len = gfc_conv_descriptor_elem_len (decl); + size = fold_convert (size_type_node, + gfc_omp_get_array_size (loc, decl, seq)); + } + decl = gfc_conv_descriptor_data_get (decl); + decl = gfc_omp_elmental_loop (loc, decl, size, elem_len, seq, &seq2); + decl = build_fold_indirect_ref_loc (loc, decl); + } + else if (TREE_CODE (TREE_TYPE (tmp)) == ARRAY_TYPE) + { + type = TREE_TYPE (tmp); + /* FIXME: PR95868 - for var%str of deferred length, elem_len == 0; + len is stored as var%_str_length, but not in GFC_DECL_STRING_LEN + nor in TYPE_SIZE_UNIT as expression. */ + elem_len = TYPE_SIZE_UNIT (TREE_TYPE (type)); + size = fold_convert (size_type_node, GFC_TYPE_ARRAY_SIZE (type)); + decl = gfc_omp_elmental_loop (loc, decl, size, elem_len, seq, &seq2); + decl = build_fold_indirect_ref_loc (loc, decl); + } + else if (POINTER_TYPE_P (TREE_TYPE (decl))) + decl = build_fold_indirect_ref (decl); + + gfc_omp_deep_mapping_comps (is_cnt, loc, decl, token, tkind, + data_array, sizes_array, kinds_array, + offset_data, offset, num, seq, ctx, + poly_warned); + gimple_seq_add_seq (seq, seq2); + } + if (end_label) + gimple_seq_add_stmt (seq, gimple_build_label (end_label)); +} + + +/* Which map types to check/handle for deep mapping. */ +static bool +gfc_omp_deep_map_kind_p (tree clause) +{ + switch (OMP_CLAUSE_CODE (clause)) + { + case OMP_CLAUSE_MAP: + break; + case OMP_CLAUSE_FIRSTPRIVATE: + case OMP_CLAUSE_TO: + case OMP_CLAUSE_FROM: + return true; + default: + gcc_unreachable (); + } + + switch (OMP_CLAUSE_MAP_KIND (clause)) + { + case GOMP_MAP_TO: + case GOMP_MAP_FROM: + case GOMP_MAP_TOFROM: + case GOMP_MAP_ALWAYS_TO: + case GOMP_MAP_ALWAYS_FROM: + case GOMP_MAP_ALWAYS_TOFROM: + case GOMP_MAP_ALWAYS_PRESENT_FROM: + case GOMP_MAP_ALWAYS_PRESENT_TO: + case GOMP_MAP_ALWAYS_PRESENT_TOFROM: + case GOMP_MAP_FIRSTPRIVATE: + case GOMP_MAP_ALLOC: + return true; + case GOMP_MAP_POINTER: + case GOMP_MAP_TO_PSET: + case GOMP_MAP_FORCE_PRESENT: + case GOMP_MAP_DELETE: + case GOMP_MAP_FORCE_DEVICEPTR: + case GOMP_MAP_DEVICE_RESIDENT: + case GOMP_MAP_LINK: + case GOMP_MAP_IF_PRESENT: + case GOMP_MAP_PRESENT_ALLOC: + case GOMP_MAP_PRESENT_FROM: + case GOMP_MAP_PRESENT_TO: + case GOMP_MAP_PRESENT_TOFROM: + case GOMP_MAP_FIRSTPRIVATE_INT: + case GOMP_MAP_USE_DEVICE_PTR: + case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: + case GOMP_MAP_FORCE_ALLOC: + case GOMP_MAP_FORCE_TO: + case GOMP_MAP_FORCE_FROM: + case GOMP_MAP_FORCE_TOFROM: + case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT: + case GOMP_MAP_STRUCT: + case GOMP_MAP_STRUCT_UNORD: + case GOMP_MAP_ALWAYS_POINTER: + case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION: + case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION: + case GOMP_MAP_RELEASE: + case GOMP_MAP_ATTACH: + case GOMP_MAP_DETACH: + case GOMP_MAP_FORCE_DETACH: + case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION: + case GOMP_MAP_FIRSTPRIVATE_POINTER: + case GOMP_MAP_FIRSTPRIVATE_REFERENCE: + case GOMP_MAP_ATTACH_DETACH: + break; + default: + gcc_unreachable (); + } + return false; +} + +/* Three OpenMP deep-mapping lang hooks: gfc_omp_deep_mapping{_p,_cnt,}. */ + +/* Common check for gfc_omp_deep_mapping_p and gfc_omp_deep_mapping_do. */ + +static tree +gfc_omp_deep_mapping_int_p (const gimple *ctx, tree clause) +{ + if (is_gimple_omp_oacc (ctx) || !gfc_omp_deep_map_kind_p (clause)) + return NULL_TREE; + tree decl = OMP_CLAUSE_DECL (clause); + if (OMP_CLAUSE_SIZE (clause) != NULL_TREE + && DECL_P (OMP_CLAUSE_SIZE (clause)) + && DECL_LANG_SPECIFIC (OMP_CLAUSE_SIZE (clause)) + && GFC_DECL_SAVED_DESCRIPTOR (OMP_CLAUSE_SIZE (clause))) + /* Saved decl. */ + decl = GFC_DECL_SAVED_DESCRIPTOR (OMP_CLAUSE_SIZE (clause)); + else if (TREE_CODE (decl) == MEM_REF || TREE_CODE (decl) == INDIRECT_REF) + /* The following can happen for, e.g., class(t) :: var(..) */ + decl = TREE_OPERAND (decl, 0); + if (TREE_CODE (decl) == INDIRECT_REF) + /* The following can happen for, e.g., class(t) :: var(..) */ + decl = TREE_OPERAND (decl, 0); + if (DECL_P (decl) + && DECL_LANG_SPECIFIC (decl) + && GFC_DECL_SAVED_DESCRIPTOR (decl)) + decl = GFC_DECL_SAVED_DESCRIPTOR (decl); + /* Handle map(to: var.desc) map([to/from/tofrom:] var.desc.data) + to get proper map kind by skipping to the next item. */ + tree tmp = OMP_CLAUSE_CHAIN (clause); + if (tmp != NULL_TREE + && OMP_CLAUSE_CODE (tmp) == OMP_CLAUSE_CODE (clause) + && OMP_CLAUSE_SIZE (tmp) != NULL_TREE + && DECL_P (OMP_CLAUSE_SIZE (tmp)) + && DECL_LANG_SPECIFIC (OMP_CLAUSE_SIZE (tmp)) + && GFC_DECL_SAVED_DESCRIPTOR (OMP_CLAUSE_SIZE (tmp)) == decl) + return NULL_TREE; + if (DECL_P (decl) + && DECL_LANG_SPECIFIC (decl) + && GFC_DECL_SAVED_DESCRIPTOR (decl)) + decl = GFC_DECL_SAVED_DESCRIPTOR (decl); + tree type = TREE_TYPE (decl); + if (POINTER_TYPE_P (type)) + type = TREE_TYPE (type); + if (POINTER_TYPE_P (type)) + type = TREE_TYPE (type); + tmp = decl; + while (TREE_CODE (tmp) == COMPONENT_REF || TREE_CODE (tmp) == ARRAY_REF) + tmp = TREE_OPERAND (tmp, TREE_CODE (tmp) == COMPONENT_REF ? 1 : 0); + if (!gfc_is_polymorphic_nonptr (type) + && !gfc_has_alloc_comps (type, tmp, true)) + return NULL_TREE; + return decl; +} + +/* Return true if there is deep mapping, even if the number of mapping is known + at compile time. */ +bool +gfc_omp_deep_mapping_p (const gimple *ctx, tree clause) +{ + tree decl = gfc_omp_deep_mapping_int_p (ctx, clause); + if (decl == NULL_TREE) + return false; + return true; +} + +/* Handle gfc_omp_deep_mapping{,_cnt} */ +static tree +gfc_omp_deep_mapping_do (bool is_cnt, const gimple *ctx, tree clause, + unsigned HOST_WIDE_INT tkind, tree data, tree sizes, + tree kinds, tree offset_data, tree offset, + gimple_seq *seq) +{ + tree num = NULL_TREE; + location_t loc = OMP_CLAUSE_LOCATION (clause); + tree decl = gfc_omp_deep_mapping_int_p (ctx, clause); + bool poly_warned = false; + if (decl == NULL_TREE) + return NULL_TREE; + /* Handle: map(alloc:dt%cmp [len: ptr_size]) map(tofrom: D.0123...), + where GFC_DECL_SAVED_DESCRIPTOR(D.0123) is the same (here: dt%cmp). */ + if (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (clause) == GOMP_MAP_ALLOC + || OMP_CLAUSE_MAP_KIND (clause) == GOMP_MAP_PRESENT_ALLOC)) + { + tree c = clause; + while ((c = OMP_CLAUSE_CHAIN (c)) != NULL_TREE) + { + if (!gfc_omp_deep_map_kind_p (c)) + continue; + tree d = gfc_omp_deep_mapping_int_p (ctx, c); + if (d != NULL_TREE && operand_equal_p (decl, d, 0)) + return NULL_TREE; + } + } + tree type = TREE_TYPE (decl); + if (POINTER_TYPE_P (type)) + type = TREE_TYPE (type); + if (POINTER_TYPE_P (type)) + type = TREE_TYPE (type); + bool poly = gfc_is_polymorphic_nonptr (type); + + if (is_cnt) + { + num = build_decl (loc, VAR_DECL, + create_tmp_var_name ("n_deepmap"), size_type_node); + tree tmp = fold_build2_loc (loc, MODIFY_EXPR, size_type_node, num, + build_int_cst (size_type_node, 0)); + gimple_add_tmp_var (num); + gimplify_and_add (tmp, seq); + } + else + gcc_assert (short_unsigned_type_node == TREE_TYPE (TREE_TYPE (kinds))); + + bool do_copy = poly; + bool do_alloc_check = false; + tree token = NULL_TREE; + tree tmp = decl; + if (poly) + { + tmp = TYPE_FIELDS (type); + type = TREE_TYPE (tmp); + } + else + while (TREE_CODE (tmp) == COMPONENT_REF || TREE_CODE (tmp) == ARRAY_REF) + tmp = TREE_OPERAND (tmp, TREE_CODE (tmp) == COMPONENT_REF ? 1 : 0); + /* If the clause argument is nonallocatable, skip is-allocate check. */ + if (GFC_DECL_GET_SCALAR_ALLOCATABLE (tmp) + || GFC_DECL_GET_SCALAR_POINTER (tmp) + || (GFC_DESCRIPTOR_TYPE_P (type) + && (GFC_TYPE_ARRAY_AKIND (type) == GFC_ARRAY_ALLOCATABLE + || GFC_TYPE_ARRAY_AKIND (type) == GFC_ARRAY_POINTER + || GFC_TYPE_ARRAY_AKIND (type) == GFC_ARRAY_POINTER_CONT))) + do_alloc_check = true; + + if (!is_cnt + && OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_MAP + && (tkind == GOMP_MAP_ALLOC + || (tkind == GOMP_MAP_FROM + && (gimple_omp_target_kind (ctx) + != GF_OMP_TARGET_KIND_EXIT_DATA))) + && (poly || gfc_omp_replace_alloc_by_to_mapping (type, tmp, true))) + OMP_CLAUSE_SET_MAP_KIND (clause, tkind == GOMP_MAP_ALLOC ? GOMP_MAP_TO + : GOMP_MAP_TOFROM); + + /* TODO: For map(a(:)), we know it is present & allocated. */ + + tree present = (DECL_P (decl) ? gfc_omp_check_optional_argument (decl, true) + : NULL_TREE); + if (POINTER_TYPE_P (TREE_TYPE (decl)) + && POINTER_TYPE_P (TREE_TYPE (TREE_TYPE (decl)))) + decl = build_fold_indirect_ref (decl); + if (present) + { + tree then_label = create_artificial_label (loc); + tree end_label = create_artificial_label (loc); + gimple_seq seq2 = NULL; + tmp = force_gimple_operand (present, &seq2, true, NULL_TREE); + gimple_seq_add_seq (seq, seq2); + gimple_seq_add_stmt (seq, + gimple_build_cond_from_tree (present, + then_label, end_label)); + gimple_seq_add_stmt (seq, gimple_build_label (then_label)); + gfc_omp_deep_mapping_item (is_cnt, do_copy, do_alloc_check, loc, decl, + &token, tkind, data, sizes, kinds, + offset_data, offset, num, seq, ctx, + &poly_warned); + gimple_seq_add_stmt (seq, gimple_build_label (end_label)); + } + else + gfc_omp_deep_mapping_item (is_cnt, do_copy, do_alloc_check, loc, decl, + &token, tkind, data, sizes, kinds, offset_data, + offset, num, seq, ctx, &poly_warned); + /* Multiply by 2 as there are two mappings: data + pointer assign. */ + if (is_cnt) + gimplify_assign (num, + fold_build2_loc (loc, MULT_EXPR, + size_type_node, num, + build_int_cst (size_type_node, 2)), seq); + return num; +} + +/* Return tree with a variable which contains the count of deep-mappyings + (value depends, e.g., on allocation status) */ +tree +gfc_omp_deep_mapping_cnt (const gimple *ctx, tree clause, gimple_seq *seq) +{ + return gfc_omp_deep_mapping_do (true, ctx, clause, 0, NULL_TREE, NULL_TREE, + NULL_TREE, NULL_TREE, NULL_TREE, seq); +} + +/* Does the actual deep mapping. */ +void +gfc_omp_deep_mapping (const gimple *ctx, tree clause, + unsigned HOST_WIDE_INT tkind, tree data, + tree sizes, tree kinds, tree offset_data, tree offset, + gimple_seq *seq) +{ + (void) gfc_omp_deep_mapping_do (false, ctx, clause, tkind, data, sizes, kinds, + offset_data, offset, seq); +} + /* Return true if DECL is a scalar variable (for the purpose of implicit firstprivatization/mapping). Only if 'ptr_alloc_ok.' is true, allocatables and pointers are permitted. */ @@ -2478,6 +3265,18 @@ gfc_trans_omp_array_section (stmtblock_t *block, gfc_exec_op op, elemsz = fold_convert (gfc_array_index_type, elemsz); OMP_CLAUSE_SIZE (node) = fold_build2 (MULT_EXPR, gfc_array_index_type, OMP_CLAUSE_SIZE (node), elemsz); + if (n->expr->ts.type == BT_DERIVED + && n->expr->ts.u.derived->attr.alloc_comp) + { + /* Save array descriptor for use in gfc_omp_deep_mapping{,_p,_cnt}; + force evaluate to ensure that it is not gimplified + is a decl. */ + tree tmp = OMP_CLAUSE_SIZE (node); + tree var = gfc_create_var (TREE_TYPE (tmp), NULL); + gfc_add_modify_loc (input_location, block, var, tmp); + OMP_CLAUSE_SIZE (node) = var; + gfc_allocate_lang_decl (var); + GFC_DECL_SAVED_DESCRIPTOR (var) = se.expr; + } } gcc_assert (se.post.head == NULL_TREE); gcc_assert (POINTER_TYPE_P (TREE_TYPE (ptr))); @@ -3213,8 +4012,9 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, if (!n->sym->attr.referenced) continue; + location_t map_loc = gfc_get_location (&n->where); bool always_modifier = false; - tree node = build_omp_clause (input_location, OMP_CLAUSE_MAP); + tree node = build_omp_clause (map_loc, OMP_CLAUSE_MAP); tree node2 = NULL_TREE; tree node3 = NULL_TREE; tree node4 = NULL_TREE; @@ -3361,7 +4161,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, && n->u.map.op != OMP_MAP_RELEASE) { gcc_assert (n->sym->ts.u.cl->backend_decl); - node5 = build_omp_clause (input_location, OMP_CLAUSE_MAP); + node5 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node5, GOMP_MAP_ALWAYS_TO); OMP_CLAUSE_DECL (node5) = n->sym->ts.u.cl->backend_decl; OMP_CLAUSE_SIZE (node5) @@ -3378,7 +4178,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, ptr = build_fold_indirect_ref (ptr); OMP_CLAUSE_DECL (node) = ptr; OMP_CLAUSE_SIZE (node) = gfc_class_vtab_size_get (decl); - node2 = build_omp_clause (input_location, OMP_CLAUSE_MAP); + node2 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node2, GOMP_MAP_ATTACH_DETACH); OMP_CLAUSE_DECL (node2) = gfc_class_data_get (decl); OMP_CLAUSE_SIZE (node2) = size_int (0); @@ -3434,8 +4234,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, size = TYPE_SIZE_UNIT (TREE_TYPE (decl)); else size = size_int (0); - node4 = build_omp_clause (input_location, - OMP_CLAUSE_MAP); + node4 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node4, gmk); OMP_CLAUSE_DECL (node4) = decl; OMP_CLAUSE_SIZE (node4) = size; @@ -3459,8 +4258,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, size = TYPE_SIZE_UNIT (TREE_TYPE (decl)); else size = size_int (0); - node3 = build_omp_clause (input_location, - OMP_CLAUSE_MAP); + node3 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node3, gmk); OMP_CLAUSE_DECL (node3) = decl; OMP_CLAUSE_SIZE (node3) = size; @@ -3477,7 +4275,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, gcc_assert (POINTER_TYPE_P (TREE_TYPE (ptr))); ptr = build_fold_indirect_ref (ptr); OMP_CLAUSE_DECL (node) = ptr; - node2 = build_omp_clause (input_location, OMP_CLAUSE_MAP); + node2 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); OMP_CLAUSE_DECL (node2) = decl; OMP_CLAUSE_SIZE (node2) = TYPE_SIZE_UNIT (type); if (n->u.map.op == OMP_MAP_DELETE) @@ -3493,8 +4291,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, && n->u.map.op != OMP_MAP_DELETE && n->u.map.op != OMP_MAP_RELEASE) { - node3 = build_omp_clause (input_location, - OMP_CLAUSE_MAP); + node3 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); if (present) { ptr = gfc_conv_descriptor_data_get (decl); @@ -3634,10 +4431,10 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, { /* A single indirectref is handled by the middle end. */ gcc_assert (!POINTER_TYPE_P (TREE_TYPE (decl))); - decl = TREE_OPERAND (decl, 0); - decl = gfc_build_cond_assign_expr (block, present, decl, + tree tmp = TREE_OPERAND (decl, 0); + tmp = gfc_build_cond_assign_expr (block, present, tmp, null_pointer_node); - OMP_CLAUSE_DECL (node) = build_fold_indirect_ref (decl); + OMP_CLAUSE_DECL (node) = build_fold_indirect_ref (tmp); } else OMP_CLAUSE_DECL (node) = decl; @@ -3672,6 +4469,33 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, size = gfc_evaluate_now (size, block); OMP_CLAUSE_SIZE (node) = size; } + if ((TREE_CODE (decl) != PARM_DECL + || DECL_ARTIFICIAL (OMP_CLAUSE_DECL (node))) + && n->sym->ts.type == BT_DERIVED + && n->sym->ts.u.derived->attr.alloc_comp) + { + /* Save array descriptor for use in + gfc_omp_deep_mapping{,_p,_cnt}; force evaluate + to ensure that it is not gimplified + is a decl. */ + tree tmp = OMP_CLAUSE_SIZE (node); + if (tmp == NULL_TREE) + tmp = DECL_P (decl) ? DECL_SIZE_UNIT (decl) + : TYPE_SIZE_UNIT (TREE_TYPE (decl)); + tree var = gfc_create_var (TREE_TYPE (tmp), NULL); + gfc_add_modify_loc (input_location, block, var, tmp); + OMP_CLAUSE_SIZE (node) = var; + gfc_allocate_lang_decl (var); + if (TREE_CODE (decl) == INDIRECT_REF) + decl = TREE_OPERAND (decl, 0); + if (TREE_CODE (decl) == INDIRECT_REF) + decl = TREE_OPERAND (decl, 0); + if (DECL_LANG_SPECIFIC (decl) + && GFC_DECL_SAVED_DESCRIPTOR (decl)) + GFC_DECL_SAVED_DESCRIPTOR (var) + = GFC_DECL_SAVED_DESCRIPTOR (decl); + else + GFC_DECL_SAVED_DESCRIPTOR (var) = decl; + } } else if (n->expr && n->expr->expr_type == EXPR_VARIABLE @@ -3727,8 +4551,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, goto finalize_map_clause; } - node2 = build_omp_clause (input_location, - OMP_CLAUSE_MAP); + node2 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node2, GOMP_MAP_ATTACH_DETACH); OMP_CLAUSE_DECL (node2) = POINTER_TYPE_P (TREE_TYPE (se.expr)) @@ -3754,13 +4577,37 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, kind = GOMP_MAP_RELEASE; else kind = GOMP_MAP_TO; - node3 = build_omp_clause (input_location, - OMP_CLAUSE_MAP); + node3 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node3, kind); OMP_CLAUSE_DECL (node3) = se.string_length; OMP_CLAUSE_SIZE (node3) = TYPE_SIZE_UNIT (gfc_charlen_type_node); } + if (!openacc + && n->expr->ts.type == BT_DERIVED + && n->expr->ts.u.derived->attr.alloc_comp) + { + /* Save array descriptor for use in + gfc_omp_deep_mapping{,_p,_cnt}; force evaluate + to ensure that it is not gimplified + is a decl. */ + tree tmp = OMP_CLAUSE_SIZE (node); + if (tmp == NULL_TREE) + tmp = (DECL_P (se.expr) + ? DECL_SIZE_UNIT (se.expr) + : TYPE_SIZE_UNIT (TREE_TYPE (se.expr))); + tree var = gfc_create_var (TREE_TYPE (tmp), NULL); + gfc_add_modify_loc (input_location, block, var, tmp); + OMP_CLAUSE_SIZE (node) = var; + gfc_allocate_lang_decl (var); + if (TREE_CODE (se.expr) == INDIRECT_REF) + se.expr = TREE_OPERAND (se.expr, 0); + if (DECL_LANG_SPECIFIC (se.expr) + && GFC_DECL_SAVED_DESCRIPTOR (se.expr)) + GFC_DECL_SAVED_DESCRIPTOR (var) + = GFC_DECL_SAVED_DESCRIPTOR (se.expr); + else + GFC_DECL_SAVED_DESCRIPTOR (var) = se.expr; + } } } else if (n->expr @@ -3800,7 +4647,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, && (lastref->u.c.component->ts.type == BT_DERIVED || lastref->u.c.component->ts.type == BT_CLASS)) { - if (pointer || (openacc && allocatable)) + if (pointer || allocatable) { /* If it's a bare attach/detach clause, we just want to perform a single attach/detach operation, of the @@ -3880,8 +4727,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, OMP_CLAUSE_DECL (node) = data; OMP_CLAUSE_SIZE (node) = size; - node2 = build_omp_clause (input_location, - OMP_CLAUSE_MAP); + node2 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node2, GOMP_MAP_ATTACH_DETACH); OMP_CLAUSE_DECL (node2) = build_fold_addr_expr (data); @@ -3893,6 +4739,22 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, OMP_CLAUSE_SIZE (node) = TYPE_SIZE_UNIT (TREE_TYPE (inner)); } + if (!openacc + && n->expr->ts.type == BT_DERIVED + && n->expr->ts.u.derived->attr.alloc_comp) + { + /* Save array descriptor for use in + gfc_omp_deep_mapping{,_p,_cnt}; force evaluate + to ensure that it is not gimplified + is a decl. */ + tree tmp = OMP_CLAUSE_SIZE (node); + tree var = gfc_create_var (TREE_TYPE (tmp), NULL); + gfc_add_modify_loc (input_location, block, var, tmp); + OMP_CLAUSE_SIZE (node) = var; + gfc_allocate_lang_decl (var); + if (TREE_CODE (inner) == INDIRECT_REF) + inner = TREE_OPERAND (inner, 0); + GFC_DECL_SAVED_DESCRIPTOR (var) = inner; + } } else if (lastref->type == REF_ARRAY && lastref->u.ar.type == AR_FULL) @@ -3952,8 +4814,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, elemsz = TYPE_SIZE_UNIT (elemsz); elemsz = fold_build2 (MULT_EXPR, size_type_node, len, elemsz); - node4 = build_omp_clause (input_location, - OMP_CLAUSE_MAP); + node4 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node4, map_kind); OMP_CLAUSE_DECL (node4) = se.string_length; OMP_CLAUSE_SIZE (node4) @@ -3963,8 +4824,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, OMP_CLAUSE_SIZE (node) = fold_build2 (MULT_EXPR, gfc_array_index_type, OMP_CLAUSE_SIZE (node), elemsz); - node2 = build_omp_clause (input_location, - OMP_CLAUSE_MAP); + node2 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); if (map_kind == GOMP_MAP_RELEASE || map_kind == GOMP_MAP_DELETE) { @@ -3978,6 +4838,23 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, OMP_CLAUSE_SIZE (node2) = TYPE_SIZE_UNIT (type); if (!openacc) { + if (n->expr->ts.type == BT_DERIVED + && n->expr->ts.u.derived->attr.alloc_comp) + { + /* Save array descriptor for use + in gfc_omp_deep_mapping{,_p,_cnt}; force + evaluate to ensure that it is + not gimplified + is a decl. */ + tree tmp = OMP_CLAUSE_SIZE (node); + tree var = gfc_create_var (TREE_TYPE (tmp), + NULL); + gfc_add_modify_loc (map_loc, block, + var, tmp); + OMP_CLAUSE_SIZE (node) = var; + gfc_allocate_lang_decl (var); + GFC_DECL_SAVED_DESCRIPTOR (var) = inner; + } + gfc_omp_namelist *n2 = clauses->lists[OMP_LIST_MAP]; @@ -4035,8 +4912,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, if (drop_mapping) continue; } - node3 = build_omp_clause (input_location, - OMP_CLAUSE_MAP); + node3 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node3, GOMP_MAP_ATTACH_DETACH); OMP_CLAUSE_DECL (node3) @@ -4107,7 +4983,8 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, default: gcc_unreachable (); } - tree node = build_omp_clause (input_location, clause_code); + tree node = build_omp_clause (gfc_get_location (&n->where), + clause_code); if (n->expr == NULL || (n->expr->ref->type == REF_ARRAY && n->expr->ref->u.ar.type == AR_FULL diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h index 63a566a..ae7be9f 100644 --- a/gcc/fortran/trans.h +++ b/gcc/fortran/trans.h @@ -839,6 +839,10 @@ tree gfc_omp_clause_assign_op (tree, tree, tree); tree gfc_omp_clause_linear_ctor (tree, tree, tree, tree); tree gfc_omp_clause_dtor (tree, tree); void gfc_omp_finish_clause (tree, gimple_seq *, bool); +bool gfc_omp_deep_mapping_p (const gimple *, tree); +tree gfc_omp_deep_mapping_cnt (const gimple *, tree, gimple_seq *); +void gfc_omp_deep_mapping (const gimple *, tree, unsigned HOST_WIDE_INT, tree, + tree, tree, tree, tree, gimple_seq *); bool gfc_omp_allocatable_p (tree); bool gfc_omp_scalar_p (tree, bool); bool gfc_omp_scalar_target_p (tree); diff --git a/gcc/ginclude/stddef.h b/gcc/ginclude/stddef.h index 0d53103..bacf24d 100644 --- a/gcc/ginclude/stddef.h +++ b/gcc/ginclude/stddef.h @@ -89,6 +89,21 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #undef _PTRDIFF_T_ #endif +/* When modular code is enabled with macOS SDKs from version 15, the + include guards are set in the includers of this code, rather than as + part of it. This means the we must unset them or the intended code + here will be bypassed (resulting in undefined values). */ +#if defined (__APPLE__) +# if defined(__has_feature) && __has_feature(modules) +# if defined (__need_ptrdiff_t) +# undef __PTRDIFF_T +# endif +# if defined (__need_size_t) +# undef __SIZE_T +# endif +# endif +#endif + /* On VxWorks, <type/vxTypesBase.h> may have defined macros like _TYPE_size_t which will typedef size_t. fixincludes patched the vxTypesBase.h so that this macro is only defined if _GCC_SIZE_T is diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc index 26b1496..806c2bd 100644 --- a/gcc/ipa-cp.cc +++ b/gcc/ipa-cp.cc @@ -313,14 +313,24 @@ ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits) static void ipcp_print_widest_int (FILE *f, const widest_int &value) { - if (wi::eq_p (wi::bit_not (value), 0)) + if (value == -1) fprintf (f, "-1"); - else if (wi::eq_p (wi::bit_not (wi::bit_or (value, - wi::sub (wi::lshift (1, 128), - 1))), 0)) - { - fprintf (f, "all ones folled by "); - print_hex (wi::bit_and (value, wi::sub (wi::lshift (1, 128), 1)), f); + else if (wi::arshift (value, 128) == -1) + { + char buf[35], *p = buf + 2; + widest_int v = wi::zext (value, 128); + size_t len; + print_hex (v, buf); + len = strlen (p); + if (len == 32) + { + fprintf (f, "0xf..f"); + while (*p == 'f') + ++p; + } + else + fprintf (f, "0xf..f%0*d", (int) (32 - len), 0); + fputs (p, f); } else print_hex (value, f); @@ -923,13 +933,13 @@ ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask, m_mask = (m_mask | mask) | (m_value ^ value); if (drop_all_ones) m_mask |= m_value; - m_value &= ~m_mask; - widest_int cap_mask = wi::bit_not (wi::sub (wi::lshift (1, precision), 1)); + widest_int cap_mask = wi::shifted_mask <widest_int> (0, precision, true); m_mask |= cap_mask; if (wi::sext (m_mask, precision) == -1) return set_to_bottom (); + m_value &= ~m_mask; return m_mask != old_mask; } @@ -1005,7 +1015,7 @@ ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision, adjusted_mask |= adjusted_value; adjusted_value &= ~adjusted_mask; } - widest_int cap_mask = wi::bit_not (wi::sub (wi::lshift (1, precision), 1)); + widest_int cap_mask = wi::shifted_mask <widest_int> (0, precision, true); adjusted_mask |= cap_mask; if (wi::sext (adjusted_mask, precision) == -1) return set_to_bottom (); diff --git a/gcc/ipa-locality-cloning.cc b/gcc/ipa-locality-cloning.cc new file mode 100644 index 0000000..2684046 --- /dev/null +++ b/gcc/ipa-locality-cloning.cc @@ -0,0 +1,1137 @@ +/* Code locality based function cloning. + Copyright The GNU Toolchain Authors + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* This file implements cloning required to improve partitioning of the + callgraph for locality considerations. + + Partitioning for improving code locality. + This pass aims to place frequently executed callchains closer together in + memory to improve performance through improved locality. If any frequent + callchains cannot be placed together because they are already placed + elsewhere, local function clones are created and all callers near to the + clones are redirected to use this copy. + + Locality code placement is done in 2 parts. + 1. IPA pass to be executed after ipa-inline and before ipa-pure-const. + Execute stage prepares the plan to place all nodes into partitions. + 2. WPA Partition stage actually implements the plan. + + Brief overview of the IPA pass: + 1. Create and sort callchains. If PGO is available, use real profile + counts. Otherwise, use a set of heuristics to sort the callchains. + 2. Create a partition plan for the callchains, processing them in the sorted + order. + 1. If a function is unpartitioned, place it in the current partition. + 2. If a function is already placed in a partition away from current + partition as part of another callchain: + Create a local clone in current partition, if cloning criteria is + satisfied. + 3. Redirect any new caller to a local clone if one exists. + Partition size is param controlled to fine tune per program behavior. */ + +#include "config.h" +#define INCLUDE_ALGORITHM +#include "system.h" +#include "coretypes.h" +#include "target.h" +#include "function.h" +#include "tree.h" +#include "alloc-pool.h" +#include "tree-pass.h" +#include "cgraph.h" +#include "symbol-summary.h" +#include "tree-vrp.h" +#include "symtab-thunks.h" +#include "sreal.h" +#include "ipa-cp.h" +#include "ipa-prop.h" +#include "ipa-fnsummary.h" +#include "ipa-modref-tree.h" +#include "ipa-modref.h" +#include "symtab-clones.h" +#include "ipa-locality-cloning.h" + +/* Locality partitions, assigns nodes to partitions. These are used later in + WPA partitioning. */ +vec<locality_partition> locality_partitions; + +/* Map from original node to its latest clone. Gets overwritten whenever a new + clone is created from the same node. */ +hash_map<cgraph_node *, cgraph_node *> node_to_clone; +/* Map from clone to its original node. */ +hash_map<cgraph_node *, cgraph_node *> clone_to_node; + +/* Data structure to hold static heuristics and orders for cgraph_nodes. */ +struct locality_order +{ + cgraph_node *node; + sreal order; + locality_order (cgraph_node *node, sreal order) : node (node), order (order) + {} +}; + +/* Return true if NODE is already in some partition. */ +static inline bool +node_partitioned_p (cgraph_node *node) +{ + return node->aux; +} + +/* Add symbol NODE to partition PART. */ +static void +add_node_to_partition (locality_partition part, cgraph_node *node) +{ + struct cgraph_edge *e; + if (node_partitioned_p (node)) + return; + + part->nodes.safe_push (node); + node->aux = (void *) (uintptr_t) (part->part_id); + + if (!node->alias && node->get_partitioning_class () == SYMBOL_PARTITION) + part->insns += ipa_size_summaries->get (node)->size; + + /* Add all inline clones and callees that are duplicated. */ + for (e = node->callees; e; e = e->next_callee) + if (!e->inline_failed) + add_node_to_partition (part, e->callee); + /* omp declare_variant_alt or transparent_alias with definition or linker + discardable (non-local comdat but not forced and not + used by non-LTO). */ + else if (e->callee->get_partitioning_class () == SYMBOL_DUPLICATE) + add_node_to_partition (part, e->callee); + + /* Add all thunks associated with the function. */ + for (e = node->callers; e; e = e->next_caller) + if (e->caller->thunk && !e->caller->inlined_to) + add_node_to_partition (part, e->caller); + + /* Add all aliases associated with the symbol. */ + struct ipa_ref *ref; + FOR_EACH_ALIAS (node, ref) + if (!ref->referring->transparent_alias) + { + cgraph_node *referring = dyn_cast<cgraph_node *> (ref->referring); + /* Only add function aliases. + Varpool refs are added later in LTO partitioning pass. */ + if (referring) + add_node_to_partition (part, referring); + } + else + { + struct ipa_ref *ref2; + /* We do not need to add transparent aliases if they are not used. + However we must add aliases of transparent aliases if they exist. */ + FOR_EACH_ALIAS (ref->referring, ref2) + { + /* Nested transparent aliases are not permitted. */ + gcc_checking_assert (!ref2->referring->transparent_alias); + cgraph_node *referring = dyn_cast<cgraph_node *> (ref2->referring); + if (referring) + add_node_to_partition (part, referring); + } + } +} + +/* Return TRUE if NODE is in PARTITION. */ +static bool +node_in_partition_p (locality_partition partition, cgraph_node *node) +{ + return ((uintptr_t) (partition->part_id) == (uintptr_t) (node->aux)); +} + +/* Helper function for qsort; to break ties. */ +static int +compare_node_uids (cgraph_node *n1, cgraph_node *n2) +{ + int res = n1->get_uid () - n2->get_uid (); + gcc_assert (res != 0); + return res > 0 ? 1 : -1; +} + +/* Helper function for qsort; sort nodes by order. */ +static int +static_profile_cmp (const void *pa, const void *pb) +{ + const locality_order *a = *static_cast<const locality_order *const *> (pa); + const locality_order *b = *static_cast<const locality_order *const *> (pb); + /* Ascending order. */ + if (b->order < a->order) + return 1; + if (b->order > a->order) + return -1; + return compare_node_uids (a->node, b->node); +} + +/* Helper function for qsort; sort nodes by profile count. */ +static int +compare_edge_profile_counts (const void *pa, const void *pb) +{ + const locality_order *a = *static_cast<const locality_order *const *> (pa); + const locality_order *b = *static_cast<const locality_order *const *> (pb); + + profile_count cnt1 = a->node->count.ipa (); + profile_count cnt2 = b->node->count.ipa (); + if (!cnt1.compatible_p (cnt2)) + return static_profile_cmp (pa, pb); + + if (cnt1 < cnt2) + return 1; + if (cnt1 > cnt2) + return -1; + return static_profile_cmp (pa, pb); +} + +/* Create and return a new partition and increment NPARTITIONS. */ + +static locality_partition +create_partition (int &npartitions) +{ + locality_partition part = XCNEW (struct locality_partition_def); + npartitions++; + part->part_id = npartitions; + part->nodes.create (1); + part->insns = 0; + locality_partitions.safe_push (part); + return part; +} + +/* Structure for holding profile count information of callers of a node. */ +struct profile_stats +{ + /* Sum of non-recursive call counts. */ + profile_count nonrec_count; + + /* Sum of recursive call counts. */ + profile_count rec_count; + + /* If non-NULL, this node is the target of alias or thunk and calls from this + should be count in rec_count. */ + cgraph_node *target; +}; + +/* Initialize fields of STATS. */ +static inline void +init_profile_stats (profile_stats *stats, cgraph_node *target = NULL) +{ + stats->nonrec_count = profile_count::zero (); + stats->rec_count = profile_count::zero (); + stats->target = target; +} + +/* Helper function of to accumulate call counts. */ +static bool +accumulate_profile_counts_after_cloning (cgraph_node *node, void *data) +{ + struct profile_stats *stats = (struct profile_stats *) data; + for (cgraph_edge *e = node->callers; e; e = e->next_caller) + { + if (!e->count.initialized_p ()) + continue; + + if (e->caller == stats->target) + stats->rec_count += e->count.ipa (); + else + stats->nonrec_count += e->count.ipa (); + } + return false; +} + +/* NEW_NODE is a previously created clone of ORIG_NODE already present in + current partition. EDGES contains newly redirected edges to NEW_NODE. + Adjust profile information for both nodes and the edge. */ + +static void +adjust_profile_info_for_non_self_rec_edges (auto_vec<cgraph_edge *> &edges, + cgraph_node *new_node, + cgraph_node *orig_node) +{ + profile_count orig_node_count = orig_node->count.ipa (); + profile_count edge_count = profile_count::zero (); + profile_count final_new_count = profile_count::zero (); + profile_count final_orig_count = profile_count::zero (); + + for (unsigned i = 0; i < edges.length (); ++i) + if (edges[i]->count.initialized_p ()) + edge_count += edges[i]->count.ipa (); + + final_orig_count = orig_node_count - edge_count; + + /* NEW_NODE->count was adjusted for other callers when the clone was + first created. Just add the new edge count. */ + final_new_count = new_node->count + edge_count; + + final_new_count = orig_node_count.combine_with_ipa_count (final_new_count); + orig_node->count = final_orig_count; + new_node->count = final_new_count; + + if (dump_file) + { + fprintf (dump_file, "Adjusting profile information for %s\n", + new_node->dump_asm_name ()); + fprintf (dump_file, "\tOriginal node %s\n", orig_node->dump_asm_name ()); + fprintf (dump_file, "\tOriginal count: "); + orig_node_count.dump (dump_file); + fprintf (dump_file, "\n\tAdjusted original count to: "); + final_orig_count.dump (dump_file); + fprintf (dump_file, "\n\tAdjusted clone count to: "); + final_new_count.dump (dump_file); + fprintf (dump_file, "\n"); + } + + /* Scale all callee edges according to adjusted counts. */ + profile_count orig_node_count_copy = orig_node_count; + profile_count::adjust_for_ipa_scaling (&final_new_count, + &orig_node_count_copy); + for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee) + cs->count = cs->count.apply_scale (final_new_count, orig_node_count_copy); + for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee) + cs->count = cs->count.apply_scale (final_new_count, orig_node_count_copy); + + profile_count::adjust_for_ipa_scaling (&final_orig_count, &orig_node_count); + for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee) + cs->count = cs->count.apply_scale (final_orig_count, orig_node_count); + for (cgraph_edge *cs = orig_node->indirect_calls; cs; cs = cs->next_callee) + cs->count = cs->count.apply_scale (final_orig_count, orig_node_count); +} + +/* Adjust profile counts of NEW_NODE and ORIG_NODE, where NEW_NODE is a clone + of OLD_NODE. + Assumes that all eligible edges from current partition so far are redirected + to NEW_NODE and recursive edges are adjusted. */ + +static void +adjust_profile_info (cgraph_node *new_node, cgraph_node *orig_node) +{ + /* If all calls to NEW_NODE are non-recursive, subtract corresponding count + from ORIG_NODE and assign to NEW_NODE, any unexpected remainder stays with + ORIG_NODE. + Recursive calls if present, likely contribute to majority of count; + scale according to redirected callers' count. */ + + profile_count orig_node_count = orig_node->count.ipa (); + profile_stats new_stats, orig_stats; + + init_profile_stats (&new_stats); + init_profile_stats (&orig_stats); + + new_node->call_for_symbol_thunks_and_aliases + (accumulate_profile_counts_after_cloning, &new_stats, false); + orig_node->call_for_symbol_thunks_and_aliases + (accumulate_profile_counts_after_cloning, &orig_stats, false); + + profile_count orig_nonrec_count = orig_stats.nonrec_count; + profile_count orig_rec_count = orig_stats.rec_count; + profile_count new_nonrec_count = new_stats.nonrec_count; + profile_count new_rec_count = new_stats.rec_count; + + profile_count final_new_count = new_nonrec_count; + profile_count final_orig_count = profile_count::zero (); + + /* All calls to NEW_NODE are non-recursive or recursive calls have + zero count. */ + if (!new_rec_count.nonzero_p ()) + final_orig_count = orig_node_count - new_nonrec_count; + else + { + /* If ORIG_NODE is externally visible, indirect calls or calls from + another part of the code may contribute to the count. + update_profiling_info () from ipa-cp.cc pretends to have an extra + caller to represent the extra counts. */ + if (!orig_node->local) + { + profile_count pretend_count = (orig_node_count - new_nonrec_count - + orig_nonrec_count - orig_rec_count); + orig_nonrec_count += pretend_count; + } + + /* Remaining rec_count is assigned in proportion to clone's non-recursive + count. */ + profile_count rec_count = orig_node_count - new_nonrec_count + - orig_nonrec_count; + profile_count new_rec_scaled + = rec_count.apply_scale (new_nonrec_count, + new_nonrec_count + orig_nonrec_count); + final_new_count += new_rec_scaled; + final_orig_count = orig_node_count - final_new_count; + } + + final_new_count = orig_node_count.combine_with_ipa_count (final_new_count); + new_node->count = final_new_count; + orig_node->count = final_orig_count; + + if (dump_file) + { + fprintf (dump_file, "Adjusting profile information for %s\n", + new_node->dump_asm_name ()); + fprintf (dump_file, "\tOriginal node %s\n", orig_node->dump_asm_name ()); + fprintf (dump_file, "\tOriginal count: "); + orig_node_count.dump (dump_file); + fprintf (dump_file, "\n\tAdjusted original count to: "); + final_orig_count.dump (dump_file); + fprintf (dump_file, "\n\tAdjusted clone count to: "); + final_new_count.dump (dump_file); + fprintf (dump_file, "\n"); + } + + /* Scale all callee edges according to adjusted counts. */ + profile_count orig_node_count_copy = orig_node_count; + profile_count::adjust_for_ipa_scaling (&final_new_count, + &orig_node_count_copy); + for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee) + cs->count = cs->count.apply_scale (final_new_count, orig_node_count_copy); + for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee) + cs->count = cs->count.apply_scale (final_new_count, orig_node_count_copy); + + profile_count::adjust_for_ipa_scaling (&final_orig_count, &orig_node_count); + for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee) + cs->count = cs->count.apply_scale (final_orig_count, orig_node_count); + for (cgraph_edge *cs = orig_node->indirect_calls; cs; cs = cs->next_callee) + cs->count = cs->count.apply_scale (final_orig_count, orig_node_count); +} + +/* Return true if EDGE can be safely redirected to another callee. */ +static inline bool +edge_redirectable_p (cgraph_edge *edge, lto_locality_cloning_model cm) +{ + if (cm == LTO_LOCALITY_NON_INTERPOSABLE_CLONING) + { + /* Interposability may change on edge basis. */ + enum availability avail; + avail = edge->callee->get_availability (edge->caller); + if (avail <= AVAIL_INTERPOSABLE) + return false; + } + return true; +} + +/* Create a locality clone of CNODE and redirect all callers present in + PARTITION. + Create a clone dpending on whether CNODE itself is a clone or not. */ + +static cgraph_node * +create_locality_clone (cgraph_node *cnode, + locality_partition partition, int &cl_num, + lto_locality_cloning_model cm) +{ + cgraph_node *cl_node = NULL; + vec<cgraph_edge *> redirect_callers = vNULL; + /* All callers of cnode in current partition are redirected. */ + struct cgraph_edge *edge; + for (edge = cnode->callers; edge; edge = edge->next_caller) + { + struct cgraph_node *caller = edge->caller; + if (node_in_partition_p (partition, caller) && caller->definition + && caller != cnode && edge_redirectable_p (edge, cm)) + redirect_callers.safe_push (edge); + } + + const char *suffix = "locality_clone"; + + tree old_decl = cnode->decl; + tree new_decl = copy_node (old_decl); + + /* Generate a new name for the new version. */ + const char *name = IDENTIFIER_POINTER (DECL_NAME (old_decl)); + DECL_NAME (new_decl) = clone_function_name (name, suffix, cl_num); + SET_DECL_ASSEMBLER_NAME (new_decl, + clone_function_name (old_decl, suffix, cl_num)); + cl_num++; + if (dump_file) + fprintf (dump_file, "\tNew name %s\n", + IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (new_decl))); + + cl_node = cnode->create_clone (new_decl, cnode->count /*profile_count*/, + false /*update_original*/, redirect_callers, + false /*call_duplication_hook*/, + NULL /*new_inlined_to*/, + NULL /*param_adjustments*/, suffix); + + set_new_clone_decl_and_node_flags (cl_node); + + if (cnode->ipa_transforms_to_apply.exists ()) + cl_node->ipa_transforms_to_apply + = cnode->ipa_transforms_to_apply.copy (); + + if (dump_file) + { + fprintf (dump_file, "Cloned Node: %s %s\n", cnode->dump_asm_name (), + cl_node->dump_asm_name ()); + + for (edge = cl_node->callers; edge; edge = edge->next_caller) + fprintf (dump_file, "Redirected callers: %s\n", + edge->caller->dump_asm_name ()); + + for (edge = cl_node->callees; edge; edge = edge->next_callee) + fprintf (dump_file, "Callees of clone: %s %d\n", + edge->callee->dump_asm_name (), edge->frequency ()); + } + return cl_node; +} + +/* Redirect recursive edges of CLONE to correctly point to CLONE. As part of + cloning process, all callee edges of a node are just duplicated but not + redirected. Therefore, these edges still call to original of CLONE. + + For non-inlined CLONEs, NEW_CALLEE == CLONE and ORIG_CALLEE is CLONE's + original node. + + For inlined node, self recursion to CLONE's original same as non-inlined, + additionally, calls to CLONE->inlined_to are also recursive: + NEW_CALLEE == CLONE->inlined_into and + ORIG_CALLEE == original node of CLONE->inlined_into. */ + +static void +adjust_recursive_callees (cgraph_node *clone, cgraph_node *new_callee, + cgraph_node *orig_callee) +{ + cgraph_node *alias = NULL; + for (cgraph_edge *e = clone->callees; e; e = e->next_callee) + { + if (!e->inline_failed) + continue; + + /* Only self-cycle or local alias are handled. */ + cgraph_node *callee = e->callee; + if (callee == orig_callee) + { + cgraph_node **cl = node_to_clone.get (orig_callee); + gcc_assert (cl && *cl == new_callee); + e->redirect_callee_duplicating_thunks (new_callee); + if (dump_file) + fprintf (dump_file, "recursive call from %s to %s orig %s\n", + e->caller->dump_asm_name (), e->callee->dump_asm_name (), + callee->dump_asm_name ()); + } + else if (callee->alias + && e->callee->ultimate_alias_target () == orig_callee) + { + if (!alias) + { + alias = dyn_cast<cgraph_node *> ( + new_callee->noninterposable_alias ()); + } + e->redirect_callee_duplicating_thunks (alias); + if (dump_file) + fprintf (dump_file, "recursive call from %s to %s orig %s\n", + e->caller->dump_asm_name (), e->callee->dump_asm_name (), + callee->dump_asm_name ()); + } + } + new_callee->expand_all_artificial_thunks (); + if (alias) + alias->expand_all_artificial_thunks (); +} + +/* Create clones for CALLER's inlined callees, ORIG_INLINED_TO is the original + node from clone_as_needed () such that new_inlined_to is a clone of it. */ + +static void +inline_clones (cgraph_node *caller, cgraph_node *orig_inlined_to) +{ + struct cgraph_edge *edge; + for (edge = caller->callees; edge; edge = edge->next_callee) + { + struct cgraph_node *callee = edge->callee; + if (edge->inline_failed) + continue; + + if (callee->inlined_to != orig_inlined_to) + continue; + + struct cgraph_node *new_inlined_to, *cl; + if (caller->inlined_to) + new_inlined_to = caller->inlined_to; + else + new_inlined_to = caller; + + cl = callee->create_clone (callee->decl, + edge->count /*profile_count*/, + true /*update_original*/, + vNULL /*redirect_callers*/, + false /*call_duplication_hook*/, + new_inlined_to /*new_inlined_to*/, + NULL /*param_adjustments*/, + "locality_clone" /*suffix*/); + edge->redirect_callee (cl); + + node_to_clone.put (callee, cl); + clone_to_node.put (cl, callee); + + if (callee->thunk) + { + thunk_info *info = thunk_info::get (callee); + *thunk_info::get_create (cl) = *info; + } + + adjust_recursive_callees (cl, new_inlined_to, orig_inlined_to); + adjust_recursive_callees (cl, cl, callee); + if (dump_file) + { + fprintf (dump_file, "Inline cloned\n"); + cl->dump (dump_file); + } + + /* Recursively inline till end of this callchain. */ + inline_clones (cl, orig_inlined_to); + } +} + +/* Clone EDGE->CALLEE if it or a clone of it is not already in PARTITION. + Redirect all callers of EDGE->CALLEE that are in PARTITION, not just the + EDGE. If a clone is already present in PARTITION, redirect all edges from + EDGE->CALLER to EDGE->CALLEE. This is because we only visit one edge per + caller to callee and redirect for all others from there. + + If cloning, also recursively clone inlined functions till the end of the + callchain because inlined clones have 1-1 exclusive copy and edge from + caller to inlined node. + + There are 2 flows possible: + 1. Only redirect + 1.1. cnode is already in current partition - cnode mustn't be a + locality_clone -> nothing to do + 1.2. A clone of cnode is in current partition - find out if it's the + correct clone for edge - must be a locality_clone but the exact same + kind as callee i.e. orig or cp/sra clone, if yes, redirect, else go to #2 + 1.3. Cnode/a clone of cnode is in current partition but caller is inlined + 2. Clone and redirect + 2.1. cnode is original node + 2.2. cnode itself is a clone + Clone inlines + Flavors of edges: + 1. Normal -> orig nodes, locality clones or cp/sra clones + 2. Recursive -> direct recursion + 3. Alias -> recursion via aliasing or as a result of IPA code duplication + 4. Inline -> shouldn't be included in callchain. */ + +static cgraph_node * +clone_node_as_needed (cgraph_edge *edge, locality_partition partition, + int &cl_num, lto_locality_cloning_model cm) +{ + /* suitable_for_locality_cloning_p () currently prohibits cloning aliases due + to potential versioning and materialization issues. Could be enabled in + the future. suitable_for_locality_cloning_p () also checks for + interposability for CNODE but not for edge redirection. */ + struct cgraph_node *cnode = edge->callee; + struct cgraph_node *caller = edge->caller; + + /* If clone of cnode is already in the partition + Get latest clone of cnode. If current partition has cloned cnode, that + clone should be returned. Otherwise, clone from previous partition is + returned + Original node and its clone shouldn't co-exist in current partition + + This is required if callee is partitioned via another edge before caller + was, and we are now visiting caller->callee edge + + 1) a -> b ==> a -> bc1; b was cloned say via d -> bc1, a is orig + 2) ac1 -> b ==> ac1 -> bc1; b was cloned and a was just cloned + 3) a -> bc1 and bc2 present, mustn't happen, b was cloned and a was + redirected without being partitioned first. + Why will we do this again - multiple edges and something's wrong in + partition_callchain () + 4) ac1 -> bc1 ==> ac1 -> bc2; a was cloned and we already got (1) in some + other partition + 5) ac1 -> bc1 but no clone present in this PARTITION. Create from b, not + from bc1? + 6) a -> b; a -> bc0; create new clone, no clone present + 7) ac0 -> b; ac0 -> bc0 same as (6) + 8) a -> bc0 and no clone present, mustn't happen, same as (3) + + Redirect when bc1 is present and: + a -> b or ac -> b or ac -> bc0 */ + + cgraph_node *orig_cnode = cnode; + cgraph_node **o_cnode = clone_to_node.get (cnode); + if (o_cnode) + orig_cnode = *o_cnode; + + cgraph_node **cnode_cl = node_to_clone.get (orig_cnode); + + if (cnode_cl && node_in_partition_p (partition, *cnode_cl)) + { + if (node_in_partition_p (partition, caller)) + { + bool clone_p = false; + auto_vec<cgraph_edge *> redirected_edges; + for (cgraph_edge *ec = caller->callees; ec; ec = ec->next_callee) + if (ec->callee == cnode && edge_redirectable_p (ec, cm)) + { + ec->redirect_callee_duplicating_thunks (*cnode_cl); + clone_p = true; + redirected_edges.safe_push (ec); + if (dump_file) + { + fprintf (dump_file, "clone present %s %s redirecting %s\n", + cnode->dump_asm_name (), + (*cnode_cl)->dump_asm_name (), + caller->dump_asm_name ()); + } + } + if (clone_p) + { + (*cnode_cl)->expand_all_artificial_thunks (); + adjust_profile_info_for_non_self_rec_edges (redirected_edges, + *cnode_cl, cnode); + return NULL; + } + } + } + + /* Create a new clone for a -> b, ac -> b. + For ac -> bc, should be done on bc or b? + bc could be from b_cp/b_sra or b. */ + + if (orig_cnode != cnode) + { + if (dump_file) + fprintf (dump_file, "Clone of clone %s %s\n", cnode->dump_asm_name (), + orig_cnode->dump_asm_name ()); + return NULL; + } + + struct cgraph_node *cloned_node + = create_locality_clone (cnode, partition, cl_num, cm); + + gcc_assert (cloned_node); + if (!cloned_node) + return NULL; + + node_to_clone.put (cnode, cloned_node); + clone_to_node.put (cloned_node, cnode); + + adjust_recursive_callees (cloned_node, cloned_node, cnode); + symtab->call_cgraph_duplication_hooks (cnode, cloned_node); + + adjust_profile_info (cloned_node, cnode); + /* Inline clones are created iff their inlined_to == CNODE. */ + inline_clones (cloned_node, cnode); + + return cloned_node; +} + +/* Accumulate frequency of all edges from EDGE->caller to EDGE->callee. */ + +static sreal +accumulate_incoming_edge_frequency (cgraph_edge *edge) +{ + sreal count = 0; + struct cgraph_edge *e; + for (e = edge->callee->callers; e; e = e->next_caller) + { + /* Make a local decision about all edges for EDGE->caller but not the + other nodes already in the partition. Their edges will be visited + later or may have been visited before and not fit the + cut-off criteria. */ + if (e->caller == edge->caller) + count += e->sreal_frequency (); + } + return count; +} + +/* Determine if EDGE->CALLEE is suitable for cloning. It is assummed that the + callee is not an inlined node. */ + +static bool +suitable_for_locality_cloning_p (cgraph_edge *edge, + lto_locality_cloning_model cm) +{ + cgraph_node *node = edge->callee; + if (!node->versionable) + return false; + + /* Out-of-line locality clones of ipcp or sra clones will be created in this + pass after IPA inline is run. A locality clone has the same function + body and the same updated signature as the ipcp/sra clone. + This fails or asserts based on how the clone is created: + 1. If param_adjustments and tree_map are not recorded for locality clone: + clone materialization (tree_function_versioning ()) fails when + updating signature and remapping calls because clone_of (ipcp/sra + clone) and locality clone differ in param information. + 2. If param_adjustments and tree_map are provided: asserts are triggered + in fnsummary duplication because IPA inline resets some summaries. + + One inelegant solution is to provide param_adjustments and tree_map, and + then set clone_of to ipcp/sra clone's clone_of. However, this sometimes + results in segmentation fault when the compiled program is run. + Disabling clone of clones altogether for now with an aim to resolve this + is future. */ + if (node->clone_of) + return false; + + if (node->alias) + return false; + + if (edge->recursive_p ()) + return false; + + if (!node->definition) + return false; + + /* Don't clone NODE if IPA count of NODE or EDGE is zero. */ + if (!node->count.ipa ().nonzero_p () || !edge->count.ipa ().nonzero_p ()) + return false; + + if (cm == LTO_LOCALITY_NON_INTERPOSABLE_CLONING) + { + /* Interposability may change on edge basis. */ + enum availability avail; + edge->callee->ultimate_alias_target (&avail, edge->caller); + if (avail <= AVAIL_INTERPOSABLE) + return false; + } + + return true; +} + +/* Map from caller to all callees already visited for partitioning. */ +hash_map<cgraph_node *, auto_vec<cgraph_node *> > caller_to_callees; + +/* Partition EDGE->CALLEE into PARTITION or clone if already partitioned and + satisfies cloning criteria such as CLONING_MODEL, REAL_FREQ and SIZE + cut-offs and CLONE_FURTHER_P set by previous caller. */ + +/* callgraph can have multiple caller to callee edges for multiple callsites + For the first such edge, we make decisions about cutoffs and cloning because + we redirect ALL callsites to cloned callee, not just one of them. */ + +static void +partition_callchain (cgraph_edge *edge, locality_partition partition, + bool clone_further_p, + lto_locality_cloning_model cloning_model, + double freq_cutoff, int size, int &cl_num) +{ + /* Aliases are added in the same partition as their targets. + Aliases are not cloned and their callees are not processed separately. */ + cgraph_node *node = edge->callee->ultimate_alias_target (); + cgraph_node *caller = edge->caller; + cgraph_node *caller_node = node, *cl_node = NULL; + + /* Already visited the caller to callee edges. */ + auto_vec<cgraph_node *> &callees = caller_to_callees.get_or_insert (caller); + if (std::find (callees.begin (), callees.end (), node) != callees.end ()) + return; + + callees.safe_push (node); + + if (node->get_partitioning_class () == SYMBOL_PARTITION) + { + if (!node_partitioned_p (node)) + { + add_node_to_partition (partition, node); + if (dump_file) + fprintf (dump_file, "Partitioned node: %s\n", + node->dump_asm_name ()); + } + else if (cloning_model >= LTO_LOCALITY_NON_INTERPOSABLE_CLONING + && !node_in_partition_p (partition, node)) + { + /* Non-inlined node, or alias, already partitioned + If cut-off, don't clone callees but partition unpartitioned + callees. + size is node + inlined nodes. */ + if (clone_further_p) + { + if (!node->alias) + if (ipa_size_summaries->get (node)->size >= size) + clone_further_p = false; + + if (freq_cutoff != 0.0) + { + sreal acc_freq = accumulate_incoming_edge_frequency (edge); + if (acc_freq.to_double () < freq_cutoff) + clone_further_p = false; + } + } + + if (!suitable_for_locality_cloning_p (edge, cloning_model)) + clone_further_p = false; + + if (clone_further_p) + { + /* Try to clone NODE and its inline chain. */ + if (dump_file) + fprintf (dump_file, "Cloning node: %s\n", + node->dump_asm_name ()); + cl_node = clone_node_as_needed (edge, partition, cl_num, + cloning_model); + if (cl_node) + { + add_node_to_partition (partition, cl_node); + caller_node = cl_node; + } + else + caller_node = NULL; + } + } + } + else if (!node->inlined_to) + return; + + if (caller_node) + for (cgraph_edge *e = caller_node->callees; e; e = e->next_callee) + partition_callchain (e, partition, clone_further_p, cloning_model, + freq_cutoff, size, cl_num); +} + +/* Determine whether NODE is an entrypoint to a callchain. */ + +static bool +is_entry_node_p (cgraph_node *node) +{ + /* node->inlined_to is returned as SYMBOL_DUPLICATE. */ + if (node->get_partitioning_class () != SYMBOL_PARTITION) + return false; + + if (!node->callers) + return true; + + for (cgraph_edge *e = node->callers; e; e = e->next_caller) + { + if (! e->recursive_p ()) + return false; + } + if (node->alias + && !is_entry_node_p (node->ultimate_alias_target ())) + return false; + return true; +} + +/* Determine order of all external nodes if PGO profile is available. + Store the order in ORDER. */ + +static bool +locality_determine_ipa_order (auto_vec<locality_order *> *order) +{ + struct cgraph_node *node; + auto_vec<locality_order *> non_comparable_nodes; + FOR_EACH_DEFINED_FUNCTION (node) + if (node->get_partitioning_class () == SYMBOL_PARTITION) + { + if (node->no_reorder) + { + if (dump_file) + fprintf (dump_file, "no reorder %s\n", node->dump_asm_name ()); + return false; + } + else if (is_entry_node_p (node)) + { + profile_count pcnt = node->count.ipa (); + if (!pcnt.initialized_p () || !pcnt.ipa_p ()) + { + sreal cnt = 0; + locality_order *lo = new locality_order (node, cnt); + non_comparable_nodes.safe_push (lo); + continue; + } + sreal count = 0; + struct cgraph_edge *edge; + for (edge = node->callees; edge; edge = edge->next_callee) + { + /* For PGO, frequency is not used in + compare_edge_profile_counts (), it's used only as part of + static profile order. */ + sreal freq = edge->sreal_frequency (); + count += freq; + } + locality_order *cl = new locality_order (node, count); + order->safe_push (cl); + } + } + order->qsort (compare_edge_profile_counts); + for (auto el : non_comparable_nodes) + order->safe_push (el); + return true; +} + +/* Determine order of all external nodes if only static profile is available. + Store the order in ORDER. */ + +static bool +locality_determine_static_order (auto_vec<locality_order *> *order) +{ + struct cgraph_node *node; + FOR_EACH_DEFINED_FUNCTION (node) + if (node->get_partitioning_class () == SYMBOL_PARTITION) + { + if (node->no_reorder) + { + if (dump_file) + fprintf (dump_file, "no reorder %s\n", node->dump_asm_name ()); + return false; + } + else if (is_entry_node_p (node)) + { + sreal count = 0; + struct cgraph_edge *edge; + for (edge = node->callees; edge; edge = edge->next_callee) + { + sreal freq = edge->sreal_frequency (); + count += freq; + } + locality_order *cl = new locality_order (node, count); + order->safe_push (cl); + } + } + order->qsort (static_profile_cmp); + return true; +} + +/* Partitioning for code locality. + 1. Create and sort callchains. If PGO is available, use real profile + counts. Otherwise, use a set of heuristics to sort the callchains. + 2. Partition the external nodes and their callchains in the determined order + 2.1. If !partition, partition, else try and clone if it satisfies cloning + criteria. + 3. Partition all other unpartitioned nodes. */ + +static void +locality_partition_and_clone (int max_locality_partition_size, + lto_locality_cloning_model cloning_model, + int freq_denominator, int size) +{ + locality_partition partition; + int npartitions = 0; + + auto_vec<locality_order *> order; + auto_vec<varpool_node *> varpool_order; + struct cgraph_node *node; + bool order_p; + + int cl_num = 0; + + double real_freq = 0.0; + if (freq_denominator > 0) + real_freq = 1.0 / (double) freq_denominator; + + cgraph_node *n = symtab->first_defined_function (); + if (n && n->count.ipa_p ()) + order_p = locality_determine_ipa_order (&order); + else + order_p = locality_determine_static_order (&order); + if (!order_p) + { + if (dump_file) + { + fprintf (dump_file, "Locality partition: falling back to balanced" + "model\n"); + } + + return; + } + + int64_t partition_size + = max_locality_partition_size + ? max_locality_partition_size : param_max_partition_size; + partition = create_partition (npartitions); + + for (unsigned i = 0; i < order.length (); i++) + { + node = order[i]->node; + if (node_partitioned_p (node)) + continue; + + if (partition->insns > partition_size) + partition = create_partition (npartitions); + if (dump_file) + fprintf (dump_file, "Partition id: %d\n", partition->part_id); + + add_node_to_partition (partition, node); + if (dump_file) + fprintf (dump_file, "Ordered Node: %s\n", node->dump_asm_name ()); + + for (cgraph_edge *edge = node->callees; edge; edge = edge->next_callee) + { + /* Recursively partition the callchain of edge->callee. */ + partition_callchain (edge, partition, true, cloning_model, real_freq, + size, cl_num); + } + } + + for (unsigned i = 0; i < order.length (); i++) + delete order[i]; + order = vNULL; +} + +/* Entry point to locality-clone pass. */ +static int +lc_execute (void) +{ + symtab_node *node; + FOR_EACH_SYMBOL (node) + node->aux = NULL; + + locality_partition_and_clone (param_max_locality_partition_size, + flag_lto_locality_cloning, + param_lto_locality_frequency, + param_lto_locality_size); + + FOR_EACH_SYMBOL (node) + node->aux = NULL; + return 0; +} + +namespace { + +const pass_data pass_data_ipa_locality_clone = { + IPA_PASS, /* type */ + "locality-clone", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_IPA_LC, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + (TODO_dump_symtab | TODO_remove_functions), /* todo_flags_finish */ +}; + +class pass_ipa_locality_cloning : public ipa_opt_pass_d +{ +public: + pass_ipa_locality_cloning (gcc::context *ctxt) + : ipa_opt_pass_d (pass_data_ipa_locality_clone, ctxt, + NULL, /* generate_summary */ + NULL, /* write_summary */ + NULL, /* read_summary */ + NULL, /* write_optimization_summary */ + NULL, /* read_optimization_summary */ + NULL, /* stmt_fixup */ + 0, /* function_transform_todo_flags_start */ + NULL, /* function_transform */ + NULL) /* variable_transform */ + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return (flag_wpa && flag_ipa_reorder_for_locality); + } + + virtual unsigned int execute (function *) { return lc_execute (); } + +}; // class pass_ipa_locality_cloning + +} // namespace + +ipa_opt_pass_d * +make_pass_ipa_locality_cloning (gcc::context *ctxt) +{ + return new pass_ipa_locality_cloning (ctxt); +} diff --git a/gcc/ipa-locality-cloning.h b/gcc/ipa-locality-cloning.h new file mode 100644 index 0000000..591ce57 --- /dev/null +++ b/gcc/ipa-locality-cloning.h @@ -0,0 +1,35 @@ +/* LTO partitioning logic routines. + Copyright The GNU Toolchain Authors + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef IPA_LOCALITY_CLONING_H +#define IPA_LOCALITY_CLONING_H + +/* Structure describing locality partitions. */ +struct locality_partition_def +{ + int part_id; + vec<cgraph_node *> nodes; + int insns; +}; + +typedef struct locality_partition_def *locality_partition; + +extern vec<locality_partition> locality_partitions; + +#endif /* IPA_LOCALITY_CLONING_H */ diff --git a/gcc/lto-cgraph.cc b/gcc/lto-cgraph.cc index ac835a4..8439c51 100644 --- a/gcc/lto-cgraph.cc +++ b/gcc/lto-cgraph.cc @@ -229,6 +229,8 @@ lto_set_symtab_encoder_in_partition (lto_symtab_encoder_t encoder, symtab_node *node) { int index = lto_symtab_encoder_encode (encoder, node); + if (dump_file) + fprintf(dump_file, "Node %s, index %d\n", node->asm_name(), index); encoder->nodes[index].in_partition = true; } diff --git a/gcc/lto/ChangeLog b/gcc/lto/ChangeLog index ee53915..4da9ca3 100644 --- a/gcc/lto/ChangeLog +++ b/gcc/lto/ChangeLog @@ -1,3 +1,13 @@ +2025-04-15 Kyrylo Tkachov <ktkachov@nvidia.com> + + * lto-partition.cc (add_node_references_to_partition): Define. + (create_partition): Likewise. + (lto_locality_map): Likewise. + (lto_promote_cross_file_statics): Add extra dumping. + * lto-partition.h (lto_locality_map): Declare prototype. + * lto.cc (do_whole_program_analysis): Handle + flag_ipa_reorder_for_locality. + 2025-02-28 Richard Biener <rguenther@suse.de> PR lto/91299 diff --git a/gcc/lto/lto-partition.cc b/gcc/lto/lto-partition.cc index 3046951..c7e69ee 100644 --- a/gcc/lto/lto-partition.cc +++ b/gcc/lto/lto-partition.cc @@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. If not see #include "ipa-prop.h" #include "ipa-fnsummary.h" #include "lto-partition.h" +#include "ipa-locality-cloning.h" #include <limits> @@ -1418,6 +1419,126 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) } } +/* Add all references of NODE into PARTITION. */ + +static void +add_node_references_to_partition (ltrans_partition partition, symtab_node *node) +{ + struct ipa_ref *ref = NULL; + varpool_node *vnode; + for (int j = 0; node->iterate_reference (j, ref); j++) + if (is_a <varpool_node *> (ref->referred)) + { + vnode = dyn_cast <varpool_node *> (ref->referred); + if (!symbol_partitioned_p (vnode) + && !vnode->no_reorder + && vnode->get_partitioning_class () == SYMBOL_PARTITION) + { + add_symbol_to_partition (partition, vnode); + if (dump_file) + fprintf (dump_file, "Varpool Node: %s\n", vnode->dump_asm_name ()); + add_node_references_to_partition (partition, vnode); + } + } + + for (int j = 0; node->iterate_referring (j, ref); j++) + if (is_a <varpool_node *> (ref->referring)) + { + vnode = dyn_cast <varpool_node *> (ref->referring); + gcc_assert (vnode->definition); + if (!symbol_partitioned_p (vnode) + && !vnode->no_reorder + && !vnode->can_remove_if_no_refs_p () + && vnode->get_partitioning_class () == SYMBOL_PARTITION) + { + add_symbol_to_partition (partition, vnode); + if (dump_file) + fprintf (dump_file, "Varpool Node: %s\n", vnode->dump_asm_name ()); + add_node_references_to_partition (partition, vnode); + } + } + if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node)) + { + struct cgraph_edge *e; + + /* Add all inline clones and callees that are duplicated. */ + for (e = cnode->callees; e; e = e->next_callee) + if (e->callee->get_partitioning_class () == SYMBOL_DUPLICATE) + add_node_references_to_partition (partition, e->callee); + + /* Add all thunks associated with the function. */ + for (e = cnode->callers; e; e = e->next_caller) + if (e->caller->thunk && !e->caller->inlined_to) + add_node_references_to_partition (partition, e->caller); + } + +} + +/* Create and return the created partition of name NAME. */ + +static ltrans_partition +create_partition (int &npartitions, const char *name) +{ + npartitions++; + return new_partition (name); +} + +/* Partitioning for code locality. + The partitioning plan (and prerequisite cloning) will have been done by the + IPA locality cloning pass. This function just implements that plan by + assigning those partitions to ltrans_parititions. */ + +void +lto_locality_map (int max_partition_size) +{ + symtab_node *snode; + int npartitions = 0; + + auto_vec<varpool_node *> varpool_order; + struct cgraph_node *node; + + if (locality_partitions.length () == 0) + { + if (dump_file) + { + fprintf (dump_file, "Locality partition: falling back to balanced " + "model\n"); + } + lto_balanced_map (param_lto_partitions, param_max_partition_size); + return; + } + ltrans_partition partition = nullptr; + for (auto part : locality_partitions) + { + partition = create_partition (npartitions, ""); + for (unsigned j = 0; j < part->nodes.length (); j++) + { + node = part->nodes[j]; + if (symbol_partitioned_p (node)) + continue; + + add_symbol_to_partition (partition, node); + add_node_references_to_partition (partition, node); + } + } + + int64_t partition_size = max_partition_size; + /* All other unpartitioned symbols. */ + FOR_EACH_SYMBOL (snode) + { + if (snode->get_partitioning_class () == SYMBOL_PARTITION + && !symbol_partitioned_p (snode)) + { + if (partition->insns > partition_size) + partition = create_partition (npartitions, ""); + + add_symbol_to_partition (partition, snode); + if (dump_file) + fprintf (dump_file, "Un-ordered Node: %s\n", snode->dump_asm_name ()); + } + } +} + /* Return true if we must not change the name of the NODE. The name as extracted from the corresponding decl should be passed in NAME. */ @@ -1732,7 +1853,12 @@ lto_promote_cross_file_statics (void) { ltrans_partition part = ltrans_partitions[i]; + if (dump_file) + fprintf (dump_file, "lto_promote_cross_file_statics for part %s %p\n", + part->name, (void *)part->encoder); part->encoder = compute_ltrans_boundary (part->encoder); + if (dump_file) + fprintf (dump_file, "new encoder %p\n", (void *)part->encoder); } lto_clone_numbers = new hash_map<const char *, unsigned>; diff --git a/gcc/lto/lto-partition.h b/gcc/lto/lto-partition.h index 38b3f1e..a6a4195 100644 --- a/gcc/lto/lto-partition.h +++ b/gcc/lto/lto-partition.h @@ -37,6 +37,7 @@ void lto_1_to_1_map (void); void lto_max_map (void); void lto_cache_map (int, int); void lto_balanced_map (int, int); +void lto_locality_map (int); void lto_promote_cross_file_statics (void); void free_ltrans_partitions (void); void lto_promote_statics_nonwpa (void); diff --git a/gcc/lto/lto.cc b/gcc/lto/lto.cc index 18ca475..183634f 100644 --- a/gcc/lto/lto.cc +++ b/gcc/lto/lto.cc @@ -547,7 +547,9 @@ do_whole_program_analysis (void) symtab_node::checking_verify_symtab_nodes (); bitmap_obstack_release (NULL); - if (flag_lto_partition == LTO_PARTITION_1TO1) + if (flag_ipa_reorder_for_locality) + lto_locality_map (param_max_locality_partition_size); + else if (flag_lto_partition == LTO_PARTITION_1TO1) lto_1_to_1_map (); else if (flag_lto_partition == LTO_PARTITION_MAX) lto_max_map (); diff --git a/gcc/opts.cc b/gcc/opts.cc index 80c7a97..5e7b77d 100644 --- a/gcc/opts.cc +++ b/gcc/opts.cc @@ -1037,6 +1037,25 @@ report_conflicting_sanitizer_options (struct gcc_options *opts, location_t loc, } } +/* Validate from OPTS and OPTS_SET that when -fipa-reorder-for-locality is + enabled no explicit -flto-partition is also passed as the locality cloning + pass uses its own partitioning scheme. */ + +static void +validate_ipa_reorder_locality_lto_partition (struct gcc_options *opts, + struct gcc_options *opts_set) +{ + static bool validated_p = false; + + if (opts->x_flag_lto_partition != LTO_PARTITION_DEFAULT) + { + if (opts_set->x_flag_ipa_reorder_for_locality && !validated_p) + error ("%<-fipa-reorder-for-locality%> is incompatible with" + " an explicit %qs option", "-flto-partition"); + } + validated_p = true; +} + /* After all options at LOC have been read into OPTS and OPTS_SET, finalize settings of those options and diagnose incompatible combinations. */ @@ -1249,6 +1268,10 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set, if (opts->x_flag_reorder_blocks_and_partition) SET_OPTION_IF_UNSET (opts, opts_set, flag_reorder_functions, 1); + validate_ipa_reorder_locality_lto_partition (opts, opts_set); + if (opts_set->x_flag_lto_partition != LTO_PARTITION_DEFAULT) + opts_set->x_flag_lto_partition = opts->x_flag_lto_partition = LTO_PARTITION_BALANCED; + /* The -gsplit-dwarf option requires -ggnu-pubnames. */ if (opts->x_dwarf_split_debug_info) opts->x_debug_generate_pub_sections = 2; diff --git a/gcc/params.opt b/gcc/params.opt index 422d082..a2b606f 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -469,6 +469,33 @@ Minimal size of a partition for LTO (in estimated instructions). Common Joined UInteger Var(param_lto_partitions) Init(128) IntegerRange(1, 65536) Param Number of partitions the program should be split to. +Enum +Name(lto_locality_cloning_model) Type(enum lto_locality_cloning_model) UnknownError(unknown LTO partitioning model %qs) + +EnumValue +Enum(lto_locality_cloning_model) String(no) Value(LTO_LOCALITY_NO_CLONING) + +EnumValue +Enum(lto_locality_cloning_model) String(non_interposable) Value(LTO_LOCALITY_NON_INTERPOSABLE_CLONING) + +EnumValue +Enum(lto_locality_cloning_model) String(maximal) Value(LTO_LOCALITY_MAXIMAL_CLONING) + +-param=lto-partition-locality-cloning= +Common Joined RejectNegative Enum(lto_locality_cloning_model) Var(flag_lto_locality_cloning) Init(LTO_LOCALITY_MAXIMAL_CLONING) Optimization + +-param=lto-partition-locality-frequency-cutoff= +Common Joined UInteger Var(param_lto_locality_frequency) Init(1) IntegerRange(0, 65536) Param Optimization +The denominator n of fraction 1/n of the execution frequency of callee to be cloned for a particular caller. Special value of 0 dictates to always clone without a cut-off. + +-param=lto-partition-locality-size-cutoff= +Common Joined UInteger Var(param_lto_locality_size) Init(1000) IntegerRange(1, 65536) Param Optimization +Size cut-off for callee including inlined calls to be cloned for a particular caller. + +-param=lto-max-locality-partition= +Common Joined UInteger Var(param_max_locality_partition_size) Init(1000000) Param +Maximal size of a locality partition for LTO (in estimated instructions). Value of 0 results in default value being used. + -param=max-average-unrolled-insns= Common Joined UInteger Var(param_max_average_unrolled_insns) Init(80) Param Optimization The maximum number of instructions to consider to unroll in a loop on average. diff --git a/gcc/passes.def b/gcc/passes.def index 9fd85a3..3b25105 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -162,6 +162,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_ipa_sra); NEXT_PASS (pass_ipa_fn_summary); NEXT_PASS (pass_ipa_inline); + NEXT_PASS (pass_ipa_locality_cloning); NEXT_PASS (pass_ipa_pure_const); NEXT_PASS (pass_ipa_modref); NEXT_PASS (pass_ipa_free_fn_summary, false /* small_p */); diff --git a/gcc/rust/ChangeLog b/gcc/rust/ChangeLog index 64053d4..453b9f7 100644 --- a/gcc/rust/ChangeLog +++ b/gcc/rust/ChangeLog @@ -1,3 +1,162 @@ +2025-04-14 Arthur Cohen <arthur.cohen@embecosm.com> + + * util/rust-lang-item.h: Add new manually_drop lang item. + * util/rust-lang-item.cc: Likewise. + +2025-04-14 Arthur Cohen <arthur.cohen@embecosm.com> + + * util/rust-attribute-values.h: Add RUSTFMT value. + * util/rust-attributes.cc: Define the attribute. + * util/rust-attributes.h (enum CompilerPass): Add EXTERNAL variant. + * expand/rust-macro-builtins.cc: Fix formatting. + +2025-04-14 Arthur Cohen <arthur.cohen@embecosm.com> + + * resolve/rust-early-name-resolver-2.0.cc (Early::visit_attributes): Remove assertion. + +2025-04-14 Arthur Cohen <arthur.cohen@embecosm.com> + + * util/rust-attribute-values.h: Add missing attributes. + * util/rust-attributes.cc: Likewise. + * util/rust-attributes.h (enum CompilerPass): Mention adding something for const + functions. + +2025-04-14 beamandala <mandalapubhavesh@gmail.com> + + * expand/rust-macro-builtins.cc (MacroBuiltin::builtin_transcribers): + Add entry for track_caller. + * util/rust-attribute-values.h: add `TRACK_CALLER` attribute. + * util/rust-attributes.cc: add `track_caller` attribute definition. + +2025-04-14 Owen Avery <powerboat9.gamer@gmail.com> + + * checks/errors/rust-const-checker.cc + (ConstChecker::visit): Visit the enum items of enums. + * resolve/rust-ast-resolve-item.cc + (ResolveItem::visit): Resolve enum discriminants during nr1.0. + +2025-04-14 Arthur Cohen <arthur.cohen@embecosm.com> + + * expand/rust-macro-builtins-format-args.cc (format_args_parse_arguments): Improve safety, + allow extra commas after end of argument list. + +2025-04-14 Arthur Cohen <arthur.cohen@embecosm.com> + + * expand/rust-macro-expand.cc (MacroExpander::expand_decl_macro): Call into + TokenTreeDesugar. + * expand/rust-token-tree-desugar.cc: New file. + * expand/rust-token-tree-desugar.h: New file. + * Make-lang.in: Compile them. + +2025-04-14 Arthur Cohen <arthur.cohen@embecosm.com> + + * expand/rust-macro-expand.cc (MacroExpander::match_n_matches): Do not + insert fragments and substack fragments if the matcher failed. + +2025-04-14 Arthur Cohen <arthur.cohen@embecosm.com> + + * rust-session-manager.cc (Session::compile_crate): Call the visitor later in the pipeline. + +2025-04-14 Arthur Cohen <arthur.cohen@embecosm.com> + + * ast/rust-ast.h (DelimTokenTree::get_locus): New function. + +2025-04-14 Arthur Cohen <arthur.cohen@embecosm.com> + + * ast/rust-expr.h (class RangeExpr): Add empty outer attributes and allow getting them + and setting them. + +2025-04-14 Arthur Cohen <arthur.cohen@embecosm.com> + + * resolve/rust-toplevel-name-resolver-2.0.cc (TopLevel::visit): Return if module + is unloaded. + +2025-04-14 Arthur Cohen <arthur.cohen@embecosm.com> + + * typecheck/rust-hir-type-check-expr.cc (is_default_fn): New. + (emit_ambiguous_resolution_error): New. + (handle_multiple_candidates): Properly handle multiple candidates in + the case of specialization. + (TypeCheckExpr::visit): Call `handle_multiple_candidates`. + +2025-04-14 Andrew Pinski <quic_apinski@quicinc.com> + + PR rust/119342 + * rust-gcc.cc (block): Add comment on why chaining + the variables of the scope toether. + +2025-04-14 Andrew Pinski <quic_apinski@quicinc.com> + + PR rust/119341 + * rust-gcc.cc (function_type): Use range fors. + (function_type_variadic): Likewise. + (fill_in_fields): Likewise. + (statement_list): Likewise. + (block): Likewise. + (block_add_statements): Likewise. + (function_set_parameters): Likewise. + (write_global_definitions): Likewise. + +2025-04-14 Andrew Pinski <quic_apinski@quicinc.com> + + * rust-gcc.cc (Bvariable::get_tree): Use error_operand_p. + (pointer_type): Likewise. + (reference_type): Likewise. + (immutable_type): Likewise. + (function_type): Likewise. + (function_type_variadic): Likewise. + Cleanup the check for receiver.type first. + (function_ptr_type): Use error_operand_p. + (fill_in_fields): Likewise. + (fill_in_array): Likewise. + (named_type): Likewise. + (type_size): Likewise. + (type_alignment): Likewise. + (type_field_alignment): Likewise. + (type_field_offset): Likewise. + (zero_expression): Likewise. + (float_constant_expression): Likewise. + (convert_expression): Likewise. + (struct_field_expression): Likewise. + (compound_expression): Likewise. + (conditional_expression): Likewise. + (negation_expression): Likewise. + (arithmetic_or_logical_expression): Likewise. + (arithmetic_or_logical_expression_checked): Likewise. + (comparison_expression): Likewise. + (lazy_boolean_expression): Likewise. + (constructor_expression): Likewise. + (array_constructor_expression): Likewise. + (array_index_expression): Likewise. + (call_expression): Likewise. + (init_statement): Likewise. + (assignment_statement): Likewise. + (return_statement): Likewise. + (exception_handler_statement): Likewise. + (if_statement): Likewise. + (compound_statement): Likewise. + Tighten up the code, removing t variable. + (statement_list): Use error_operand_p. + (block): Likewise. + (block_add_statements): Likewise. + (convert_tree): Likewise. + (global_variable): Likewise. + (global_variable_set_init): Likewise. + (local_variable): Likewise. + (parameter_variable): Likewise. + (static_chain_variable): Likewise. + (temporary_variable): Likewise. + (function): Likewise. Tighten up the code. + (function_defer_statement): Use error_operand_p. + (function_set_parameters): Use error_operand_p. + (write_global_definitions): Use error_operand_p. + Tighten up the code around the loop. + +2025-04-14 Andrew Pinski <quic_apinski@quicinc.com> + + * rust-gcc.cc (is_floating_point): Use FLOAT_TYPE_P + instead of manually checking the type. + 2025-04-08 Matty Kuhn <matty.kuhn.1@gmail.com> * ast/rust-ast.h: (AST::Attribute): add empty_input function diff --git a/gcc/sanitizer.def b/gcc/sanitizer.def index 4b7c9dc..c5a9c2d 100644 --- a/gcc/sanitizer.def +++ b/gcc/sanitizer.def @@ -247,7 +247,7 @@ DEF_SANITIZER_BUILTIN(BUILT_IN_TSAN_INIT, "__tsan_init", DEF_SANITIZER_BUILTIN(BUILT_IN_TSAN_FUNC_ENTRY, "__tsan_func_entry", BT_FN_VOID_PTR, ATTR_NOTHROW_LEAF_LIST) DEF_SANITIZER_BUILTIN(BUILT_IN_TSAN_FUNC_EXIT, "__tsan_func_exit", - BT_FN_VOID_PTR, ATTR_NOTHROW_LEAF_LIST) + BT_FN_VOID, ATTR_NOTHROW_LEAF_LIST) DEF_SANITIZER_BUILTIN(BUILT_IN_TSAN_VPTR_UPDATE, "__tsan_vptr_update", BT_FN_VOID_PTR_PTR, ATTR_NOTHROW_LEAF_LIST) DEF_SANITIZER_BUILTIN(BUILT_IN_TSAN_READ1, "__tsan_read1", diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 5710dbe3..822c126 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,255 @@ +2025-04-15 Iain Buclaw <ibuclaw@gdcproject.org> + + PR d/119826 + * gdc.dg/debug/imports/pr119826b.d: New test. + * gdc.dg/debug/pr119826.d: New test. + +2025-04-15 Nathaniel Shead <nathanieloshead@gmail.com> + + PR c++/119755 + * g++.dg/modules/lambda-10_a.H: New test. + * g++.dg/modules/lambda-10_b.C: New test. + +2025-04-15 Jakub Jelinek <jakub@redhat.com> + + * gcc.dg/completion-2.c: Expect also -flto-partition=default line. + +2025-04-15 Qing Zhao <qing.zhao@oracle.com> + + PR c/119717 + * gcc.dg/pr119717.c: New test. + +2025-04-15 H.J. Lu <hjl.tools@gmail.com> + + PR target/119784 + * gcc.target/i386/apx-interrupt-1.c: Expect 31 .cfi_restore + directives. + +2025-04-15 Vineet Gupta <vineetg@rivosinc.com> + + PR target/119533 + * go.dg/pr119533-riscv.go: New test. + * go.dg/pr119533-riscv-2.go: New test. + +2025-04-15 Robin Dapp <rdapp@ventanamicro.com> + + PR target/119547 + * gcc.target/riscv/rvv/vsetvl/avl_single-68.c: xfail. + * g++.target/riscv/rvv/autovec/pr119547.C: New test. + * g++.target/riscv/rvv/autovec/pr119547-2.C: New test. + * gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c: Adjust. + +2025-04-15 Tobias Burnus <tburnus@baylibre.com> + + * gfortran.dg/gomp/map-alloc-comp-1.f90: Remove dg-error. + * gfortran.dg/gomp/polymorphic-mapping-2.f90: Update warn wording. + * gfortran.dg/gomp/polymorphic-mapping.f90: Change expected + diagnostic; some tests moved to ... + * gfortran.dg/gomp/polymorphic-mapping-1.f90: ... here as new test. + * gfortran.dg/gomp/polymorphic-mapping-3.f90: New test. + * gfortran.dg/gomp/polymorphic-mapping-4.f90: New test. + * gfortran.dg/gomp/polymorphic-mapping-5.f90: New test. + +2025-04-15 Martin Jambor <mjambor@suse.cz> + Jakub Jelinek <jakub@redhat.com> + + PR ipa/119803 + * gcc.dg/ipa/pr119803.c: New test. + +2025-04-15 Iain Buclaw <ibuclaw@gdcproject.org> + + PR d/119799 + * gdc.dg/import-c/pr119799.d: New test. + * gdc.dg/import-c/pr119799c.c: New test. + +2025-04-15 Patrick Palka <ppalka@redhat.com> + + PR c++/119807 + PR c++/112288 + * g++.dg/template/friend86.C: New test. + * g++.dg/template/friend87.C: New test. + +2025-04-15 Iain Buclaw <ibuclaw@gdcproject.org> + + PR d/119817 + * gdc.dg/debug/imports/m119817/a.d: New test. + * gdc.dg/debug/imports/m119817/b.d: New test. + * gdc.dg/debug/imports/m119817/package.d: New test. + * gdc.dg/debug/pr119817.d: New test. + +2025-04-15 Jakub Jelinek <jakub@redhat.com> + + PR sanitizer/119801 + * c-c++-common/tsan/pr119801.c: New test. + +2025-04-15 Jonathan Yong <10walls@gmail.com> + + * gcc.dg/Wbuiltin-declaration-mismatch-4.c: Make diagnostic + accept long long. + +2025-04-15 Jakub Jelinek <jakub@redhat.com> + + PR ipa/119318 + * gcc.dg/ipa/pr119318.c: Remove dg-additional-options, add -w to + dg-options. + +2025-04-15 Jason Merrill <jason@redhat.com> + + PR c++/113835 + * g++.dg/cpp2a/constexpr-vector1.C: New test. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/118794 + * g++.target/gcn/exceptions-bad_cast-2.C: Set + '-mno-fake-exceptions'. + * g++.target/gcn/exceptions-pr118794-1.C: Likewise. + * g++.target/gcn/exceptions-throw-2.C: Likewise. + * g++.target/nvptx/exceptions-bad_cast-2.C: Likewise. + * g++.target/nvptx/exceptions-pr118794-1.C: Likewise. + * g++.target/nvptx/exceptions-throw-2.C: Likewise. + * g++.target/gcn/exceptions-bad_cast-2_-mfake-exceptions.C: New. + * g++.target/gcn/exceptions-pr118794-1_-mfake-exceptions.C: + Likewise. + * g++.target/gcn/exceptions-throw-2_-mfake-exceptions.C: Likewise. + * g++.target/nvptx/exceptions-bad_cast-2_-mfake-exceptions.C: + Likewise. + * g++.target/nvptx/exceptions-pr118794-1_-mfake-exceptions.C: + Likewise. + * g++.target/nvptx/exceptions-throw-2_-mfake-exceptions.C: + Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * g++.target/gcn/exceptions-throw-3.C: New. + * g++.target/nvptx/exceptions-throw-3.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * g++.target/gcn/exceptions-throw-2.C: New. + * g++.target/nvptx/exceptions-throw-2.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * g++.target/gcn/exceptions-throw-1.C: New. + * g++.target/nvptx/exceptions-throw-1.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * g++.target/gcn/exceptions-bad_cast-3.C: New. + * g++.target/nvptx/exceptions-bad_cast-3.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * g++.target/gcn/exceptions-bad_cast-2.C: New. + * g++.target/nvptx/exceptions-bad_cast-2.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * g++.target/gcn/exceptions-bad_cast-1.C: New. + * g++.target/nvptx/exceptions-bad_cast-1.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + PR target/118794 + * g++.target/gcn/exceptions-pr118794-1.C: New. + * g++.target/nvptx/exceptions-pr118794-1.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + PR c++/119692 + * g++.target/gcn/pr119692-1-1.C: New. + * g++.target/nvptx/pr119692-1-1.C: Likewise. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * g++.target/gcn/gcn.exp: New. + +2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> + + * lib/gcc-dg.exp (${tool}_load): Polish 'dg-output-file' test + logs. + +2025-04-14 Jakub Jelinek <jakub@redhat.com> + + PR ipa/119318 + * gcc.dg/ipa/pr119530.c (d): Change type from char to signed char. + (e): Change argument type from long to long long. + +2025-04-14 beamandala <mandalapubhavesh@gmail.com> + + * rust/compile/track_caller.rs: New test. + +2025-04-14 Owen Avery <powerboat9.gamer@gmail.com> + + * rust/compile/enum_discriminant2.rs: New test. + +2025-04-14 Arthur Cohen <arthur.cohen@embecosm.com> + + * rust/compile/format_args_extra_comma.rs: New test. + +2025-04-14 Arthur Cohen <arthur.cohen@embecosm.com> + + * rust/compile/macros/mbe/macro-issue3709-1.rs: New test. + * rust/compile/macros/mbe/macro-issue3709-2.rs: New test. + * rust/compile/macros/mbe/macro-issue3693.rs: New file. + +2025-04-14 Arthur Cohen <arthur.cohen@embecosm.com> + + * rust/compile/macros/mbe/macro-issue3708.rs: New test. + +2025-04-14 Arthur Cohen <arthur.cohen@embecosm.com> + + * rust/execute/torture/min_specialization2.rs: New test. + * rust/execute/torture/min_specialization3.rs: New test. + +2025-04-14 Andrew Pinski <quic_apinski@quicinc.com> + + PR tree-optimization/118476 + * gcc.dg/torture/pr118476-1.c: New test. + +2025-04-14 Patrick Palka <ppalka@redhat.com> + + PR c++/99214 + * g++.dg/concepts/diagnostic20.C: New test. + +2025-04-14 H.J. Lu <hjl.tools@gmail.com> + + PR target/119784 + * gcc.target/i386/pr119784a.c: New test. + * gcc.target/i386/pr119784b.c: Likewise. + +2025-04-14 Martin Jambor <mjambor@suse.cz> + + PR ipa/119318 + * gcc.dg/ipa/pr119318.c: New test. + * gcc.dg/ipa/pr119530.c: Likwise. + +2025-04-14 Richard Biener <rguenther@suse.de> + + PR tree-optimization/119757 + * gcc.dg/vect/pr119757.c: New testcase. + +2025-04-14 Richard Biener <rguenther@suse.de> + + PR tree-optimization/119778 + * g++.dg/torture/pr119778.C: New testcase. + +2025-04-14 Gaius Mulley <gaiusmod2@gmail.com> + + PR modula2/119779 + * gm2.dg/doc/examples/pass/doc-examples-pass.exp: New test. + * gm2.dg/doc/examples/pass/exampleadd.mod: New test. + * gm2.dg/doc/examples/pass/exampleadd2.mod: New test. + * gm2.dg/doc/examples/pass/hello.mod: New test. + * gm2.dg/doc/examples/pass/hellopim.mod: New test. + +2025-04-14 Eric Botcazou <ebotcazou@adacore.com> + + PR lto/119792 + * gnat.dg/lto29.adb: New test. + * gnat.dg/lto29_pkg.ads: New helper. + 2025-04-13 Jerry DeLisle <jvdelisle@gcc.gnu.org> PR libfortran/119502 diff --git a/gcc/testsuite/c-c++-common/tsan/pr119801.c b/gcc/testsuite/c-c++-common/tsan/pr119801.c new file mode 100644 index 0000000..d3a6bb4 --- /dev/null +++ b/gcc/testsuite/c-c++-common/tsan/pr119801.c @@ -0,0 +1,24 @@ +/* PR sanitizer/119801 */ +/* { dg-do compile } */ +/* { dg-options "-fsanitize=thread" } */ + +[[gnu::noipa]] int +bar (int *p) +{ + return ++*p; +} + +int +foo (int *p) +{ + ++*p; + [[gnu::musttail]] return bar (p); +} + +[[gnu::noinline]] int +baz (int x) +{ + if (x < 10) + return x; + [[gnu::musttail]] return baz (x - 2); +} diff --git a/gcc/testsuite/g++.dg/cpp2a/constexpr-vector1.C b/gcc/testsuite/g++.dg/cpp2a/constexpr-vector1.C new file mode 100644 index 0000000..196c6ec --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/constexpr-vector1.C @@ -0,0 +1,8 @@ +// PR c++/113835 +// { dg-timeout-factor 0.05 } +// { dg-do compile { target c++20_only } } + +#include <vector> +const std::size_t N = 1'000'000; +std::vector<int> x(N); +int main() {} diff --git a/gcc/testsuite/g++.dg/modules/lambda-10_a.H b/gcc/testsuite/g++.dg/modules/lambda-10_a.H new file mode 100644 index 0000000..1ad1a80 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/lambda-10_a.H @@ -0,0 +1,17 @@ +// PR c++/119755 +// { dg-additional-options "-fmodule-header" } +// { dg-module-cmi {} } + +template <typename _Out> void format(_Out) { + constexpr int __term = 1; + [&] { __term; }; + [&] { const int outer = __term; { __term; } }; + [&]() noexcept { __term; }; + [&]() noexcept { const int outer = __term; { __term; } }; + [&](auto) { int n[__term]; }(0); + [&](auto) noexcept { int n[__term]; }(0); +} + +inline void vformat() { + format(0); +} diff --git a/gcc/testsuite/g++.dg/modules/lambda-10_b.C b/gcc/testsuite/g++.dg/modules/lambda-10_b.C new file mode 100644 index 0000000..3556bce --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/lambda-10_b.C @@ -0,0 +1,7 @@ +// PR c++/119755 +// { dg-additional-options "-fmodules" } + +import "lambda-10_a.H"; +int main() { + vformat(); +} diff --git a/gcc/testsuite/g++.dg/template/friend86.C b/gcc/testsuite/g++.dg/template/friend86.C new file mode 100644 index 0000000..9e2c1af --- /dev/null +++ b/gcc/testsuite/g++.dg/template/friend86.C @@ -0,0 +1,25 @@ +// PR c++/119807 +// { dg-do run } + +template<int N> +struct A { + template<class T> friend int f(A<N>, T); +}; + +template struct A<0>; +template struct A<1>; + +int main() { + A<0> x; + A<1> y; + if (f(x, true) != 0) __builtin_abort(); + if (f(y, true) != 1) __builtin_abort(); +} + +template<int N> +struct B { + template<class T> friend int f(A<N>, T) { return N; } +}; + +template struct B<0>; +template struct B<1>; diff --git a/gcc/testsuite/g++.dg/template/friend87.C b/gcc/testsuite/g++.dg/template/friend87.C new file mode 100644 index 0000000..94c0dfc --- /dev/null +++ b/gcc/testsuite/g++.dg/template/friend87.C @@ -0,0 +1,42 @@ +// PR c++/119807 +// { dg-do compile { target c++20 } } + +using size_t = decltype(sizeof(0)); + +template<auto tag, size_t current> +struct CounterReader { + template<typename> + friend auto counterFlag(CounterReader<tag, current>) noexcept; +}; + +template<auto tag, size_t current> +struct CounterWriter { + static constexpr size_t value = current; + + template<typename> + friend auto counterFlag(CounterReader<tag, current>) noexcept {} +}; + +template<auto tag, auto unique, size_t current = 0, size_t mask = size_t(1) << (sizeof(size_t) * 8 - 1)> +[[nodiscard]] constexpr size_t counterAdvance() noexcept { + if constexpr (!mask) { + return CounterWriter<tag, current + 1>::value; + } else if constexpr (requires { counterFlag<void>(CounterReader<tag, current | mask>()); }) { + return counterAdvance<tag, unique, current | mask, (mask >> 1)>(); + } + else { + return counterAdvance<tag, unique, current, (mask >> 1)>(); + } +} + +constexpr auto defaultCounterTag = [] {}; + +template<auto tag = defaultCounterTag, auto unique = [] {}> +constexpr size_t counter() noexcept { + return counterAdvance<tag, unique>(); +} + +int main() { + static_assert(counter() == 1); + static_assert(counter() == 2); +} diff --git a/gcc/testsuite/g++.target/gcn/exceptions-bad_cast-1.C b/gcc/testsuite/g++.target/gcn/exceptions-bad_cast-1.C new file mode 100644 index 0000000..f3e3099 --- /dev/null +++ b/gcc/testsuite/g++.target/gcn/exceptions-bad_cast-1.C @@ -0,0 +1,15 @@ +/* 'std::bad_cast' exception. */ + +/* { dg-do run } */ +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ +/* { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +#include "../../../../libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + We don't print anything, but just 'abort'. + + { dg-shouldfail {'std::bad_cast' exception} } */ diff --git a/gcc/testsuite/g++.target/gcn/exceptions-bad_cast-2.C b/gcc/testsuite/g++.target/gcn/exceptions-bad_cast-2.C new file mode 100644 index 0000000..b047cbed --- /dev/null +++ b/gcc/testsuite/g++.target/gcn/exceptions-bad_cast-2.C @@ -0,0 +1,13 @@ +/* 'std::bad_cast' exception, caught. */ + +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ +/* { dg-additional-options -fexceptions } */ +/* { dg-additional-options -mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +#include "../../../../libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + Compilation fails: + { dg-regexp {[^\r\n]+: In function 'int main\(\)':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) */ diff --git a/gcc/testsuite/g++.target/gcn/exceptions-bad_cast-2_-mfake-exceptions.C b/gcc/testsuite/g++.target/gcn/exceptions-bad_cast-2_-mfake-exceptions.C new file mode 100644 index 0000000..2904188 --- /dev/null +++ b/gcc/testsuite/g++.target/gcn/exceptions-bad_cast-2_-mfake-exceptions.C @@ -0,0 +1,18 @@ +/* 'std::bad_cast' exception, caught, '-mfake-exceptions'. */ + +/* { dg-do run } */ +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ +/* { dg-additional-options -fexceptions } */ +/* { dg-additional-options -mfake-exceptions } + { dg-bogus {sorry, unimplemented: exception handling not supported} {} { target *-*-* } 0 } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +#include "exceptions-bad_cast-2.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + We don't print anything, but just 'abort'. + + There is no 'catch'ing; any exception is fatal. + { dg-shouldfail {'std::bad_cast' exception} } */ diff --git a/gcc/testsuite/g++.target/gcn/exceptions-bad_cast-3.C b/gcc/testsuite/g++.target/gcn/exceptions-bad_cast-3.C new file mode 100644 index 0000000..3d0118c --- /dev/null +++ b/gcc/testsuite/g++.target/gcn/exceptions-bad_cast-3.C @@ -0,0 +1,10 @@ +/* 'std::bad_cast' exception, dead code. */ + +/* { dg-do run } */ +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ +/* { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +#include "../../../../libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-3.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } */ diff --git a/gcc/testsuite/g++.target/gcn/exceptions-pr118794-1.C b/gcc/testsuite/g++.target/gcn/exceptions-pr118794-1.C new file mode 100644 index 0000000..20f9d49 --- /dev/null +++ b/gcc/testsuite/g++.target/gcn/exceptions-pr118794-1.C @@ -0,0 +1,17 @@ +/* Exception handling constructs in dead code. */ + +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ +/* { dg-additional-options -fexceptions } */ +/* { dg-additional-options -mno-fake-exceptions } */ +/* { dg-additional-options -O0 } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +#include "../../../../libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C" + +/* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes + '__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f': + { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } + Given '-O0', compilation fails: + { dg-regexp {[^\r\n]+: In function 'void f\(\)':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) */ diff --git a/gcc/testsuite/g++.target/gcn/exceptions-pr118794-1_-mfake-exceptions.C b/gcc/testsuite/g++.target/gcn/exceptions-pr118794-1_-mfake-exceptions.C new file mode 100644 index 0000000..a5f0da2 --- /dev/null +++ b/gcc/testsuite/g++.target/gcn/exceptions-pr118794-1_-mfake-exceptions.C @@ -0,0 +1,16 @@ +/* Exception handling constructs in dead code, '-mfake-exceptions'. */ + +/* { dg-do run } */ +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ +/* { dg-additional-options -fexceptions } */ +/* { dg-additional-options -mfake-exceptions } + { dg-bogus {sorry, unimplemented: exception handling not supported} {} { target *-*-* } 0 } */ +/* { dg-additional-options -O0 } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +#include "exceptions-pr118794-1.C" + +/* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes + '__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f': + { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } */ diff --git a/gcc/testsuite/g++.target/gcn/exceptions-throw-1.C b/gcc/testsuite/g++.target/gcn/exceptions-throw-1.C new file mode 100644 index 0000000..6cadf58 --- /dev/null +++ b/gcc/testsuite/g++.target/gcn/exceptions-throw-1.C @@ -0,0 +1,16 @@ +/* 'throw'. */ + +/* { dg-do run } */ +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ +/* { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +#include "../../../../libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + We don't print anything, but just 'abort'. + + { dg-shouldfail {'MyException' exception} } */ diff --git a/gcc/testsuite/g++.target/gcn/exceptions-throw-2.C b/gcc/testsuite/g++.target/gcn/exceptions-throw-2.C new file mode 100644 index 0000000..671c810 --- /dev/null +++ b/gcc/testsuite/g++.target/gcn/exceptions-throw-2.C @@ -0,0 +1,14 @@ +/* 'throw', caught. */ + +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ +/* { dg-additional-options -fexceptions } */ +/* { dg-additional-options -mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +#include "../../../../libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + Compilation fails: + { dg-regexp {[^\r\n]+: In function 'int main\(\)':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) */ diff --git a/gcc/testsuite/g++.target/gcn/exceptions-throw-2_-mfake-exceptions.C b/gcc/testsuite/g++.target/gcn/exceptions-throw-2_-mfake-exceptions.C new file mode 100644 index 0000000..f1fd505 --- /dev/null +++ b/gcc/testsuite/g++.target/gcn/exceptions-throw-2_-mfake-exceptions.C @@ -0,0 +1,19 @@ +/* 'throw', caught, '-mfake-exceptions'. */ + +/* { dg-do run } */ +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ +/* { dg-additional-options -fexceptions } */ +/* { dg-additional-options -mfake-exceptions } + { dg-bogus {sorry, unimplemented: exception handling not supported} {} { target *-*-* } 0 } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +#include "exceptions-throw-2.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + We don't print anything, but just 'abort'. + + There is no 'catch'ing; any exception is fatal. + { dg-shouldfail {'MyException' exception} } */ diff --git a/gcc/testsuite/g++.target/gcn/exceptions-throw-3.C b/gcc/testsuite/g++.target/gcn/exceptions-throw-3.C new file mode 100644 index 0000000..5c1ad7a --- /dev/null +++ b/gcc/testsuite/g++.target/gcn/exceptions-throw-3.C @@ -0,0 +1,11 @@ +/* 'throw', dead code. */ + +/* { dg-do run } */ +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ +/* { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +#include "../../../../libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-3.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } */ diff --git a/gcc/testsuite/g++.target/gcn/gcn.exp b/gcc/testsuite/g++.target/gcn/gcn.exp new file mode 100644 index 0000000..a3bd75f --- /dev/null +++ b/gcc/testsuite/g++.target/gcn/gcn.exp @@ -0,0 +1,56 @@ +# Specific regression driver for GCN. +# Copyright (C) 2000-2025 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# G++ testsuite that uses the `dg.exp' driver. + +# Exit immediately if this isn't a GCN target. +if ![istarget amdgcn*-*-*] then { + return +} + +# Load support procs. +load_lib g++-dg.exp + +# If a testcase doesn't have special options, use these. +global DEFAULT_CXXFLAGS +if ![info exists DEFAULT_CXXFLAGS] then { + set DEFAULT_CXXFLAGS " -pedantic-errors -Wno-long-long" +} + +# Initialize `dg'. +dg-init + +# Recursively find files in $dir and subdirs, do not walk into subdirs +# that contain their own .exp file. +proc find-cxx-tests { dir suffix } { + set tests [lsort [glob -nocomplain -directory $dir "*.$suffix" ]] + foreach subdir [lsort [glob -nocomplain -type d -directory $dir *]] { + if { [glob -nocomplain -directory $subdir *.exp] eq "" } { + eval lappend tests [find-cxx-tests $subdir $suffix] + } + } + return $tests +} + +set tests [find-cxx-tests $srcdir/$subdir {C}] + +# Main loop. +g++-dg-runtest $tests "" $DEFAULT_CXXFLAGS + + +# All done. +dg-finish diff --git a/gcc/testsuite/g++.target/gcn/pr119692-1-1.C b/gcc/testsuite/g++.target/gcn/pr119692-1-1.C new file mode 100644 index 0000000..b44b08d --- /dev/null +++ b/gcc/testsuite/g++.target/gcn/pr119692-1-1.C @@ -0,0 +1,6 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-do run } */ +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ + +#include "../../../../libgomp/testsuite/libgomp.oacc-c++/pr119692-1-1.C" diff --git a/gcc/testsuite/g++.target/nvptx/exceptions-bad_cast-1.C b/gcc/testsuite/g++.target/nvptx/exceptions-bad_cast-1.C new file mode 100644 index 0000000..f3e3099 --- /dev/null +++ b/gcc/testsuite/g++.target/nvptx/exceptions-bad_cast-1.C @@ -0,0 +1,15 @@ +/* 'std::bad_cast' exception. */ + +/* { dg-do run } */ +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ +/* { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +#include "../../../../libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-1.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + We don't print anything, but just 'abort'. + + { dg-shouldfail {'std::bad_cast' exception} } */ diff --git a/gcc/testsuite/g++.target/nvptx/exceptions-bad_cast-2.C b/gcc/testsuite/g++.target/nvptx/exceptions-bad_cast-2.C new file mode 100644 index 0000000..b047cbed --- /dev/null +++ b/gcc/testsuite/g++.target/nvptx/exceptions-bad_cast-2.C @@ -0,0 +1,13 @@ +/* 'std::bad_cast' exception, caught. */ + +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ +/* { dg-additional-options -fexceptions } */ +/* { dg-additional-options -mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +#include "../../../../libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + Compilation fails: + { dg-regexp {[^\r\n]+: In function 'int main\(\)':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) */ diff --git a/gcc/testsuite/g++.target/nvptx/exceptions-bad_cast-2_-mfake-exceptions.C b/gcc/testsuite/g++.target/nvptx/exceptions-bad_cast-2_-mfake-exceptions.C new file mode 100644 index 0000000..3f40951 --- /dev/null +++ b/gcc/testsuite/g++.target/nvptx/exceptions-bad_cast-2_-mfake-exceptions.C @@ -0,0 +1,19 @@ +/* 'std::bad_cast' exception, caught, '-mfake-exceptions'. */ + +/* { dg-do run } */ +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ +/* { dg-additional-options -fexceptions } */ +/* { dg-additional-options -mfake-exceptions } + { dg-bogus {sorry, unimplemented: exception handling not supported} {} { target *-*-* } 0 } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ +/* { dg-bogus {_ZTISt8bad_cast} PR119734 { xfail *-*-* } 0 } */ + +#include "exceptions-bad_cast-2.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } + We don't print anything, but just 'abort'. + + There is no 'catch'ing; any exception is fatal. + { dg-shouldfail {'std::bad_cast' exception} } */ diff --git a/gcc/testsuite/g++.target/nvptx/exceptions-bad_cast-3.C b/gcc/testsuite/g++.target/nvptx/exceptions-bad_cast-3.C new file mode 100644 index 0000000..3d0118c --- /dev/null +++ b/gcc/testsuite/g++.target/nvptx/exceptions-bad_cast-3.C @@ -0,0 +1,10 @@ +/* 'std::bad_cast' exception, dead code. */ + +/* { dg-do run } */ +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ +/* { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +#include "../../../../libgomp/testsuite/libgomp.oacc-c++/exceptions-bad_cast-3.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_bad_cast, } 1 optimized } } */ diff --git a/gcc/testsuite/g++.target/nvptx/exceptions-pr118794-1.C b/gcc/testsuite/g++.target/nvptx/exceptions-pr118794-1.C new file mode 100644 index 0000000..20f9d49 --- /dev/null +++ b/gcc/testsuite/g++.target/nvptx/exceptions-pr118794-1.C @@ -0,0 +1,17 @@ +/* Exception handling constructs in dead code. */ + +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ +/* { dg-additional-options -fexceptions } */ +/* { dg-additional-options -mno-fake-exceptions } */ +/* { dg-additional-options -O0 } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +#include "../../../../libgomp/testsuite/libgomp.c++/target-exceptions-pr118794-1.C" + +/* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes + '__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f': + { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } + Given '-O0', compilation fails: + { dg-regexp {[^\r\n]+: In function 'void f\(\)':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) */ diff --git a/gcc/testsuite/g++.target/nvptx/exceptions-pr118794-1_-mfake-exceptions.C b/gcc/testsuite/g++.target/nvptx/exceptions-pr118794-1_-mfake-exceptions.C new file mode 100644 index 0000000..a5f0da2 --- /dev/null +++ b/gcc/testsuite/g++.target/nvptx/exceptions-pr118794-1_-mfake-exceptions.C @@ -0,0 +1,16 @@ +/* Exception handling constructs in dead code, '-mfake-exceptions'. */ + +/* { dg-do run } */ +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ +/* { dg-additional-options -fexceptions } */ +/* { dg-additional-options -mfake-exceptions } + { dg-bogus {sorry, unimplemented: exception handling not supported} {} { target *-*-* } 0 } */ +/* { dg-additional-options -O0 } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +#include "exceptions-pr118794-1.C" + +/* In this specific C++ arrangement, distilled from PR118794, GCC synthesizes + '__builtin_eh_pointer', '__builtin_unwind_resume' calls as dead code in 'f': + { dg-final { scan-tree-dump-times {gimple_call <__builtin_eh_pointer, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__builtin_unwind_resume, } 1 optimized } } */ diff --git a/gcc/testsuite/g++.target/nvptx/exceptions-throw-1.C b/gcc/testsuite/g++.target/nvptx/exceptions-throw-1.C new file mode 100644 index 0000000..6cadf58 --- /dev/null +++ b/gcc/testsuite/g++.target/nvptx/exceptions-throw-1.C @@ -0,0 +1,16 @@ +/* 'throw'. */ + +/* { dg-do run } */ +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ +/* { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +#include "../../../../libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-1.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + We don't print anything, but just 'abort'. + + { dg-shouldfail {'MyException' exception} } */ diff --git a/gcc/testsuite/g++.target/nvptx/exceptions-throw-2.C b/gcc/testsuite/g++.target/nvptx/exceptions-throw-2.C new file mode 100644 index 0000000..671c810 --- /dev/null +++ b/gcc/testsuite/g++.target/nvptx/exceptions-throw-2.C @@ -0,0 +1,14 @@ +/* 'throw', caught. */ + +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ +/* { dg-additional-options -fexceptions } */ +/* { dg-additional-options -mno-fake-exceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +#include "../../../../libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-2.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + Compilation fails: + { dg-regexp {[^\r\n]+: In function 'int main\(\)':[\r\n]+(?:[^\r\n]+: sorry, unimplemented: exception handling not supported[\r\n]+)+} } + (Note, using 'dg-regexp' instead of 'dg-message', as the former runs before the auto-mark-UNSUPPORTED.) */ diff --git a/gcc/testsuite/g++.target/nvptx/exceptions-throw-2_-mfake-exceptions.C b/gcc/testsuite/g++.target/nvptx/exceptions-throw-2_-mfake-exceptions.C new file mode 100644 index 0000000..f1fd505 --- /dev/null +++ b/gcc/testsuite/g++.target/nvptx/exceptions-throw-2_-mfake-exceptions.C @@ -0,0 +1,19 @@ +/* 'throw', caught, '-mfake-exceptions'. */ + +/* { dg-do run } */ +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ +/* { dg-additional-options -fexceptions } */ +/* { dg-additional-options -mfake-exceptions } + { dg-bogus {sorry, unimplemented: exception handling not supported} {} { target *-*-* } 0 } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +#include "exceptions-throw-2.C" + +/* { dg-output {CheCKpOInT[\r\n]+} } + + { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } + We don't print anything, but just 'abort'. + + There is no 'catch'ing; any exception is fatal. + { dg-shouldfail {'MyException' exception} } */ diff --git a/gcc/testsuite/g++.target/nvptx/exceptions-throw-3.C b/gcc/testsuite/g++.target/nvptx/exceptions-throw-3.C new file mode 100644 index 0000000..5c1ad7a --- /dev/null +++ b/gcc/testsuite/g++.target/nvptx/exceptions-throw-3.C @@ -0,0 +1,11 @@ +/* 'throw', dead code. */ + +/* { dg-do run } */ +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ +/* { dg-additional-options -fexceptions } */ +/* { dg-additional-options -fdump-tree-optimized-raw } */ + +#include "../../../../libgomp/testsuite/libgomp.oacc-c++/exceptions-throw-3.C" + +/* { dg-final { scan-tree-dump-times {gimple_call <__cxa_allocate_exception, } 1 optimized } } + { dg-final { scan-tree-dump-times {gimple_call <__cxa_throw, } 1 optimized } } */ diff --git a/gcc/testsuite/g++.target/nvptx/pr119692-1-1.C b/gcc/testsuite/g++.target/nvptx/pr119692-1-1.C new file mode 100644 index 0000000..b44b08d --- /dev/null +++ b/gcc/testsuite/g++.target/nvptx/pr119692-1-1.C @@ -0,0 +1,6 @@ +/* PR119692 "C++ 'typeinfo', 'vtable' vs. OpenACC, OpenMP 'target' offloading" */ + +/* { dg-do run } */ +/* Via the magic string "-std=*++" indicate that testing one (the default) C++ standard is sufficient. */ + +#include "../../../../libgomp/testsuite/libgomp.oacc-c++/pr119692-1-1.C" diff --git a/gcc/testsuite/g++.target/riscv/rvv/autovec/pr119547-2.C b/gcc/testsuite/g++.target/riscv/rvv/autovec/pr119547-2.C new file mode 100644 index 0000000..1b98d3d --- /dev/null +++ b/gcc/testsuite/g++.target/riscv/rvv/autovec/pr119547-2.C @@ -0,0 +1,212 @@ +/* { dg-do run { target rv64 } } */ +/* { dg-require-effective-target riscv_v_ok } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d --param=logical-op-non-short-circuit=0" } */ + +#include <riscv_vector.h> + +using v_uint8 = vuint8m2_t; +using v_int8 = vint8m2_t; +using v_uint16 = vuint16m2_t; +using v_int16 = vint16m2_t; +using v_uint32 = vuint32m2_t; +using v_int32 = vint32m2_t; +using v_uint64 = vuint64m2_t; +using v_int64 = vint64m2_t; +using v_float32 = vfloat32m2_t; +using v_float64 = vfloat64m2_t; + +using uchar = unsigned char; +using schar = signed char; +using ushort = unsigned short; +using uint = unsigned int; +using uint64 = unsigned long int; +using int64 = long int; + +struct Size +{ + int width; + int height; +}; + +template <class T> struct VTraits; + +template <> struct VTraits<vint32m1_t> +{ + static inline int vlanes () { return __riscv_vsetvlmax_e32m1 (); } + using lane_type = int32_t; + static const int max_nlanes = 1024 / 32 * 2; +}; +template <> struct VTraits<vint32m2_t> +{ + static inline int vlanes () { return __riscv_vsetvlmax_e32m2 (); } + using lane_type = int32_t; + static const int max_nlanes = 1024 / 32 * 2; +}; +template <> struct VTraits<vint32m4_t> +{ + static inline int vlanes () { return __riscv_vsetvlmax_e32m4 (); } + using lane_type = int32_t; + static const int max_nlanes = 1024 / 32 * 2; +}; +template <> struct VTraits<vint32m8_t> +{ + static inline int vlanes () { return __riscv_vsetvlmax_e32m8 (); } + using lane_type = int32_t; + static const int max_nlanes = 1024 / 32 * 2; +}; + +template <> struct VTraits<vfloat64m1_t> +{ + static inline int vlanes () { return __riscv_vsetvlmax_e64m1 (); } + using lane_type = double; + static const int max_nlanes = 1024 / 64 * 2; +}; +template <> struct VTraits<vfloat64m2_t> +{ + static inline int vlanes () { return __riscv_vsetvlmax_e64m2 (); } + using lane_type = double; + static const int max_nlanes = 1024 / 64 * 2; +}; +template <> struct VTraits<vfloat64m4_t> +{ + static inline int vlanes () { return __riscv_vsetvlmax_e64m4 (); } + using lane_type = double; + static const int max_nlanes = 1024 / 64 * 2; +}; +template <> struct VTraits<vfloat64m8_t> +{ + static inline int vlanes () { return __riscv_vsetvlmax_e64m8 (); } + using lane_type = double; + static const int max_nlanes = 1024 / 64 * 2; +}; + +static inline v_float64 +v_setall_f64 (double v) +{ + return __riscv_vfmv_v_f_f64m2 (v, VTraits<v_float64>::vlanes ()); +} +static inline v_float64 +vx_setall_f64 (double v) +{ + return v_setall_f64 (v); +} + +inline v_int32 +v_load_expand_q (const schar *ptr) +{ + return __riscv_vwcvt_x ( + __riscv_vwcvt_x (__riscv_vle8_v_i8mf2 (ptr, VTraits<v_int32>::vlanes ()), + VTraits<v_int32>::vlanes ()), + VTraits<v_int32>::vlanes ()); +} + +static inline v_int32 +vx_load_expand_q (const schar *ptr) +{ + return v_load_expand_q (ptr); +} + +inline v_float64 +v_cvt_f64 (const v_int32 &a) +{ + return __riscv_vget_f64m2 (__riscv_vfwcvt_f (a, VTraits<v_int32>::vlanes ()), + 0); +} + +inline v_float64 +v_cvt_f64_high (const v_int32 &a) +{ + return __riscv_vget_f64m2 (__riscv_vfwcvt_f (a, VTraits<v_int32>::vlanes ()), + 1); +} + +inline void +v_store (double *ptr, const v_float64 &a) +{ + __riscv_vse64 (ptr, a, VTraits<v_float64>::vlanes ()); +} + +static inline void +v_store_pair_as (double *ptr, const v_float64 &a, const v_float64 &b) +{ + v_store (ptr, a); + v_store (ptr + VTraits<v_float64>::vlanes (), b); +} + +static inline void +vx_load_pair_as (const schar *ptr, v_float64 &a, v_float64 &b) +{ + v_int32 v0 = vx_load_expand_q (ptr); + a = v_cvt_f64 (v0); + b = v_cvt_f64_high (v0); +} + +inline v_float64 +v_fma (const v_float64 &a, const v_float64 &b, const v_float64 &c) +{ + return __riscv_vfmacc_vv_f64m2 (c, a, b, VTraits<v_float64>::vlanes ()); +} + +template <typename _Tp> +static inline _Tp +saturate_cast (double v) +{ + return _Tp (v); +} + +template <typename _Ts, typename _Td> +__attribute__ ((noipa)) void +cvt_64f (const _Ts *src, size_t sstep, _Td *dst, size_t dstep, Size size, + double a, double b) +{ + v_float64 va = vx_setall_f64 (a), vb = vx_setall_f64 (b); + const int VECSZ = VTraits<v_float64>::vlanes () * 2; + + sstep /= sizeof (src[0]); + dstep /= sizeof (dst[0]); + + for (int i = 0; i < size.height; i++, src += sstep, dst += dstep) + { + int j = 0; + + for (; j < size.width; j += VECSZ) + { + if (j > size.width - VECSZ) + { + if (j == 0 || src == (_Ts *) dst) + break; + j = size.width - VECSZ; + } + v_float64 v0, v1; + vx_load_pair_as (src + j, v0, v1); + v0 = v_fma (v0, va, vb); + v1 = v_fma (v1, va, vb); + v_store_pair_as (dst + j, v0, v1); + } + + for (; j < size.width; j++) + dst[j] = saturate_cast<_Td> (src[j] * a + b); + } +} + +void +__attribute__ ((noipa)) +cvtScale8s64f (const uchar *src_, size_t sstep, const uchar *, size_t, + uchar *dst_, size_t dstep, Size size, void *scale_) +{ + const schar *src = (const schar *) src_; + double *dst = (double *) dst_; + double *scale = (double *) scale_; + cvt_64f (src, sstep, dst, dstep, size, (double) scale[0], (double) scale[1]); +} + +int main () +{ + uchar src[1024]; + uchar dst[1024]; + + double scale[2] = {2.0, 3.0}; + Size size {4, 1}; + + cvtScale8s64f (src, 4, NULL, 0, dst, 32, size, (void *)scale); +} diff --git a/gcc/testsuite/g++.target/riscv/rvv/autovec/pr119547.C b/gcc/testsuite/g++.target/riscv/rvv/autovec/pr119547.C new file mode 100644 index 0000000..bac0fb1 --- /dev/null +++ b/gcc/testsuite/g++.target/riscv/rvv/autovec/pr119547.C @@ -0,0 +1,82 @@ +/* { dg-do run { target rv64 } } */ +/* { dg-require-effective-target riscv_v_ok } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d --param=logical-op-non-short-circuit=0" } */ + +#include <riscv_vector.h> +using v_int32 = vint32m2_t; +using v_float64 = vfloat64m2_t; +struct Size +{ + int width; + int height; +}; +template <class> struct VTraits +{ + static int vlanes () { return __riscv_vsetvlmax_e32m2 (); } +}; +v_int32 +v_load_expand_q (const signed char *ptr) +{ + return __riscv_vwcvt_x ( + __riscv_vwcvt_x (__riscv_vle8_v_i8mf2 (ptr, VTraits<v_int32>::vlanes ()), + VTraits<v_int32>::vlanes ()), + VTraits<v_int32>::vlanes ()); +} +v_float64 +v_cvt_f64_high (v_int32 a) +{ + return __riscv_vget_f64m2 (__riscv_vfwcvt_f (a, VTraits<v_int32>::vlanes ()), + 1); +} +void +v_store (double *ptr, v_float64 a) +{ + __riscv_vse64 (ptr, a, __riscv_vsetvlmax_e64m2 ()); +} +void +v_store_pair_as (double *ptr, v_float64 b) +{ + v_store (ptr, b); +} +void +vx_load_pair_as (const signed char *ptr, v_float64, v_float64 &b) +{ + v_int32 v0; + b = v_cvt_f64_high (v0); +}; +void +cvt_64f (const signed char *src, double *dst, Size size) +{ + int VECSZ = __riscv_vsetvlmax_e64m2 (); + for (int i; i < size.height; i++) + { + int j; + for (;; j += VECSZ) + { + if (j > -VECSZ) + if (j == 0 || dst) + break; + v_float64 v0, v1; + vx_load_pair_as (src, v0, v1); + v_store_pair_as (dst, v1); + } + for (; j < size.width; j++) + dst[j] = (src[j]); + } +} +void +cvtScale8s64f (unsigned char *src_, unsigned char *dst_, + size_t, Size size, void *) +{ + signed char src; + double dst = *dst_; + cvt_64f (&src, &dst, size); +} +int main () +{ + unsigned char src[1]; + unsigned char dst[1024]; + double scale[1]; + Size size{4, 1}; + cvtScale8s64f (src, dst, 32, size, scale); +} diff --git a/gcc/testsuite/gcc.dg/Wbuiltin-declaration-mismatch-4.c b/gcc/testsuite/gcc.dg/Wbuiltin-declaration-mismatch-4.c index c48fe5f..09aaaa6 100644 --- a/gcc/testsuite/gcc.dg/Wbuiltin-declaration-mismatch-4.c +++ b/gcc/testsuite/gcc.dg/Wbuiltin-declaration-mismatch-4.c @@ -77,9 +77,9 @@ void test_integer_conversion_memset (void *d) /* Passing a ptrdiff_t where size_t is expected may not be unsafe but because GCC may emits suboptimal code for such calls warning for them helps improve efficiency. */ - memset (d, 0, diffi); /* { dg-warning ".memset. argument 3 promotes to .ptrdiff_t. {aka .\(long \)?\(int\)?\(__int20\)?.} where .\(long \)?\(__int20 \)?unsigned\( int\)?. is expected" } */ + memset (d, 0, diffi); /* { dg-warning ".memset. argument 3 promotes to .ptrdiff_t. {aka .\(long \)*\(int\)?\(__int20\)?.} where .\(long \)*\(__int20 \)?unsigned\( int\)?. is expected" } */ - memset (d, 0, 2.0); /* { dg-warning ".memset. argument 3 type is .double. where '\(long \)?\(__int20 \)?unsigned\( int\)?' is expected" } */ + memset (d, 0, 2.0); /* { dg-warning ".memset. argument 3 type is .double. where '\(long \)*\(__int20 \)?unsigned\( int\)?' is expected" } */ /* Verify that the same call as above but to the built-in doesn't trigger a warning. */ diff --git a/gcc/testsuite/gcc.dg/completion-2.c b/gcc/testsuite/gcc.dg/completion-2.c index 99e6531..46c511c 100644 --- a/gcc/testsuite/gcc.dg/completion-2.c +++ b/gcc/testsuite/gcc.dg/completion-2.c @@ -5,6 +5,7 @@ -flto-partition=1to1 -flto-partition=balanced -flto-partition=cache +-flto-partition=default -flto-partition=max -flto-partition=none -flto-partition=one diff --git a/gcc/testsuite/gcc.dg/ipa/pr119318.c b/gcc/testsuite/gcc.dg/ipa/pr119318.c index 8e62ec5..f179aed 100644 --- a/gcc/testsuite/gcc.dg/ipa/pr119318.c +++ b/gcc/testsuite/gcc.dg/ipa/pr119318.c @@ -1,7 +1,6 @@ /* { dg-do run } */ /* { dg-require-effective-target int128 } */ -/* { dg-additional-options "-Wno-psabi -w" } */ -/* { dg-options "-Wno-psabi -O2" } */ +/* { dg-options "-Wno-psabi -w -O2" } */ typedef unsigned V __attribute__((vector_size (64))); typedef unsigned __int128 W __attribute__((vector_size (64))); diff --git a/gcc/testsuite/gcc.dg/ipa/pr119530.c b/gcc/testsuite/gcc.dg/ipa/pr119530.c index 70f158a..f99c4fd 100644 --- a/gcc/testsuite/gcc.dg/ipa/pr119530.c +++ b/gcc/testsuite/gcc.dg/ipa/pr119530.c @@ -5,8 +5,8 @@ struct a { int b; }; int c; -char d; -static int e(long f) { return f < 0; } +signed char d; +static int e(long long f) { return f < 0; } static void g(unsigned f) { c = e(~f); } int main() { int h; diff --git a/gcc/testsuite/gcc.dg/ipa/pr119803.c b/gcc/testsuite/gcc.dg/ipa/pr119803.c new file mode 100644 index 0000000..1a7bfd2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/pr119803.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void f(int p); +int a, b; +char c; +static int d(int e) { return !e || a == 1 ? 0 : a / e; } +static void h(short e) { + int g = d(e); + f(g); +} +void i() { + c = 128; + h(c); + b = d(65536); +} diff --git a/gcc/testsuite/gcc.dg/pr119717.c b/gcc/testsuite/gcc.dg/pr119717.c new file mode 100644 index 0000000..e5eedc5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr119717.c @@ -0,0 +1,24 @@ +/* PR c/119717 */ +/* { dg-additional-options "-std=c23" } */ +/* { dg-do compile } */ + +struct annotated { + unsigned count; + [[gnu::counted_by(count)]] char array[]; +}; + +[[gnu::noinline,gnu::noipa]] +static unsigned +size_of (bool x, struct annotated *a) +{ + char *p = (x ? a : 0)->array; + return __builtin_dynamic_object_size (p, 1); +} + +int main() +{ + struct annotated *p = __builtin_malloc(sizeof *p); + p->count = 0; + __builtin_printf ("the bdos whole is %ld\n", size_of (0, p)); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rwsr-ungated.c b/gcc/testsuite/gcc.target/aarch64/acle/rwsr-ungated.c new file mode 100644 index 0000000..d67a426 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/acle/rwsr-ungated.c @@ -0,0 +1,13 @@ +/* Test that __arm_[r,w]sr intrinsics aren't gated (by default). */ + +/* { dg-do compile } */ +/* { dg-options "-march=armv8-a" } */ + +#include <arm_acle.h> + +uint64_t +foo (uint64_t a) +{ + __arm_wsr64 ("zcr_el1", a); + return __arm_rsr64 ("smcr_el1"); +} diff --git a/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c b/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c index fefe2e6..fa1acc7 100644 --- a/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c +++ b/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c @@ -66,7 +66,7 @@ void foo (void *frame) /* { dg-final { scan-assembler-times {\t\.cfi_offset 132, -120} 1 } } */ /* { dg-final { scan-assembler-times {\t\.cfi_offset 131, -128} 1 } } */ /* { dg-final { scan-assembler-times {\t\.cfi_offset 130, -136} 1 } } */ -/* { dg-final { scan-assembler-times ".cfi_restore" 15} } */ +/* { dg-final { scan-assembler-times ".cfi_restore" 31 } } */ /* { dg-final { scan-assembler-times "pop(?:l|q)\[\\t \]*%(?:e|r)ax" 1 } } */ /* { dg-final { scan-assembler-times "pop(?:l|q)\[\\t \]*%(?:e|r)bx" 1 } } */ /* { dg-final { scan-assembler-times "pop(?:l|q)\[\\t \]*%(?:e|r)cx" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-68.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-68.c index bf95e1c..64666d3 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-68.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-68.c @@ -21,6 +21,12 @@ void f2 (void * restrict in, void * restrict out, int l, int n, int m) } } +/* The second check is XFAILed because we currently don't lift + vsetvls into non-transparent (in LCM parlance) blocks. + See PR119547. + In this test it is still possible because the conflicting + register only ever feeds vsetvls. */ + /* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */ -/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */ +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } xfail { *-*-* } } } } */ /* { dg-final { scan-assembler-times {addi\s+[a-x0-9]+,\s*[a-x0-9]+,\s*44} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c index ddf53ca..0dbf34a 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c @@ -43,6 +43,6 @@ void foo (int8_t * restrict in, int8_t * restrict out, int n, int cond) } } -/* { dg-final { scan-assembler-times {vsetvli} 15 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" no-opts "-flto" } } } } */ +/* { dg-final { scan-assembler-times {vsetvli} 14 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" no-opts "-flto" } } } } */ /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au]} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */ -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*mf2,\s*t[au],\s*m[au]} 4 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */ +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*mf2,\s*t[au],\s*m[au]} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */ diff --git a/gcc/testsuite/gdc.dg/debug/imports/m119817/a.d b/gcc/testsuite/gdc.dg/debug/imports/m119817/a.d new file mode 100644 index 0000000..a137472 --- /dev/null +++ b/gcc/testsuite/gdc.dg/debug/imports/m119817/a.d @@ -0,0 +1,2 @@ +module imports.m119817.a; +void f119817()() { } diff --git a/gcc/testsuite/gdc.dg/debug/imports/m119817/b.d b/gcc/testsuite/gdc.dg/debug/imports/m119817/b.d new file mode 100644 index 0000000..aef0e37 --- /dev/null +++ b/gcc/testsuite/gdc.dg/debug/imports/m119817/b.d @@ -0,0 +1,2 @@ +module imports.m119817.b; +void f119817() { } diff --git a/gcc/testsuite/gdc.dg/debug/imports/m119817/package.d b/gcc/testsuite/gdc.dg/debug/imports/m119817/package.d new file mode 100644 index 0000000..188827e --- /dev/null +++ b/gcc/testsuite/gdc.dg/debug/imports/m119817/package.d @@ -0,0 +1,4 @@ +module imports.m119817; +public import + imports.m119817.a, + imports.m119817.b; diff --git a/gcc/testsuite/gdc.dg/debug/imports/pr119826b.d b/gcc/testsuite/gdc.dg/debug/imports/pr119826b.d new file mode 100644 index 0000000..3c5a6ac --- /dev/null +++ b/gcc/testsuite/gdc.dg/debug/imports/pr119826b.d @@ -0,0 +1,14 @@ +module imports.pr119826b; + +import pr119826 : t119826; + +class C119826 +{ + enum E119826 { Evalue } + const E119826 em = void; +} + +void f119826(C119826 c) +{ + t119826(c.em); +} diff --git a/gcc/testsuite/gdc.dg/debug/pr119817.d b/gcc/testsuite/gdc.dg/debug/pr119817.d new file mode 100644 index 0000000..3eea6ba --- /dev/null +++ b/gcc/testsuite/gdc.dg/debug/pr119817.d @@ -0,0 +1,6 @@ +// { dg-do compile } +// { dg-additional-sources "imports/m119817/package.d" } +// { dg-additional-sources "imports/m119817/a.d" } +// { dg-additional-sources "imports/m119817/b.d" } +module pr119817; +import imports.m119817 : f119817; diff --git a/gcc/testsuite/gdc.dg/debug/pr119826.d b/gcc/testsuite/gdc.dg/debug/pr119826.d new file mode 100644 index 0000000..2fb98c7 --- /dev/null +++ b/gcc/testsuite/gdc.dg/debug/pr119826.d @@ -0,0 +1,8 @@ +// { dg-do compile } +// { dg-additional-sources "imports/pr119826b.d" } +module pr119826; + +int t119826(A)(A args) +{ + assert(false); +} diff --git a/gcc/testsuite/gdc.dg/import-c/pr119799.d b/gcc/testsuite/gdc.dg/import-c/pr119799.d new file mode 100644 index 0000000..d8b0fa2 --- /dev/null +++ b/gcc/testsuite/gdc.dg/import-c/pr119799.d @@ -0,0 +1,2 @@ +// { dg-do compile } +import pr119799c; diff --git a/gcc/testsuite/gdc.dg/import-c/pr119799c.c b/gcc/testsuite/gdc.dg/import-c/pr119799c.c new file mode 100644 index 0000000..b80e856 --- /dev/null +++ b/gcc/testsuite/gdc.dg/import-c/pr119799c.c @@ -0,0 +1 @@ +static struct {} s119799; diff --git a/gcc/testsuite/gfortran.dg/gomp/map-alloc-comp-1.f90 b/gcc/testsuite/gfortran.dg/gomp/map-alloc-comp-1.f90 index 0c44296..f48addc 100644 --- a/gcc/testsuite/gfortran.dg/gomp/map-alloc-comp-1.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/map-alloc-comp-1.f90 @@ -10,5 +10,5 @@ type sct end type type(sct) var -!$omp target enter data map(to:var) ! { dg-error "allocatable components is not permitted in map clause" } +!$omp target enter data map(to:var) end diff --git a/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-1.f90 b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-1.f90 new file mode 100644 index 0000000..750cec9 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-1.f90 @@ -0,0 +1,30 @@ +type t + integer :: t +end type t +class(t), target, allocatable :: c, ca(:) +class(t), pointer :: p, pa(:) +integer :: x +allocate( t :: c, ca(5)) +p => c +pa => ca + +! 11111111112222222222333333333344 +!2345678901234567890123456789012345678901 +!$omp target enter data map(c, ca, p, pa) +! { dg-warning "29:Mapping of polymorphic list item 'c' is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-1 } +! { dg-warning "32:Mapping of polymorphic list item 'ca' is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-2 } +! { dg-warning "36:Mapping of polymorphic list item 'p' is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-3 } +! { dg-warning "39:Mapping of polymorphic list item 'pa' is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-4 } + +! 11111111112222222222333333333344 +!2345678901234567890123456789012345678901 + +! 11111111112222222222333333333344 +!2345678901234567890123456789012345678901 +!$omp target update from(c,ca), to(p,pa) +! { dg-warning "26:Mapping of polymorphic list item 'c' is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-1 } +! { dg-warning "28:Mapping of polymorphic list item 'ca' is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-2 } +! { dg-warning "36:Mapping of polymorphic list item 'p' is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-3 } +! { dg-warning "38:Mapping of polymorphic list item 'pa' is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-4 } + +end diff --git a/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-2.f90 b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-2.f90 index e25db68..3bedc9b 100644 --- a/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-2.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-2.f90 @@ -9,7 +9,7 @@ allocate( t :: c, ca(5)) p => c pa => ca -!$omp target ! { dg-warning "Implicit mapping of polymorphic variable 'ca' is unspecified behavior \\\[-Wopenmp\\\]" } +!$omp target ! { dg-warning "Mapping of polymorphic list item 'ca' is unspecified behavior \\\[-Wopenmp\\\]" } ll = allocated(ca) !$omp end target diff --git a/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-3.f90 b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-3.f90 new file mode 100644 index 0000000..9777ecf --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-3.f90 @@ -0,0 +1,23 @@ +subroutine sub(var, var2) +type t + integer :: x +end type t + +type t2 + integer :: x + integer, allocatable :: y +end type + +class(t) var, var2 +type(t2) :: var3, var4 +!$omp target firstprivate(var) & ! { dg-error "Polymorphic list item 'var' at .1. in FIRSTPRIVATE clause has unspecified behavior and unsupported" } +!$omp& private(var2) ! { dg-error "Polymorphic list item 'var2' at .1. in PRIVATE clause has unspecified behavior and unsupported" } + var%x = 5 + var2%x = 5 +!$omp end target +!$omp target firstprivate(var3) & ! { dg-error "Sorry, list item 'var3' at .1. with allocatable components is not yet supported in FIRSTPRIVATE clause" } +!$omp& private(var4) ! { dg-error "Sorry, list item 'var4' at .1. with allocatable components is not yet supported in PRIVATE clause" } + var3%x = 5 + var4%x = 5 +!$omp end target +end diff --git a/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-4.f90 b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-4.f90 new file mode 100644 index 0000000..5a1a70a --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-4.f90 @@ -0,0 +1,9 @@ +subroutine one +implicit none +type t + class(*), allocatable :: ul +end type + +type(t) :: var +!$omp target enter data map(to:var) ! { dg-error "Mapping of unlimited polymorphic list item 'var.ul' is unspecified behavior and unsupported" } +end diff --git a/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-5.f90 b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-5.f90 new file mode 100644 index 0000000..4b5814e --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-5.f90 @@ -0,0 +1,9 @@ +subroutine one +implicit none +type t + class(*), allocatable :: ul +end type + +class(*), allocatable :: ul_var +!$omp target enter data map(to: ul_var) ! { dg-error "Mapping of unlimited polymorphic list item 'ul_var' is unspecified behavior and unsupported" } +end diff --git a/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping.f90 b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping.f90 index dd7eb31..752cca2 100644 --- a/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping.f90 @@ -10,37 +10,21 @@ pa => ca ! 11111111112222222222333333333344 !2345678901234567890123456789012345678901 -!$omp target enter data map(c, ca, p, pa) -! { dg-warning "29:Mapping polymorphic list item at .1. is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-1 } -! { dg-warning "32:Mapping polymorphic list item at .1. is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-2 } -! { dg-warning "36:Mapping polymorphic list item at .1. is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-3 } -! { dg-warning "39:Mapping polymorphic list item at .1. is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-4 } - -! 11111111112222222222333333333344 -!2345678901234567890123456789012345678901 -!$omp target firstprivate(ca) ! { dg-warning "27:FIRSTPRIVATE with polymorphic list item at .1. is unspecified behavior \\\[-Wopenmp\\\]" } +!$omp target firstprivate(ca) ! { dg-error "27:Polymorphic list item 'ca' at .1. in FIRSTPRIVATE clause has unspecified behavior and unsupported" } !$omp end target -!$omp target parallel do firstprivate(ca) ! { dg-warning "39:FIRSTPRIVATE with polymorphic list item at .1. is unspecified behavior \\\[-Wopenmp\\\]" } +!$omp target parallel do firstprivate(ca) ! { dg-error "39:Polymorphic list item 'ca' at .1. in FIRSTPRIVATE clause has unspecified behavior and unsupported" } do x = 0, 5 end do -!$omp target parallel do private(ca) ! OK; should map declared type +!$omp target parallel do private(ca) ! { dg-error "34:Polymorphic list item 'ca' at .1. in PRIVATE clause has unspecified behavior and unsupported" } do x = 0, 5 end do -!$omp target private(ca) ! OK; should map declared type +!$omp target private(ca) ! { dg-error "22:Polymorphic list item 'ca' at .1. in PRIVATE clause has unspecified behavior and unsupported" } block end block -! 11111111112222222222333333333344 -!2345678901234567890123456789012345678901 -!$omp target update from(c,ca), to(p,pa) -! { dg-warning "26:Mapping polymorphic list item at .1. is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-1 } -! { dg-warning "28:Mapping polymorphic list item at .1. is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-2 } -! { dg-warning "36:Mapping polymorphic list item at .1. is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-3 } -! { dg-warning "38:Mapping polymorphic list item at .1. is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-4 } - ! ------------------------- !$omp target parallel map(release: x) ! { dg-error "36:TARGET with map-type other than TO, FROM, TOFROM, or ALLOC on MAP clause" } diff --git a/gcc/testsuite/go.dg/pr119533-riscv-2.go b/gcc/testsuite/go.dg/pr119533-riscv-2.go new file mode 100644 index 0000000..ce3ffaf --- /dev/null +++ b/gcc/testsuite/go.dg/pr119533-riscv-2.go @@ -0,0 +1,42 @@ +// { dg-do compile { target riscv64*-*-* } } +// { dg-options "-O2 -march=rv64gcv -mabi=lp64d" } + +package ast + +type as struct { + bt []struct{} + an string +} + +func bj(a *as) string { + if b := a.bt; len(a.an) == 1 { + _ = b[0] + } + return a.an +} + +func MergePackageFiles(f map[string][]interface{}, g uint) []interface{} { + bl := make([]string, len(f)) + var bo []interface{} + bu := make(map[string]int) + for _, bm := range bl { + a := f[bm] + for _, d := range a { + if g != 0 { + if a, p := d.(*as); p { + n := bj(a) + if j, bp := bu[n]; bp { + _ = j + } + } + } + } + } + for _, bm := range bl { + _ = bm + } + for _, bm := range bl { + _ = f[bm] + } + return bo +} diff --git a/gcc/testsuite/go.dg/pr119533-riscv.go b/gcc/testsuite/go.dg/pr119533-riscv.go new file mode 100644 index 0000000..30f52d2 --- /dev/null +++ b/gcc/testsuite/go.dg/pr119533-riscv.go @@ -0,0 +1,120 @@ +// { dg-do compile { target riscv64*-*-* } } +// { dg-options "-O2 -march=rv64gcv -mabi=lp64d" } + +// Reduced from libgo build (multi-file reduction, merged mnaully +// and hand reduced again). + +package ast +import ( + "go/token" + "go/scanner" + "reflect" +) +type v struct {} +type w func( string, reflect.Value) bool +func x( string, reflect.Value) bool +type r struct { + scanner.ErrorList +} +type ab interface {} +type ae interface {} +type af interface {} +type ag struct {} +func (ag) Pos() token.Pos +func (ag) ah() token.Pos +type c struct { + aj ae } +type ak struct { + al []c } +type ( + am struct { + an string } + bs struct { + Value string + } +) +func ao(string) *am +type ( + ap interface {} + aq struct { + ar bs } +as struct { + bt ak + an am } +) +type File struct { + *ag + token.Pos + *am + at []af + *v + au []*aq + av *am + aw []*ag } +type ax struct { + an string + *v + ay map[string]File } +func a(az *token.FileSet, b token.Pos) int +type k struct { + l token.Pos + ah token.Pos +} +type m struct { + bb bool + bc *ag +} + +type bi uint +func bj(a *as) string { + if b := a.bt; len(b.al) == 1 { + c := b.al[0].aj + if e := c; e != nil {} + } + return a.an.an +} +func MergePackageFiles(f ax, g bi) *File { + h := 0 + bk := 0 + k := 0 + bl := make([]string, len(f.ay)) + i := 0 + for bm, a := range f.ay { + bl[i] = bm + k += len(a.at) + } + var bn *ag + var l token.Pos + if h > 0 {} + var bo []af + bu := make(map[string]int) + m := 0 + for _, bm := range bl { + a := f.ay[bm] + for _, d := range a.at { + if g!= 0 { + if a, p := d.(*as); p { + n := bj(a) + if j, bp := bu[n]; bp { + if bo != nil && bo[j]== nil {} + } + } + } + } + } + if m > 0 {} + var bq []*aq + q := make(map[string]bool) + for _, bm := range bl { + a := f.ay[bm] + for _, br := range a.au { + if o := br.ar.Value; q[o] {} + } + } + var bh = make([]*ag, bk) + for _, bm := range bl { + a := f.ay[bm] + copy(bh, a.aw) + } + return &File{bn, l, ao(f.an), bo, f.v, bq, nil, bh} +} diff --git a/gcc/testsuite/lib/gcc-dg.exp b/gcc/testsuite/lib/gcc-dg.exp index eadc1cd..6dd8fa3 100644 --- a/gcc/testsuite/lib/gcc-dg.exp +++ b/gcc/testsuite/lib/gcc-dg.exp @@ -507,8 +507,7 @@ if { [info procs ${tool}_load] != [list] \ set linenum 1 set outfile [open [lindex ${output-file} 1]] set do_fail 0 - set name [file tail [lindex ${output-file} 1]] - verbose "output-file args is $args program is $program" 1 + set name [testname-for-summary] while { [gets $outfile line] >= 0 } { if { $linenum != 1 } { set c [string index $output $idx] diff --git a/gcc/timevar.def b/gcc/timevar.def index c1029d9..02ace46 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -105,6 +105,7 @@ DEFTIMEVAR (TV_IPA_PURE_CONST , "ipa pure const") DEFTIMEVAR (TV_IPA_ICF , "ipa icf") DEFTIMEVAR (TV_IPA_PTA , "ipa points-to") DEFTIMEVAR (TV_IPA_SRA , "ipa SRA") +DEFTIMEVAR (TV_IPA_LC , "ipa locality clone") DEFTIMEVAR (TV_IPA_FREE_LANG_DATA , "ipa free lang data") DEFTIMEVAR (TV_IPA_FREE_INLINE_SUMMARY, "ipa free inline summary") DEFTIMEVAR (TV_IPA_MODREF , "ipa modref") diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 217c31f..7cb5a12 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -551,6 +551,7 @@ extern ipa_opt_pass_d *make_pass_ipa_cdtor_merge (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_single_use (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_comdats (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_modref (gcc::context *ctxt); +extern ipa_opt_pass_d *make_pass_ipa_locality_cloning (gcc::context *ctxt); extern gimple_opt_pass *make_pass_cleanup_cfg_post_optimizing (gcc::context *ctxt); diff --git a/gcc/tree-tailcall.cc b/gcc/tree-tailcall.cc index d5c4c7b..f593363 100644 --- a/gcc/tree-tailcall.cc +++ b/gcc/tree-tailcall.cc @@ -51,6 +51,8 @@ along with GCC; see the file COPYING3. If not see #include "symbol-summary.h" #include "ipa-cp.h" #include "ipa-prop.h" +#include "attribs.h" +#include "asan.h" /* The file implements the tail recursion elimination. It is also used to analyze the tail calls in general, passing the results to the rtl level @@ -122,6 +124,9 @@ struct tailcall /* True if it is a call to the current function. */ bool tail_recursion; + /* True if there is __tsan_func_exit call after the call. */ + bool has_tsan_func_exit; + /* The return value of the caller is mult * f + add, where f is the return value of the call. */ tree mult, add; @@ -504,7 +509,7 @@ maybe_error_musttail (gcall *call, const char *err, bool diag_musttail) Search at most CNT basic blocks (so that we don't need to do trivial loop discovery). */ static bool -empty_eh_cleanup (basic_block bb, int cnt) +empty_eh_cleanup (basic_block bb, int *eh_has_tsan_func_exit, int cnt) { if (EDGE_COUNT (bb->succs) > 1) return false; @@ -515,6 +520,14 @@ empty_eh_cleanup (basic_block bb, int cnt) gimple *g = gsi_stmt (gsi); if (is_gimple_debug (g) || gimple_clobber_p (g)) continue; + if (eh_has_tsan_func_exit + && !*eh_has_tsan_func_exit + && sanitize_flags_p (SANITIZE_THREAD) + && gimple_call_builtin_p (g, BUILT_IN_TSAN_FUNC_EXIT)) + { + *eh_has_tsan_func_exit = 1; + continue; + } if (is_gimple_resx (g) && stmt_can_throw_external (cfun, g)) return true; return false; @@ -523,7 +536,7 @@ empty_eh_cleanup (basic_block bb, int cnt) return false; if (cnt == 1) return false; - return empty_eh_cleanup (single_succ (bb), cnt - 1); + return empty_eh_cleanup (single_succ (bb), eh_has_tsan_func_exit, cnt - 1); } /* Argument for compute_live_vars/live_vars_at_stmt and what compute_live_vars @@ -531,14 +544,22 @@ empty_eh_cleanup (basic_block bb, int cnt) static live_vars_map *live_vars; static vec<bitmap_head> live_vars_vec; -/* Finds tailcalls falling into basic block BB. The list of found tailcalls is +/* Finds tailcalls falling into basic block BB. The list of found tailcalls is added to the start of RET. When ONLY_MUSTTAIL is set only handle musttail. Update OPT_TAILCALLS as output parameter. If DIAG_MUSTTAIL, diagnose - failures for musttail calls. */ + failures for musttail calls. RETRY_TSAN_FUNC_EXIT is initially 0 and + in that case the last call is attempted to be tail called, including + __tsan_func_exit with -fsanitize=thread. It is set to -1 if we + detect __tsan_func_exit call and in that case tree_optimize_tail_calls_1 + will retry with it set to 1 (regardless of whether turning the + __tsan_func_exit was successfully detected as tail call or not) and that + will allow turning musttail calls before that call into tail calls as well + by adding __tsan_func_exit call before the call. */ static void find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail, - bool &opt_tailcalls, bool diag_musttail) + bool &opt_tailcalls, bool diag_musttail, + int &retry_tsan_func_exit) { tree ass_var = NULL_TREE, ret_var, func, param; gimple *stmt; @@ -552,6 +573,8 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail, size_t idx; tree var; bool only_tailr = false; + bool has_tsan_func_exit = false; + int eh_has_tsan_func_exit = -1; if (!single_succ_p (bb) && (EDGE_COUNT (bb->succs) || !cfun->has_musttail || !diag_musttail)) @@ -585,6 +608,17 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail, || is_gimple_debug (stmt)) continue; + if (cfun->has_musttail + && sanitize_flags_p (SANITIZE_THREAD) + && gimple_call_builtin_p (stmt, BUILT_IN_TSAN_FUNC_EXIT) + && diag_musttail) + { + if (retry_tsan_func_exit == 0) + retry_tsan_func_exit = -1; + else if (retry_tsan_func_exit == 1) + continue; + } + if (!last_stmt) last_stmt = stmt; /* Check for a call. */ @@ -635,7 +669,7 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail, /* Recurse to the predecessors. */ FOR_EACH_EDGE (e, ei, bb->preds) find_tail_calls (e->src, ret, only_musttail, opt_tailcalls, - diag_musttail); + diag_musttail, retry_tsan_func_exit); return; } @@ -715,8 +749,12 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail, return; } + if (diag_musttail && gimple_call_must_tail_p (call)) + eh_has_tsan_func_exit = 0; if (!gimple_call_must_tail_p (call) - || !empty_eh_cleanup (e->dest, 20) + || !empty_eh_cleanup (e->dest, + eh_has_tsan_func_exit + ? NULL : &eh_has_tsan_func_exit, 20) || EDGE_COUNT (bb->succs) > 2) { maybe_error_musttail (call, _("call may throw exception caught " @@ -947,6 +985,17 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail, || is_gimple_debug (stmt)) continue; + if (cfun->has_musttail + && sanitize_flags_p (SANITIZE_THREAD) + && retry_tsan_func_exit == 1 + && gimple_call_builtin_p (stmt, BUILT_IN_TSAN_FUNC_EXIT) + && !has_tsan_func_exit + && gimple_call_must_tail_p (call)) + { + has_tsan_func_exit = true; + continue; + } + if (gimple_code (stmt) != GIMPLE_ASSIGN) { maybe_error_musttail (call, _("unhandled code after call"), @@ -1110,6 +1159,19 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail, return; } + if (eh_has_tsan_func_exit != -1 + && eh_has_tsan_func_exit != has_tsan_func_exit) + { + if (eh_has_tsan_func_exit) + maybe_error_musttail (call, _("call may throw exception caught " + "locally or perform cleanups"), + diag_musttail); + else + maybe_error_musttail (call, _("exception cleanups omit " + "__tsan_func_exit call"), diag_musttail); + return; + } + /* Move queued defs. */ if (tail_recursion) { @@ -1138,6 +1200,7 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail, nw->call_gsi = gsi; nw->tail_recursion = tail_recursion; + nw->has_tsan_func_exit = has_tsan_func_exit; nw->mult = m; nw->add = a; @@ -1472,6 +1535,14 @@ static bool optimize_tail_call (struct tailcall *t, bool opt_tailcalls, class loop *&new_loop) { + if (t->has_tsan_func_exit && (t->tail_recursion || opt_tailcalls)) + { + tree builtin_decl = builtin_decl_implicit (BUILT_IN_TSAN_FUNC_EXIT); + gimple *g = gimple_build_call (builtin_decl, 0); + gimple_set_location (g, cfun->function_end_locus); + gsi_insert_before (&t->call_gsi, g, GSI_SAME_STMT); + } + if (t->tail_recursion) { eliminate_tail_call (t, new_loop); @@ -1490,6 +1561,7 @@ optimize_tail_call (struct tailcall *t, bool opt_tailcalls, print_gimple_stmt (dump_file, stmt, 0, dump_flags); fprintf (dump_file, " in bb %i\n", (gsi_bb (t->call_gsi))->index); } + return t->has_tsan_func_exit; } return false; @@ -1539,12 +1611,23 @@ tree_optimize_tail_calls_1 (bool opt_tailcalls, bool only_musttail, /* Only traverse the normal exits, i.e. those that end with return statement. */ if (safe_is_a <greturn *> (*gsi_last_bb (e->src))) - find_tail_calls (e->src, &tailcalls, only_musttail, opt_tailcalls, - diag_musttail); + { + int retry_tsan_func_exit = 0; + find_tail_calls (e->src, &tailcalls, only_musttail, opt_tailcalls, + diag_musttail, retry_tsan_func_exit); + if (retry_tsan_func_exit == -1) + { + retry_tsan_func_exit = 1; + find_tail_calls (e->src, &tailcalls, only_musttail, + opt_tailcalls, diag_musttail, + retry_tsan_func_exit); + } + } } if (cfun->has_musttail && diag_musttail) { basic_block bb; + int retry_tsan_func_exit = 0; FOR_EACH_BB_FN (bb, cfun) if (EDGE_COUNT (bb->succs) == 0 || (single_succ_p (bb) @@ -1554,7 +1637,7 @@ tree_optimize_tail_calls_1 (bool opt_tailcalls, bool only_musttail, && gimple_call_must_tail_p (as_a <gcall *> (c)) && gimple_call_noreturn_p (as_a <gcall *> (c))) find_tail_calls (bb, &tailcalls, only_musttail, opt_tailcalls, - diag_musttail); + diag_musttail, retry_tsan_func_exit); } if (live_vars) |