diff options
125 files changed, 7793 insertions, 425 deletions
diff --git a/config/ChangeLog b/config/ChangeLog index 9268a8e..2551f82 100644 --- a/config/ChangeLog +++ b/config/ChangeLog @@ -1,3 +1,7 @@ +2025-04-15 Kyrylo Tkachov <ktkachov@nvidia.com> + + * bootstrap-lto-locality.mk: New file. + 2024-11-25 Sandra Loosemore <sloosemore@baylibre.com> * mt-nios2-elf: Deleted. diff --git a/config/bootstrap-lto-locality.mk b/config/bootstrap-lto-locality.mk new file mode 100644 index 0000000..b31565c --- /dev/null +++ b/config/bootstrap-lto-locality.mk @@ -0,0 +1,20 @@ +# This option enables LTO and locality partitioning for stage2 and stage3 in slim mode + +STAGE2_CFLAGS += -flto=jobserver -frandom-seed=1 -fipa-reorder-for-locality +STAGE3_CFLAGS += -flto=jobserver -frandom-seed=1 -fipa-reorder-for-locality +STAGEprofile_CFLAGS += -flto=jobserver -frandom-seed=1 -fipa-reorder-for-locality +STAGEtrain_CFLAGS += -flto=jobserver -frandom-seed=1 -fipa-reorder-for-locality +STAGEfeedback_CFLAGS += -flto=jobserver -frandom-seed=1 -fipa-reorder-for-locality + +# assumes the host supports the linker plugin +LTO_AR = $$r/$(HOST_SUBDIR)/prev-gcc/gcc-ar$(exeext) -B$$r/$(HOST_SUBDIR)/prev-gcc/ +LTO_RANLIB = $$r/$(HOST_SUBDIR)/prev-gcc/gcc-ranlib$(exeext) -B$$r/$(HOST_SUBDIR)/prev-gcc/ +LTO_NM = $$r/$(HOST_SUBDIR)/prev-gcc/gcc-nm$(exeext) -B$$r/$(HOST_SUBDIR)/prev-gcc/ + +LTO_EXPORTS = AR="$(LTO_AR)"; export AR; \ + RANLIB="$(LTO_RANLIB)"; export RANLIB; \ + NM="$(LTO_NM)"; export NM; +LTO_FLAGS_TO_PASS = AR="$(LTO_AR)" RANLIB="$(LTO_RANLIB)" NM="$(LTO_NM)" + +do-compare = $(SHELL) $(srcdir)/contrib/compare-lto $$f1 $$f2 +extra-compare = gcc/lto1$(exeext) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 05637fa..e082958 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,138 @@ +2025-04-15 Sandra Loosemore <sloosemore@baylibre.com> + + PR tree-optimization/71094 + * doc/invoke.texi (Optimize Options): Document that -fivopts is + enabled at -O1 and higher. Add blurb about -O0 causing GCC to + completely ignore most optimization options. + +2025-04-15 Iain Sandoe <iain@sandoe.co.uk> + + * configure: Regenerate. + * configure.ac: Recognise PROJECT:ld-mmmm.nn.aa as an identifier + for Darwin's static linker. + +2025-04-15 Iain Sandoe <iainsandoe@mini-05-seq.local> + + PR target/116827 + * ginclude/stddef.h: Undefine __PTRDIFF_T and __SIZE_T for module- + enabled c++ on Darwin/macOS platforms. + +2025-04-15 Kyrylo Tkachov <ktkachov@nvidia.com> + + * common.opt.urls: Regenerate. + +2025-04-15 Jan Hubicka <hubicka@ucw.cz> + + * config/i386/x86-tune-sched.cc (ix86_issue_rate): Set + to 4 for znver5. + +2025-04-15 Jan Hubicka <hubicka@ucw.cz> + + PR target/119298 + * config/i386/x86-tune-costs.h (znver5_cost): Set ADDSS cost to 3. + +2025-04-15 Vineet Gupta <vineetg@rivosinc.com> + + PR target/119533 + * config/riscv/riscv-vsetvl.cc (invalid_opt_bb_p): Check for + EDGE_ABNOMAL. + (pre_vsetvl::compute_lcm_local_properties): Initialize kill + bitmap. + Debug dump skipped edge. + +2025-04-15 Robin Dapp <rdapp@ventanamicro.com> + + PR target/119547 + * config/riscv/riscv-vsetvl.cc (pre_vsetvl::earliest_fuse_vsetvl_info): + Do not perform lift if block is not transparent. + +2025-04-15 Kyrylo Tkachov <ktkachov@nvidia.com> + + * Makefile.in (OBJS): Add ipa-locality-cloning.o. + * cgraph.h (set_new_clone_decl_and_node_flags): Declare prototype. + * cgraphclones.cc (set_new_clone_decl_and_node_flags): Remove static + qualifier. + * common.opt (fipa-reorder-for-locality): New flag. + (LTO_PARTITION_DEFAULT): Declare. + (flto-partition): Change default to LTO_PARTITION_DFEAULT. + * doc/invoke.texi: Document -fipa-reorder-for-locality. + * flag-types.h (enum lto_locality_cloning_model): Declare. + (lto_partitioning_model): Add LTO_PARTITION_DEFAULT. + * lto-cgraph.cc (lto_set_symtab_encoder_in_partition): Add dumping of + node and index. + * opts.cc (validate_ipa_reorder_locality_lto_partition): Define. + (finish_options): Handle LTO_PARTITION_DEFAULT. + * params.opt (lto_locality_cloning_model): New enum. + (lto-partition-locality-cloning): New param. + (lto-partition-locality-frequency-cutoff): Likewise. + (lto-partition-locality-size-cutoff): Likewise. + (lto-max-locality-partition): Likewise. + * passes.def: Register pass_ipa_locality_cloning. + * timevar.def (TV_IPA_LC): New timevar. + * tree-pass.h (make_pass_ipa_locality_cloning): Declare. + * ipa-locality-cloning.cc: New file. + * ipa-locality-cloning.h: New file. + +2025-04-15 Martin Jambor <mjambor@suse.cz> + Jakub Jelinek <jakub@redhat.com> + + PR ipa/119803 + * ipa-cp.cc (ipcp_bits_lattice::meet_with_1): Move m_value adjustmed + according to m_mask below the adjustment of the latter according to + cap_mask. Optimize the calculation of cap_mask a bit. + (ipcp_bits_lattice::meet_with): Optimize the calculation of cap_mask a + bit. + +2025-04-15 Jakub Jelinek <jakub@redhat.com> + + * ipa-cp.cc (ipcp_print_widest_int): Print values with all ones in + bits 128+ with "0xf..f" prefix instead of "all ones folled by ". + Simplify wide_int check for -1 or all ones above least significant + 128 bits. + +2025-04-15 Jakub Jelinek <jakub@redhat.com> + + PR sanitizer/119801 + * sanitizer.def (BUILT_IN_TSAN_FUNC_EXIT): Use BT_FN_VOID rather + than BT_FN_VOID_PTR. + * tree-tailcall.cc: Include attribs.h and asan.h. + (struct tailcall): Add has_tsan_func_exit member. + (empty_eh_cleanup): Add eh_has_tsan_func_exit argument, set what + it points to to 1 if there is exactly one __tsan_func_exit call + and ignore that call otherwise. Adjust recursive call. + (find_tail_calls): Add RETRY_TSAN_FUNC_EXIT argument, pass it + to recursive calls. When seeing __tsan_func_exit call with + RETRY_TSAN_FUNC_EXIT 0, set it to -1. If RETRY_TSAN_FUNC_EXIT + is 1, initially ignore __tsan_func_exit calls. Adjust + empty_eh_cleanup caller. When looking through stmts after the call, + ignore exactly one __tsan_func_exit call but remember it in + t->has_tsan_func_exit. Diagnose if EH cleanups didn't have + __tsan_func_exit and normal path did or vice versa. + (optimize_tail_call): Emit __tsan_func_exit before the tail call + or tail recursion. + (tree_optimize_tail_calls_1): Adjust find_tail_calls callers. If + find_tail_calls changes retry_tsan_func_exit to -1, set it to 1 + and call it again with otherwise the same arguments. + +2025-04-15 Sandra Loosemore <sloosemore@baylibre.com> + + PR ipa/113203 + * doc/extend.texi (Common Function Attributes): Explain how to + use always_inline in programs that have multiple translation + units, and that LTO inlining additionally needs optimization + enabled. + +2025-04-15 liuhongt <hongtao.liu@intel.com> + + PR target/108134 + * doc/extend.texi: Remove documents from r11-344-g0fec3f62b9bfc0. + +2025-04-15 Sandra Loosemore <sloosemore@baylibre.com> + + PR target/42683 + * doc/invoke.texi (x86 Options): Clarify that -march=pentiumpro + doesn't include MMX. + 2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> PR target/118794 diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 6a8ff35..c9d404d 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20250415 +20250416 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index ebfcd8a..55b4cd7 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1555,6 +1555,7 @@ OBJS = \ incpath.o \ init-regs.o \ internal-fn.o \ + ipa-locality-cloning.o \ ipa-cp.o \ ipa-sra.o \ ipa-devirt.o \ @@ -3026,6 +3027,7 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/ipa-param-manipulation.h $(srcdir)/ipa-sra.cc \ $(srcdir)/ipa-modref.h $(srcdir)/ipa-modref.cc \ $(srcdir)/ipa-modref-tree.h \ + $(srcdir)/ipa-locality-cloning.cc \ $(srcdir)/signop.h \ $(srcdir)/diagnostic-spec.h $(srcdir)/diagnostic-spec.cc \ $(srcdir)/dwarf2out.h \ diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index dec9f16..f75a0f6 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,9 @@ +2025-04-15 Qing Zhao <qing.zhao@oracle.com> + + PR c/119717 + * c-typeck.cc (build_access_with_size_for_counted_by): Fully fold the + parameters for call to .ACCESS_WITH_SIZE. + 2025-04-08 Martin Uecker <uecker@tugraz.at> PR c/119612 diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc index 3870e8a..55d896e 100644 --- a/gcc/c/c-typeck.cc +++ b/gcc/c/c-typeck.cc @@ -3013,12 +3013,16 @@ build_access_with_size_for_counted_by (location_t loc, tree ref, gcc_assert (c_flexible_array_member_type_p (TREE_TYPE (ref))); /* The result type of the call is a pointer to the flexible array type. */ tree result_type = c_build_pointer_type (TREE_TYPE (ref)); + tree first_param + = c_fully_fold (array_to_pointer_conversion (loc, ref), false, NULL); + tree second_param + = c_fully_fold (counted_by_ref, false, NULL); tree call = build_call_expr_internal_loc (loc, IFN_ACCESS_WITH_SIZE, result_type, 6, - array_to_pointer_conversion (loc, ref), - counted_by_ref, + first_param, + second_param, build_int_cst (integer_type_node, 1), build_int_cst (counted_by_type, 0), build_int_cst (integer_type_node, -1), diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 065fcc7..abde770 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -2627,6 +2627,7 @@ void tree_function_versioning (tree, tree, vec<ipa_replace_map *, va_gc> *, void dump_callgraph_transformation (const cgraph_node *original, const cgraph_node *clone, const char *suffix); +void set_new_clone_decl_and_node_flags (cgraph_node *new_node); /* In cgraphbuild.cc */ int compute_call_stmt_bb_frequency (tree, basic_block bb); void record_references_in_initializer (tree, bool); diff --git a/gcc/cgraphclones.cc b/gcc/cgraphclones.cc index 5332a43..e6223fa 100644 --- a/gcc/cgraphclones.cc +++ b/gcc/cgraphclones.cc @@ -158,7 +158,7 @@ cgraph_edge::clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid, /* Set flags of NEW_NODE and its decl. NEW_NODE is a newly created private clone or its thunk. */ -static void +void set_new_clone_decl_and_node_flags (cgraph_node *new_node) { DECL_EXTERNAL (new_node->decl) = 0; diff --git a/gcc/cobol/ChangeLog b/gcc/cobol/ChangeLog index f537f05..59a1107 100644 --- a/gcc/cobol/ChangeLog +++ b/gcc/cobol/ChangeLog @@ -1,3 +1,10 @@ +2025-04-15 Richard Biener <rguenther@suse.de> + + PR cobol/119302 + * Make-lang.in (GCOBOLIO_INSTALL_NAME): Define. + Use $(GCOBOLIO_INSTALL_NAME) for gcobol.3 manpage source + upon install. + 2025-04-14 Jakub Jelinek <jakub@redhat.com> PR cobol/119776 diff --git a/gcc/cobol/Make-lang.in b/gcc/cobol/Make-lang.in index 422ebe2..9b74dd3 100644 --- a/gcc/cobol/Make-lang.in +++ b/gcc/cobol/Make-lang.in @@ -35,6 +35,7 @@ # - define the names for selecting the language in LANGUAGES. GCOBOL_INSTALL_NAME := $(shell echo gcobol|sed '$(program_transform_name)') +GCOBOLIO_INSTALL_NAME := $(shell echo gcobol-io|sed '$(program_transform_name)') GCOBOL_TARGET_INSTALL_NAME := $(target_noncanonical)-$(shell echo gcobol|sed '$(program_transform_name)') GCOBC_INSTALL_NAME := $(shell echo gcobc|sed '$(program_transform_name)') @@ -293,7 +294,7 @@ cobol.install-common: installdirs cobol.install-man: installdirs $(INSTALL_DATA) $(srcdir)/cobol/gcobol.1 $(DESTDIR)$(man1dir)/$(GCOBOL_INSTALL_NAME)$(man1ext) - $(INSTALL_DATA) $(srcdir)/cobol/gcobol.3 $(DESTDIR)$(man3dir)/ + $(INSTALL_DATA) $(srcdir)/cobol/gcobol.3 $(DESTDIR)$(man3dir)/$(GCOBOLIO_INSTALL_NAME)$(man3ext) cobol.install-info: @@ -342,8 +343,8 @@ cobol.uninstall: rm -rf $(DESTDIR)$(bindir)/$(GCOBOL_INSTALL_NAME)$(exeext) \ $(DESTDIR)$(bindir)/$(GCOBC_INSTALL_NAME) \ $(DESTDIR)$(datadir)/gcobol/ \ - $(DESTDIR)$(man1dir)/$(GCOBOL_INSTALL_NAME).1 \ - $(DESTDIR)$(man3dir)/gcobol.3 + $(DESTDIR)$(man1dir)/$(GCOBOL_INSTALL_NAME)$(man1ext) \ + $(DESTDIR)$(man3dir)/$(GCOBOLIO_INSTALL_NAME)$(man3ext) cobol.man: cobol.srcman: diff --git a/gcc/common.opt b/gcc/common.opt index 2c8fdde..88d987e 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2116,6 +2116,10 @@ fipa-modref Common Var(flag_ipa_modref) Optimization Perform interprocedural modref analysis. +fipa-reorder-for-locality +Common Var(flag_ipa_reorder_for_locality) Init(0) Optimization +Perform reordering and cloning of functions to maximize locality. + fipa-profile Common Var(flag_ipa_profile) Init(0) Optimization Perform interprocedural profile propagation. @@ -2275,6 +2279,9 @@ Enum Name(lto_partition_model) Type(enum lto_partition_model) UnknownError(unknown LTO partitioning model %qs) EnumValue +Enum(lto_partition_model) String(default) Value(LTO_PARTITION_DEFAULT) + +EnumValue Enum(lto_partition_model) String(none) Value(LTO_PARTITION_NONE) EnumValue @@ -2293,7 +2300,7 @@ EnumValue Enum(lto_partition_model) String(cache) Value(LTO_PARTITION_CACHE) flto-partition= -Common Joined RejectNegative Enum(lto_partition_model) Var(flag_lto_partition) Init(LTO_PARTITION_BALANCED) +Common Joined RejectNegative Enum(lto_partition_model) Var(flag_lto_partition) Init(LTO_PARTITION_DEFAULT) Specify the algorithm to partition symbols and vars at linktime. ; The initial value of -1 comes from Z_DEFAULT_COMPRESSION in zlib.h. diff --git a/gcc/common.opt.urls b/gcc/common.opt.urls index a4b14f5..8bd75b1 100644 --- a/gcc/common.opt.urls +++ b/gcc/common.opt.urls @@ -868,6 +868,9 @@ UrlSuffix(gcc/Optimize-Options.html#index-fipa-bit-cp) fipa-modref UrlSuffix(gcc/Optimize-Options.html#index-fipa-modref) +fipa-reorder-for-locality +UrlSuffix(gcc/Optimize-Options.html#index-fipa-reorder-for-locality) + fipa-profile UrlSuffix(gcc/Optimize-Options.html#index-fipa-profile) diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 3dbd659..d4af370 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -3133,9 +3133,9 @@ "TARGET_SVE" { rtx tmp = gen_reg_rtx (<MODE>mode); - emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1], - CONST1_RTX (<MODE>mode), - CONST0_RTX (<MODE>mode))); + emit_insn (gen_vcond_mask_<mode><vpred> (tmp, CONST1_RTX (<MODE>mode), + CONST0_RTX (<MODE>mode), + operands[1])); emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2])); DONE; } diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 4e80114..433ec97 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -31073,8 +31073,6 @@ aarch64_valid_sysreg_name_p (const char *regname) const sysreg_t *sysreg = aarch64_lookup_sysreg_map (regname); if (sysreg == NULL) return aarch64_is_implem_def_reg (regname); - if (sysreg->arch_reqs) - return bool (aarch64_isa_flags & sysreg->arch_reqs); return true; } @@ -31098,8 +31096,6 @@ aarch64_retrieve_sysreg (const char *regname, bool write_p, bool is128op) if ((write_p && (sysreg->properties & F_REG_READ)) || (!write_p && (sysreg->properties & F_REG_WRITE))) return NULL; - if ((~aarch64_isa_flags & sysreg->arch_reqs) != 0) - return NULL; return sysreg->encoding; } diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index 7c8cb73..9477345 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -2120,7 +2120,7 @@ struct processor_costs znver5_cost = { COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ /* ADDSS has throughput 2 and latency 2 (in some cases when source is another addition). */ - COSTS_N_INSNS (2), /* cost of ADDSS/SD SUBSS/SD insns. */ + COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ /* MULSS has throughput 2 and latency 3. */ COSTS_N_INSNS (3), /* cost of MULSS instruction. */ COSTS_N_INSNS (3), /* cost of MULSD instruction. */ diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc index 685a83c..15d3d91 100644 --- a/gcc/config/i386/x86-tune-sched.cc +++ b/gcc/config/i386/x86-tune-sched.cc @@ -81,6 +81,14 @@ ix86_issue_rate (void) case PROCESSOR_YONGFENG: case PROCESSOR_SHIJIDADAO: case PROCESSOR_GENERIC: + /* For znver5 decoder can handle 4 or 8 instructions per cycle, + op cache 12 instruction/cycle, dispatch 8 instructions + integer rename 8 instructions and Fp 6 instructions. + + The scheduler, without understanding out of order nature of the CPU + is not going to be able to use more than 4 instructions since that + is limits of the decoders. */ + case PROCESSOR_ZNVER5: return 4; case PROCESSOR_ICELAKE_CLIENT: @@ -91,13 +99,6 @@ ix86_issue_rate (void) return 5; case PROCESSOR_SAPPHIRERAPIDS: - /* For znver5 decoder can handle 4 or 8 instructions per cycle, - op cache 12 instruction/cycle, dispatch 8 instructions - integer rename 8 instructions and Fp 6 instructions. - - The scheduler, without understanding out of order nature of the CPU - is unlikely going to be able to fill all of these. */ - case PROCESSOR_ZNVER5: return 6; default: diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 0ac2538..a8c9256 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -685,7 +685,7 @@ invalid_opt_bb_p (basic_block cfg_bb) /* We only do LCM optimizations on blocks that are post dominated by EXIT block, that is, we don't do LCM optimizations on infinite loop. */ FOR_EACH_EDGE (e, ei, cfg_bb->succs) - if (e->flags & EDGE_FAKE) + if ((e->flags & EDGE_FAKE) || (e->flags & EDGE_ABNORMAL)) return true; return false; @@ -2698,6 +2698,7 @@ pre_vsetvl::compute_lcm_local_properties () m_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), num_exprs); bitmap_vector_clear (m_avloc, last_basic_block_for_fn (cfun)); + bitmap_vector_clear (m_kill, last_basic_block_for_fn (cfun)); bitmap_vector_clear (m_antloc, last_basic_block_for_fn (cfun)); bitmap_vector_ones (m_transp, last_basic_block_for_fn (cfun)); @@ -2749,6 +2750,10 @@ pre_vsetvl::compute_lcm_local_properties () if (invalid_opt_bb_p (bb->cfg_bb ())) { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\n --- skipping bb %u due to weird edge", + bb->index ()); + bitmap_clear (m_antloc[bb_index]); bitmap_clear (m_transp[bb_index]); } @@ -3022,6 +3027,18 @@ pre_vsetvl::earliest_fuse_vsetvl_info (int iter) continue; } + /* We cannot lift a vsetvl into the source block if the block is + not transparent WRT to it. + This is too restrictive for blocks where a register's use only + feeds into vsetvls and no regular insns. One example is the + test rvv/vsetvl/avl_single-68.c which is currently XFAILed for + that reason. + In order to support this case we'd need to check the vsetvl's + AVL operand's uses in the source block and make sure they are + only used in other vsetvls. */ + if (!bitmap_bit_p (m_transp[eg->src->index], expr_index)) + continue; + if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, diff --git a/gcc/configure b/gcc/configure index 821f8b4..1696595 100755 --- a/gcc/configure +++ b/gcc/configure @@ -3948,7 +3948,7 @@ if test x"${DEFAULT_LINKER+set}" = x"set"; then as_fn_error $? "cannot execute: $DEFAULT_LINKER: check --with-ld or env. var. DEFAULT_LINKER" "$LINENO" 5 elif $DEFAULT_LINKER -v < /dev/null 2>&1 | grep GNU > /dev/null; then gnu_ld_flag=yes - elif $DEFAULT_LINKER -v < /dev/null 2>&1 | grep ld64- > /dev/null; then + elif $DEFAULT_LINKER -v < /dev/null 2>&1 | grep 'PROJECT:ld\(64\)*-' > /dev/null; then ld64_flag=yes fi @@ -32730,8 +32730,9 @@ $as_echo "$gcc_cv_ld64_major" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: checking linker version" >&5 $as_echo_n "checking linker version... " >&6; } if test x"${gcc_cv_ld64_version}" = x; then - gcc_cv_ld64_version=`$gcc_cv_ld -v 2>&1 | $EGREP 'ld64|dyld' \ - | sed -e 's/.*ld64-//' -e 's/.*dyld-//'| awk '{print $1}'` + gcc_cv_ld64_version=`$gcc_cv_ld -v 2>&1 | $EGREP 'ld64|dyld|PROJECT:ld' \ + | sed -e 's/.*ld64-//' -e 's/.*dyld-//' -e 's/.*PROJECT:ld-//' \ + | awk '{print $1}'` fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_ld64_version" >&5 $as_echo "$gcc_cv_ld64_version" >&6; } diff --git a/gcc/configure.ac b/gcc/configure.ac index 3d0a4e6..9f67e62 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -358,7 +358,7 @@ if test x"${DEFAULT_LINKER+set}" = x"set"; then AC_MSG_ERROR([cannot execute: $DEFAULT_LINKER: check --with-ld or env. var. DEFAULT_LINKER]) elif $DEFAULT_LINKER -v < /dev/null 2>&1 | grep GNU > /dev/null; then gnu_ld_flag=yes - elif $DEFAULT_LINKER -v < /dev/null 2>&1 | grep ld64- > /dev/null; then + elif $DEFAULT_LINKER -v < /dev/null 2>&1 | grep 'PROJECT:ld\(64\)*-' > /dev/null; then ld64_flag=yes fi AC_DEFINE_UNQUOTED(DEFAULT_LINKER,"$DEFAULT_LINKER", @@ -6418,8 +6418,9 @@ if test x"$ld64_flag" = x"yes"; then # If the version was not specified, try to find it. AC_MSG_CHECKING(linker version) if test x"${gcc_cv_ld64_version}" = x; then - gcc_cv_ld64_version=`$gcc_cv_ld -v 2>&1 | $EGREP 'ld64|dyld' \ - | sed -e 's/.*ld64-//' -e 's/.*dyld-//'| awk '{print $1}'` + gcc_cv_ld64_version=`$gcc_cv_ld -v 2>&1 | $EGREP 'ld64|dyld|PROJECT:ld' \ + | sed -e 's/.*ld64-//' -e 's/.*dyld-//' -e 's/.*PROJECT:ld-//' \ + | awk '{print $1}'` fi AC_MSG_RESULT($gcc_cv_ld64_version) diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 94470dc..aa2f076 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,29 @@ +2025-04-15 Nathaniel Shead <nathanieloshead@gmail.com> + + PR c++/119755 + * lambda.cc (prune_lambda_captures): Remove pruned capture from + function's BLOCK_VARS and BIND_EXPR_VARS. + +2025-04-15 Jason Merrill <jason@redhat.com> + + PR c++/111075 + * constexpr.cc (cxx_eval_call_expression): Allow trivial + call from a thunk. + +2025-04-15 Patrick Palka <ppalka@redhat.com> + + PR c++/119807 + PR c++/112288 + * pt.cc (tsubst_friend_function): Skip remapping an + existing specialization if it doesn't match the shape of + the new friend definition. + +2025-04-15 Jason Merrill <jason@redhat.com> + + PR c++/113835 + * constexpr.cc (cxx_eval_outermost_constant_expr): Bail out early + for std::vector(N). + 2025-04-14 Patrick Palka <ppalka@redhat.com> PR c++/99214 diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc index dc59f59..4346b29 100644 --- a/gcc/cp/constexpr.cc +++ b/gcc/cp/constexpr.cc @@ -3103,6 +3103,9 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t, we can only get a trivial function here with -fno-elide-constructors. */ gcc_checking_assert (!trivial_fn_p (fun) || !flag_elide_constructors + /* Or it's a call from maybe_thunk_body (111075). */ + || (TREE_CODE (t) == CALL_EXPR ? CALL_FROM_THUNK_P (t) + : AGGR_INIT_FROM_THUNK_P (t)) /* We don't elide constructors when processing a noexcept-expression. */ || cp_noexcept_operand); diff --git a/gcc/cp/lambda.cc b/gcc/cp/lambda.cc index f0a54b6..b2e0ecd 100644 --- a/gcc/cp/lambda.cc +++ b/gcc/cp/lambda.cc @@ -1858,6 +1858,13 @@ prune_lambda_captures (tree body) cp_walk_tree_without_duplicates (&body, mark_const_cap_r, &const_vars); + tree bind_expr = expr_single (DECL_SAVED_TREE (lambda_function (lam))); + if (bind_expr && TREE_CODE (bind_expr) == MUST_NOT_THROW_EXPR) + bind_expr = expr_single (TREE_OPERAND (bind_expr, 0)); + /* FIXME: We don't currently handle noexcept lambda captures correctly, + so bind_expr may not be set; see PR c++/119764. */ + gcc_assert (!bind_expr || TREE_CODE (bind_expr) == BIND_EXPR); + tree *fieldp = &TYPE_FIELDS (LAMBDA_EXPR_CLOSURE (lam)); for (tree *capp = &LAMBDA_EXPR_CAPTURE_LIST (lam); *capp; ) { @@ -1879,6 +1886,23 @@ prune_lambda_captures (tree body) fieldp = &DECL_CHAIN (*fieldp); *fieldp = DECL_CHAIN (*fieldp); + /* And out of the bindings for the function. */ + tree *blockp = &BLOCK_VARS (current_binding_level->blocks); + while (*blockp != DECL_EXPR_DECL (**use)) + blockp = &DECL_CHAIN (*blockp); + *blockp = DECL_CHAIN (*blockp); + + /* And maybe out of the vars declared in the containing + BIND_EXPR, if it's listed there. */ + if (bind_expr) + { + tree *bindp = &BIND_EXPR_VARS (bind_expr); + while (*bindp && *bindp != DECL_EXPR_DECL (**use)) + bindp = &DECL_CHAIN (*bindp); + if (*bindp) + *bindp = DECL_CHAIN (*bindp); + } + /* And remove the capture proxy declaration. */ **use = void_node; continue; diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index b7060b4..4349b19 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -11772,6 +11772,10 @@ tsubst_friend_function (tree decl, tree args) elt.args = DECL_TI_ARGS (spec); elt.spec = NULL_TREE; + if (TMPL_ARGS_HAVE_MULTIPLE_LEVELS (DECL_TI_ARGS (spec)) + && !is_specialization_of_friend (spec, new_template)) + continue; + decl_specializations->remove_elt (&elt); tree& spec_args = DECL_TI_ARGS (spec); diff --git a/gcc/d/ChangeLog b/gcc/d/ChangeLog index b0a4f12..b025453 100644 --- a/gcc/d/ChangeLog +++ b/gcc/d/ChangeLog @@ -1,3 +1,22 @@ +2025-04-15 Iain Buclaw <ibuclaw@gdcproject.org> + + PR d/119826 + * types.cc (TypeVisitor::visit (TypeEnum *)): Propagate flags of main + enum types to all forward-referenced variants. + +2025-04-15 Iain Buclaw <ibuclaw@gdcproject.org> + + PR d/119799 + * decl.cc (DeclVisitor::visit (VarDeclaration *)): Check front-end + type size before building the VAR_DECL. Allow C symbols to have a + size of `0'. + +2025-04-15 Iain Buclaw <ibuclaw@gdcproject.org> + + PR d/119817 + * imports.cc (ImportVisitor::visit (OverloadSet *)): Don't push + NULL_TREE to vector of import symbols. + 2025-04-12 Iain Buclaw <ibuclaw@gdcproject.org> PR d/109023 diff --git a/gcc/d/decl.cc b/gcc/d/decl.cc index 136f78b..9ddf7cf 100644 --- a/gcc/d/decl.cc +++ b/gcc/d/decl.cc @@ -791,6 +791,12 @@ public: } else if (d->isDataseg ()) { + /* When the front-end type size is invalid, an error has already been + given for the declaration or type. */ + dinteger_t size = dmd::size (d->type, d->loc); + if (size == SIZE_INVALID) + return; + tree decl = get_symbol_decl (d); /* Only need to build the VAR_DECL for extern declarations. */ @@ -804,9 +810,7 @@ public: return; /* How big a symbol can be should depend on back-end. */ - tree size = build_integer_cst (dmd::size (d->type, d->loc), - build_ctype (Type::tsize_t)); - if (!valid_constant_size_p (size)) + if (!valid_constant_size_p (build_integer_cst (size, size_type_node))) { error_at (make_location_t (d->loc), "size is too large"); return; @@ -835,8 +839,9 @@ public: } /* Frontend should have already caught this. */ - gcc_assert (!integer_zerop (size) - || d->type->toBasetype ()->isTypeSArray ()); + gcc_assert ((size != 0 && size != SIZE_INVALID) + || d->type->toBasetype ()->isTypeSArray () + || d->isCsymbol ()); d_finish_decl (decl); diff --git a/gcc/d/imports.cc b/gcc/d/imports.cc index 776caaf..16e4df6 100644 --- a/gcc/d/imports.cc +++ b/gcc/d/imports.cc @@ -182,7 +182,11 @@ public: vec_alloc (tset, d->a.length); for (size_t i = 0; i < d->a.length; i++) - vec_safe_push (tset, build_import_decl (d->a[i])); + { + tree overload = build_import_decl (d->a[i]); + if (overload != NULL_TREE) + vec_safe_push (tset, overload); + } this->result_ = build_tree_list_vec (tset); tset->truncate (0); diff --git a/gcc/d/types.cc b/gcc/d/types.cc index e43fa88..1c74840 100644 --- a/gcc/d/types.cc +++ b/gcc/d/types.cc @@ -1179,6 +1179,26 @@ public: layout_type (t->ctype); + /* Fix up all forward-referenced variants of this enum type. */ + for (tree v = TYPE_MAIN_VARIANT (t->ctype); v; + v = TYPE_NEXT_VARIANT (v)) + { + if (v == t->ctype) + continue; + + TYPE_VALUES (v) = TYPE_VALUES (t->ctype); + TYPE_LANG_SPECIFIC (v) = TYPE_LANG_SPECIFIC (t->ctype); + TYPE_MIN_VALUE (v) = TYPE_MIN_VALUE (t->ctype); + TYPE_MAX_VALUE (v) = TYPE_MAX_VALUE (t->ctype); + TYPE_UNSIGNED (v) = TYPE_UNSIGNED (t->ctype); + TYPE_SIZE (v) = TYPE_SIZE (t->ctype); + TYPE_SIZE_UNIT (v) = TYPE_SIZE_UNIT (t->ctype); + SET_TYPE_MODE (v, TYPE_MODE (t->ctype)); + TYPE_PRECISION (v) = TYPE_PRECISION (t->ctype); + SET_TYPE_ALIGN (v, TYPE_ALIGN (t->ctype)); + TYPE_USER_ALIGN (v) = TYPE_USER_ALIGN (t->ctype); + } + /* Complete forward-referenced fields of this enum type. */ finish_incomplete_fields (t->ctype); } diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 1f7657b..0b6644b 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -593,7 +593,7 @@ Objective-C and Objective-C++ Dialects}. -finline-functions -finline-functions-called-once -finline-limit=@var{n} -finline-small-functions -fipa-modref -fipa-cp -fipa-cp-clone -fipa-bit-cp -fipa-vrp -fipa-pta -fipa-profile -fipa-pure-const --fipa-reference -fipa-reference-addressable +-fipa-reference -fipa-reference-addressable -fipa-reorder-for-locality -fipa-stack-alignment -fipa-icf -fira-algorithm=@var{algorithm} -flate-combine-instructions -flifetime-dse -flive-patching=@var{level} -fira-region=@var{region} -fira-hoist-pressure @@ -12746,6 +12746,7 @@ complexity than at @option{-O}. -fipa-pure-const -fipa-reference -fipa-reference-addressable +-fivopts -fmerge-constants -fmove-loop-invariants -fmove-loop-stores @@ -12854,6 +12855,13 @@ by @option{-O2} and also turns on the following optimization flags: Reduce compilation time and make debugging produce the expected results. This is the default. +At @option{-O0}, GCC completely disables most optimization passes; +they are not run even if you explicitly enable them on the command +line, or are listed by @option{-Q --help=optimizers} as being enabled by +default. Many optimizations performed by GCC depend on code analysis +or canonicalization passes that are enabled by @option{-O}, and it would +not be useful to run individual optimization passes in isolation. + @opindex Os @item -Os Optimize for size. @option{-Os} enables all @option{-O2} optimizations @@ -13871,6 +13879,21 @@ Enabled by default at @option{-O1} and higher. Discover read-only, write-only and non-addressable static variables. Enabled by default at @option{-O1} and higher. +@opindex fipa-reorder-for-locality +@item -fipa-reorder-for-locality +Group call chains close together in the binary layout to improve code +locality and minimize jump distances between frequently called functions. +Unlike @option{-freorder-functions} this pass considers the call +chains between functions and groups them together, rather than grouping all +hot/normal/cold/never-executed functions into separate sections. +Unlike @option{-fprofile-reorder-functions} it aims to improve code locality +throughout the runtime of the program rather than focusing on program startup. +This option is incompatible with an explicit +@option{-flto-partition=} option since it enforces a custom partitioning +scheme. +If using this option it is recommended to also use profile feedback, but this +option is not enabled by default otherwise. + @opindex fipa-stack-alignment @item -fipa-stack-alignment Reduce stack alignment on call sites if possible. @@ -14291,6 +14314,7 @@ Enabled by default at @option{-O1} and higher. @item -fivopts Perform induction variable optimizations (strength reduction, induction variable merging and induction variable elimination) on trees. +Enabled by default at @option{-O1} and higher. @opindex ftree-parallelize-loops @item -ftree-parallelize-loops=n @@ -14606,11 +14630,13 @@ Enabled for x86 at levels @option{-O2}, @option{-O3}, @option{-Os}. @opindex freorder-functions @item -freorder-functions Reorder functions in the object file in order to -improve code locality. This is implemented by using special -subsections @code{.text.hot} for most frequently executed functions and -@code{.text.unlikely} for unlikely executed functions. Reordering is done by -the linker so object file format must support named sections and linker must -place them in a reasonable way. +improve code locality. Unlike @option{-fipa-reorder-for-locality} this option +prioritises grouping all functions within a category +(hot/normal/cold/never-executed) together. +This is implemented by using special subsections @code{.text.hot} for most +frequently executed functions and @code{.text.unlikely} for unlikely executed +functions. Reordering is done by the linker so object file format must support +named sections and linker must place them in a reasonable way. This option isn't effective unless you either provide profile feedback (see @option{-fprofile-arcs} for details) or manually annotate functions with @@ -15635,7 +15661,8 @@ Enabled by @option{-fprofile-generate}, @option{-fprofile-use}, and @item -fprofile-reorder-functions Function reordering based on profile instrumentation collects first time of execution of a function and orders these functions -in ascending order. +in ascending order, aiming to optimize program startup through more +efficient loading of text segments. Enabled with @option{-fprofile-use}. diff --git a/gcc/flag-types.h b/gcc/flag-types.h index 0127698..db57376 100644 --- a/gcc/flag-types.h +++ b/gcc/flag-types.h @@ -404,7 +404,15 @@ enum lto_partition_model { LTO_PARTITION_BALANCED = 2, LTO_PARTITION_1TO1 = 3, LTO_PARTITION_MAX = 4, - LTO_PARTITION_CACHE = 5 + LTO_PARTITION_CACHE = 5, + LTO_PARTITION_DEFAULT= 6 +}; + +/* flag_lto_locality_cloning initialization values. */ +enum lto_locality_cloning_model { + LTO_LOCALITY_NO_CLONING = 0, + LTO_LOCALITY_NON_INTERPOSABLE_CLONING = 1, + LTO_LOCALITY_MAXIMAL_CLONING = 2, }; /* flag_lto_linker_output initialization values. */ diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index a10d8df..55bff2e 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,32 @@ +2025-04-15 Tobias Burnus <tburnus@baylibre.com> + + * f95-lang.cc (LANG_HOOKS_OMP_DEEP_MAPPING, + LANG_HOOKS_OMP_DEEP_MAPPING_P, LANG_HOOKS_OMP_DEEP_MAPPING_CNT): + Define. + * openmp.cc (gfc_match_omp_clause_reduction): Fix location setting. + (resolve_omp_clauses): Permit allocatable components, reject + them and polymorphic variables in PRIVATE/FIRSTPRIVATE. + * trans-decl.cc (add_clause): Set clause location. + * trans-openmp.cc (gfc_has_alloc_comps): Add ptr_ok and + shallow_alloc_only Boolean arguments. + (gfc_omp_replace_alloc_by_to_mapping): New. + (gfc_omp_private_outer_ref, gfc_walk_alloc_comps, + gfc_omp_clause_default_ctor, gfc_omp_clause_copy_ctor, + gfc_omp_clause_assign_op, gfc_omp_clause_dtor): Update call to it. + (gfc_omp_finish_clause): Minor cleanups, improve location data, + handle allocatable components. + (gfc_omp_deep_mapping_map, gfc_omp_deep_mapping_item, + gfc_omp_deep_mapping_comps, gfc_omp_gen_simple_loop, + gfc_omp_get_array_size, gfc_omp_elmental_loop, + gfc_omp_deep_map_kind_p, gfc_omp_deep_mapping_int_p, + gfc_omp_deep_mapping_p, gfc_omp_deep_mapping_do, + gfc_omp_deep_mapping_cnt, gfc_omp_deep_mapping): New. + (gfc_trans_omp_array_section): Save array descriptor in case + deep-mapping lang hook will need it. + (gfc_trans_omp_clauses): Likewise; use better clause location data. + * trans.h (gfc_omp_deep_mapping_p, gfc_omp_deep_mapping_cnt, + gfc_omp_deep_mapping): Add function prototypes. + 2025-04-13 Thomas Koenig <tkoenig@gcc.gnu.org> PR fortran/119669 diff --git a/gcc/fortran/f95-lang.cc b/gcc/fortran/f95-lang.cc index 124d62f..1f09553 100644 --- a/gcc/fortran/f95-lang.cc +++ b/gcc/fortran/f95-lang.cc @@ -148,6 +148,9 @@ gfc_get_sarif_source_language (const char *) #undef LANG_HOOKS_OMP_CLAUSE_LINEAR_CTOR #undef LANG_HOOKS_OMP_CLAUSE_DTOR #undef LANG_HOOKS_OMP_FINISH_CLAUSE +#undef LANG_HOOKS_OMP_DEEP_MAPPING +#undef LANG_HOOKS_OMP_DEEP_MAPPING_P +#undef LANG_HOOKS_OMP_DEEP_MAPPING_CNT #undef LANG_HOOKS_OMP_ALLOCATABLE_P #undef LANG_HOOKS_OMP_SCALAR_TARGET_P #undef LANG_HOOKS_OMP_SCALAR_P @@ -188,6 +191,9 @@ gfc_get_sarif_source_language (const char *) #define LANG_HOOKS_OMP_CLAUSE_LINEAR_CTOR gfc_omp_clause_linear_ctor #define LANG_HOOKS_OMP_CLAUSE_DTOR gfc_omp_clause_dtor #define LANG_HOOKS_OMP_FINISH_CLAUSE gfc_omp_finish_clause +#define LANG_HOOKS_OMP_DEEP_MAPPING gfc_omp_deep_mapping +#define LANG_HOOKS_OMP_DEEP_MAPPING_P gfc_omp_deep_mapping_p +#define LANG_HOOKS_OMP_DEEP_MAPPING_CNT gfc_omp_deep_mapping_cnt #define LANG_HOOKS_OMP_ALLOCATABLE_P gfc_omp_allocatable_p #define LANG_HOOKS_OMP_SCALAR_P gfc_omp_scalar_p #define LANG_HOOKS_OMP_SCALAR_TARGET_P gfc_omp_scalar_target_p diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc index ded80b7..df82940 100644 --- a/gcc/fortran/openmp.cc +++ b/gcc/fortran/openmp.cc @@ -1588,7 +1588,7 @@ gfc_match_omp_clause_reduction (char pc, gfc_omp_clauses *c, bool openacc, { gfc_omp_namelist *p = gfc_get_omp_namelist (), **tl; p->sym = n->sym; - p->where = p->where; + p->where = n->where; p->u.map.op = OMP_MAP_ALWAYS_TOFROM; tl = &c->lists[OMP_LIST_MAP]; @@ -9681,22 +9681,6 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses, && n->sym->as->type == AS_ASSUMED_SIZE) gfc_error ("Assumed size array %qs in %s clause at %L", n->sym->name, name, &n->where); - if (!openacc - && list == OMP_LIST_MAP - && n->sym->ts.type == BT_DERIVED - && n->sym->ts.u.derived->attr.alloc_comp) - gfc_error ("List item %qs with allocatable components is not " - "permitted in map clause at %L", n->sym->name, - &n->where); - if (!openacc - && (list == OMP_LIST_MAP - || list == OMP_LIST_FROM - || list == OMP_LIST_TO) - && ((n->expr && n->expr->ts.type == BT_CLASS) - || (!n->expr && n->sym->ts.type == BT_CLASS))) - gfc_warning (OPT_Wopenmp, - "Mapping polymorphic list item at %L is " - "unspecified behavior", &n->where); if (list == OMP_LIST_MAP && !openacc) switch (code->op) { @@ -10008,9 +9992,11 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses, n->sym->name, name, &n->where); if (!openacc - && list == OMP_LIST_FIRSTPRIVATE - && ((n->expr && n->expr->ts.type == BT_CLASS) - || (!n->expr && n->sym->ts.type == BT_CLASS))) + && (list == OMP_LIST_PRIVATE + || list == OMP_LIST_FIRSTPRIVATE) + && ((n->sym->ts.type == BT_DERIVED + && n->sym->ts.u.derived->attr.alloc_comp) + || n->sym->ts.type == BT_CLASS)) switch (code->op) { case EXEC_OMP_TARGET: @@ -10025,9 +10011,19 @@ resolve_omp_clauses (gfc_code *code, gfc_omp_clauses *omp_clauses, case EXEC_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD: case EXEC_OMP_TARGET_TEAMS_DISTRIBUTE_SIMD: case EXEC_OMP_TARGET_TEAMS_LOOP: - gfc_warning (OPT_Wopenmp, - "FIRSTPRIVATE with polymorphic list item at " - "%L is unspecified behavior", &n->where); + if (n->sym->ts.type == BT_DERIVED + && n->sym->ts.u.derived->attr.alloc_comp) + gfc_error ("Sorry, list item %qs at %L with allocatable" + " components is not yet supported in %s " + "clause", n->sym->name, &n->where, + list == OMP_LIST_PRIVATE ? "PRIVATE" + : "FIRSTPRIVATE"); + else + gfc_error ("Polymorphic list item %qs at %L in %s " + "clause has unspecified behavior and " + "unsupported", n->sym->name, &n->where, + list == OMP_LIST_PRIVATE ? "PRIVATE" + : "FIRSTPRIVATE"); break; default: break; diff --git a/gcc/fortran/trans-decl.cc b/gcc/fortran/trans-decl.cc index aea132d..ddc4960 100644 --- a/gcc/fortran/trans-decl.cc +++ b/gcc/fortran/trans-decl.cc @@ -6920,6 +6920,7 @@ add_clause (gfc_symbol *sym, gfc_omp_map_op map_op) n = gfc_get_omp_namelist (); n->sym = sym; + n->where = sym->declared_at; n->u.map.op = map_op; if (!module_oacc_clauses) diff --git a/gcc/fortran/trans-openmp.cc b/gcc/fortran/trans-openmp.cc index 03d9432..0b8150f 100644 --- a/gcc/fortran/trans-openmp.cc +++ b/gcc/fortran/trans-openmp.cc @@ -25,6 +25,10 @@ along with GCC; see the file COPYING3. If not see #include "options.h" #include "tree.h" #include "gfortran.h" +#include "basic-block.h" +#include "tree-ssa.h" +#include "function.h" +#include "gimple.h" #include "gimple-expr.h" #include "trans.h" #include "stringpool.h" @@ -41,6 +45,8 @@ along with GCC; see the file COPYING3. If not see #include "omp-low.h" #include "memmodel.h" /* For MEMMODEL_ enums. */ #include "dependency.h" +#include "gimple-iterator.h" /* For gsi_iterator_update. */ +#include "gimplify-me.h" /* For force_gimple_operand. */ #undef GCC_DIAG_STYLE #define GCC_DIAG_STYLE __gcc_tdiag__ @@ -375,22 +381,28 @@ gfc_omp_report_decl (tree decl) return decl; } -/* Return true if TYPE has any allocatable components. */ +/* Return true if TYPE has any allocatable components; + if ptr_ok, the decl itself is permitted to have the POINTER attribute. + if shallow_alloc_only, returns only true if any of the fields is an + allocatable; called with true by gfc_omp_replace_alloc_by_to_mapping. */ static bool -gfc_has_alloc_comps (tree type, tree decl) +gfc_has_alloc_comps (tree type, tree decl, bool ptr_ok, + bool shallow_alloc_only=false) { tree field, ftype; if (POINTER_TYPE_P (type)) { - if (GFC_DECL_GET_SCALAR_ALLOCATABLE (decl)) + if (GFC_DECL_GET_SCALAR_ALLOCATABLE (decl) + || (ptr_ok && GFC_DECL_GET_SCALAR_POINTER (decl))) type = TREE_TYPE (type); else if (GFC_DECL_GET_SCALAR_POINTER (decl)) return false; } - if (GFC_DESCRIPTOR_TYPE_P (type) + if (!ptr_ok + && GFC_DESCRIPTOR_TYPE_P (type) && (GFC_TYPE_ARRAY_AKIND (type) == GFC_ARRAY_POINTER || GFC_TYPE_ARRAY_AKIND (type) == GFC_ARRAY_POINTER_CONT)) return false; @@ -409,12 +421,51 @@ gfc_has_alloc_comps (tree type, tree decl) if (GFC_DESCRIPTOR_TYPE_P (ftype) && GFC_TYPE_ARRAY_AKIND (ftype) == GFC_ARRAY_ALLOCATABLE) return true; - if (gfc_has_alloc_comps (ftype, field)) + if (!shallow_alloc_only + && gfc_has_alloc_comps (ftype, field, false)) return true; } return false; } +/* gfc_omp_replace_alloc_by_to_mapping is used with gfc_omp_deep_mapping... to + handle the following: + + For map(alloc: dt), the array descriptors of allocatable components should + be mapped as 'to'; this could be done by (A) adding 'map(to: dt%alloc_comp)' + for each component (and avoiding to increment the reference count). + Or (B) by just mapping all of 'dt' as 'to'. + + If 'dt' contains several allocatable components and not much other data, + (A) is more efficient. If 'dt' contains a large const-size array, (A) will + copy it to the device instead of only 'alloc'ating it. + + IMPLEMENTATION CHOICE: We do (A). It avoids the ref-count issue and it is + expected that, for real-world code, derived types with allocatable + components only have few other components and either no const-size arrays. + This copying is done irrespectively whether the allocatables are allocated. + + If users wanted to save memory, they have to use 'map(alloc:dt%comp)' as + also with 'map(alloc:dt)' all components get copied. + + For the copy to the device, only allocatable arrays are relevant as their + the bounds are required; the pointer is set separately (GOMP_MAP_ATTACH) + and the only setting required for scalars. However, when later copying out + of the device, an unallocated allocatable must remain unallocated/NULL on + the host; to achieve this we also must have it set to NULL on the device + to avoid issues with uninitialized memory being copied back for the pointer + address. If we could set the pointer to NULL, gfc_has_alloc_comps's + shallow_alloc_only could be restricted to return true only for arrays. + + We only need to return true if there are allocatable-array components. */ + +static bool +gfc_omp_replace_alloc_by_to_mapping (tree type, tree decl, bool ptr_ok) +{ + return gfc_has_alloc_comps (type, decl, ptr_ok, true); +} + + /* Return true if TYPE is polymorphic but not with pointer attribute. */ static bool @@ -487,7 +538,7 @@ gfc_omp_private_outer_ref (tree decl) if (GFC_DECL_GET_SCALAR_ALLOCATABLE (decl)) return true; - if (gfc_has_alloc_comps (type, decl)) + if (gfc_has_alloc_comps (type, decl, false)) return true; return false; @@ -627,7 +678,7 @@ gfc_walk_alloc_comps (tree decl, tree dest, tree var, { tree ftype = TREE_TYPE (field); tree declf, destf = NULL_TREE; - bool has_alloc_comps = gfc_has_alloc_comps (ftype, field); + bool has_alloc_comps = gfc_has_alloc_comps (ftype, field, false); if ((!GFC_DESCRIPTOR_TYPE_P (ftype) || GFC_TYPE_ARRAY_AKIND (ftype) != GFC_ARRAY_ALLOCATABLE) && !GFC_DECL_GET_SCALAR_ALLOCATABLE (field) @@ -751,7 +802,7 @@ gfc_omp_clause_default_ctor (tree clause, tree decl, tree outer) && (!GFC_DECL_GET_SCALAR_ALLOCATABLE (OMP_CLAUSE_DECL (clause)) || !POINTER_TYPE_P (type))) { - if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause))) + if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause), false)) { gcc_assert (outer); gfc_start_block (&block); @@ -804,7 +855,7 @@ gfc_omp_clause_default_ctor (tree clause, tree decl, tree outer) else gfc_add_modify (&cond_block, unshare_expr (decl), fold_convert (TREE_TYPE (decl), ptr)); - if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause))) + if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause), false)) { tree tem = gfc_walk_alloc_comps (outer, decl, OMP_CLAUSE_DECL (clause), @@ -945,7 +996,7 @@ gfc_omp_clause_copy_ctor (tree clause, tree dest, tree src) && (!GFC_DECL_GET_SCALAR_ALLOCATABLE (OMP_CLAUSE_DECL (clause)) || !POINTER_TYPE_P (type))) { - if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause))) + if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause), false)) { gfc_start_block (&block); gfc_add_modify (&block, dest, src); @@ -1004,7 +1055,7 @@ gfc_omp_clause_copy_ctor (tree clause, tree dest, tree src) builtin_decl_explicit (BUILT_IN_MEMCPY), 3, ptr, srcptr, size); gfc_add_expr_to_block (&cond_block, fold_convert (void_type_node, call)); - if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause))) + if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause), false)) { tree tem = gfc_walk_alloc_comps (src, dest, OMP_CLAUSE_DECL (clause), @@ -1049,7 +1100,7 @@ gfc_omp_clause_assign_op (tree clause, tree dest, tree src) && (!GFC_DECL_GET_SCALAR_ALLOCATABLE (OMP_CLAUSE_DECL (clause)) || !POINTER_TYPE_P (type))) { - if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause))) + if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause), false)) { gfc_start_block (&block); /* First dealloc any allocatable components in DEST. */ @@ -1071,7 +1122,7 @@ gfc_omp_clause_assign_op (tree clause, tree dest, tree src) gfc_start_block (&block); - if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause))) + if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause), false)) { then_b = gfc_walk_alloc_comps (dest, NULL_TREE, OMP_CLAUSE_DECL (clause), WALK_ALLOC_COMPS_DTOR); @@ -1186,7 +1237,7 @@ gfc_omp_clause_assign_op (tree clause, tree dest, tree src) builtin_decl_explicit (BUILT_IN_MEMCPY), 3, ptr, srcptr, size); gfc_add_expr_to_block (&cond_block, fold_convert (void_type_node, call)); - if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause))) + if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause), false)) { tree tem = gfc_walk_alloc_comps (src, dest, OMP_CLAUSE_DECL (clause), @@ -1438,7 +1489,7 @@ gfc_omp_clause_dtor (tree clause, tree decl) && (!GFC_DECL_GET_SCALAR_ALLOCATABLE (OMP_CLAUSE_DECL (clause)) || !POINTER_TYPE_P (type))) { - if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause))) + if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause), false)) return gfc_walk_alloc_comps (decl, NULL_TREE, OMP_CLAUSE_DECL (clause), WALK_ALLOC_COMPS_DTOR); @@ -1458,7 +1509,7 @@ gfc_omp_clause_dtor (tree clause, tree decl) tem = gfc_call_free (decl); tem = gfc_omp_unshare_expr (tem); - if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause))) + if (gfc_has_alloc_comps (type, OMP_CLAUSE_DECL (clause), false)) { stmtblock_t block; tree then_b; @@ -1538,6 +1589,7 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) return; tree decl = OMP_CLAUSE_DECL (c); + location_t loc = OMP_CLAUSE_LOCATION (c); /* Assumed-size arrays can't be mapped implicitly, they have to be mapped explicitly using array sections. */ @@ -1553,13 +1605,9 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) return; } - if (!openacc && GFC_CLASS_TYPE_P (TREE_TYPE (decl))) - warning_at (OMP_CLAUSE_LOCATION (c), OPT_Wopenmp, - "Implicit mapping of polymorphic variable %qD is " - "unspecified behavior", decl); - tree c2 = NULL_TREE, c3 = NULL_TREE, c4 = NULL_TREE; tree present = gfc_omp_check_optional_argument (decl, true); + tree orig_decl = NULL_TREE; if (POINTER_TYPE_P (TREE_TYPE (decl))) { if (!gfc_omp_privatize_by_reference (decl) @@ -1568,7 +1616,7 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) && !GFC_DECL_CRAY_POINTEE (decl) && !GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (TREE_TYPE (decl)))) return; - tree orig_decl = decl; + orig_decl = decl; c4 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (c4, GOMP_MAP_POINTER); @@ -1579,16 +1627,16 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) && (GFC_DECL_GET_SCALAR_POINTER (orig_decl) || GFC_DECL_GET_SCALAR_ALLOCATABLE (orig_decl))) { - c2 = build_omp_clause (input_location, OMP_CLAUSE_MAP); + c2 = build_omp_clause (loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_POINTER); - OMP_CLAUSE_DECL (c2) = decl; + OMP_CLAUSE_DECL (c2) = unshare_expr (decl); OMP_CLAUSE_SIZE (c2) = size_int (0); stmtblock_t block; gfc_start_block (&block); - tree ptr = decl; - ptr = gfc_build_cond_assign_expr (&block, present, decl, - null_pointer_node); + tree ptr = gfc_build_cond_assign_expr (&block, present, + unshare_expr (decl), + null_pointer_node); gimplify_and_add (gfc_finish_block (&block), pre_p); ptr = build_fold_indirect_ref (ptr); OMP_CLAUSE_DECL (c) = ptr; @@ -1605,10 +1653,10 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) { c3 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (c3, GOMP_MAP_POINTER); - OMP_CLAUSE_DECL (c3) = unshare_expr (decl); + OMP_CLAUSE_DECL (c3) = decl; OMP_CLAUSE_SIZE (c3) = size_int (0); decl = build_fold_indirect_ref (decl); - OMP_CLAUSE_DECL (c) = decl; + OMP_CLAUSE_DECL (c) = unshare_expr (decl); } } if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl))) @@ -1634,7 +1682,7 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) gcc_assert (POINTER_TYPE_P (TREE_TYPE (ptr))); ptr = build_fold_indirect_ref (ptr); OMP_CLAUSE_DECL (c) = ptr; - c2 = build_omp_clause (input_location, OMP_CLAUSE_MAP); + c2 = build_omp_clause (loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_TO_PSET); if (present) { @@ -1651,7 +1699,7 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) : GOMP_MAP_POINTER); if (present) { - ptr = gfc_conv_descriptor_data_get (decl); + ptr = gfc_conv_descriptor_data_get (unshare_expr (decl)); ptr = gfc_build_addr_expr (NULL, ptr); ptr = gfc_build_cond_assign_expr (&block, present, ptr, null_pointer_node); @@ -1664,6 +1712,17 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) tree size = create_tmp_var (gfc_array_index_type); tree elemsz = TYPE_SIZE_UNIT (gfc_get_element_type (type)); elemsz = fold_convert (gfc_array_index_type, elemsz); + + if (orig_decl == NULL_TREE) + orig_decl = decl; + if (!openacc + && gfc_has_alloc_comps (type, orig_decl, true)) + { + /* Save array descriptor for use in gfc_omp_deep_mapping{,_p,_cnt}; + force evaluate to ensure that it is not gimplified + is a decl. */ + gfc_allocate_lang_decl (size); + GFC_DECL_SAVED_DESCRIPTOR (size) = orig_decl; + } enum gfc_array_kind akind = GFC_TYPE_ARRAY_AKIND (type); if (akind == GFC_ARRAY_ALLOCATABLE || akind == GFC_ARRAY_POINTER @@ -1692,14 +1751,14 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) else_b = gfc_finish_block (&cond_block); tem = gfc_conv_descriptor_data_get (unshare_expr (decl)); tem = fold_convert (pvoid_type_node, tem); - cond = fold_build2_loc (input_location, NE_EXPR, + cond = fold_build2_loc (loc, NE_EXPR, boolean_type_node, tem, null_pointer_node); if (present) { - cond = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR, + cond = fold_build2_loc (loc, TRUTH_ANDIF_EXPR, boolean_type_node, present, cond); } - gfc_add_expr_to_block (&block, build3_loc (input_location, COND_EXPR, + gfc_add_expr_to_block (&block, build3_loc (loc, COND_EXPR, void_type_node, cond, then_b, else_b)); } @@ -1739,11 +1798,30 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) tree stmt = gfc_finish_block (&block); gimplify_and_add (stmt, pre_p); } + else + { + if (OMP_CLAUSE_SIZE (c) == NULL_TREE) + OMP_CLAUSE_SIZE (c) + = DECL_P (decl) ? DECL_SIZE_UNIT (decl) + : TYPE_SIZE_UNIT (TREE_TYPE (decl)); + + tree type = TREE_TYPE (decl); + if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type))) + type = TREE_TYPE (type); + if (!openacc + && orig_decl != NULL_TREE + && gfc_has_alloc_comps (type, orig_decl, true)) + { + /* Save array descriptor for use in gfc_omp_deep_mapping{,_p,_cnt}; + force evaluate to ensure that it is not gimplified + is a decl. */ + tree size = create_tmp_var (TREE_TYPE (OMP_CLAUSE_SIZE (c))); + gfc_allocate_lang_decl (size); + GFC_DECL_SAVED_DESCRIPTOR (size) = orig_decl; + gimplify_assign (size, OMP_CLAUSE_SIZE (c), pre_p); + OMP_CLAUSE_SIZE (c) = size; + } + } tree last = c; - if (OMP_CLAUSE_SIZE (c) == NULL_TREE) - OMP_CLAUSE_SIZE (c) - = DECL_P (decl) ? DECL_SIZE_UNIT (decl) - : TYPE_SIZE_UNIT (TREE_TYPE (decl)); if (gimplify_expr (&OMP_CLAUSE_SIZE (c), pre_p, NULL, is_gimple_val, fb_rvalue) == GS_ERROR) OMP_CLAUSE_SIZE (c) = size_int (0); @@ -1767,6 +1845,715 @@ gfc_omp_finish_clause (tree c, gimple_seq *pre_p, bool openacc) } +/* map(<flag>: data [len: <size>]) + map(attach: &data [bias: <bias>]) + offset += 2; offset_data += 2 */ +static void +gfc_omp_deep_mapping_map (tree data, tree size, unsigned HOST_WIDE_INT tkind, + location_t loc, tree data_array, tree sizes_array, + tree kinds_array, tree offset_data, tree offset, + gimple_seq *seq, const gimple *ctx) +{ + tree one = build_int_cst (size_type_node, 1); + + STRIP_NOPS (data); + if (!POINTER_TYPE_P (TREE_TYPE (data))) + { + gcc_assert (TREE_CODE (data) == INDIRECT_REF); + data = TREE_OPERAND (data, 0); + } + + /* data_array[offset_data] = data; */ + tree tmp = build4 (ARRAY_REF, TREE_TYPE (TREE_TYPE (data_array)), + unshare_expr (data_array), offset_data, + NULL_TREE, NULL_TREE); + gimplify_assign (tmp, data, seq); + + /* offset_data++ */ + tmp = build2_loc (loc, PLUS_EXPR, size_type_node, offset_data, one); + gimplify_assign (offset_data, tmp, seq); + + /* data_array[offset_data] = &data; */ + tmp = build4 (ARRAY_REF, TREE_TYPE (TREE_TYPE (data_array)), + unshare_expr (data_array), + offset_data, NULL_TREE, NULL_TREE); + gimplify_assign (tmp, build_fold_addr_expr (data), seq); + + /* offset_data++ */ + tmp = build2_loc (loc, PLUS_EXPR, size_type_node, offset_data, one); + gimplify_assign (offset_data, tmp, seq); + + /* sizes_array[offset] = size */ + tmp = build2_loc (loc, MULT_EXPR, size_type_node, + TYPE_SIZE_UNIT (size_type_node), offset); + tmp = build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (sizes_array), + sizes_array, tmp); + gimple_seq seq2 = NULL; + tmp = force_gimple_operand (tmp, &seq2, true, NULL_TREE); + gimple_seq_add_seq (seq, seq2); + tmp = build_fold_indirect_ref_loc (loc, tmp); + gimplify_assign (tmp, size, seq); + + /* FIXME: tkind |= talign << talign_shift; */ + /* kinds_array[offset] = tkind. */ + tmp = build2_loc (loc, MULT_EXPR, size_type_node, + TYPE_SIZE_UNIT (short_unsigned_type_node), offset); + tmp = build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (kinds_array), + kinds_array, tmp); + seq2 = NULL; + tmp = force_gimple_operand (tmp, &seq2, true, NULL_TREE); + gimple_seq_add_seq (seq, seq2); + tmp = build_fold_indirect_ref_loc (loc, tmp); + gimplify_assign (tmp, build_int_cst (short_unsigned_type_node, tkind), seq); + + /* offset++ */ + tmp = build2_loc (loc, PLUS_EXPR, size_type_node, offset, one); + gimplify_assign (offset, tmp, seq); + + /* sizes_array[offset] = bias (= 0). */ + tmp = build2_loc (loc, MULT_EXPR, size_type_node, + TYPE_SIZE_UNIT (size_type_node), offset); + tmp = build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (sizes_array), + sizes_array, tmp); + seq2 = NULL; + tmp = force_gimple_operand (tmp, &seq2, true, NULL_TREE); + gimple_seq_add_seq (seq, seq2); + tmp = build_fold_indirect_ref_loc (loc, tmp); + gimplify_assign (tmp, build_zero_cst (size_type_node), seq); + + gcc_assert (gimple_code (ctx) == GIMPLE_OMP_TARGET); + tkind = (gimple_omp_target_kind (ctx) == GF_OMP_TARGET_KIND_EXIT_DATA + ? GOMP_MAP_DETACH : GOMP_MAP_ATTACH); + + /* kinds_array[offset] = tkind. */ + tmp = build2_loc (loc, MULT_EXPR, size_type_node, + TYPE_SIZE_UNIT (short_unsigned_type_node), offset); + tmp = build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (kinds_array), + kinds_array, tmp); + seq2 = NULL; + tmp = force_gimple_operand (tmp, &seq2, true, NULL_TREE); + gimple_seq_add_seq (seq, seq2); + tmp = build_fold_indirect_ref_loc (loc, tmp); + gimplify_assign (tmp, build_int_cst (short_unsigned_type_node, tkind), seq); + + /* offset++ */ + tmp = build2_loc (loc, PLUS_EXPR, size_type_node, offset, one); + gimplify_assign (offset, tmp, seq); +} + +static void gfc_omp_deep_mapping_item (bool, bool, bool, location_t, tree, + tree *, unsigned HOST_WIDE_INT, tree, + tree, tree, tree, tree, tree, + gimple_seq *, const gimple *, bool *); + +/* Map allocatable components. */ +static void +gfc_omp_deep_mapping_comps (bool is_cnt, location_t loc, tree decl, + tree *token, unsigned HOST_WIDE_INT tkind, + tree data_array, tree sizes_array, tree kinds_array, + tree offset_data, tree offset, tree num, + gimple_seq *seq, const gimple *ctx, + bool *poly_warned) +{ + tree type = TREE_TYPE (decl); + if (TREE_CODE (type) != RECORD_TYPE) + return; + for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + { + type = TREE_TYPE (field); + if (gfc_is_polymorphic_nonptr (type) + || GFC_DECL_GET_SCALAR_ALLOCATABLE (field) + || (GFC_DESCRIPTOR_TYPE_P (type) + && GFC_TYPE_ARRAY_AKIND (type) == GFC_ARRAY_ALLOCATABLE)) + { + tree tmp = fold_build3_loc (loc, COMPONENT_REF, TREE_TYPE (field), + decl, field, NULL_TREE); + gfc_omp_deep_mapping_item (is_cnt, true, true, loc, tmp, token, + tkind, data_array, sizes_array, + kinds_array, offset_data, offset, num, + seq, ctx, poly_warned); + } + else if (GFC_DECL_GET_SCALAR_POINTER (field) + || GFC_DESCRIPTOR_TYPE_P (type)) + continue; + else if (gfc_has_alloc_comps (TREE_TYPE (field), field, false)) + { + tree tmp = fold_build3_loc (loc, COMPONENT_REF, TREE_TYPE (field), + decl, field, NULL_TREE); + if (TREE_CODE (TREE_TYPE (tmp)) == ARRAY_TYPE) + gfc_omp_deep_mapping_item (is_cnt, false, false, loc, tmp, + token, tkind, data_array, sizes_array, + kinds_array, offset_data, offset, num, + seq, ctx, poly_warned); + else + gfc_omp_deep_mapping_comps (is_cnt, loc, tmp, token, tkind, + data_array, sizes_array, kinds_array, + offset_data, offset, num, seq, ctx, + poly_warned); + } + } +} + +static void +gfc_omp_gen_simple_loop (tree var, tree begin, tree end, enum tree_code cond, + tree step, location_t loc, gimple_seq *seq1, + gimple_seq *seq2) +{ + tree tmp; + + /* var = begin. */ + gimplify_assign (var, begin, seq1); + + /* Loop: for (var = begin; var <cond> end; var += step). */ + tree label_loop = create_artificial_label (loc); + tree label_cond = create_artificial_label (loc); + + gimplify_and_add (fold_build1_loc (loc, GOTO_EXPR, void_type_node, + label_cond), seq1); + gimple_seq_add_stmt (seq1, gimple_build_label (label_loop)); + + /* Everything above is seq1; place loop body here. */ + + /* End of loop body -> put into seq2. */ + tmp = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (var), var, step); + gimplify_assign (var, tmp, seq2); + gimple_seq_add_stmt (seq2, gimple_build_label (label_cond)); + tmp = fold_build2_loc (loc, cond, boolean_type_node, var, end); + tmp = build3_v (COND_EXPR, tmp, build1_v (GOTO_EXPR, label_loop), + build_empty_stmt (loc)); + gimplify_and_add (tmp, seq2); +} + +/* Return size variable with the size of an array. */ +static tree +gfc_omp_get_array_size (location_t loc, tree desc, gimple_seq *seq) +{ + tree tmp; + gimple_seq seq1 = NULL, seq2 = NULL; + tree size = build_decl (loc, VAR_DECL, create_tmp_var_name ("size"), + size_type_node); + tree extent = build_decl (loc, VAR_DECL, create_tmp_var_name ("extent"), + gfc_array_index_type); + tree idx = build_decl (loc, VAR_DECL, create_tmp_var_name ("idx"), + signed_char_type_node); + + tree begin = build_zero_cst (signed_char_type_node); + tree end; + if (GFC_TYPE_ARRAY_AKIND (TREE_TYPE (desc)) == GFC_ARRAY_ASSUMED_SHAPE_CONT + || GFC_TYPE_ARRAY_AKIND (TREE_TYPE (desc)) == GFC_ARRAY_ASSUMED_SHAPE) + end = gfc_conv_descriptor_rank (desc); + else + end = build_int_cst (signed_char_type_node, + GFC_TYPE_ARRAY_RANK (TREE_TYPE (desc))); + tree step = build_int_cst (signed_char_type_node, 1); + + /* size = 0 + for (idx = 0; idx < rank; idx++) + extent = gfc->dim[i].ubound - gfc->dim[i].lbound + 1 + if (extent < 0) extent = 0 + size *= extent. */ + gimplify_assign (size, build_int_cst (size_type_node, 1), seq); + + gfc_omp_gen_simple_loop (idx, begin, end, LT_EXPR, step, loc, &seq1, &seq2); + gimple_seq_add_seq (seq, seq1); + + tmp = fold_build2_loc (loc, MINUS_EXPR, gfc_array_index_type, + gfc_conv_descriptor_ubound_get (desc, idx), + gfc_conv_descriptor_lbound_get (desc, idx)); + tmp = fold_build2_loc (loc, PLUS_EXPR, gfc_array_index_type, + tmp, gfc_index_one_node); + gimplify_assign (extent, tmp, seq); + tmp = fold_build2_loc (loc, LT_EXPR, boolean_type_node, + extent, gfc_index_zero_node); + tmp = build3_v (COND_EXPR, tmp, + fold_build2_loc (loc, MODIFY_EXPR, + gfc_array_index_type, + extent, gfc_index_zero_node), + build_empty_stmt (loc)); + gimplify_and_add (tmp, seq); + /* size *= extent. */ + gimplify_assign (size, fold_build2_loc (loc, MULT_EXPR, size_type_node, size, + fold_convert (size_type_node, + extent)), seq); + gimple_seq_add_seq (seq, seq2); + return size; +} + +/* Generate loop to access every array element; takes addr of first element + (decl's data comp); returns loop code in seq1 + seq2 + and the pointer to the element as return value. */ +static tree +gfc_omp_elmental_loop (location_t loc, tree decl, tree size, tree elem_len, + gimple_seq *seq1, gimple_seq *seq2) +{ + tree idx = build_decl (loc, VAR_DECL, create_tmp_var_name ("idx"), + size_type_node); + tree begin = build_zero_cst (size_type_node); + tree end = size; + tree step = build_int_cst (size_type_node, 1); + tree ptr; + + gfc_omp_gen_simple_loop (idx, begin, end, LT_EXPR, step, loc, seq1, seq2); + + tree type = TREE_TYPE (decl); + if (POINTER_TYPE_P (type)) + { + type = TREE_TYPE (type); + gcc_assert (TREE_CODE (type) == ARRAY_TYPE); + decl = fold_convert (build_pointer_type (TREE_TYPE (type)), decl); + } + else + { + gcc_assert (TREE_CODE (type) == ARRAY_TYPE); + decl = build_fold_addr_expr_loc (loc, decl); + } + decl = fold_convert (build_pointer_type (TREE_TYPE (type)), decl); + tree tmp = build2_loc (loc, MULT_EXPR, size_type_node, idx, + fold_convert (size_type_node, elem_len)); + ptr = build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (decl), decl, tmp); + gimple_seq seq3 = NULL; + ptr = force_gimple_operand (ptr, &seq3, true, NULL_TREE); + gimple_seq_add_seq (seq1, seq3); + + return ptr; +} + + +/* If do_copy, copy data pointer and vptr (if applicable) as well. + Otherwise, only handle allocatable components. + do_copy == false can happen only with nonpolymorphic arguments + to a copy clause. + if (is_cnt) token ... offset is ignored and num is used, otherwise + num is NULL_TREE and unused. */ + +static void +gfc_omp_deep_mapping_item (bool is_cnt, bool do_copy, bool do_alloc_check, + location_t loc, tree decl, tree *token, + unsigned HOST_WIDE_INT tkind, tree data_array, + tree sizes_array, tree kinds_array, tree offset_data, + tree offset, tree num, gimple_seq *seq, + const gimple *ctx, bool *poly_warned) +{ + tree tmp; + tree type = TREE_TYPE (decl); + if (POINTER_TYPE_P (type)) + type = TREE_TYPE (type); + tree end_label = NULL_TREE; + tree size = NULL_TREE, elem_len = NULL_TREE; + + bool poly = gfc_is_polymorphic_nonptr (type); + if (poly && is_cnt && !*poly_warned) + { + if (gfc_is_unlimited_polymorphic_nonptr (type)) + error_at (loc, + "Mapping of unlimited polymorphic list item %qD is " + "unspecified behavior and unsupported", decl); + + else + warning_at (loc, OPT_Wopenmp, + "Mapping of polymorphic list item %qD is " + "unspecified behavior", decl); + *poly_warned = true; + } + if (do_alloc_check) + { + tree then_label = create_artificial_label (loc); + end_label = create_artificial_label (loc); + tmp = decl; + if (TREE_CODE (TREE_TYPE (tmp)) == REFERENCE_TYPE + || (POINTER_TYPE_P (TREE_TYPE (tmp)) + && (POINTER_TYPE_P (TREE_TYPE (TREE_TYPE (tmp))) + || GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (TREE_TYPE (tmp)))))) + tmp = build_fold_indirect_ref_loc (loc, tmp); + if (poly) + tmp = gfc_class_data_get (tmp); + if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (tmp))) + tmp = gfc_conv_descriptor_data_get (tmp); + gimple_seq seq2 = NULL; + tmp = force_gimple_operand (tmp, &seq2, true, NULL_TREE); + gimple_seq_add_seq (seq, seq2); + + gimple_seq_add_stmt (seq, + gimple_build_cond (NE_EXPR, tmp, null_pointer_node, + then_label, end_label)); + gimple_seq_add_stmt (seq, gimple_build_label (then_label)); + } + tree class_decl = decl; + if (poly) + { + decl = gfc_class_data_get (decl); + type = TREE_TYPE (decl); + } + if (POINTER_TYPE_P (TREE_TYPE (decl))) + { + decl = build_fold_indirect_ref (decl); + type = TREE_TYPE (decl); + } + + if (is_cnt && do_copy) + { + tree tmp = fold_build2_loc (loc, PLUS_EXPR, size_type_node, + num, build_int_cst (size_type_node, 1)); + gimplify_assign (num, tmp, seq); + } + else if (do_copy) + { + /* copy data pointer */ + tree bytesize; + if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl))) + { + /* TODO: Optimization: Shouldn't this be an expr. const, except for + deferred-length strings. (Cf. also below). */ + elem_len = (poly ? gfc_class_vtab_size_get (class_decl) + : gfc_conv_descriptor_elem_len (decl)); + tmp = (POINTER_TYPE_P (TREE_TYPE (decl)) + ? build_fold_indirect_ref (decl) : decl); + size = gfc_omp_get_array_size (loc, tmp, seq); + bytesize = fold_build2_loc (loc, MULT_EXPR, size_type_node, + fold_convert (size_type_node, size), + fold_convert (size_type_node, elem_len)); + tmp = gfc_conv_descriptor_data_get (decl); + } + else if (poly) + { + tmp = decl; + bytesize = fold_convert (size_type_node, + gfc_class_vtab_size_get (class_decl)); + } + else + { + tmp = decl; + bytesize = TYPE_SIZE_UNIT (TREE_TYPE (decl)); + } + unsigned HOST_WIDE_INT tkind2 = tkind; + if (!is_cnt + && (tkind == GOMP_MAP_ALLOC + || (tkind == GOMP_MAP_FROM + && (gimple_omp_target_kind (ctx) + != GF_OMP_TARGET_KIND_EXIT_DATA))) + && gfc_omp_replace_alloc_by_to_mapping (TREE_TYPE (decl), decl, true)) + tkind2 = tkind == GOMP_MAP_ALLOC ? GOMP_MAP_TO : GOMP_MAP_TOFROM; + + gfc_omp_deep_mapping_map (tmp, bytesize, tkind2, loc, data_array, + sizes_array, kinds_array, offset_data, + offset, seq, ctx); + } + + tmp = decl; + if (POINTER_TYPE_P (TREE_TYPE (decl))) + while (TREE_CODE (tmp) == COMPONENT_REF || TREE_CODE (tmp) == ARRAY_REF) + tmp = TREE_OPERAND (tmp, TREE_CODE (tmp) == COMPONENT_REF ? 1 : 0); + if (poly || gfc_has_alloc_comps (type, tmp, true)) + { + gimple_seq seq2 = NULL; + if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (decl))) + { + if (elem_len == NULL_TREE) + { + elem_len = gfc_conv_descriptor_elem_len (decl); + size = fold_convert (size_type_node, + gfc_omp_get_array_size (loc, decl, seq)); + } + decl = gfc_conv_descriptor_data_get (decl); + decl = gfc_omp_elmental_loop (loc, decl, size, elem_len, seq, &seq2); + decl = build_fold_indirect_ref_loc (loc, decl); + } + else if (TREE_CODE (TREE_TYPE (tmp)) == ARRAY_TYPE) + { + type = TREE_TYPE (tmp); + /* FIXME: PR95868 - for var%str of deferred length, elem_len == 0; + len is stored as var%_str_length, but not in GFC_DECL_STRING_LEN + nor in TYPE_SIZE_UNIT as expression. */ + elem_len = TYPE_SIZE_UNIT (TREE_TYPE (type)); + size = fold_convert (size_type_node, GFC_TYPE_ARRAY_SIZE (type)); + decl = gfc_omp_elmental_loop (loc, decl, size, elem_len, seq, &seq2); + decl = build_fold_indirect_ref_loc (loc, decl); + } + else if (POINTER_TYPE_P (TREE_TYPE (decl))) + decl = build_fold_indirect_ref (decl); + + gfc_omp_deep_mapping_comps (is_cnt, loc, decl, token, tkind, + data_array, sizes_array, kinds_array, + offset_data, offset, num, seq, ctx, + poly_warned); + gimple_seq_add_seq (seq, seq2); + } + if (end_label) + gimple_seq_add_stmt (seq, gimple_build_label (end_label)); +} + + +/* Which map types to check/handle for deep mapping. */ +static bool +gfc_omp_deep_map_kind_p (tree clause) +{ + switch (OMP_CLAUSE_CODE (clause)) + { + case OMP_CLAUSE_MAP: + break; + case OMP_CLAUSE_FIRSTPRIVATE: + case OMP_CLAUSE_TO: + case OMP_CLAUSE_FROM: + return true; + default: + gcc_unreachable (); + } + + switch (OMP_CLAUSE_MAP_KIND (clause)) + { + case GOMP_MAP_TO: + case GOMP_MAP_FROM: + case GOMP_MAP_TOFROM: + case GOMP_MAP_ALWAYS_TO: + case GOMP_MAP_ALWAYS_FROM: + case GOMP_MAP_ALWAYS_TOFROM: + case GOMP_MAP_ALWAYS_PRESENT_FROM: + case GOMP_MAP_ALWAYS_PRESENT_TO: + case GOMP_MAP_ALWAYS_PRESENT_TOFROM: + case GOMP_MAP_FIRSTPRIVATE: + case GOMP_MAP_ALLOC: + return true; + case GOMP_MAP_POINTER: + case GOMP_MAP_TO_PSET: + case GOMP_MAP_FORCE_PRESENT: + case GOMP_MAP_DELETE: + case GOMP_MAP_FORCE_DEVICEPTR: + case GOMP_MAP_DEVICE_RESIDENT: + case GOMP_MAP_LINK: + case GOMP_MAP_IF_PRESENT: + case GOMP_MAP_PRESENT_ALLOC: + case GOMP_MAP_PRESENT_FROM: + case GOMP_MAP_PRESENT_TO: + case GOMP_MAP_PRESENT_TOFROM: + case GOMP_MAP_FIRSTPRIVATE_INT: + case GOMP_MAP_USE_DEVICE_PTR: + case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: + case GOMP_MAP_FORCE_ALLOC: + case GOMP_MAP_FORCE_TO: + case GOMP_MAP_FORCE_FROM: + case GOMP_MAP_FORCE_TOFROM: + case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT: + case GOMP_MAP_STRUCT: + case GOMP_MAP_STRUCT_UNORD: + case GOMP_MAP_ALWAYS_POINTER: + case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION: + case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION: + case GOMP_MAP_RELEASE: + case GOMP_MAP_ATTACH: + case GOMP_MAP_DETACH: + case GOMP_MAP_FORCE_DETACH: + case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION: + case GOMP_MAP_FIRSTPRIVATE_POINTER: + case GOMP_MAP_FIRSTPRIVATE_REFERENCE: + case GOMP_MAP_ATTACH_DETACH: + break; + default: + gcc_unreachable (); + } + return false; +} + +/* Three OpenMP deep-mapping lang hooks: gfc_omp_deep_mapping{_p,_cnt,}. */ + +/* Common check for gfc_omp_deep_mapping_p and gfc_omp_deep_mapping_do. */ + +static tree +gfc_omp_deep_mapping_int_p (const gimple *ctx, tree clause) +{ + if (is_gimple_omp_oacc (ctx) || !gfc_omp_deep_map_kind_p (clause)) + return NULL_TREE; + tree decl = OMP_CLAUSE_DECL (clause); + if (OMP_CLAUSE_SIZE (clause) != NULL_TREE + && DECL_P (OMP_CLAUSE_SIZE (clause)) + && DECL_LANG_SPECIFIC (OMP_CLAUSE_SIZE (clause)) + && GFC_DECL_SAVED_DESCRIPTOR (OMP_CLAUSE_SIZE (clause))) + /* Saved decl. */ + decl = GFC_DECL_SAVED_DESCRIPTOR (OMP_CLAUSE_SIZE (clause)); + else if (TREE_CODE (decl) == MEM_REF || TREE_CODE (decl) == INDIRECT_REF) + /* The following can happen for, e.g., class(t) :: var(..) */ + decl = TREE_OPERAND (decl, 0); + if (TREE_CODE (decl) == INDIRECT_REF) + /* The following can happen for, e.g., class(t) :: var(..) */ + decl = TREE_OPERAND (decl, 0); + if (DECL_P (decl) + && DECL_LANG_SPECIFIC (decl) + && GFC_DECL_SAVED_DESCRIPTOR (decl)) + decl = GFC_DECL_SAVED_DESCRIPTOR (decl); + /* Handle map(to: var.desc) map([to/from/tofrom:] var.desc.data) + to get proper map kind by skipping to the next item. */ + tree tmp = OMP_CLAUSE_CHAIN (clause); + if (tmp != NULL_TREE + && OMP_CLAUSE_CODE (tmp) == OMP_CLAUSE_CODE (clause) + && OMP_CLAUSE_SIZE (tmp) != NULL_TREE + && DECL_P (OMP_CLAUSE_SIZE (tmp)) + && DECL_LANG_SPECIFIC (OMP_CLAUSE_SIZE (tmp)) + && GFC_DECL_SAVED_DESCRIPTOR (OMP_CLAUSE_SIZE (tmp)) == decl) + return NULL_TREE; + if (DECL_P (decl) + && DECL_LANG_SPECIFIC (decl) + && GFC_DECL_SAVED_DESCRIPTOR (decl)) + decl = GFC_DECL_SAVED_DESCRIPTOR (decl); + tree type = TREE_TYPE (decl); + if (POINTER_TYPE_P (type)) + type = TREE_TYPE (type); + if (POINTER_TYPE_P (type)) + type = TREE_TYPE (type); + tmp = decl; + while (TREE_CODE (tmp) == COMPONENT_REF || TREE_CODE (tmp) == ARRAY_REF) + tmp = TREE_OPERAND (tmp, TREE_CODE (tmp) == COMPONENT_REF ? 1 : 0); + if (!gfc_is_polymorphic_nonptr (type) + && !gfc_has_alloc_comps (type, tmp, true)) + return NULL_TREE; + return decl; +} + +/* Return true if there is deep mapping, even if the number of mapping is known + at compile time. */ +bool +gfc_omp_deep_mapping_p (const gimple *ctx, tree clause) +{ + tree decl = gfc_omp_deep_mapping_int_p (ctx, clause); + if (decl == NULL_TREE) + return false; + return true; +} + +/* Handle gfc_omp_deep_mapping{,_cnt} */ +static tree +gfc_omp_deep_mapping_do (bool is_cnt, const gimple *ctx, tree clause, + unsigned HOST_WIDE_INT tkind, tree data, tree sizes, + tree kinds, tree offset_data, tree offset, + gimple_seq *seq) +{ + tree num = NULL_TREE; + location_t loc = OMP_CLAUSE_LOCATION (clause); + tree decl = gfc_omp_deep_mapping_int_p (ctx, clause); + bool poly_warned = false; + if (decl == NULL_TREE) + return NULL_TREE; + /* Handle: map(alloc:dt%cmp [len: ptr_size]) map(tofrom: D.0123...), + where GFC_DECL_SAVED_DESCRIPTOR(D.0123) is the same (here: dt%cmp). */ + if (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_MAP + && (OMP_CLAUSE_MAP_KIND (clause) == GOMP_MAP_ALLOC + || OMP_CLAUSE_MAP_KIND (clause) == GOMP_MAP_PRESENT_ALLOC)) + { + tree c = clause; + while ((c = OMP_CLAUSE_CHAIN (c)) != NULL_TREE) + { + if (!gfc_omp_deep_map_kind_p (c)) + continue; + tree d = gfc_omp_deep_mapping_int_p (ctx, c); + if (d != NULL_TREE && operand_equal_p (decl, d, 0)) + return NULL_TREE; + } + } + tree type = TREE_TYPE (decl); + if (POINTER_TYPE_P (type)) + type = TREE_TYPE (type); + if (POINTER_TYPE_P (type)) + type = TREE_TYPE (type); + bool poly = gfc_is_polymorphic_nonptr (type); + + if (is_cnt) + { + num = build_decl (loc, VAR_DECL, + create_tmp_var_name ("n_deepmap"), size_type_node); + tree tmp = fold_build2_loc (loc, MODIFY_EXPR, size_type_node, num, + build_int_cst (size_type_node, 0)); + gimple_add_tmp_var (num); + gimplify_and_add (tmp, seq); + } + else + gcc_assert (short_unsigned_type_node == TREE_TYPE (TREE_TYPE (kinds))); + + bool do_copy = poly; + bool do_alloc_check = false; + tree token = NULL_TREE; + tree tmp = decl; + if (poly) + { + tmp = TYPE_FIELDS (type); + type = TREE_TYPE (tmp); + } + else + while (TREE_CODE (tmp) == COMPONENT_REF || TREE_CODE (tmp) == ARRAY_REF) + tmp = TREE_OPERAND (tmp, TREE_CODE (tmp) == COMPONENT_REF ? 1 : 0); + /* If the clause argument is nonallocatable, skip is-allocate check. */ + if (GFC_DECL_GET_SCALAR_ALLOCATABLE (tmp) + || GFC_DECL_GET_SCALAR_POINTER (tmp) + || (GFC_DESCRIPTOR_TYPE_P (type) + && (GFC_TYPE_ARRAY_AKIND (type) == GFC_ARRAY_ALLOCATABLE + || GFC_TYPE_ARRAY_AKIND (type) == GFC_ARRAY_POINTER + || GFC_TYPE_ARRAY_AKIND (type) == GFC_ARRAY_POINTER_CONT))) + do_alloc_check = true; + + if (!is_cnt + && OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_MAP + && (tkind == GOMP_MAP_ALLOC + || (tkind == GOMP_MAP_FROM + && (gimple_omp_target_kind (ctx) + != GF_OMP_TARGET_KIND_EXIT_DATA))) + && (poly || gfc_omp_replace_alloc_by_to_mapping (type, tmp, true))) + OMP_CLAUSE_SET_MAP_KIND (clause, tkind == GOMP_MAP_ALLOC ? GOMP_MAP_TO + : GOMP_MAP_TOFROM); + + /* TODO: For map(a(:)), we know it is present & allocated. */ + + tree present = (DECL_P (decl) ? gfc_omp_check_optional_argument (decl, true) + : NULL_TREE); + if (POINTER_TYPE_P (TREE_TYPE (decl)) + && POINTER_TYPE_P (TREE_TYPE (TREE_TYPE (decl)))) + decl = build_fold_indirect_ref (decl); + if (present) + { + tree then_label = create_artificial_label (loc); + tree end_label = create_artificial_label (loc); + gimple_seq seq2 = NULL; + tmp = force_gimple_operand (present, &seq2, true, NULL_TREE); + gimple_seq_add_seq (seq, seq2); + gimple_seq_add_stmt (seq, + gimple_build_cond_from_tree (present, + then_label, end_label)); + gimple_seq_add_stmt (seq, gimple_build_label (then_label)); + gfc_omp_deep_mapping_item (is_cnt, do_copy, do_alloc_check, loc, decl, + &token, tkind, data, sizes, kinds, + offset_data, offset, num, seq, ctx, + &poly_warned); + gimple_seq_add_stmt (seq, gimple_build_label (end_label)); + } + else + gfc_omp_deep_mapping_item (is_cnt, do_copy, do_alloc_check, loc, decl, + &token, tkind, data, sizes, kinds, offset_data, + offset, num, seq, ctx, &poly_warned); + /* Multiply by 2 as there are two mappings: data + pointer assign. */ + if (is_cnt) + gimplify_assign (num, + fold_build2_loc (loc, MULT_EXPR, + size_type_node, num, + build_int_cst (size_type_node, 2)), seq); + return num; +} + +/* Return tree with a variable which contains the count of deep-mappyings + (value depends, e.g., on allocation status) */ +tree +gfc_omp_deep_mapping_cnt (const gimple *ctx, tree clause, gimple_seq *seq) +{ + return gfc_omp_deep_mapping_do (true, ctx, clause, 0, NULL_TREE, NULL_TREE, + NULL_TREE, NULL_TREE, NULL_TREE, seq); +} + +/* Does the actual deep mapping. */ +void +gfc_omp_deep_mapping (const gimple *ctx, tree clause, + unsigned HOST_WIDE_INT tkind, tree data, + tree sizes, tree kinds, tree offset_data, tree offset, + gimple_seq *seq) +{ + (void) gfc_omp_deep_mapping_do (false, ctx, clause, tkind, data, sizes, kinds, + offset_data, offset, seq); +} + /* Return true if DECL is a scalar variable (for the purpose of implicit firstprivatization/mapping). Only if 'ptr_alloc_ok.' is true, allocatables and pointers are permitted. */ @@ -2478,6 +3265,18 @@ gfc_trans_omp_array_section (stmtblock_t *block, gfc_exec_op op, elemsz = fold_convert (gfc_array_index_type, elemsz); OMP_CLAUSE_SIZE (node) = fold_build2 (MULT_EXPR, gfc_array_index_type, OMP_CLAUSE_SIZE (node), elemsz); + if (n->expr->ts.type == BT_DERIVED + && n->expr->ts.u.derived->attr.alloc_comp) + { + /* Save array descriptor for use in gfc_omp_deep_mapping{,_p,_cnt}; + force evaluate to ensure that it is not gimplified + is a decl. */ + tree tmp = OMP_CLAUSE_SIZE (node); + tree var = gfc_create_var (TREE_TYPE (tmp), NULL); + gfc_add_modify_loc (input_location, block, var, tmp); + OMP_CLAUSE_SIZE (node) = var; + gfc_allocate_lang_decl (var); + GFC_DECL_SAVED_DESCRIPTOR (var) = se.expr; + } } gcc_assert (se.post.head == NULL_TREE); gcc_assert (POINTER_TYPE_P (TREE_TYPE (ptr))); @@ -3213,8 +4012,9 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, if (!n->sym->attr.referenced) continue; + location_t map_loc = gfc_get_location (&n->where); bool always_modifier = false; - tree node = build_omp_clause (input_location, OMP_CLAUSE_MAP); + tree node = build_omp_clause (map_loc, OMP_CLAUSE_MAP); tree node2 = NULL_TREE; tree node3 = NULL_TREE; tree node4 = NULL_TREE; @@ -3361,7 +4161,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, && n->u.map.op != OMP_MAP_RELEASE) { gcc_assert (n->sym->ts.u.cl->backend_decl); - node5 = build_omp_clause (input_location, OMP_CLAUSE_MAP); + node5 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node5, GOMP_MAP_ALWAYS_TO); OMP_CLAUSE_DECL (node5) = n->sym->ts.u.cl->backend_decl; OMP_CLAUSE_SIZE (node5) @@ -3378,7 +4178,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, ptr = build_fold_indirect_ref (ptr); OMP_CLAUSE_DECL (node) = ptr; OMP_CLAUSE_SIZE (node) = gfc_class_vtab_size_get (decl); - node2 = build_omp_clause (input_location, OMP_CLAUSE_MAP); + node2 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node2, GOMP_MAP_ATTACH_DETACH); OMP_CLAUSE_DECL (node2) = gfc_class_data_get (decl); OMP_CLAUSE_SIZE (node2) = size_int (0); @@ -3434,8 +4234,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, size = TYPE_SIZE_UNIT (TREE_TYPE (decl)); else size = size_int (0); - node4 = build_omp_clause (input_location, - OMP_CLAUSE_MAP); + node4 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node4, gmk); OMP_CLAUSE_DECL (node4) = decl; OMP_CLAUSE_SIZE (node4) = size; @@ -3459,8 +4258,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, size = TYPE_SIZE_UNIT (TREE_TYPE (decl)); else size = size_int (0); - node3 = build_omp_clause (input_location, - OMP_CLAUSE_MAP); + node3 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node3, gmk); OMP_CLAUSE_DECL (node3) = decl; OMP_CLAUSE_SIZE (node3) = size; @@ -3477,7 +4275,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, gcc_assert (POINTER_TYPE_P (TREE_TYPE (ptr))); ptr = build_fold_indirect_ref (ptr); OMP_CLAUSE_DECL (node) = ptr; - node2 = build_omp_clause (input_location, OMP_CLAUSE_MAP); + node2 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); OMP_CLAUSE_DECL (node2) = decl; OMP_CLAUSE_SIZE (node2) = TYPE_SIZE_UNIT (type); if (n->u.map.op == OMP_MAP_DELETE) @@ -3493,8 +4291,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, && n->u.map.op != OMP_MAP_DELETE && n->u.map.op != OMP_MAP_RELEASE) { - node3 = build_omp_clause (input_location, - OMP_CLAUSE_MAP); + node3 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); if (present) { ptr = gfc_conv_descriptor_data_get (decl); @@ -3634,10 +4431,10 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, { /* A single indirectref is handled by the middle end. */ gcc_assert (!POINTER_TYPE_P (TREE_TYPE (decl))); - decl = TREE_OPERAND (decl, 0); - decl = gfc_build_cond_assign_expr (block, present, decl, + tree tmp = TREE_OPERAND (decl, 0); + tmp = gfc_build_cond_assign_expr (block, present, tmp, null_pointer_node); - OMP_CLAUSE_DECL (node) = build_fold_indirect_ref (decl); + OMP_CLAUSE_DECL (node) = build_fold_indirect_ref (tmp); } else OMP_CLAUSE_DECL (node) = decl; @@ -3672,6 +4469,33 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, size = gfc_evaluate_now (size, block); OMP_CLAUSE_SIZE (node) = size; } + if ((TREE_CODE (decl) != PARM_DECL + || DECL_ARTIFICIAL (OMP_CLAUSE_DECL (node))) + && n->sym->ts.type == BT_DERIVED + && n->sym->ts.u.derived->attr.alloc_comp) + { + /* Save array descriptor for use in + gfc_omp_deep_mapping{,_p,_cnt}; force evaluate + to ensure that it is not gimplified + is a decl. */ + tree tmp = OMP_CLAUSE_SIZE (node); + if (tmp == NULL_TREE) + tmp = DECL_P (decl) ? DECL_SIZE_UNIT (decl) + : TYPE_SIZE_UNIT (TREE_TYPE (decl)); + tree var = gfc_create_var (TREE_TYPE (tmp), NULL); + gfc_add_modify_loc (input_location, block, var, tmp); + OMP_CLAUSE_SIZE (node) = var; + gfc_allocate_lang_decl (var); + if (TREE_CODE (decl) == INDIRECT_REF) + decl = TREE_OPERAND (decl, 0); + if (TREE_CODE (decl) == INDIRECT_REF) + decl = TREE_OPERAND (decl, 0); + if (DECL_LANG_SPECIFIC (decl) + && GFC_DECL_SAVED_DESCRIPTOR (decl)) + GFC_DECL_SAVED_DESCRIPTOR (var) + = GFC_DECL_SAVED_DESCRIPTOR (decl); + else + GFC_DECL_SAVED_DESCRIPTOR (var) = decl; + } } else if (n->expr && n->expr->expr_type == EXPR_VARIABLE @@ -3727,8 +4551,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, goto finalize_map_clause; } - node2 = build_omp_clause (input_location, - OMP_CLAUSE_MAP); + node2 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node2, GOMP_MAP_ATTACH_DETACH); OMP_CLAUSE_DECL (node2) = POINTER_TYPE_P (TREE_TYPE (se.expr)) @@ -3754,13 +4577,37 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, kind = GOMP_MAP_RELEASE; else kind = GOMP_MAP_TO; - node3 = build_omp_clause (input_location, - OMP_CLAUSE_MAP); + node3 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node3, kind); OMP_CLAUSE_DECL (node3) = se.string_length; OMP_CLAUSE_SIZE (node3) = TYPE_SIZE_UNIT (gfc_charlen_type_node); } + if (!openacc + && n->expr->ts.type == BT_DERIVED + && n->expr->ts.u.derived->attr.alloc_comp) + { + /* Save array descriptor for use in + gfc_omp_deep_mapping{,_p,_cnt}; force evaluate + to ensure that it is not gimplified + is a decl. */ + tree tmp = OMP_CLAUSE_SIZE (node); + if (tmp == NULL_TREE) + tmp = (DECL_P (se.expr) + ? DECL_SIZE_UNIT (se.expr) + : TYPE_SIZE_UNIT (TREE_TYPE (se.expr))); + tree var = gfc_create_var (TREE_TYPE (tmp), NULL); + gfc_add_modify_loc (input_location, block, var, tmp); + OMP_CLAUSE_SIZE (node) = var; + gfc_allocate_lang_decl (var); + if (TREE_CODE (se.expr) == INDIRECT_REF) + se.expr = TREE_OPERAND (se.expr, 0); + if (DECL_LANG_SPECIFIC (se.expr) + && GFC_DECL_SAVED_DESCRIPTOR (se.expr)) + GFC_DECL_SAVED_DESCRIPTOR (var) + = GFC_DECL_SAVED_DESCRIPTOR (se.expr); + else + GFC_DECL_SAVED_DESCRIPTOR (var) = se.expr; + } } } else if (n->expr @@ -3800,7 +4647,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, && (lastref->u.c.component->ts.type == BT_DERIVED || lastref->u.c.component->ts.type == BT_CLASS)) { - if (pointer || (openacc && allocatable)) + if (pointer || allocatable) { /* If it's a bare attach/detach clause, we just want to perform a single attach/detach operation, of the @@ -3880,8 +4727,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, OMP_CLAUSE_DECL (node) = data; OMP_CLAUSE_SIZE (node) = size; - node2 = build_omp_clause (input_location, - OMP_CLAUSE_MAP); + node2 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node2, GOMP_MAP_ATTACH_DETACH); OMP_CLAUSE_DECL (node2) = build_fold_addr_expr (data); @@ -3893,6 +4739,22 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, OMP_CLAUSE_SIZE (node) = TYPE_SIZE_UNIT (TREE_TYPE (inner)); } + if (!openacc + && n->expr->ts.type == BT_DERIVED + && n->expr->ts.u.derived->attr.alloc_comp) + { + /* Save array descriptor for use in + gfc_omp_deep_mapping{,_p,_cnt}; force evaluate + to ensure that it is not gimplified + is a decl. */ + tree tmp = OMP_CLAUSE_SIZE (node); + tree var = gfc_create_var (TREE_TYPE (tmp), NULL); + gfc_add_modify_loc (input_location, block, var, tmp); + OMP_CLAUSE_SIZE (node) = var; + gfc_allocate_lang_decl (var); + if (TREE_CODE (inner) == INDIRECT_REF) + inner = TREE_OPERAND (inner, 0); + GFC_DECL_SAVED_DESCRIPTOR (var) = inner; + } } else if (lastref->type == REF_ARRAY && lastref->u.ar.type == AR_FULL) @@ -3952,8 +4814,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, elemsz = TYPE_SIZE_UNIT (elemsz); elemsz = fold_build2 (MULT_EXPR, size_type_node, len, elemsz); - node4 = build_omp_clause (input_location, - OMP_CLAUSE_MAP); + node4 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node4, map_kind); OMP_CLAUSE_DECL (node4) = se.string_length; OMP_CLAUSE_SIZE (node4) @@ -3963,8 +4824,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, OMP_CLAUSE_SIZE (node) = fold_build2 (MULT_EXPR, gfc_array_index_type, OMP_CLAUSE_SIZE (node), elemsz); - node2 = build_omp_clause (input_location, - OMP_CLAUSE_MAP); + node2 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); if (map_kind == GOMP_MAP_RELEASE || map_kind == GOMP_MAP_DELETE) { @@ -3978,6 +4838,23 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, OMP_CLAUSE_SIZE (node2) = TYPE_SIZE_UNIT (type); if (!openacc) { + if (n->expr->ts.type == BT_DERIVED + && n->expr->ts.u.derived->attr.alloc_comp) + { + /* Save array descriptor for use + in gfc_omp_deep_mapping{,_p,_cnt}; force + evaluate to ensure that it is + not gimplified + is a decl. */ + tree tmp = OMP_CLAUSE_SIZE (node); + tree var = gfc_create_var (TREE_TYPE (tmp), + NULL); + gfc_add_modify_loc (map_loc, block, + var, tmp); + OMP_CLAUSE_SIZE (node) = var; + gfc_allocate_lang_decl (var); + GFC_DECL_SAVED_DESCRIPTOR (var) = inner; + } + gfc_omp_namelist *n2 = clauses->lists[OMP_LIST_MAP]; @@ -4035,8 +4912,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, if (drop_mapping) continue; } - node3 = build_omp_clause (input_location, - OMP_CLAUSE_MAP); + node3 = build_omp_clause (map_loc, OMP_CLAUSE_MAP); OMP_CLAUSE_SET_MAP_KIND (node3, GOMP_MAP_ATTACH_DETACH); OMP_CLAUSE_DECL (node3) @@ -4107,7 +4983,8 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses, default: gcc_unreachable (); } - tree node = build_omp_clause (input_location, clause_code); + tree node = build_omp_clause (gfc_get_location (&n->where), + clause_code); if (n->expr == NULL || (n->expr->ref->type == REF_ARRAY && n->expr->ref->u.ar.type == AR_FULL diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h index 63a566a..ae7be9f 100644 --- a/gcc/fortran/trans.h +++ b/gcc/fortran/trans.h @@ -839,6 +839,10 @@ tree gfc_omp_clause_assign_op (tree, tree, tree); tree gfc_omp_clause_linear_ctor (tree, tree, tree, tree); tree gfc_omp_clause_dtor (tree, tree); void gfc_omp_finish_clause (tree, gimple_seq *, bool); +bool gfc_omp_deep_mapping_p (const gimple *, tree); +tree gfc_omp_deep_mapping_cnt (const gimple *, tree, gimple_seq *); +void gfc_omp_deep_mapping (const gimple *, tree, unsigned HOST_WIDE_INT, tree, + tree, tree, tree, tree, gimple_seq *); bool gfc_omp_allocatable_p (tree); bool gfc_omp_scalar_p (tree, bool); bool gfc_omp_scalar_target_p (tree); diff --git a/gcc/ginclude/stddef.h b/gcc/ginclude/stddef.h index 0d53103..bacf24d 100644 --- a/gcc/ginclude/stddef.h +++ b/gcc/ginclude/stddef.h @@ -89,6 +89,21 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #undef _PTRDIFF_T_ #endif +/* When modular code is enabled with macOS SDKs from version 15, the + include guards are set in the includers of this code, rather than as + part of it. This means the we must unset them or the intended code + here will be bypassed (resulting in undefined values). */ +#if defined (__APPLE__) +# if defined(__has_feature) && __has_feature(modules) +# if defined (__need_ptrdiff_t) +# undef __PTRDIFF_T +# endif +# if defined (__need_size_t) +# undef __SIZE_T +# endif +# endif +#endif + /* On VxWorks, <type/vxTypesBase.h> may have defined macros like _TYPE_size_t which will typedef size_t. fixincludes patched the vxTypesBase.h so that this macro is only defined if _GCC_SIZE_T is diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc index 26b1496..806c2bd 100644 --- a/gcc/ipa-cp.cc +++ b/gcc/ipa-cp.cc @@ -313,14 +313,24 @@ ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits) static void ipcp_print_widest_int (FILE *f, const widest_int &value) { - if (wi::eq_p (wi::bit_not (value), 0)) + if (value == -1) fprintf (f, "-1"); - else if (wi::eq_p (wi::bit_not (wi::bit_or (value, - wi::sub (wi::lshift (1, 128), - 1))), 0)) - { - fprintf (f, "all ones folled by "); - print_hex (wi::bit_and (value, wi::sub (wi::lshift (1, 128), 1)), f); + else if (wi::arshift (value, 128) == -1) + { + char buf[35], *p = buf + 2; + widest_int v = wi::zext (value, 128); + size_t len; + print_hex (v, buf); + len = strlen (p); + if (len == 32) + { + fprintf (f, "0xf..f"); + while (*p == 'f') + ++p; + } + else + fprintf (f, "0xf..f%0*d", (int) (32 - len), 0); + fputs (p, f); } else print_hex (value, f); @@ -923,13 +933,13 @@ ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask, m_mask = (m_mask | mask) | (m_value ^ value); if (drop_all_ones) m_mask |= m_value; - m_value &= ~m_mask; - widest_int cap_mask = wi::bit_not (wi::sub (wi::lshift (1, precision), 1)); + widest_int cap_mask = wi::shifted_mask <widest_int> (0, precision, true); m_mask |= cap_mask; if (wi::sext (m_mask, precision) == -1) return set_to_bottom (); + m_value &= ~m_mask; return m_mask != old_mask; } @@ -1005,7 +1015,7 @@ ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision, adjusted_mask |= adjusted_value; adjusted_value &= ~adjusted_mask; } - widest_int cap_mask = wi::bit_not (wi::sub (wi::lshift (1, precision), 1)); + widest_int cap_mask = wi::shifted_mask <widest_int> (0, precision, true); adjusted_mask |= cap_mask; if (wi::sext (adjusted_mask, precision) == -1) return set_to_bottom (); diff --git a/gcc/ipa-locality-cloning.cc b/gcc/ipa-locality-cloning.cc new file mode 100644 index 0000000..2684046 --- /dev/null +++ b/gcc/ipa-locality-cloning.cc @@ -0,0 +1,1137 @@ +/* Code locality based function cloning. + Copyright The GNU Toolchain Authors + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* This file implements cloning required to improve partitioning of the + callgraph for locality considerations. + + Partitioning for improving code locality. + This pass aims to place frequently executed callchains closer together in + memory to improve performance through improved locality. If any frequent + callchains cannot be placed together because they are already placed + elsewhere, local function clones are created and all callers near to the + clones are redirected to use this copy. + + Locality code placement is done in 2 parts. + 1. IPA pass to be executed after ipa-inline and before ipa-pure-const. + Execute stage prepares the plan to place all nodes into partitions. + 2. WPA Partition stage actually implements the plan. + + Brief overview of the IPA pass: + 1. Create and sort callchains. If PGO is available, use real profile + counts. Otherwise, use a set of heuristics to sort the callchains. + 2. Create a partition plan for the callchains, processing them in the sorted + order. + 1. If a function is unpartitioned, place it in the current partition. + 2. If a function is already placed in a partition away from current + partition as part of another callchain: + Create a local clone in current partition, if cloning criteria is + satisfied. + 3. Redirect any new caller to a local clone if one exists. + Partition size is param controlled to fine tune per program behavior. */ + +#include "config.h" +#define INCLUDE_ALGORITHM +#include "system.h" +#include "coretypes.h" +#include "target.h" +#include "function.h" +#include "tree.h" +#include "alloc-pool.h" +#include "tree-pass.h" +#include "cgraph.h" +#include "symbol-summary.h" +#include "tree-vrp.h" +#include "symtab-thunks.h" +#include "sreal.h" +#include "ipa-cp.h" +#include "ipa-prop.h" +#include "ipa-fnsummary.h" +#include "ipa-modref-tree.h" +#include "ipa-modref.h" +#include "symtab-clones.h" +#include "ipa-locality-cloning.h" + +/* Locality partitions, assigns nodes to partitions. These are used later in + WPA partitioning. */ +vec<locality_partition> locality_partitions; + +/* Map from original node to its latest clone. Gets overwritten whenever a new + clone is created from the same node. */ +hash_map<cgraph_node *, cgraph_node *> node_to_clone; +/* Map from clone to its original node. */ +hash_map<cgraph_node *, cgraph_node *> clone_to_node; + +/* Data structure to hold static heuristics and orders for cgraph_nodes. */ +struct locality_order +{ + cgraph_node *node; + sreal order; + locality_order (cgraph_node *node, sreal order) : node (node), order (order) + {} +}; + +/* Return true if NODE is already in some partition. */ +static inline bool +node_partitioned_p (cgraph_node *node) +{ + return node->aux; +} + +/* Add symbol NODE to partition PART. */ +static void +add_node_to_partition (locality_partition part, cgraph_node *node) +{ + struct cgraph_edge *e; + if (node_partitioned_p (node)) + return; + + part->nodes.safe_push (node); + node->aux = (void *) (uintptr_t) (part->part_id); + + if (!node->alias && node->get_partitioning_class () == SYMBOL_PARTITION) + part->insns += ipa_size_summaries->get (node)->size; + + /* Add all inline clones and callees that are duplicated. */ + for (e = node->callees; e; e = e->next_callee) + if (!e->inline_failed) + add_node_to_partition (part, e->callee); + /* omp declare_variant_alt or transparent_alias with definition or linker + discardable (non-local comdat but not forced and not + used by non-LTO). */ + else if (e->callee->get_partitioning_class () == SYMBOL_DUPLICATE) + add_node_to_partition (part, e->callee); + + /* Add all thunks associated with the function. */ + for (e = node->callers; e; e = e->next_caller) + if (e->caller->thunk && !e->caller->inlined_to) + add_node_to_partition (part, e->caller); + + /* Add all aliases associated with the symbol. */ + struct ipa_ref *ref; + FOR_EACH_ALIAS (node, ref) + if (!ref->referring->transparent_alias) + { + cgraph_node *referring = dyn_cast<cgraph_node *> (ref->referring); + /* Only add function aliases. + Varpool refs are added later in LTO partitioning pass. */ + if (referring) + add_node_to_partition (part, referring); + } + else + { + struct ipa_ref *ref2; + /* We do not need to add transparent aliases if they are not used. + However we must add aliases of transparent aliases if they exist. */ + FOR_EACH_ALIAS (ref->referring, ref2) + { + /* Nested transparent aliases are not permitted. */ + gcc_checking_assert (!ref2->referring->transparent_alias); + cgraph_node *referring = dyn_cast<cgraph_node *> (ref2->referring); + if (referring) + add_node_to_partition (part, referring); + } + } +} + +/* Return TRUE if NODE is in PARTITION. */ +static bool +node_in_partition_p (locality_partition partition, cgraph_node *node) +{ + return ((uintptr_t) (partition->part_id) == (uintptr_t) (node->aux)); +} + +/* Helper function for qsort; to break ties. */ +static int +compare_node_uids (cgraph_node *n1, cgraph_node *n2) +{ + int res = n1->get_uid () - n2->get_uid (); + gcc_assert (res != 0); + return res > 0 ? 1 : -1; +} + +/* Helper function for qsort; sort nodes by order. */ +static int +static_profile_cmp (const void *pa, const void *pb) +{ + const locality_order *a = *static_cast<const locality_order *const *> (pa); + const locality_order *b = *static_cast<const locality_order *const *> (pb); + /* Ascending order. */ + if (b->order < a->order) + return 1; + if (b->order > a->order) + return -1; + return compare_node_uids (a->node, b->node); +} + +/* Helper function for qsort; sort nodes by profile count. */ +static int +compare_edge_profile_counts (const void *pa, const void *pb) +{ + const locality_order *a = *static_cast<const locality_order *const *> (pa); + const locality_order *b = *static_cast<const locality_order *const *> (pb); + + profile_count cnt1 = a->node->count.ipa (); + profile_count cnt2 = b->node->count.ipa (); + if (!cnt1.compatible_p (cnt2)) + return static_profile_cmp (pa, pb); + + if (cnt1 < cnt2) + return 1; + if (cnt1 > cnt2) + return -1; + return static_profile_cmp (pa, pb); +} + +/* Create and return a new partition and increment NPARTITIONS. */ + +static locality_partition +create_partition (int &npartitions) +{ + locality_partition part = XCNEW (struct locality_partition_def); + npartitions++; + part->part_id = npartitions; + part->nodes.create (1); + part->insns = 0; + locality_partitions.safe_push (part); + return part; +} + +/* Structure for holding profile count information of callers of a node. */ +struct profile_stats +{ + /* Sum of non-recursive call counts. */ + profile_count nonrec_count; + + /* Sum of recursive call counts. */ + profile_count rec_count; + + /* If non-NULL, this node is the target of alias or thunk and calls from this + should be count in rec_count. */ + cgraph_node *target; +}; + +/* Initialize fields of STATS. */ +static inline void +init_profile_stats (profile_stats *stats, cgraph_node *target = NULL) +{ + stats->nonrec_count = profile_count::zero (); + stats->rec_count = profile_count::zero (); + stats->target = target; +} + +/* Helper function of to accumulate call counts. */ +static bool +accumulate_profile_counts_after_cloning (cgraph_node *node, void *data) +{ + struct profile_stats *stats = (struct profile_stats *) data; + for (cgraph_edge *e = node->callers; e; e = e->next_caller) + { + if (!e->count.initialized_p ()) + continue; + + if (e->caller == stats->target) + stats->rec_count += e->count.ipa (); + else + stats->nonrec_count += e->count.ipa (); + } + return false; +} + +/* NEW_NODE is a previously created clone of ORIG_NODE already present in + current partition. EDGES contains newly redirected edges to NEW_NODE. + Adjust profile information for both nodes and the edge. */ + +static void +adjust_profile_info_for_non_self_rec_edges (auto_vec<cgraph_edge *> &edges, + cgraph_node *new_node, + cgraph_node *orig_node) +{ + profile_count orig_node_count = orig_node->count.ipa (); + profile_count edge_count = profile_count::zero (); + profile_count final_new_count = profile_count::zero (); + profile_count final_orig_count = profile_count::zero (); + + for (unsigned i = 0; i < edges.length (); ++i) + if (edges[i]->count.initialized_p ()) + edge_count += edges[i]->count.ipa (); + + final_orig_count = orig_node_count - edge_count; + + /* NEW_NODE->count was adjusted for other callers when the clone was + first created. Just add the new edge count. */ + final_new_count = new_node->count + edge_count; + + final_new_count = orig_node_count.combine_with_ipa_count (final_new_count); + orig_node->count = final_orig_count; + new_node->count = final_new_count; + + if (dump_file) + { + fprintf (dump_file, "Adjusting profile information for %s\n", + new_node->dump_asm_name ()); + fprintf (dump_file, "\tOriginal node %s\n", orig_node->dump_asm_name ()); + fprintf (dump_file, "\tOriginal count: "); + orig_node_count.dump (dump_file); + fprintf (dump_file, "\n\tAdjusted original count to: "); + final_orig_count.dump (dump_file); + fprintf (dump_file, "\n\tAdjusted clone count to: "); + final_new_count.dump (dump_file); + fprintf (dump_file, "\n"); + } + + /* Scale all callee edges according to adjusted counts. */ + profile_count orig_node_count_copy = orig_node_count; + profile_count::adjust_for_ipa_scaling (&final_new_count, + &orig_node_count_copy); + for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee) + cs->count = cs->count.apply_scale (final_new_count, orig_node_count_copy); + for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee) + cs->count = cs->count.apply_scale (final_new_count, orig_node_count_copy); + + profile_count::adjust_for_ipa_scaling (&final_orig_count, &orig_node_count); + for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee) + cs->count = cs->count.apply_scale (final_orig_count, orig_node_count); + for (cgraph_edge *cs = orig_node->indirect_calls; cs; cs = cs->next_callee) + cs->count = cs->count.apply_scale (final_orig_count, orig_node_count); +} + +/* Adjust profile counts of NEW_NODE and ORIG_NODE, where NEW_NODE is a clone + of OLD_NODE. + Assumes that all eligible edges from current partition so far are redirected + to NEW_NODE and recursive edges are adjusted. */ + +static void +adjust_profile_info (cgraph_node *new_node, cgraph_node *orig_node) +{ + /* If all calls to NEW_NODE are non-recursive, subtract corresponding count + from ORIG_NODE and assign to NEW_NODE, any unexpected remainder stays with + ORIG_NODE. + Recursive calls if present, likely contribute to majority of count; + scale according to redirected callers' count. */ + + profile_count orig_node_count = orig_node->count.ipa (); + profile_stats new_stats, orig_stats; + + init_profile_stats (&new_stats); + init_profile_stats (&orig_stats); + + new_node->call_for_symbol_thunks_and_aliases + (accumulate_profile_counts_after_cloning, &new_stats, false); + orig_node->call_for_symbol_thunks_and_aliases + (accumulate_profile_counts_after_cloning, &orig_stats, false); + + profile_count orig_nonrec_count = orig_stats.nonrec_count; + profile_count orig_rec_count = orig_stats.rec_count; + profile_count new_nonrec_count = new_stats.nonrec_count; + profile_count new_rec_count = new_stats.rec_count; + + profile_count final_new_count = new_nonrec_count; + profile_count final_orig_count = profile_count::zero (); + + /* All calls to NEW_NODE are non-recursive or recursive calls have + zero count. */ + if (!new_rec_count.nonzero_p ()) + final_orig_count = orig_node_count - new_nonrec_count; + else + { + /* If ORIG_NODE is externally visible, indirect calls or calls from + another part of the code may contribute to the count. + update_profiling_info () from ipa-cp.cc pretends to have an extra + caller to represent the extra counts. */ + if (!orig_node->local) + { + profile_count pretend_count = (orig_node_count - new_nonrec_count - + orig_nonrec_count - orig_rec_count); + orig_nonrec_count += pretend_count; + } + + /* Remaining rec_count is assigned in proportion to clone's non-recursive + count. */ + profile_count rec_count = orig_node_count - new_nonrec_count + - orig_nonrec_count; + profile_count new_rec_scaled + = rec_count.apply_scale (new_nonrec_count, + new_nonrec_count + orig_nonrec_count); + final_new_count += new_rec_scaled; + final_orig_count = orig_node_count - final_new_count; + } + + final_new_count = orig_node_count.combine_with_ipa_count (final_new_count); + new_node->count = final_new_count; + orig_node->count = final_orig_count; + + if (dump_file) + { + fprintf (dump_file, "Adjusting profile information for %s\n", + new_node->dump_asm_name ()); + fprintf (dump_file, "\tOriginal node %s\n", orig_node->dump_asm_name ()); + fprintf (dump_file, "\tOriginal count: "); + orig_node_count.dump (dump_file); + fprintf (dump_file, "\n\tAdjusted original count to: "); + final_orig_count.dump (dump_file); + fprintf (dump_file, "\n\tAdjusted clone count to: "); + final_new_count.dump (dump_file); + fprintf (dump_file, "\n"); + } + + /* Scale all callee edges according to adjusted counts. */ + profile_count orig_node_count_copy = orig_node_count; + profile_count::adjust_for_ipa_scaling (&final_new_count, + &orig_node_count_copy); + for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee) + cs->count = cs->count.apply_scale (final_new_count, orig_node_count_copy); + for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee) + cs->count = cs->count.apply_scale (final_new_count, orig_node_count_copy); + + profile_count::adjust_for_ipa_scaling (&final_orig_count, &orig_node_count); + for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee) + cs->count = cs->count.apply_scale (final_orig_count, orig_node_count); + for (cgraph_edge *cs = orig_node->indirect_calls; cs; cs = cs->next_callee) + cs->count = cs->count.apply_scale (final_orig_count, orig_node_count); +} + +/* Return true if EDGE can be safely redirected to another callee. */ +static inline bool +edge_redirectable_p (cgraph_edge *edge, lto_locality_cloning_model cm) +{ + if (cm == LTO_LOCALITY_NON_INTERPOSABLE_CLONING) + { + /* Interposability may change on edge basis. */ + enum availability avail; + avail = edge->callee->get_availability (edge->caller); + if (avail <= AVAIL_INTERPOSABLE) + return false; + } + return true; +} + +/* Create a locality clone of CNODE and redirect all callers present in + PARTITION. + Create a clone dpending on whether CNODE itself is a clone or not. */ + +static cgraph_node * +create_locality_clone (cgraph_node *cnode, + locality_partition partition, int &cl_num, + lto_locality_cloning_model cm) +{ + cgraph_node *cl_node = NULL; + vec<cgraph_edge *> redirect_callers = vNULL; + /* All callers of cnode in current partition are redirected. */ + struct cgraph_edge *edge; + for (edge = cnode->callers; edge; edge = edge->next_caller) + { + struct cgraph_node *caller = edge->caller; + if (node_in_partition_p (partition, caller) && caller->definition + && caller != cnode && edge_redirectable_p (edge, cm)) + redirect_callers.safe_push (edge); + } + + const char *suffix = "locality_clone"; + + tree old_decl = cnode->decl; + tree new_decl = copy_node (old_decl); + + /* Generate a new name for the new version. */ + const char *name = IDENTIFIER_POINTER (DECL_NAME (old_decl)); + DECL_NAME (new_decl) = clone_function_name (name, suffix, cl_num); + SET_DECL_ASSEMBLER_NAME (new_decl, + clone_function_name (old_decl, suffix, cl_num)); + cl_num++; + if (dump_file) + fprintf (dump_file, "\tNew name %s\n", + IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (new_decl))); + + cl_node = cnode->create_clone (new_decl, cnode->count /*profile_count*/, + false /*update_original*/, redirect_callers, + false /*call_duplication_hook*/, + NULL /*new_inlined_to*/, + NULL /*param_adjustments*/, suffix); + + set_new_clone_decl_and_node_flags (cl_node); + + if (cnode->ipa_transforms_to_apply.exists ()) + cl_node->ipa_transforms_to_apply + = cnode->ipa_transforms_to_apply.copy (); + + if (dump_file) + { + fprintf (dump_file, "Cloned Node: %s %s\n", cnode->dump_asm_name (), + cl_node->dump_asm_name ()); + + for (edge = cl_node->callers; edge; edge = edge->next_caller) + fprintf (dump_file, "Redirected callers: %s\n", + edge->caller->dump_asm_name ()); + + for (edge = cl_node->callees; edge; edge = edge->next_callee) + fprintf (dump_file, "Callees of clone: %s %d\n", + edge->callee->dump_asm_name (), edge->frequency ()); + } + return cl_node; +} + +/* Redirect recursive edges of CLONE to correctly point to CLONE. As part of + cloning process, all callee edges of a node are just duplicated but not + redirected. Therefore, these edges still call to original of CLONE. + + For non-inlined CLONEs, NEW_CALLEE == CLONE and ORIG_CALLEE is CLONE's + original node. + + For inlined node, self recursion to CLONE's original same as non-inlined, + additionally, calls to CLONE->inlined_to are also recursive: + NEW_CALLEE == CLONE->inlined_into and + ORIG_CALLEE == original node of CLONE->inlined_into. */ + +static void +adjust_recursive_callees (cgraph_node *clone, cgraph_node *new_callee, + cgraph_node *orig_callee) +{ + cgraph_node *alias = NULL; + for (cgraph_edge *e = clone->callees; e; e = e->next_callee) + { + if (!e->inline_failed) + continue; + + /* Only self-cycle or local alias are handled. */ + cgraph_node *callee = e->callee; + if (callee == orig_callee) + { + cgraph_node **cl = node_to_clone.get (orig_callee); + gcc_assert (cl && *cl == new_callee); + e->redirect_callee_duplicating_thunks (new_callee); + if (dump_file) + fprintf (dump_file, "recursive call from %s to %s orig %s\n", + e->caller->dump_asm_name (), e->callee->dump_asm_name (), + callee->dump_asm_name ()); + } + else if (callee->alias + && e->callee->ultimate_alias_target () == orig_callee) + { + if (!alias) + { + alias = dyn_cast<cgraph_node *> ( + new_callee->noninterposable_alias ()); + } + e->redirect_callee_duplicating_thunks (alias); + if (dump_file) + fprintf (dump_file, "recursive call from %s to %s orig %s\n", + e->caller->dump_asm_name (), e->callee->dump_asm_name (), + callee->dump_asm_name ()); + } + } + new_callee->expand_all_artificial_thunks (); + if (alias) + alias->expand_all_artificial_thunks (); +} + +/* Create clones for CALLER's inlined callees, ORIG_INLINED_TO is the original + node from clone_as_needed () such that new_inlined_to is a clone of it. */ + +static void +inline_clones (cgraph_node *caller, cgraph_node *orig_inlined_to) +{ + struct cgraph_edge *edge; + for (edge = caller->callees; edge; edge = edge->next_callee) + { + struct cgraph_node *callee = edge->callee; + if (edge->inline_failed) + continue; + + if (callee->inlined_to != orig_inlined_to) + continue; + + struct cgraph_node *new_inlined_to, *cl; + if (caller->inlined_to) + new_inlined_to = caller->inlined_to; + else + new_inlined_to = caller; + + cl = callee->create_clone (callee->decl, + edge->count /*profile_count*/, + true /*update_original*/, + vNULL /*redirect_callers*/, + false /*call_duplication_hook*/, + new_inlined_to /*new_inlined_to*/, + NULL /*param_adjustments*/, + "locality_clone" /*suffix*/); + edge->redirect_callee (cl); + + node_to_clone.put (callee, cl); + clone_to_node.put (cl, callee); + + if (callee->thunk) + { + thunk_info *info = thunk_info::get (callee); + *thunk_info::get_create (cl) = *info; + } + + adjust_recursive_callees (cl, new_inlined_to, orig_inlined_to); + adjust_recursive_callees (cl, cl, callee); + if (dump_file) + { + fprintf (dump_file, "Inline cloned\n"); + cl->dump (dump_file); + } + + /* Recursively inline till end of this callchain. */ + inline_clones (cl, orig_inlined_to); + } +} + +/* Clone EDGE->CALLEE if it or a clone of it is not already in PARTITION. + Redirect all callers of EDGE->CALLEE that are in PARTITION, not just the + EDGE. If a clone is already present in PARTITION, redirect all edges from + EDGE->CALLER to EDGE->CALLEE. This is because we only visit one edge per + caller to callee and redirect for all others from there. + + If cloning, also recursively clone inlined functions till the end of the + callchain because inlined clones have 1-1 exclusive copy and edge from + caller to inlined node. + + There are 2 flows possible: + 1. Only redirect + 1.1. cnode is already in current partition - cnode mustn't be a + locality_clone -> nothing to do + 1.2. A clone of cnode is in current partition - find out if it's the + correct clone for edge - must be a locality_clone but the exact same + kind as callee i.e. orig or cp/sra clone, if yes, redirect, else go to #2 + 1.3. Cnode/a clone of cnode is in current partition but caller is inlined + 2. Clone and redirect + 2.1. cnode is original node + 2.2. cnode itself is a clone + Clone inlines + Flavors of edges: + 1. Normal -> orig nodes, locality clones or cp/sra clones + 2. Recursive -> direct recursion + 3. Alias -> recursion via aliasing or as a result of IPA code duplication + 4. Inline -> shouldn't be included in callchain. */ + +static cgraph_node * +clone_node_as_needed (cgraph_edge *edge, locality_partition partition, + int &cl_num, lto_locality_cloning_model cm) +{ + /* suitable_for_locality_cloning_p () currently prohibits cloning aliases due + to potential versioning and materialization issues. Could be enabled in + the future. suitable_for_locality_cloning_p () also checks for + interposability for CNODE but not for edge redirection. */ + struct cgraph_node *cnode = edge->callee; + struct cgraph_node *caller = edge->caller; + + /* If clone of cnode is already in the partition + Get latest clone of cnode. If current partition has cloned cnode, that + clone should be returned. Otherwise, clone from previous partition is + returned + Original node and its clone shouldn't co-exist in current partition + + This is required if callee is partitioned via another edge before caller + was, and we are now visiting caller->callee edge + + 1) a -> b ==> a -> bc1; b was cloned say via d -> bc1, a is orig + 2) ac1 -> b ==> ac1 -> bc1; b was cloned and a was just cloned + 3) a -> bc1 and bc2 present, mustn't happen, b was cloned and a was + redirected without being partitioned first. + Why will we do this again - multiple edges and something's wrong in + partition_callchain () + 4) ac1 -> bc1 ==> ac1 -> bc2; a was cloned and we already got (1) in some + other partition + 5) ac1 -> bc1 but no clone present in this PARTITION. Create from b, not + from bc1? + 6) a -> b; a -> bc0; create new clone, no clone present + 7) ac0 -> b; ac0 -> bc0 same as (6) + 8) a -> bc0 and no clone present, mustn't happen, same as (3) + + Redirect when bc1 is present and: + a -> b or ac -> b or ac -> bc0 */ + + cgraph_node *orig_cnode = cnode; + cgraph_node **o_cnode = clone_to_node.get (cnode); + if (o_cnode) + orig_cnode = *o_cnode; + + cgraph_node **cnode_cl = node_to_clone.get (orig_cnode); + + if (cnode_cl && node_in_partition_p (partition, *cnode_cl)) + { + if (node_in_partition_p (partition, caller)) + { + bool clone_p = false; + auto_vec<cgraph_edge *> redirected_edges; + for (cgraph_edge *ec = caller->callees; ec; ec = ec->next_callee) + if (ec->callee == cnode && edge_redirectable_p (ec, cm)) + { + ec->redirect_callee_duplicating_thunks (*cnode_cl); + clone_p = true; + redirected_edges.safe_push (ec); + if (dump_file) + { + fprintf (dump_file, "clone present %s %s redirecting %s\n", + cnode->dump_asm_name (), + (*cnode_cl)->dump_asm_name (), + caller->dump_asm_name ()); + } + } + if (clone_p) + { + (*cnode_cl)->expand_all_artificial_thunks (); + adjust_profile_info_for_non_self_rec_edges (redirected_edges, + *cnode_cl, cnode); + return NULL; + } + } + } + + /* Create a new clone for a -> b, ac -> b. + For ac -> bc, should be done on bc or b? + bc could be from b_cp/b_sra or b. */ + + if (orig_cnode != cnode) + { + if (dump_file) + fprintf (dump_file, "Clone of clone %s %s\n", cnode->dump_asm_name (), + orig_cnode->dump_asm_name ()); + return NULL; + } + + struct cgraph_node *cloned_node + = create_locality_clone (cnode, partition, cl_num, cm); + + gcc_assert (cloned_node); + if (!cloned_node) + return NULL; + + node_to_clone.put (cnode, cloned_node); + clone_to_node.put (cloned_node, cnode); + + adjust_recursive_callees (cloned_node, cloned_node, cnode); + symtab->call_cgraph_duplication_hooks (cnode, cloned_node); + + adjust_profile_info (cloned_node, cnode); + /* Inline clones are created iff their inlined_to == CNODE. */ + inline_clones (cloned_node, cnode); + + return cloned_node; +} + +/* Accumulate frequency of all edges from EDGE->caller to EDGE->callee. */ + +static sreal +accumulate_incoming_edge_frequency (cgraph_edge *edge) +{ + sreal count = 0; + struct cgraph_edge *e; + for (e = edge->callee->callers; e; e = e->next_caller) + { + /* Make a local decision about all edges for EDGE->caller but not the + other nodes already in the partition. Their edges will be visited + later or may have been visited before and not fit the + cut-off criteria. */ + if (e->caller == edge->caller) + count += e->sreal_frequency (); + } + return count; +} + +/* Determine if EDGE->CALLEE is suitable for cloning. It is assummed that the + callee is not an inlined node. */ + +static bool +suitable_for_locality_cloning_p (cgraph_edge *edge, + lto_locality_cloning_model cm) +{ + cgraph_node *node = edge->callee; + if (!node->versionable) + return false; + + /* Out-of-line locality clones of ipcp or sra clones will be created in this + pass after IPA inline is run. A locality clone has the same function + body and the same updated signature as the ipcp/sra clone. + This fails or asserts based on how the clone is created: + 1. If param_adjustments and tree_map are not recorded for locality clone: + clone materialization (tree_function_versioning ()) fails when + updating signature and remapping calls because clone_of (ipcp/sra + clone) and locality clone differ in param information. + 2. If param_adjustments and tree_map are provided: asserts are triggered + in fnsummary duplication because IPA inline resets some summaries. + + One inelegant solution is to provide param_adjustments and tree_map, and + then set clone_of to ipcp/sra clone's clone_of. However, this sometimes + results in segmentation fault when the compiled program is run. + Disabling clone of clones altogether for now with an aim to resolve this + is future. */ + if (node->clone_of) + return false; + + if (node->alias) + return false; + + if (edge->recursive_p ()) + return false; + + if (!node->definition) + return false; + + /* Don't clone NODE if IPA count of NODE or EDGE is zero. */ + if (!node->count.ipa ().nonzero_p () || !edge->count.ipa ().nonzero_p ()) + return false; + + if (cm == LTO_LOCALITY_NON_INTERPOSABLE_CLONING) + { + /* Interposability may change on edge basis. */ + enum availability avail; + edge->callee->ultimate_alias_target (&avail, edge->caller); + if (avail <= AVAIL_INTERPOSABLE) + return false; + } + + return true; +} + +/* Map from caller to all callees already visited for partitioning. */ +hash_map<cgraph_node *, auto_vec<cgraph_node *> > caller_to_callees; + +/* Partition EDGE->CALLEE into PARTITION or clone if already partitioned and + satisfies cloning criteria such as CLONING_MODEL, REAL_FREQ and SIZE + cut-offs and CLONE_FURTHER_P set by previous caller. */ + +/* callgraph can have multiple caller to callee edges for multiple callsites + For the first such edge, we make decisions about cutoffs and cloning because + we redirect ALL callsites to cloned callee, not just one of them. */ + +static void +partition_callchain (cgraph_edge *edge, locality_partition partition, + bool clone_further_p, + lto_locality_cloning_model cloning_model, + double freq_cutoff, int size, int &cl_num) +{ + /* Aliases are added in the same partition as their targets. + Aliases are not cloned and their callees are not processed separately. */ + cgraph_node *node = edge->callee->ultimate_alias_target (); + cgraph_node *caller = edge->caller; + cgraph_node *caller_node = node, *cl_node = NULL; + + /* Already visited the caller to callee edges. */ + auto_vec<cgraph_node *> &callees = caller_to_callees.get_or_insert (caller); + if (std::find (callees.begin (), callees.end (), node) != callees.end ()) + return; + + callees.safe_push (node); + + if (node->get_partitioning_class () == SYMBOL_PARTITION) + { + if (!node_partitioned_p (node)) + { + add_node_to_partition (partition, node); + if (dump_file) + fprintf (dump_file, "Partitioned node: %s\n", + node->dump_asm_name ()); + } + else if (cloning_model >= LTO_LOCALITY_NON_INTERPOSABLE_CLONING + && !node_in_partition_p (partition, node)) + { + /* Non-inlined node, or alias, already partitioned + If cut-off, don't clone callees but partition unpartitioned + callees. + size is node + inlined nodes. */ + if (clone_further_p) + { + if (!node->alias) + if (ipa_size_summaries->get (node)->size >= size) + clone_further_p = false; + + if (freq_cutoff != 0.0) + { + sreal acc_freq = accumulate_incoming_edge_frequency (edge); + if (acc_freq.to_double () < freq_cutoff) + clone_further_p = false; + } + } + + if (!suitable_for_locality_cloning_p (edge, cloning_model)) + clone_further_p = false; + + if (clone_further_p) + { + /* Try to clone NODE and its inline chain. */ + if (dump_file) + fprintf (dump_file, "Cloning node: %s\n", + node->dump_asm_name ()); + cl_node = clone_node_as_needed (edge, partition, cl_num, + cloning_model); + if (cl_node) + { + add_node_to_partition (partition, cl_node); + caller_node = cl_node; + } + else + caller_node = NULL; + } + } + } + else if (!node->inlined_to) + return; + + if (caller_node) + for (cgraph_edge *e = caller_node->callees; e; e = e->next_callee) + partition_callchain (e, partition, clone_further_p, cloning_model, + freq_cutoff, size, cl_num); +} + +/* Determine whether NODE is an entrypoint to a callchain. */ + +static bool +is_entry_node_p (cgraph_node *node) +{ + /* node->inlined_to is returned as SYMBOL_DUPLICATE. */ + if (node->get_partitioning_class () != SYMBOL_PARTITION) + return false; + + if (!node->callers) + return true; + + for (cgraph_edge *e = node->callers; e; e = e->next_caller) + { + if (! e->recursive_p ()) + return false; + } + if (node->alias + && !is_entry_node_p (node->ultimate_alias_target ())) + return false; + return true; +} + +/* Determine order of all external nodes if PGO profile is available. + Store the order in ORDER. */ + +static bool +locality_determine_ipa_order (auto_vec<locality_order *> *order) +{ + struct cgraph_node *node; + auto_vec<locality_order *> non_comparable_nodes; + FOR_EACH_DEFINED_FUNCTION (node) + if (node->get_partitioning_class () == SYMBOL_PARTITION) + { + if (node->no_reorder) + { + if (dump_file) + fprintf (dump_file, "no reorder %s\n", node->dump_asm_name ()); + return false; + } + else if (is_entry_node_p (node)) + { + profile_count pcnt = node->count.ipa (); + if (!pcnt.initialized_p () || !pcnt.ipa_p ()) + { + sreal cnt = 0; + locality_order *lo = new locality_order (node, cnt); + non_comparable_nodes.safe_push (lo); + continue; + } + sreal count = 0; + struct cgraph_edge *edge; + for (edge = node->callees; edge; edge = edge->next_callee) + { + /* For PGO, frequency is not used in + compare_edge_profile_counts (), it's used only as part of + static profile order. */ + sreal freq = edge->sreal_frequency (); + count += freq; + } + locality_order *cl = new locality_order (node, count); + order->safe_push (cl); + } + } + order->qsort (compare_edge_profile_counts); + for (auto el : non_comparable_nodes) + order->safe_push (el); + return true; +} + +/* Determine order of all external nodes if only static profile is available. + Store the order in ORDER. */ + +static bool +locality_determine_static_order (auto_vec<locality_order *> *order) +{ + struct cgraph_node *node; + FOR_EACH_DEFINED_FUNCTION (node) + if (node->get_partitioning_class () == SYMBOL_PARTITION) + { + if (node->no_reorder) + { + if (dump_file) + fprintf (dump_file, "no reorder %s\n", node->dump_asm_name ()); + return false; + } + else if (is_entry_node_p (node)) + { + sreal count = 0; + struct cgraph_edge *edge; + for (edge = node->callees; edge; edge = edge->next_callee) + { + sreal freq = edge->sreal_frequency (); + count += freq; + } + locality_order *cl = new locality_order (node, count); + order->safe_push (cl); + } + } + order->qsort (static_profile_cmp); + return true; +} + +/* Partitioning for code locality. + 1. Create and sort callchains. If PGO is available, use real profile + counts. Otherwise, use a set of heuristics to sort the callchains. + 2. Partition the external nodes and their callchains in the determined order + 2.1. If !partition, partition, else try and clone if it satisfies cloning + criteria. + 3. Partition all other unpartitioned nodes. */ + +static void +locality_partition_and_clone (int max_locality_partition_size, + lto_locality_cloning_model cloning_model, + int freq_denominator, int size) +{ + locality_partition partition; + int npartitions = 0; + + auto_vec<locality_order *> order; + auto_vec<varpool_node *> varpool_order; + struct cgraph_node *node; + bool order_p; + + int cl_num = 0; + + double real_freq = 0.0; + if (freq_denominator > 0) + real_freq = 1.0 / (double) freq_denominator; + + cgraph_node *n = symtab->first_defined_function (); + if (n && n->count.ipa_p ()) + order_p = locality_determine_ipa_order (&order); + else + order_p = locality_determine_static_order (&order); + if (!order_p) + { + if (dump_file) + { + fprintf (dump_file, "Locality partition: falling back to balanced" + "model\n"); + } + + return; + } + + int64_t partition_size + = max_locality_partition_size + ? max_locality_partition_size : param_max_partition_size; + partition = create_partition (npartitions); + + for (unsigned i = 0; i < order.length (); i++) + { + node = order[i]->node; + if (node_partitioned_p (node)) + continue; + + if (partition->insns > partition_size) + partition = create_partition (npartitions); + if (dump_file) + fprintf (dump_file, "Partition id: %d\n", partition->part_id); + + add_node_to_partition (partition, node); + if (dump_file) + fprintf (dump_file, "Ordered Node: %s\n", node->dump_asm_name ()); + + for (cgraph_edge *edge = node->callees; edge; edge = edge->next_callee) + { + /* Recursively partition the callchain of edge->callee. */ + partition_callchain (edge, partition, true, cloning_model, real_freq, + size, cl_num); + } + } + + for (unsigned i = 0; i < order.length (); i++) + delete order[i]; + order = vNULL; +} + +/* Entry point to locality-clone pass. */ +static int +lc_execute (void) +{ + symtab_node *node; + FOR_EACH_SYMBOL (node) + node->aux = NULL; + + locality_partition_and_clone (param_max_locality_partition_size, + flag_lto_locality_cloning, + param_lto_locality_frequency, + param_lto_locality_size); + + FOR_EACH_SYMBOL (node) + node->aux = NULL; + return 0; +} + +namespace { + +const pass_data pass_data_ipa_locality_clone = { + IPA_PASS, /* type */ + "locality-clone", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_IPA_LC, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + (TODO_dump_symtab | TODO_remove_functions), /* todo_flags_finish */ +}; + +class pass_ipa_locality_cloning : public ipa_opt_pass_d +{ +public: + pass_ipa_locality_cloning (gcc::context *ctxt) + : ipa_opt_pass_d (pass_data_ipa_locality_clone, ctxt, + NULL, /* generate_summary */ + NULL, /* write_summary */ + NULL, /* read_summary */ + NULL, /* write_optimization_summary */ + NULL, /* read_optimization_summary */ + NULL, /* stmt_fixup */ + 0, /* function_transform_todo_flags_start */ + NULL, /* function_transform */ + NULL) /* variable_transform */ + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return (flag_wpa && flag_ipa_reorder_for_locality); + } + + virtual unsigned int execute (function *) { return lc_execute (); } + +}; // class pass_ipa_locality_cloning + +} // namespace + +ipa_opt_pass_d * +make_pass_ipa_locality_cloning (gcc::context *ctxt) +{ + return new pass_ipa_locality_cloning (ctxt); +} diff --git a/gcc/ipa-locality-cloning.h b/gcc/ipa-locality-cloning.h new file mode 100644 index 0000000..591ce57 --- /dev/null +++ b/gcc/ipa-locality-cloning.h @@ -0,0 +1,35 @@ +/* LTO partitioning logic routines. + Copyright The GNU Toolchain Authors + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef IPA_LOCALITY_CLONING_H +#define IPA_LOCALITY_CLONING_H + +/* Structure describing locality partitions. */ +struct locality_partition_def +{ + int part_id; + vec<cgraph_node *> nodes; + int insns; +}; + +typedef struct locality_partition_def *locality_partition; + +extern vec<locality_partition> locality_partitions; + +#endif /* IPA_LOCALITY_CLONING_H */ diff --git a/gcc/lto-cgraph.cc b/gcc/lto-cgraph.cc index ac835a4..8439c51 100644 --- a/gcc/lto-cgraph.cc +++ b/gcc/lto-cgraph.cc @@ -229,6 +229,8 @@ lto_set_symtab_encoder_in_partition (lto_symtab_encoder_t encoder, symtab_node *node) { int index = lto_symtab_encoder_encode (encoder, node); + if (dump_file) + fprintf(dump_file, "Node %s, index %d\n", node->asm_name(), index); encoder->nodes[index].in_partition = true; } diff --git a/gcc/lto/ChangeLog b/gcc/lto/ChangeLog index ee53915..4da9ca3 100644 --- a/gcc/lto/ChangeLog +++ b/gcc/lto/ChangeLog @@ -1,3 +1,13 @@ +2025-04-15 Kyrylo Tkachov <ktkachov@nvidia.com> + + * lto-partition.cc (add_node_references_to_partition): Define. + (create_partition): Likewise. + (lto_locality_map): Likewise. + (lto_promote_cross_file_statics): Add extra dumping. + * lto-partition.h (lto_locality_map): Declare prototype. + * lto.cc (do_whole_program_analysis): Handle + flag_ipa_reorder_for_locality. + 2025-02-28 Richard Biener <rguenther@suse.de> PR lto/91299 diff --git a/gcc/lto/lto-partition.cc b/gcc/lto/lto-partition.cc index 3046951..c7e69ee 100644 --- a/gcc/lto/lto-partition.cc +++ b/gcc/lto/lto-partition.cc @@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. If not see #include "ipa-prop.h" #include "ipa-fnsummary.h" #include "lto-partition.h" +#include "ipa-locality-cloning.h" #include <limits> @@ -1418,6 +1419,126 @@ lto_balanced_map (int n_lto_partitions, int max_partition_size) } } +/* Add all references of NODE into PARTITION. */ + +static void +add_node_references_to_partition (ltrans_partition partition, symtab_node *node) +{ + struct ipa_ref *ref = NULL; + varpool_node *vnode; + for (int j = 0; node->iterate_reference (j, ref); j++) + if (is_a <varpool_node *> (ref->referred)) + { + vnode = dyn_cast <varpool_node *> (ref->referred); + if (!symbol_partitioned_p (vnode) + && !vnode->no_reorder + && vnode->get_partitioning_class () == SYMBOL_PARTITION) + { + add_symbol_to_partition (partition, vnode); + if (dump_file) + fprintf (dump_file, "Varpool Node: %s\n", vnode->dump_asm_name ()); + add_node_references_to_partition (partition, vnode); + } + } + + for (int j = 0; node->iterate_referring (j, ref); j++) + if (is_a <varpool_node *> (ref->referring)) + { + vnode = dyn_cast <varpool_node *> (ref->referring); + gcc_assert (vnode->definition); + if (!symbol_partitioned_p (vnode) + && !vnode->no_reorder + && !vnode->can_remove_if_no_refs_p () + && vnode->get_partitioning_class () == SYMBOL_PARTITION) + { + add_symbol_to_partition (partition, vnode); + if (dump_file) + fprintf (dump_file, "Varpool Node: %s\n", vnode->dump_asm_name ()); + add_node_references_to_partition (partition, vnode); + } + } + if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node)) + { + struct cgraph_edge *e; + + /* Add all inline clones and callees that are duplicated. */ + for (e = cnode->callees; e; e = e->next_callee) + if (e->callee->get_partitioning_class () == SYMBOL_DUPLICATE) + add_node_references_to_partition (partition, e->callee); + + /* Add all thunks associated with the function. */ + for (e = cnode->callers; e; e = e->next_caller) + if (e->caller->thunk && !e->caller->inlined_to) + add_node_references_to_partition (partition, e->caller); + } + +} + +/* Create and return the created partition of name NAME. */ + +static ltrans_partition +create_partition (int &npartitions, const char *name) +{ + npartitions++; + return new_partition (name); +} + +/* Partitioning for code locality. + The partitioning plan (and prerequisite cloning) will have been done by the + IPA locality cloning pass. This function just implements that plan by + assigning those partitions to ltrans_parititions. */ + +void +lto_locality_map (int max_partition_size) +{ + symtab_node *snode; + int npartitions = 0; + + auto_vec<varpool_node *> varpool_order; + struct cgraph_node *node; + + if (locality_partitions.length () == 0) + { + if (dump_file) + { + fprintf (dump_file, "Locality partition: falling back to balanced " + "model\n"); + } + lto_balanced_map (param_lto_partitions, param_max_partition_size); + return; + } + ltrans_partition partition = nullptr; + for (auto part : locality_partitions) + { + partition = create_partition (npartitions, ""); + for (unsigned j = 0; j < part->nodes.length (); j++) + { + node = part->nodes[j]; + if (symbol_partitioned_p (node)) + continue; + + add_symbol_to_partition (partition, node); + add_node_references_to_partition (partition, node); + } + } + + int64_t partition_size = max_partition_size; + /* All other unpartitioned symbols. */ + FOR_EACH_SYMBOL (snode) + { + if (snode->get_partitioning_class () == SYMBOL_PARTITION + && !symbol_partitioned_p (snode)) + { + if (partition->insns > partition_size) + partition = create_partition (npartitions, ""); + + add_symbol_to_partition (partition, snode); + if (dump_file) + fprintf (dump_file, "Un-ordered Node: %s\n", snode->dump_asm_name ()); + } + } +} + /* Return true if we must not change the name of the NODE. The name as extracted from the corresponding decl should be passed in NAME. */ @@ -1732,7 +1853,12 @@ lto_promote_cross_file_statics (void) { ltrans_partition part = ltrans_partitions[i]; + if (dump_file) + fprintf (dump_file, "lto_promote_cross_file_statics for part %s %p\n", + part->name, (void *)part->encoder); part->encoder = compute_ltrans_boundary (part->encoder); + if (dump_file) + fprintf (dump_file, "new encoder %p\n", (void *)part->encoder); } lto_clone_numbers = new hash_map<const char *, unsigned>; diff --git a/gcc/lto/lto-partition.h b/gcc/lto/lto-partition.h index 38b3f1e..a6a4195 100644 --- a/gcc/lto/lto-partition.h +++ b/gcc/lto/lto-partition.h @@ -37,6 +37,7 @@ void lto_1_to_1_map (void); void lto_max_map (void); void lto_cache_map (int, int); void lto_balanced_map (int, int); +void lto_locality_map (int); void lto_promote_cross_file_statics (void); void free_ltrans_partitions (void); void lto_promote_statics_nonwpa (void); diff --git a/gcc/lto/lto.cc b/gcc/lto/lto.cc index 18ca475..183634f 100644 --- a/gcc/lto/lto.cc +++ b/gcc/lto/lto.cc @@ -547,7 +547,9 @@ do_whole_program_analysis (void) symtab_node::checking_verify_symtab_nodes (); bitmap_obstack_release (NULL); - if (flag_lto_partition == LTO_PARTITION_1TO1) + if (flag_ipa_reorder_for_locality) + lto_locality_map (param_max_locality_partition_size); + else if (flag_lto_partition == LTO_PARTITION_1TO1) lto_1_to_1_map (); else if (flag_lto_partition == LTO_PARTITION_MAX) lto_max_map (); diff --git a/gcc/opts.cc b/gcc/opts.cc index 80c7a97..5e7b77d 100644 --- a/gcc/opts.cc +++ b/gcc/opts.cc @@ -1037,6 +1037,25 @@ report_conflicting_sanitizer_options (struct gcc_options *opts, location_t loc, } } +/* Validate from OPTS and OPTS_SET that when -fipa-reorder-for-locality is + enabled no explicit -flto-partition is also passed as the locality cloning + pass uses its own partitioning scheme. */ + +static void +validate_ipa_reorder_locality_lto_partition (struct gcc_options *opts, + struct gcc_options *opts_set) +{ + static bool validated_p = false; + + if (opts->x_flag_lto_partition != LTO_PARTITION_DEFAULT) + { + if (opts_set->x_flag_ipa_reorder_for_locality && !validated_p) + error ("%<-fipa-reorder-for-locality%> is incompatible with" + " an explicit %qs option", "-flto-partition"); + } + validated_p = true; +} + /* After all options at LOC have been read into OPTS and OPTS_SET, finalize settings of those options and diagnose incompatible combinations. */ @@ -1249,6 +1268,10 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set, if (opts->x_flag_reorder_blocks_and_partition) SET_OPTION_IF_UNSET (opts, opts_set, flag_reorder_functions, 1); + validate_ipa_reorder_locality_lto_partition (opts, opts_set); + if (opts_set->x_flag_lto_partition != LTO_PARTITION_DEFAULT) + opts_set->x_flag_lto_partition = opts->x_flag_lto_partition = LTO_PARTITION_BALANCED; + /* The -gsplit-dwarf option requires -ggnu-pubnames. */ if (opts->x_dwarf_split_debug_info) opts->x_debug_generate_pub_sections = 2; diff --git a/gcc/params.opt b/gcc/params.opt index 422d082..a2b606f 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -469,6 +469,33 @@ Minimal size of a partition for LTO (in estimated instructions). Common Joined UInteger Var(param_lto_partitions) Init(128) IntegerRange(1, 65536) Param Number of partitions the program should be split to. +Enum +Name(lto_locality_cloning_model) Type(enum lto_locality_cloning_model) UnknownError(unknown LTO partitioning model %qs) + +EnumValue +Enum(lto_locality_cloning_model) String(no) Value(LTO_LOCALITY_NO_CLONING) + +EnumValue +Enum(lto_locality_cloning_model) String(non_interposable) Value(LTO_LOCALITY_NON_INTERPOSABLE_CLONING) + +EnumValue +Enum(lto_locality_cloning_model) String(maximal) Value(LTO_LOCALITY_MAXIMAL_CLONING) + +-param=lto-partition-locality-cloning= +Common Joined RejectNegative Enum(lto_locality_cloning_model) Var(flag_lto_locality_cloning) Init(LTO_LOCALITY_MAXIMAL_CLONING) Optimization + +-param=lto-partition-locality-frequency-cutoff= +Common Joined UInteger Var(param_lto_locality_frequency) Init(1) IntegerRange(0, 65536) Param Optimization +The denominator n of fraction 1/n of the execution frequency of callee to be cloned for a particular caller. Special value of 0 dictates to always clone without a cut-off. + +-param=lto-partition-locality-size-cutoff= +Common Joined UInteger Var(param_lto_locality_size) Init(1000) IntegerRange(1, 65536) Param Optimization +Size cut-off for callee including inlined calls to be cloned for a particular caller. + +-param=lto-max-locality-partition= +Common Joined UInteger Var(param_max_locality_partition_size) Init(1000000) Param +Maximal size of a locality partition for LTO (in estimated instructions). Value of 0 results in default value being used. + -param=max-average-unrolled-insns= Common Joined UInteger Var(param_max_average_unrolled_insns) Init(80) Param Optimization The maximum number of instructions to consider to unroll in a loop on average. diff --git a/gcc/passes.def b/gcc/passes.def index 9fd85a3..3b25105 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -162,6 +162,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_ipa_sra); NEXT_PASS (pass_ipa_fn_summary); NEXT_PASS (pass_ipa_inline); + NEXT_PASS (pass_ipa_locality_cloning); NEXT_PASS (pass_ipa_pure_const); NEXT_PASS (pass_ipa_modref); NEXT_PASS (pass_ipa_free_fn_summary, false /* small_p */); diff --git a/gcc/sanitizer.def b/gcc/sanitizer.def index 4b7c9dc..c5a9c2d 100644 --- a/gcc/sanitizer.def +++ b/gcc/sanitizer.def @@ -247,7 +247,7 @@ DEF_SANITIZER_BUILTIN(BUILT_IN_TSAN_INIT, "__tsan_init", DEF_SANITIZER_BUILTIN(BUILT_IN_TSAN_FUNC_ENTRY, "__tsan_func_entry", BT_FN_VOID_PTR, ATTR_NOTHROW_LEAF_LIST) DEF_SANITIZER_BUILTIN(BUILT_IN_TSAN_FUNC_EXIT, "__tsan_func_exit", - BT_FN_VOID_PTR, ATTR_NOTHROW_LEAF_LIST) + BT_FN_VOID, ATTR_NOTHROW_LEAF_LIST) DEF_SANITIZER_BUILTIN(BUILT_IN_TSAN_VPTR_UPDATE, "__tsan_vptr_update", BT_FN_VOID_PTR_PTR, ATTR_NOTHROW_LEAF_LIST) DEF_SANITIZER_BUILTIN(BUILT_IN_TSAN_READ1, "__tsan_read1", diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index a06b2e7..822c126 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,103 @@ +2025-04-15 Iain Buclaw <ibuclaw@gdcproject.org> + + PR d/119826 + * gdc.dg/debug/imports/pr119826b.d: New test. + * gdc.dg/debug/pr119826.d: New test. + +2025-04-15 Nathaniel Shead <nathanieloshead@gmail.com> + + PR c++/119755 + * g++.dg/modules/lambda-10_a.H: New test. + * g++.dg/modules/lambda-10_b.C: New test. + +2025-04-15 Jakub Jelinek <jakub@redhat.com> + + * gcc.dg/completion-2.c: Expect also -flto-partition=default line. + +2025-04-15 Qing Zhao <qing.zhao@oracle.com> + + PR c/119717 + * gcc.dg/pr119717.c: New test. + +2025-04-15 H.J. Lu <hjl.tools@gmail.com> + + PR target/119784 + * gcc.target/i386/apx-interrupt-1.c: Expect 31 .cfi_restore + directives. + +2025-04-15 Vineet Gupta <vineetg@rivosinc.com> + + PR target/119533 + * go.dg/pr119533-riscv.go: New test. + * go.dg/pr119533-riscv-2.go: New test. + +2025-04-15 Robin Dapp <rdapp@ventanamicro.com> + + PR target/119547 + * gcc.target/riscv/rvv/vsetvl/avl_single-68.c: xfail. + * g++.target/riscv/rvv/autovec/pr119547.C: New test. + * g++.target/riscv/rvv/autovec/pr119547-2.C: New test. + * gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c: Adjust. + +2025-04-15 Tobias Burnus <tburnus@baylibre.com> + + * gfortran.dg/gomp/map-alloc-comp-1.f90: Remove dg-error. + * gfortran.dg/gomp/polymorphic-mapping-2.f90: Update warn wording. + * gfortran.dg/gomp/polymorphic-mapping.f90: Change expected + diagnostic; some tests moved to ... + * gfortran.dg/gomp/polymorphic-mapping-1.f90: ... here as new test. + * gfortran.dg/gomp/polymorphic-mapping-3.f90: New test. + * gfortran.dg/gomp/polymorphic-mapping-4.f90: New test. + * gfortran.dg/gomp/polymorphic-mapping-5.f90: New test. + +2025-04-15 Martin Jambor <mjambor@suse.cz> + Jakub Jelinek <jakub@redhat.com> + + PR ipa/119803 + * gcc.dg/ipa/pr119803.c: New test. + +2025-04-15 Iain Buclaw <ibuclaw@gdcproject.org> + + PR d/119799 + * gdc.dg/import-c/pr119799.d: New test. + * gdc.dg/import-c/pr119799c.c: New test. + +2025-04-15 Patrick Palka <ppalka@redhat.com> + + PR c++/119807 + PR c++/112288 + * g++.dg/template/friend86.C: New test. + * g++.dg/template/friend87.C: New test. + +2025-04-15 Iain Buclaw <ibuclaw@gdcproject.org> + + PR d/119817 + * gdc.dg/debug/imports/m119817/a.d: New test. + * gdc.dg/debug/imports/m119817/b.d: New test. + * gdc.dg/debug/imports/m119817/package.d: New test. + * gdc.dg/debug/pr119817.d: New test. + +2025-04-15 Jakub Jelinek <jakub@redhat.com> + + PR sanitizer/119801 + * c-c++-common/tsan/pr119801.c: New test. + +2025-04-15 Jonathan Yong <10walls@gmail.com> + + * gcc.dg/Wbuiltin-declaration-mismatch-4.c: Make diagnostic + accept long long. + +2025-04-15 Jakub Jelinek <jakub@redhat.com> + + PR ipa/119318 + * gcc.dg/ipa/pr119318.c: Remove dg-additional-options, add -w to + dg-options. + +2025-04-15 Jason Merrill <jason@redhat.com> + + PR c++/113835 + * g++.dg/cpp2a/constexpr-vector1.C: New test. + 2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> PR target/118794 diff --git a/gcc/testsuite/c-c++-common/tsan/pr119801.c b/gcc/testsuite/c-c++-common/tsan/pr119801.c new file mode 100644 index 0000000..d3a6bb4 --- /dev/null +++ b/gcc/testsuite/c-c++-common/tsan/pr119801.c @@ -0,0 +1,24 @@ +/* PR sanitizer/119801 */ +/* { dg-do compile } */ +/* { dg-options "-fsanitize=thread" } */ + +[[gnu::noipa]] int +bar (int *p) +{ + return ++*p; +} + +int +foo (int *p) +{ + ++*p; + [[gnu::musttail]] return bar (p); +} + +[[gnu::noinline]] int +baz (int x) +{ + if (x < 10) + return x; + [[gnu::musttail]] return baz (x - 2); +} diff --git a/gcc/testsuite/g++.dg/modules/lambda-10_a.H b/gcc/testsuite/g++.dg/modules/lambda-10_a.H new file mode 100644 index 0000000..1ad1a80 --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/lambda-10_a.H @@ -0,0 +1,17 @@ +// PR c++/119755 +// { dg-additional-options "-fmodule-header" } +// { dg-module-cmi {} } + +template <typename _Out> void format(_Out) { + constexpr int __term = 1; + [&] { __term; }; + [&] { const int outer = __term; { __term; } }; + [&]() noexcept { __term; }; + [&]() noexcept { const int outer = __term; { __term; } }; + [&](auto) { int n[__term]; }(0); + [&](auto) noexcept { int n[__term]; }(0); +} + +inline void vformat() { + format(0); +} diff --git a/gcc/testsuite/g++.dg/modules/lambda-10_b.C b/gcc/testsuite/g++.dg/modules/lambda-10_b.C new file mode 100644 index 0000000..3556bce --- /dev/null +++ b/gcc/testsuite/g++.dg/modules/lambda-10_b.C @@ -0,0 +1,7 @@ +// PR c++/119755 +// { dg-additional-options "-fmodules" } + +import "lambda-10_a.H"; +int main() { + vformat(); +} diff --git a/gcc/testsuite/g++.dg/template/friend86.C b/gcc/testsuite/g++.dg/template/friend86.C new file mode 100644 index 0000000..9e2c1af --- /dev/null +++ b/gcc/testsuite/g++.dg/template/friend86.C @@ -0,0 +1,25 @@ +// PR c++/119807 +// { dg-do run } + +template<int N> +struct A { + template<class T> friend int f(A<N>, T); +}; + +template struct A<0>; +template struct A<1>; + +int main() { + A<0> x; + A<1> y; + if (f(x, true) != 0) __builtin_abort(); + if (f(y, true) != 1) __builtin_abort(); +} + +template<int N> +struct B { + template<class T> friend int f(A<N>, T) { return N; } +}; + +template struct B<0>; +template struct B<1>; diff --git a/gcc/testsuite/g++.dg/template/friend87.C b/gcc/testsuite/g++.dg/template/friend87.C new file mode 100644 index 0000000..94c0dfc --- /dev/null +++ b/gcc/testsuite/g++.dg/template/friend87.C @@ -0,0 +1,42 @@ +// PR c++/119807 +// { dg-do compile { target c++20 } } + +using size_t = decltype(sizeof(0)); + +template<auto tag, size_t current> +struct CounterReader { + template<typename> + friend auto counterFlag(CounterReader<tag, current>) noexcept; +}; + +template<auto tag, size_t current> +struct CounterWriter { + static constexpr size_t value = current; + + template<typename> + friend auto counterFlag(CounterReader<tag, current>) noexcept {} +}; + +template<auto tag, auto unique, size_t current = 0, size_t mask = size_t(1) << (sizeof(size_t) * 8 - 1)> +[[nodiscard]] constexpr size_t counterAdvance() noexcept { + if constexpr (!mask) { + return CounterWriter<tag, current + 1>::value; + } else if constexpr (requires { counterFlag<void>(CounterReader<tag, current | mask>()); }) { + return counterAdvance<tag, unique, current | mask, (mask >> 1)>(); + } + else { + return counterAdvance<tag, unique, current, (mask >> 1)>(); + } +} + +constexpr auto defaultCounterTag = [] {}; + +template<auto tag = defaultCounterTag, auto unique = [] {}> +constexpr size_t counter() noexcept { + return counterAdvance<tag, unique>(); +} + +int main() { + static_assert(counter() == 1); + static_assert(counter() == 2); +} diff --git a/gcc/testsuite/g++.target/riscv/rvv/autovec/pr119547-2.C b/gcc/testsuite/g++.target/riscv/rvv/autovec/pr119547-2.C new file mode 100644 index 0000000..1b98d3d --- /dev/null +++ b/gcc/testsuite/g++.target/riscv/rvv/autovec/pr119547-2.C @@ -0,0 +1,212 @@ +/* { dg-do run { target rv64 } } */ +/* { dg-require-effective-target riscv_v_ok } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d --param=logical-op-non-short-circuit=0" } */ + +#include <riscv_vector.h> + +using v_uint8 = vuint8m2_t; +using v_int8 = vint8m2_t; +using v_uint16 = vuint16m2_t; +using v_int16 = vint16m2_t; +using v_uint32 = vuint32m2_t; +using v_int32 = vint32m2_t; +using v_uint64 = vuint64m2_t; +using v_int64 = vint64m2_t; +using v_float32 = vfloat32m2_t; +using v_float64 = vfloat64m2_t; + +using uchar = unsigned char; +using schar = signed char; +using ushort = unsigned short; +using uint = unsigned int; +using uint64 = unsigned long int; +using int64 = long int; + +struct Size +{ + int width; + int height; +}; + +template <class T> struct VTraits; + +template <> struct VTraits<vint32m1_t> +{ + static inline int vlanes () { return __riscv_vsetvlmax_e32m1 (); } + using lane_type = int32_t; + static const int max_nlanes = 1024 / 32 * 2; +}; +template <> struct VTraits<vint32m2_t> +{ + static inline int vlanes () { return __riscv_vsetvlmax_e32m2 (); } + using lane_type = int32_t; + static const int max_nlanes = 1024 / 32 * 2; +}; +template <> struct VTraits<vint32m4_t> +{ + static inline int vlanes () { return __riscv_vsetvlmax_e32m4 (); } + using lane_type = int32_t; + static const int max_nlanes = 1024 / 32 * 2; +}; +template <> struct VTraits<vint32m8_t> +{ + static inline int vlanes () { return __riscv_vsetvlmax_e32m8 (); } + using lane_type = int32_t; + static const int max_nlanes = 1024 / 32 * 2; +}; + +template <> struct VTraits<vfloat64m1_t> +{ + static inline int vlanes () { return __riscv_vsetvlmax_e64m1 (); } + using lane_type = double; + static const int max_nlanes = 1024 / 64 * 2; +}; +template <> struct VTraits<vfloat64m2_t> +{ + static inline int vlanes () { return __riscv_vsetvlmax_e64m2 (); } + using lane_type = double; + static const int max_nlanes = 1024 / 64 * 2; +}; +template <> struct VTraits<vfloat64m4_t> +{ + static inline int vlanes () { return __riscv_vsetvlmax_e64m4 (); } + using lane_type = double; + static const int max_nlanes = 1024 / 64 * 2; +}; +template <> struct VTraits<vfloat64m8_t> +{ + static inline int vlanes () { return __riscv_vsetvlmax_e64m8 (); } + using lane_type = double; + static const int max_nlanes = 1024 / 64 * 2; +}; + +static inline v_float64 +v_setall_f64 (double v) +{ + return __riscv_vfmv_v_f_f64m2 (v, VTraits<v_float64>::vlanes ()); +} +static inline v_float64 +vx_setall_f64 (double v) +{ + return v_setall_f64 (v); +} + +inline v_int32 +v_load_expand_q (const schar *ptr) +{ + return __riscv_vwcvt_x ( + __riscv_vwcvt_x (__riscv_vle8_v_i8mf2 (ptr, VTraits<v_int32>::vlanes ()), + VTraits<v_int32>::vlanes ()), + VTraits<v_int32>::vlanes ()); +} + +static inline v_int32 +vx_load_expand_q (const schar *ptr) +{ + return v_load_expand_q (ptr); +} + +inline v_float64 +v_cvt_f64 (const v_int32 &a) +{ + return __riscv_vget_f64m2 (__riscv_vfwcvt_f (a, VTraits<v_int32>::vlanes ()), + 0); +} + +inline v_float64 +v_cvt_f64_high (const v_int32 &a) +{ + return __riscv_vget_f64m2 (__riscv_vfwcvt_f (a, VTraits<v_int32>::vlanes ()), + 1); +} + +inline void +v_store (double *ptr, const v_float64 &a) +{ + __riscv_vse64 (ptr, a, VTraits<v_float64>::vlanes ()); +} + +static inline void +v_store_pair_as (double *ptr, const v_float64 &a, const v_float64 &b) +{ + v_store (ptr, a); + v_store (ptr + VTraits<v_float64>::vlanes (), b); +} + +static inline void +vx_load_pair_as (const schar *ptr, v_float64 &a, v_float64 &b) +{ + v_int32 v0 = vx_load_expand_q (ptr); + a = v_cvt_f64 (v0); + b = v_cvt_f64_high (v0); +} + +inline v_float64 +v_fma (const v_float64 &a, const v_float64 &b, const v_float64 &c) +{ + return __riscv_vfmacc_vv_f64m2 (c, a, b, VTraits<v_float64>::vlanes ()); +} + +template <typename _Tp> +static inline _Tp +saturate_cast (double v) +{ + return _Tp (v); +} + +template <typename _Ts, typename _Td> +__attribute__ ((noipa)) void +cvt_64f (const _Ts *src, size_t sstep, _Td *dst, size_t dstep, Size size, + double a, double b) +{ + v_float64 va = vx_setall_f64 (a), vb = vx_setall_f64 (b); + const int VECSZ = VTraits<v_float64>::vlanes () * 2; + + sstep /= sizeof (src[0]); + dstep /= sizeof (dst[0]); + + for (int i = 0; i < size.height; i++, src += sstep, dst += dstep) + { + int j = 0; + + for (; j < size.width; j += VECSZ) + { + if (j > size.width - VECSZ) + { + if (j == 0 || src == (_Ts *) dst) + break; + j = size.width - VECSZ; + } + v_float64 v0, v1; + vx_load_pair_as (src + j, v0, v1); + v0 = v_fma (v0, va, vb); + v1 = v_fma (v1, va, vb); + v_store_pair_as (dst + j, v0, v1); + } + + for (; j < size.width; j++) + dst[j] = saturate_cast<_Td> (src[j] * a + b); + } +} + +void +__attribute__ ((noipa)) +cvtScale8s64f (const uchar *src_, size_t sstep, const uchar *, size_t, + uchar *dst_, size_t dstep, Size size, void *scale_) +{ + const schar *src = (const schar *) src_; + double *dst = (double *) dst_; + double *scale = (double *) scale_; + cvt_64f (src, sstep, dst, dstep, size, (double) scale[0], (double) scale[1]); +} + +int main () +{ + uchar src[1024]; + uchar dst[1024]; + + double scale[2] = {2.0, 3.0}; + Size size {4, 1}; + + cvtScale8s64f (src, 4, NULL, 0, dst, 32, size, (void *)scale); +} diff --git a/gcc/testsuite/g++.target/riscv/rvv/autovec/pr119547.C b/gcc/testsuite/g++.target/riscv/rvv/autovec/pr119547.C new file mode 100644 index 0000000..bac0fb1 --- /dev/null +++ b/gcc/testsuite/g++.target/riscv/rvv/autovec/pr119547.C @@ -0,0 +1,82 @@ +/* { dg-do run { target rv64 } } */ +/* { dg-require-effective-target riscv_v_ok } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d --param=logical-op-non-short-circuit=0" } */ + +#include <riscv_vector.h> +using v_int32 = vint32m2_t; +using v_float64 = vfloat64m2_t; +struct Size +{ + int width; + int height; +}; +template <class> struct VTraits +{ + static int vlanes () { return __riscv_vsetvlmax_e32m2 (); } +}; +v_int32 +v_load_expand_q (const signed char *ptr) +{ + return __riscv_vwcvt_x ( + __riscv_vwcvt_x (__riscv_vle8_v_i8mf2 (ptr, VTraits<v_int32>::vlanes ()), + VTraits<v_int32>::vlanes ()), + VTraits<v_int32>::vlanes ()); +} +v_float64 +v_cvt_f64_high (v_int32 a) +{ + return __riscv_vget_f64m2 (__riscv_vfwcvt_f (a, VTraits<v_int32>::vlanes ()), + 1); +} +void +v_store (double *ptr, v_float64 a) +{ + __riscv_vse64 (ptr, a, __riscv_vsetvlmax_e64m2 ()); +} +void +v_store_pair_as (double *ptr, v_float64 b) +{ + v_store (ptr, b); +} +void +vx_load_pair_as (const signed char *ptr, v_float64, v_float64 &b) +{ + v_int32 v0; + b = v_cvt_f64_high (v0); +}; +void +cvt_64f (const signed char *src, double *dst, Size size) +{ + int VECSZ = __riscv_vsetvlmax_e64m2 (); + for (int i; i < size.height; i++) + { + int j; + for (;; j += VECSZ) + { + if (j > -VECSZ) + if (j == 0 || dst) + break; + v_float64 v0, v1; + vx_load_pair_as (src, v0, v1); + v_store_pair_as (dst, v1); + } + for (; j < size.width; j++) + dst[j] = (src[j]); + } +} +void +cvtScale8s64f (unsigned char *src_, unsigned char *dst_, + size_t, Size size, void *) +{ + signed char src; + double dst = *dst_; + cvt_64f (&src, &dst, size); +} +int main () +{ + unsigned char src[1]; + unsigned char dst[1024]; + double scale[1]; + Size size{4, 1}; + cvtScale8s64f (src, dst, 32, size, scale); +} diff --git a/gcc/testsuite/gcc.dg/Wbuiltin-declaration-mismatch-4.c b/gcc/testsuite/gcc.dg/Wbuiltin-declaration-mismatch-4.c index c48fe5f..09aaaa6 100644 --- a/gcc/testsuite/gcc.dg/Wbuiltin-declaration-mismatch-4.c +++ b/gcc/testsuite/gcc.dg/Wbuiltin-declaration-mismatch-4.c @@ -77,9 +77,9 @@ void test_integer_conversion_memset (void *d) /* Passing a ptrdiff_t where size_t is expected may not be unsafe but because GCC may emits suboptimal code for such calls warning for them helps improve efficiency. */ - memset (d, 0, diffi); /* { dg-warning ".memset. argument 3 promotes to .ptrdiff_t. {aka .\(long \)?\(int\)?\(__int20\)?.} where .\(long \)?\(__int20 \)?unsigned\( int\)?. is expected" } */ + memset (d, 0, diffi); /* { dg-warning ".memset. argument 3 promotes to .ptrdiff_t. {aka .\(long \)*\(int\)?\(__int20\)?.} where .\(long \)*\(__int20 \)?unsigned\( int\)?. is expected" } */ - memset (d, 0, 2.0); /* { dg-warning ".memset. argument 3 type is .double. where '\(long \)?\(__int20 \)?unsigned\( int\)?' is expected" } */ + memset (d, 0, 2.0); /* { dg-warning ".memset. argument 3 type is .double. where '\(long \)*\(__int20 \)?unsigned\( int\)?' is expected" } */ /* Verify that the same call as above but to the built-in doesn't trigger a warning. */ diff --git a/gcc/testsuite/gcc.dg/completion-2.c b/gcc/testsuite/gcc.dg/completion-2.c index 99e6531..46c511c 100644 --- a/gcc/testsuite/gcc.dg/completion-2.c +++ b/gcc/testsuite/gcc.dg/completion-2.c @@ -5,6 +5,7 @@ -flto-partition=1to1 -flto-partition=balanced -flto-partition=cache +-flto-partition=default -flto-partition=max -flto-partition=none -flto-partition=one diff --git a/gcc/testsuite/gcc.dg/ipa/pr119318.c b/gcc/testsuite/gcc.dg/ipa/pr119318.c index 8e62ec5..f179aed 100644 --- a/gcc/testsuite/gcc.dg/ipa/pr119318.c +++ b/gcc/testsuite/gcc.dg/ipa/pr119318.c @@ -1,7 +1,6 @@ /* { dg-do run } */ /* { dg-require-effective-target int128 } */ -/* { dg-additional-options "-Wno-psabi -w" } */ -/* { dg-options "-Wno-psabi -O2" } */ +/* { dg-options "-Wno-psabi -w -O2" } */ typedef unsigned V __attribute__((vector_size (64))); typedef unsigned __int128 W __attribute__((vector_size (64))); diff --git a/gcc/testsuite/gcc.dg/ipa/pr119803.c b/gcc/testsuite/gcc.dg/ipa/pr119803.c new file mode 100644 index 0000000..1a7bfd2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/ipa/pr119803.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +extern void f(int p); +int a, b; +char c; +static int d(int e) { return !e || a == 1 ? 0 : a / e; } +static void h(short e) { + int g = d(e); + f(g); +} +void i() { + c = 128; + h(c); + b = d(65536); +} diff --git a/gcc/testsuite/gcc.dg/pr119717.c b/gcc/testsuite/gcc.dg/pr119717.c new file mode 100644 index 0000000..e5eedc5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr119717.c @@ -0,0 +1,24 @@ +/* PR c/119717 */ +/* { dg-additional-options "-std=c23" } */ +/* { dg-do compile } */ + +struct annotated { + unsigned count; + [[gnu::counted_by(count)]] char array[]; +}; + +[[gnu::noinline,gnu::noipa]] +static unsigned +size_of (bool x, struct annotated *a) +{ + char *p = (x ? a : 0)->array; + return __builtin_dynamic_object_size (p, 1); +} + +int main() +{ + struct annotated *p = __builtin_malloc(sizeof *p); + p->count = 0; + __builtin_printf ("the bdos whole is %ld\n", size_of (0, p)); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rwsr-ungated.c b/gcc/testsuite/gcc.target/aarch64/acle/rwsr-ungated.c new file mode 100644 index 0000000..d67a426 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/acle/rwsr-ungated.c @@ -0,0 +1,13 @@ +/* Test that __arm_[r,w]sr intrinsics aren't gated (by default). */ + +/* { dg-do compile } */ +/* { dg-options "-march=armv8-a" } */ + +#include <arm_acle.h> + +uint64_t +foo (uint64_t a) +{ + __arm_wsr64 ("zcr_el1", a); + return __arm_rsr64 ("smcr_el1"); +} diff --git a/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c b/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c index fefe2e6..fa1acc7 100644 --- a/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c +++ b/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c @@ -66,7 +66,7 @@ void foo (void *frame) /* { dg-final { scan-assembler-times {\t\.cfi_offset 132, -120} 1 } } */ /* { dg-final { scan-assembler-times {\t\.cfi_offset 131, -128} 1 } } */ /* { dg-final { scan-assembler-times {\t\.cfi_offset 130, -136} 1 } } */ -/* { dg-final { scan-assembler-times ".cfi_restore" 15} } */ +/* { dg-final { scan-assembler-times ".cfi_restore" 31 } } */ /* { dg-final { scan-assembler-times "pop(?:l|q)\[\\t \]*%(?:e|r)ax" 1 } } */ /* { dg-final { scan-assembler-times "pop(?:l|q)\[\\t \]*%(?:e|r)bx" 1 } } */ /* { dg-final { scan-assembler-times "pop(?:l|q)\[\\t \]*%(?:e|r)cx" 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-68.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-68.c index bf95e1c..64666d3 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-68.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/avl_single-68.c @@ -21,6 +21,12 @@ void f2 (void * restrict in, void * restrict out, int l, int n, int m) } } +/* The second check is XFAILed because we currently don't lift + vsetvls into non-transparent (in LCM parlance) blocks. + See PR119547. + In this test it is still possible because the conflicting + register only ever feeds vsetvls. */ + /* { dg-final { scan-assembler-times {vsetvli} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */ -/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */ +/* { dg-final { scan-assembler-times {vsetvli\s+zero,\s*[a-x0-9]+,\s*e8,\s*mf8,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } xfail { *-*-* } } } } */ /* { dg-final { scan-assembler-times {addi\s+[a-x0-9]+,\s*[a-x0-9]+,\s*44} 1 { target { no-opts "-O0" no-opts "-Os" no-opts "-Oz" no-opts "-g" no-opts "-funroll-loops" } } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c index ddf53ca..0dbf34a 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vlmax_switch_vtype-10.c @@ -43,6 +43,6 @@ void foo (int8_t * restrict in, int8_t * restrict out, int n, int cond) } } -/* { dg-final { scan-assembler-times {vsetvli} 15 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" no-opts "-flto" } } } } */ +/* { dg-final { scan-assembler-times {vsetvli} 14 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" no-opts "-flto" } } } } */ /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au]} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */ -/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*mf2,\s*t[au],\s*m[au]} 4 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */ +/* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*mf2,\s*t[au],\s*m[au]} 3 { target { no-opts "-O0" no-opts "-O1" no-opts "-Os" no-opts "-Oz" no-opts "-funroll-loops" no-opts "-g" } } } } */ diff --git a/gcc/testsuite/gdc.dg/debug/imports/m119817/a.d b/gcc/testsuite/gdc.dg/debug/imports/m119817/a.d new file mode 100644 index 0000000..a137472 --- /dev/null +++ b/gcc/testsuite/gdc.dg/debug/imports/m119817/a.d @@ -0,0 +1,2 @@ +module imports.m119817.a; +void f119817()() { } diff --git a/gcc/testsuite/gdc.dg/debug/imports/m119817/b.d b/gcc/testsuite/gdc.dg/debug/imports/m119817/b.d new file mode 100644 index 0000000..aef0e37 --- /dev/null +++ b/gcc/testsuite/gdc.dg/debug/imports/m119817/b.d @@ -0,0 +1,2 @@ +module imports.m119817.b; +void f119817() { } diff --git a/gcc/testsuite/gdc.dg/debug/imports/m119817/package.d b/gcc/testsuite/gdc.dg/debug/imports/m119817/package.d new file mode 100644 index 0000000..188827e --- /dev/null +++ b/gcc/testsuite/gdc.dg/debug/imports/m119817/package.d @@ -0,0 +1,4 @@ +module imports.m119817; +public import + imports.m119817.a, + imports.m119817.b; diff --git a/gcc/testsuite/gdc.dg/debug/imports/pr119826b.d b/gcc/testsuite/gdc.dg/debug/imports/pr119826b.d new file mode 100644 index 0000000..3c5a6ac --- /dev/null +++ b/gcc/testsuite/gdc.dg/debug/imports/pr119826b.d @@ -0,0 +1,14 @@ +module imports.pr119826b; + +import pr119826 : t119826; + +class C119826 +{ + enum E119826 { Evalue } + const E119826 em = void; +} + +void f119826(C119826 c) +{ + t119826(c.em); +} diff --git a/gcc/testsuite/gdc.dg/debug/pr119817.d b/gcc/testsuite/gdc.dg/debug/pr119817.d new file mode 100644 index 0000000..3eea6ba --- /dev/null +++ b/gcc/testsuite/gdc.dg/debug/pr119817.d @@ -0,0 +1,6 @@ +// { dg-do compile } +// { dg-additional-sources "imports/m119817/package.d" } +// { dg-additional-sources "imports/m119817/a.d" } +// { dg-additional-sources "imports/m119817/b.d" } +module pr119817; +import imports.m119817 : f119817; diff --git a/gcc/testsuite/gdc.dg/debug/pr119826.d b/gcc/testsuite/gdc.dg/debug/pr119826.d new file mode 100644 index 0000000..2fb98c7 --- /dev/null +++ b/gcc/testsuite/gdc.dg/debug/pr119826.d @@ -0,0 +1,8 @@ +// { dg-do compile } +// { dg-additional-sources "imports/pr119826b.d" } +module pr119826; + +int t119826(A)(A args) +{ + assert(false); +} diff --git a/gcc/testsuite/gdc.dg/import-c/pr119799.d b/gcc/testsuite/gdc.dg/import-c/pr119799.d new file mode 100644 index 0000000..d8b0fa2 --- /dev/null +++ b/gcc/testsuite/gdc.dg/import-c/pr119799.d @@ -0,0 +1,2 @@ +// { dg-do compile } +import pr119799c; diff --git a/gcc/testsuite/gdc.dg/import-c/pr119799c.c b/gcc/testsuite/gdc.dg/import-c/pr119799c.c new file mode 100644 index 0000000..b80e856 --- /dev/null +++ b/gcc/testsuite/gdc.dg/import-c/pr119799c.c @@ -0,0 +1 @@ +static struct {} s119799; diff --git a/gcc/testsuite/gfortran.dg/gomp/map-alloc-comp-1.f90 b/gcc/testsuite/gfortran.dg/gomp/map-alloc-comp-1.f90 index 0c44296..f48addc 100644 --- a/gcc/testsuite/gfortran.dg/gomp/map-alloc-comp-1.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/map-alloc-comp-1.f90 @@ -10,5 +10,5 @@ type sct end type type(sct) var -!$omp target enter data map(to:var) ! { dg-error "allocatable components is not permitted in map clause" } +!$omp target enter data map(to:var) end diff --git a/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-1.f90 b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-1.f90 new file mode 100644 index 0000000..750cec9 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-1.f90 @@ -0,0 +1,30 @@ +type t + integer :: t +end type t +class(t), target, allocatable :: c, ca(:) +class(t), pointer :: p, pa(:) +integer :: x +allocate( t :: c, ca(5)) +p => c +pa => ca + +! 11111111112222222222333333333344 +!2345678901234567890123456789012345678901 +!$omp target enter data map(c, ca, p, pa) +! { dg-warning "29:Mapping of polymorphic list item 'c' is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-1 } +! { dg-warning "32:Mapping of polymorphic list item 'ca' is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-2 } +! { dg-warning "36:Mapping of polymorphic list item 'p' is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-3 } +! { dg-warning "39:Mapping of polymorphic list item 'pa' is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-4 } + +! 11111111112222222222333333333344 +!2345678901234567890123456789012345678901 + +! 11111111112222222222333333333344 +!2345678901234567890123456789012345678901 +!$omp target update from(c,ca), to(p,pa) +! { dg-warning "26:Mapping of polymorphic list item 'c' is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-1 } +! { dg-warning "28:Mapping of polymorphic list item 'ca' is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-2 } +! { dg-warning "36:Mapping of polymorphic list item 'p' is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-3 } +! { dg-warning "38:Mapping of polymorphic list item 'pa' is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-4 } + +end diff --git a/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-2.f90 b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-2.f90 index e25db68..3bedc9b 100644 --- a/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-2.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-2.f90 @@ -9,7 +9,7 @@ allocate( t :: c, ca(5)) p => c pa => ca -!$omp target ! { dg-warning "Implicit mapping of polymorphic variable 'ca' is unspecified behavior \\\[-Wopenmp\\\]" } +!$omp target ! { dg-warning "Mapping of polymorphic list item 'ca' is unspecified behavior \\\[-Wopenmp\\\]" } ll = allocated(ca) !$omp end target diff --git a/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-3.f90 b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-3.f90 new file mode 100644 index 0000000..9777ecf --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-3.f90 @@ -0,0 +1,23 @@ +subroutine sub(var, var2) +type t + integer :: x +end type t + +type t2 + integer :: x + integer, allocatable :: y +end type + +class(t) var, var2 +type(t2) :: var3, var4 +!$omp target firstprivate(var) & ! { dg-error "Polymorphic list item 'var' at .1. in FIRSTPRIVATE clause has unspecified behavior and unsupported" } +!$omp& private(var2) ! { dg-error "Polymorphic list item 'var2' at .1. in PRIVATE clause has unspecified behavior and unsupported" } + var%x = 5 + var2%x = 5 +!$omp end target +!$omp target firstprivate(var3) & ! { dg-error "Sorry, list item 'var3' at .1. with allocatable components is not yet supported in FIRSTPRIVATE clause" } +!$omp& private(var4) ! { dg-error "Sorry, list item 'var4' at .1. with allocatable components is not yet supported in PRIVATE clause" } + var3%x = 5 + var4%x = 5 +!$omp end target +end diff --git a/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-4.f90 b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-4.f90 new file mode 100644 index 0000000..5a1a70a --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-4.f90 @@ -0,0 +1,9 @@ +subroutine one +implicit none +type t + class(*), allocatable :: ul +end type + +type(t) :: var +!$omp target enter data map(to:var) ! { dg-error "Mapping of unlimited polymorphic list item 'var.ul' is unspecified behavior and unsupported" } +end diff --git a/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-5.f90 b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-5.f90 new file mode 100644 index 0000000..4b5814e --- /dev/null +++ b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping-5.f90 @@ -0,0 +1,9 @@ +subroutine one +implicit none +type t + class(*), allocatable :: ul +end type + +class(*), allocatable :: ul_var +!$omp target enter data map(to: ul_var) ! { dg-error "Mapping of unlimited polymorphic list item 'ul_var' is unspecified behavior and unsupported" } +end diff --git a/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping.f90 b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping.f90 index dd7eb31..752cca2 100644 --- a/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/polymorphic-mapping.f90 @@ -10,37 +10,21 @@ pa => ca ! 11111111112222222222333333333344 !2345678901234567890123456789012345678901 -!$omp target enter data map(c, ca, p, pa) -! { dg-warning "29:Mapping polymorphic list item at .1. is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-1 } -! { dg-warning "32:Mapping polymorphic list item at .1. is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-2 } -! { dg-warning "36:Mapping polymorphic list item at .1. is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-3 } -! { dg-warning "39:Mapping polymorphic list item at .1. is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-4 } - -! 11111111112222222222333333333344 -!2345678901234567890123456789012345678901 -!$omp target firstprivate(ca) ! { dg-warning "27:FIRSTPRIVATE with polymorphic list item at .1. is unspecified behavior \\\[-Wopenmp\\\]" } +!$omp target firstprivate(ca) ! { dg-error "27:Polymorphic list item 'ca' at .1. in FIRSTPRIVATE clause has unspecified behavior and unsupported" } !$omp end target -!$omp target parallel do firstprivate(ca) ! { dg-warning "39:FIRSTPRIVATE with polymorphic list item at .1. is unspecified behavior \\\[-Wopenmp\\\]" } +!$omp target parallel do firstprivate(ca) ! { dg-error "39:Polymorphic list item 'ca' at .1. in FIRSTPRIVATE clause has unspecified behavior and unsupported" } do x = 0, 5 end do -!$omp target parallel do private(ca) ! OK; should map declared type +!$omp target parallel do private(ca) ! { dg-error "34:Polymorphic list item 'ca' at .1. in PRIVATE clause has unspecified behavior and unsupported" } do x = 0, 5 end do -!$omp target private(ca) ! OK; should map declared type +!$omp target private(ca) ! { dg-error "22:Polymorphic list item 'ca' at .1. in PRIVATE clause has unspecified behavior and unsupported" } block end block -! 11111111112222222222333333333344 -!2345678901234567890123456789012345678901 -!$omp target update from(c,ca), to(p,pa) -! { dg-warning "26:Mapping polymorphic list item at .1. is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-1 } -! { dg-warning "28:Mapping polymorphic list item at .1. is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-2 } -! { dg-warning "36:Mapping polymorphic list item at .1. is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-3 } -! { dg-warning "38:Mapping polymorphic list item at .1. is unspecified behavior \\\[-Wopenmp\\\]" "" { target *-*-* } .-4 } - ! ------------------------- !$omp target parallel map(release: x) ! { dg-error "36:TARGET with map-type other than TO, FROM, TOFROM, or ALLOC on MAP clause" } diff --git a/gcc/testsuite/go.dg/pr119533-riscv-2.go b/gcc/testsuite/go.dg/pr119533-riscv-2.go new file mode 100644 index 0000000..ce3ffaf --- /dev/null +++ b/gcc/testsuite/go.dg/pr119533-riscv-2.go @@ -0,0 +1,42 @@ +// { dg-do compile { target riscv64*-*-* } } +// { dg-options "-O2 -march=rv64gcv -mabi=lp64d" } + +package ast + +type as struct { + bt []struct{} + an string +} + +func bj(a *as) string { + if b := a.bt; len(a.an) == 1 { + _ = b[0] + } + return a.an +} + +func MergePackageFiles(f map[string][]interface{}, g uint) []interface{} { + bl := make([]string, len(f)) + var bo []interface{} + bu := make(map[string]int) + for _, bm := range bl { + a := f[bm] + for _, d := range a { + if g != 0 { + if a, p := d.(*as); p { + n := bj(a) + if j, bp := bu[n]; bp { + _ = j + } + } + } + } + } + for _, bm := range bl { + _ = bm + } + for _, bm := range bl { + _ = f[bm] + } + return bo +} diff --git a/gcc/testsuite/go.dg/pr119533-riscv.go b/gcc/testsuite/go.dg/pr119533-riscv.go new file mode 100644 index 0000000..30f52d2 --- /dev/null +++ b/gcc/testsuite/go.dg/pr119533-riscv.go @@ -0,0 +1,120 @@ +// { dg-do compile { target riscv64*-*-* } } +// { dg-options "-O2 -march=rv64gcv -mabi=lp64d" } + +// Reduced from libgo build (multi-file reduction, merged mnaully +// and hand reduced again). + +package ast +import ( + "go/token" + "go/scanner" + "reflect" +) +type v struct {} +type w func( string, reflect.Value) bool +func x( string, reflect.Value) bool +type r struct { + scanner.ErrorList +} +type ab interface {} +type ae interface {} +type af interface {} +type ag struct {} +func (ag) Pos() token.Pos +func (ag) ah() token.Pos +type c struct { + aj ae } +type ak struct { + al []c } +type ( + am struct { + an string } + bs struct { + Value string + } +) +func ao(string) *am +type ( + ap interface {} + aq struct { + ar bs } +as struct { + bt ak + an am } +) +type File struct { + *ag + token.Pos + *am + at []af + *v + au []*aq + av *am + aw []*ag } +type ax struct { + an string + *v + ay map[string]File } +func a(az *token.FileSet, b token.Pos) int +type k struct { + l token.Pos + ah token.Pos +} +type m struct { + bb bool + bc *ag +} + +type bi uint +func bj(a *as) string { + if b := a.bt; len(b.al) == 1 { + c := b.al[0].aj + if e := c; e != nil {} + } + return a.an.an +} +func MergePackageFiles(f ax, g bi) *File { + h := 0 + bk := 0 + k := 0 + bl := make([]string, len(f.ay)) + i := 0 + for bm, a := range f.ay { + bl[i] = bm + k += len(a.at) + } + var bn *ag + var l token.Pos + if h > 0 {} + var bo []af + bu := make(map[string]int) + m := 0 + for _, bm := range bl { + a := f.ay[bm] + for _, d := range a.at { + if g!= 0 { + if a, p := d.(*as); p { + n := bj(a) + if j, bp := bu[n]; bp { + if bo != nil && bo[j]== nil {} + } + } + } + } + } + if m > 0 {} + var bq []*aq + q := make(map[string]bool) + for _, bm := range bl { + a := f.ay[bm] + for _, br := range a.au { + if o := br.ar.Value; q[o] {} + } + } + var bh = make([]*ag, bk) + for _, bm := range bl { + a := f.ay[bm] + copy(bh, a.aw) + } + return &File{bn, l, ao(f.an), bo, f.v, bq, nil, bh} +} diff --git a/gcc/timevar.def b/gcc/timevar.def index c1029d9..02ace46 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -105,6 +105,7 @@ DEFTIMEVAR (TV_IPA_PURE_CONST , "ipa pure const") DEFTIMEVAR (TV_IPA_ICF , "ipa icf") DEFTIMEVAR (TV_IPA_PTA , "ipa points-to") DEFTIMEVAR (TV_IPA_SRA , "ipa SRA") +DEFTIMEVAR (TV_IPA_LC , "ipa locality clone") DEFTIMEVAR (TV_IPA_FREE_LANG_DATA , "ipa free lang data") DEFTIMEVAR (TV_IPA_FREE_INLINE_SUMMARY, "ipa free inline summary") DEFTIMEVAR (TV_IPA_MODREF , "ipa modref") diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 217c31f..7cb5a12 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -551,6 +551,7 @@ extern ipa_opt_pass_d *make_pass_ipa_cdtor_merge (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_single_use (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_comdats (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_modref (gcc::context *ctxt); +extern ipa_opt_pass_d *make_pass_ipa_locality_cloning (gcc::context *ctxt); extern gimple_opt_pass *make_pass_cleanup_cfg_post_optimizing (gcc::context *ctxt); diff --git a/gcc/tree-tailcall.cc b/gcc/tree-tailcall.cc index d5c4c7b..f593363 100644 --- a/gcc/tree-tailcall.cc +++ b/gcc/tree-tailcall.cc @@ -51,6 +51,8 @@ along with GCC; see the file COPYING3. If not see #include "symbol-summary.h" #include "ipa-cp.h" #include "ipa-prop.h" +#include "attribs.h" +#include "asan.h" /* The file implements the tail recursion elimination. It is also used to analyze the tail calls in general, passing the results to the rtl level @@ -122,6 +124,9 @@ struct tailcall /* True if it is a call to the current function. */ bool tail_recursion; + /* True if there is __tsan_func_exit call after the call. */ + bool has_tsan_func_exit; + /* The return value of the caller is mult * f + add, where f is the return value of the call. */ tree mult, add; @@ -504,7 +509,7 @@ maybe_error_musttail (gcall *call, const char *err, bool diag_musttail) Search at most CNT basic blocks (so that we don't need to do trivial loop discovery). */ static bool -empty_eh_cleanup (basic_block bb, int cnt) +empty_eh_cleanup (basic_block bb, int *eh_has_tsan_func_exit, int cnt) { if (EDGE_COUNT (bb->succs) > 1) return false; @@ -515,6 +520,14 @@ empty_eh_cleanup (basic_block bb, int cnt) gimple *g = gsi_stmt (gsi); if (is_gimple_debug (g) || gimple_clobber_p (g)) continue; + if (eh_has_tsan_func_exit + && !*eh_has_tsan_func_exit + && sanitize_flags_p (SANITIZE_THREAD) + && gimple_call_builtin_p (g, BUILT_IN_TSAN_FUNC_EXIT)) + { + *eh_has_tsan_func_exit = 1; + continue; + } if (is_gimple_resx (g) && stmt_can_throw_external (cfun, g)) return true; return false; @@ -523,7 +536,7 @@ empty_eh_cleanup (basic_block bb, int cnt) return false; if (cnt == 1) return false; - return empty_eh_cleanup (single_succ (bb), cnt - 1); + return empty_eh_cleanup (single_succ (bb), eh_has_tsan_func_exit, cnt - 1); } /* Argument for compute_live_vars/live_vars_at_stmt and what compute_live_vars @@ -531,14 +544,22 @@ empty_eh_cleanup (basic_block bb, int cnt) static live_vars_map *live_vars; static vec<bitmap_head> live_vars_vec; -/* Finds tailcalls falling into basic block BB. The list of found tailcalls is +/* Finds tailcalls falling into basic block BB. The list of found tailcalls is added to the start of RET. When ONLY_MUSTTAIL is set only handle musttail. Update OPT_TAILCALLS as output parameter. If DIAG_MUSTTAIL, diagnose - failures for musttail calls. */ + failures for musttail calls. RETRY_TSAN_FUNC_EXIT is initially 0 and + in that case the last call is attempted to be tail called, including + __tsan_func_exit with -fsanitize=thread. It is set to -1 if we + detect __tsan_func_exit call and in that case tree_optimize_tail_calls_1 + will retry with it set to 1 (regardless of whether turning the + __tsan_func_exit was successfully detected as tail call or not) and that + will allow turning musttail calls before that call into tail calls as well + by adding __tsan_func_exit call before the call. */ static void find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail, - bool &opt_tailcalls, bool diag_musttail) + bool &opt_tailcalls, bool diag_musttail, + int &retry_tsan_func_exit) { tree ass_var = NULL_TREE, ret_var, func, param; gimple *stmt; @@ -552,6 +573,8 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail, size_t idx; tree var; bool only_tailr = false; + bool has_tsan_func_exit = false; + int eh_has_tsan_func_exit = -1; if (!single_succ_p (bb) && (EDGE_COUNT (bb->succs) || !cfun->has_musttail || !diag_musttail)) @@ -585,6 +608,17 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail, || is_gimple_debug (stmt)) continue; + if (cfun->has_musttail + && sanitize_flags_p (SANITIZE_THREAD) + && gimple_call_builtin_p (stmt, BUILT_IN_TSAN_FUNC_EXIT) + && diag_musttail) + { + if (retry_tsan_func_exit == 0) + retry_tsan_func_exit = -1; + else if (retry_tsan_func_exit == 1) + continue; + } + if (!last_stmt) last_stmt = stmt; /* Check for a call. */ @@ -635,7 +669,7 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail, /* Recurse to the predecessors. */ FOR_EACH_EDGE (e, ei, bb->preds) find_tail_calls (e->src, ret, only_musttail, opt_tailcalls, - diag_musttail); + diag_musttail, retry_tsan_func_exit); return; } @@ -715,8 +749,12 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail, return; } + if (diag_musttail && gimple_call_must_tail_p (call)) + eh_has_tsan_func_exit = 0; if (!gimple_call_must_tail_p (call) - || !empty_eh_cleanup (e->dest, 20) + || !empty_eh_cleanup (e->dest, + eh_has_tsan_func_exit + ? NULL : &eh_has_tsan_func_exit, 20) || EDGE_COUNT (bb->succs) > 2) { maybe_error_musttail (call, _("call may throw exception caught " @@ -947,6 +985,17 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail, || is_gimple_debug (stmt)) continue; + if (cfun->has_musttail + && sanitize_flags_p (SANITIZE_THREAD) + && retry_tsan_func_exit == 1 + && gimple_call_builtin_p (stmt, BUILT_IN_TSAN_FUNC_EXIT) + && !has_tsan_func_exit + && gimple_call_must_tail_p (call)) + { + has_tsan_func_exit = true; + continue; + } + if (gimple_code (stmt) != GIMPLE_ASSIGN) { maybe_error_musttail (call, _("unhandled code after call"), @@ -1110,6 +1159,19 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail, return; } + if (eh_has_tsan_func_exit != -1 + && eh_has_tsan_func_exit != has_tsan_func_exit) + { + if (eh_has_tsan_func_exit) + maybe_error_musttail (call, _("call may throw exception caught " + "locally or perform cleanups"), + diag_musttail); + else + maybe_error_musttail (call, _("exception cleanups omit " + "__tsan_func_exit call"), diag_musttail); + return; + } + /* Move queued defs. */ if (tail_recursion) { @@ -1138,6 +1200,7 @@ find_tail_calls (basic_block bb, struct tailcall **ret, bool only_musttail, nw->call_gsi = gsi; nw->tail_recursion = tail_recursion; + nw->has_tsan_func_exit = has_tsan_func_exit; nw->mult = m; nw->add = a; @@ -1472,6 +1535,14 @@ static bool optimize_tail_call (struct tailcall *t, bool opt_tailcalls, class loop *&new_loop) { + if (t->has_tsan_func_exit && (t->tail_recursion || opt_tailcalls)) + { + tree builtin_decl = builtin_decl_implicit (BUILT_IN_TSAN_FUNC_EXIT); + gimple *g = gimple_build_call (builtin_decl, 0); + gimple_set_location (g, cfun->function_end_locus); + gsi_insert_before (&t->call_gsi, g, GSI_SAME_STMT); + } + if (t->tail_recursion) { eliminate_tail_call (t, new_loop); @@ -1490,6 +1561,7 @@ optimize_tail_call (struct tailcall *t, bool opt_tailcalls, print_gimple_stmt (dump_file, stmt, 0, dump_flags); fprintf (dump_file, " in bb %i\n", (gsi_bb (t->call_gsi))->index); } + return t->has_tsan_func_exit; } return false; @@ -1539,12 +1611,23 @@ tree_optimize_tail_calls_1 (bool opt_tailcalls, bool only_musttail, /* Only traverse the normal exits, i.e. those that end with return statement. */ if (safe_is_a <greturn *> (*gsi_last_bb (e->src))) - find_tail_calls (e->src, &tailcalls, only_musttail, opt_tailcalls, - diag_musttail); + { + int retry_tsan_func_exit = 0; + find_tail_calls (e->src, &tailcalls, only_musttail, opt_tailcalls, + diag_musttail, retry_tsan_func_exit); + if (retry_tsan_func_exit == -1) + { + retry_tsan_func_exit = 1; + find_tail_calls (e->src, &tailcalls, only_musttail, + opt_tailcalls, diag_musttail, + retry_tsan_func_exit); + } + } } if (cfun->has_musttail && diag_musttail) { basic_block bb; + int retry_tsan_func_exit = 0; FOR_EACH_BB_FN (bb, cfun) if (EDGE_COUNT (bb->succs) == 0 || (single_succ_p (bb) @@ -1554,7 +1637,7 @@ tree_optimize_tail_calls_1 (bool opt_tailcalls, bool only_musttail, && gimple_call_must_tail_p (as_a <gcall *> (c)) && gimple_call_noreturn_p (as_a <gcall *> (c))) find_tail_calls (bb, &tailcalls, only_musttail, opt_tailcalls, - diag_musttail); + diag_musttail, retry_tsan_func_exit); } if (live_vars) diff --git a/libgcobol/ChangeLog b/libgcobol/ChangeLog index 6795bb2..6a0e961 100644 --- a/libgcobol/ChangeLog +++ b/libgcobol/ChangeLog @@ -1,3 +1,28 @@ +2025-04-15 Andreas Schwab <schwab@suse.de> + + * configure.tgt: Set LIBGCOBOL_SUPPORTED for riscv64-*-linux* with + 64-bit multilib. + +2025-04-15 Jakub Jelinek <jakub@redhat.com> + Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE> + + PR cobol/119244 + * acinclude.m4 (LIBGCOBOL_CHECK_FLOAT128): Ensure + libgcob_cv_have_float128 is not yes on targets with IEEE quad + long double. Don't check for --as-needed nor set LIBQUADSPEC + on targets which USE_IEC_60559. + * libgcobol-fp.h (FP128_FMT, strtofp128, strfromfp128): Define. + * intrinsic.cc (strtof128): Don't redefine. + (WEIRD_TRANSCENDENT_RETURN_VALUE): Use GCOB_FP128_LITERAL macro. + (__gg__numval_f): Use strtofp128 instead of strtof128. + * libgcobol.cc (strtof128): Don't redefine. + (format_for_display_internal): Use strfromfp128 instead of + strfromf128 or quadmath_snprintf and use FP128_FMT in the format + string. + (get_float128, __gg__compare_2, __gg__move, __gg__move_literala): + Use strtofp128 instead of strtof128. + * configure: Regenerate. + 2025-04-14 Andreas Schwab <schwab@suse.de> * libgcobol.cc (__gg__float64_from_128): Mark literal as float128 diff --git a/libgcobol/acinclude.m4 b/libgcobol/acinclude.m4 index aab0895..0e81b10 100644 --- a/libgcobol/acinclude.m4 +++ b/libgcobol/acinclude.m4 @@ -44,6 +44,10 @@ AC_DEFUN([LIBGCOBOL_CHECK_FLOAT128], [ AC_CACHE_CHECK([whether we have a usable _Float128 type], libgcob_cv_have_float128, [ GCC_TRY_COMPILE_OR_LINK([ +#if __LDBL_MANT_DIG__ == 113 && __LDBL_MIN_EXP__ == -16381 +#error "long double is IEEE quad, no need for libquadmath" +#endif + _Float128 foo (_Float128 x) { _Complex _Float128 z1, z2; @@ -90,32 +94,22 @@ AC_DEFUN([LIBGCOBOL_CHECK_FLOAT128], [ fi AC_DEFINE(HAVE_FLOAT128, 1, [Define if target has usable _Float128 and __float128 types.]) - dnl Check whether -Wl,--as-needed resp. -Wl,-zignore is supported - dnl - dnl Turn warnings into error to avoid testsuite breakage. So enable - dnl AC_LANG_WERROR, but there's currently (autoconf 2.64) no way to turn - dnl it off again. As a workaround, save and restore werror flag like - dnl AC_PATH_XTRA. - dnl Cf. http://gcc.gnu.org/ml/gcc-patches/2010-05/msg01889.html - ac_xsave_[]_AC_LANG_ABBREV[]_werror_flag=$ac_[]_AC_LANG_ABBREV[]_werror_flag - AC_CACHE_CHECK([whether --as-needed/-z ignore works], - [libgcob_cv_have_as_needed], - [ - # Test for native Solaris options first. - # No whitespace after -z to pass it through -Wl. - libgcob_cv_as_needed_option="-zignore" - libgcob_cv_no_as_needed_option="-zrecord" - save_LDFLAGS="$LDFLAGS" - LDFLAGS="$LDFLAGS -Wl,$libgcob_cv_as_needed_option -lm -Wl,$libgcob_cv_no_as_needed_option" - libgcob_cv_have_as_needed=no - AC_LANG_WERROR - AC_LINK_IFELSE([AC_LANG_PROGRAM([])], - [libgcob_cv_have_as_needed=yes], - [libgcob_cv_have_as_needed=no]) - LDFLAGS="$save_LDFLAGS" - if test "x$libgcob_cv_have_as_needed" = xno; then - libgcob_cv_as_needed_option="--as-needed" - libgcob_cv_no_as_needed_option="--no-as-needed" + if test "x$USE_IEC_60559" != xyes; then + dnl Check whether -Wl,--as-needed resp. -Wl,-zignore is supported + dnl + dnl Turn warnings into error to avoid testsuite breakage. So enable + dnl AC_LANG_WERROR, but there's currently (autoconf 2.64) no way to turn + dnl it off again. As a workaround, save and restore werror flag like + dnl AC_PATH_XTRA. + dnl Cf. http://gcc.gnu.org/ml/gcc-patches/2010-05/msg01889.html + ac_xsave_[]_AC_LANG_ABBREV[]_werror_flag=$ac_[]_AC_LANG_ABBREV[]_werror_flag + AC_CACHE_CHECK([whether --as-needed/-z ignore works], + [libgcob_cv_have_as_needed], + [ + # Test for native Solaris options first. + # No whitespace after -z to pass it through -Wl. + libgcob_cv_as_needed_option="-zignore" + libgcob_cv_no_as_needed_option="-zrecord" save_LDFLAGS="$LDFLAGS" LDFLAGS="$LDFLAGS -Wl,$libgcob_cv_as_needed_option -lm -Wl,$libgcob_cv_no_as_needed_option" libgcob_cv_have_as_needed=no @@ -124,45 +118,51 @@ AC_DEFUN([LIBGCOBOL_CHECK_FLOAT128], [ [libgcob_cv_have_as_needed=yes], [libgcob_cv_have_as_needed=no]) LDFLAGS="$save_LDFLAGS" - fi - ac_[]_AC_LANG_ABBREV[]_werror_flag=$ac_xsave_[]_AC_LANG_ABBREV[]_werror_flag - ]) - - dnl Determine -Bstatic ... -Bdynamic etc. support from gfortran -### stderr. - touch conftest1.$ac_objext conftest2.$ac_objext - LQUADMATH=-lquadmath - $CXX -static-libgcobol -### -o conftest \ - conftest1.$ac_objext -lgcobol conftest2.$ac_objext 2>&1 >/dev/null \ - | grep "conftest1.$ac_objext.*conftest2.$ac_objext" > conftest.cmd - if grep "conftest1.$ac_objext.* -Bstatic -lgcobol -Bdynamic .*conftest2.$ac_objext" \ - conftest.cmd >/dev/null 2>&1; then - LQUADMATH="%{static-libquadmath:-Bstatic} -lquadmath %{static-libquadmath:-Bdynamic}" - elif grep "conftest1.$ac_objext.* -bstatic -lgcobol -bdynamic .*conftest2.$ac_objext" \ - conftest.cmd >/dev/null 2>&1; then - LQUADMATH="%{static-libquadmath:-bstatic} -lquadmath %{static-libquadmath:-bdynamic}" - elif grep "conftest1.$ac_objext.* -aarchive_shared -lgcobol -adefault .*conftest2.$ac_objext" \ - conftest.cmd >/dev/null 2>&1; then - LQUADMATH="%{static-libquadmath:-aarchive_shared} -lquadmath %{static-libquadmath:-adefault}" - elif grep "conftest1.$ac_objext.*libgcobol.a .*conftest2.$ac_objext" \ + if test "x$libgcob_cv_have_as_needed" = xno; then + libgcob_cv_as_needed_option="--as-needed" + libgcob_cv_no_as_needed_option="--no-as-needed" + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS -Wl,$libgcob_cv_as_needed_option -lm -Wl,$libgcob_cv_no_as_needed_option" + libgcob_cv_have_as_needed=no + AC_LANG_WERROR + AC_LINK_IFELSE([AC_LANG_PROGRAM([])], + [libgcob_cv_have_as_needed=yes], + [libgcob_cv_have_as_needed=no]) + LDFLAGS="$save_LDFLAGS" + fi + ac_[]_AC_LANG_ABBREV[]_werror_flag=$ac_xsave_[]_AC_LANG_ABBREV[]_werror_flag + ]) + + dnl Determine -Bstatic ... -Bdynamic etc. support from gfortran -### stderr. + touch conftest1.$ac_objext conftest2.$ac_objext + LQUADMATH=-lquadmath + $CXX -static-libgcobol -### -o conftest \ + conftest1.$ac_objext -lgcobol conftest2.$ac_objext 2>&1 >/dev/null \ + | grep "conftest1.$ac_objext.*conftest2.$ac_objext" > conftest.cmd + if grep "conftest1.$ac_objext.* -Bstatic -lgcobol -Bdynamic .*conftest2.$ac_objext" \ + conftest.cmd >/dev/null 2>&1; then + LQUADMATH="%{static-libquadmath:-Bstatic} -lquadmath %{static-libquadmath:-Bdynamic}" + elif grep "conftest1.$ac_objext.* -bstatic -lgcobol -bdynamic .*conftest2.$ac_objext" \ + conftest.cmd >/dev/null 2>&1; then + LQUADMATH="%{static-libquadmath:-bstatic} -lquadmath %{static-libquadmath:-bdynamic}" + elif grep "conftest1.$ac_objext.* -aarchive_shared -lgcobol -adefault .*conftest2.$ac_objext" \ + conftest.cmd >/dev/null 2>&1; then + LQUADMATH="%{static-libquadmath:-aarchive_shared} -lquadmath %{static-libquadmath:-adefault}" + elif grep "conftest1.$ac_objext.*libgcobol.a .*conftest2.$ac_objext" \ conftest.cmd >/dev/null 2>&1; then - LQUADMATH="%{static-libquadmath:libquadmath.a%s;:-lquadmath}" - fi - rm -f conftest1.$ac_objext conftest2.$ac_objext conftest conftest.cmd - - dnl For static libgcobol linkage, depend on libquadmath only if needed. - dnl If using *f128 APIs from libc/libm, depend on libquadmath only if needed - dnl even for dynamic libgcobol linkage, and don't link libgcobol against - dnl -lquadmath. - if test "x$libgcob_cv_have_as_needed" = xyes; then - if test "x$USE_IEC_60559" = xyes; then - LIBQUADSPEC="$libgcob_cv_as_needed_option $LQUADMATH $libgcob_cv_no_as_needed_option" + LQUADMATH="%{static-libquadmath:libquadmath.a%s;:-lquadmath}" + fi + rm -f conftest1.$ac_objext conftest2.$ac_objext conftest conftest.cmd + + if test "x$libgcob_cv_have_as_needed" = xyes; then + if test "x$USE_IEC_60559" = xyes; then + LIBQUADSPEC="$libgcob_cv_as_needed_option $LQUADMATH $libgcob_cv_no_as_needed_option" + else + LIBQUADSPEC="%{static-libgcobol:$libgcob_cv_as_needed_option} $LQUADMATH %{static-libgcobol:$libgcob_cv_no_as_needed_option}" + fi else - LIBQUADSPEC="%{static-libgcobol:$libgcob_cv_as_needed_option} $LQUADMATH %{static-libgcobol:$libgcob_cv_no_as_needed_option}" + LIBQUADSPEC="$LQUADMATH" fi - else - LIBQUADSPEC="$LQUADMATH" - fi - if test "x$USE_IEC_60559" != xyes; then if test -f ../libquadmath/libquadmath.la; then LIBQUADLIB=../libquadmath/libquadmath.la LIBQUADLIB_DEP=../libquadmath/libquadmath.la diff --git a/libgcobol/configure b/libgcobol/configure index 6c2747c..e83119d 100755 --- a/libgcobol/configure +++ b/libgcobol/configure @@ -17172,6 +17172,10 @@ else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ +#if __LDBL_MANT_DIG__ == 113 && __LDBL_MIN_EXP__ == -16381 +#error "long double is IEEE quad, no need for libquadmath" +#endif + _Float128 foo (_Float128 x) { _Complex _Float128 z1, z2; @@ -17225,6 +17229,10 @@ fi cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ +#if __LDBL_MANT_DIG__ == 113 && __LDBL_MIN_EXP__ == -16381 +#error "long double is IEEE quad, no need for libquadmath" +#endif + _Float128 foo (_Float128 x) { _Complex _Float128 z1, z2; @@ -17296,23 +17304,24 @@ $as_echo "#define USE_QUADMATH 1" >>confdefs.h $as_echo "#define HAVE_FLOAT128 1" >>confdefs.h - ac_xsave_cxx_werror_flag=$ac_cxx_werror_flag - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether --as-needed/-z ignore works" >&5 + if test "x$USE_IEC_60559" != xyes; then + ac_xsave_cxx_werror_flag=$ac_cxx_werror_flag + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether --as-needed/-z ignore works" >&5 $as_echo_n "checking whether --as-needed/-z ignore works... " >&6; } if ${libgcob_cv_have_as_needed+:} false; then : $as_echo_n "(cached) " >&6 else - # Test for native Solaris options first. - # No whitespace after -z to pass it through -Wl. - libgcob_cv_as_needed_option="-zignore" - libgcob_cv_no_as_needed_option="-zrecord" - save_LDFLAGS="$LDFLAGS" - LDFLAGS="$LDFLAGS -Wl,$libgcob_cv_as_needed_option -lm -Wl,$libgcob_cv_no_as_needed_option" - libgcob_cv_have_as_needed=no + # Test for native Solaris options first. + # No whitespace after -z to pass it through -Wl. + libgcob_cv_as_needed_option="-zignore" + libgcob_cv_no_as_needed_option="-zrecord" + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS -Wl,$libgcob_cv_as_needed_option -lm -Wl,$libgcob_cv_no_as_needed_option" + libgcob_cv_have_as_needed=no ac_cxx_werror_flag=yes - if test x$gcc_no_link = xyes; then + if test x$gcc_no_link = xyes; then as_fn_error $? "Link tests are not allowed after GCC_NO_EXECUTABLES." "$LINENO" 5 fi cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -17333,16 +17342,16 @@ else fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext - LDFLAGS="$save_LDFLAGS" - if test "x$libgcob_cv_have_as_needed" = xno; then - libgcob_cv_as_needed_option="--as-needed" - libgcob_cv_no_as_needed_option="--no-as-needed" - save_LDFLAGS="$LDFLAGS" - LDFLAGS="$LDFLAGS -Wl,$libgcob_cv_as_needed_option -lm -Wl,$libgcob_cv_no_as_needed_option" - libgcob_cv_have_as_needed=no + LDFLAGS="$save_LDFLAGS" + if test "x$libgcob_cv_have_as_needed" = xno; then + libgcob_cv_as_needed_option="--as-needed" + libgcob_cv_no_as_needed_option="--no-as-needed" + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS -Wl,$libgcob_cv_as_needed_option -lm -Wl,$libgcob_cv_no_as_needed_option" + libgcob_cv_have_as_needed=no ac_cxx_werror_flag=yes - if test x$gcc_no_link = xyes; then + if test x$gcc_no_link = xyes; then as_fn_error $? "Link tests are not allowed after GCC_NO_EXECUTABLES." "$LINENO" 5 fi cat confdefs.h - <<_ACEOF >conftest.$ac_ext @@ -17363,44 +17372,43 @@ else fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext - LDFLAGS="$save_LDFLAGS" - fi - ac_cxx_werror_flag=$ac_xsave_cxx_werror_flag + LDFLAGS="$save_LDFLAGS" + fi + ac_cxx_werror_flag=$ac_xsave_cxx_werror_flag fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgcob_cv_have_as_needed" >&5 $as_echo "$libgcob_cv_have_as_needed" >&6; } - touch conftest1.$ac_objext conftest2.$ac_objext - LQUADMATH=-lquadmath - $CXX -static-libgcobol -### -o conftest \ - conftest1.$ac_objext -lgcobol conftest2.$ac_objext 2>&1 >/dev/null \ - | grep "conftest1.$ac_objext.*conftest2.$ac_objext" > conftest.cmd - if grep "conftest1.$ac_objext.* -Bstatic -lgcobol -Bdynamic .*conftest2.$ac_objext" \ - conftest.cmd >/dev/null 2>&1; then - LQUADMATH="%{static-libquadmath:-Bstatic} -lquadmath %{static-libquadmath:-Bdynamic}" - elif grep "conftest1.$ac_objext.* -bstatic -lgcobol -bdynamic .*conftest2.$ac_objext" \ + touch conftest1.$ac_objext conftest2.$ac_objext + LQUADMATH=-lquadmath + $CXX -static-libgcobol -### -o conftest \ + conftest1.$ac_objext -lgcobol conftest2.$ac_objext 2>&1 >/dev/null \ + | grep "conftest1.$ac_objext.*conftest2.$ac_objext" > conftest.cmd + if grep "conftest1.$ac_objext.* -Bstatic -lgcobol -Bdynamic .*conftest2.$ac_objext" \ + conftest.cmd >/dev/null 2>&1; then + LQUADMATH="%{static-libquadmath:-Bstatic} -lquadmath %{static-libquadmath:-Bdynamic}" + elif grep "conftest1.$ac_objext.* -bstatic -lgcobol -bdynamic .*conftest2.$ac_objext" \ + conftest.cmd >/dev/null 2>&1; then + LQUADMATH="%{static-libquadmath:-bstatic} -lquadmath %{static-libquadmath:-bdynamic}" + elif grep "conftest1.$ac_objext.* -aarchive_shared -lgcobol -adefault .*conftest2.$ac_objext" \ + conftest.cmd >/dev/null 2>&1; then + LQUADMATH="%{static-libquadmath:-aarchive_shared} -lquadmath %{static-libquadmath:-adefault}" + elif grep "conftest1.$ac_objext.*libgcobol.a .*conftest2.$ac_objext" \ conftest.cmd >/dev/null 2>&1; then - LQUADMATH="%{static-libquadmath:-bstatic} -lquadmath %{static-libquadmath:-bdynamic}" - elif grep "conftest1.$ac_objext.* -aarchive_shared -lgcobol -adefault .*conftest2.$ac_objext" \ - conftest.cmd >/dev/null 2>&1; then - LQUADMATH="%{static-libquadmath:-aarchive_shared} -lquadmath %{static-libquadmath:-adefault}" - elif grep "conftest1.$ac_objext.*libgcobol.a .*conftest2.$ac_objext" \ - conftest.cmd >/dev/null 2>&1; then - LQUADMATH="%{static-libquadmath:libquadmath.a%s;:-lquadmath}" - fi - rm -f conftest1.$ac_objext conftest2.$ac_objext conftest conftest.cmd + LQUADMATH="%{static-libquadmath:libquadmath.a%s;:-lquadmath}" + fi + rm -f conftest1.$ac_objext conftest2.$ac_objext conftest conftest.cmd - if test "x$libgcob_cv_have_as_needed" = xyes; then - if test "x$USE_IEC_60559" = xyes; then - LIBQUADSPEC="$libgcob_cv_as_needed_option $LQUADMATH $libgcob_cv_no_as_needed_option" + if test "x$libgcob_cv_have_as_needed" = xyes; then + if test "x$USE_IEC_60559" = xyes; then + LIBQUADSPEC="$libgcob_cv_as_needed_option $LQUADMATH $libgcob_cv_no_as_needed_option" + else + LIBQUADSPEC="%{static-libgcobol:$libgcob_cv_as_needed_option} $LQUADMATH %{static-libgcobol:$libgcob_cv_no_as_needed_option}" + fi else - LIBQUADSPEC="%{static-libgcobol:$libgcob_cv_as_needed_option} $LQUADMATH %{static-libgcobol:$libgcob_cv_no_as_needed_option}" + LIBQUADSPEC="$LQUADMATH" fi - else - LIBQUADSPEC="$LQUADMATH" - fi - if test "x$USE_IEC_60559" != xyes; then if test -f ../libquadmath/libquadmath.la; then LIBQUADLIB=../libquadmath/libquadmath.la LIBQUADLIB_DEP=../libquadmath/libquadmath.la diff --git a/libgcobol/configure.tgt b/libgcobol/configure.tgt index ebf044e..a239252 100644 --- a/libgcobol/configure.tgt +++ b/libgcobol/configure.tgt @@ -34,6 +34,11 @@ case "${target}" in LIBGCOBOL_SUPPORTED=yes fi ;; + riscv64-*-linux*) + if test x$ac_cv_sizeof_void_p = x8; then + LIBGCOBOL_SUPPORTED=yes + fi + ;; x86_64-*-linux* | i?86-*-linux* | x86_64-*-darwin*) if test x$ac_cv_sizeof_void_p = x8; then LIBGCOBOL_SUPPORTED=yes diff --git a/libgcobol/intrinsic.cc b/libgcobol/intrinsic.cc index 844cd38..181b053 100644 --- a/libgcobol/intrinsic.cc +++ b/libgcobol/intrinsic.cc @@ -55,19 +55,11 @@ #include "charmaps.h" -#if !defined (HAVE_STRTOF128) -# if USE_QUADMATH -# define strtof128 strtoflt128 -# else -# error "no available string to float 128" -# endif -#endif - #pragma GCC diagnostic ignored "-Wformat-truncation" #define JD_OF_1601_01_02 2305812.5 -#define WEIRD_TRANSCENDENT_RETURN_VALUE (0.0Q) +#define WEIRD_TRANSCENDENT_RETURN_VALUE GCOB_FP128_LITERAL (0.0) #define NO_RDIGITS (0) struct cobol_tm @@ -5016,7 +5008,7 @@ __gg__numval_f( cblc_field_t *dest, } } *p++ = '\0'; - value = strtof128(ach, NULL); + value = strtofp128(ach, NULL); } __gg__float128_to_field(dest, value, diff --git a/libgcobol/libgcobol-fp.h b/libgcobol/libgcobol-fp.h index bd443f3..fcfa0a7 100644 --- a/libgcobol/libgcobol-fp.h +++ b/libgcobol/libgcobol-fp.h @@ -28,17 +28,26 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see # define GCOB_FP128 long double # define GCOB_FP128_LITERAL(lit) (lit ## l) # define FP128_FUNC(funcname) funcname ## l +# define FP128_FMT "L" +# define strtofp128(nptr, endptr) strtold(nptr, endptr) +# define strfromfp128(str, n, format, fp) snprintf(str, n, format, fp) #elif __FLT128_MANT_DIG__ == 113 && __FLT128_MIN_EXP__ == -16381 \ && defined(USE_IEC_60559) // Use _Float128, f128 suffix on calls, f128 or F128 suffix on literals # define GCOB_FP128 _Float128 # define GCOB_FP128_LITERAL(lit) (lit ## f128) # define FP128_FUNC(funcname) funcname ## f128 +# define FP128_FMT "" +# define strtofp128(nptr, endptr) strtof128(nptr, endptr) +# define strfromfp128(str, n, format, fp) strfromf128(str, n, format, fp) #elif __FLT128_MANT_DIG__ == 113 && __FLT128_MIN_EXP__ == -16381 // Use __float128, q suffix on calls, q or Q suffix on literals # define GCOB_FP128 __float128 # define GCOB_FP128_LITERAL(lit) (lit ## q) # define FP128_FUNC(funcname) funcname ## q +# define FP128_FMT "Q" +# define strtofp128(nptr, endptr) strtoflt128(nptr, endptr) +# define strfromfp128(str, n, format, fp) quadmath_snprintf(str, n, format, fp) #else # error "libgcobol requires 128b floating point" #endif diff --git a/libgcobol/libgcobol.cc b/libgcobol/libgcobol.cc index 07d4e8b..c438d6b 100644 --- a/libgcobol/libgcobol.cc +++ b/libgcobol/libgcobol.cc @@ -93,20 +93,6 @@ strfromf64 (char *s, size_t n, const char *f, double v) # endif #endif -#if !defined (HAVE_STRFROMF128) -# if !USE_QUADMATH -# error "no available float 128 to string" -# endif -#endif - -#if !defined (HAVE_STRTOF128) -# if USE_QUADMATH -# define strtof128 strtoflt128 -# else -# error "no available string to float 128" -# endif -#endif - // This couldn't be defined in symbols.h because it conflicts with a LEVEL66 // in parse.h #define LEVEL66 (66) @@ -3262,11 +3248,7 @@ format_for_display_internal(char **dest, // on a 16-bit boundary. GCOB_FP128 floatval; memcpy(&floatval, actual_location, 16); -#if !defined (HAVE_STRFROMF128) && USE_QUADMATH - quadmath_snprintf(ach, sizeof(ach), "%.36QE", floatval); -#else - strfromf128(ach, sizeof(ach), "%.36E", floatval); -#endif + strfromfp128(ach, sizeof(ach), "%.36" FP128_FMT "E", floatval); char *p = strchr(ach, 'E'); if( !p ) { @@ -3288,13 +3270,8 @@ format_for_display_internal(char **dest, int precision = 36 - exp; char achFormat[24]; -#if !defined (HAVE_STRFROMF128) && USE_QUADMATH - sprintf(achFormat, "%%.%dQf", precision); - quadmath_snprintf(ach, sizeof(ach), achFormat, floatval); -#else - sprintf(achFormat, "%%.%df", precision); - strfromf128(ach, sizeof(ach), achFormat, floatval); -#endif + sprintf(achFormat, "%%.%d" FP128_FMT "f", precision); + strfromfp128(ach, sizeof(ach), achFormat, floatval); } __gg__remove_trailing_zeroes(ach); __gg__realloc_if_necessary(dest, dest_size, strlen(ach)+1); @@ -3533,7 +3510,7 @@ get_float128( cblc_field_t *field, { if( __gg__decimal_point == '.' ) { - retval = strtof128(field->initial, NULL); + retval = strtofp128(field->initial, NULL); } else { @@ -3551,7 +3528,7 @@ get_float128( cblc_field_t *field, { *p = '.'; } - retval = strtof128(buffer, NULL); + retval = strtofp128(buffer, NULL); } } else @@ -4248,7 +4225,7 @@ __gg__compare_2(cblc_field_t *left_side, //_Float128 left_value = *(_Float128 *)left_location; GCOB_FP128 left_value; memcpy(&left_value, left_location, 16); - GCOB_FP128 right_value = strtof128(buffer, NULL); + GCOB_FP128 right_value = strtofp128(buffer, NULL); retval = 0; retval = left_value < right_value ? -1 : retval; retval = left_value > right_value ? 1 : retval; @@ -5998,8 +5975,8 @@ __gg__move( cblc_field_t *fdest, } case 16: { - //*(_Float128 *)(fdest->data+dest_offset) = strtof128(ach, NULL); - GCOB_FP128 t = strtof128(ach, NULL); + //*(_Float128 *)(fdest->data+dest_offset) = strtofp128(ach, NULL); + GCOB_FP128 t = strtofp128(ach, NULL); memcpy(fdest->data+dest_offset, &t, 16); break; } @@ -6168,7 +6145,7 @@ __gg__move_literala(cblc_field_t *field, } case 16: { - GCOB_FP128 t = strtof128(ach, NULL); + GCOB_FP128 t = strtofp128(ach, NULL); memcpy(field->data+field_offset, &t, 16); break; } diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 18b7a49..c61c9db 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,38 @@ +2025-04-15 Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (gcn, nvptx): Mention self_maps clause + besides unified_shared_memory in the requirements item. + +2025-04-15 waffl3x <waffl3x@baylibre.com> + + * omp.h.in: Add omp::allocator::* and ompx::allocator::* allocators. + (__detail::__allocator_templ<T, omp_allocator_handle_t>): + New struct template. + (null_allocator<T>): New struct template. + (default_mem<T>): Likewise. + (large_cap_mem<T>): Likewise. + (const_mem<T>): Likewise. + (high_bw_mem<T>): Likewise. + (low_lat_mem<T>): Likewise. + (cgroup_mem<T>): Likewise. + (pteam_mem<T>): Likewise. + (thread_mem<T>): Likewise. + (ompx::allocator::gnu_pinned_mem<T>): Likewise. + * testsuite/libgomp.c++/allocator-1.C: New test. + * testsuite/libgomp.c++/allocator-2.C: New test. + +2025-04-15 Tobias Burnus <tburnus@baylibre.com> + + * libgomp.texi (5.0 Impl. Status): Mark mapping alloc comps as 'Y'. + * testsuite/libgomp.fortran/allocatable-comp.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-3.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-4.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-5.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-6.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-7.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-8.f90: New test. + * testsuite/libgomp.fortran/map-alloc-comp-9.f90: New test. + 2025-04-14 Thomas Schwinge <tschwinge@baylibre.com> PR target/118794 diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index fed9d5e..dfd189b 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -258,7 +258,7 @@ The OpenMP 4.5 specification is fully supported. device memory mapped by an array section @tab P @tab @item Mapping of Fortran pointer and allocatable variables, including pointer and allocatable components of variables - @tab P @tab Mapping of vars with allocatable components unsupported + @tab Y @tab @item @code{defaultmap} extensions @tab Y @tab @item @code{declare mapper} directive @tab N @tab @item @code{omp_get_supported_active_levels} routine @tab Y @tab @@ -6888,7 +6888,7 @@ The implementation remark: @code{device(ancestor:1)}) are processed serially per @code{target} region such that the next reverse offload region is only executed after the previous one returned. -@item OpenMP code that has a @code{requires} directive with +@item OpenMP code that has a @code{requires} directive with @code{self_maps} or @code{unified_shared_memory} is only supported if all AMD GPUs have the @code{HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT} property; for discrete GPUs, this may require setting the @code{HSA_XNACK} environment @@ -7045,7 +7045,7 @@ The implementation remark: Per device, reverse offload regions are processed serially such that the next reverse offload region is only executed after the previous one returned. -@item OpenMP code that has a @code{requires} directive with +@item OpenMP code that has a @code{requires} directive with @code{self_maps} or @code{unified_shared_memory} runs on nvptx devices if and only if all of those support the @code{pageableMemoryAccess} property;@footnote{ @uref{https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#um-requirements}} diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in index d5e8be4..8d17db1 100644 --- a/libgomp/omp.h.in +++ b/libgomp/omp.h.in @@ -432,4 +432,136 @@ extern const char *omp_get_uid_from_device (int) __GOMP_NOTHROW; } #endif +#if __cplusplus >= 201103L + +/* std::__throw_bad_alloc and std::__throw_bad_array_new_length. */ +#include <bits/functexcept.h> + +namespace omp +{ +namespace allocator +{ + +namespace __detail +{ + +template<typename __T, omp_allocator_handle_t __Handle> +struct __allocator_templ +{ + using value_type = __T; + using pointer = __T*; + using const_pointer = const __T*; + using size_type = __SIZE_TYPE__; + using difference_type = __PTRDIFF_TYPE__; + + __T* + allocate (size_type __n) + { + if (__SIZE_MAX__ / sizeof(__T) < __n) + std::__throw_bad_array_new_length (); + void *__p = omp_aligned_alloc (alignof(__T), __n * sizeof(__T), __Handle); + if (!__p) + std::__throw_bad_alloc (); + return static_cast<__T*>(__p); + } + + void + deallocate (__T *__p, size_type) __GOMP_NOTHROW + { + omp_free (static_cast<void*>(__p), __Handle); + } +}; + +template<typename __T, typename __U, omp_allocator_handle_t __Handle> +constexpr bool +operator== (const __allocator_templ<__T, __Handle>&, + const __allocator_templ<__U, __Handle>&) __GOMP_NOTHROW +{ + return true; +} + +template<typename __T, omp_allocator_handle_t __Handle, + typename __U, omp_allocator_handle_t __UHandle> +constexpr bool +operator== (const __allocator_templ<__T, __Handle>&, + const __allocator_templ<__U, __UHandle>&) __GOMP_NOTHROW +{ + return false; +} + +template<typename __T, typename __U, omp_allocator_handle_t __Handle> +constexpr bool +operator!= (const __allocator_templ<__T, __Handle>&, + const __allocator_templ<__U, __Handle>&) __GOMP_NOTHROW +{ + return false; +} + +template<typename __T, omp_allocator_handle_t __Handle, + typename __U, omp_allocator_handle_t __UHandle> +constexpr bool +operator!= (const __allocator_templ<__T, __Handle>&, + const __allocator_templ<__U, __UHandle>&) __GOMP_NOTHROW +{ + return true; +} + +} /* namespace __detail */ + +template<typename __T> +struct null_allocator + : __detail::__allocator_templ<__T, omp_null_allocator> {}; + +template<typename __T> +struct default_mem + : __detail::__allocator_templ<__T, omp_default_mem_alloc> {}; + +template<typename __T> +struct large_cap_mem + : __detail::__allocator_templ<__T, omp_large_cap_mem_alloc> {}; + +template<typename __T> +struct const_mem + : __detail::__allocator_templ<__T, omp_const_mem_alloc> {}; + +template<typename __T> +struct high_bw_mem + : __detail::__allocator_templ<__T, omp_high_bw_mem_alloc> {}; + +template<typename __T> +struct low_lat_mem + : __detail::__allocator_templ<__T, omp_low_lat_mem_alloc> {}; + +template<typename __T> +struct cgroup_mem + : __detail::__allocator_templ<__T, omp_cgroup_mem_alloc> {}; + +template<typename __T> +struct pteam_mem + : __detail::__allocator_templ<__T, omp_pteam_mem_alloc> {}; + +template<typename __T> +struct thread_mem + : __detail::__allocator_templ<__T, omp_thread_mem_alloc> {}; + +} /* namespace allocator */ + +} /* namespace omp */ + +namespace ompx +{ + +namespace allocator +{ + +template<typename __T> +struct gnu_pinned_mem + : omp::allocator::__detail::__allocator_templ<__T, ompx_gnu_pinned_mem_alloc> {}; + +} /* namespace allocator */ + +} /* namespace ompx */ + +#endif /* __cplusplus */ + #endif /* _OMP_H */ diff --git a/libgomp/testsuite/libgomp.c++/allocator-1.C b/libgomp/testsuite/libgomp.c++/allocator-1.C new file mode 100644 index 0000000..f820722 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/allocator-1.C @@ -0,0 +1,158 @@ +// { dg-do run } + +#include <omp.h> +#include <memory> +#include <limits> + +template<typename T, template<typename> class Alloc> +void test (T const initial_value = T()) +{ + using Allocator = Alloc<T>; + Allocator a; + using Traits = std::allocator_traits<Allocator>; + static_assert (__is_same(typename Traits::allocator_type, Allocator )); + static_assert (__is_same(typename Traits::value_type, T )); + static_assert (__is_same(typename Traits::pointer, T* )); + static_assert (__is_same(typename Traits::const_pointer, T const* )); + static_assert (__is_same(typename Traits::void_pointer, void* )); + static_assert (__is_same(typename Traits::const_void_pointer, void const* )); + static_assert (__is_same(typename Traits::difference_type, __PTRDIFF_TYPE__)); + static_assert (__is_same(typename Traits::size_type, __SIZE_TYPE__ )); + static_assert (Traits::propagate_on_container_copy_assignment::value == false); + static_assert (Traits::propagate_on_container_move_assignment::value == false); + static_assert (Traits::propagate_on_container_swap::value == false); + static_assert (Traits::is_always_equal::value == true); + + static constexpr __SIZE_TYPE__ correct_max_size + = std::numeric_limits<__SIZE_TYPE__>::max () / sizeof (T); + if (Traits::max_size (a) != correct_max_size) + __builtin_abort (); + + static constexpr __SIZE_TYPE__ alloc_count = 1; + T *p = Traits::allocate (a, alloc_count); + if (p == nullptr) + __builtin_abort (); + Traits::construct (a, p, initial_value); + if (*p != initial_value) + __builtin_abort (); + Traits::destroy (a, p); + Traits::deallocate (a, p, alloc_count); + /* Not interesting but might as well test it. */ + static_cast<void>(Traits::select_on_container_copy_construction (a)); + + if (!(a == Allocator())) + __builtin_abort (); + if (a != Allocator()) + __builtin_abort (); + if (!(a == Alloc<void>())) + __builtin_abort (); + if (a != Alloc<void>()) + __builtin_abort (); +} + +#define CHECK_INEQUALITY(other_alloc_templ, type) \ +do { \ + /* Skip tests for itself, those are equal. Intantiate each */ \ + /* one with void so we can easily tell if they are the same. */ \ + if (!__is_same (AllocTempl<void>, other_alloc_templ<void>)) \ + { \ + other_alloc_templ<type> other; \ + if (a == other) \ + __builtin_abort (); \ + if (!(a != other)) \ + __builtin_abort (); \ + } \ +} while (false) + +template<typename T, template<typename> class AllocTempl> +void test_inequality () +{ + using Allocator = AllocTempl<T>; + Allocator a; + CHECK_INEQUALITY (omp::allocator::null_allocator, void); + CHECK_INEQUALITY (omp::allocator::default_mem, void); + CHECK_INEQUALITY (omp::allocator::large_cap_mem, void); + CHECK_INEQUALITY (omp::allocator::const_mem, void); + CHECK_INEQUALITY (omp::allocator::high_bw_mem, void); + CHECK_INEQUALITY (omp::allocator::low_lat_mem, void); + CHECK_INEQUALITY (omp::allocator::cgroup_mem, void); + CHECK_INEQUALITY (omp::allocator::pteam_mem, void); + CHECK_INEQUALITY (omp::allocator::thread_mem, void); + CHECK_INEQUALITY (ompx::allocator::gnu_pinned_mem, void); + /* And again with the same type passed to the allocator. */ + CHECK_INEQUALITY (omp::allocator::null_allocator, T); + CHECK_INEQUALITY (omp::allocator::default_mem, T); + CHECK_INEQUALITY (omp::allocator::large_cap_mem, T); + CHECK_INEQUALITY (omp::allocator::const_mem, T); + CHECK_INEQUALITY (omp::allocator::high_bw_mem, T); + CHECK_INEQUALITY (omp::allocator::low_lat_mem, T); + CHECK_INEQUALITY (omp::allocator::cgroup_mem, T); + CHECK_INEQUALITY (omp::allocator::pteam_mem, T); + CHECK_INEQUALITY (omp::allocator::thread_mem, T); + CHECK_INEQUALITY (ompx::allocator::gnu_pinned_mem, T); +} + +#undef CHECK_INEQUALITY + +struct S +{ + int _v0; + bool _v1; + float _v2; + + bool operator== (S const& other) const noexcept { + return _v0 == other._v0 + && _v1 == other._v1 + && _v2 == other._v2; + } + bool operator!= (S const& other) const noexcept { + return !this->operator==(other); + } +}; + +int main () +{ + test<int, omp::allocator::null_allocator>(42); + test<int, omp::allocator::default_mem>(42); + test<int, omp::allocator::large_cap_mem>(42); + test<int, omp::allocator::const_mem>(42); + test<int, omp::allocator::high_bw_mem>(42); + test<int, omp::allocator::low_lat_mem>(42); + test<int, omp::allocator::cgroup_mem>(42); + test<int, omp::allocator::pteam_mem>(42); + test<int, omp::allocator::thread_mem>(42); + test<int, ompx::allocator::gnu_pinned_mem>(42); + + test<long long, omp::allocator::null_allocator>(42); + test<long long, omp::allocator::default_mem>(42); + test<long long, omp::allocator::large_cap_mem>(42); + test<long long, omp::allocator::const_mem>(42); + test<long long, omp::allocator::high_bw_mem>(42); + test<long long, omp::allocator::low_lat_mem>(42); + test<long long, omp::allocator::cgroup_mem>(42); + test<long long, omp::allocator::pteam_mem>(42); + test<long long, omp::allocator::thread_mem>(42); + test<long long, ompx::allocator::gnu_pinned_mem>(42); + + test<S, omp::allocator::null_allocator>( S{42, true, 128.f}); + test<S, omp::allocator::default_mem>( S{42, true, 128.f}); + test<S, omp::allocator::large_cap_mem>( S{42, true, 128.f}); + test<S, omp::allocator::const_mem>( S{42, true, 128.f}); + test<S, omp::allocator::high_bw_mem>( S{42, true, 128.f}); + test<S, omp::allocator::low_lat_mem>( S{42, true, 128.f}); + test<S, omp::allocator::cgroup_mem>( S{42, true, 128.f}); + test<S, omp::allocator::pteam_mem>( S{42, true, 128.f}); + test<S, omp::allocator::thread_mem>( S{42, true, 128.f}); + test<S, ompx::allocator::gnu_pinned_mem>(S{42, true, 128.f}); + + test_inequality<int, omp::allocator::null_allocator>(); + test_inequality<int, omp::allocator::default_mem>(); + test_inequality<int, omp::allocator::large_cap_mem>(); + test_inequality<int, omp::allocator::const_mem>(); + test_inequality<int, omp::allocator::high_bw_mem>(); + test_inequality<int, omp::allocator::low_lat_mem>(); + test_inequality<int, omp::allocator::cgroup_mem>(); + test_inequality<int, omp::allocator::pteam_mem>(); + test_inequality<int, omp::allocator::thread_mem>(); + test_inequality<int, ompx::allocator::gnu_pinned_mem>(); +} diff --git a/libgomp/testsuite/libgomp.c++/allocator-2.C b/libgomp/testsuite/libgomp.c++/allocator-2.C new file mode 100644 index 0000000..d25b755 --- /dev/null +++ b/libgomp/testsuite/libgomp.c++/allocator-2.C @@ -0,0 +1,132 @@ +// { dg-do run } +// { dg-additional-options "-Wno-psabi" } + +#include <omp.h> +#include <vector> + +template<typename T> +bool ptr_is_aligned(T *ptr, std::size_t alignment) +{ + /* ALIGNMENT must be a power of 2. */ + if ((alignment & (alignment - 1)) != 0) + __builtin_abort (); + __UINTPTR_TYPE__ ptr_value + = reinterpret_cast<__UINTPTR_TYPE__>(static_cast<void*>(ptr)); + return (ptr_value % alignment) == 0; +} + +template<typename T, template<typename> class Alloc> +void f (T v0, T v1, T v2, T v3) +{ + std::vector<T, Alloc<T>> vec; + vec.push_back (v0); + vec.push_back (v1); + vec.push_back (v2); + vec.push_back (v3); + if (vec.at (0) != v0) + __builtin_abort (); + if (vec.at (1) != v1) + __builtin_abort (); + if (vec.at (2) != v2) + __builtin_abort (); + if (vec.at (3) != v3) + __builtin_abort (); + if (!ptr_is_aligned (&vec.at (0), alignof (T))) + __builtin_abort (); + if (!ptr_is_aligned (&vec.at (1), alignof (T))) + __builtin_abort (); + if (!ptr_is_aligned (&vec.at (2), alignof (T))) + __builtin_abort (); + if (!ptr_is_aligned (&vec.at (3), alignof (T))) + __builtin_abort (); +} + +struct S0 +{ + int _v0; + bool _v1; + float _v2; + + bool operator== (S0 const& other) const noexcept { + return _v0 == other._v0 + && _v1 == other._v1 + && _v2 == other._v2; + } + bool operator!= (S0 const& other) const noexcept { + return !this->operator==(other); + } +}; + +struct alignas(128) S1 +{ + int _v0; + bool _v1; + float _v2; + + bool operator== (S1 const& other) const noexcept { + return _v0 == other._v0 + && _v1 == other._v1 + && _v2 == other._v2; + } + bool operator!= (S1 const& other) const noexcept { + return !this->operator==(other); + } +}; + +/* Note: the test for const_mem should be disabled in the future. */ + +int main () +{ + f<int, omp::allocator::null_allocator >(0, 1, 2, 3); + f<int, omp::allocator::default_mem >(0, 1, 2, 3); + f<int, omp::allocator::large_cap_mem >(0, 1, 2, 3); + f<int, omp::allocator::const_mem >(0, 1, 2, 3); + f<int, omp::allocator::high_bw_mem >(0, 1, 2, 3); + f<int, omp::allocator::low_lat_mem >(0, 1, 2, 3); + f<int, omp::allocator::cgroup_mem >(0, 1, 2, 3); + f<int, omp::allocator::pteam_mem >(0, 1, 2, 3); + f<int, omp::allocator::thread_mem >(0, 1, 2, 3); + f<int, ompx::allocator::gnu_pinned_mem>(0, 1, 2, 3); + + f<long long, omp::allocator::null_allocator >(0, 1, 2, 3); + f<long long, omp::allocator::default_mem >(0, 1, 2, 3); + f<long long, omp::allocator::large_cap_mem >(0, 1, 2, 3); + f<long long, omp::allocator::const_mem >(0, 1, 2, 3); + f<long long, omp::allocator::high_bw_mem >(0, 1, 2, 3); + f<long long, omp::allocator::low_lat_mem >(0, 1, 2, 3); + f<long long, omp::allocator::cgroup_mem >(0, 1, 2, 3); + f<long long, omp::allocator::pteam_mem >(0, 1, 2, 3); + f<long long, omp::allocator::thread_mem >(0, 1, 2, 3); + f<long long, ompx::allocator::gnu_pinned_mem>(0, 1, 2, 3); + + S0 s0_0{ 42, true, 111128.f}; + S0 s0_1{ 142, false, 11128.f}; + S0 s0_2{ 1142, true, 1128.f}; + S0 s0_3{11142, false, 128.f}; + f<S0, omp::allocator::null_allocator >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::default_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::large_cap_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::const_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::high_bw_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::low_lat_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::cgroup_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::pteam_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, omp::allocator::thread_mem >(s0_0, s0_1, s0_2, s0_3); + f<S0, ompx::allocator::gnu_pinned_mem>(s0_0, s0_1, s0_2, s0_3); + + S1 s1_0{ 42, true, 111128.f}; + S1 s1_1{ 142, false, 11128.f}; + S1 s1_2{ 1142, true, 1128.f}; + S1 s1_3{11142, false, 128.f}; + + f<S1, omp::allocator::null_allocator >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::default_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::large_cap_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::const_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::high_bw_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::low_lat_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::cgroup_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::pteam_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, omp::allocator::thread_mem >(s1_0, s1_1, s1_2, s1_3); + f<S1, ompx::allocator::gnu_pinned_mem>(s1_0, s1_1, s1_2, s1_3); +} diff --git a/libgomp/testsuite/libgomp.fortran/allocatable-comp.f90 b/libgomp/testsuite/libgomp.fortran/allocatable-comp.f90 new file mode 100644 index 0000000..383ecba --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/allocatable-comp.f90 @@ -0,0 +1,53 @@ +implicit none +type t + integer, allocatable :: a, b(:) +end type t +type(t) :: x, y, z +integer :: i + +!$omp target map(to: x) + if (allocated(x%a)) stop 1 + if (allocated(x%b)) stop 2 +!$omp end target + +allocate(x%a, x%b(-4:6)) +x%b(:) = [(i, i=-4,6)] + +!$omp target map(to: x) + if (.not. allocated(x%a)) stop 3 + if (.not. allocated(x%b)) stop 4 + if (lbound(x%b,1) /= -4) stop 5 + if (ubound(x%b,1) /= 6) stop 6 + if (any (x%b /= [(i, i=-4,6)])) stop 7 +!$omp end target + + +! The following only works with arrays due to +! PR fortran/96668 + +!$omp target enter data map(to: y, z) + +!$omp target map(to: y, z) + if (allocated(y%b)) stop 8 + if (allocated(z%b)) stop 9 +!$omp end target + +allocate(y%b(5), z%b(3)) +y%b = 42 +z%b = 99 + +! (implicitly) 'tofrom' mapped +! Planned for OpenMP 6.0 (but common extension) +! OpenMP <= 5.0 unclear +!$omp target map(to: y) + if (.not.allocated(y%b)) stop 10 + if (any (y%b /= 42)) stop 11 +!$omp end target + +! always map: OpenMP 5.1 (clarified) +!$omp target map(always, tofrom: z) + if (.not.allocated(z%b)) stop 12 + if (any (z%b /= 99)) stop 13 +!$omp end target + +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-3.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-3.f90 new file mode 100644 index 0000000..9d48c7c --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-3.f90 @@ -0,0 +1,121 @@ +type t2 + integer x, y, z +end type t2 +type t + integer, allocatable :: A + integer, allocatable :: B(:) + type(t2), allocatable :: C + type(t2), allocatable :: D(:,:) +end type t + +type t3 + type(t) :: Q + type(t) :: R(5) +end type + +type(t) :: var, var2 +type(t3) :: var3, var4 + +! -------------------------------------- +! Assign + allocate +var%A = 45 +var%B = [1,2,3] +var%C = t2(6,5,4) +var%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +! Assign + allocate +var2%A = 145 +var2%B = [991,992,993] +var2%C = t2(996,995,994) +var2%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + + +!$omp target map(to: var) map(tofrom: var2) + call foo(var, var2) +!$omp end target + +if (var2%A /= 45) stop 9 +if (any (var2%B /= [1,2,3])) stop 10 +if (var2%C%x /= 6) stop 11 +if (var2%C%y /= 5) stop 11 +if (var2%C%z /= 4) stop 11 +if (any (var2%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 12 +if (any (var2%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 12 +if (any (var2%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 12 + +! -------------------------------------- +! Assign + allocate +var3%Q%A = 45 +var3%Q%B = [1,2,3] +var3%Q%C = t2(6,5,4) +var3%Q%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +var3%R(2)%A = 45 +var3%R(2)%B = [1,2,3] +var3%R(2)%C = t2(6,5,4) +var3%R(2)%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +! Assign + allocate +var4%Q%A = 145 +var4%Q%B = [991,992,993] +var4%Q%C = t2(996,995,994) +var4%Q%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + +var4%R(3)%A = 145 +var4%R(3)%B = [991,992,993] +var4%R(3)%C = t2(996,995,994) +var4%R(3)%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + +!$omp target map(to: var3%Q) map(tofrom: var4%Q) + call foo(var3%Q, var4%Q) +!$omp end target + +!$omp target map(to: var3%R(2)) map(tofrom: var4%R(3)) + call foo(var3%R(2), var4%R(3)) +!$omp end target + +if (var4%Q%A /= 45) stop 13 +if (any (var4%Q%B /= [1,2,3])) stop 14 +if (var4%Q%C%x /= 6) stop 15 +if (var4%Q%C%y /= 5) stop 15 +if (var4%Q%C%z /= 4) stop 15 +if (any (var4%Q%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 16 +if (any (var4%Q%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 16 +if (any (var4%Q%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 16 + +if (var4%R(3)%A /= 45) stop 17 +if (any (var4%R(3)%B /= [1,2,3])) stop 18 +if (var4%R(3)%C%x /= 6) stop 19 +if (var4%R(3)%C%y /= 5) stop 19 +if (var4%R(3)%C%z /= 4) stop 19 +if (any (var4%R(3)%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 20 +if (any (var4%R(3)%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 20 +if (any (var4%R(3)%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 20 + +contains + subroutine foo(x, y) + type(t) :: x, y + if (x%A /= 45) stop 1 + if (any (x%B /= [1,2,3])) stop 2 + if (x%C%x /= 6) stop 3 + if (x%C%y /= 5) stop 3 + if (x%C%z /= 4) stop 3 + if (any (x%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 4 + if (any (x%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 4 + if (any (x%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 4 + + if (y%A /= 145) stop 5 + if (any (y%B /= [991,992,993])) stop 6 + if (y%C%x /= 996) stop 7 + if (y%C%y /= 995) stop 7 + if (y%C%z /= 994) stop 7 + if (any (y%D(:,:)%x /= reshape([199, 499, 1199, 1499], [2,2]))) stop 8 + if (any (y%D(:,:)%y /= reshape([299, 599, 1299, 1599], [2,2]))) stop 8 + if (any (y%D(:,:)%z /= reshape([399, 699, 1399, 1699], [2,2]))) stop 8 + + y%A = x%A + y%B(:) = x%B + y%C = x%C + y%D(:,:) = x%D(:,:) + end +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-4.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-4.f90 new file mode 100644 index 0000000..fb9859d --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-4.f90 @@ -0,0 +1,124 @@ +type t2 + integer x, y, z +end type t2 +type t + integer, allocatable :: A + integer, allocatable :: B(:) + type(t2), allocatable :: C + type(t2), allocatable :: D(:,:) +end type t + +type t3 + type(t) :: Q + type(t) :: R(5) +end type + +type(t) :: var, var2 +type(t3) :: var3, var4 + +! -------------------------------------- +! Assign + allocate +var%A = 45 +var%B = [1,2,3] +var%C = t2(6,5,4) +var%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +! Assign + allocate +var2%A = 145 +var2%B = [991,992,993] +var2%C = t2(996,995,994) +var2%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + + +!$omp target map(to: var%A, var%B, var%C, var%D) & +!$omp& map(tofrom: var2%A, var2%B, var2%C, var2%D) + call foo(var, var2) +!$omp end target + +if (var2%A /= 45) stop 9 +if (any (var2%B /= [1,2,3])) stop 10 +if (var2%C%x /= 6) stop 11 +if (var2%C%y /= 5) stop 11 +if (var2%C%z /= 4) stop 11 +if (any (var2%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 12 +if (any (var2%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 12 +if (any (var2%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 12 + +! -------------------------------------- +! Assign + allocate +var3%Q%A = 45 +var3%Q%B = [1,2,3] +var3%Q%C = t2(6,5,4) +var3%Q%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +var3%R(2)%A = 45 +var3%R(2)%B = [1,2,3] +var3%R(2)%C = t2(6,5,4) +var3%R(2)%D = reshape([t2(1,2,3), t2(4,5,6), t2(11,12,13), t2(14,15,16)], [2,2]) + +! Assign + allocate +var4%Q%A = 145 +var4%Q%B = [991,992,993] +var4%Q%C = t2(996,995,994) +var4%Q%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + +var4%R(3)%A = 145 +var4%R(3)%B = [991,992,993] +var4%R(3)%C = t2(996,995,994) +var4%R(3)%D = reshape([t2(199,299,399), t2(499,599,699), t2(1199,1299,1399), t2(1499,1599,1699)], [2,2]) + +!$omp target map(to: var3%Q%A, var3%Q%B, var3%Q%C, var3%Q%D) & +!$omp& map(tofrom: var4%Q%A, var4%Q%B, var4%Q%C, var4%Q%D) + call foo(var3%Q, var4%Q) +!$omp end target + +if (var4%Q%A /= 45) stop 13 +if (any (var4%Q%B /= [1,2,3])) stop 14 +if (var4%Q%C%x /= 6) stop 15 +if (var4%Q%C%y /= 5) stop 15 +if (var4%Q%C%z /= 4) stop 15 +if (any (var4%Q%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 16 +if (any (var4%Q%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 16 +if (any (var4%Q%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 16 + +!$omp target map(to: var3%R(2)%A, var3%R(2)%B, var3%R(2)%C, var3%R(2)%D) & +!$omp& map(tofrom: var4%R(3)%A, var4%R(3)%B, var4%R(3)%C, var4%R(3)%D) + call foo(var3%R(2), var4%R(3)) +!$omp end target + +if (var4%R(3)%A /= 45) stop 17 +if (any (var4%R(3)%B /= [1,2,3])) stop 18 +if (var4%R(3)%C%x /= 6) stop 19 +if (var4%R(3)%C%y /= 5) stop 19 +if (var4%R(3)%C%z /= 4) stop 19 +if (any (var4%R(3)%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 20 +if (any (var4%R(3)%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 20 +if (any (var4%R(3)%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 20 + +contains + subroutine foo(x, y) + type(t) :: x, y + if (x%A /= 45) stop 1 + if (any (x%B /= [1,2,3])) stop 2 + if (x%C%x /= 6) stop 3 + if (x%C%y /= 5) stop 3 + if (x%C%z /= 4) stop 3 + if (any (x%D(:,:)%x /= reshape([1, 4, 11, 14], [2,2]))) stop 4 + if (any (x%D(:,:)%y /= reshape([2, 5, 12, 15], [2,2]))) stop 4 + if (any (x%D(:,:)%z /= reshape([3, 6, 13, 16], [2,2]))) stop 4 + + if (y%A /= 145) stop 5 + if (any (y%B /= [991,992,993])) stop 6 + if (y%C%x /= 996) stop 7 + if (y%C%y /= 995) stop 7 + if (y%C%z /= 994) stop 7 + if (any (y%D(:,:)%x /= reshape([199, 499, 1199, 1499], [2,2]))) stop 8 + if (any (y%D(:,:)%y /= reshape([299, 599, 1299, 1599], [2,2]))) stop 8 + if (any (y%D(:,:)%z /= reshape([399, 699, 1399, 1699], [2,2]))) stop 8 + + y%A = x%A + y%B(:) = x%B + y%C = x%C + y%D(:,:) = x%D(:,:) + end +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-5.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-5.f90 new file mode 100644 index 0000000..b2e36b2 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-5.f90 @@ -0,0 +1,53 @@ +implicit none +type t + integer, allocatable :: a, b(:) +end type t +type(t) :: x, y, z +integer :: i + +!$omp target + if (allocated(x%a)) stop 1 + if (allocated(x%b)) stop 2 +!$omp end target + +allocate(x%a, x%b(-4:6)) +x%b(:) = [(i, i=-4,6)] + +!$omp target + if (.not. allocated(x%a)) stop 3 + if (.not. allocated(x%b)) stop 4 + if (lbound(x%b,1) /= -4) stop 5 + if (ubound(x%b,1) /= 6) stop 6 + if (any (x%b /= [(i, i=-4,6)])) stop 7 +!$omp end target + + +! The following only works with arrays due to +! PR fortran/96668 + +!$omp target enter data map(to: y, z) + +!$omp target + if (allocated(y%b)) stop 8 + if (allocated(z%b)) stop 9 +!$omp end target + +allocate(y%b(5), z%b(3)) +y%b = 42 +z%b = 99 + +! (implicitly) 'tofrom' mapped +! Planned for OpenMP 6.0 (but common extension) +! OpenMP <= 5.0 unclear +!$omp target + if (.not.allocated(y%b)) stop 10 + if (any (y%b /= 42)) stop 11 +!$omp end target + +! always map: OpenMP 5.1 (clarified) +!$omp target map(always, tofrom: z) + if (.not.allocated(z%b)) stop 12 + if (any (z%b /= 99)) stop 13 +!$omp end target + +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-6.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-6.f90 new file mode 100644 index 0000000..48d4aea --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-6.f90 @@ -0,0 +1,308 @@ +! NOTE: This code uses POINTER. +! While map(p, var%p) etc. maps the ptr/ptr comp p / var%p (incl. allocatable comps), +! map(var) does not map var%p. + +use iso_c_binding +implicit none +type t2 + integer, allocatable :: x, y, z +end type t2 +type t + integer, pointer :: A => null() + integer, pointer :: B(:) => null() + type(t2), pointer :: C => null() + type(t2), pointer :: D(:,:) => null() +end type t + +type t3 + type(t) :: Q + type(t) :: R(5) +end type + +type(t) :: var, var2 +type(t3) :: var3, var4 +integer(c_intptr_t) :: iptr + +! -------------------------------------- +! Assign + allocate +allocate (var%A, source=45) +allocate (var%B(3), source=[1,2,3]) +allocate (var%C) +var%C%x = 6; var%C%y = 5; var%C%z = 4 +allocate (var%D(2,2)) +var%D(1,1)%x = 1 +var%D(1,1)%y = 2 +var%D(1,1)%z = 3 +var%D(2,1)%x = 4 +var%D(2,1)%y = 5 +var%D(2,1)%z = 6 +var%D(1,2)%x = 11 +var%D(1,2)%y = 12 +var%D(1,2)%z = 13 +var%D(2,2)%x = 14 +var%D(2,2)%y = 15 +var%D(2,2)%z = 16 + +! Assign + allocate +allocate (var2%A, source=145) +allocate (var2%B, source=[991,992,993]) +allocate (var2%C) +var2%C%x = 996; var2%C%y = 995; var2%C%z = 994 +allocate (var2%D(2,2)) +var2%D(1,1)%x = 199 +var2%D(1,1)%y = 299 +var2%D(1,1)%z = 399 +var2%D(2,1)%x = 499 +var2%D(2,1)%y = 599 +var2%D(2,1)%z = 699 +var2%D(1,2)%x = 1199 +var2%D(1,2)%y = 1299 +var2%D(1,2)%z = 1399 +var2%D(2,2)%x = 1499 +var2%D(2,2)%y = 1599 +var2%D(2,2)%z = 1699 + +block + integer(c_intptr_t) :: loc_a, loc_b, loc_c, loc_d, loc2_a, loc2_b, loc2_c, loc2_d + loc_a = loc (var%a) + loc_b = loc (var%b) + loc_c = loc (var%d) + loc_d = loc (var%d) + loc2_a = loc (var2%a) + loc2_b = loc (var2%b) + loc2_c = loc (var2%c) + loc2_d = loc (var2%d) + ! var/var2 are mapped, but the pointer components aren't + !$omp target map(to: var) map(tofrom: var2) + if (loc_a /= loc (var%a)) stop 31 + if (loc_b /= loc (var%b)) stop 32 + if (loc_c /= loc (var%d)) stop 33 + if (loc_d /= loc (var%d)) stop 34 + if (loc2_a /= loc (var2%a)) stop 35 + if (loc2_b /= loc (var2%b)) stop 36 + if (loc2_c /= loc (var2%c)) stop 37 + if (loc2_d /= loc (var2%d)) stop 38 + !$omp end target + if (loc_a /= loc (var%a)) stop 41 + if (loc_b /= loc (var%b)) stop 42 + if (loc_c /= loc (var%d)) stop 43 + if (loc_d /= loc (var%d)) stop 44 + if (loc2_a /= loc (var2%a)) stop 45 + if (loc2_b /= loc (var2%b)) stop 46 + if (loc2_c /= loc (var2%c)) stop 47 + if (loc2_d /= loc (var2%d)) stop 48 +end block + +block + ! Map only (all) components, but this maps also the alloc comps + !$omp target map(to: var%a, var%b, var%c, var%d) map(tofrom: var2%a, var2%b, var2%c, var2%d) + call foo (var,var2) + !$omp end target +end block + +if (var2%A /= 45) stop 9 +if (any (var2%B /= [1,2,3])) stop 10 +if (var2%C%x /= 6) stop 11 +if (var2%C%y /= 5) stop 11 +if (var2%C%z /= 4) stop 11 +block + integer :: tmp_x(2,2), tmp_y(2,2), tmp_z(2,2), i, j + tmp_x = reshape([1, 4, 11, 14], [2,2]) + tmp_y = reshape([2, 5, 12, 15], [2,2]) + tmp_z = reshape([3, 6, 13, 16], [2,2]) + do j = 1, 2 + do i = 1, 2 + if (var2%D(i,j)%x /= tmp_x(i,j)) stop 12 + if (var2%D(i,j)%y /= tmp_y(i,j)) stop 12 + if (var2%D(i,j)%z /= tmp_z(i,j)) stop 12 + end do + end do +end block + +! Extra deallocates due to PR fortran/104697 +deallocate(var%C%x, var%C%y, var%C%z) +deallocate(var%D(1,1)%x, var%D(1,1)%y, var%D(1,1)%z) +deallocate(var%D(2,1)%x, var%D(2,1)%y, var%D(2,1)%z) +deallocate(var%D(1,2)%x, var%D(1,2)%y, var%D(1,2)%z) +deallocate(var%D(2,2)%x, var%D(2,2)%y, var%D(2,2)%z) +deallocate(var%A, var%B, var%C, var%D) + +deallocate(var2%C%x, var2%C%y, var2%C%z) +deallocate(var2%D(1,1)%x, var2%D(1,1)%y, var2%D(1,1)%z) +deallocate(var2%D(2,1)%x, var2%D(2,1)%y, var2%D(2,1)%z) +deallocate(var2%D(1,2)%x, var2%D(1,2)%y, var2%D(1,2)%z) +deallocate(var2%D(2,2)%x, var2%D(2,2)%y, var2%D(2,2)%z) +deallocate(var2%A, var2%B, var2%C, var2%D) + +! -------------------------------------- +! Assign + allocate +allocate (var3%Q%A, source=45) +allocate (var3%Q%B, source=[1,2,3]) +allocate (var3%Q%C, source=t2(6,5,4)) +allocate (var3%Q%D(2,2)) +var3%Q%D(1,1) = t2(1,2,3) +var3%Q%D(2,1) = t2(4,5,6) +var3%Q%D(1,2) = t2(11,12,13) +var3%Q%D(2,2) = t2(14,15,16) + +allocate (var3%R(2)%A, source=45) +allocate (var3%R(2)%B, source=[1,2,3]) +allocate (var3%R(2)%C, source=t2(6,5,4)) +allocate (var3%R(2)%D(2,2)) +var3%R(2)%D(1,1) = t2(1,2,3) +var3%R(2)%D(2,1) = t2(4,5,6) +var3%R(2)%D(1,2) = t2(11,12,13) +var3%R(2)%D(2,2) = t2(14,15,16) + +! Assign + allocate +allocate (var4%Q%A, source=145) +allocate (var4%Q%B, source=[991,992,993]) +allocate (var4%Q%C, source=t2(996,995,994)) +allocate (var4%Q%D(2,2)) +var4%Q%D(1,1) = t2(199,299,399) +var4%Q%D(2,1) = t2(499,599,699) +var4%Q%D(1,2) = t2(1199,1299,1399) +var4%Q%D(2,2) = t2(1499,1599,1699) + +allocate (var4%R(3)%A, source=145) +allocate (var4%R(3)%B, source=[991,992,993]) +allocate (var4%R(3)%C, source=t2(996,995,994)) +allocate (var4%R(3)%D(2,2)) +var4%R(3)%D(1,1) = t2(199,299,399) +var4%R(3)%D(2,1) = t2(499,599,699) +var4%R(3)%D(1,2) = t2(1199,1299,1399) +var4%R(3)%D(2,2) = t2(1499,1599,1699) + +!$omp target map(to: var3%Q%A, var3%Q%B, var3%Q%C, var3%Q%D) & +!$omp& map(tofrom: var4%Q%A, var4%Q%B, var4%Q%C, var4%Q%D) + call foo(var3%Q, var4%Q) +!$omp end target + +iptr = loc(var3%R(2)%A) + +!$omp target map(to: var3%R(2)%A, var3%R(2)%B, var3%R(2)%C, var3%R(2)%D) & +!$omp& map(tofrom: var4%R(3)%A, var4%R(3)%B, var4%R(3)%C, var4%R(3)%D) + call foo(var3%R(2), var4%R(3)) +!$omp end target + +if (var4%Q%A /= 45) stop 13 +if (any (var4%Q%B /= [1,2,3])) stop 14 +if (var4%Q%C%x /= 6) stop 15 +if (var4%Q%C%y /= 5) stop 15 +if (var4%Q%C%z /= 4) stop 15 +block + integer :: tmp_x(2,2), tmp_y(2,2), tmp_z(2,2), i, j + tmp_x = reshape([1, 4, 11, 14], [2,2]) + tmp_y = reshape([2, 5, 12, 15], [2,2]) + tmp_z = reshape([3, 6, 13, 16], [2,2]) + do j = 1, 2 + do i = 1, 2 + if (var4%Q%D(i,j)%x /= tmp_x(i,j)) stop 16 + if (var4%Q%D(i,j)%y /= tmp_y(i,j)) stop 16 + if (var4%Q%D(i,j)%z /= tmp_z(i,j)) stop 16 + end do + end do +end block + +! Cf. PR fortran/104696 +! { dg-output "valid mapping, OK" { xfail { offload_device_nonshared_as } } } +if (iptr /= loc(var3%R(2)%A)) then + print *, "invalid mapping, cf. PR fortran/104696" +else + +if (var4%R(3)%A /= 45) stop 17 +if (any (var4%R(3)%B /= [1,2,3])) stop 18 +if (var4%R(3)%C%x /= 6) stop 19 +if (var4%R(3)%C%y /= 5) stop 19 +if (var4%R(3)%C%z /= 4) stop 19 +block + integer :: tmp_x(2,2), tmp_y(2,2), tmp_z(2,2), i, j + tmp_x = reshape([1, 4, 11, 14], [2,2]) + tmp_y = reshape([2, 5, 12, 15], [2,2]) + tmp_z = reshape([3, 6, 13, 16], [2,2]) + do j = 1, 2 + do i = 1, 2 + if (var4%R(3)%D(i,j)%x /= tmp_x(i,j)) stop 20 + if (var4%R(3)%D(i,j)%y /= tmp_y(i,j)) stop 20 + if (var4%R(3)%D(i,j)%z /= tmp_z(i,j)) stop 20 + end do + end do +end block + +! Extra deallocates due to PR fortran/104697 +deallocate(var3%Q%C%x, var3%Q%D(1,1)%x, var3%Q%D(2,1)%x, var3%Q%D(1,2)%x, var3%Q%D(2,2)%x) +deallocate(var3%Q%C%y, var3%Q%D(1,1)%y, var3%Q%D(2,1)%y, var3%Q%D(1,2)%y, var3%Q%D(2,2)%y) +deallocate(var3%Q%C%z, var3%Q%D(1,1)%z, var3%Q%D(2,1)%z, var3%Q%D(1,2)%z, var3%Q%D(2,2)%z) +deallocate(var3%Q%A, var3%Q%B, var3%Q%C, var3%Q%D) + +deallocate(var4%Q%C%x, var4%Q%D(1,1)%x, var4%Q%D(2,1)%x, var4%Q%D(1,2)%x, var4%Q%D(2,2)%x) +deallocate(var4%Q%C%y, var4%Q%D(1,1)%y, var4%Q%D(2,1)%y, var4%Q%D(1,2)%y, var4%Q%D(2,2)%y) +deallocate(var4%Q%C%z, var4%Q%D(1,1)%z, var4%Q%D(2,1)%z, var4%Q%D(1,2)%z, var4%Q%D(2,2)%z) +deallocate(var4%Q%A, var4%Q%B, var4%Q%C, var4%Q%D) + +deallocate(var3%R(2)%C%x, var3%R(2)%D(1,1)%x, var3%R(2)%D(2,1)%x, var3%R(2)%D(1,2)%x, var3%R(2)%D(2,2)%x) +deallocate(var3%R(2)%C%y, var3%R(2)%D(1,1)%y, var3%R(2)%D(2,1)%y, var3%R(2)%D(1,2)%y, var3%R(2)%D(2,2)%y) +deallocate(var3%R(2)%C%z, var3%R(2)%D(1,1)%z, var3%R(2)%D(2,1)%z, var3%R(2)%D(1,2)%z, var3%R(2)%D(2,2)%z) +deallocate(var3%R(2)%A, var3%R(2)%B, var3%R(2)%C, var3%R(2)%D) + +deallocate(var4%R(3)%C%x, var4%R(3)%D(1,1)%x, var4%R(3)%D(2,1)%x, var4%R(3)%D(1,2)%x, var4%R(3)%D(2,2)%x) +deallocate(var4%R(3)%C%y, var4%R(3)%D(1,1)%y, var4%R(3)%D(2,1)%y, var4%R(3)%D(1,2)%y, var4%R(3)%D(2,2)%y) +deallocate(var4%R(3)%C%z, var4%R(3)%D(1,1)%z, var4%R(3)%D(2,1)%z, var4%R(3)%D(1,2)%z, var4%R(3)%D(2,2)%z) +deallocate(var4%R(3)%A, var4%R(3)%B, var4%R(3)%C, var4%R(3)%D) + + print *, "valid mapping, OK" +endif + +contains + subroutine foo(x, y) + type(t) :: x, y + intent(in) :: x + intent(inout) :: y + integer :: tmp_x(2,2), tmp_y(2,2), tmp_z(2,2), i, j + if (x%A /= 45) stop 1 + if (any (x%B /= [1,2,3])) stop 2 + if (x%C%x /= 6) stop 3 + if (x%C%y /= 5) stop 3 + if (x%C%z /= 4) stop 3 + + tmp_x = reshape([1, 4, 11, 14], [2,2]) + tmp_y = reshape([2, 5, 12, 15], [2,2]) + tmp_z = reshape([3, 6, 13, 16], [2,2]) + do j = 1, 2 + do i = 1, 2 + if (x%D(i,j)%x /= tmp_x(i,j)) stop 4 + if (x%D(i,j)%y /= tmp_y(i,j)) stop 4 + if (x%D(i,j)%z /= tmp_z(i,j)) stop 4 + end do + end do + + if (y%A /= 145) stop 5 + if (any (y%B /= [991,992,993])) stop 6 + if (y%C%x /= 996) stop 7 + if (y%C%y /= 995) stop 7 + if (y%C%z /= 994) stop 7 + tmp_x = reshape([199, 499, 1199, 1499], [2,2]) + tmp_y = reshape([299, 599, 1299, 1599], [2,2]) + tmp_z = reshape([399, 699, 1399, 1699], [2,2]) + do j = 1, 2 + do i = 1, 2 + if (y%D(i,j)%x /= tmp_x(i,j)) stop 8 + if (y%D(i,j)%y /= tmp_y(i,j)) stop 8 + if (y%D(i,j)%z /= tmp_z(i,j)) stop 8 + end do + end do + + y%A = x%A + y%B(:) = x%B + y%C%x = x%C%x + y%C%y = x%C%y + y%C%z = x%C%z + do j = 1, 2 + do i = 1, 2 + y%D(i,j)%x = x%D(i,j)%x + y%D(i,j)%y = x%D(i,j)%y + y%D(i,j)%z = x%D(i,j)%z + end do + end do + end +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-7.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-7.f90 new file mode 100644 index 0000000..1493c5f --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-7.f90 @@ -0,0 +1,672 @@ +module m + implicit none (type, external) + type t + integer, allocatable :: arr(:,:) + integer :: var + integer, allocatable :: slr + end type t + +contains + + subroutine check_it (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + type(t), intent(inout) :: & + scalar, array(:,:), opt_scalar, opt_array(:,:), a_scalar, a_array(:,:), & + a_opt_scalar, a_opt_array(:,:), & + l_scalar, l_array(:,:), la_scalar, la_array(:,:) + optional :: opt_scalar, opt_array, a_opt_scalar, a_opt_array + allocatable :: a_scalar, a_array, a_opt_scalar, a_opt_array, la_scalar, la_array + logical, value :: is_present, dummy_alloced, inner_alloc + integer :: i, j, k, l + + ! CHECK VALUE + if (scalar%var /= 42) stop 1 + if (l_scalar%var /= 42) stop 1 + if (is_present) then + if (opt_scalar%var /= 42) stop 2 + end if + if (any (shape(array) /= [3,2])) stop 1 + if (any (shape(l_array) /= [3,2])) stop 1 + if (is_present) then + if (any (shape(opt_array) /= [3,2])) stop 1 + end if + do j = 1, 2 + do i = 1, 3 + if (array(i,j)%var /= i*97 + 100*41*j) stop 3 + if (l_array(i,j)%var /= i*97 + 100*41*j) stop 3 + if (is_present) then + if (opt_array(i,j)%var /= i*97 + 100*41*j) stop 4 + end if + end do + end do + + if (dummy_alloced) then + if (a_scalar%var /= 42) stop 1 + if (la_scalar%var /= 42) stop 1 + if (is_present) then + if (a_opt_scalar%var /= 42) stop 1 + end if + if (any (shape(a_array) /= [3,2])) stop 1 + if (any (shape(la_array) /= [3,2])) stop 1 + if (is_present) then + if (any (shape(a_opt_array) /= [3,2])) stop 1 + end if + do j = 1, 2 + do i = 1, 3 + if (a_array(i,j)%var /= i*97 + 100*41*j) stop 1 + if (la_array(i,j)%var /= i*97 + 100*41*j) stop 1 + if (is_present) then + if (a_opt_array(i,j)%var /= i*97 + 100*41*j) stop 1 + end if + end do + end do + else + if (allocated (a_scalar)) stop 1 + if (allocated (la_scalar)) stop 1 + if (allocated (a_array)) stop 1 + if (allocated (la_array)) stop 1 + if (is_present) then + if (allocated (a_opt_scalar)) stop 1 + if (allocated (a_opt_array)) stop 1 + end if + end if + + if (inner_alloc) then + if (scalar%slr /= 467) stop 5 + if (l_scalar%slr /= 467) stop 5 + if (a_scalar%slr /= 467) stop 6 + if (la_scalar%slr /= 467) stop 6 + if (is_present) then + if (opt_scalar%slr /= 467) stop 7 + if (a_opt_scalar%slr /= 467) stop 8 + end if + do j = 1, 2 + do i = 1, 3 + if (array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 9 + if (l_array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 9 + if (a_array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 10 + if (la_array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 10 + if (is_present) then + if (opt_array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 11 + if (a_opt_array(i,j)%slr /= (i*97 + 100*41*j) + 467) stop 12 + end if + end do + end do + + do l = 1, 5 + do k = 1, 4 + if (any (shape(scalar%arr) /= [4,5])) stop 1 + if (any (shape(l_scalar%arr) /= [4,5])) stop 1 + if (any (shape(a_scalar%arr) /= [4,5])) stop 1 + if (any (shape(la_scalar%arr) /= [4,5])) stop 1 + if (scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 13 + if (l_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 13 + if (a_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 14 + if (la_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 14 + if (is_present) then + if (any (shape(opt_scalar%arr) /= [4,5])) stop 1 + if (any (shape(a_opt_scalar%arr) /= [4,5])) stop 1 + if (opt_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 15 + if (a_opt_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467) stop 16 + end if + end do + end do + do j = 1, 2 + do i = 1, 3 + if (any (shape(array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(l_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(a_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(la_array(i,j)%arr) /= [i,j])) stop 1 + if (is_present) then + if (any (shape(opt_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(a_opt_array(i,j)%arr) /= [i,j])) stop 1 + endif + do l = 1, j + do k = 1, i + if (array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 17 + if (l_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 17 + if (a_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 18 + if (la_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 18 + if (is_present) then + if (opt_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 19 + if (a_opt_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l) stop 20 + end if + end do + end do + end do + end do + else if (dummy_alloced) then + if (allocated (scalar%slr)) stop 1 + if (allocated (l_scalar%slr)) stop 1 + if (allocated (a_scalar%slr)) stop 1 + if (allocated (la_scalar%slr)) stop 1 + if (is_present) then + if (allocated (opt_scalar%slr)) stop 1 + if (allocated (a_opt_scalar%slr)) stop 1 + endif + if (allocated (scalar%arr)) stop 1 + if (allocated (l_scalar%arr)) stop 1 + if (allocated (a_scalar%arr)) stop 1 + if (allocated (la_scalar%arr)) stop 1 + if (is_present) then + if (allocated (opt_scalar%arr)) stop 1 + if (allocated (a_opt_scalar%arr)) stop 1 + endif + end if + + ! SET VALUE + scalar%var = 42 + 13 + l_scalar%var = 42 + 13 + if (is_present) then + opt_scalar%var = 42 + 13 + endif + do j = 1, 2 + do i = 1, 3 + array(i,j)%var = i*97 + 100*41*j + 13 + l_array(i,j)%var = i*97 + 100*41*j + 13 + if (is_present) then + opt_array(i,j)%var = i*97 + 100*41*j + 13 + end if + end do + end do + + if (dummy_alloced) then + a_scalar%var = 42 + 13 + la_scalar%var = 42 + 13 + if (is_present) then + a_opt_scalar%var = 42 + 13 + endif + do j = 1, 2 + do i = 1, 3 + a_array(i,j)%var = i*97 + 100*41*j + 13 + la_array(i,j)%var = i*97 + 100*41*j + 13 + if (is_present) then + a_opt_array(i,j)%var = i*97 + 100*41*j + 13 + endif + end do + end do + end if + + if (inner_alloc) then + scalar%slr = 467 + 13 + l_scalar%slr = 467 + 13 + a_scalar%slr = 467 + 13 + la_scalar%slr = 467 + 13 + if (is_present) then + opt_scalar%slr = 467 + 13 + a_opt_scalar%slr = 467 + 13 + end if + do j = 1, 2 + do i = 1, 3 + array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + l_array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + a_array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + la_array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + if (is_present) then + opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + a_opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + 13 + end if + end do + end do + + do l = 1, 5 + do k = 1, 4 + scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + l_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + a_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + la_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + if (is_present) then + opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + a_opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + 13 + end if + end do + end do + do j = 1, 2 + do i = 1, 3 + do l = 1, j + do k = 1, i + array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + l_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + a_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + la_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + if (is_present) then + opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + a_opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + 13 + end if + end do + end do + end do + end do + end if + + end subroutine + subroutine check_reset (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + type(t), intent(inout) :: & + scalar, array(:,:), opt_scalar, opt_array(:,:), a_scalar, a_array(:,:), & + a_opt_scalar, a_opt_array(:,:), & + l_scalar, l_array(:,:), la_scalar, la_array(:,:) + optional :: opt_scalar, opt_array, a_opt_scalar, a_opt_array + allocatable :: a_scalar, a_array, a_opt_scalar, a_opt_array, la_scalar, la_array + logical, value :: is_present, dummy_alloced, inner_alloc + integer :: i, j, k, l + + ! CHECK VALUE + if (scalar%var /= 42 + 13) stop 1 + if (l_scalar%var /= 42 + 13) stop 1 + if (is_present) then + if (opt_scalar%var /= 42 + 13) stop 2 + end if + if (any (shape(array) /= [3,2])) stop 1 + if (any (shape(l_array) /= [3,2])) stop 1 + if (is_present) then + if (any (shape(opt_array) /= [3,2])) stop 1 + end if + do j = 1, 2 + do i = 1, 3 + if (array(i,j)%var /= i*97 + 100*41*j + 13) stop 3 + if (l_array(i,j)%var /= i*97 + 100*41*j + 13) stop 3 + if (is_present) then + if (opt_array(i,j)%var /= i*97 + 100*41*j + 13) stop 4 + end if + end do + end do + + if (dummy_alloced) then + if (a_scalar%var /= 42 + 13) stop 1 + if (la_scalar%var /= 42 + 13) stop 1 + if (is_present) then + if (a_opt_scalar%var /= 42 + 13) stop 1 + end if + if (any (shape(a_array) /= [3,2])) stop 1 + if (any (shape(la_array) /= [3,2])) stop 1 + if (is_present) then + if (any (shape(a_opt_array) /= [3,2])) stop 1 + end if + do j = 1, 2 + do i = 1, 3 + if (a_array(i,j)%var /= i*97 + 100*41*j + 13) stop 1 + if (la_array(i,j)%var /= i*97 + 100*41*j + 13) stop 1 + if (is_present) then + if (a_opt_array(i,j)%var /= i*97 + 100*41*j + 13) stop 1 + end if + end do + end do + else + if (allocated (a_scalar)) stop 1 + if (allocated (la_scalar)) stop 1 + if (allocated (a_array)) stop 1 + if (allocated (la_array)) stop 1 + if (is_present) then + if (allocated (a_opt_scalar)) stop 1 + if (allocated (a_opt_array)) stop 1 + end if + end if + + if (inner_alloc) then + if (scalar%slr /= 467 + 13) stop 5 + if (l_scalar%slr /= 467 + 13) stop 5 + if (a_scalar%slr /= 467 + 13) stop 6 + if (la_scalar%slr /= 467 + 13) stop 6 + if (is_present) then + if (opt_scalar%slr /= 467 + 13) stop 7 + if (a_opt_scalar%slr /= 467 + 13) stop 8 + end if + do j = 1, 2 + do i = 1, 3 + if (array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 9 + if (l_array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 9 + if (a_array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 10 + if (la_array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 10 + if (is_present) then + if (opt_array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 11 + if (a_opt_array(i,j)%slr /= (i*97 + 100*41*j) + 467 + 13) stop 12 + end if + end do + end do + + do l = 1, 5 + do k = 1, 4 + if (any (shape(scalar%arr) /= [4,5])) stop 1 + if (any (shape(l_scalar%arr) /= [4,5])) stop 1 + if (any (shape(a_scalar%arr) /= [4,5])) stop 1 + if (any (shape(la_scalar%arr) /= [4,5])) stop 1 + if (scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 13 + if (l_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 13 + if (a_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 14 + if (la_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 14 + if (is_present) then + if (any (shape(opt_scalar%arr) /= [4,5])) stop 1 + if (any (shape(a_opt_scalar%arr) /= [4,5])) stop 1 + if (opt_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 15 + if (a_opt_scalar%arr(k,l) /= (i*27 + 1000*11*j) + 467 + 13) stop 16 + end if + end do + end do + do j = 1, 2 + do i = 1, 3 + if (any (shape(array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(l_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(a_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(la_array(i,j)%arr) /= [i,j])) stop 1 + if (is_present) then + if (any (shape(opt_array(i,j)%arr) /= [i,j])) stop 1 + if (any (shape(a_opt_array(i,j)%arr) /= [i,j])) stop 1 + endif + do l = 1, j + do k = 1, i + if (array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 17 + if (l_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 17 + if (a_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 18 + if (la_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 18 + if (is_present) then + if (opt_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 19 + if (a_opt_array(i,j)%arr(k,l) /= i*27 + 1000*11*j + 467 + 3*k +53*l + 13) stop 20 + end if + end do + end do + end do + end do + else if (dummy_alloced) then + if (allocated (scalar%slr)) stop 1 + if (allocated (l_scalar%slr)) stop 1 + if (allocated (a_scalar%slr)) stop 1 + if (allocated (la_scalar%slr)) stop 1 + if (is_present) then + if (allocated (opt_scalar%slr)) stop 1 + if (allocated (a_opt_scalar%slr)) stop 1 + endif + if (allocated (scalar%arr)) stop 1 + if (allocated (l_scalar%arr)) stop 1 + if (allocated (a_scalar%arr)) stop 1 + if (allocated (la_scalar%arr)) stop 1 + if (is_present) then + if (allocated (opt_scalar%arr)) stop 1 + if (allocated (a_opt_scalar%arr)) stop 1 + endif + end if + + ! (RE)SET VALUE + scalar%var = 42 + l_scalar%var = 42 + if (is_present) then + opt_scalar%var = 42 + endif + do j = 1, 2 + do i = 1, 3 + array(i,j)%var = i*97 + 100*41*j + l_array(i,j)%var = i*97 + 100*41*j + if (is_present) then + opt_array(i,j)%var = i*97 + 100*41*j + end if + end do + end do + + if (dummy_alloced) then + a_scalar%var = 42 + la_scalar%var = 42 + if (is_present) then + a_opt_scalar%var = 42 + endif + do j = 1, 2 + do i = 1, 3 + a_array(i,j)%var = i*97 + 100*41*j + la_array(i,j)%var = i*97 + 100*41*j + if (is_present) then + a_opt_array(i,j)%var = i*97 + 100*41*j + endif + end do + end do + end if + + if (inner_alloc) then + scalar%slr = 467 + l_scalar%slr = 467 + a_scalar%slr = 467 + la_scalar%slr = 467 + if (is_present) then + opt_scalar%slr = 467 + a_opt_scalar%slr = 467 + end if + do j = 1, 2 + do i = 1, 3 + array(i,j)%slr = (i*97 + 100*41*j) + 467 + l_array(i,j)%slr = (i*97 + 100*41*j) + 467 + a_array(i,j)%slr = (i*97 + 100*41*j) + 467 + la_array(i,j)%slr = (i*97 + 100*41*j) + 467 + if (is_present) then + opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + a_opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + end if + end do + end do + + do l = 1, 5 + do k = 1, 4 + scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + l_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + a_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + la_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + if (is_present) then + opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + a_opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + end if + end do + end do + do j = 1, 2 + do i = 1, 3 + do l = 1, j + do k = 1, i + array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + l_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + a_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + la_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + if (is_present) then + opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + a_opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + end if + end do + end do + end do + end do + end if + end subroutine + + subroutine test(scalar, array, a_scalar, a_array, opt_scalar, opt_array, & + a_opt_scalar, a_opt_array) + type(t) :: scalar, array(:,:), opt_scalar, opt_array(:,:), a_scalar, a_array(:,:) + type(t) :: a_opt_scalar, a_opt_array(:,:) + type(t) :: l_scalar, l_array(3,2), la_scalar, la_array(:,:) + allocatable :: a_scalar, a_array, a_opt_scalar, a_opt_array, la_scalar, la_array + optional :: opt_scalar, opt_array, a_opt_scalar, a_opt_array + + integer :: i, j, k, l + logical :: is_present, dummy_alloced, local_alloced, inner_alloc + is_present = present(opt_scalar) + dummy_alloced = allocated(a_scalar) + inner_alloc = allocated(scalar%slr) + + l_scalar%var = 42 + do j = 1, 2 + do i = 1, 3 + l_array(i,j)%var = i*97 + 100*41*j + end do + end do + + if (dummy_alloced) then + allocate(la_scalar, la_array(3,2)) + a_scalar%var = 42 + la_scalar%var = 42 + do j = 1, 2 + do i = 1, 3 + l_array(i,j)%var = i*97 + 100*41*j + la_array(i,j)%var = i*97 + 100*41*j + end do + end do + end if + + if (inner_alloc) then + l_scalar%slr = 467 + la_scalar%slr = 467 + do j = 1, 2 + do i = 1, 3 + l_array(i,j)%slr = (i*97 + 100*41*j) + 467 + la_array(i,j)%slr = (i*97 + 100*41*j) + 467 + end do + end do + + allocate(l_scalar%arr(4,5), la_scalar%arr(4,5)) + do l = 1, 5 + do k = 1, 4 + l_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + la_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + end do + end do + do j = 1, 2 + do i = 1, 3 + allocate(l_array(i,j)%arr(i,j), la_array(i,j)%arr(i,j)) + do l = 1, j + do k = 1, i + l_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + la_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + end do + end do + end do + end do + end if + + ! implicit mapping + !$omp target + if (is_present) then + call check_it (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + else + call check_it (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array) + end if + !$omp end target + + if (is_present) then + call check_reset (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + else + call check_reset (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array) + endif + + ! explicit mapping + !$omp target map(scalar, array, opt_scalar, opt_array, a_scalar, a_array) & + !$omp& map(a_opt_scalar, a_opt_array) & + !$omp& map(l_scalar, l_array, la_scalar, la_array) + if (is_present) then + call check_it (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + else + call check_it (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array) + endif + !$omp end target + + if (is_present) then + call check_reset (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array, & + opt_scalar, opt_array, a_opt_scalar, a_opt_array) + else + call check_reset (is_present, dummy_alloced, inner_alloc, & + scalar, array, a_scalar, a_array, & + l_scalar, l_array, la_scalar, la_array) + endif + end subroutine +end module + +program main + use m + implicit none (type, external) + type(t) :: scalar, array(3,2), opt_scalar, opt_array(3,2), a_scalar, a_array(:,:) + type(t) :: a_opt_scalar, a_opt_array(:,:) + allocatable :: a_scalar, a_array, a_opt_scalar, a_opt_array + integer :: i, j, k, l, n + + scalar%var = 42 + opt_scalar%var = 42 + do j = 1, 2 + do i = 1, 3 + array(i,j)%var = i*97 + 100*41*j + opt_array(i,j)%var = i*97 + 100*41*j + end do + end do + + ! unallocated + call test (scalar, array, a_scalar, a_array) + call test (scalar, array, a_scalar, a_array, opt_scalar, opt_array, a_opt_scalar, a_opt_array) + + ! allocated + allocate(a_scalar, a_opt_scalar, a_array(3,2), a_opt_array(3,2)) + a_scalar%var = 42 + a_opt_scalar%var = 42 + do j = 1, 2 + do i = 1, 3 + a_array(i,j)%var = i*97 + 100*41*j + a_opt_array(i,j)%var = i*97 + 100*41*j + end do + end do + + call test (scalar, array, a_scalar, a_array) + call test (scalar, array, a_scalar, a_array, opt_scalar, opt_array, a_opt_scalar, a_opt_array) + + ! comps allocated + scalar%slr = 467 + a_scalar%slr = 467 + opt_scalar%slr = 467 + a_opt_scalar%slr = 467 + do j = 1, 2 + do i = 1, 3 + array(i,j)%slr = (i*97 + 100*41*j) + 467 + a_array(i,j)%slr = (i*97 + 100*41*j) + 467 + opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + a_opt_array(i,j)%slr = (i*97 + 100*41*j) + 467 + end do + end do + + allocate(scalar%arr(4,5), a_scalar%arr(4,5), opt_scalar%arr(4,5), a_opt_scalar%arr(4,5)) + do l = 1, 5 + do k = 1, 4 + scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + a_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + a_opt_scalar%arr(k,l) = (i*27 + 1000*11*j) + 467 + end do + end do + do j = 1, 2 + do i = 1, 3 + allocate(array(i,j)%arr(i,j), a_array(i,j)%arr(i,j), opt_array(i,j)%arr(i,j), a_opt_array(i,j)%arr(i,j)) + do l = 1, j + do k = 1, i + array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + a_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + a_opt_array(i,j)%arr(k,l) = i*27 + 1000*11*j + 467 + 3*k +53*l + end do + end do + end do + end do + + call test (scalar, array, a_scalar, a_array) + call test (scalar, array, a_scalar, a_array, opt_scalar, opt_array, a_opt_scalar, a_opt_array) + + deallocate(a_scalar, a_opt_scalar, a_array, a_opt_array) +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-8.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-8.f90 new file mode 100644 index 0000000..f5a286e --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-8.f90 @@ -0,0 +1,268 @@ +module m + implicit none (type, external) + type t + integer, allocatable :: A(:) + end type t + type t2 + type(t), allocatable :: vT + integer, allocatable :: x + end type t2 + +contains + + subroutine test_alloc() + type(t) :: var + type(t), allocatable :: var2 + + allocate(var2) + allocate(var%A(4), var2%A(5)) + + !$omp target enter data map(alloc: var, var2) + !$omp target + if (.not. allocated(Var2)) stop 1 + if (.not. allocated(Var%A)) stop 2 + if (.not. allocated(Var2%A)) stop 3 + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) stop 4 + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) stop 5 + var%A = [1,2,3,4] + var2%A = [11,22,33,44,55] + !$omp end target + !$omp target exit data map(from: var, var2) + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%A)) error stop + if (.not. allocated(Var2%A)) error stop + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) error stop + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) error stop + if (any(var%A /= [1,2,3,4])) error stop + if (any(var2%A /= [11,22,33,44,55])) error stop + end subroutine test_alloc + + subroutine test2_alloc() + type(t2) :: var + type(t2), allocatable :: var2 + + allocate(var2) + allocate(var%x, var2%x) + + !$omp target enter data map(alloc: var, var2) + !$omp target + if (.not. allocated(Var2)) stop 6 + if (.not. allocated(Var%x)) stop 7 + if (.not. allocated(Var2%x)) stop 8 + var%x = 42 + var2%x = 43 + !$omp end target + !$omp target exit data map(from: var, var2) + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (var%x /= 42) error stop + if (var2%x /= 43) error stop + + allocate(var%vt, var2%vt) + allocate(var%vt%A(-1:3), var2%vt%A(0:4)) + + !$omp target enter data map(alloc: var, var2) + !$omp target + if (.not. allocated(Var2)) stop 11 + if (.not. allocated(Var%x)) stop 12 + if (.not. allocated(Var2%x)) stop 13 + if (.not. allocated(Var%vt)) stop 14 + if (.not. allocated(Var2%vt)) stop 15 + if (.not. allocated(Var%vt%a)) stop 16 + if (.not. allocated(Var2%vt%a)) stop 17 + var%x = 42 + var2%x = 43 + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) stop 4 + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) stop 5 + var%vt%A = [1,2,3,4,5] + var2%vt%A = [11,22,33,44,55] + !$omp end target + !$omp target exit data map(from: var, var2) + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (.not. allocated(Var%vt)) error stop + if (.not. allocated(Var2%vt)) error stop + if (.not. allocated(Var%vt%a)) error stop + if (.not. allocated(Var2%vt%a)) error stop + if (var%x /= 42) error stop + if (var2%x /= 43) error stop + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) error stop + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) error stop + if (any(var%vt%A /= [1,2,3,4,5])) error stop + if (any(var2%vt%A /= [11,22,33,44,55])) error stop + end subroutine test2_alloc + + + subroutine test_alloc_target() + type(t) :: var + type(t), allocatable :: var2 + + allocate(var2) + allocate(var%A(4), var2%A(5)) + + !$omp target map(alloc: var, var2) + if (.not. allocated(Var2)) stop 1 + if (.not. allocated(Var%A)) stop 2 + if (.not. allocated(Var2%A)) stop 3 + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) stop 4 + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) stop 5 + var%A = [1,2,3,4] + var2%A = [11,22,33,44,55] + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%A)) error stop + if (.not. allocated(Var2%A)) error stop + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) error stop + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) error stop + end subroutine test_alloc_target + + subroutine test2_alloc_target() + type(t2) :: var + type(t2), allocatable :: var2 + + allocate(var2) + allocate(var%x, var2%x) + + !$omp target map(alloc: var, var2) + if (.not. allocated(Var2)) stop 6 + if (.not. allocated(Var%x)) stop 7 + if (.not. allocated(Var2%x)) stop 8 + var%x = 42 + var2%x = 43 + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + + allocate(var%vt, var2%vt) + allocate(var%vt%A(-1:3), var2%vt%A(0:4)) + + !$omp target map(alloc: var, var2) + if (.not. allocated(Var2)) stop 11 + if (.not. allocated(Var%x)) stop 12 + if (.not. allocated(Var2%x)) stop 13 + if (.not. allocated(Var%vt)) stop 14 + if (.not. allocated(Var2%vt)) stop 15 + if (.not. allocated(Var%vt%a)) stop 16 + if (.not. allocated(Var2%vt%a)) stop 17 + var%x = 42 + var2%x = 43 + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) stop 4 + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) stop 5 + var%vt%A = [1,2,3,4,5] + var2%vt%A = [11,22,33,44,55] + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (.not. allocated(Var%vt)) error stop + if (.not. allocated(Var2%vt)) error stop + if (.not. allocated(Var%vt%a)) error stop + if (.not. allocated(Var2%vt%a)) error stop + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) error stop + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) error stop + end subroutine test2_alloc_target + + + + subroutine test_from() + type(t) :: var + type(t), allocatable :: var2 + + allocate(var2) + allocate(var%A(4), var2%A(5)) + + !$omp target map(from: var, var2) + if (.not. allocated(Var2)) stop 1 + if (.not. allocated(Var%A)) stop 2 + if (.not. allocated(Var2%A)) stop 3 + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) stop 4 + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) stop 5 + var%A = [1,2,3,4] + var2%A = [11,22,33,44,55] + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%A)) error stop + if (.not. allocated(Var2%A)) error stop + if (lbound(var%A, 1) /= 1 .or. ubound(var%A, 1) /= 4) error stop + if (lbound(var2%A, 1) /= 1 .or. ubound(var2%A, 1) /= 5) error stop + if (any(var%A /= [1,2,3,4])) error stop + if (any(var2%A /= [11,22,33,44,55])) error stop + end subroutine test_from + + subroutine test2_from() + type(t2) :: var + type(t2), allocatable :: var2 + + allocate(var2) + allocate(var%x, var2%x) + + !$omp target map(from: var, var2) + if (.not. allocated(Var2)) stop 6 + if (.not. allocated(Var%x)) stop 7 + if (.not. allocated(Var2%x)) stop 8 + var%x = 42 + var2%x = 43 + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (var%x /= 42) error stop + if (var2%x /= 43) error stop + + allocate(var%vt, var2%vt) + allocate(var%vt%A(-1:3), var2%vt%A(0:4)) + + !$omp target map(from: var, var2) + if (.not. allocated(Var2)) stop 11 + if (.not. allocated(Var%x)) stop 12 + if (.not. allocated(Var2%x)) stop 13 + if (.not. allocated(Var%vt)) stop 14 + if (.not. allocated(Var2%vt)) stop 15 + if (.not. allocated(Var%vt%a)) stop 16 + if (.not. allocated(Var2%vt%a)) stop 17 + var%x = 42 + var2%x = 43 + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) stop 4 + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) stop 5 + var%vt%A = [1,2,3,4,5] + var2%vt%A = [11,22,33,44,55] + !$omp end target + + if (.not. allocated(Var2)) error stop + if (.not. allocated(Var%x)) error stop + if (.not. allocated(Var2%x)) error stop + if (.not. allocated(Var%vt)) error stop + if (.not. allocated(Var2%vt)) error stop + if (.not. allocated(Var%vt%a)) error stop + if (.not. allocated(Var2%vt%a)) error stop + if (var%x /= 42) error stop + if (var2%x /= 43) error stop + if (lbound(var%vt%A, 1) /= -1 .or. ubound(var%vt%A, 1) /= 3) error stop + if (lbound(var2%vt%A, 1) /= 0 .or. ubound(var2%vt%A, 1) /= 4) error stop + if (any(var%vt%A /= [1,2,3,4,5])) error stop + if (any(var2%vt%A /= [11,22,33,44,55])) error stop + end subroutine test2_from + +end module m + +use m + implicit none (type, external) + call test_alloc + call test2_alloc + call test_alloc_target + call test2_alloc_target + + call test_from + call test2_from +end diff --git a/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90 b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90 new file mode 100644 index 0000000..3cec392 --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/map-alloc-comp-9.f90 @@ -0,0 +1,559 @@ +! Ensure that polymorphic mapping is diagnosed as undefined behavior +! Ensure that static access to polymorphic variables works + +subroutine test(case) +implicit none(type, external) +type t + integer :: x(4) +end type t + +type ta + integer, allocatable :: x(:) +end type ta + +type t2 + class(t), allocatable :: x + class(t), allocatable :: x2(:) +end type t2 + +type t3 + type(t2) :: y + type(t2) :: y2(2) +end type t3 + +type t4 + type(t3), allocatable :: y + type(t3), allocatable :: y2(:) +end type t4 + +integer, value :: case + +logical :: is_shared_mem + +! Mangle stack addresses +integer, volatile :: case_var(100*case) + +type(t), allocatable :: var1 +type(ta), allocatable :: var1a +class(t), allocatable :: var2 +type(t2), allocatable :: var3 +type(t4), allocatable :: var4 + +case_var(100) = 0 +!print *, 'case', case + +var1 = t([1,2,3,4]) +var1a = ta([-1,-2,-3,-4,-5]) + +var2 = t([11,22,33,44]) + +allocate(t2 :: var3) +allocate(t :: var3%x) +allocate(t :: var3%x2(2)) +var3%x%x = [111,222,333,444] +var3%x2(1)%x = 2*[111,222,333,444] +var3%x2(2)%x = 3*[111,222,333,444] + +allocate(t4 :: var4) +allocate(t3 :: var4%y) +allocate(t3 :: var4%y2(2)) +allocate(t :: var4%y%y%x) +allocate(t :: var4%y%y%x2(2)) +allocate(t :: var4%y2(1)%y%x) +allocate(t :: var4%y2(1)%y%x2(2)) +allocate(t :: var4%y2(2)%y%x) +allocate(t :: var4%y2(2)%y%x2(2)) +var4%y%y%x%x = -1 * [1111,2222,3333,4444] +var4%y%y%x2(1)%x = -2 * [1111,2222,3333,4444] +var4%y%y%x2(2)%x = -3 * [1111,2222,3333,4444] +var4%y2(1)%y%x%x = -4 * [1111,2222,3333,4444] +var4%y2(1)%y%x2(1)%x = -5 * [1111,2222,3333,4444] +var4%y2(1)%y%x2(2)%x = -6 * [1111,2222,3333,4444] +var4%y2(2)%y%x%x = -7 * [1111,2222,3333,4444] +var4%y2(2)%y%x2(1)%x = -8 * [1111,2222,3333,4444] +var4%y2(2)%y%x2(2)%x = -9 * [1111,2222,3333,4444] + +is_shared_mem = .false. +!$omp target map(to: is_shared_mem) + is_shared_mem = .true. +!$omp end target + +if (case == 1) then + ! implicit mapping + !$omp target + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + end if + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + end if + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + +else if (case == 2) then + ! Use target with defaultmap(TO) + + !$omp target defaultmap(to : all) + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target defaultmap(to : all) + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target defaultmap(to : all) ! { dg-warning "Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target defaultmap(to : all) ! { dg-warning "Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + endif + !$omp end target + + !$omp target defaultmap(to : all) firstprivate(is_shared_mem) ! { dg-warning "Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + endif + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + +else if (case == 3) then + ! Use target with map clause + + !$omp target map(tofrom: var1) + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target map(tofrom: var1a) + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target map(tofrom: var2) ! { dg-warning "28: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target map(tofrom: var3) ! { dg-warning "28: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + endif + !$omp end target + + !$omp target map(tofrom: var4) ! { dg-warning "28: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + end if + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + endif + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + +else if (case == 4) then + ! Use target with map clause -- NOTE: This uses TO not TOFROM + + !$omp target map(to: var1) + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target map(to: var1a) + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target map(to: var2) ! { dg-warning "24: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target map(to: var3) ! { dg-warning "24: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + endif + !$omp end target + + !$omp target map(to: var4) ! { dg-warning "24: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + endif + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + +else if (case == 5) then + ! Use target enter/exit data + target with explicit map + !$omp target enter data map(to: var1) + !$omp target enter data map(to: var1a) + !$omp target enter data map(to: var2) ! { dg-warning "35: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target enter data map(to: var3) ! { dg-warning "35: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target enter data map(to: var4) ! { dg-warning "35: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + + !$omp target map(to: var1) + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target map(to: var1a) + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target map(to: var2) ! { dg-warning "24: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target map(to: var3) ! { dg-warning "24: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + endif + !$omp end target + + !$omp target map(to: var4) ! { dg-warning "24: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + endif + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + + !$omp target exit data map(from: var1) + !$omp target exit data map(from: var1a) + !$omp target exit data map(from: var2) ! { dg-warning "36: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target exit data map(from: var3) ! { dg-warning "36: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target exit data map(from: var4) ! { dg-warning "36: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + +else if (case == 6) then + ! Use target enter/exit data + target with implicit map + + !$omp target enter data map(to: var1) + !$omp target enter data map(to: var1a) + !$omp target enter data map(to: var2) ! { dg-warning "35: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target enter data map(to: var3) ! { dg-warning "35: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target enter data map(to: var4) ! { dg-warning "35: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + + !$omp target + if (any (var1%x /= [1,2,3,4])) stop 1 + var1%x = 2 * var1%x + !$omp end target + + !$omp target + if (any (var1a%x /= [-1,-2,-3,-4])) stop 2 + var1a%x = 3 * var1a%x + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var2%x /= [11,22,33,44])) stop 3 + var2%x = 4 * var2%x + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var3%x%x /= [111,222,333,444])) stop 4 + var3%x%x = 5 * var3%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 4 + if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 4 + var3%x2(1)%x = 5 * var3%x2(1)%x + var3%x2(2)%x = 5 * var3%x2(2)%x + endif + !$omp end target + + !$omp target ! { dg-warning "Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 5 + endif + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 5 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 5 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 5 + endif + var4%y%y%x%x = 6 * var4%y%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y%y%x2(1)%x = 6 * var4%y%y%x2(1)%x + var4%y%y%x2(2)%x = 6 * var4%y%y%x2(2)%x + endif + var4%y2(1)%y%x%x = 6 * var4%y2(1)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(1)%y%x2(1)%x = 6 * var4%y2(1)%y%x2(1)%x + var4%y2(1)%y%x2(2)%x = 6 * var4%y2(1)%y%x2(2)%x + endif + var4%y2(2)%y%x%x = 6 * var4%y2(2)%y%x%x + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + var4%y2(2)%y%x2(1)%x = 6 * var4%y2(2)%y%x2(1)%x + var4%y2(2)%y%x2(2)%x = 6 * var4%y2(2)%y%x2(2)%x + endif + !$omp end target + + !$omp target exit data map(from: var1) + !$omp target exit data map(from: var1a) + !$omp target exit data map(from: var2) ! { dg-warning "36: Mapping of polymorphic list item 'var2' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target exit data map(from: var3) ! { dg-warning "36: Mapping of polymorphic list item 'var3->x' is unspecified behavior \\\[-Wopenmp\\\]" } + !$omp target exit data map(from: var4) ! { dg-warning "36: Mapping of polymorphic list item 'var4\.\[0-9\]+->y->y\.x' is unspecified behavior \\\[-Wopenmp\\\]" } + +else + error stop +end if + +if ((case /= 2 .and. case /= 4) .or. is_shared_mem) then + ! The target update should have been active, check for the updated values + if (any (var1%x /= 2 * [1,2,3,4])) stop 11 + if (any (var1a%x /= 3 * [-1,-2,-3,-4])) stop 22 + if (any (var2%x /= 4 * [11,22,33,44])) stop 33 + + if (any (var3%x%x /= 5 * [111,222,333,444])) stop 44 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var3%x2(1)%x /= 2 * 5 * [111,222,333,444])) stop 44 + if (any (var3%x2(2)%x /= 3 * 5 * [111,222,333,444])) stop 44 + endif + + if (any (var4%y%y%x%x /= -1 * 6 * [1111,2222,3333,4444])) stop 55 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y%y%x2(1)%x /= -2 * 6 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y%y%x2(2)%x /= -3 * 6 * [1111,2222,3333,4444])) stop 55 + endif + if (any (var4%y2(1)%y%x%x /= -4 * 6 * [1111,2222,3333,4444])) stop 55 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(1)%y%x2(1)%x /= -5 * 6 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y2(1)%y%x2(2)%x /= -6 * 6 * [1111,2222,3333,4444])) stop 55 + endif + if (any (var4%y2(2)%y%x%x /= -7 * 6 * [1111,2222,3333,4444])) stop 55 + if (is_shared_mem) then ! For stride data, this accesses the host's _vtab + if (any (var4%y2(2)%y%x2(1)%x /= -8 * 6 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y2(2)%y%x2(2)%x /= -9 * 6 * [1111,2222,3333,4444])) stop 55 + endif +else + ! The old host values should still be there as 'to:' created a device copy + if (any (var1%x /= [1,2,3,4])) stop 12 + if (any (var1a%x /= [-1,-2,-3,-4])) stop 22 + if (any (var2%x /= [11,22,33,44])) stop 33 + + if (any (var3%x%x /= [111,222,333,444])) stop 44 + ! .not. is_shared_mem: + ! if (any (var3%x2(1)%x /= 2*[111,222,333,444])) stop 44 + ! if (any (var3%x2(2)%x /= 3*[111,222,333,444])) stop 44 + + if (any (var4%y%y%x%x /= -1 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y%y%x2(1)%x /= -2 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y%y%x2(2)%x /= -3 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y2(1)%y%x%x /= -4 * [1111,2222,3333,4444])) stop 55 + ! .not. is_shared_mem: + !if (any (var4%y2(1)%y%x2(1)%x /= -5 * [1111,2222,3333,4444])) stop 55 + !if (any (var4%y2(1)%y%x2(2)%x /= -6 * [1111,2222,3333,4444])) stop 55 + if (any (var4%y2(2)%y%x%x /= -7 * [1111,2222,3333,4444])) stop 55 + ! .not. is_shared_mem: + !if (any (var4%y2(2)%y%x2(1)%x /= -8 * [1111,2222,3333,4444])) stop 55 + !if (any (var4%y2(2)%y%x2(2)%x /= -9 * [1111,2222,3333,4444])) stop 55 +end if +if (case_var(100) /= 0) stop 123 +end subroutine test + +program main + use omp_lib + implicit none(type, external) + interface + subroutine test(case) + integer, value :: case + end + end interface + integer :: dev + call run_it(omp_get_default_device()) + do dev = 0, omp_get_num_devices() + call run_it(dev) + end do + call run_it(omp_initial_device) +! print *, 'all done' +contains +subroutine run_it(dev) + integer, value :: dev +! print *, 'DEVICE', dev + call omp_set_default_device(dev) + call test(1) + call test(2) + call test(3) + call test(4) + call test(5) + call test(6) +end +end diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 2ec1e8f..2f128f2 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,64 @@ +2025-04-15 Jonathan Wakely <jwakely@redhat.com> + + * include/std/ranges (__glibcxx_want_ranges_iota): Do not + define. + +2025-04-15 Jonathan Wakely <jwakely@redhat.com> + + * include/std/numeric (ranges): Only declare namespace for C++23 + and later. + (ranges::iota_result): Fix indentation. + * testsuite/17_intro/names.cc: Check ranges is not used as an + identifier before C++20. + +2025-04-15 Tomasz Kamiński <tkaminsk@redhat.com> + + PR libstdc++/109162 + * include/std/format (__format::__has_debug_format, _Pres_type::_Pres_seq) + (_Pres_type::_Pres_str, __format::__Stackbuf_size): Define. + (_Separators::_S_squares, _Separators::_S_parens, _Separators::_S_comma) + (_Separators::_S_colon): Define additional constants. + (_Spec::_M_parse_fill_and_align): Define overload accepting + list of excluded characters for fill, and forward existing overload. + (__formatter_str::_M_format_range): Define. + (__format::_Buf_sink) Use __Stackbuf_size for size of array. + (__format::__is_map_formattable, std::range_formatter) + (std::formatter<_Rg, _CharT>): Define. + * src/c++23/std.cc.in (std::format_kind, std::range_format) + (std::range_formatter): Export. + * testsuite/std/format/formatter/lwg3944.cc: Guarded tests with + __glibcxx_format_ranges. + * testsuite/std/format/formatter/requirements.cc: Adjusted for standard + behavior. + * testsuite/23_containers/vector/bool/format.cc: Test vector<bool> formatting. + * testsuite/std/format/ranges/format_kind.cc: New test. + * testsuite/std/format/ranges/formatter.cc: New test. + * testsuite/std/format/ranges/sequence.cc: New test. + * testsuite/std/format/ranges/string.cc: New test. + +2025-04-15 Jonathan Wakely <jwakely@redhat.com> + + PR libstdc++/119748 + * include/bits/basic_string.h (_S_copy_chars): Only optimize for + contiguous iterators that are convertible to const charT*. Use + explicit conversion to charT after dereferencing iterator. + (_S_copy_range): Likewise for contiguous ranges. + * include/bits/basic_string.tcc (_M_construct): Use explicit + conversion to charT after dereferencing iterator. + * include/bits/cow_string.h (_S_copy_chars): Likewise. + (basic_string(from_range_t, R&&, const Allocator&)): Likewise. + Only optimize for contiguous iterators that are convertible to + const charT*. + * testsuite/21_strings/basic_string/cons/char/119748.cc: New + test. + * testsuite/21_strings/basic_string/cons/wchar_t/119748.cc: + New test. + +2025-04-15 Jonathan Wakely <jwakely@redhat.com> + + * testsuite/util/testsuite_iterators.h (test_container): Define + array constructor for C++98 as well. + 2025-04-14 Jonathan Wakely <jwakely@redhat.com> PR libstdc++/21334 diff --git a/libstdc++-v3/include/bits/basic_string.h b/libstdc++-v3/include/bits/basic_string.h index 9c431c7..c90bd09 100644 --- a/libstdc++-v3/include/bits/basic_string.h +++ b/libstdc++-v3/include/bits/basic_string.h @@ -488,8 +488,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 is_same<_IterBase, const _CharT*>>::value) _S_copy(__p, std::__niter_base(__k1), __k2 - __k1); #if __cpp_lib_concepts - else if constexpr (contiguous_iterator<_Iterator> - && is_same_v<iter_value_t<_Iterator>, _CharT>) + else if constexpr (requires { + requires contiguous_iterator<_Iterator>; + { std::to_address(__k1) } + -> convertible_to<const _CharT*>; + }) { const auto __d = __k2 - __k1; (void) (__k1 + __d); // See P3349R1 @@ -499,7 +502,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 else #endif for (; __k1 != __k2; ++__k1, (void)++__p) - traits_type::assign(*__p, *__k1); // These types are off. + traits_type::assign(*__p, static_cast<_CharT>(*__k1)); } #pragma GCC diagnostic pop @@ -527,12 +530,19 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 static constexpr void _S_copy_range(pointer __p, _Rg&& __rg, size_type __n) { - if constexpr (ranges::contiguous_range<_Rg> - && is_same_v<ranges::range_value_t<_Rg>, _CharT>) + if constexpr (requires { + requires ranges::contiguous_range<_Rg>; + { ranges::data(std::forward<_Rg>(__rg)) } + -> convertible_to<const _CharT*>; + }) _S_copy(__p, ranges::data(std::forward<_Rg>(__rg)), __n); else - for (auto&& __e : __rg) - traits_type::assign(*__p++, std::forward<decltype(__e)>(__e)); + { + auto __first = ranges::begin(__rg); + const auto __last = ranges::end(__rg); + for (; __first != __last; ++__first) + traits_type::assign(*__p++, static_cast<_CharT>(*__first)); + } } #endif diff --git a/libstdc++-v3/include/bits/basic_string.tcc b/libstdc++-v3/include/bits/basic_string.tcc index 02230ac..bca55bc 100644 --- a/libstdc++-v3/include/bits/basic_string.tcc +++ b/libstdc++-v3/include/bits/basic_string.tcc @@ -210,7 +210,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_data(__another); _M_capacity(__capacity); } - traits_type::assign(_M_data()[__len++], *__beg); + traits_type::assign(_M_data()[__len++], + static_cast<_CharT>(*__beg)); ++__beg; } diff --git a/libstdc++-v3/include/bits/cow_string.h b/libstdc++-v3/include/bits/cow_string.h index b250397..f9df2be 100644 --- a/libstdc++-v3/include/bits/cow_string.h +++ b/libstdc++-v3/include/bits/cow_string.h @@ -423,7 +423,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _S_copy_chars(_CharT* __p, _Iterator __k1, _Iterator __k2) { for (; __k1 != __k2; ++__k1, (void)++__p) - traits_type::assign(*__p, *__k1); // These types are off. + traits_type::assign(*__p, static_cast<_CharT>(*__k1)); } static void @@ -656,12 +656,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION reserve(__n); pointer __p = _M_data(); - if constexpr (ranges::contiguous_range<_Rg> - && is_same_v<ranges::range_value_t<_Rg>, _CharT>) + if constexpr (requires { + requires ranges::contiguous_range<_Rg>; + { ranges::data(std::forward<_Rg>(__rg)) } + -> convertible_to<const _CharT*>; + }) _M_copy(__p, ranges::data(std::forward<_Rg>(__rg)), __n); else - for (auto&& __e : __rg) - traits_type::assign(*__p++, std::forward<decltype(__e)>(__e)); + { + auto __first = ranges::begin(__rg); + const auto __last = ranges::end(__rg); + for (; __first != __last; ++__first) + traits_type::assign(*__p++, static_cast<_CharT>(*__first)); + } _M_rep()->_M_set_length_and_sharable(__n); } else diff --git a/libstdc++-v3/include/std/format b/libstdc++-v3/include/std/format index 23f0097..096dda4 100644 --- a/libstdc++-v3/include/std/format +++ b/libstdc++-v3/include/std/format @@ -97,6 +97,10 @@ namespace __format #define _GLIBCXX_WIDEN_(C, S) ::std::__format::_Widen<C>(S, L##S) #define _GLIBCXX_WIDEN(S) _GLIBCXX_WIDEN_(_CharT, S) + // Size for stack located buffer + template<typename _CharT> + constexpr size_t __stackbuf_size = 32 * sizeof(void*) / sizeof(_CharT); + // Type-erased character sinks. template<typename _CharT> class _Sink; template<typename _CharT> class _Fixedbuf_sink; @@ -475,9 +479,10 @@ namespace __format _Pres_d = 1, _Pres_b, _Pres_B, _Pres_o, _Pres_x, _Pres_X, _Pres_c, // Presentation types for floating-point types. _Pres_a = 1, _Pres_A, _Pres_e, _Pres_E, _Pres_f, _Pres_F, _Pres_g, _Pres_G, - _Pres_p = 0, _Pres_P, // For pointers. - _Pres_s = 0, // For strings and bool. - _Pres_esc = 0xf, // For strings and charT. + _Pres_p = 0, _Pres_P, // For pointers. + _Pres_s = 0, // For strings, bool + _Pres_seq = 0, _Pres_str, // For ranges + _Pres_esc = 0xf, // For strings, charT and ranges }; enum _Align { @@ -544,42 +549,48 @@ namespace __format // pre: __first != __last constexpr iterator _M_parse_fill_and_align(iterator __first, iterator __last) noexcept + { return _M_parse_fill_and_align(__first, __last, "{"); } + + // pre: __first != __last + constexpr iterator + _M_parse_fill_and_align(iterator __first, iterator __last, string_view __not_fill) noexcept { - if (*__first != '{') + for (char c : __not_fill) + if (*__first == c) + return __first; + + using namespace __unicode; + if constexpr (__literal_encoding_is_unicode<_CharT>()) { - using namespace __unicode; - if constexpr (__literal_encoding_is_unicode<_CharT>()) - { - // Accept any UCS scalar value as fill character. - _Utf32_view<ranges::subrange<iterator>> __uv({__first, __last}); - if (!__uv.empty()) - { - auto __beg = __uv.begin(); - char32_t __c = *__beg++; - if (__is_scalar_value(__c)) - if (auto __next = __beg.base(); __next != __last) - if (_Align __align = _S_align(*__next)) - { - _M_fill = __c; - _M_align = __align; - return ++__next; - } - } - } - else if (__last - __first >= 2) - if (_Align __align = _S_align(__first[1])) - { - _M_fill = *__first; - _M_align = __align; - return __first + 2; - } + // Accept any UCS scalar value as fill character. + _Utf32_view<ranges::subrange<iterator>> __uv({__first, __last}); + if (!__uv.empty()) + { + auto __beg = __uv.begin(); + char32_t __c = *__beg++; + if (__is_scalar_value(__c)) + if (auto __next = __beg.base(); __next != __last) + if (_Align __align = _S_align(*__next)) + { + _M_fill = __c; + _M_align = __align; + return ++__next; + } + } + } + else if (__last - __first >= 2) + if (_Align __align = _S_align(__first[1])) + { + _M_fill = *__first; + _M_align = __align; + return __first + 2; + } - if (_Align __align = _S_align(__first[0])) - { - _M_fill = ' '; - _M_align = __align; - return __first + 1; - } + if (_Align __align = _S_align(__first[0])) + { + _M_fill = ' '; + _M_align = __align; + return __first + 1; } return __first; } @@ -934,11 +945,27 @@ namespace __format static consteval _Str_view _S_all() - { return _GLIBCXX_WIDEN("{}"); } + { return _GLIBCXX_WIDEN("[]{}(), : "); } static consteval - _Str_view _S_braces() + _Str_view _S_squares() { return _S_all().substr(0, 2); } + + static consteval + _Str_view _S_braces() + { return _S_all().substr(2, 2); } + + static consteval + _Str_view _S_parens() + { return _S_all().substr(4, 2); } + + static consteval + _Str_view _S_comma() + { return _S_all().substr(6, 2); } + + static consteval + _Str_view _S_colon() + { return _S_all().substr(8, 2); } }; template<typename _CharT> @@ -1231,6 +1258,13 @@ namespace __format template<__char _CharT> struct __formatter_str { + __formatter_str() = default; + + constexpr + __formatter_str(_Spec<_CharT> __spec) noexcept + : _M_spec(__spec) + { } + constexpr typename basic_format_parse_context<_CharT>::iterator parse(basic_format_parse_context<_CharT>& __pc) { @@ -1329,6 +1363,43 @@ namespace __format } #if __glibcxx_format_ranges // C++ >= 23 && HOSTED + template<ranges::input_range _Rg, typename _Out> + requires same_as<remove_cvref_t<ranges::range_reference_t<_Rg>>, _CharT> + typename basic_format_context<_Out, _CharT>::iterator + _M_format_range(_Rg&& __rg, basic_format_context<_Out, _CharT>& __fc) const + { + using _String = basic_string<_CharT>; + using _String_view = basic_string_view<_CharT>; + if constexpr (ranges::forward_range<_Rg> || ranges::sized_range<_Rg>) + { + const size_t __n(ranges::distance(__rg)); + if constexpr (ranges::contiguous_range<_Rg>) + return format(_String_view(ranges::data(__rg), __n), __fc); + else if (__n <= __format::__stackbuf_size<_CharT>) + { + _CharT __buf[__format::__stackbuf_size<_CharT>]; + ranges::copy(__rg, __buf); + return format(_String_view(__buf, __n), __fc); + } + else if constexpr (ranges::sized_range<_Rg>) + return format(_String(from_range, __rg), __fc); + else if constexpr (ranges::random_access_range<_Rg>) + { + ranges::iterator_t<_Rg> __first = ranges::begin(__rg); + ranges::subrange __sub(__first, __first + __n); + return format(_String(from_range, __sub), __fc); + } + else + { + // N.B. preserve the computed size + ranges::subrange __sub(__rg, __n); + return format(_String(from_range, __sub), __fc); + } + } + else + return format(_String(from_range, __rg), __fc); + } + constexpr void set_debug_format() noexcept { _M_spec._M_type = _Pres_esc; } @@ -2931,7 +3002,7 @@ namespace __format }; /// @} -#if defined _GLIBCXX_USE_WCHAR_T && __cpp_lib_format_ranges +#if defined _GLIBCXX_USE_WCHAR_T && __glibcxx_format_ranges // _GLIBCXX_RESOLVE_LIB_DEFECTS // 3944. Formatters converting sequences of char to sequences of wchar_t @@ -2991,19 +3062,21 @@ namespace __format concept __formattable_impl = __parsable_with<_Tp, _Context> && __formattable_with<_Tp, _Context>; + template<typename _Formatter> + concept __has_debug_format = requires(_Formatter __f) + { + __f.set_debug_format(); + }; + } // namespace __format /// @endcond -// Concept std::formattable was introduced by P2286R8 "Formatting Ranges", -// but we can't guard it with __cpp_lib_format_ranges until we define that! -#if __cplusplus > 202002L +#if __glibcxx_format_ranges // C++ >= 23 && HOSTED // [format.formattable], concept formattable template<typename _Tp, typename _CharT> concept formattable = __format::__formattable_impl<remove_reference_t<_Tp>, _CharT>; -#endif -#if __cpp_lib_format_ranges /// @cond undocumented namespace __format { @@ -3246,7 +3319,7 @@ namespace __format class _Buf_sink : public _Sink<_CharT> { protected: - _CharT _M_buf[32 * sizeof(void*) / sizeof(_CharT)]; + _CharT _M_buf[__stackbuf_size<_CharT>]; [[__gnu__::__always_inline__]] constexpr @@ -5088,7 +5161,7 @@ namespace __format } #endif -#if __cpp_lib_format_ranges +#if __glibcxx_format_ranges // C++ >= 23 && HOSTED // [format.range], formatting of ranges // [format.range.fmtkind], variable template format_kind enum class range_format { @@ -5133,28 +5206,346 @@ namespace __format template<ranges::input_range _Rg> requires same_as<_Rg, remove_cvref_t<_Rg>> constexpr range_format format_kind<_Rg> = __fmt_kind<_Rg>(); - // [format.range.formatter], class template range_formatter - template<typename _Tp, typename _CharT = char> - requires same_as<remove_cvref_t<_Tp>, _Tp> && formattable<_Tp, _CharT> - class range_formatter; // TODO - /// @cond undocumented namespace __format { - // [format.range.fmtdef], class template range-default-formatter - template<range_format _Kind, ranges::input_range _Rg, typename _CharT> - struct __range_default_formatter; // TODO + template<typename _Tp> + concept __is_map_formattable + = __is_pair<_Tp> || (__is_tuple_v<_Tp> && tuple_size_v<_Tp> == 2); + } // namespace __format /// @endcond + // [format.range.formatter], class template range_formatter + template<typename _Tp, __format::__char _CharT = char> + requires same_as<remove_cvref_t<_Tp>, _Tp> && formattable<_Tp, _CharT> + class range_formatter + { + using _String_view = basic_string_view<_CharT>; + using _Seps = __format::_Separators<_CharT>; + + public: + constexpr void + set_separator(basic_string_view<_CharT> __sep) noexcept + { _M_sep = __sep; } + + constexpr void + set_brackets(basic_string_view<_CharT> __open, + basic_string_view<_CharT> __close) noexcept + { + _M_open = __open; + _M_close = __close; + } + + constexpr formatter<_Tp, _CharT>& + underlying() noexcept + { return _M_fval; } + + constexpr const formatter<_Tp, _CharT>& + underlying() const noexcept + { return _M_fval; } + + // We deviate from standard, that declares this as template accepting + // unconstrained ParseContext type, which seems unimplementable. + constexpr typename basic_format_parse_context<_CharT>::iterator + parse(basic_format_parse_context<_CharT>& __pc) + { + auto __first = __pc.begin(); + const auto __last = __pc.end(); + __format::_Spec<_CharT> __spec{}; + bool __no_brace = false; + + auto __finished = [&] + { return __first == __last || *__first == '}'; }; + + auto __finalize = [&] + { + _M_spec = __spec; + return __first; + }; + + auto __parse_val = [&](_String_view __nfs = _String_view()) + { + basic_format_parse_context<_CharT> __npc(__nfs); + if (_M_fval.parse(__npc) != __npc.end()) + __format::__failed_to_parse_format_spec(); + if constexpr (__format::__has_debug_format<formatter<_Tp, _CharT>>) + _M_fval.set_debug_format(); + return __finalize(); + }; + + if (__finished()) + return __parse_val(); + + __first = __spec._M_parse_fill_and_align(__first, __last, "{:"); + if (__finished()) + return __parse_val(); + + __first = __spec._M_parse_width(__first, __last, __pc); + if (__finished()) + return __parse_val(); + + if (*__first == '?') + { + ++__first; + __spec._M_type = __format::_Pres_esc; + if (__finished() || *__first != 's') + __throw_format_error("format error: '?' is allowed only in" + " combination with 's'"); + } + + if (*__first == 's') + { + ++__first; + if constexpr (same_as<_Tp, _CharT>) + { + if (__spec._M_type != __format::_Pres_esc) + __spec._M_type = __format::_Pres_str; + if (__finished()) + return __finalize(); + __throw_format_error("format error: element format specifier" + " cannot be provided when 's' specifier is used"); + } + else + __throw_format_error("format error: 's' specifier requires" + " range of character types"); + } + + if (__finished()) + return __parse_val(); + + if (*__first == 'n') + { + ++__first; + _M_open = _M_close = _String_view(); + __no_brace = true; + } + + if (__finished()) + return __parse_val(); + + if (*__first == 'm') + { + _String_view __m(__first, 1); + ++__first; + if constexpr (__format::__is_map_formattable<_Tp>) + { + _M_sep = _Seps::_S_comma(); + if (!__no_brace) + { + _M_open = _Seps::_S_braces().substr(0, 1); + _M_close = _Seps::_S_braces().substr(1, 1); + } + if (__finished()) + return __parse_val(__m); + __throw_format_error("format error: element format specifier" + " cannot be provided when 'm' specifier is used"); + } + else + __throw_format_error("format error: 'm' specifier requires" + " range of pairs or tuples of two elements"); + } + + if (__finished()) + return __parse_val(); + + if (*__first == ':') + { + __pc.advance_to(++__first); + __first = _M_fval.parse(__pc); + } + + if (__finished()) + return __finalize(); + + __format::__failed_to_parse_format_spec(); + } + + // We deviate from standard, that declares this as template accepting + // unconstrained FormatContext type, which seems unimplementable. + template<ranges::input_range _Rg, typename _Out> + requires formattable<ranges::range_reference_t<_Rg>, _CharT> && + same_as<remove_cvref_t<ranges::range_reference_t<_Rg>>, _Tp> + typename basic_format_context<_Out, _CharT>::iterator + format(_Rg&& __rg, basic_format_context<_Out, _CharT>& __fc) const + { + // This is required to implement formatting with padding, + // as we need to format to temporary buffer, using the same iterator. + static_assert(is_same_v<_Out, __format::_Sink_iter<_CharT>>); + if constexpr (same_as<_Tp, _CharT>) + if (_M_spec._M_type == __format::_Pres_str + || _M_spec._M_type == __format::_Pres_esc) + { + __format::__formatter_str __fstr(_M_spec); + return __fstr._M_format_range(__rg, __fc); + } + if (_M_spec._M_get_width(__fc) > 0) + return _M_format_with_padding(__rg, __fc); + return _M_format_no_padding(__rg, __fc); + } + + private: + template<ranges::input_range _Rg, typename _Out> + typename basic_format_context<_Out, _CharT>::iterator + _M_format_no_padding(_Rg& __rg, + basic_format_context<_Out, _CharT>& __fc) const + { + auto __out = __format::__write(__fc.out(), _M_open); + + auto __first = ranges::begin(__rg); + auto const __last = ranges::end(__rg); + if (__first == __last) + return __format::__write(__out, _M_close); + + __fc.advance_to(__out); + __out = _M_fval.format(*__first, __fc); + for (++__first; __first != __last; ++__first) + { + __out = __format::__write(__out, _M_sep); + __fc.advance_to(__out); + __out = _M_fval.format(*__first, __fc); + } + + return __format::__write(__out, _M_close); + } + + template<ranges::input_range _Rg, typename _Out> + typename basic_format_context<_Out, _CharT>::iterator + _M_format_with_padding(_Rg& __rg, + basic_format_context<_Out, _CharT>& __fc) const + { + struct _Restore_out + { + _Restore_out(basic_format_context<_Out, _CharT>& __fc) + : _M_ctx(addressof(__fc)), _M_out(__fc.out()) + { } + + void trigger() + { + if (_M_ctx) + _M_ctx->advance_to(_M_out); + _M_ctx = nullptr; + } + + ~_Restore_out() + { trigger(); } + + private: + basic_format_context<_Out, _CharT>* _M_ctx; + __format::_Sink_iter<_CharT> _M_out; + }; + + _Restore_out __restore{__fc}; + // TODO Consider double sinking, first buffer of width + // size and then original sink, if first buffer is overrun + // we do not need to align + __format::_Str_sink<_CharT> __buf; + __fc.advance_to(__format::_Sink_iter<_CharT>(__buf)); + _M_format_no_padding(__rg, __fc); + __restore.trigger(); + + _String_view __s(__buf.view()); + size_t __width; + if constexpr (__unicode::__literal_encoding_is_unicode<_CharT>()) + __width = __unicode::__field_width(__s); + else + __width = __s.size(); + return __format::__write_padded_as_spec(__s, __width, __fc, _M_spec); + } + + __format::_Spec<_CharT> _M_spec{}; + _String_view _M_open = _Seps::_S_squares().substr(0, 1); + _String_view _M_close = _Seps::_S_squares().substr(1, 1); + _String_view _M_sep = _Seps::_S_comma(); + formatter<_Tp, _CharT> _M_fval; + }; + + // In standard this is shown as inheriting from specialization of + // exposition only specialization for range-default-formatter for + // each range_format. We opt for simpler implementation. // [format.range.fmtmap], [format.range.fmtset], [format.range.fmtstr], // specializations for maps, sets, and strings - template<ranges::input_range _Rg, typename _CharT> + template<ranges::input_range _Rg, __format::__char _CharT> requires (format_kind<_Rg> != range_format::disabled) && formattable<ranges::range_reference_t<_Rg>, _CharT> struct formatter<_Rg, _CharT> - : __format::__range_default_formatter<format_kind<_Rg>, _Rg, _CharT> - { }; + { + private: + static const bool _S_range_format_is_string = + (format_kind<_Rg> == range_format::string) + || (format_kind<_Rg> == range_format::debug_string); + using _Vt = remove_cvref_t< + ranges::range_reference_t< + __format::__maybe_const_range<_Rg, _CharT>>>; + + static consteval bool _S_is_correct() + { + if constexpr (_S_range_format_is_string) + static_assert(same_as<_Vt, _CharT>); + return true; + } + + static_assert(_S_is_correct()); + + public: + constexpr formatter() noexcept + { + using _Seps = __format::_Separators<_CharT>; + if constexpr (format_kind<_Rg> == range_format::map) + { + static_assert(__format::__is_map_formattable<_Vt>); + _M_under.set_brackets(_Seps::_S_braces().substr(0, 1), + _Seps::_S_braces().substr(1, 1)); + _M_under.underlying().set_brackets({}, {}); + _M_under.underlying().set_separator(_Seps::_S_colon()); + } + else if constexpr (format_kind<_Rg> == range_format::set) + _M_under.set_brackets(_Seps::_S_braces().substr(0, 1), + _Seps::_S_braces().substr(1, 1)); + } + + constexpr void + set_separator(basic_string_view<_CharT> __sep) noexcept + requires (!_S_range_format_is_string) + { _M_under.set_separator(__sep); } + + constexpr void + set_brackets(basic_string_view<_CharT> __open, + basic_string_view<_CharT> __close) noexcept + requires (!_S_range_format_is_string) + { _M_under.set_brackets(__open, __close); } + + // We deviate from standard, that declares this as template accepting + // unconstrained ParseContext type, which seems unimplementable. + constexpr typename basic_format_parse_context<_CharT>::iterator + parse(basic_format_parse_context<_CharT>& __pc) + { + auto __res = _M_under.parse(__pc); + if constexpr (format_kind<_Rg> == range_format::debug_string) + _M_under.set_debug_format(); + return __res; + } + + // We deviate from standard, that declares this as template accepting + // unconstrained FormatContext type, which seems unimplementable. + template<typename _Out> + typename basic_format_context<_Out, _CharT>::iterator + format(__format::__maybe_const_range<_Rg, _CharT>& __rg, + basic_format_context<_Out, _CharT>& __fc) const + { + if constexpr (_S_range_format_is_string) + return _M_under._M_format_range(__rg, __fc); + else + return _M_under.format(__rg, __fc); + } + + private: + using _Formatter_under + = __conditional_t<_S_range_format_is_string, + __format::__formatter_str<_CharT>, + range_formatter<_Vt, _CharT>>; + _Formatter_under _M_under; + }; #endif // C++23 formatting ranges #undef _GLIBCXX_WIDEN diff --git a/libstdc++-v3/include/std/numeric b/libstdc++-v3/include/std/numeric index 4d36fcd..490963e 100644 --- a/libstdc++-v3/include/std/numeric +++ b/libstdc++-v3/include/std/numeric @@ -732,12 +732,11 @@ namespace __detail /// @} group numeric_ops #endif // C++17 +#if __glibcxx_ranges_iota >= 202202L // C++ >= 23 namespace ranges { -#if __glibcxx_ranges_iota >= 202202L // C++ >= 23 - template<typename _Out, typename _Tp> - using iota_result = out_value_result<_Out, _Tp>; + using iota_result = out_value_result<_Out, _Tp>; struct __iota_fn { @@ -762,9 +761,8 @@ namespace ranges }; inline constexpr __iota_fn iota{}; - -#endif // __glibcxx_ranges_iota } // namespace ranges +#endif // __glibcxx_ranges_iota _GLIBCXX_END_NAMESPACE_VERSION } // namespace std diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges index 7a339c5..9300c36 100644 --- a/libstdc++-v3/include/std/ranges +++ b/libstdc++-v3/include/std/ranges @@ -64,7 +64,6 @@ #define __glibcxx_want_ranges_chunk #define __glibcxx_want_ranges_chunk_by #define __glibcxx_want_ranges_enumerate -#define __glibcxx_want_ranges_iota #define __glibcxx_want_ranges_join_with #define __glibcxx_want_ranges_repeat #define __glibcxx_want_ranges_slide diff --git a/libstdc++-v3/src/c++23/std.cc.in b/libstdc++-v3/src/c++23/std.cc.in index 12253b9..5e18ad7 100644 --- a/libstdc++-v3/src/c++23/std.cc.in +++ b/libstdc++-v3/src/c++23/std.cc.in @@ -1332,6 +1332,12 @@ export namespace std using std::wformat_context; using std::wformat_parse_context; using std::wformat_string; +// FIXME __cpp_lib_format_ranges +#ifdef __glibcxx_format_ranges + using std::format_kind; + using std::range_format; + using std::range_formatter; +#endif } // <forward_list> diff --git a/libstdc++-v3/testsuite/17_intro/names.cc b/libstdc++-v3/testsuite/17_intro/names.cc index 4458325..f67818d 100644 --- a/libstdc++-v3/testsuite/17_intro/names.cc +++ b/libstdc++-v3/testsuite/17_intro/names.cc @@ -142,6 +142,10 @@ #define try_emplace ( #endif +#if __cplusplus < 202002L +#define ranges ( +#endif + // These clash with newlib so don't use them. # define __lockable cannot be used as an identifier # define __null_sentinel cannot be used as an identifier diff --git a/libstdc++-v3/testsuite/21_strings/basic_string/cons/char/119748.cc b/libstdc++-v3/testsuite/21_strings/basic_string/cons/char/119748.cc new file mode 100644 index 0000000..301ca5d --- /dev/null +++ b/libstdc++-v3/testsuite/21_strings/basic_string/cons/char/119748.cc @@ -0,0 +1,35 @@ +// { dg-do compile } + +// Bug 119748 +// string(InputIterator, InputIterator) rejects volatile charT* as iterator + +#ifndef TEST_CHAR_TYPE +#define TEST_CHAR_TYPE char +#endif + +#include <string> +#include <testsuite_iterators.h> + +typedef TEST_CHAR_TYPE C; + +volatile C vs[42] = {}; +std::basic_string<C> s(vs+0, vs+42); +#ifdef __cpp_lib_containers_ranges +std::basic_string<C> s2(std::from_range, vs); +#endif + +using namespace __gnu_test; + +test_container<volatile C, input_iterator_wrapper> input_cont(vs); +std::basic_string<C> s3(input_cont.begin(), input_cont.end()); + +test_container<volatile C, forward_iterator_wrapper> fwd_cont(vs); +std::basic_string<C> s4(fwd_cont.begin(), fwd_cont.end()); + +#ifdef __cpp_lib_containers_ranges +test_input_range<volatile C> input_range(vs); +std::basic_string<C> s5(std::from_range, input_range); + +test_forward_range<volatile C> fwd_range(vs); +std::basic_string<C> s6(std::from_range, fwd_range); +#endif diff --git a/libstdc++-v3/testsuite/21_strings/basic_string/cons/wchar_t/119748.cc b/libstdc++-v3/testsuite/21_strings/basic_string/cons/wchar_t/119748.cc new file mode 100644 index 0000000..7d3ba10 --- /dev/null +++ b/libstdc++-v3/testsuite/21_strings/basic_string/cons/wchar_t/119748.cc @@ -0,0 +1,7 @@ +// { dg-do compile } + +// Bug 119748 +// string(InputIterator, InputIterator) rejects volatile charT* as iterator + +#define TEST_CHAR_TYPE wchar_t +#include "../char/119748.cc" diff --git a/libstdc++-v3/testsuite/23_containers/vector/bool/format.cc b/libstdc++-v3/testsuite/23_containers/vector/bool/format.cc index 2586225..eb24b66 100644 --- a/libstdc++-v3/testsuite/23_containers/vector/bool/format.cc +++ b/libstdc++-v3/testsuite/23_containers/vector/bool/format.cc @@ -56,6 +56,12 @@ test_output() res = std::format(WIDEN("{:=^#7X}"), v[1]); VERIFY( res == WIDEN("==0X0==") ); + + res = std::format(WIDEN("{}"), v); + VERIFY( res == WIDEN("[true, false]") ); + + res = std::format(WIDEN("{::d}"), v); + VERIFY( res == WIDEN("[1, 0]") ); } int main() diff --git a/libstdc++-v3/testsuite/std/format/formatter/lwg3944.cc b/libstdc++-v3/testsuite/std/format/formatter/lwg3944.cc index ff5f075..1f3edc9 100644 --- a/libstdc++-v3/testsuite/std/format/formatter/lwg3944.cc +++ b/libstdc++-v3/testsuite/std/format/formatter/lwg3944.cc @@ -4,6 +4,7 @@ // LWG 3944. Formatters converting sequences of char to sequences of wchar_t #include <format> +#include <vector> void test_lwg3944() { @@ -14,11 +15,10 @@ void test_lwg3944() std::format(L"{}",cstr); // { dg-error "here" } // Ill-formed in C++20 - // In C++23 they give L"['h', 'e', 'l', 'l', 'o']" std::format(L"{}", "hello"); // { dg-error "here" } std::format(L"{}", std::string_view("hello")); // { dg-error "here" } std::format(L"{}", std::string("hello")); // { dg-error "here" } -#ifdef __cpp_lib_format_ranges +#ifdef __glibcxx_format_ranges // LWG 3944 does not change this, it's still valid. std::format(L"{}", std::vector{'h', 'e', 'l', 'l', 'o'}); #endif diff --git a/libstdc++-v3/testsuite/std/format/formatter/requirements.cc b/libstdc++-v3/testsuite/std/format/formatter/requirements.cc index 416b9a8..1c9a0a5 100644 --- a/libstdc++-v3/testsuite/std/format/formatter/requirements.cc +++ b/libstdc++-v3/testsuite/std/format/formatter/requirements.cc @@ -70,12 +70,14 @@ test_specializations() // [format.formatter.spec] // LWG 3833. Remove specialization // template<size_t N> struct formatter<const charT[N], charT> - using Farr = std::format_context::formatter_type<const char[1]>; - static_assert( ! std::is_default_constructible_v<Farr> ); - static_assert( ! std::is_copy_constructible_v<Farr> ); - static_assert( ! std::is_move_constructible_v<Farr> ); - static_assert( ! std::is_copy_assignable_v<Farr> ); - static_assert( ! std::is_move_assignable_v<Farr> ); + // Formatter is only expected to be instantiated with only cv-unqual types + // and attempting to instantiate this specialization is ill-formed + // using Farr = std::format_context::formatter_type<const char[1]>; + // static_assert( ! std::is_default_constructible_v<Farr> ); + // static_assert( ! std::is_copy_constructible_v<Farr> ); + // static_assert( ! std::is_move_constructible_v<Farr> ); + // static_assert( ! std::is_copy_assignable_v<Farr> ); + // static_assert( ! std::is_move_assignable_v<Farr> ); } int main() diff --git a/libstdc++-v3/testsuite/std/format/ranges/format_kind.cc b/libstdc++-v3/testsuite/std/format/ranges/format_kind.cc new file mode 100644 index 0000000..14b9ff2 --- /dev/null +++ b/libstdc++-v3/testsuite/std/format/ranges/format_kind.cc @@ -0,0 +1,94 @@ +// { dg-do run { target c++23 } } + +#include <deque> +#include <flat_map> +#include <flat_set> +#include <format> +#include <list> +#include <map> +#include <set> +#include <testsuite_hooks.h> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +static_assert( std::format_kind<std::vector<int>> == std::range_format::sequence ); +static_assert( std::format_kind<std::deque<int>> == std::range_format::sequence ); +static_assert( std::format_kind<std::list<int>> == std::range_format::sequence ); + +static_assert( std::format_kind<std::set<int>> == std::range_format::set ); +static_assert( std::format_kind<std::multiset<int>> == std::range_format::set ); +static_assert( std::format_kind<std::unordered_set<int>> == std::range_format::set ); +static_assert( std::format_kind<std::unordered_multiset<int>> == std::range_format::set ); +static_assert( std::format_kind<std::flat_set<int>> == std::range_format::set ); +static_assert( std::format_kind<std::flat_multiset<int>> == std::range_format::set ); + +static_assert( std::format_kind<std::map<int, int>> == std::range_format::map ); +static_assert( std::format_kind<std::multimap<int, int>> == std::range_format::map ); +static_assert( std::format_kind<std::unordered_map<int, int>> == std::range_format::map ); +static_assert( std::format_kind<std::unordered_multimap<int, int>> == std::range_format::map ); +static_assert( std::format_kind<std::flat_map<int, int>> == std::range_format::map ); +static_assert( std::format_kind<std::flat_multimap<int, int>> == std::range_format::map ); + +template<typename T> +struct MyVec : std::vector<T> +{}; + +static_assert( std::format_kind<MyVec<int>> == std::range_format::sequence ); + +template<typename T> +struct MySet : std::vector<T> +{ + using key_type = T; +}; + +static_assert( std::format_kind<MySet<int>> == std::range_format::set ); + +template<typename T> +struct MyMap : std::vector<T> +{ + using key_type = T; + using mapped_type = int; +}; + +static_assert( std::format_kind<MyMap<std::pair<int, int>>> == std::range_format::map ); +static_assert( std::format_kind<MyMap<std::tuple<int, int>>> == std::range_format::map ); +static_assert( std::format_kind<MyMap<int>> == std::range_format::set ); + +template<typename T, std::range_format rf> +struct CustFormat : std::vector<T> +{ + using std::vector<T>::vector; +}; + +template<typename T, std::range_format rf> +constexpr auto std::format_kind<CustFormat<T, rf>> = rf; + +void test_override() +{ + CustFormat<int, std::range_format::disabled> disabledf; + static_assert( !std::formattable<decltype(disabledf), char> ); + + CustFormat<int, std::range_format::sequence> seqf{1, 2, 3}; + VERIFY( std::format("{}", seqf) == "[1, 2, 3]" ); + + CustFormat<int, std::range_format::set> setf{1, 2, 3}; + VERIFY( std::format("{}", setf) == "{1, 2, 3}" ); + + // TODO test map once formatter for pair is implenented + + CustFormat<char, std::range_format::string> stringf{'a', 'b', 'c', 'd'}; + VERIFY( std::format("{}", stringf) == "abcd" ); + // Support precision as string do + VERIFY( std::format("{:.2}", stringf) == "ab" ); + + CustFormat<char, std::range_format::debug_string> debugf{'a', 'b', 'c', 'd'}; + VERIFY( std::format("{}", debugf) == R"("abcd")" ); + // Support precision as string do + VERIFY( std::format("{:.3}", debugf) == R"("ab)" ); +} + +int main() +{ + test_override(); +} diff --git a/libstdc++-v3/testsuite/std/format/ranges/formatter.cc b/libstdc++-v3/testsuite/std/format/ranges/formatter.cc new file mode 100644 index 0000000..2045b51 --- /dev/null +++ b/libstdc++-v3/testsuite/std/format/ranges/formatter.cc @@ -0,0 +1,145 @@ +// { dg-do run { target c++23 } } + +#include <format> +#include <testsuite_hooks.h> +#include <vector> + +#define WIDEN_(C, S) ::std::__format::_Widen<C>(S, L##S) +#define WIDEN(S) WIDEN_(_CharT, S) + +template<typename T, + template<typename, typename> class Formatter = std::range_formatter> +struct MyVector : std::vector<T> +{ + using std::vector<T>::vector; +}; + +template<typename T, + template<typename, typename> class Formatter, + typename CharT> +struct std::formatter<MyVector<T, Formatter>, CharT> +{ + constexpr formatter() noexcept + { + using _CharT = CharT; + _formatter.set_brackets(WIDEN("<"), WIDEN(">")); + _formatter.set_separator(WIDEN("; ")); + } + + constexpr std::basic_format_parse_context<CharT>::iterator + parse(std::basic_format_parse_context<CharT>& pc) + { return _formatter.parse(pc); } + + template<typename Out> + typename std::basic_format_context<Out, CharT>::iterator + format(const MyVector<T, Formatter>& mv, + std::basic_format_context<Out, CharT>& fc) const + { return _formatter.format(mv, fc); } + +private: + Formatter<T, CharT> _formatter; +}; + +template<typename _CharT, template<typename, typename> class Formatter> +void +test_default() +{ + MyVector<int, Formatter> vec{1, 2, 3}; + std::basic_string<_CharT> res; + + res = std::format(WIDEN("{}"), vec); + VERIFY( res == WIDEN("<1; 2; 3>") ); + res = std::format(WIDEN("{:}"), vec); + VERIFY( res == WIDEN("<1; 2; 3>") ); + res = std::format(WIDEN("{:n}"), vec); + VERIFY( res == WIDEN("1; 2; 3") ); + + res = std::format(WIDEN("{:3}"), vec); + VERIFY( res == WIDEN("<1; 2; 3>") ); + + res = std::format(WIDEN("{:10}"), vec); + VERIFY( res == WIDEN("<1; 2; 3> ") ); + + res = std::format(WIDEN("{:{}}"), vec, 10); + VERIFY( res == WIDEN("<1; 2; 3> ") ); + + res = std::format(WIDEN("{1:{0}}"), 10, vec); + VERIFY( res == WIDEN("<1; 2; 3> ") ); + + res = std::format(WIDEN("{:10n}"), vec); + VERIFY( res == WIDEN("1; 2; 3 ") ); + + res = std::format(WIDEN("{:*<11}"), vec); + VERIFY( res == WIDEN("<1; 2; 3>**") ); + + res = std::format(WIDEN("{:->12}"), vec); + VERIFY( res == WIDEN("---<1; 2; 3>") ); + + res = std::format(WIDEN("{:=^13}"), vec); + VERIFY( res == WIDEN("==<1; 2; 3>==") ); + + res = std::format(WIDEN("{:=^13n}"), vec); + VERIFY( res == WIDEN("===1; 2; 3===") ); + + res = std::format(WIDEN("{::#x}"), vec); + VERIFY( res == WIDEN("<0x1; 0x2; 0x3>") ); + + res = std::format(WIDEN("{:|^25n:#05x}"), vec); + VERIFY( res == WIDEN("|||0x001; 0x002; 0x003|||") ); + + // ':' is start of the format string for element + res = std::format(WIDEN("{::^+4}"), vec); + VERIFY( res == WIDEN("< +1 ; +2 ; +3 >") ); +} + +template<typename _CharT, template<typename, typename> class Formatter> +void +test_override() +{ + MyVector<_CharT, Formatter> vc{'a', 'b', 'c', 'd'}; + std::basic_string<_CharT> res; + + res = std::format(WIDEN("{:s}"), vc); + VERIFY( res == WIDEN("abcd") ); + res = std::format(WIDEN("{:?s}"), vc); + VERIFY( res == WIDEN("\"abcd\"") ); + res = std::format(WIDEN("{:+^6s}"), vc); + VERIFY( res == WIDEN("+abcd+") ); + + // TODO test map +} + +template<template<typename, typename> class Formatter> +void test_outputs() +{ + test_default<char, Formatter>(); + test_default<wchar_t, Formatter>(); + test_override<char, Formatter>(); + test_override<wchar_t, Formatter>(); +} + +void +test_nested() +{ + MyVector<MyVector<int>> v + { + {1, 2}, + {11, 12} + }; + + std::string res = std::format("{}", v); + VERIFY( res == "<<1; 2>; <11; 12>>" ); + + res = std::format("{:+^18:n:02}", v); + VERIFY( res == "+<01; 02; 11; 12>+" ); +} + +template<typename T, typename CharT> +using VectorFormatter = std::formatter<std::vector<T>, CharT>; + +int main() +{ + test_outputs<std::range_formatter>(); + test_outputs<VectorFormatter>(); + test_nested(); +} diff --git a/libstdc++-v3/testsuite/std/format/ranges/sequence.cc b/libstdc++-v3/testsuite/std/format/ranges/sequence.cc new file mode 100644 index 0000000..0657437 --- /dev/null +++ b/libstdc++-v3/testsuite/std/format/ranges/sequence.cc @@ -0,0 +1,190 @@ +// { dg-do run { target c++23 } } + +#include <format> +#include <list> +#include <span> +#include <testsuite_hooks.h> +#include <testsuite_iterators.h> +#include <vector> + +struct NotFormattable +{}; + +static_assert(!std::formattable<std::vector<NotFormattable>, char>); +static_assert(!std::formattable<std::span<NotFormattable>, wchar_t>); + +template<typename... Args> +bool +is_format_string_for(const char* str, Args&&... args) +{ + try { + (void) std::vformat(str, std::make_format_args(args...)); + return true; + } catch (const std::format_error&) { + return false; + } +} + +template<typename... Args> +bool +is_format_string_for(const wchar_t* str, Args&&... args) +{ + try { + (void) std::vformat(str, std::make_wformat_args(args...)); + return true; + } catch (const std::format_error&) { + return false; + } +} + +template<typename Rg, typename CharT> +bool is_range_formatter_spec_for(CharT const* spec, Rg&& rg) +{ + using V = std::remove_cvref_t<std::ranges::range_reference_t<Rg>>; + std::range_formatter<V, CharT> fmt; + std::basic_format_parse_context<CharT> pc(spec); + try { + (void)fmt.parse(pc); + return true; + } catch (const std::format_error&) { + return false; + } +} + +void +test_format_string() +{ + // invalid format spec 'p' + VERIFY( !is_range_formatter_spec_for("p", std::vector<int>()) ); + VERIFY( !is_format_string_for("{:p}", std::vector<int>()) ); + VERIFY( !is_range_formatter_spec_for("np", std::vector<int>()) ); + VERIFY( !is_format_string_for("{:np}", std::vector<int>()) ); + + // width needs to be integer type + VERIFY( !is_format_string_for("{:{}}", std::vector<int>(), 1.0f) ); + + // element format needs to be valid + VERIFY( !is_range_formatter_spec_for(":p", std::vector<int>()) ); + VERIFY( !is_format_string_for("{::p}", std::vector<int>()) ); + VERIFY( !is_range_formatter_spec_for("n:p", std::vector<int>()) ); + VERIFY( !is_format_string_for("{:n:p}", std::vector<int>()) ); +} + +#define WIDEN_(C, S) ::std::__format::_Widen<C>(S, L##S) +#define WIDEN(S) WIDEN_(_CharT, S) + +template<typename _CharT, typename Range> +void test_output() +{ + using Sv = std::basic_string_view<_CharT>; + using T = std::ranges::range_value_t<Range>; + auto makeRange = [](std::span<T> s) { + return Range(s.data(), s.data() + s.size()); + }; + + std::basic_string<_CharT> res; + size_t size = 0; + + T v1[]{1, 2, 3}; + res = std::format(WIDEN("{}"), makeRange(v1)); + VERIFY( res == WIDEN("[1, 2, 3]") ); + res = std::format(WIDEN("{:}"), makeRange(v1)); + VERIFY( res == WIDEN("[1, 2, 3]") ); + res = std::format(WIDEN("{:n}"), makeRange(v1)); + VERIFY( res == WIDEN("1, 2, 3") ); + + res = std::format(WIDEN("{:3}"), makeRange(v1)); + VERIFY( res == WIDEN("[1, 2, 3]") ); + + res = std::format(WIDEN("{:10}"), makeRange(v1)); + VERIFY( res == WIDEN("[1, 2, 3] ") ); + + res = std::format(WIDEN("{:{}}"), makeRange(v1), 10); + VERIFY( res == WIDEN("[1, 2, 3] ") ); + + res = std::format(WIDEN("{1:{0}}"), 10, makeRange(v1)); + VERIFY( res == WIDEN("[1, 2, 3] ") ); + + res = std::format(WIDEN("{:10n}"), makeRange(v1)); + VERIFY( res == WIDEN("1, 2, 3 ") ); + + res = std::format(WIDEN("{:*<11}"), makeRange(v1)); + VERIFY( res == WIDEN("[1, 2, 3]**") ); + + res = std::format(WIDEN("{:->12}"), makeRange(v1)); + VERIFY( res == WIDEN("---[1, 2, 3]") ); + + res = std::format(WIDEN("{:=^13}"), makeRange(v1)); + VERIFY( res == WIDEN("==[1, 2, 3]==") ); + + res = std::format(WIDEN("{:=^13n}"), makeRange(v1)); + VERIFY( res == WIDEN("===1, 2, 3===") ); + + res = std::format(WIDEN("{::#x}"), makeRange(v1)); + VERIFY( res == WIDEN("[0x1, 0x2, 0x3]") ); + + res = std::format(WIDEN("{:|^25n:#05x}"), makeRange(v1)); + VERIFY( res == WIDEN("|||0x001, 0x002, 0x003|||") ); + + // ':' is start of the format string for element + res = std::format(WIDEN("{::^+04}"), makeRange(v1)); + VERIFY( res == WIDEN("[ +1 , +2 , +3 ]") ); + + size = std::formatted_size(WIDEN("{:}"), makeRange(v1)); + VERIFY( size == Sv(WIDEN("[1, 2, 3]")).size() ); + + size = std::formatted_size(WIDEN("{:3}"), makeRange(v1)); + VERIFY( size == Sv(WIDEN("[1, 2, 3]")).size() ); + + size = std::formatted_size(WIDEN("{:10}"), makeRange(v1)); + VERIFY( size == 10 ); + + size = std::formatted_size(WIDEN("{:|^25n:#05x}"), makeRange(v1)); + VERIFY( size == 25 ); +} + +template<typename Range> +void test_output_c() +{ + test_output<char, Range>(); + test_output<wchar_t, Range>(); +} + +void +test_outputs() +{ + using namespace __gnu_test; + test_output_c<std::vector<int>>(); + test_output_c<std::list<int>>(); + test_output_c<std::span<int>>(); + + test_output_c<test_forward_range<int>>(); + test_output_c<test_input_range<int>>(); + test_output_c<test_range_nocopy<int, input_iterator_wrapper_nocopy>>(); + + test_output_c<std::span<const int>>(); + test_output_c<test_forward_range<const int>>(); +} + +void +test_nested() +{ + std::vector<std::vector<int>> v + { + {1, 2}, + {11, 12} + }; + + std::string res = std::format("{}", v); + VERIFY( res == "[[1, 2], [11, 12]]" ); + + res = std::format("{:+^18:n:02}", v); + VERIFY( res == "+[01, 02, 11, 12]+" ); +} + +int main() +{ + test_format_string(); + test_outputs(); + test_nested(); +} diff --git a/libstdc++-v3/testsuite/std/format/ranges/string.cc b/libstdc++-v3/testsuite/std/format/ranges/string.cc new file mode 100644 index 0000000..7f59f59 --- /dev/null +++ b/libstdc++-v3/testsuite/std/format/ranges/string.cc @@ -0,0 +1,226 @@ +// { dg-do run { target c++23 } } + +#include <format> +#include <span> +#include <testsuite_hooks.h> +#include <testsuite_iterators.h> +#include <vector> + +template<typename... Args> +bool +is_format_string_for(const char* str, Args&&... args) +{ + try { + (void) std::vformat(str, std::make_format_args(args...)); + return true; + } catch (const std::format_error&) { + return false; + } +} + +template<typename... Args> +bool +is_format_string_for(const wchar_t* str, Args&&... args) +{ + try { + (void) std::vformat(str, std::make_wformat_args(args...)); + return true; + } catch (const std::format_error&) { + return false; + } +} + +template<typename Rg, typename CharT> +bool is_range_formatter_spec_for(CharT const* spec, Rg&& rg) +{ + using V = std::remove_cvref_t<std::ranges::range_reference_t<Rg>>; + std::range_formatter<V, CharT> fmt; + std::basic_format_parse_context<CharT> pc(spec); + try { + (void)fmt.parse(pc); + return true; + } catch (const std::format_error&) { + return false; + } +} + +#define WIDEN_(C, S) ::std::__format::_Widen<C>(S, L##S) +#define WIDEN(S) WIDEN_(_CharT, S) + +void +test_format_string() +{ + // only CharT value types are supported + VERIFY( !is_range_formatter_spec_for(L"s", std::vector<char>()) ); + VERIFY( !is_format_string_for(L"{:s}", std::vector<char>()) ); + VERIFY( !is_range_formatter_spec_for(L"s", std::vector<char>()) ); + VERIFY( !is_format_string_for(L"{:s}", std::vector<char>()) ); + VERIFY( !is_range_formatter_spec_for("s", std::vector<int>()) ); + VERIFY( !is_format_string_for("{:s}", std::vector<int>()) ); + + // invalid format stringss + VERIFY( !is_range_formatter_spec_for("?", std::vector<char>()) ); + VERIFY( !is_format_string_for("{:?}", std::vector<char>()) ); + VERIFY( !is_range_formatter_spec_for("ns", std::vector<char>()) ); + VERIFY( !is_format_string_for("{:ns}", std::vector<char>()) ); + VERIFY( !is_range_formatter_spec_for("s:", std::vector<char>()) ); + VERIFY( !is_format_string_for("{:s:}", std::vector<char>()) ); + + // precision is not supported, even for s + VERIFY( !is_range_formatter_spec_for(".10s", std::vector<char>()) ); + VERIFY( !is_format_string_for("{:.10s}", std::vector<char>()) ); + VERIFY( !is_format_string_for("{:.{}s}", std::vector<char>(), 10) ); + + // width needs to be integer type + VERIFY( !is_format_string_for("{:{}s}", std::vector<char>(), 1.0f) ); +} + +template<typename Range> +void test_output() +{ + using _CharT = std::ranges::range_value_t<Range>; + auto makeRange = [](std::basic_string<_CharT>& s) { + return Range(s.data(), s.data() + s.size()); + }; + std::basic_string<_CharT> res; + size_t size = 0; + + std::basic_string<_CharT> s1 = WIDEN("abcd"); + res = std::format(WIDEN("{}"), makeRange(s1)); + VERIFY( res == WIDEN("['a', 'b', 'c', 'd']") ); + + res = std::format(WIDEN("{::}"), makeRange(s1)); + VERIFY( res == WIDEN("[a, b, c, d]") ); + + res = std::format(WIDEN("{:s}"), makeRange(s1)); + VERIFY( res == WIDEN("abcd") ); + + res = std::format(WIDEN("{:?s}"), makeRange(s1)); + VERIFY( res == WIDEN(R"("abcd")") ); + + res = std::format(WIDEN("{:3s}"), makeRange(s1)); + VERIFY( res == WIDEN("abcd") ); + + res = std::format(WIDEN("{:7s}"), makeRange(s1)); + VERIFY( res == WIDEN("abcd ") ); + + res = std::format(WIDEN("{:{}s}"), makeRange(s1), 7); + VERIFY( res == WIDEN("abcd ") ); + + res = std::format(WIDEN("{1:{0}s}"), 7, makeRange(s1)); + VERIFY( res == WIDEN("abcd ") ); + + res = std::format(WIDEN("{:*>6s}"), makeRange(s1)); + VERIFY( res == WIDEN("**abcd") ); + + res = std::format(WIDEN("{:-<5s}"), makeRange(s1)); + VERIFY( res == WIDEN("abcd-") ); + + res = std::format(WIDEN("{:=^8s}"), makeRange(s1)); + VERIFY( res == WIDEN("==abcd==") ); + + std::basic_string<_CharT> s2(512, static_cast<_CharT>('a')); + res = std::format(WIDEN("{:=^8s}"), makeRange(s2)); + VERIFY( res == s2 ); + + size = std::formatted_size(WIDEN("{:s}"), makeRange(s1)); + VERIFY( size == 4 ); + + size = std::formatted_size(WIDEN("{:3s}"), makeRange(s1)); + VERIFY( size == 4 ); + + size = std::formatted_size(WIDEN("{:7s}"), makeRange(s1)); + VERIFY( size == 7 ); + + size = std::formatted_size(WIDEN("{:s}"), makeRange(s2)); + VERIFY( size == 512 ); +} + +template<typename CharT> +struct cstr_view +{ + cstr_view() = default; + explicit cstr_view(CharT* f, CharT* l) + : ptr(f) + { VERIFY(!*l); } + + struct sentinel + { + friend constexpr + bool operator==(CharT const* ptr, sentinel) noexcept + { return !*ptr; } + }; + + constexpr + CharT* begin() const noexcept + { return ptr; }; + static constexpr + sentinel end() noexcept + { return {}; } + +private: + CharT* ptr = ""; +}; + +template<typename CharT> +void +test_outputs() +{ + using namespace __gnu_test; + test_output<std::vector<CharT>>(); + test_output<std::span<CharT>>(); + test_output<cstr_view<CharT>>(); + + test_output<test_forward_range<CharT>>(); + test_output<test_forward_sized_range<CharT>>(); + + test_output<test_input_range<CharT>>(); + test_output<test_input_sized_range<CharT>>(); + + test_output<test_range_nocopy<CharT, input_iterator_wrapper_nocopy>>(); + test_output<test_sized_range<CharT, input_iterator_wrapper_nocopy>>(); + + test_output<std::span<const CharT>>(); + test_output<cstr_view<const CharT>>(); + test_output<test_forward_range<const CharT>>(); + + static_assert(!std::formattable<std::span<volatile CharT>, CharT>); + static_assert(!std::formattable<std::span<const volatile CharT>, CharT>); +} + +void +test_nested() +{ + std::string_view s1 = "str1"; + std::string_view s2 = "str2"; + + std::vector<std::string> vs; + vs.emplace_back(s1); + vs.emplace_back(s2); + + VERIFY( std::format("{}", vs) == R"(["str1", "str2"])" ); + VERIFY( std::format("{:}", vs) == R"(["str1", "str2"])" ); + VERIFY( std::format("{::?}", vs) == R"(["str1", "str2"])" ); + VERIFY( std::format("{::}", vs) == R"([str1, str2])" ); + + std::vector<std::vector<char>> vv; + vv.emplace_back(s1.begin(), s1.end()); + vv.emplace_back(s2.begin(), s2.end()); + std::string_view escaped = R"([['s', 't', 'r', '1'], ['s', 't', 'r', '2']])"; + + VERIFY( std::format("{}", vv) == escaped ); + VERIFY( std::format("{:}", vv) == escaped ); + VERIFY( std::format("{::}", vv) == escaped ); + VERIFY( std::format("{:::?}", vv) == escaped ); + VERIFY( std::format("{:::}", vv) == R"([[s, t, r, 1], [s, t, r, 2]])" ); + VERIFY( std::format("{::s}", vv) == R"([str1, str2])" ); + VERIFY( std::format("{::?s}", vv) == R"(["str1", "str2"])" ); +} + +int main() +{ + test_format_string(); + test_outputs<char>(); + test_outputs<wchar_t>(); + test_nested(); +} diff --git a/libstdc++-v3/testsuite/util/testsuite_iterators.h b/libstdc++-v3/testsuite/util/testsuite_iterators.h index 0df6dcc..20539ec 100644 --- a/libstdc++-v3/testsuite/util/testsuite_iterators.h +++ b/libstdc++-v3/testsuite/util/testsuite_iterators.h @@ -610,12 +610,10 @@ namespace __gnu_test test_container(T* _first, T* _last) : bounds(_first, _last) { } -#if __cplusplus >= 201103L template<std::size_t N> explicit - test_container(T (&arr)[N]) : test_container(arr, arr+N) + test_container(T (&arr)[N]) : bounds(arr, arr+N) { } -#endif ItType<T> it(int pos) |