path: root/libgfortran
author     I'm not telling you my name, idiot <git_is@stupid.com>  2020-09-23 18:14:20 +0200
committer  I'm not telling you my name, idiot <git_is@stupid.com>  2020-09-23 18:14:20 +0200
commit     bef0a39f2f8e87780f990d12fa71f5ed0039267a (patch)
tree       ce8138e924d9bfff3ce0b63aa4c29397fbfaa1a5 /libgfortran
parent     9044db88d634c631920eaa9f66c0275adf18fdf5 (diff)
download   gcc-bef0a39f2f8e87780f990d12fa71f5ed0039267a.zip
           gcc-bef0a39f2f8e87780f990d12fa71f5ed0039267a.tar.gz
           gcc-bef0a39f2f8e87780f990d12fa71f5ed0039267a.tar.bz2
Initial commit of coarray_native branch.
Diffstat (limited to 'libgfortran')
-rw-r--r--  libgfortran/Makefile.am                       55
-rw-r--r--  libgfortran/Makefile.in                      311
-rw-r--r--  libgfortran/config.h.in                       15
-rwxr-xr-x  libgfortran/configure                        198
-rw-r--r--  libgfortran/configure.ac                      37
-rw-r--r--  libgfortran/generated/nca_minmax_i1.c        653
-rw-r--r--  libgfortran/generated/nca_minmax_i16.c       653
-rw-r--r--  libgfortran/generated/nca_minmax_i2.c        653
-rw-r--r--  libgfortran/generated/nca_minmax_i4.c        653
-rw-r--r--  libgfortran/generated/nca_minmax_i8.c        653
-rw-r--r--  libgfortran/generated/nca_minmax_r10.c       653
-rw-r--r--  libgfortran/generated/nca_minmax_r16.c       653
-rw-r--r--  libgfortran/generated/nca_minmax_r4.c        653
-rw-r--r--  libgfortran/generated/nca_minmax_r8.c        653
-rw-r--r--  libgfortran/generated/nca_minmax_s1.c        494
-rw-r--r--  libgfortran/generated/nca_minmax_s4.c        494
-rw-r--r--  libgfortran/libgfortran.h                      1
-rw-r--r--  libgfortran/m4/nca-minmax-s.m4               289
-rw-r--r--  libgfortran/m4/nca_minmax.m4                 259
-rw-r--r--  libgfortran/nca/.tags                        275
-rw-r--r--  libgfortran/nca/alloc.c                      152
-rw-r--r--  libgfortran/nca/alloc.h                       67
-rw-r--r--  libgfortran/nca/allocator.c                   90
-rw-r--r--  libgfortran/nca/allocator.h                   21
-rw-r--r--  libgfortran/nca/coarraynative.c              145
-rw-r--r--  libgfortran/nca/collective_inline.h           42
-rw-r--r--  libgfortran/nca/collective_subroutine.c      416
-rw-r--r--  libgfortran/nca/collective_subroutine.h       44
-rw-r--r--  libgfortran/nca/hashmap.c                    447
-rw-r--r--  libgfortran/nca/hashmap.h                     70
-rw-r--r--  libgfortran/nca/libcoarraynative.h           103
-rw-r--r--  libgfortran/nca/lock.h                        37
-rw-r--r--  libgfortran/nca/shared_memory.c              221
-rw-r--r--  libgfortran/nca/shared_memory.h               78
-rw-r--r--  libgfortran/nca/sync.c                       156
-rw-r--r--  libgfortran/nca/sync.h                        56
-rw-r--r--  libgfortran/nca/util.c                       197
-rw-r--r--  libgfortran/nca/util.h                        86
-rw-r--r--  libgfortran/nca/wrapper.c                    258
39 files changed, 10958 insertions(+), 33 deletions(-)
diff --git a/libgfortran/Makefile.am b/libgfortran/Makefile.am
index 61bf05d..683fd04 100644
--- a/libgfortran/Makefile.am
+++ b/libgfortran/Makefile.am
@@ -36,14 +36,21 @@ gfor_cdir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include
LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS)) \
$(lt_host_flags)
+if LIBGFOR_NATIVE_COARRAY
+COARRAY_LIBS=$(PTHREAD_LIBS) $(RT_LIBS)
+else
+COARRAY_LIBS=""
+endif
+
toolexeclib_LTLIBRARIES = libgfortran.la
toolexeclib_DATA = libgfortran.spec
libgfortran_la_LINK = $(LINK) $(libgfortran_la_LDFLAGS)
libgfortran_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \
$(LTLDFLAGS) $(LIBQUADLIB) ../libbacktrace/libbacktrace.la \
$(HWCAP_LDFLAGS) \
- -lm $(extra_ldflags_libgfortran) \
+ -lm $(COARRAY_LIBS) $(extra_ldflags_libgfortran) \
$(version_arg) -Wc,-shared-libgcc
+
libgfortran_la_DEPENDENCIES = $(version_dep) libgfortran.spec $(LIBQUADLIB_DEP)
cafexeclib_LTLIBRARIES = libcaf_single.la
@@ -53,6 +60,37 @@ libcaf_single_la_LDFLAGS = -static
libcaf_single_la_DEPENDENCIES = caf/libcaf.h
libcaf_single_la_LINK = $(LINK) $(libcaf_single_la_LDFLAGS)
+i_nca_minmax_c = \
+ $(srcdir)/generated/nca_minmax_i1.c \
+ $(srcdir)/generated/nca_minmax_i2.c \
+ $(srcdir)/generated/nca_minmax_i4.c \
+ $(srcdir)/generated/nca_minmax_i8.c \
+ $(srcdir)/generated/nca_minmax_i16.c \
+ $(srcdir)/generated/nca_minmax_r4.c \
+ $(srcdir)/generated/nca_minmax_r8.c \
+ $(srcdir)/generated/nca_minmax_r10.c \
+ $(srcdir)/generated/nca_minmax_r16.c
+
+i_nca_minmax_s_c = \
+ $(srcdir)/generated/nca_minmax_s1.c \
+ $(srcdir)/generated/nca_minmax_s4.c
+
+if LIBGFOR_NATIVE_COARRAY
+
+mylib_LTLIBRARIES = libnca.la
+mylibdir = $(libdir)/gcc/$(target_alias)/$(gcc_version)$(MULTISUBDIR)
+libnca_la_SOURCES = nca/alloc.c nca/allocator.c nca/coarraynative.c \
+ nca/hashmap.c \
+ nca/sync.c nca/util.c nca/wrapper.c nca/collective_subroutine.c \
+ nca/shared_memory.c \
+ $(i_nca_minmax_c) $(i_nca_minmax_s_c)
+libnca_la_DEPENDENCIES = nca/alloc.h nca/allocator.h nca/hashmap.h
+ nca/libcoarraynative.h nca/sync.h shared_memory.h \
+ nca/util.h nca/lock.h nca/collective_subroutine.h\
+ nca/collective_inline.h
+libnca_la_LINK = $(LINK) $(libnca_la_LDFLAGS)
+endif
+
if IEEE_SUPPORT
fincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)$(MULTISUBDIR)/finclude
nodist_finclude_HEADERS = ieee_arithmetic.mod ieee_exceptions.mod ieee_features.mod
@@ -83,6 +121,10 @@ if LIBGFOR_MINIMAL
AM_CFLAGS += -DLIBGFOR_MINIMAL
endif
+if LIBGFOR_NATIVE_COARRAY
+AM_CFLAGS += $(PTHREAD_CFLAGS)
+endif
+
gfor_io_src= \
io/size_from_kind.c
@@ -1231,9 +1273,20 @@ $(gfor_built_specific2_src): m4/specific2.m4 m4/head.m4
$(gfor_misc_specifics): m4/misc_specifics.m4 m4/head.m4
$(M4) -Dfile=$@ -I$(srcdir)/m4 misc_specifics.m4 > $@
+
+
+if LIBGFOR_NATIVE_COARRAY
+$(i_nca_minmax_c): m4/nca_minmax.m4 $(I_M4_DEPS)
+ $(M4) -Dfile=$@ -I$(srcdir)/m4 nca_minmax.m4 > $@
+
+$(i_nca_minmax_s_c): m4/nca-minmax-s.m4 $(I_M4_DEPS)
+ $(M4) -Dfile=$@ -I$(srcdir)/m4 nca-minmax-s.m4 > $@
+endif
+
## end of maintainer mode only rules
endif
+
EXTRA_DIST = $(m4_files)
# target overrides
diff --git a/libgfortran/Makefile.in b/libgfortran/Makefile.in
index 3d043aa..2379fe3 100644
--- a/libgfortran/Makefile.in
+++ b/libgfortran/Makefile.in
@@ -92,7 +92,8 @@ build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
@LIBGFOR_MINIMAL_TRUE@am__append_1 = -DLIBGFOR_MINIMAL
-@LIBGFOR_MINIMAL_FALSE@am__append_2 = \
+@LIBGFOR_NATIVE_COARRAY_TRUE@am__append_2 = $(PTHREAD_CFLAGS)
+@LIBGFOR_MINIMAL_FALSE@am__append_3 = \
@LIBGFOR_MINIMAL_FALSE@io/close.c \
@LIBGFOR_MINIMAL_FALSE@io/file_pos.c \
@LIBGFOR_MINIMAL_FALSE@io/format.c \
@@ -110,7 +111,7 @@ target_triplet = @target@
@LIBGFOR_MINIMAL_FALSE@io/fbuf.c \
@LIBGFOR_MINIMAL_FALSE@io/async.c
-@LIBGFOR_MINIMAL_FALSE@am__append_3 = \
+@LIBGFOR_MINIMAL_FALSE@am__append_4 = \
@LIBGFOR_MINIMAL_FALSE@intrinsics/access.c \
@LIBGFOR_MINIMAL_FALSE@intrinsics/c99_functions.c \
@LIBGFOR_MINIMAL_FALSE@intrinsics/chdir.c \
@@ -143,9 +144,9 @@ target_triplet = @target@
@LIBGFOR_MINIMAL_FALSE@intrinsics/umask.c \
@LIBGFOR_MINIMAL_FALSE@intrinsics/unlink.c
-@IEEE_SUPPORT_TRUE@am__append_4 = ieee/ieee_helper.c
-@LIBGFOR_MINIMAL_TRUE@am__append_5 = runtime/minimal.c
-@LIBGFOR_MINIMAL_FALSE@am__append_6 = \
+@IEEE_SUPPORT_TRUE@am__append_5 = ieee/ieee_helper.c
+@LIBGFOR_MINIMAL_TRUE@am__append_6 = runtime/minimal.c
+@LIBGFOR_MINIMAL_FALSE@am__append_7 = \
@LIBGFOR_MINIMAL_FALSE@runtime/backtrace.c \
@LIBGFOR_MINIMAL_FALSE@runtime/convert_char.c \
@LIBGFOR_MINIMAL_FALSE@runtime/environ.c \
@@ -157,7 +158,7 @@ target_triplet = @target@
# dummy sources for libtool
-@onestep_TRUE@am__append_7 = libgfortran_c.c libgfortran_f.f90
+@onestep_TRUE@am__append_8 = libgfortran_c.c libgfortran_f.f90
subdir = .
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/../config/depstand.m4 \
@@ -214,10 +215,11 @@ am__uninstall_files_from_dir = { \
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
$(am__cd) "$$dir" && rm -f $$files; }; \
}
-am__installdirs = "$(DESTDIR)$(cafexeclibdir)" \
+am__installdirs = "$(DESTDIR)$(cafexeclibdir)" "$(DESTDIR)$(mylibdir)" \
"$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(toolexeclibdir)" \
"$(DESTDIR)$(gfor_cdir)" "$(DESTDIR)$(fincludedir)"
-LTLIBRARIES = $(cafexeclib_LTLIBRARIES) $(toolexeclib_LTLIBRARIES)
+LTLIBRARIES = $(cafexeclib_LTLIBRARIES) $(mylib_LTLIBRARIES) \
+ $(toolexeclib_LTLIBRARIES)
libcaf_single_la_LIBADD =
am_libcaf_single_la_OBJECTS = single.lo
libcaf_single_la_OBJECTS = $(am_libcaf_single_la_OBJECTS)
@@ -467,6 +469,21 @@ am__objects_65 = $(am__objects_3) $(am__objects_53) $(am__objects_55) \
@onestep_FALSE@am_libgfortran_la_OBJECTS = $(am__objects_65)
@onestep_TRUE@am_libgfortran_la_OBJECTS = libgfortran_c.lo
libgfortran_la_OBJECTS = $(am_libgfortran_la_OBJECTS)
+libnca_la_LIBADD =
+am__objects_66 = nca_minmax_i1.lo nca_minmax_i2.lo nca_minmax_i4.lo \
+ nca_minmax_i8.lo nca_minmax_i16.lo nca_minmax_r4.lo \
+ nca_minmax_r8.lo nca_minmax_r10.lo nca_minmax_r16.lo
+am__objects_67 = nca_minmax_s1.lo nca_minmax_s4.lo
+@LIBGFOR_NATIVE_COARRAY_TRUE@am_libnca_la_OBJECTS = alloc.lo \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ allocator.lo coarraynative.lo \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ hashmap.lo sync.lo util.lo \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ wrapper.lo \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ collective_subroutine.lo \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ shared_memory.lo \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ $(am__objects_66) \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ $(am__objects_67)
+libnca_la_OBJECTS = $(am_libnca_la_OBJECTS)
+@LIBGFOR_NATIVE_COARRAY_TRUE@am_libnca_la_rpath = -rpath $(mylibdir)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
@@ -530,7 +547,8 @@ AM_V_FC = $(am__v_FC_@AM_V@)
am__v_FC_ = $(am__v_FC_@AM_DEFAULT_V@)
am__v_FC_0 = @echo " FC " $@;
am__v_FC_1 =
-SOURCES = $(libcaf_single_la_SOURCES) $(libgfortran_la_SOURCES)
+SOURCES = $(libcaf_single_la_SOURCES) $(libgfortran_la_SOURCES) \
+ $(libnca_la_SOURCES)
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
@@ -569,7 +587,7 @@ AMTAR = @AMTAR@
# Some targets require additional compiler options for IEEE compatibility.
AM_CFLAGS = @AM_CFLAGS@ -fcx-fortran-rules $(SECTION_FLAGS) \
- $(IEEE_FLAGS) $(am__append_1)
+ $(IEEE_FLAGS) $(am__append_1) $(am__append_2)
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AM_FCFLAGS = @AM_FCFLAGS@ $(IEEE_FLAGS)
AR = @AR@
@@ -637,7 +655,10 @@ PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
RANLIB = @RANLIB@
+RT_LIBS = @RT_LIBS@
SECTION_FLAGS = @SECTION_FLAGS@
SED = @SED@
SET_MAKE = @SET_MAKE@
@@ -698,6 +719,7 @@ pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
+runstatedir = @runstatedir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
@@ -728,13 +750,15 @@ gfor_cdir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include
LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS)) \
$(lt_host_flags)
+@LIBGFOR_NATIVE_COARRAY_FALSE@COARRAY_LIBS = ""
+@LIBGFOR_NATIVE_COARRAY_TRUE@COARRAY_LIBS = $(PTHREAD_LIBS) $(RT_LIBS)
toolexeclib_LTLIBRARIES = libgfortran.la
toolexeclib_DATA = libgfortran.spec
libgfortran_la_LINK = $(LINK) $(libgfortran_la_LDFLAGS)
libgfortran_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \
$(LTLDFLAGS) $(LIBQUADLIB) ../libbacktrace/libbacktrace.la \
$(HWCAP_LDFLAGS) \
- -lm $(extra_ldflags_libgfortran) \
+ -lm $(COARRAY_LIBS) $(extra_ldflags_libgfortran) \
$(version_arg) -Wc,-shared-libgcc
libgfortran_la_DEPENDENCIES = $(version_dep) libgfortran.spec $(LIBQUADLIB_DEP)
@@ -744,6 +768,31 @@ libcaf_single_la_SOURCES = caf/single.c
libcaf_single_la_LDFLAGS = -static
libcaf_single_la_DEPENDENCIES = caf/libcaf.h
libcaf_single_la_LINK = $(LINK) $(libcaf_single_la_LDFLAGS)
+i_nca_minmax_c = \
+ $(srcdir)/generated/nca_minmax_i1.c \
+ $(srcdir)/generated/nca_minmax_i2.c \
+ $(srcdir)/generated/nca_minmax_i4.c \
+ $(srcdir)/generated/nca_minmax_i8.c \
+ $(srcdir)/generated/nca_minmax_i16.c \
+ $(srcdir)/generated/nca_minmax_r4.c \
+ $(srcdir)/generated/nca_minmax_r8.c \
+ $(srcdir)/generated/nca_minmax_r10.c \
+ $(srcdir)/generated/nca_minmax_r16.c
+
+i_nca_minmax_s_c = \
+ $(srcdir)/generated/nca_minmax_s1.c \
+ $(srcdir)/generated/nca_minmax_s4.c
+
+@LIBGFOR_NATIVE_COARRAY_TRUE@mylib_LTLIBRARIES = libnca.la
+@LIBGFOR_NATIVE_COARRAY_TRUE@mylibdir = $(libdir)/gcc/$(target_alias)/$(gcc_version)$(MULTISUBDIR)
+@LIBGFOR_NATIVE_COARRAY_TRUE@libnca_la_SOURCES = nca/alloc.c nca/allocator.c nca/coarraynative.c \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/hashmap.c \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/sync.c nca/util.c nca/wrapper.c nca/collective_subroutine.c \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/shared_memory.c \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ $(i_nca_minmax_c) $(i_nca_minmax_s_c)
+
+@LIBGFOR_NATIVE_COARRAY_TRUE@libnca_la_DEPENDENCIES = nca/alloc.h nca/allocator.h nca/hashmap.h
+@LIBGFOR_NATIVE_COARRAY_TRUE@libnca_la_LINK = $(LINK) $(libnca_la_LDFLAGS)
@IEEE_SUPPORT_TRUE@fincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)$(MULTISUBDIR)/finclude
@IEEE_SUPPORT_TRUE@nodist_finclude_HEADERS = ieee_arithmetic.mod ieee_exceptions.mod ieee_features.mod
AM_CPPFLAGS = -iquote$(srcdir)/io -I$(srcdir)/$(MULTISRCTOP)../gcc \
@@ -755,7 +804,7 @@ AM_CPPFLAGS = -iquote$(srcdir)/io -I$(srcdir)/$(MULTISRCTOP)../gcc \
-I$(MULTIBUILDTOP)../libbacktrace \
-I../libbacktrace
-gfor_io_src = io/size_from_kind.c $(am__append_2)
+gfor_io_src = io/size_from_kind.c $(am__append_3)
gfor_io_headers = \
io/io.h \
io/fbuf.h \
@@ -777,7 +826,7 @@ gfor_helper_src = intrinsics/associated.c intrinsics/abort.c \
intrinsics/selected_int_kind.f90 \
intrinsics/selected_real_kind.f90 intrinsics/trigd.c \
intrinsics/unpack_generic.c runtime/in_pack_generic.c \
- runtime/in_unpack_generic.c $(am__append_3) $(am__append_4)
+ runtime/in_unpack_generic.c $(am__append_4) $(am__append_5)
@IEEE_SUPPORT_FALSE@gfor_ieee_src =
@IEEE_SUPPORT_TRUE@gfor_ieee_src = \
@IEEE_SUPPORT_TRUE@ieee/ieee_arithmetic.F90 \
@@ -785,8 +834,8 @@ gfor_helper_src = intrinsics/associated.c intrinsics/abort.c \
@IEEE_SUPPORT_TRUE@ieee/ieee_features.F90
gfor_src = runtime/bounds.c runtime/compile_options.c runtime/memory.c \
- runtime/string.c runtime/select.c $(am__append_5) \
- $(am__append_6)
+ runtime/string.c runtime/select.c $(am__append_6) \
+ $(am__append_7)
i_all_c = \
$(srcdir)/generated/all_l1.c \
$(srcdir)/generated/all_l2.c \
@@ -1540,7 +1589,7 @@ intrinsics/random_init.f90
BUILT_SOURCES = $(gfor_built_src) $(gfor_built_specific_src) \
$(gfor_built_specific2_src) $(gfor_misc_specifics) \
- $(am__append_7)
+ $(am__append_8)
prereq_SRC = $(gfor_src) $(gfor_built_src) $(gfor_io_src) \
$(gfor_helper_src) $(gfor_ieee_src) $(gfor_io_headers) $(gfor_specific_src)
@@ -1668,6 +1717,41 @@ clean-cafexeclibLTLIBRARIES:
rm -f $${locs}; \
}
+install-mylibLTLIBRARIES: $(mylib_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ @list='$(mylib_LTLIBRARIES)'; test -n "$(mylibdir)" || list=; \
+ list2=; for p in $$list; do \
+ if test -f $$p; then \
+ list2="$$list2 $$p"; \
+ else :; fi; \
+ done; \
+ test -z "$$list2" || { \
+ echo " $(MKDIR_P) '$(DESTDIR)$(mylibdir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(mylibdir)" || exit 1; \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(mylibdir)'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(mylibdir)"; \
+ }
+
+uninstall-mylibLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(mylib_LTLIBRARIES)'; test -n "$(mylibdir)" || list=; \
+ for p in $$list; do \
+ $(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(mylibdir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(mylibdir)/$$f"; \
+ done
+
+clean-mylibLTLIBRARIES:
+ -test -z "$(mylib_LTLIBRARIES)" || rm -f $(mylib_LTLIBRARIES)
+ @list='$(mylib_LTLIBRARIES)'; \
+ locs=`for p in $$list; do echo $$p; done | \
+ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+ sort -u`; \
+ test -z "$$locs" || { \
+ echo rm -f $${locs}; \
+ rm -f $${locs}; \
+ }
+
install-toolexeclibLTLIBRARIES: $(toolexeclib_LTLIBRARIES)
@$(NORMAL_INSTALL)
@list='$(toolexeclib_LTLIBRARIES)'; test -n "$(toolexeclibdir)" || list=; \
@@ -1709,6 +1793,9 @@ libcaf_single.la: $(libcaf_single_la_OBJECTS) $(libcaf_single_la_DEPENDENCIES) $
libgfortran.la: $(libgfortran_la_OBJECTS) $(libgfortran_la_DEPENDENCIES) $(EXTRA_libgfortran_la_DEPENDENCIES)
$(AM_V_GEN)$(libgfortran_la_LINK) -rpath $(toolexeclibdir) $(libgfortran_la_OBJECTS) $(libgfortran_la_LIBADD) $(LIBS)
+libnca.la: $(libnca_la_OBJECTS) $(libnca_la_DEPENDENCIES) $(EXTRA_libnca_la_DEPENDENCIES)
+ $(AM_V_GEN)$(libnca_la_LINK) $(am_libnca_la_rpath) $(libnca_la_OBJECTS) $(libnca_la_LIBADD) $(LIBS)
+
mostlyclean-compile:
-rm -f *.$(OBJEXT)
@@ -1723,6 +1810,8 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/all_l2.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/all_l4.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/all_l8.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alloc.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/allocator.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/any_l1.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/any_l16.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/any_l2.Plo@am__quote@
@@ -1742,6 +1831,8 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chmod.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/clock.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/close.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coarraynative.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collective_subroutine.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compile_options.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/convert_char.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/count_16_l.Plo@am__quote@
@@ -1866,6 +1957,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getXid.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getcwd.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getlog.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hashmap.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hostnm.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iall_i1.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iall_i16.Plo@am__quote@
@@ -2125,6 +2217,17 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minval_r8.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/move_alloc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mvbits.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_i1.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_i16.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_i2.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_i4.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_i8.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_r10.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_r16.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_r4.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_r8.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_s1.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_s4.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/norm2_r10.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/norm2_r16.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/norm2_r4.Plo@am__quote@
@@ -2218,6 +2321,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/shape_i2.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/shape_i4.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/shape_i8.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/shared_memory.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/signal.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/size.Plo@am__quote@
@@ -2255,6 +2359,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sum_r4.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sum_r8.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/symlnk.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sync.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/system.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/system_clock.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/time.Plo@am__quote@
@@ -2279,6 +2384,8 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unpack_r16.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unpack_r4.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unpack_r8.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/util.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/wrapper.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/write.Plo@am__quote@
.F90.o:
@@ -6679,6 +6786,146 @@ ieee_helper.lo: ieee/ieee_helper.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ieee_helper.lo `test -f 'ieee/ieee_helper.c' || echo '$(srcdir)/'`ieee/ieee_helper.c
+alloc.lo: nca/alloc.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT alloc.lo -MD -MP -MF $(DEPDIR)/alloc.Tpo -c -o alloc.lo `test -f 'nca/alloc.c' || echo '$(srcdir)/'`nca/alloc.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/alloc.Tpo $(DEPDIR)/alloc.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/alloc.c' object='alloc.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o alloc.lo `test -f 'nca/alloc.c' || echo '$(srcdir)/'`nca/alloc.c
+
+allocator.lo: nca/allocator.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT allocator.lo -MD -MP -MF $(DEPDIR)/allocator.Tpo -c -o allocator.lo `test -f 'nca/allocator.c' || echo '$(srcdir)/'`nca/allocator.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/allocator.Tpo $(DEPDIR)/allocator.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/allocator.c' object='allocator.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o allocator.lo `test -f 'nca/allocator.c' || echo '$(srcdir)/'`nca/allocator.c
+
+coarraynative.lo: nca/coarraynative.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT coarraynative.lo -MD -MP -MF $(DEPDIR)/coarraynative.Tpo -c -o coarraynative.lo `test -f 'nca/coarraynative.c' || echo '$(srcdir)/'`nca/coarraynative.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/coarraynative.Tpo $(DEPDIR)/coarraynative.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/coarraynative.c' object='coarraynative.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o coarraynative.lo `test -f 'nca/coarraynative.c' || echo '$(srcdir)/'`nca/coarraynative.c
+
+hashmap.lo: nca/hashmap.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT hashmap.lo -MD -MP -MF $(DEPDIR)/hashmap.Tpo -c -o hashmap.lo `test -f 'nca/hashmap.c' || echo '$(srcdir)/'`nca/hashmap.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/hashmap.Tpo $(DEPDIR)/hashmap.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/hashmap.c' object='hashmap.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o hashmap.lo `test -f 'nca/hashmap.c' || echo '$(srcdir)/'`nca/hashmap.c
+
+sync.lo: nca/sync.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT sync.lo -MD -MP -MF $(DEPDIR)/sync.Tpo -c -o sync.lo `test -f 'nca/sync.c' || echo '$(srcdir)/'`nca/sync.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/sync.Tpo $(DEPDIR)/sync.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/sync.c' object='sync.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sync.lo `test -f 'nca/sync.c' || echo '$(srcdir)/'`nca/sync.c
+
+util.lo: nca/util.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT util.lo -MD -MP -MF $(DEPDIR)/util.Tpo -c -o util.lo `test -f 'nca/util.c' || echo '$(srcdir)/'`nca/util.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/util.Tpo $(DEPDIR)/util.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/util.c' object='util.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o util.lo `test -f 'nca/util.c' || echo '$(srcdir)/'`nca/util.c
+
+wrapper.lo: nca/wrapper.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT wrapper.lo -MD -MP -MF $(DEPDIR)/wrapper.Tpo -c -o wrapper.lo `test -f 'nca/wrapper.c' || echo '$(srcdir)/'`nca/wrapper.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/wrapper.Tpo $(DEPDIR)/wrapper.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/wrapper.c' object='wrapper.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o wrapper.lo `test -f 'nca/wrapper.c' || echo '$(srcdir)/'`nca/wrapper.c
+
+collective_subroutine.lo: nca/collective_subroutine.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT collective_subroutine.lo -MD -MP -MF $(DEPDIR)/collective_subroutine.Tpo -c -o collective_subroutine.lo `test -f 'nca/collective_subroutine.c' || echo '$(srcdir)/'`nca/collective_subroutine.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/collective_subroutine.Tpo $(DEPDIR)/collective_subroutine.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/collective_subroutine.c' object='collective_subroutine.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o collective_subroutine.lo `test -f 'nca/collective_subroutine.c' || echo '$(srcdir)/'`nca/collective_subroutine.c
+
+shared_memory.lo: nca/shared_memory.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT shared_memory.lo -MD -MP -MF $(DEPDIR)/shared_memory.Tpo -c -o shared_memory.lo `test -f 'nca/shared_memory.c' || echo '$(srcdir)/'`nca/shared_memory.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/shared_memory.Tpo $(DEPDIR)/shared_memory.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/shared_memory.c' object='shared_memory.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o shared_memory.lo `test -f 'nca/shared_memory.c' || echo '$(srcdir)/'`nca/shared_memory.c
+
+nca_minmax_i1.lo: $(srcdir)/generated/nca_minmax_i1.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_i1.lo -MD -MP -MF $(DEPDIR)/nca_minmax_i1.Tpo -c -o nca_minmax_i1.lo `test -f '$(srcdir)/generated/nca_minmax_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i1.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_i1.Tpo $(DEPDIR)/nca_minmax_i1.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_i1.c' object='nca_minmax_i1.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_i1.lo `test -f '$(srcdir)/generated/nca_minmax_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i1.c
+
+nca_minmax_i2.lo: $(srcdir)/generated/nca_minmax_i2.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_i2.lo -MD -MP -MF $(DEPDIR)/nca_minmax_i2.Tpo -c -o nca_minmax_i2.lo `test -f '$(srcdir)/generated/nca_minmax_i2.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i2.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_i2.Tpo $(DEPDIR)/nca_minmax_i2.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_i2.c' object='nca_minmax_i2.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_i2.lo `test -f '$(srcdir)/generated/nca_minmax_i2.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i2.c
+
+nca_minmax_i4.lo: $(srcdir)/generated/nca_minmax_i4.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_i4.lo -MD -MP -MF $(DEPDIR)/nca_minmax_i4.Tpo -c -o nca_minmax_i4.lo `test -f '$(srcdir)/generated/nca_minmax_i4.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i4.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_i4.Tpo $(DEPDIR)/nca_minmax_i4.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_i4.c' object='nca_minmax_i4.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_i4.lo `test -f '$(srcdir)/generated/nca_minmax_i4.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i4.c
+
+nca_minmax_i8.lo: $(srcdir)/generated/nca_minmax_i8.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_i8.lo -MD -MP -MF $(DEPDIR)/nca_minmax_i8.Tpo -c -o nca_minmax_i8.lo `test -f '$(srcdir)/generated/nca_minmax_i8.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i8.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_i8.Tpo $(DEPDIR)/nca_minmax_i8.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_i8.c' object='nca_minmax_i8.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_i8.lo `test -f '$(srcdir)/generated/nca_minmax_i8.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i8.c
+
+nca_minmax_i16.lo: $(srcdir)/generated/nca_minmax_i16.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_i16.lo -MD -MP -MF $(DEPDIR)/nca_minmax_i16.Tpo -c -o nca_minmax_i16.lo `test -f '$(srcdir)/generated/nca_minmax_i16.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i16.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_i16.Tpo $(DEPDIR)/nca_minmax_i16.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_i16.c' object='nca_minmax_i16.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_i16.lo `test -f '$(srcdir)/generated/nca_minmax_i16.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i16.c
+
+nca_minmax_r4.lo: $(srcdir)/generated/nca_minmax_r4.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_r4.lo -MD -MP -MF $(DEPDIR)/nca_minmax_r4.Tpo -c -o nca_minmax_r4.lo `test -f '$(srcdir)/generated/nca_minmax_r4.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r4.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_r4.Tpo $(DEPDIR)/nca_minmax_r4.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_r4.c' object='nca_minmax_r4.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_r4.lo `test -f '$(srcdir)/generated/nca_minmax_r4.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r4.c
+
+nca_minmax_r8.lo: $(srcdir)/generated/nca_minmax_r8.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_r8.lo -MD -MP -MF $(DEPDIR)/nca_minmax_r8.Tpo -c -o nca_minmax_r8.lo `test -f '$(srcdir)/generated/nca_minmax_r8.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r8.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_r8.Tpo $(DEPDIR)/nca_minmax_r8.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_r8.c' object='nca_minmax_r8.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_r8.lo `test -f '$(srcdir)/generated/nca_minmax_r8.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r8.c
+
+nca_minmax_r10.lo: $(srcdir)/generated/nca_minmax_r10.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_r10.lo -MD -MP -MF $(DEPDIR)/nca_minmax_r10.Tpo -c -o nca_minmax_r10.lo `test -f '$(srcdir)/generated/nca_minmax_r10.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r10.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_r10.Tpo $(DEPDIR)/nca_minmax_r10.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_r10.c' object='nca_minmax_r10.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_r10.lo `test -f '$(srcdir)/generated/nca_minmax_r10.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r10.c
+
+nca_minmax_r16.lo: $(srcdir)/generated/nca_minmax_r16.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_r16.lo -MD -MP -MF $(DEPDIR)/nca_minmax_r16.Tpo -c -o nca_minmax_r16.lo `test -f '$(srcdir)/generated/nca_minmax_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r16.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_r16.Tpo $(DEPDIR)/nca_minmax_r16.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_r16.c' object='nca_minmax_r16.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_r16.lo `test -f '$(srcdir)/generated/nca_minmax_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r16.c
+
+nca_minmax_s1.lo: $(srcdir)/generated/nca_minmax_s1.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_s1.lo -MD -MP -MF $(DEPDIR)/nca_minmax_s1.Tpo -c -o nca_minmax_s1.lo `test -f '$(srcdir)/generated/nca_minmax_s1.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_s1.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_s1.Tpo $(DEPDIR)/nca_minmax_s1.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_s1.c' object='nca_minmax_s1.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_s1.lo `test -f '$(srcdir)/generated/nca_minmax_s1.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_s1.c
+
+nca_minmax_s4.lo: $(srcdir)/generated/nca_minmax_s4.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_s4.lo -MD -MP -MF $(DEPDIR)/nca_minmax_s4.Tpo -c -o nca_minmax_s4.lo `test -f '$(srcdir)/generated/nca_minmax_s4.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_s4.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_s4.Tpo $(DEPDIR)/nca_minmax_s4.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_s4.c' object='nca_minmax_s4.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_s4.lo `test -f '$(srcdir)/generated/nca_minmax_s4.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_s4.c
+
.f90.o:
$(AM_V_FC)$(FCCOMPILE) -c -o $@ $<
@@ -6835,7 +7082,7 @@ check: $(BUILT_SOURCES)
$(MAKE) $(AM_MAKEFLAGS) check-am
all-am: Makefile $(LTLIBRARIES) $(DATA) $(HEADERS) config.h all-local
installdirs:
- for dir in "$(DESTDIR)$(cafexeclibdir)" "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(gfor_cdir)" "$(DESTDIR)$(fincludedir)"; do \
+ for dir in "$(DESTDIR)$(cafexeclibdir)" "$(DESTDIR)$(mylibdir)" "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(gfor_cdir)" "$(DESTDIR)$(fincludedir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
done
install: $(BUILT_SOURCES)
@@ -6873,7 +7120,8 @@ maintainer-clean-generic:
clean: clean-am
clean-am: clean-cafexeclibLTLIBRARIES clean-generic clean-libtool \
- clean-local clean-toolexeclibLTLIBRARIES mostlyclean-am
+ clean-local clean-mylibLTLIBRARIES \
+ clean-toolexeclibLTLIBRARIES mostlyclean-am
distclean: distclean-am
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
@@ -6894,7 +7142,8 @@ info: info-am
info-am:
-install-data-am: install-gfor_cHEADERS install-nodist_fincludeHEADERS
+install-data-am: install-gfor_cHEADERS install-mylibLTLIBRARIES \
+ install-nodist_fincludeHEADERS
install-dvi: install-dvi-am
@@ -6945,14 +7194,14 @@ ps: ps-am
ps-am:
uninstall-am: uninstall-cafexeclibLTLIBRARIES uninstall-gfor_cHEADERS \
- uninstall-nodist_fincludeHEADERS uninstall-toolexeclibDATA \
- uninstall-toolexeclibLTLIBRARIES
+ uninstall-mylibLTLIBRARIES uninstall-nodist_fincludeHEADERS \
+ uninstall-toolexeclibDATA uninstall-toolexeclibLTLIBRARIES
.MAKE: all check install install-am install-strip
.PHONY: CTAGS GTAGS TAGS all all-am all-local am--refresh check \
check-am clean clean-cafexeclibLTLIBRARIES clean-cscope \
- clean-generic clean-libtool clean-local \
+ clean-generic clean-libtool clean-local clean-mylibLTLIBRARIES \
clean-toolexeclibLTLIBRARIES cscope cscopelist-am ctags \
ctags-am distclean distclean-compile distclean-generic \
distclean-hdr distclean-libtool distclean-local distclean-tags \
@@ -6961,16 +7210,17 @@ uninstall-am: uninstall-cafexeclibLTLIBRARIES uninstall-gfor_cHEADERS \
install-dvi install-dvi-am install-exec install-exec-am \
install-exec-local install-gfor_cHEADERS install-html \
install-html-am install-info install-info-am install-man \
- install-nodist_fincludeHEADERS install-pdf install-pdf-am \
- install-ps install-ps-am install-strip install-toolexeclibDATA \
+ install-mylibLTLIBRARIES install-nodist_fincludeHEADERS \
+ install-pdf install-pdf-am install-ps install-ps-am \
+ install-strip install-toolexeclibDATA \
install-toolexeclibLTLIBRARIES installcheck installcheck-am \
installdirs maintainer-clean maintainer-clean-generic \
maintainer-clean-local mostlyclean mostlyclean-compile \
mostlyclean-generic mostlyclean-libtool mostlyclean-local pdf \
pdf-am ps ps-am tags tags-am uninstall uninstall-am \
uninstall-cafexeclibLTLIBRARIES uninstall-gfor_cHEADERS \
- uninstall-nodist_fincludeHEADERS uninstall-toolexeclibDATA \
- uninstall-toolexeclibLTLIBRARIES
+ uninstall-mylibLTLIBRARIES uninstall-nodist_fincludeHEADERS \
+ uninstall-toolexeclibDATA uninstall-toolexeclibLTLIBRARIES
.PRECIOUS: Makefile
@@ -6983,6 +7233,9 @@ uninstall-am: uninstall-cafexeclibLTLIBRARIES uninstall-gfor_cHEADERS \
@LIBGFOR_USE_SYMVER_SUN_TRUE@@LIBGFOR_USE_SYMVER_TRUE@ `echo $(libgfortran_la_LIBADD) | \
@LIBGFOR_USE_SYMVER_SUN_TRUE@@LIBGFOR_USE_SYMVER_TRUE@ sed 's,/\([^/.]*\)\.la,/.libs/\1.a,g'` \
@LIBGFOR_USE_SYMVER_SUN_TRUE@@LIBGFOR_USE_SYMVER_TRUE@ > $@ || (rm -f $@ ; exit 1)
+@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/libcoarraynative.h nca/sync.h shared_memory.h \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/util.h nca/lock.h nca/collective_subroutine.h\
+@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/collective_inline.h
# Turn on vectorization and loop unrolling for matmul.
$(patsubst %.c,%.lo,$(notdir $(i_matmul_c))): AM_CFLAGS += -ffast-math -ftree-vectorize -funroll-loops --param max-unroll-times=4
@@ -7193,6 +7446,12 @@ fpu-target.inc: fpu-target.h $(srcdir)/libgfortran.h
@MAINTAINER_MODE_TRUE@$(gfor_misc_specifics): m4/misc_specifics.m4 m4/head.m4
@MAINTAINER_MODE_TRUE@ $(M4) -Dfile=$@ -I$(srcdir)/m4 misc_specifics.m4 > $@
+@LIBGFOR_NATIVE_COARRAY_TRUE@@MAINTAINER_MODE_TRUE@$(i_nca_minmax_c): m4/nca_minmax.m4 $(I_M4_DEPS)
+@LIBGFOR_NATIVE_COARRAY_TRUE@@MAINTAINER_MODE_TRUE@ $(M4) -Dfile=$@ -I$(srcdir)/m4 nca_minmax.m4 > $@
+
+@LIBGFOR_NATIVE_COARRAY_TRUE@@MAINTAINER_MODE_TRUE@$(i_nca_minmax_s_c): m4/nca-minmax-s.m4 $(I_M4_DEPS)
+@LIBGFOR_NATIVE_COARRAY_TRUE@@MAINTAINER_MODE_TRUE@ $(M4) -Dfile=$@ -I$(srcdir)/m4 nca-minmax-s.m4 > $@
+
# target overrides
-include $(tmake_file)
diff --git a/libgfortran/config.h.in b/libgfortran/config.h.in
index 2d58188..795c9fe 100644
--- a/libgfortran/config.h.in
+++ b/libgfortran/config.h.in
@@ -657,6 +657,15 @@
/* Define to 1 if you have the `powf' function. */
#undef HAVE_POWF
+/* Define to 1 if you have the `pthread_barrierattr_setpshared' function. */
+#undef HAVE_PTHREAD_BARRIERATTR_SETPSHARED
+
+/* Define to 1 if you have the `pthread_condattr_setpshared' function. */
+#undef HAVE_PTHREAD_CONDATTR_SETPSHARED
+
+/* Define to 1 if you have the `pthread_mutexattr_setpshared' function. */
+#undef HAVE_PTHREAD_MUTEXATTR_SETPSHARED
+
/* Define to 1 if the system has the type `ptrdiff_t'. */
#undef HAVE_PTRDIFF_T
@@ -687,6 +696,12 @@
/* Define to 1 if you have the `setmode' function. */
#undef HAVE_SETMODE
+/* Define to 1 if you have the `shm_open' function. */
+#undef HAVE_SHM_OPEN
+
+/* Define to 1 if you have the `shm_unlink' function. */
+#undef HAVE_SHM_UNLINK
+
/* Define to 1 if you have the `sigaction' function. */
#undef HAVE_SIGACTION
diff --git a/libgfortran/configure b/libgfortran/configure
index 99cca96..d703fda 100755
--- a/libgfortran/configure
+++ b/libgfortran/configure
@@ -637,6 +637,11 @@ am__EXEEXT_TRUE
LTLIBOBJS
LIBOBJS
get_gcc_base_ver
+LIBGFOR_NATIVE_COARRAY_FALSE
+LIBGFOR_NATIVE_COARRAY_TRUE
+RT_LIBS
+PTHREAD_LIBS
+PTHREAD_CFLAGS
HAVE_AVX128_FALSE
HAVE_AVX128_TRUE
tmake_file
@@ -783,6 +788,7 @@ infodir
docdir
oldincludedir
includedir
+runstatedir
localstatedir
sharedstatedir
sysconfdir
@@ -874,6 +880,7 @@ datadir='${datarootdir}'
sysconfdir='${prefix}/etc'
sharedstatedir='${prefix}/com'
localstatedir='${prefix}/var'
+runstatedir='${localstatedir}/run'
includedir='${prefix}/include'
oldincludedir='/usr/include'
docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
@@ -1126,6 +1133,15 @@ do
| -silent | --silent | --silen | --sile | --sil)
silent=yes ;;
+ -runstatedir | --runstatedir | --runstatedi | --runstated \
+ | --runstate | --runstat | --runsta | --runst | --runs \
+ | --run | --ru | --r)
+ ac_prev=runstatedir ;;
+ -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
+ | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
+ | --run=* | --ru=* | --r=*)
+ runstatedir=$ac_optarg ;;
+
-sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
ac_prev=sbindir ;;
-sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
@@ -1263,7 +1279,7 @@ fi
for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \
datadir sysconfdir sharedstatedir localstatedir includedir \
oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
- libdir localedir mandir
+ libdir localedir mandir runstatedir
do
eval ac_val=\$$ac_var
# Remove trailing slashes.
@@ -1416,6 +1432,7 @@ Fine tuning of the installation directories:
--sysconfdir=DIR read-only single-machine data [PREFIX/etc]
--sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
--localstatedir=DIR modifiable single-machine data [PREFIX/var]
+ --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run]
--libdir=DIR object code libraries [EPREFIX/lib]
--includedir=DIR C header files [PREFIX/include]
--oldincludedir=DIR C header files for non-gcc [/usr/include]
@@ -16099,7 +16116,7 @@ else
We can't simply define LARGE_OFF_T to be 9223372036854775807,
since some C++ compilers masquerading as C compilers
incorrectly reject 9223372036854775807. */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
&& LARGE_OFF_T % 2147483647 == 1)
? 1 : -1];
@@ -16145,7 +16162,7 @@ else
We can't simply define LARGE_OFF_T to be 9223372036854775807,
since some C++ compilers masquerading as C compilers
incorrectly reject 9223372036854775807. */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
&& LARGE_OFF_T % 2147483647 == 1)
? 1 : -1];
@@ -16169,7 +16186,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
We can't simply define LARGE_OFF_T to be 9223372036854775807,
since some C++ compilers masquerading as C compilers
incorrectly reject 9223372036854775807. */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
&& LARGE_OFF_T % 2147483647 == 1)
? 1 : -1];
@@ -16214,7 +16231,7 @@ else
We can't simply define LARGE_OFF_T to be 9223372036854775807,
since some C++ compilers masquerading as C compilers
incorrectly reject 9223372036854775807. */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
&& LARGE_OFF_T % 2147483647 == 1)
? 1 : -1];
@@ -16238,7 +16255,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
We can't simply define LARGE_OFF_T to be 9223372036854775807,
since some C++ compilers masquerading as C compilers
incorrectly reject 9223372036854775807. */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
&& LARGE_OFF_T % 2147483647 == 1)
? 1 : -1];
@@ -27147,6 +27164,167 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
CFLAGS="$ac_save_CFLAGS"
+# Tests related to native coarrays
+# Test whether the compiler supports the -pthread option.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -pthread is supported" >&5
+$as_echo_n "checking whether -pthread is supported... " >&6; }
+if ${libgfortran_cv_lib_pthread+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ CFLAGS_hold=$CFLAGS
+CFLAGS="$CFLAGS -pthread -L../libatomic/.libs"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+int i;
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ libgfortran_cv_lib_pthread=yes
+else
+ libgfortran_cv_lib_pthread=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+CFLAGS=$CFLAGS_hold
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgfortran_cv_lib_pthread" >&5
+$as_echo "$libgfortran_cv_lib_pthread" >&6; }
+PTHREAD_CFLAGS=
+if test "$libgfortran_cv_lib_pthread" = yes; then
+ # RISC-V apparently adds -latomic when using -pthread.
+ PTHREAD_CFLAGS="-pthread -L../libatomic/.libs"
+fi
+
+
+# Test for the -lpthread library.
+PTHREAD_LIBS=
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5
+$as_echo_n "checking for pthread_create in -lpthread... " >&6; }
+if ${ac_cv_lib_pthread_pthread_create+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lpthread $LIBS"
+if test x$gcc_no_link = xyes; then
+ as_fn_error $? "Link tests are not allowed after GCC_NO_EXECUTABLES." "$LINENO" 5
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char pthread_create ();
+int
+main ()
+{
+return pthread_create ();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_lib_pthread_pthread_create=yes
+else
+ ac_cv_lib_pthread_pthread_create=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthread_pthread_create" >&5
+$as_echo "$ac_cv_lib_pthread_pthread_create" >&6; }
+if test "x$ac_cv_lib_pthread_pthread_create" = xyes; then :
+ PTHREAD_LIBS=-lpthread
+fi
+
+
+
+# Test if -lrt is required
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for shm_open in -lrt" >&5
+$as_echo_n "checking for shm_open in -lrt... " >&6; }
+if ${ac_cv_lib_rt_shm_open+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lrt $LIBS"
+if test x$gcc_no_link = xyes; then
+ as_fn_error $? "Link tests are not allowed after GCC_NO_EXECUTABLES." "$LINENO" 5
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char shm_open ();
+int
+main ()
+{
+return shm_open ();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_lib_rt_shm_open=yes
+else
+ ac_cv_lib_rt_shm_open=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_rt_shm_open" >&5
+$as_echo "$ac_cv_lib_rt_shm_open" >&6; }
+if test "x$ac_cv_lib_rt_shm_open" = xyes; then :
+ RT_LIBS=-lrt
+fi
+
+
+
+CFLAGS_hold="$CFLAGS"
+CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+LIBS_hold="$LIBS"
+LIBS="$LIBS $PTHREAD_LIBS $RT_LIBS"
+
+# Find the functions that we need
+for ac_func in pthread_condattr_setpshared pthread_mutexattr_setpshared pthread_barrierattr_setpshared shm_open shm_unlink
+do :
+ as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
+ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
+if eval test \"x\$"$as_ac_var"\" = x"yes"; then :
+ cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+ if true; then
+ LIBGFOR_NATIVE_COARRAY_TRUE=
+ LIBGFOR_NATIVE_COARRAY_FALSE='#'
+else
+ LIBGFOR_NATIVE_COARRAY_TRUE='#'
+ LIBGFOR_NATIVE_COARRAY_FALSE=
+fi
+
+else
+ if false; then
+ LIBGFOR_NATIVE_COARRAY_TRUE=
+ LIBGFOR_NATIVE_COARRAY_FALSE='#'
+else
+ LIBGFOR_NATIVE_COARRAY_TRUE='#'
+ LIBGFOR_NATIVE_COARRAY_FALSE=
+fi
+
+fi
+done
+
+
+CFLAGS="$CFLAGS_hold"
+LIBS="$LIBS_hold"
+
# Determine what GCC version number to use in filesystem paths.
get_gcc_base_ver="cat"
@@ -27438,6 +27616,14 @@ if test -z "${HAVE_AVX128_TRUE}" && test -z "${HAVE_AVX128_FALSE}"; then
as_fn_error $? "conditional \"HAVE_AVX128\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
+if test -z "${LIBGFOR_NATIVE_COARRAY_TRUE}" && test -z "${LIBGFOR_NATIVE_COARRAY_FALSE}"; then
+ as_fn_error $? "conditional \"LIBGFOR_NATIVE_COARRAY\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${LIBGFOR_NATIVE_COARRAY_TRUE}" && test -z "${LIBGFOR_NATIVE_COARRAY_FALSE}"; then
+ as_fn_error $? "conditional \"LIBGFOR_NATIVE_COARRAY\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
: "${CONFIG_STATUS=./config.status}"
ac_write_fail=0
diff --git a/libgfortran/configure.ac b/libgfortran/configure.ac
index 8961e31..1aac140 100644
--- a/libgfortran/configure.ac
+++ b/libgfortran/configure.ac
@@ -683,6 +683,43 @@ LIBGFOR_CHECK_FMA4
# Check if AVX128 works
LIBGFOR_CHECK_AVX128
+# Tests related to native coarrays
+# Test whether the compiler supports the -pthread option.
+AC_CACHE_CHECK([whether -pthread is supported],
+[libgfortran_cv_lib_pthread],
+[CFLAGS_hold=$CFLAGS
+CFLAGS="$CFLAGS -pthread -L../libatomic/.libs"
+AC_COMPILE_IFELSE([AC_LANG_SOURCE([int i;])],
+[libgfortran_cv_lib_pthread=yes],
+[libgfortran_cv_lib_pthread=no])
+CFLAGS=$CFLAGS_hold])
+PTHREAD_CFLAGS=
+if test "$libgfortran_cv_lib_pthread" = yes; then
+ # RISC-V apparently adds -latomic when using -pthread.
+ PTHREAD_CFLAGS="-pthread -L../libatomic/.libs"
+fi
+AC_SUBST(PTHREAD_CFLAGS)
+
+# Test for the -lpthread library.
+PTHREAD_LIBS=
+AC_CHECK_LIB([pthread], [pthread_create], PTHREAD_LIBS=-lpthread)
+AC_SUBST(PTHREAD_LIBS)
+
+# Test whether -lrt is required.
+AC_CHECK_LIB([rt], [shm_open], RT_LIBS=-lrt)
+AC_SUBST(RT_LIBS)
+
+CFLAGS_hold="$CFLAGS"
+CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+LIBS_hold="$LIBS"
+LIBS="$LIBS $PTHREAD_LIBS $RT_LIBS"
+
+# Find the functions that we need
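+# pthread_condattr_setpshared, pthread_mutexattr_setpshared and
+# pthread_barrierattr_setpshared are needed to place the synchronization
+# objects in process-shared memory; shm_open and shm_unlink provide the
+# shared memory segment itself.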
+AC_CHECK_FUNCS([pthread_condattr_setpshared pthread_mutexattr_setpshared pthread_barrierattr_setpshared shm_open shm_unlink],
+  [AM_CONDITIONAL(LIBGFOR_NATIVE_COARRAY,true)],
+  [AM_CONDITIONAL(LIBGFOR_NATIVE_COARRAY,false)])
+
+CFLAGS="$CFLAGS_hold"
+LIBS="$LIBS_hold"
+
# Determine what GCC version number to use in filesystem paths.
GCC_BASE_VER
diff --git a/libgfortran/generated/nca_minmax_i1.c b/libgfortran/generated/nca_minmax_i1.c
new file mode 100644
index 0000000..3bc9a2b
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_i1.c
@@ -0,0 +1,653 @@
+/* Implementation of the collective subroutines MAX, MIN and SUM.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_INTEGER_1)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_i1);
+
+void
+nca_collsub_max_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_1 *a, *b;
+ GFC_INTEGER_1 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
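+  /* Binary tree reduction: in round cbit, each image whose index is a
+     multiple of 2^(cbit+1) folds the value of the image 2^cbit above it
+     into its own slot, so after ceiling(log2(num_images)) rounds the
+     result ends up in buffer[0].  Images that drop out of this loop keep
+     synchronizing in the loop that follows, so every image executes the
+     same number of barriers.  */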
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
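+  /* RESULT_IMAGE is the optional 1-based image index; if it was not
+     passed (NULL), every image receives the reduced value.  */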
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_i1);
+
+void
+nca_collsub_min_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_1 *a, *b;
+ GFC_INTEGER_1 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_i1);
+
+void
+nca_collsub_sum_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_1 *a, *b;
+ GFC_INTEGER_1 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_i1 (gfc_array_i1 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_i1);
+
+void
+nca_collsub_max_array_i1 (gfc_array_i1 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_1 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_1 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_1);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_1);
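+  /* The array is contiguous ("packed") iff the byte stride of each
+     dimension equals the combined size of the lower dimensions; in that
+     case a single memcpy into the shared buffer is enough.  */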
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_1);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_1 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_1 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
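+
+     Each row is one round: 'a' marks an image that accumulates, 'b' the
+     partner image whose value is folded into it, '_' an image that only
+     synchronizes, and 'r' the final result on image zero.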
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_1 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_1 *a;
+ GFC_INTEGER_1 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_1 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_1 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_i1 (gfc_array_i1 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_i1);
+
+void
+nca_collsub_min_array_i1 (gfc_array_i1 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_1 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_1 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_1);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_1);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_1);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_1 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_1 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_1 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_1 *a;
+ GFC_INTEGER_1 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_1 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_1 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_i1 (gfc_array_i1 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_i1);
+
+void
+nca_collsub_sum_array_i1 (gfc_array_i1 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_1 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_1 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_1);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_1);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_1);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_1 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_1 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_1 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_1 *a;
+ GFC_INTEGER_1 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_1 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_1 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_i16.c b/libgfortran/generated/nca_minmax_i16.c
new file mode 100644
index 0000000..8fbb948
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_i16.c
@@ -0,0 +1,653 @@
+/* Implementation of the collective subroutines MAX, MIN and SUM.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_INTEGER_16)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_i16);
+
+void
+nca_collsub_max_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_16 *a, *b;
+ GFC_INTEGER_16 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_i16);
+
+void
+nca_collsub_min_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_16 *a, *b;
+ GFC_INTEGER_16 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_i16);
+
+void
+nca_collsub_sum_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_16 *a, *b;
+ GFC_INTEGER_16 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_i16 (gfc_array_i16 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_i16);
+
+void
+nca_collsub_max_array_i16 (gfc_array_i16 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_16 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_16);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_16);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_16);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_16 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_16 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_16 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_16 *a;
+ GFC_INTEGER_16 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
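+  /* Copy the reduced data from the shared buffer back into the (possibly
+     strided) array, either on the requested RESULT_IMAGE or on every
+     image if RESULT_IMAGE was not passed.  */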
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_16 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_16 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_i16 (gfc_array_i16 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_i16);
+
+void
+nca_collsub_min_array_i16 (gfc_array_i16 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_16 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_16);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_16);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_16);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_16 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_16 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_16 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_16 *a;
+ GFC_INTEGER_16 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_16 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_16 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_i16 (gfc_array_i16 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_i16);
+
+void
+nca_collsub_sum_array_i16 (gfc_array_i16 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_16 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_16);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_16);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_16);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_16 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_16 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_16 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_16 *a;
+ GFC_INTEGER_16 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_16 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_16 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_i2.c b/libgfortran/generated/nca_minmax_i2.c
new file mode 100644
index 0000000..61908d6
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_i2.c
@@ -0,0 +1,653 @@
+/* Implementation of the collective subroutines MAX, MIN and SUM.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_INTEGER_2)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_i2);
+
+void
+nca_collsub_max_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_2 *a, *b;
+ GFC_INTEGER_2 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_i2);
+
+void
+nca_collsub_min_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_2 *a, *b;
+ GFC_INTEGER_2 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_i2);
+
+void
+nca_collsub_sum_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_2 *a, *b;
+ GFC_INTEGER_2 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_i2 (gfc_array_i2 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_i2);
+
+void
+nca_collsub_max_array_i2 (gfc_array_i2 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_2 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_2 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_2);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_2);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_2);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
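+  /* The shared buffer holds one contiguous copy of the array per image;
+     this image packs its contribution at offset image_num * num_elems.  */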
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_2 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_2 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_2 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_2 *a;
+ GFC_INTEGER_2 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_2 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_2 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_i2 (gfc_array_i2 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_i2);
+
+void
+nca_collsub_min_array_i2 (gfc_array_i2 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_2 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_2 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_2);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_2);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_2);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_2 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_2 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_2 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_2 *a;
+ GFC_INTEGER_2 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_2 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_2 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_i2 (gfc_array_i2 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_i2);
+
+void
+nca_collsub_sum_array_i2 (gfc_array_i2 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_2 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_2 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_2);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_2);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_2);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_2 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_2 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_2 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_2 *a;
+ GFC_INTEGER_2 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_2 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_2 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_i4.c b/libgfortran/generated/nca_minmax_i4.c
new file mode 100644
index 0000000..5e37586
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_i4.c
@@ -0,0 +1,653 @@
+/* Implementation of the collective subroutines MAX, MIN and SUM.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_INTEGER_4)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_i4);
+
+void
+nca_collsub_max_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_4 *a, *b;
+ GFC_INTEGER_4 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_i4);
+
+void
+nca_collsub_min_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_4 *a, *b;
+ GFC_INTEGER_4 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_i4);
+
+void
+nca_collsub_sum_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_4 *a, *b;
+ GFC_INTEGER_4 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_i4 (gfc_array_i4 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_i4);
+
+void
+nca_collsub_max_array_i4 (gfc_array_i4 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_4 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_4);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_4);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_4);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_4 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_4 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_4 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_4 *a;
+ GFC_INTEGER_4 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_4 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_4 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_i4 (gfc_array_i4 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_i4);
+
+void
+nca_collsub_min_array_i4 (gfc_array_i4 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_4 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_4);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_4);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_4);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_4 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_4 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_4 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_4 *a;
+ GFC_INTEGER_4 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_4 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_4 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_i4 (gfc_array_i4 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_i4);
+
+void
+nca_collsub_sum_array_i4 (gfc_array_i4 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_4 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_4);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_4);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_4);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_4 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_4 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_4 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_4 *a;
+ GFC_INTEGER_4 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_4 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_4 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
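All of the scalar and array reductions in these generated files share one combining loop: at step cbit, every image whose image_num is a multiple of 2^(cbit + 1) folds the slot of the image 2^cbit positions above it into its own slot, and collsub_sync separates the steps, so after about log2(num_images) rounds image zero's slot holds the result; that is what the ababab diagram in the comments depicts. The stand-alone sketch below reproduces just that pairing pattern for a MAX reduction. It is illustrative only: tree_reduce_max, the plain int array standing in for the per-image shared slots, and the sequential outer loop that takes the place of the barrier are invented for the sketch and are not libgfortran interfaces.

#include <stdio.h>

/* Minimal sketch of the binary-tree reduction pattern used by the
   generated nca_collsub_* functions.  The outer loop over cbit stands
   in for the collsub_sync barrier between steps; in the runtime every
   image executes its own iteration concurrently.  */

static void
tree_reduce_max (int *val, int num_images)
{
  for (int cbit = 0; (num_images >> cbit) != 0; cbit++)
    {
      int imoffset = 1 << cbit;
      for (int image = 0; image < num_images; image++)
        {
          /* Active at this step: images whose number is a multiple of
             2^(cbit+1), i.e. whose low cbit+1 bits are zero.  The
             generated loop checks this one bit per step.  */
          if (image % (2 * imoffset) == 0
              && image + imoffset < num_images
              && val[image + imoffset] > val[image])
            val[image] = val[image + imoffset];
        }
    }
}

int
main (void)
{
  int v[6] = { 3, 9, 1, 7, 4, 8 };  /* one slot per "image" */
  tree_reduce_max (v, 6);
  printf ("reduced maximum: %d\n", v[0]);  /* prints 9 */
  return 0;
}

The result is left in slot 0, just as the runtime leaves it in image zero's part of the buffer before the copy-back guarded by result_image.
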
diff --git a/libgfortran/generated/nca_minmax_i8.c b/libgfortran/generated/nca_minmax_i8.c
new file mode 100644
index 0000000..b3dc861
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_i8.c
@@ -0,0 +1,653 @@
+/* Implementation of the CO_MAX, CO_MIN and CO_SUM collective subroutines.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_INTEGER_8)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_i8);
+
+void
+nca_collsub_max_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_8 *a, *b;
+ GFC_INTEGER_8 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_i8);
+
+void
+nca_collsub_min_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_8 *a, *b;
+ GFC_INTEGER_8 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_i8);
+
+void
+nca_collsub_sum_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_8 *a, *b;
+ GFC_INTEGER_8 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_i8 (gfc_array_i8 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_i8);
+
+void
+nca_collsub_max_array_i8 (gfc_array_i8 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_8 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_8);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_8);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_8);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_8 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_8 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_8 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_8 *a;
+ GFC_INTEGER_8 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_8 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_8 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_i8 (gfc_array_i8 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_i8);
+
+void
+nca_collsub_min_array_i8 (gfc_array_i8 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_8 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_8);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_8);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_8);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_8 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_8 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_8 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_8 *a;
+ GFC_INTEGER_8 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_8 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_8 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_i8 (gfc_array_i8 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_i8);
+
+void
+nca_collsub_sum_array_i8 (gfc_array_i8 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_8 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_8);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_8);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_8);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_8 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_8 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_8 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_8 *a;
+ GFC_INTEGER_8 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_8 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_8 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
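The non-contiguous branch of the array functions packs the section one element at a time. The packed flag is computed by letting ssize accumulate the element size times the extents seen so far, which is exactly the byte stride each dimension would have if the array were contiguous, so any mismatch with the real stride clears the flag. When that happens, count[], extent[] and stride[] drive an odometer-style walk: the innermost counter advances every element, and a counter that reaches its extent is reset while the pointer is rewound by that dimension's full span and the next dimension advances. The sketch below runs the same walk over a made-up two-dimensional section; the storage, the byte strides and all names are assumptions for illustration, not the libgfortran descriptor API.

#include <stdio.h>
#include <stddef.h>

int
main (void)
{
  int storage[12];
  for (int i = 0; i < 12; i++)
    storage[i] = i;

  /* A 3x2 section taking every second element of storage, column-major:
     elements 0 2 4 / 6 8 10.  Strides are in bytes, as in the generated
     code after multiplying by span.  */
  int dim = 2;
  ptrdiff_t extent[2] = { 3, 2 };
  ptrdiff_t stride[2] = { 2 * sizeof (int), 6 * sizeof (int) };
  ptrdiff_t count[2] = { 0, 0 };

  int contig[6];
  int *dest = contig;
  char *src = (char *) storage;

  while (src)
    {
      *dest++ = *(int *) src;
      src += stride[0];
      count[0]++;
      /* Roll over like an odometer: reset a finished dimension, rewind
         the pointer by its full span, and advance the next one.  */
      for (int n = 0; count[n] == extent[n];)
        {
          count[n] = 0;
          src -= stride[n] * extent[n];
          n++;
          if (n == dim)
            {
              src = NULL;
              break;
            }
          count[n]++;
          src += stride[n];
        }
    }

  for (int i = 0; i < 6; i++)
    printf ("%d ", contig[i]);  /* prints 0 2 4 6 8 10 */
  printf ("\n");
  return 0;
}

After the reduction the same loop runs in the other direction, scattering the contiguous result from the shared buffer back into the strided array.
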
diff --git a/libgfortran/generated/nca_minmax_r10.c b/libgfortran/generated/nca_minmax_r10.c
new file mode 100644
index 0000000..10f7324
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_r10.c
@@ -0,0 +1,653 @@
+/* Implementation of the CO_MAX, CO_MIN and CO_SUM collective subroutines.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_REAL_10)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_r10 (GFC_REAL_10 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_r10);
+
+void
+nca_collsub_max_scalar_r10 (GFC_REAL_10 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_REAL_10 *a, *b;
+ GFC_REAL_10 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_r10 (GFC_REAL_10 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_r10);
+
+void
+nca_collsub_min_scalar_r10 (GFC_REAL_10 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_REAL_10 *a, *b;
+ GFC_REAL_10 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_r10 (GFC_REAL_10 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_r10);
+
+void
+nca_collsub_sum_scalar_r10 (GFC_REAL_10 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_REAL_10 *a, *b;
+ GFC_REAL_10 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_r10 (gfc_array_r10 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_r10);
+
+void
+nca_collsub_max_array_r10 (gfc_array_r10 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_10 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_10 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_10);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_10);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_10);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_10 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_10 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_10 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_10 *a;
+ GFC_REAL_10 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_REAL_10 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_10 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_r10 (gfc_array_r10 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_r10);
+
+void
+nca_collsub_min_array_r10 (gfc_array_r10 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_10 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_10 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_10);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_10);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_10);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_10 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_10 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_10 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_10 *a;
+ GFC_REAL_10 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_REAL_10 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_10 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_r10 (gfc_array_r10 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_r10);
+
+void
+nca_collsub_sum_array_r10 (gfc_array_r10 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_10 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_10 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_10);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_10);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_10);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_10 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_10 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_10 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_10 *a;
+ GFC_REAL_10 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_REAL_10 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_10 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
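The shared buffer returned by get_collsub_buf is laid out as one num_elems-wide slot per image, so each image writes its contribution at offset image_num * num_elems and the reduced data ends up in slot zero. The copy-back test then decides who reads it: a null result_image corresponds to a call without RESULT_IMAGE, in which case every image gets the result, and otherwise the 1-based Fortran image number is turned into the runtime's 0-based image_num by subtracting one. A small stand-alone illustration of that layout and test follows; the helper name and the printed table are invented for the sketch.

#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

/* Does this image copy the reduced data back?  A null result_image
   means the Fortran call had no RESULT_IMAGE argument, so every image
   receives the result; otherwise only the named image does.  */
static bool
copies_result_back (const int *result_image, int image_num)
{
  return !result_image || (*result_image - 1) == image_num;
}

int
main (void)
{
  int num_images = 4;
  ptrdiff_t num_elems = 10;          /* elements contributed per image */
  int result_image = 3;              /* Fortran image 3 == image_num 2 */

  for (int image_num = 0; image_num < num_images; image_num++)
    printf ("image_num %d: slot starts at element %td, copies back: %s\n",
            image_num, image_num * num_elems,
            copies_result_back (&result_image, image_num) ? "yes" : "no");
  return 0;
}
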
diff --git a/libgfortran/generated/nca_minmax_r16.c b/libgfortran/generated/nca_minmax_r16.c
new file mode 100644
index 0000000..a0a0a51
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_r16.c
@@ -0,0 +1,653 @@
+/* Implementation of the CO_MAX, CO_MIN and CO_SUM collective subroutines.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_REAL_16)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_r16 (GFC_REAL_16 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_r16);
+
+void
+nca_collsub_max_scalar_r16 (GFC_REAL_16 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_REAL_16 *a, *b;
+ GFC_REAL_16 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_r16 (GFC_REAL_16 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_r16);
+
+void
+nca_collsub_min_scalar_r16 (GFC_REAL_16 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_REAL_16 *a, *b;
+ GFC_REAL_16 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_r16 (GFC_REAL_16 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_r16);
+
+void
+nca_collsub_sum_scalar_r16 (GFC_REAL_16 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_REAL_16 *a, *b;
+ GFC_REAL_16 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_r16 (gfc_array_r16 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_r16);
+
+void
+nca_collsub_max_array_r16 (gfc_array_r16 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_16 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_16);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_16);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_16);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_16 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_16 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_16 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_16 *a;
+ GFC_REAL_16 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_REAL_16 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_16 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_r16 (gfc_array_r16 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_r16);
+
+void
+nca_collsub_min_array_r16 (gfc_array_r16 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_16 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_16);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_16);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_16);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_16 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_16 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_16 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_16 *a;
+ GFC_REAL_16 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_REAL_16 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_16 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_r16 (gfc_array_r16 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_r16);
+
+void
+nca_collsub_sum_array_r16 (gfc_array_r16 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_16 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_16);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_16);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_16);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_16 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_16 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_16 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_16 *a;
+ GFC_REAL_16 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_REAL_16 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_16 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
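The reduction loop in the generated functions above is a binary tournament: in round k, every image whose number has bit k clear combines its slice of the shared buffer with the slice of the image 2^k positions higher, and a barrier separates the rounds. After roughly log2(num_images) rounds, image 0 holds the reduced data; images that drop out of the combining loop early must still execute the remaining barriers so the images still reducing do not block. A minimal, self-contained sketch of this pattern, assuming a plain shared buffer buf of nelems doubles per image, a 0-based image number me, an image count nimages, and a barrier callback sync (illustrative names; the real code uses collsub_sync and the collsub_iface bookkeeping):

#include <stddef.h>

/* Illustrative only: tournament sum of nelems doubles per image over a
   shared buffer.  Mirrors the cbit loop of the generated functions.  */
static void
tournament_sum (double *buf, ptrdiff_t nelems, int me, int nimages,
                void (*sync) (void))
{
  double *mine = buf + (ptrdiff_t) me * nelems;
  int cbit = 0;

  sync ();   /* Every image has written its own slice.  */
  for (; ((me >> cbit) & 1) == 0 && (nimages >> cbit) != 0; cbit++)
    {
      int partner = me + (1 << cbit);
      if (partner < nimages)
        {
          double *theirs = buf + (ptrdiff_t) partner * nelems;
          for (ptrdiff_t i = 0; i < nelems; i++)
            mine[i] += theirs[i];
        }
      sync ();   /* Round cbit is complete on all images.  */
    }
  /* Images that left the loop early still take part in the remaining
     barriers; otherwise the images still reducing would wait forever.  */
  for (; (nimages >> cbit) != 0; cbit++)
    sync ();
}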
diff --git a/libgfortran/generated/nca_minmax_r4.c b/libgfortran/generated/nca_minmax_r4.c
new file mode 100644
index 0000000..0eb3f1b
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_r4.c
@@ -0,0 +1,653 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_REAL_4)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_r4 (GFC_REAL_4 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_r4);
+
+void
+nca_collsub_max_scalar_r4 (GFC_REAL_4 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_REAL_4 *a, *b;
+ GFC_REAL_4 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_r4 (GFC_REAL_4 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_r4);
+
+void
+nca_collsub_min_scalar_r4 (GFC_REAL_4 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_REAL_4 *a, *b;
+ GFC_REAL_4 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_r4 (GFC_REAL_4 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_r4);
+
+void
+nca_collsub_sum_scalar_r4 (GFC_REAL_4 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_REAL_4 *a, *b;
+ GFC_REAL_4 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_r4 (gfc_array_r4 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_r4);
+
+void
+nca_collsub_max_array_r4 (gfc_array_r4 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_4 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_4);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_4);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_4);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_4 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_4 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_4 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_4 *a;
+ GFC_REAL_4 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_REAL_4 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_4 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_r4 (gfc_array_r4 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_r4);
+
+void
+nca_collsub_min_array_r4 (gfc_array_r4 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_4 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_4);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_4);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_4);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_4 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_4 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_4 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_4 *a;
+ GFC_REAL_4 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_REAL_4 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_4 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_r4 (gfc_array_r4 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_r4);
+
+void
+nca_collsub_sum_array_r4 (gfc_array_r4 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_4 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_4);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_4);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_4);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_4 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_4 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_4 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_4 *a;
+ GFC_REAL_4 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_REAL_4 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_4 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
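When the array is not contiguous, the functions above copy it element by element with a per-dimension counter that behaves like an odometer: the first dimension advances on every element, and whenever a counter reaches its extent it is reset, the source pointer is rewound by stride * extent, and the next dimension advances, until the last dimension overflows and the walk ends. A standalone sketch of that traversal, assuming byte-based strides, positive extents (the generated code returns early for empty arrays) and a rank of at most 16; names such as pack_strided and byte_stride are illustrative, not libgfortran identifiers:

#include <stddef.h>
#include <string.h>

/* Illustrative only: copy a strided array of the given rank into a
   contiguous buffer using the "odometer" walk shown above.  */
static void
pack_strided (char *restrict dest, const char *src, size_t elem_size,
              int rank, const ptrdiff_t *byte_stride, const ptrdiff_t *extent)
{
  ptrdiff_t count[16] = { 0 };

  while (src)
    {
      memcpy (dest, src, elem_size);    /* Copy one element.  */
      dest += elem_size;
      src += byte_stride[0];
      count[0]++;
      for (int n = 0; count[n] == extent[n]; )
        {
          /* Dimension n is exhausted: rewind it and carry into n + 1.  */
          count[n] = 0;
          src -= byte_stride[n] * extent[n];
          n++;
          if (n == rank)
            {
              src = NULL;               /* The whole array has been visited.  */
              break;
            }
          count[n]++;
          src += byte_stride[n];
        }
    }
}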
diff --git a/libgfortran/generated/nca_minmax_r8.c b/libgfortran/generated/nca_minmax_r8.c
new file mode 100644
index 0000000..3b3e962
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_r8.c
@@ -0,0 +1,653 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_REAL_8)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_r8 (GFC_REAL_8 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_r8);
+
+void
+nca_collsub_max_scalar_r8 (GFC_REAL_8 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_REAL_8 *a, *b;
+ GFC_REAL_8 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_r8 (GFC_REAL_8 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_r8);
+
+void
+nca_collsub_min_scalar_r8 (GFC_REAL_8 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_REAL_8 *a, *b;
+ GFC_REAL_8 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_r8 (GFC_REAL_8 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_r8);
+
+void
+nca_collsub_sum_scalar_r8 (GFC_REAL_8 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_REAL_8 *a, *b;
+ GFC_REAL_8 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_r8 (gfc_array_r8 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_r8);
+
+void
+nca_collsub_max_array_r8 (gfc_array_r8 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_8 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_8);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_8);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_8);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_8 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_8 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_8 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_8 *a;
+ GFC_REAL_8 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_REAL_8 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_8 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_r8 (gfc_array_r8 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_r8);
+
+void
+nca_collsub_min_array_r8 (gfc_array_r8 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_8 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_8);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_8);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_8);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_8 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_8 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_8 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_8 *a;
+ GFC_REAL_8 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_REAL_8 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_8 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_r8 (gfc_array_r8 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_r8);
+
+void
+nca_collsub_sum_array_r8 (gfc_array_r8 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_8 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_8);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_8);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_8);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_8 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_8 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_8 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_8 *a;
+ GFC_REAL_8 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_REAL_8 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_8 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
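One detail shared by all of these entry points is the meaning of result_image: it is the optional, 1-based image number from the Fortran CO_MAX/CO_MIN/CO_SUM call, while this_image.image_num is 0-based, so the test (*result_image - 1) == this_image.image_num selects exactly the requested image, and a null pointer (argument absent) means every image copies the reduced data back. A tiny illustrative helper capturing that check (receives_result is not part of the runtime):

#include <stdbool.h>

/* Illustrative only: decide whether this image stores the reduced result.
   result_image is the optional 1-based image number (NULL when absent);
   image_num is the runtime's 0-based image number.  */
static bool
receives_result (const int *result_image, int image_num)
{
  return result_image == NULL || *result_image - 1 == image_num;
}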
diff --git a/libgfortran/generated/nca_minmax_s1.c b/libgfortran/generated/nca_minmax_s1.c
new file mode 100644
index 0000000..b081452
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_s1.c
@@ -0,0 +1,494 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_UINTEGER_1)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+#if 1 == 4
+
+/* Compare wide character types, which are handled internally as
+ unsigned 4-byte integers. */
+static inline int
+memcmp4 (const void *a, const void *b, size_t len)
+{
+ const GFC_UINTEGER_4 *pa = a;
+ const GFC_UINTEGER_4 *pb = b;
+ while (len-- > 0)
+ {
+ if (*pa != *pb)
+ return *pa < *pb ? -1 : 1;
+ pa ++;
+ pb ++;
+ }
+ return 0;
+}
+
+#endif
+void nca_collsub_max_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image,
+ int *stat, char *errmsg, index_type char_len, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_s1);
+
+void
+nca_collsub_max_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_UINTEGER_1 *a, *b;
+ GFC_UINTEGER_1 *buffer, *this_image_buf;
+ collsub_iface *ci;
+ index_type type_size;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof (GFC_UINTEGER_1);
+ buffer = get_collsub_buf (ci, type_size * local->num_images);
+ this_image_buf = buffer + this_image.image_num * char_len;
+ memcpy (this_image_buf, obj, type_size);
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset * char_len;
+ if (memcmp (b, a, char_len) > 0)
+ memcpy (a, b, type_size);
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ memcpy (obj, buffer, type_size);
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image,
+ int *stat, char *errmsg, index_type char_len, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_s1);
+
+void
+nca_collsub_min_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_UINTEGER_1 *a, *b;
+ GFC_UINTEGER_1 *buffer, *this_image_buf;
+ collsub_iface *ci;
+ index_type type_size;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof (GFC_UINTEGER_1);
+ buffer = get_collsub_buf (ci, type_size * local->num_images);
+ this_image_buf = buffer + this_image.image_num * char_len;
+ memcpy (this_image_buf, obj, type_size);
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset * char_len;
+ if (memcmp (b, a, char_len) < 0)
+ memcpy (a, b, type_size);
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ memcpy (obj, buffer, type_size);
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_s1 (gfc_array_s1 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type char_len,
+ index_type errmsg_len);
+export_proto (nca_collsub_max_array_s1);
+
+void
+nca_collsub_max_array_s1 (gfc_array_s1 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */
+ index_type extent[GFC_MAX_DIMENSIONS];
+ char *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ char *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ index_type type_size;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof (GFC_UINTEGER_1);
+ dim = GFC_DESCRIPTOR_RANK (array);
+ num_elems = 1;
+ ssize = type_size;
+ packed = true;
+ span = array->span != 0 ? array->span : type_size;
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n))
+ packed = false;
+
+ num_elems *= extent[n];
+ }
+
+ ssize = num_elems * type_size;
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * ssize;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ char *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+
+ memcpy (dest, src, type_size);
+ dest += type_size;
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ char *other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_UINTEGER_1 *a;
+ GFC_UINTEGER_1 *b;
+
+ other_shared_ptr = this_shared_ptr + imoffset * ssize;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = (GFC_UINTEGER_1 *) (this_shared_ptr + i * type_size);
+ b = (GFC_UINTEGER_1 *) (other_shared_ptr + i * type_size);
+ if (memcmp (b, a, char_len) > 0)
+ memcpy (a, b, type_size);
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ char *src = buffer;
+ char *restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ memcpy (dest, src, type_size);
+ src += span;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_s1 (gfc_array_s1 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type char_len,
+ index_type errmsg_len);
+export_proto (nca_collsub_min_array_s1);
+
+void
+nca_collsub_min_array_s1 (gfc_array_s1 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */
+ index_type extent[GFC_MAX_DIMENSIONS];
+ char *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ char *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ index_type type_size;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof (GFC_UINTEGER_1);
+ dim = GFC_DESCRIPTOR_RANK (array);
+ num_elems = 1;
+ ssize = type_size;
+ packed = true;
+ span = array->span != 0 ? array->span : type_size;
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n))
+ packed = false;
+
+ num_elems *= extent[n];
+ }
+
+ ssize = num_elems * type_size;
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * ssize;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ char *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+
+ memcpy (dest, src, type_size);
+ dest += type_size;
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ char *other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_UINTEGER_1 *a;
+ GFC_UINTEGER_1 *b;
+
+ other_shared_ptr = this_shared_ptr + imoffset * ssize;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = (GFC_UINTEGER_1 *) (this_shared_ptr + i * type_size);
+ b = (GFC_UINTEGER_1 *) (other_shared_ptr + i * type_size);
+ if (memcmp (b, a, char_len) < 0)
+ memcpy (a, b, type_size);
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ char *src = buffer;
+ char *restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ memcpy (dest, src, type_size);
+ src += span;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
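The character variants reduce fixed-length strings of char_len code units. For kind=1, plain memcmp over char_len bytes already gives lexicographic order. The kind=4 variant below defines a helper, memcmp4, because memcmp would compare the bytes of each 4-byte code unit in memory order and, on little-endian targets, that does not order strings by code point. A minimal sketch of one element-wise "take the maximum" step on such strings, assuming 32-bit unsigned code units (take_max_ucs4 is illustrative, not a runtime function):

#include <stdint.h>
#include <string.h>
#include <stddef.h>

/* Illustrative only: lexicographic maximum of two fixed-length UCS-4
   strings, as done by the kind=4 reduction with memcmp4 + memcpy.  */
static void
take_max_ucs4 (uint32_t *restrict a, const uint32_t *restrict b,
               size_t char_len)
{
  for (size_t i = 0; i < char_len; i++)
    if (a[i] != b[i])
      {
        if (b[i] > a[i])
          memcpy (a, b, char_len * sizeof (uint32_t));  /* b wins; copy it.  */
        return;        /* Strings differ here; the order is decided.  */
      }
  /* Equal strings: keep a unchanged.  */
}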
diff --git a/libgfortran/generated/nca_minmax_s4.c b/libgfortran/generated/nca_minmax_s4.c
new file mode 100644
index 0000000..b202fda
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_s4.c
@@ -0,0 +1,494 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_UINTEGER_4)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+#if 4 == 4
+
+/* Compare wide character types, which are handled internally as
+ unsigned 4-byte integers. */
+static inline int
+memcmp4 (const void *a, const void *b, size_t len)
+{
+ const GFC_UINTEGER_4 *pa = a;
+ const GFC_UINTEGER_4 *pb = b;
+ while (len-- > 0)
+ {
+ if (*pa != *pb)
+ return *pa < *pb ? -1 : 1;
+ pa ++;
+ pb ++;
+ }
+ return 0;
+}
+
+#endif
+void nca_collsub_max_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image,
+ int *stat, char *errmsg, index_type char_len, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_s4);
+
+void
+nca_collsub_max_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_UINTEGER_4 *a, *b;
+ GFC_UINTEGER_4 *buffer, *this_image_buf;
+ collsub_iface *ci;
+ index_type type_size;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof (GFC_UINTEGER_4);
+ buffer = get_collsub_buf (ci, type_size * local->num_images);
+ this_image_buf = buffer + this_image.image_num * char_len;
+ memcpy (this_image_buf, obj, type_size);
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset * char_len;
+ if (memcmp4 (b, a, char_len) > 0)
+ memcpy (a, b, type_size);
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ memcpy (obj, buffer, type_size);
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image,
+ int *stat, char *errmsg, index_type char_len, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_s4);
+
+void
+nca_collsub_min_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_UINTEGER_4 *a, *b;
+ GFC_UINTEGER_4 *buffer, *this_image_buf;
+ collsub_iface *ci;
+ index_type type_size;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof (GFC_UINTEGER_4);
+ buffer = get_collsub_buf (ci, type_size * local->num_images);
+ this_image_buf = buffer + this_image.image_num * char_len;
+ memcpy (this_image_buf, obj, type_size);
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset * char_len;
+ if (memcmp4 (b, a, char_len) < 0)
+ memcpy (a, b, type_size);
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ memcpy (obj, buffer, type_size);
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_s4 (gfc_array_s4 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type char_len,
+ index_type errmsg_len);
+export_proto (nca_collsub_max_array_s4);
+
+void
+nca_collsub_max_array_s4 (gfc_array_s4 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */
+ index_type extent[GFC_MAX_DIMENSIONS];
+ char *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ char *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ index_type type_size;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof (GFC_UINTEGER_4);
+ dim = GFC_DESCRIPTOR_RANK (array);
+ num_elems = 1;
+ ssize = type_size;
+ packed = true;
+ span = array->span != 0 ? array->span : type_size;
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n))
+ packed = false;
+
+ num_elems *= extent[n];
+ }
+
+ ssize = num_elems * type_size;
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * ssize;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ char *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+
+ memcpy (dest, src, type_size);
+ dest += type_size;
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ char *other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_UINTEGER_4 *a;
+ GFC_UINTEGER_4 *b;
+
+ other_shared_ptr = this_shared_ptr + imoffset * ssize;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = (GFC_UINTEGER_4 *) (this_shared_ptr + i * type_size);
+ b = (GFC_UINTEGER_4 *) (other_shared_ptr + i * type_size);
+ if (memcmp4 (b, a, char_len) > 0)
+ memcpy (a, b, type_size);
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ char *src = buffer;
+ char *restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ memcpy (dest, src, type_size);
+ src += span;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_s4 (gfc_array_s4 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type char_len,
+ index_type errmsg_len);
+export_proto (nca_collsub_min_array_s4);
+
+void
+nca_collsub_min_array_s4 (gfc_array_s4 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */
+ index_type extent[GFC_MAX_DIMENSIONS];
+ char *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ char *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ index_type type_size;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof (GFC_UINTEGER_4);
+ dim = GFC_DESCRIPTOR_RANK (array);
+ num_elems = 1;
+ ssize = type_size;
+ packed = true;
+ span = array->span != 0 ? array->span : type_size;
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n))
+ packed = false;
+
+ num_elems *= extent[n];
+ }
+
+ ssize = num_elems * type_size;
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * ssize;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ char *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+
+ memcpy (dest, src, type_size);
+ dest += type_size;
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ char *other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_UINTEGER_4 *a;
+ GFC_UINTEGER_4 *b;
+
+ other_shared_ptr = this_shared_ptr + imoffset * ssize;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = (GFC_UINTEGER_4 *) (this_shared_ptr + i * type_size);
+ b = (GFC_UINTEGER_4 *) (other_shared_ptr + i * type_size);
+ if (memcmp4 (b, a, char_len) < 0)
+ memcpy (a, b, type_size);
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ char *src = buffer;
+ char *restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ memcpy (dest, src, type_size);
+ src += span;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/libgfortran.h b/libgfortran/libgfortran.h
index 8c539e0..a6b0d5a 100644
--- a/libgfortran/libgfortran.h
+++ b/libgfortran/libgfortran.h
@@ -403,6 +403,7 @@ struct {\
}
typedef GFC_FULL_ARRAY_DESCRIPTOR (GFC_MAX_DIMENSIONS, GFC_INTEGER_4) gfc_full_array_i4;
+typedef GFC_FULL_ARRAY_DESCRIPTOR (GFC_MAX_DIMENSIONS, char) gfc_full_array_char;
#define GFC_DESCRIPTOR_RANK(desc) ((desc)->dtype.rank)
#define GFC_DESCRIPTOR_TYPE(desc) ((desc)->dtype.type)
diff --git a/libgfortran/m4/nca-minmax-s.m4 b/libgfortran/m4/nca-minmax-s.m4
new file mode 100644
index 0000000..2d8891f
--- /dev/null
+++ b/libgfortran/m4/nca-minmax-s.m4
@@ -0,0 +1,289 @@
+dnl Support macro file for intrinsic functions.
+dnl Contains the generic sections of gfortran functions.
+dnl This file is part of the GNU Fortran Runtime Library (libgfortran)
+dnl Distributed under the GNU GPL with exception. See COPYING for details.
+dnl
+`/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */'
+
+include(iparm.m4)dnl
+define(`compare_fcn',`ifelse(rtype_kind,1,memcmp,memcmp4)')dnl
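+dnl compare_fcn expands to plain memcmp for kind=1 character data and to
+dnl the memcmp4 helper defined further down for kind=4 (wide) characters.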
+define(SCALAR_FUNCTION,`void nca_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image,
+ int *stat, char *errmsg, index_type char_len, index_type errmsg_len);
+export_proto(nca_collsub_'$1`_scalar_'rtype_code`);
+
+void
+nca_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ 'rtype_name` *a, *b;
+ 'rtype_name` *buffer, *this_image_buf;
+ collsub_iface *ci;
+ index_type type_size;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof ('rtype_name`);
+ buffer = get_collsub_buf (ci, type_size * local->num_images);
+ this_image_buf = buffer + this_image.image_num * char_len;
+ memcpy (this_image_buf, obj, type_size);
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset * char_len;
+ if ('compare_fcn` (b, a, char_len) '$2` 0)
+ memcpy (a, b, type_size);
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ memcpy (obj, buffer, type_size);
+
+ finish_collective_subroutine (ci);
+
+}
+
+')dnl
+define(ARRAY_FUNCTION,dnl
+`void nca_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type char_len,
+ index_type errmsg_len);
+export_proto (nca_collsub_'$1`_array_'rtype_code`);
+
+void
+nca_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */
+ index_type extent[GFC_MAX_DIMENSIONS];
+ char *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ char *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ index_type type_size;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof ('rtype_name`);
+ dim = GFC_DESCRIPTOR_RANK (array);
+ num_elems = 1;
+ ssize = type_size;
+ packed = true;
+ span = array->span != 0 ? array->span : type_size;
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n))
+ packed = false;
+
+ num_elems *= extent[n];
+ }
+
+ ssize = num_elems * type_size;
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * ssize;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ char *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+
+ memcpy (dest, src, type_size);
+ dest += type_size;
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
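+  /* Illustrative walk-through, assuming 6 images: at cbit = 0 images 0,
+     2 and 4 take the minimum or maximum with their neighbour at offset
+     1; at cbit = 1 image 0 combines with the partial result of image 2,
+     while image 4 has no partner; at cbit = 2 image 0 combines with
+     image 4, leaving the final result in the buffer of image 0.  An
+     image leaves this loop as soon as bit cbit of its image number is
+     set, but keeps calling collsub_sync below so that all images pass
+     the same number of barriers.  */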
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ char *other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ 'rtype_name` *a;
+ 'rtype_name` *b;
+
+ other_shared_ptr = this_shared_ptr + imoffset * ssize;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = ('rtype_name` *) (this_shared_ptr + i * type_size);
+ b = ('rtype_name` *) (other_shared_ptr + i * type_size);
+ if ('compare_fcn` (b, a, char_len) '$2` 0)
+ memcpy (a, b, type_size);
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
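+  /* Only the image that is to receive the result, or every image when
+     result_image is absent, copies the reduced data back from the
+     shared buffer into the original array.  */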
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ char *src = buffer;
+ char *restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ memcpy (dest, src, type_size);
+ src += span;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+')
+`
+#include "libgfortran.h"
+
+#if defined (HAVE_'rtype_name`)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+#if 'rtype_kind` == 4
+
+/* Compare wide character types, which are handled internally as
+ unsigned 4-byte integers. */
+static inline int
+memcmp4 (const void *a, const void *b, size_t len)
+{
+ const GFC_UINTEGER_4 *pa = a;
+ const GFC_UINTEGER_4 *pb = b;
+ while (len-- > 0)
+ {
+ if (*pa != *pb)
+ return *pa < *pb ? -1 : 1;
+ pa ++;
+ pb ++;
+ }
+ return 0;
+}
+
+#endif
+'SCALAR_FUNCTION(`max',`>')dnl
+SCALAR_FUNCTION(`min',`<')dnl
+ARRAY_FUNCTION(`max',`>')dnl
+ARRAY_FUNCTION(`min',`<')dnl
+`
+#endif
+'
diff --git a/libgfortran/m4/nca_minmax.m4 b/libgfortran/m4/nca_minmax.m4
new file mode 100644
index 0000000..76070c1
--- /dev/null
+++ b/libgfortran/m4/nca_minmax.m4
@@ -0,0 +1,259 @@
+dnl Support macro file for intrinsic functions.
+dnl Contains the generic sections of gfortran functions.
+dnl This file is part of the GNU Fortran Runtime Library (libgfortran)
+dnl Distributed under the GNU GPL with exception. See COPYING for details.
+dnl
+`/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */'
+
+include(iparm.m4)dnl
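+dnl SCALAR_FUNCTION and ARRAY_FUNCTION take the operation name as $1 and,
+dnl as $2, the C statement that folds *b into *a (see the invocations at
+dnl the bottom of this file).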
+define(SCALAR_FUNCTION,`void nca_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_'$1`_scalar_'rtype_code`);
+
+void
+nca_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ 'rtype_name` *a, *b;
+ 'rtype_name` *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof('rtype_name`) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ '$2`
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+')dnl
+define(ARRAY_FUNCTION,dnl
+`void nca_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_'$1`_array_'rtype_code`);
+
+void
+nca_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ 'rtype_name` *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ 'rtype_name` *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof ('rtype_name`);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof ('rtype_name`);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof ('rtype_name`);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ 'rtype_name` *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *(('rtype_name` *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ 'rtype_name` * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ 'rtype_name` *a;
+ 'rtype_name` *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ '$2`
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ 'rtype_name` *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *(('rtype_name` * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+')
+`#include "libgfortran.h"
+
+#if defined (HAVE_'rtype_name`)'
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+SCALAR_FUNCTION(`max',`if (*b > *a)
+ *a = *b;')dnl
+SCALAR_FUNCTION(`min',`if (*b < *a)
+ *a = *b;')dnl
+SCALAR_FUNCTION(`sum',`*a += *b;')dnl
+ARRAY_FUNCTION(`max',`if (*b > *a)
+ *a = *b;')dnl
+ARRAY_FUNCTION(`min',`if (*b < *a)
+ *a = *b;')dnl
+ARRAY_FUNCTION(`sum',`*a += *b;')dnl
+`
+#endif
+'
diff --git a/libgfortran/nca/.tags b/libgfortran/nca/.tags
new file mode 100644
index 0000000..07d260d
--- /dev/null
+++ b/libgfortran/nca/.tags
@@ -0,0 +1,275 @@
+!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/
+!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/
+!_TAG_PROGRAM_AUTHOR Darren Hiebert /dhiebert@users.sourceforge.net/
+!_TAG_PROGRAM_NAME Exuberant Ctags //
+!_TAG_PROGRAM_URL http://ctags.sourceforge.net /official site/
+!_TAG_PROGRAM_VERSION 5.9~svn20110310 //
+ALIGN_TO serialize.c 47;" d file:
+ALLOC_H alloc.h 26;" d
+COARRAY_LOCK_HDR lock.h 27;" d
+COARRAY_NATIVE_HDR libcoarraynative.h 30;" d
+COLLECTIVE_SUBROUTINE_HDR collective_subroutine.h 3;" d
+CRITICAL_LOOKAHEAD hashmap.c 30;" d file:
+DEBUG_PRINTF libcoarraynative.h 45;" d
+DEBUG_PRINTF libcoarraynative.h 47;" d
+FILL_VALUE serialize.c 57;" d file:
+GFC_NCA_EVENT_COARRAY wrapper.c /^ GFC_NCA_EVENT_COARRAY,$/;" e enum:gfc_coarray_allocation_type file:
+GFC_NCA_LOCK_COARRAY wrapper.c /^ GFC_NCA_LOCK_COARRAY,$/;" e enum:gfc_coarray_allocation_type file:
+GFC_NCA_NORMAL_COARRAY wrapper.c /^ GFC_NCA_NORMAL_COARRAY = 3,$/;" e enum:gfc_coarray_allocation_type file:
+GFORTRAN_ENV_NUM_IMAGES coarraynative.c 37;" d file:
+HASHMAP_H hashmap.h 69;" d
+IMAGE_FAILED libcoarraynative.h /^ IMAGE_FAILED$/;" e enum:__anon7
+IMAGE_FAILED master.c /^ IMAGE_FAILED$/;" e enum:__anon5 file:
+IMAGE_OK libcoarraynative.h /^ IMAGE_OK,$/;" e enum:__anon7
+IMAGE_OK master.c /^ IMAGE_OK,$/;" e enum:__anon5 file:
+IMAGE_UNKNOWN libcoarraynative.h /^ IMAGE_UNKNOWN = 0,$/;" e enum:__anon7
+IMAGE_UNKNOWN master.c /^ IMAGE_UNKNOWN = 0,$/;" e enum:__anon5 file:
+INDENT hashmap.c 403;" d file:
+INDENT hashmap.c 414;" d file:
+INDENT hashmap.c 415;" d file:
+INDENT hashmap.c 429;" d file:
+INDENT hashmap.c 430;" d file:
+INITIAL_BITNUM hashmap.c 28;" d file:
+INITIAL_SIZE hashmap.c 29;" d file:
+IPSYNC_HDR sync.h 26;" d
+ITER malloc_test.c 13;" d file:
+MAX_ALIGN allocator.c 50;" d file:
+MAX_NUM malloc_test.c 10;" d file:
+MEMOBJ_NAME util.c 11;" d file:
+MIN_NUM malloc_test.c 11;" d file:
+NUM_BITS malloc_test.c 8;" d file:
+NUM_SIZES malloc_test.c 9;" d file:
+PE hashmap.c 402;" d file:
+PTR_BITS util.h 32;" d
+SHARED_ALLOCATOR_HDR allocator.h 2;" d
+SHARED_MEMORY_H shared_memory.h 77;" d
+SHARED_MEMORY_RAW_ALLOC shared_memory.h 50;" d
+SHARED_MEMORY_RAW_ALLOC_PTR shared_memory.h 53;" d
+SHMPTR_AS shared_memory.h 46;" d
+SHMPTR_DEREF shared_memory.h 44;" d
+SHMPTR_EQUALS shared_memory.h 48;" d
+SHMPTR_IS_NULL shared_memory.h 42;" d
+SHMPTR_NULL shared_memory.h 41;" d
+SHMPTR_SET shared_memory.h 47;" d
+SZ malloc_test.c 12;" d file:
+UTIL_HDR util.h 26;" d
+a hashmap.h /^ allocator *a;$/;" m struct:hashmap
+a sync.h /^ allocator *a;$/;" m struct:__anon3
+ai libcoarraynative.h /^ alloc_iface ai;$/;" m struct:__anon11
+alignto util.c /^alignto(size_t size, size_t align) {$/;" f
+alloc alloc.h /^ allocator alloc;$/;" m struct:alloc_iface
+alloc_iface alloc.h /^typedef struct alloc_iface$/;" s
+alloc_iface alloc.h /^} alloc_iface;$/;" t typeref:struct:alloc_iface
+alloc_iface_init alloc.c /^alloc_iface_init (alloc_iface *iface, shared_memory *mem)$/;" f
+alloc_iface_init alloc.h /^internal_proto (alloc_iface_init);$/;" v
+alloc_iface_shared alloc.h /^typedef struct alloc_iface_shared$/;" s
+alloc_iface_shared alloc.h /^} alloc_iface_shared;$/;" t typeref:struct:alloc_iface_shared
+allocator allocator.h /^} allocator;$/;" t typeref:struct:__anon17
+allocator_init allocator.c /^allocator_init (allocator *a, allocator_shared *s, shared_memory *sm)$/;" f
+allocator_s alloc.h /^ allocator_shared allocator_s;$/;" m struct:alloc_iface_shared
+allocator_shared allocator.h /^} allocator_shared;$/;" t typeref:struct:__anon16
+allocs shared_memory.c /^ } allocs[];$/;" m struct:shared_memory_act typeref:struct:shared_memory_act::local_alloc file:
+arr lock.h /^ pthread_mutex_t arr[];$/;" m struct:__anon12
+as alloc.h /^ alloc_iface_shared *as;$/;" m struct:alloc_iface
+barrier collective_subroutine.h /^ pthread_barrier_t barrier;$/;" m struct:collsub_iface_shared
+barrier libcoarraynative.h /^ pthread_barrier_t barrier;$/;" m struct:__anon6
+base shared_memory.c /^ void *base;$/;" m struct:shared_memory_act::local_alloc file:
+bitnum hashmap.h /^ int bitnum;$/;" m struct:__anon14
+bucket allocator.c /^} bucket;$/;" t typeref:struct:__anon1 file:
+ci libcoarraynative.h /^ collsub_iface ci;$/;" m struct:__anon11
+cis sync.h /^ sync_iface_shared *cis;$/;" m struct:__anon3
+collsub_broadcast collective_subroutine.h /^internal_proto (collsub_broadcast);$/;" v
+collsub_buf collective_subroutine.c /^void *collsub_buf = NULL;$/;" v
+collsub_buf collective_subroutine.h /^ shared_mem_ptr collsub_buf;$/;" m struct:collsub_iface_shared
+collsub_buf collective_subroutine.h /^ void *collsub_buf; \/* Cached pointer to shared collsub_buf. *\/$/;" m struct:collsub_iface
+collsub_buf collective_subroutine.h /^internal_proto (collsub_buf);$/;" v
+collsub_iface collective_subroutine.h /^typedef struct collsub_iface$/;" s
+collsub_iface collective_subroutine.h /^} collsub_iface;$/;" t typeref:struct:collsub_iface
+collsub_iface_shared collective_subroutine.h /^typedef struct collsub_iface_shared $/;" s
+collsub_iface_shared collective_subroutine.h /^} collsub_iface_shared;$/;" t typeref:struct:collsub_iface_shared
+collsub_reduce collective_subroutine.c /^collsub_reduce (void *obj, size_t nobjs, int *result_image, size_t size, $/;" f
+collsub_reduce collective_subroutine.h /^internal_proto (collsub_reduce);$/;" v
+collsub_sync collective_subroutine.c /^collsub_sync (void) {$/;" f
+collsub_sync collective_subroutine.h /^internal_proto (collsub_sync);$/;" v
+copy_from collective_inline.h /^copy_from (int image) $/;" f
+copy_in collective_inline.h /^copy_in (void *obj) {$/;" f
+copy_out collective_inline.h /^copy_out (void *obj, int image)$/;" f
+copy_to collective_inline.h /^copy_to (void *obj, int image)$/;" f
+curr_size collective_subroutine.c /^size_t curr_size = 0;$/;" v
+curr_size collective_subroutine.h /^ size_t curr_size;$/;" m struct:collsub_iface_shared
+curr_size collective_subroutine.h /^internal_proto (curr_size);$/;" v
+data hashmap.h /^ shared_mem_ptr data;$/;" m struct:__anon14
+deserialize_memory serialize.c /^export_proto (deserialize_memory);$/;" v
+div_ru wrapper.c /^div_ru (int divident, int divisor)$/;" f file:
+dump_hm hashmap.c /^dump_hm(hashmap *hm) {$/;" f
+enlarge_hashmap_mem hashmap.c /^enlarge_hashmap_mem (hashmap *hm, hashmap_entry **data, bool f)$/;" f file:
+ensure_initialization coarraynative.c /^ensure_initialization(void) {$/;" f
+fd shared_memory.c /^ int fd;$/;" m struct:__anon18 file:
+finish_collective_subroutine collective_inline.h /^finish_collective_subroutine (void) $/;" f
+free_bucket_head allocator.h /^ shared_mem_ptr free_bucket_head[PTR_BITS];$/;" m struct:__anon16
+free_memory_with_id alloc.c /^free_memory_with_id (alloc_iface* iface, memid id)$/;" f
+free_memory_with_id alloc.h /^internal_proto (free_memory_with_id);$/;" v
+gen_mask hashmap.c /^gen_mask (hashmap *hm)$/;" f file:
+get_allocator alloc.c /^get_allocator (alloc_iface * iface)$/;" f
+get_allocator alloc.h /^internal_proto (get_allocator);$/;" v
+get_data hashmap.c /^get_data(hashmap *hm)$/;" f file:
+get_environ_image_num coarraynative.c /^get_environ_image_num (void)$/;" f file:
+get_expected_offset hashmap.c /^get_expected_offset (hashmap *hm, memid id)$/;" f file:
+get_locked_table sync.c /^get_locked_table(sync_iface *si) { \/\/ The initialization of the table has to $/;" f file:
+get_master coarraynative.c /^get_master (void) {$/;" f file:
+get_memory_by_id alloc.c /^get_memory_by_id (alloc_iface *iface, size_t size, memid id)$/;" f
+get_memory_by_id alloc.h /^internal_proto (get_memory_by_id);$/;" v
+get_obj_ptr collective_inline.h /^get_obj_ptr (int image) $/;" f
+get_shared_memory_act_size shared_memory.c /^get_shared_memory_act_size (int nallocs)$/;" f file:
+get_shmem_fd util.c /^get_shmem_fd (void)$/;" f
+gfc_coarray_allocation_type wrapper.c /^enum gfc_coarray_allocation_type {$/;" g file:
+global_shared_memory_meta shared_memory.c /^} global_shared_memory_meta;$/;" t typeref:struct:__anon18 file:
+has_failed_image libcoarraynative.h /^ int has_failed_image;$/;" m struct:__anon9
+has_failed_image master.c /^ int has_failed_image = 0;$/;" m struct:__anon4 file:
+hash hashmap.c /^hash (uint64_t key)$/;" f file:
+hashmap hashmap.h /^typedef struct hashmap$/;" s
+hashmap hashmap.h /^} hashmap;$/;" t typeref:struct:hashmap
+hashmap_change_refcnt hashmap.c /^hashmap_change_refcnt (hashmap *hm, memid id, hashmap_search_result *res,$/;" f file:
+hashmap_dec hashmap.c /^hashmap_dec (hashmap *hm, memid id, hashmap_search_result * res)$/;" f
+hashmap_entry hashmap.c /^} hashmap_entry;$/;" t typeref:struct:__anon13 file:
+hashmap_get hashmap.c /^hashmap_get (hashmap *hm, memid id)$/;" f
+hashmap_inc hashmap.c /^hashmap_inc (hashmap *hm, memid id, hashmap_search_result * res)$/;" f
+hashmap_init hashmap.c /^hashmap_init (hashmap *hm, hashmap_shared *hs, allocator *a,$/;" f
+hashmap_search_result hashmap.h /^} hashmap_search_result;$/;" t typeref:struct:__anon15
+hashmap_set hashmap.c /^hashmap_set (hashmap *hm, memid id, hashmap_search_result *hsr,$/;" f
+hashmap_shared hashmap.h /^} hashmap_shared;$/;" t typeref:struct:__anon14
+header shared_memory.c /^ void *header;$/;" m struct:shared_memory_act file:
+hm alloc.h /^ hashmap hm;$/;" m struct:alloc_iface
+hm_search_result_contains hashmap.c /^hm_search_result_contains (hashmap_search_result *res)$/;" f
+hm_search_result_ptr hashmap.c /^hm_search_result_ptr (hashmap_search_result *res)$/;" f
+hm_search_result_size hashmap.c /^hm_search_result_size (hashmap_search_result *res)$/;" f
+hmiadd hashmap.c /^hmiadd (hashmap *hm, size_t s, ssize_t o) {$/;" f file:
+hms alloc.h /^ hashmap_shared hms;$/;" m struct:alloc_iface_shared
+id hashmap.c /^ memid id;$/;" m struct:__anon13 file:
+image libcoarraynative.h /^} image;$/;" t typeref:struct:__anon10
+image_main_wrapper master.c /^image_main_wrapper (void (*image_main) (void), int this_image_num)$/;" f file:
+image_num libcoarraynative.h /^ int image_num;$/;" m struct:__anon10
+image_status libcoarraynative.h /^} image_status;$/;" t typeref:enum:__anon7
+image_tracker libcoarraynative.h /^} image_tracker;$/;" t typeref:struct:__anon8
+images libcoarraynative.h /^ image_tracker images[];$/;" m struct:__anon9
+images master.c /^ struct image_status * images;$/;" m struct:__anon4 typeref:struct:__anon4::image_status file:
+init_collsub collective_subroutine.c /^init_collsub (void) {$/;" f
+init_collsub collective_subroutine.h /^internal_proto (init_collsub);$/;" v
+initialize_shared_mutex util.c /^initialize_shared_mutex (pthread_mutex_t *mutex)$/;" f
+initialized lock.h /^ int initialized;$/;" m struct:__anon12
+ipcollsub libcoarraynative.h /^} ipcollsub;$/;" t typeref:struct:__anon6
+last_base shared_memory.c /^last_base (shared_memory_act *mem)$/;" f file:
+last_seen_size shared_memory.c /^ size_t last_seen_size;$/;" m struct:shared_memory_act file:
+local coarraynative.c /^nca_local_data *local = NULL;$/;" v
+local_alloc shared_memory.c /^ struct local_alloc {$/;" s struct:shared_memory_act file:
+lock alloc.h /^ pthread_mutex_t lock;$/;" m struct:alloc_iface_shared
+lock_array lock.h /^} lock_array;$/;" t typeref:struct:__anon12
+lock_table sync.c /^lock_table (sync_iface *si)$/;" f file:
+m libcoarraynative.h /^ master *m;$/;" m struct:__anon10
+main malloc_test.c /^int main()$/;" f
+map_memory shared_memory.c /^map_memory (int fd, size_t size, off_t offset)$/;" f file:
+master libcoarraynative.h /^} master;$/;" t typeref:struct:__anon9
+master master.c /^} master;$/;" t typeref:struct:__anon4 file:
+max_lookahead hashmap.c /^ int max_lookahead; $/;" m struct:__anon13 file:
+maximg libcoarraynative.h /^ int maximg;$/;" m struct:__anon6
+mem alloc.h /^ shared_memory *mem;$/;" m struct:alloc_iface
+memid hashmap.h /^typedef intptr_t memid;$/;" t
+meta shared_memory.c /^ global_shared_memory_meta *meta;$/;" m struct:shared_memory_act file:
+n_ent hashmap.c /^static ssize_t n_ent;$/;" v file:
+nca_co_broadcast collective_subroutine.c /^export_proto (nca_co_broadcast);$/;" v
+nca_co_broadcast collective_subroutine.c /^nca_co_broadcast (gfc_array_char * restrict a, int source_image,$/;" f
+nca_coarray_alloc wrapper.c /^export_proto (nca_coarray_alloc);$/;" v
+nca_coarray_alloc wrapper.c /^nca_coarray_alloc (gfc_array_void *desc, int elem_size, int corank,$/;" f
+nca_coarray_free wrapper.c /^export_proto (nca_coarray_free);$/;" v
+nca_coarray_free wrapper.c /^nca_coarray_free (gfc_array_void *desc, int alloc_type)$/;" f
+nca_coarray_num_images wrapper.c /^export_proto (nca_coarray_num_images);$/;" v
+nca_coarray_num_images wrapper.c /^nca_coarray_num_images (int distance __attribute__((unused)))$/;" f
+nca_coarray_sync_all wrapper.c /^export_proto (nca_coarray_sync_all);$/;" v
+nca_coarray_sync_all wrapper.c /^nca_coarray_sync_all (int *stat __attribute__((unused)))$/;" f
+nca_coarray_this_image wrapper.c /^export_proto (nca_coarray_this_image);$/;" v
+nca_coarray_this_image wrapper.c /^nca_coarray_this_image (int distance __attribute__((unused)))$/;" f
+nca_collsub_reduce_array wrapper.c /^export_proto (nca_collsub_reduce_array);$/;" v
+nca_collsub_reduce_array wrapper.c /^nca_collsub_reduce_array (gfc_array_void *desc, void (*assign_function) (void *, void *),$/;" f
+nca_collsub_reduce_scalar wrapper.c /^export_proto (nca_collsub_reduce_scalar);$/;" v
+nca_collsub_reduce_scalar wrapper.c /^nca_collsub_reduce_scalar (void *obj, index_type elem_size,$/;" f
+nca_local_data libcoarraynative.h /^} nca_local_data;$/;" t typeref:struct:__anon11
+nca_lock wrapper.c /^export_proto (nca_lock);$/;" v
+nca_lock wrapper.c /^nca_lock (void *lock)$/;" f
+nca_master coarraynative.c /^nca_master (void (*image_main) (void)) {$/;" f
+nca_master master.c /^nca_master (void (*image_main) (void)) {$/;" f
+nca_master master.c /^nca_master (void (*image_main) (void))$/;" f
+nca_sync_images wrapper.c /^export_proto (nca_sync_images);$/;" v
+nca_sync_images wrapper.c /^nca_sync_images (size_t s, int *images,$/;" f
+nca_unlock wrapper.c /^export_proto (nca_unlock);$/;" v
+nca_unlock wrapper.c /^nca_unlock (void *lock)$/;" f
+new_base_mapping shared_memory.c /^new_base_mapping (shared_memory_act *mem)$/;" f file:
+next allocator.c /^ shared_mem_ptr next;$/;" m struct:__anon1 file:
+next_power_of_two util.c /^next_power_of_two(size_t size) {$/;" f
+num_entries hashmap.c /^num_entries (hashmap_entry *data, size_t size)$/;" f file:
+num_images libcoarraynative.h /^ int num_images;$/;" m struct:__anon11
+num_local_allocs shared_memory.c /^ size_t num_local_allocs;$/;" m struct:shared_memory_act file:
+offset shared_memory.h /^ ssize_t offset;$/;" m struct:shared_mem_ptr
+owner lock.h /^ int owner;$/;" m struct:__anon12
+p hashmap.c /^ shared_mem_ptr p; \/* If p == SHMPTR_NULL, the entry is empty. *\/$/;" m struct:__anon13 file:
+p hashmap.h /^ shared_mem_ptr p;$/;" m struct:__anon15
+pagesize util.c /^size_t pagesize = 1<<17;$/;" v
+pid libcoarraynative.h /^ pid_t pid;$/;" m struct:__anon8
+prepare_collective_subroutine collective_subroutine.c /^prepare_collective_subroutine (size_t size)$/;" f
+prepare_collective_subroutine collective_subroutine.h /^internal_proto (prepare_collective_subroutine);$/;" v
+refcnt hashmap.c /^ int refcnt;$/;" m struct:__anon13 file:
+res_offset hashmap.h /^ ssize_t res_offset;$/;" m struct:__anon15
+resize_hm hashmap.c /^resize_hm (hashmap *hm, hashmap_entry **data)$/;" f file:
+round_to_pagesize util.c /^round_to_pagesize(size_t s) {$/;" f
+s allocator.h /^ allocator_shared *s;$/;" m struct:__anon17
+s collective_subroutine.h /^ collsub_iface_shared *s;$/;" m struct:collsub_iface
+s hashmap.c /^ size_t s;$/;" m struct:__anon13 file:
+s hashmap.h /^ hashmap_shared *s;$/;" m struct:hashmap
+scan_empty hashmap.c /^scan_empty (hashmap *hm, ssize_t expected_off, memid id)$/;" f file:
+scan_inside_lookahead hashmap.c /^scan_inside_lookahead (hashmap *hm, ssize_t expected_off, memid id)$/;" f file:
+serialize_memory serialize.c /^export_proto (serialize_memory);$/;" v
+serialize_memory serialize.c /^serialize_memory (gfc_array_char * const restrict source, char *d)$/;" f
+shared_free allocator.c /^shared_free (allocator *a, shared_mem_ptr p, size_t size) {$/;" f
+shared_malloc allocator.c /^shared_malloc (allocator *a, size_t size)$/;" f
+shared_mem_ptr shared_memory.h /^typedef struct shared_mem_ptr$/;" s
+shared_mem_ptr shared_memory.h /^} shared_mem_ptr;$/;" t typeref:struct:shared_mem_ptr
+shared_mem_ptr_to_void_ptr shared_memory.c /^shared_mem_ptr_to_void_ptr(shared_memory_act **pmem, shared_mem_ptr smp)$/;" f
+shared_mem_ptr_to_void_ptr shared_memory.h /^internal_proto (shared_mem_ptr_to_void_ptr);$/;" v
+shared_memory shared_memory.h /^typedef struct shared_memory_act * shared_memory;$/;" t typeref:struct:shared_memory_act
+shared_memory_act shared_memory.c /^typedef struct shared_memory_act$/;" s file:
+shared_memory_act shared_memory.c /^} shared_memory_act;$/;" t typeref:struct:shared_memory_act file:
+shared_memory_get_mem_with_alignment shared_memory.c /^shared_memory_get_mem_with_alignment (shared_memory_act **pmem, size_t size,$/;" f
+shared_memory_get_mem_with_alignment shared_memory.h /^internal_proto (shared_memory_get_mem_with_alignment);$/;" v
+shared_memory_init shared_memory.c /^shared_memory_init (shared_memory_act **pmem)$/;" f
+shared_memory_init shared_memory.h /^internal_proto (shared_memory_init);$/;" v
+shared_memory_prepare shared_memory.c /^shared_memory_prepare (shared_memory_act **pmem)$/;" f
+shared_memory_prepare shared_memory.h /^internal_proto (shared_memory_prepare);$/;" v
+shm allocator.h /^ shared_memory *shm;$/;" m struct:__anon17
+si libcoarraynative.h /^ sync_iface si;$/;" m struct:__anon11
+size hashmap.h /^ size_t size;$/;" m struct:__anon14
+size hashmap.h /^ size_t size;$/;" m struct:__anon15
+size shared_memory.c /^ size_t size;$/;" m struct:shared_memory_act::local_alloc file:
+size shared_memory.c /^ size_t size;$/;" m struct:__anon18 file:
+sm collective_subroutine.h /^ shared_memory *sm;$/;" m struct:collsub_iface
+sm hashmap.h /^ shared_memory *sm;$/;" m struct:hashmap
+sm libcoarraynative.h /^ shared_memory sm;$/;" m struct:__anon11
+sm sync.h /^ shared_memory *sm;$/;" m struct:__anon3
+status libcoarraynative.h /^ image_status status;$/;" m struct:__anon8
+sync_all sync.c /^sync_all (sync_iface *si)$/;" f
+sync_all sync.h /^ pthread_barrier_t sync_all;$/;" m struct:__anon2
+sync_all sync.h /^internal_proto (sync_all);$/;" v
+sync_all_init sync.c /^sync_all_init (pthread_barrier_t *b)$/;" f file:
+sync_iface sync.h /^} sync_iface;$/;" t typeref:struct:__anon3
+sync_iface_init sync.c /^sync_iface_init (sync_iface *si, alloc_iface *ai, shared_memory *sm)$/;" f
+sync_iface_init sync.h /^internal_proto (sync_iface_init);$/;" v
+sync_iface_shared sync.h /^} sync_iface_shared;$/;" t typeref:struct:__anon2
+sync_table sync.c /^sync_table (sync_iface *si, int *images, size_t size)$/;" f
+sync_table sync.h /^internal_proto (sync_table);$/;" v
+table sync.h /^ int *table; \/\/ we can cache the table and the trigger pointers here$/;" m struct:__anon3
+table sync.h /^ shared_mem_ptr table;$/;" m struct:__anon2
+table_lock sync.h /^ pthread_mutex_t table_lock;$/;" m struct:__anon2
+this_image coarraynative.c /^image this_image;$/;" v
+triggers sync.h /^ pthread_cond_t *triggers;$/;" m struct:__anon3
+triggers sync.h /^ shared_mem_ptr triggers;$/;" m struct:__anon2
+unlock_table sync.c /^unlock_table (sync_iface *si)$/;" f file:
+used shared_memory.c /^ size_t used;$/;" m struct:__anon18 file:
+wait_table_cond sync.c /^wait_table_cond (sync_iface *si, pthread_cond_t *cond)$/;" f file:
diff --git a/libgfortran/nca/alloc.c b/libgfortran/nca/alloc.c
new file mode 100644
index 0000000..174fe33
--- /dev/null
+++ b/libgfortran/nca/alloc.c
@@ -0,0 +1,152 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* This provides the coarray-specific features (like IDs etc) for
+ allocator.c, in turn calling routines from shared_memory.c.
+*/
+
+#include "libgfortran.h"
+#include "shared_memory.h"
+#include "allocator.h"
+#include "hashmap.h"
+#include "alloc.h"
+
+#include <string.h>
+
+/* Return a local pointer into a shared memory object identified by
+   id.  If the object is found, it has already been allocated, so just
+   increase the reference counter.
+
+ The pointers returned by this function remain valid even if the
+ size of the memory allocation changes (see shared_memory.c). */
+
+static void *
+get_memory_by_id_internal (alloc_iface *iface, size_t size, memid id,
+ bool zero_mem)
+{
+ hashmap_search_result res;
+ shared_mem_ptr shared_ptr;
+ void *ret;
+
+ pthread_mutex_lock (&iface->as->lock);
+ shared_memory_prepare(iface->mem);
+
+ res = hashmap_get (&iface->hm, id);
+
+ if (hm_search_result_contains (&res))
+ {
+ size_t found_size;
+ found_size = hm_search_result_size (&res);
+ if (found_size != size)
+ {
+ dprintf (2, "Size mismatch for coarray allocation id %p: "
+	       "found = %zu != size = %zu\n", (void *) id, found_size, size);
+ pthread_mutex_unlock (&iface->as->lock);
+ exit (1);
+ }
+ shared_ptr = hm_search_result_ptr (&res);
+ hashmap_inc (&iface->hm, id, &res);
+ }
+ else
+ {
+ shared_ptr = shared_malloc (&iface->alloc, size);
+ hashmap_set (&iface->hm, id, NULL, shared_ptr, size);
+ }
+
+ ret = SHMPTR_AS (void *, shared_ptr, iface->mem);
+ if (zero_mem)
+ memset (ret, '\0', size);
+
+ pthread_mutex_unlock (&iface->as->lock);
+ return ret;
+}
+
+void *
+get_memory_by_id (alloc_iface *iface, size_t size, memid id)
+{
+ return get_memory_by_id_internal (iface, size, id, 0);
+}
+
+void *
+get_memory_by_id_zero (alloc_iface *iface, size_t size, memid id)
+{
+ return get_memory_by_id_internal (iface, size, id, 1);
+}
+
+/* Free memory with id. Free it if this is the last image which
+ holds that memory segment, decrease the reference count otherwise. */
+
+void
+free_memory_with_id (alloc_iface* iface, memid id)
+{
+ hashmap_search_result res;
+ int entries_left;
+
+ pthread_mutex_lock (&iface->as->lock);
+ shared_memory_prepare(iface->mem);
+
+ res = hashmap_get (&iface->hm, id);
+ if (!hm_search_result_contains (&res))
+ {
+ pthread_mutex_unlock (&iface->as->lock);
+ char buffer[100];
+ snprintf (buffer, sizeof(buffer), "Error in free_memory_with_id: "
+ "%p not found", (void *) id);
+      dprintf (2, "%s", buffer);
+ // internal_error (NULL, buffer);
+ exit (1);
+ }
+
+ entries_left = hashmap_dec (&iface->hm, id, &res);
+ assert (entries_left >=0);
+
+ if (entries_left == 0)
+ {
+ shared_free (&iface->alloc, hm_search_result_ptr (&res),
+ hm_search_result_size (&res));
+ }
+
+ pthread_mutex_unlock (&iface->as->lock);
+ return;
+}
+
+/* Allocate the shared memory interface. This is called before we have
+ multiple images. */
+
+void
+alloc_iface_init (alloc_iface *iface, shared_memory *mem)
+{
+
+ iface->as = SHARED_MEMORY_RAW_ALLOC_PTR (mem, alloc_iface_shared);
+ iface->mem = mem;
+ initialize_shared_mutex (&iface->as->lock);
+ allocator_init (&iface->alloc, &iface->as->allocator_s, mem);
+ hashmap_init (&iface->hm, &iface->as->hms, &iface->alloc, mem);
+}
+
+allocator *
+get_allocator (alloc_iface * iface)
+{
+ return &iface->alloc;
+}
diff --git a/libgfortran/nca/alloc.h b/libgfortran/nca/alloc.h
new file mode 100644
index 0000000..f65121c
--- /dev/null
+++ b/libgfortran/nca/alloc.h
@@ -0,0 +1,67 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef ALLOC_H
+#define ALLOC_H
+
+#include "allocator.h"
+#include "hashmap.h"
+
+/* High-level interface for shared memory allocation. */
+
+/* This part of the alloc interface goes into shared memory. */
+
+typedef struct alloc_iface_shared
+{
+ allocator_shared allocator_s;
+ hashmap_shared hms;
+ pthread_mutex_t lock;
+} alloc_iface_shared;
+
+/* This is the local part. */
+
+typedef struct alloc_iface
+{
+ alloc_iface_shared *as;
+ shared_memory *mem;
+ allocator alloc;
+ hashmap hm;
+} alloc_iface;
+
+void *get_memory_by_id (alloc_iface *, size_t, memid);
+internal_proto (get_memory_by_id);
+
+void *get_memory_by_id_zero (alloc_iface *, size_t, memid);
+internal_proto (get_memory_by_id_zero);
+
+void free_memory_with_id (alloc_iface *, memid);
+internal_proto (free_memory_with_id);
+
+void alloc_iface_init (alloc_iface *, shared_memory *);
+internal_proto (alloc_iface_init);
+
+allocator *get_allocator (alloc_iface *);
+internal_proto (get_allocator);
+
+#endif
diff --git a/libgfortran/nca/allocator.c b/libgfortran/nca/allocator.c
new file mode 100644
index 0000000..e7aa9fd
--- /dev/null
+++ b/libgfortran/nca/allocator.c
@@ -0,0 +1,90 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* A malloc() - and free() - like interface, but for shared memory
+ pointers, except that we pass the size to free as well. */
+
+#include "libgfortran.h"
+#include "shared_memory.h"
+#include "allocator.h"
+
+typedef struct {
+ shared_mem_ptr next;
+} bucket;
+
+/* Initialize the allocator. */
+
+void
+allocator_init (allocator *a, allocator_shared *s, shared_memory *sm)
+{
+ a->s = s;
+ a->shm = sm;
+ for (int i = 0; i < PTR_BITS; i++)
+ s->free_bucket_head[i] = SHMPTR_NULL;
+}
+
+/* Main allocation routine, works like malloc. Round up allocations
+ to the next power of two and keep free lists in buckets. */
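+/* For example, on a 64-bit target a request for 24 bytes is rounded up
+   to 32; __builtin_clzl (32) is 58, so all 32-byte chunks share the free
+   list at index 58 of free_bucket_head.  */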
+
+#define MAX_ALIGN 16
+
+shared_mem_ptr
+shared_malloc (allocator *a, size_t size)
+{
+ shared_mem_ptr ret;
+ size_t sz;
+ size_t act_size;
+ int bucket_list_index;
+
+ sz = next_power_of_two (size);
+ act_size = sz > sizeof (bucket) ? sz : sizeof (bucket);
+ bucket_list_index = __builtin_clzl(act_size);
+
+ if (SHMPTR_IS_NULL (a->s->free_bucket_head[bucket_list_index]))
+ return shared_memory_get_mem_with_alignment (a->shm, act_size, MAX_ALIGN);
+
+ ret = a->s->free_bucket_head[bucket_list_index];
+ a->s->free_bucket_head[bucket_list_index]
+ = (SHMPTR_AS (bucket *, ret, a->shm)->next);
+ assert(ret.offset != 0);
+ return ret;
+}
+
+/* Free memory. */
+
+void
+shared_free (allocator *a, shared_mem_ptr p, size_t size)
+{
+ bucket *b;
+ size_t sz;
+ int bucket_list_index;
+ size_t act_size;
+
+ sz = next_power_of_two (size);
+ act_size = sz > sizeof (bucket) ? sz : sizeof (bucket);
+ bucket_list_index = __builtin_clzl(act_size);
+
+ b = SHMPTR_AS(bucket *, p, a->shm);
+ b->next = a->s->free_bucket_head[bucket_list_index];
+ a->s->free_bucket_head[bucket_list_index] = p;
+}
diff --git a/libgfortran/nca/allocator.h b/libgfortran/nca/allocator.h
new file mode 100644
index 0000000..306022a
--- /dev/null
+++ b/libgfortran/nca/allocator.h
@@ -0,0 +1,21 @@
+#ifndef SHARED_ALLOCATOR_HDR
+#define SHARED_ALLOCATOR_HDR
+
+#include "util.h"
+#include "shared_memory.h"
+
+typedef struct {
+ shared_mem_ptr free_bucket_head[PTR_BITS];
+} allocator_shared;
+
+typedef struct {
+ allocator_shared *s;
+ shared_memory *shm;
+} allocator;
+
+void allocator_init (allocator *, allocator_shared *, shared_memory *);
+
+shared_mem_ptr shared_malloc (allocator *, size_t size);
+void shared_free (allocator *, shared_mem_ptr, size_t size);
+
+#endif
diff --git a/libgfortran/nca/coarraynative.c b/libgfortran/nca/coarraynative.c
new file mode 100644
index 0000000..c9d13ee
--- /dev/null
+++ b/libgfortran/nca/coarraynative.c
@@ -0,0 +1,145 @@
+/* Copyright (C) 2019-2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+#include "libcoarraynative.h"
+#include "allocator.h"
+#include "hashmap.h"
+#include "util.h"
+#include "lock.h"
+#include "collective_subroutine.h"
+
+#include <unistd.h>
+#include <sys/mman.h>
+// #include <stdlib.h>
+#include <sys/wait.h>
+
+#define GFORTRAN_ENV_NUM_IMAGES "GFORTRAN_NUM_IMAGES"
+
+nca_local_data *local = NULL;
+
+image this_image;
+
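+/* Determine how many images to spawn: use the GFORTRAN_NUM_IMAGES
+   environment variable if it is set, otherwise fall back to the number
+   of processors currently online.  */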
+static int
+get_environ_image_num (void)
+{
+ char *num_images_char;
+ int nimages;
+ num_images_char = getenv (GFORTRAN_ENV_NUM_IMAGES);
+ if (!num_images_char)
+ return sysconf (_SC_NPROCESSORS_ONLN); /* TODO: Make portable. */
+ /* TODO: Error checking. */
+ nimages = atoi (num_images_char);
+ return nimages;
+}
+
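+/* Set up the per-process state: the image count, the shared memory
+   segment, and the allocation, collective subroutine and synchronization
+   interfaces.  Later calls return immediately once local is set.  */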
+void
+ensure_initialization (void)
+{
+ if (local)
+ return;
+
+ local = malloc(sizeof(nca_local_data)); // Is malloc already init'ed at that
+ // point? Maybe use mmap(MAP_ANON)
+ // instead
+ pagesize = sysconf (_SC_PAGE_SIZE);
+ local->num_images = get_environ_image_num ();
+ shared_memory_init (&local->sm);
+ shared_memory_prepare (&local->sm);
+ alloc_iface_init (&local->ai, &local->sm);
+ collsub_iface_init (&local->ci, &local->ai, &local->sm);
+ sync_iface_init (&local->si, &local->ai, &local->sm);
+}
+
+static void __attribute__((noreturn))
+image_main_wrapper (void (*image_main) (void), image *this)
+{
+ this_image = *this;
+
+ sync_all(&local->si);
+
+ image_main ();
+
+ exit (0);
+}
+
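+/* Allocate the master structure, which records the PID and status of
+   every image, in shared memory.  */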
+static master *
+get_master (void)
+{
+ master *m;
+ m = SHMPTR_AS (master *,
+ shared_memory_get_mem_with_alignment
+ (&local->sm,
+ sizeof (master) + sizeof(image_status) * local->num_images,
+ __alignof__(master)), &local->sm);
+ m->has_failed_image = 0;
+ return m;
+}
+
+/* This is called from main, with a pointer to the user's program as
+ argument. It forks the images and waits for their completion. */
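+
+/* A minimal sketch of the intended call site; the symbol names below are
+   an assumption, not taken from this patch.  MAIN__ stands for the
+   Fortran main program:
+
+     extern void MAIN__ (void);
+
+     int
+     main (void)
+     {
+       nca_master (MAIN__);
+     }
+*/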
+
+void
+nca_master (void (*image_main) (void))
+{
+ master *m;
+ int i, j;
+ pid_t new;
+ image im;
+ int exit_code = 0;
+ int chstatus;
+ ensure_initialization();
+ m = get_master();
+
+ im.m = m;
+
+ for (im.image_num = 0; im.image_num < local->num_images; im.image_num++)
+ {
+ if ((new = fork()))
+ {
+ if (new == -1)
+ {
+ dprintf(2, "error spawning child\n");
+ exit_code = 1;
+ }
+ m->images[im.image_num].pid = new;
+ m->images[im.image_num].status = IMAGE_OK;
+ }
+ else
+ image_main_wrapper(image_main, &im);
+ }
+ for (i = 0; i < local->num_images; i++)
+ {
+ new = wait (&chstatus);
+ if (!WIFEXITED (chstatus) || WEXITSTATUS (chstatus))
+ {
+ j = 0;
+ for (; j < local->num_images && m->images[j].pid != new; j++);
+ m->images[j].status = IMAGE_FAILED;
+ m->has_failed_image++; //FIXME: Needs to be atomic, probably
+ dprintf (2, "ERROR: Image %d(%#x) failed\n", j, new);
+ exit_code = 1;
+ }
+ }
+ exit (exit_code);
+}
diff --git a/libgfortran/nca/collective_inline.h b/libgfortran/nca/collective_inline.h
new file mode 100644
index 0000000..4e7107b
--- /dev/null
+++ b/libgfortran/nca/collective_inline.h
@@ -0,0 +1,42 @@
+#include "collective_subroutine.h"
+
+static inline void
+finish_collective_subroutine (collsub_iface *ci)
+{
+ collsub_sync (ci);
+}
+
+#if 0
+static inline void *
+get_obj_ptr (void *buffer, int image)
+{
+  return (char *) buffer + curr_size * image;
+}
+
+/* If obj is NULL, copy the object from the entry in this image. */
+static inline void
+copy_to (void *buffer, void *obj, int image)
+{
+ if (obj == 0)
+    obj = get_obj_ptr (buffer, this_image.image_num);
+  memcpy (get_obj_ptr (buffer, image), obj, curr_size);
+}
+
+static inline void
+copy_out (void *buffer, void *obj, int image)
+{
+  memcpy (obj, get_obj_ptr (buffer, image), curr_size);
+}
+
+static inline void
+copy_from (void *buffer, int image)
+{
+  copy_out (buffer, get_obj_ptr (buffer, this_image.image_num), image);
+}
+
+static inline void
+copy_in (void *buffer, void *obj)
+{
+  copy_to (buffer, obj, this_image.image_num);
+}
+#endif
diff --git a/libgfortran/nca/collective_subroutine.c b/libgfortran/nca/collective_subroutine.c
new file mode 100644
index 0000000..8a8a7d6
--- /dev/null
+++ b/libgfortran/nca/collective_subroutine.c
@@ -0,0 +1,416 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <string.h>
+#include "libgfortran.h"
+#include "libcoarraynative.h"
+#include "collective_subroutine.h"
+#include "collective_inline.h"
+#include "allocator.h"
+
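+/* Return a pointer to the buffer shared by the collective subroutines,
+   growing it under the interface mutex if the requested size exceeds
+   what is currently allocated.  */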
+void *
+get_collsub_buf (collsub_iface *ci, size_t size)
+{
+ void *ret;
+
+ pthread_mutex_lock (&ci->s->mutex);
+ if (size > ci->s->curr_size)
+ {
+ shared_free (ci->a, ci->s->collsub_buf, ci->s->curr_size);
+ ci->s->collsub_buf = shared_malloc (ci->a, size);
+ ci->s->curr_size = size;
+ }
+
+ ret = SHMPTR_AS (void *, ci->s->collsub_buf, ci->sm);
+ pthread_mutex_unlock (&ci->s->mutex);
+ return ret;
+}
+
+/* Glibc's barrier implementation does not appear to spin, so spinning a
+   few times here before falling back to the futex syscall would likely
+   improve performance.  */
+
+void
+collsub_sync (collsub_iface *ci)
+{
+ //dprintf (2, "Calling collsub_sync %d times\n", ++called);
+ pthread_barrier_wait (&ci->s->barrier);
+}
+
+/* assign_function is needed since we only know how to assign the type inside
+ the compiler. It should be implemented as follows:
+
+ void assign_function (void *a, void *b)
+ {
+ *((t *) a) = reduction_operation ((t *) a, (t *) b);
+ }
+
+ */
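+
+/* For instance, for a CO_SUM reduction of default integers the compiler
+   would pass something equivalent to the following (illustrative only;
+   the real callback is generated under a compiler-internal name):
+
+     void add_i4 (void *a, void *b)
+     {
+       *(GFC_INTEGER_4 *) a += *(GFC_INTEGER_4 *) b;
+     }
+*/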
+
+void
+collsub_reduce_array (collsub_iface *ci, gfc_array_char *desc, int *result_image,
+ void (*assign_function) (void *, void *))
+{
+ void *buffer;
+ pack_info pi;
+ bool packed;
+ int cbit = 0;
+ int imoffset;
+ index_type elem_size;
+ index_type this_image_size_bytes;
+ char *this_image_buf;
+
+ packed = pack_array_prepare (&pi, desc);
+ if (pi.num_elem == 0)
+ return;
+
+ elem_size = GFC_DESCRIPTOR_SIZE (desc);
+ this_image_size_bytes = elem_size * pi.num_elem;
+
+ buffer = get_collsub_buf (ci, this_image_size_bytes * local->num_images);
+ this_image_buf = buffer + this_image_size_bytes * this_image.image_num;
+
+ if (packed)
+ memcpy (this_image_buf, GFC_DESCRIPTOR_DATA (desc), this_image_size_bytes);
+ else
+ pack_array_finish (&pi, desc, this_image_buf);
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ /* Reduce arrays elementwise. */
+ for (size_t i = 0; i < pi.num_elem; i++)
+ assign_function (this_image_buf + elem_size * i,
+ this_image_buf + this_image_size_bytes * imoffset + elem_size * i);
+
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || *result_image == this_image.image_num)
+ {
+ if (packed)
+ memcpy (GFC_DESCRIPTOR_DATA (desc), buffer, this_image_size_bytes);
+ else
+ unpack_array_finish(&pi, desc, buffer);
+ }
+
+ finish_collective_subroutine (ci);
+}
+
+void
+collsub_reduce_scalar (collsub_iface *ci, void *obj, index_type elem_size,
+ int *result_image,
+ void (*assign_function) (void *, void *))
+{
+ void *buffer;
+ int cbit = 0;
+ int imoffset;
+ char *this_image_buf;
+
+ buffer = get_collsub_buf (ci, elem_size * local->num_images);
+ this_image_buf = buffer + elem_size * this_image.image_num;
+
+ memcpy (this_image_buf, obj, elem_size);
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+        /* Reduce the scalar value.  */
+ assign_function (this_image_buf, this_image_buf + elem_size*imoffset);
+
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || *result_image == this_image.image_num)
+ memcpy(obj, buffer, elem_size);
+
+ finish_collective_subroutine (ci);
+}
+
+/* Do not use sync_all () here, because the program would deadlock if some
+   images were waiting at a sync_all barrier while others were inside a
+   collective subroutine.  */
+
+void
+collsub_iface_init (collsub_iface *ci, alloc_iface *ai, shared_memory *sm)
+{
+ pthread_barrierattr_t attr;
+ shared_mem_ptr p;
+ ci->s = SHARED_MEMORY_RAW_ALLOC_PTR(sm, collsub_iface_shared);
+
+ ci->s->collsub_buf = shared_malloc(get_allocator(ai), sizeof(double)*local->num_images);
+ ci->s->curr_size = sizeof(double)*local->num_images;
+ ci->sm = sm;
+ ci->a = get_allocator(ai);
+
+ pthread_barrierattr_init (&attr);
+ pthread_barrierattr_setpshared (&attr, PTHREAD_PROCESS_SHARED);
+ pthread_barrier_init (&ci->s->barrier, &attr, local->num_images);
+ pthread_barrierattr_destroy(&attr);
+
+ initialize_shared_mutex (&ci->s->mutex);
+}
+
+void
+collsub_broadcast_scalar (collsub_iface *ci, void *obj, index_type elem_size,
+ int source_image /* Adjusted in the wrapper. */)
+{
+ void *buffer;
+
+ buffer = get_collsub_buf (ci, elem_size);
+
+ dprintf(2, "Source image: %d\n", source_image);
+
+ if (source_image == this_image.image_num)
+ {
+ memcpy (buffer, obj, elem_size);
+ collsub_sync (ci);
+ }
+ else
+ {
+ collsub_sync (ci);
+ memcpy (obj, buffer, elem_size);
+ }
+
+ finish_collective_subroutine (ci);
+}
+
+void
+collsub_broadcast_array (collsub_iface *ci, gfc_array_char *desc,
+ int source_image)
+{
+ void *buffer;
+ pack_info pi;
+ bool packed;
+ index_type elem_size;
+ index_type size_bytes;
+ char *this_image_buf;
+
+ packed = pack_array_prepare (&pi, desc);
+ if (pi.num_elem == 0)
+ return;
+
+ elem_size = GFC_DESCRIPTOR_SIZE (desc);
+ size_bytes = elem_size * pi.num_elem;
+
+ buffer = get_collsub_buf (ci, size_bytes);
+
+ if (source_image == this_image.image_num)
+ {
+ if (packed)
+ memcpy (buffer, GFC_DESCRIPTOR_DATA (desc), size_bytes);
+ else
+ pack_array_finish (&pi, desc, buffer);
+ collsub_sync (ci);
+ }
+ else
+ {
+ collsub_sync (ci);
+ if (packed)
+ memcpy (GFC_DESCRIPTOR_DATA (desc), buffer, size_bytes);
+ else
+ unpack_array_finish(&pi, desc, buffer);
+ }
+
+ finish_collective_subroutine (ci);
+}
+
+#if 0
+
+void nca_co_broadcast (gfc_array_char *, int, int*, char *, size_t);
+export_proto (nca_co_broadcast);
+
+void
+nca_co_broadcast (gfc_array_char * restrict a, int source_image,
+ int *stat, char *errmsg __attribute__ ((unused)),
+ size_t errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ index_type type_size;
+ index_type dim;
+ index_type span;
+ bool packed, empty;
+ index_type num_elems;
+ index_type ssize, ssize_bytes;
+ char *this_shared_ptr, *other_shared_ptr;
+
+ if (stat)
+ *stat = 0;
+
+ dim = GFC_DESCRIPTOR_RANK (a);
+ type_size = GFC_DESCRIPTOR_SIZE (a);
+
+ /* Source image, gather. */
+ if (source_image - 1 == image_num)
+ {
+ num_elems = 1;
+ if (dim > 0)
+ {
+ span = a->span != 0 ? a->span : type_size;
+ packed = true;
+ empty = false;
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (a, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (a, n);
+
+ empty = empty || extent[n] <= 0;
+
+ if (num_elems != GFC_DESCRIPTOR_STRIDE (a, n))
+ packed = false;
+
+ num_elems *= extent[n];
+ }
+ ssize_bytes = num_elems * type_size;
+ }
+ else
+ {
+ ssize_bytes = type_size;
+ packed = true;
+ empty = false;
+ }
+
+ prepare_collective_subroutine (ssize_bytes); // broadcast barrier 1
+ this_shared_ptr = get_obj_ptr (image_num);
+ if (packed)
+ memcpy (this_shared_ptr, a->base_addr, ssize_bytes);
+ else
+ {
+ char *src = (char *) a->base_addr;
+ char * restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+
+ memcpy (dest, src, type_size);
+ dest += type_size;
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it
+ and increment the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+ collsub_sync (ci); /* Broadcast barrier 2. */
+ }
+ else /* Target image, scatter. */
+ {
+ collsub_sync (ci); /* Broadcast barrier 1. */
+ packed = 1;
+ num_elems = 1;
+ span = a->span != 0 ? a->span : type_size;
+
+ for (index_type n = 0; n < dim; n++)
+ {
+ index_type stride_n;
+ count[n] = 0;
+ stride_n = GFC_DESCRIPTOR_STRIDE (a, n);
+ stride[n] = stride_n * type_size;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (a, n);
+ if (extent[n] <= 0)
+ {
+ packed = true;
+ num_elems = 0;
+ break;
+ }
+ if (num_elems != stride_n)
+ packed = false;
+
+ num_elems *= extent[n];
+ }
+ ssize = num_elems * type_size;
+      prepare_collective_subroutine (ssize); /* Broadcast barrier 2. */
+ other_shared_ptr = get_obj_ptr (source_image - 1);
+ if (packed)
+ memcpy (a->base_addr, other_shared_ptr, ssize);
+ else
+ {
+ char *src = other_shared_ptr;
+ char * restrict dest = (char *) a->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ memcpy (dest, src, type_size);
+ src += span;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci); /* Broadcast barrier 3. */
+}
+
+#endif
diff --git a/libgfortran/nca/collective_subroutine.h b/libgfortran/nca/collective_subroutine.h
new file mode 100644
index 0000000..6147dd6
--- /dev/null
+++ b/libgfortran/nca/collective_subroutine.h
@@ -0,0 +1,44 @@
+
+#ifndef COLLECTIVE_SUBROUTINE_HDR
+#define COLLECTIVE_SUBROUTINE_HDR
+
+#include "shared_memory.h"
+
+typedef struct collsub_iface_shared
+{
+ size_t curr_size;
+ shared_mem_ptr collsub_buf;
+ pthread_barrier_t barrier;
+ pthread_mutex_t mutex;
+} collsub_iface_shared;
+
+typedef struct collsub_iface
+{
+ collsub_iface_shared *s;
+ allocator *a;
+ shared_memory *sm;
+} collsub_iface;
+
+void collsub_broadcast_scalar (collsub_iface *, void *, index_type, int);
+internal_proto (collsub_broadcast_scalar);
+
+void collsub_broadcast_array (collsub_iface *, gfc_array_char *, int);
+internal_proto (collsub_broadcast_array);
+
+void collsub_reduce_array (collsub_iface *, gfc_array_char *, int *,
+ void (*) (void *, void *));
+internal_proto (collsub_reduce_array);
+
+void collsub_reduce_scalar (collsub_iface *, void *, index_type, int *,
+ void (*) (void *, void *));
+internal_proto (collsub_reduce_scalar);
+
+void collsub_sync (collsub_iface *);
+internal_proto (collsub_sync);
+
+void collsub_iface_init (collsub_iface *, alloc_iface *, shared_memory *);
+internal_proto (collsub_iface_init);
+
+void * get_collsub_buf (collsub_iface *ci, size_t size);
+internal_proto (get_collsub_buf);
+#endif
diff --git a/libgfortran/nca/hashmap.c b/libgfortran/nca/hashmap.c
new file mode 100644
index 0000000..61f5487
--- /dev/null
+++ b/libgfortran/nca/hashmap.c
@@ -0,0 +1,447 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "libgfortran.h"
+#include "hashmap.h"
+#include <string.h>
+
+#define INITIAL_BITNUM (5)
+#define INITIAL_SIZE (1<<INITIAL_BITNUM)
+#define CRITICAL_LOOKAHEAD (16)
+
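+/* The table uses open addressing: an entry for a given id lives at the slot
+   determined by hash (id), or within at most CRITICAL_LOOKAHEAD slots after
+   it (with wrap-around).  Each slot records the maximum lookahead of the
+   entries hashing to it, so lookups only scan that window; if an insertion
+   finds no free slot within the window, the table is enlarged and rehashed.  */
+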
+static ssize_t n_ent;
+
+typedef struct {
+ memid id;
+ shared_mem_ptr p; /* If p == SHMPTR_NULL, the entry is empty. */
+ size_t s;
+ int max_lookahead;
+ int refcnt;
+} hashmap_entry;
+
+static ssize_t
+num_entries (hashmap_entry *data, size_t size)
+{
+ ssize_t i;
+ ssize_t ret = 0;
+ for (i = 0; i < size; i++)
+ {
+ if (!SHMPTR_IS_NULL (data[i].p))
+ ret ++;
+ }
+ return ret;
+}
+
+/* 64 bit to 64 bit hash function. */
+
+/*
+static inline uint64_t
+hash (uint64_t x)
+{
+ return x * 11400714819323198485lu;
+}
+*/
+
+#define ASSERT_HM(hm, cond) assert_hashmap(hm, cond, #cond)
+
+static void
+assert_hashmap(hashmap *hm, bool asserted, const char *cond)
+{
+ if (!asserted)
+ {
+      dprintf (2, "%s\n", cond);
+ dump_hm(hm);
+ }
+ assert(asserted);
+}
+
+static inline uint64_t
+hash (uint64_t key)
+{
+ key ^= (key >> 30);
+ key *= 0xbf58476d1ce4e5b9ul;
+ key ^= (key >> 27);
+ key *= 0x94d049bb133111ebul;
+ key ^= (key >> 31);
+
+ return key;
+}
+
+/* Gets a pointer to the current data in the hashmap. */
+static inline hashmap_entry *
+get_data(hashmap *hm)
+{
+ return SHMPTR_AS (hashmap_entry *, hm->s->data, hm->sm);
+}
+
+/* Generate mask from current number of bits. */
+
+static inline intptr_t
+gen_mask (hashmap *hm)
+{
+ return (1 << hm->s->bitnum) - 1;
+}
+
+/* Add with wrap-around at hashmap size. */
+
+static inline size_t
+hmiadd (hashmap *hm, size_t s, ssize_t o) {
+ return (s + o) & gen_mask (hm);
+}
+
+/* Get the expected offset for entry id. */
+
+static inline ssize_t
+get_expected_offset (hashmap *hm, memid id)
+{
+ return hash(id) >> (PTR_BITS - hm->s->bitnum);
+}
+
+/* Initialize the hashmap. */
+
+void
+hashmap_init (hashmap *hm, hashmap_shared *hs, allocator *a,
+ shared_memory *mem)
+{
+ hashmap_entry *data;
+ hm->s = hs;
+ hm->sm = mem;
+ hm->s->data = shared_malloc (a, INITIAL_SIZE * sizeof(hashmap_entry));
+ data = get_data (hm);
+ memset(data, '\0', INITIAL_SIZE*sizeof(hashmap_entry));
+
+ for (int i = 0; i < INITIAL_SIZE; i++)
+ data[i].p = SHMPTR_NULL;
+
+ hm->s->size = INITIAL_SIZE;
+ hm->s->bitnum = INITIAL_BITNUM;
+ hm->a = a;
+}
+
+/* Check whether an entry for id exists in the range between the expected
+   position and the maximum lookahead.  */
+
+static ssize_t
+scan_inside_lookahead (hashmap *hm, ssize_t expected_off, memid id)
+{
+ ssize_t lookahead;
+ hashmap_entry *data;
+
+ data = get_data (hm);
+ lookahead = data[expected_off].max_lookahead;
+ ASSERT_HM (hm, lookahead < CRITICAL_LOOKAHEAD);
+
+ for (int i = 0; i <= lookahead; i++) /* For performance, this could
+ iterate backwards. */
+ if (data[hmiadd (hm, expected_off, i)].id == id)
+ return hmiadd (hm, expected_off, i);
+
+ return -1;
+}
+
+/* Scan for the next empty slot we can use. Returns offset relative
+ to the expected position. */
+
+static ssize_t
+scan_empty (hashmap *hm, ssize_t expected_off, memid id)
+{
+ hashmap_entry *data;
+
+ data = get_data(hm);
+ for (int i = 0; i < CRITICAL_LOOKAHEAD; i++)
+ if (SHMPTR_IS_NULL (data[hmiadd (hm, expected_off, i)].p))
+ return i;
+
+ return -1;
+}
+
+/* Search the hashmap for id. */
+
+hashmap_search_result
+hashmap_get (hashmap *hm, memid id)
+{
+ hashmap_search_result ret;
+ hashmap_entry *data;
+ size_t expected_offset;
+ ssize_t res;
+
+ data = get_data (hm);
+ expected_offset = get_expected_offset (hm, id);
+ res = scan_inside_lookahead (hm, expected_offset, id);
+
+ if (res != -1)
+ ret = ((hashmap_search_result)
+ { .p = data[res].p, .size=data[res].s, .res_offset = res });
+ else
+ ret.p = SHMPTR_NULL;
+
+ return ret;
+}
+
+/* Return size of a hashmap search result. */
+size_t
+hm_search_result_size (hashmap_search_result *res)
+{
+ return res->size;
+}
+
+/* Return pointer of a hashmap search result. */
+
+shared_mem_ptr
+hm_search_result_ptr (hashmap_search_result *res)
+{
+ return res->p;
+}
+
+/* Return whether a hashmap search result found an entry. */
+
+bool
+hm_search_result_contains (hashmap_search_result *res)
+{
+ return !SHMPTR_IS_NULL(res->p);
+}
+
+/* Enlarge hashmap memory. */
+
+static void
+enlarge_hashmap_mem (hashmap *hm, hashmap_entry **data, bool f)
+{
+ shared_mem_ptr old_data_p;
+ size_t old_size;
+
+ old_data_p = hm->s->data;
+ old_size = hm->s->size;
+
+ hm->s->data = shared_malloc (hm->a, (hm->s->size *= 2)*sizeof(hashmap_entry));
+ fprintf (stderr,"enlarge_hashmap_mem: %ld\n", hm->s->data.offset);
+ hm->s->bitnum++;
+
+ *data = get_data(hm);
+ for (size_t i = 0; i < hm->s->size; i++)
+ (*data)[i] = ((hashmap_entry) { .id = 0, .p = SHMPTR_NULL, .s=0,
+ .max_lookahead = 0, .refcnt=0 });
+
+ if (f)
+ shared_free(hm->a, old_data_p, old_size);
+}
+
+/* Resize hashmap. */
+
+static void
+resize_hm (hashmap *hm, hashmap_entry **data)
+{
+ shared_mem_ptr old_data_p;
+ hashmap_entry *old_data, *new_data;
+ size_t old_size;
+  ssize_t new_offset, initial_index, new_index;
+ memid id;
+ ssize_t max_lookahead;
+ ssize_t old_count, new_count;
+
+ /* old_data points to the old block containing the hashmap. We
+ redistribute the data from there into the new block. */
+
+ old_data_p = hm->s->data;
+ old_data = *data;
+ old_size = hm->s->size;
+ old_count = num_entries (old_data, old_size);
+
+ fprintf(stderr, "Occupancy at resize: %f\n", ((double) old_count)/old_size);
+
+ //fprintf (stderr,"\n====== Resizing hashmap =========\n\nOld map:\n\n");
+ //dump_hm (hm);
+ enlarge_hashmap_mem (hm, &new_data, false);
+ //fprintf (stderr,"old_data: %p new_data: %p\n", old_data, new_data);
+ retry_resize:
+ for (size_t i = 0; i < old_size; i++)
+ {
+ if (SHMPTR_IS_NULL (old_data[i].p))
+ continue;
+
+ id = old_data[i].id;
+      initial_index = get_expected_offset (hm, id);
+      new_offset = scan_empty (hm, initial_index, id);
+
+ /* If we didn't find a free slot, just resize the hashmap
+ again. */
+ if (new_offset == -1)
+ {
+ enlarge_hashmap_mem (hm, &new_data, true);
+ //fprintf (stderr,"\n====== AGAIN Resizing hashmap =========\n\n");
+ //fprintf (stderr,"old_data: %p new_data %p\n", old_data, new_data);
+ goto retry_resize; /* Sue me. */
+ }
+
+ ASSERT_HM (hm, new_offset < CRITICAL_LOOKAHEAD);
+      new_index = hmiadd (hm, initial_index, new_offset);
+      max_lookahead = new_data[initial_index].max_lookahead;
+      new_data[initial_index].max_lookahead
+ = new_offset > max_lookahead ? new_offset : max_lookahead;
+
+ new_data[new_index] = ((hashmap_entry) {.id = id, .p = old_data[i].p,
+ .s = old_data[i].s,
+ .max_lookahead = new_data[new_index].max_lookahead,
+ .refcnt = old_data[i].refcnt});
+ }
+ new_count = num_entries (new_data, hm->s->size);
+ //fprintf (stderr,"Number of elements: %ld to %ld\n", old_count, new_count);
+ //fprintf (stderr,"============ After resizing: =======\n\n");
+ //dump_hm (hm);
+
+ shared_free (hm->a, old_data_p, old_size);
+ *data = new_data;
+}
+
+/* Set an entry in the hashmap. */
+
+void
+hashmap_set (hashmap *hm, memid id, hashmap_search_result *hsr,
+ shared_mem_ptr p, size_t size)
+{
+ hashmap_entry *data;
+ ssize_t expected_offset, lookahead;
+ ssize_t empty_offset;
+ ssize_t delta;
+
+ // //fprintf (stderr,"hashmap_set: id = %-16p\n", (void *) id);
+ data = get_data(hm);
+
+  if (hsr)
+    {
+      data[hsr->res_offset].s = size;
+      data[hsr->res_offset].p = p;
+      return;
+    }
+
+ expected_offset = get_expected_offset (hm, id);
+ while ((delta = scan_empty (hm, expected_offset, id)) == -1)
+ {
+ resize_hm (hm, &data);
+ expected_offset = get_expected_offset (hm, id);
+ }
+
+ empty_offset = hmiadd (hm, expected_offset, delta);
+ lookahead = data[expected_offset].max_lookahead;
+ data[expected_offset].max_lookahead = delta > lookahead ? delta : lookahead;
+ data[empty_offset] = ((hashmap_entry) {.id = id, .p = p, .s = size,
+ .max_lookahead = data[empty_offset].max_lookahead,
+ .refcnt = 1});
+
+ n_ent ++;
+ fprintf (stderr,"hashmap_set: Setting %p at %p, n_ent = %ld\n", (void *) id, data + empty_offset,
+ n_ent);
+ // dump_hm (hm);
+ // fprintf(stderr, "--------------------------------------------------\n");
+ /* TODO: Shouldn't reset refcnt, but this doesn't matter at the
+ moment because of the way the function is used. */
+}
+
+/* Change the refcount of a hashmap entry. */
+
+static int
+hashmap_change_refcnt (hashmap *hm, memid id, hashmap_search_result *res,
+ int delta)
+{
+ hashmap_entry *data;
+ hashmap_search_result r;
+ hashmap_search_result *pr;
+ int ret;
+ hashmap_entry *entry;
+
+ data = get_data (hm);
+
+ if (res)
+ pr = res;
+ else
+ {
+ r = hashmap_get (hm, id);
+ pr = &r;
+ }
+
+ entry = &data[pr->res_offset];
+ ret = (entry->refcnt += delta);
+ if (ret == 0)
+ {
+ n_ent --;
+ //fprintf (stderr, "hashmap_change_refcnt: removing %p at %p, n_ent = %ld\n",
+ // (void *) id, entry, n_ent);
+ entry->id = 0;
+ entry->p = SHMPTR_NULL;
+ entry->s = 0;
+ }
+
+ return ret;
+}
+
+/* Increase hashmap entry refcount. */
+
+void
+hashmap_inc (hashmap *hm, memid id, hashmap_search_result * res)
+{
+ int ret;
+ ret = hashmap_change_refcnt (hm, id, res, 1);
+ ASSERT_HM (hm, ret > 0);
+}
+
+/* Decrease hashmap entry refcount. */
+
+int
+hashmap_dec (hashmap *hm, memid id, hashmap_search_result * res)
+{
+ int ret;
+ ret = hashmap_change_refcnt (hm, id, res, -1);
+ ASSERT_HM (hm, ret >= 0);
+ return ret;
+}
+
+#define PE(str, ...) fprintf(stderr, INDENT str, ##__VA_ARGS__)
+#define INDENT ""
+
+void
+dump_hm(hashmap *hm) {
+ hashmap_entry *data;
+ size_t exp;
+ size_t occ_num = 0;
+ PE("h %p (size: %lu, bitnum: %d)\n", hm, hm->s->size, hm->s->bitnum);
+ data = get_data (hm);
+ fprintf (stderr,"offset = %lx data = %p\n", (unsigned long) hm->s->data.offset, data);
+
+#undef INDENT
+#define INDENT " "
+ for (size_t i = 0; i < hm->s->size; i++) {
+ exp = get_expected_offset(hm, data[i].id);
+ if (!SHMPTR_IS_NULL(data[i].p)) {
+ PE("%2lu. (exp: %2lu w la %d) id %#-16lx p %#-14lx s %-7lu -- la %u ref %u %-16p\n",
+ i, exp, data[exp].max_lookahead, data[i].id, data[i].p.offset, data[i].s,
+ data[i].max_lookahead, data[i].refcnt, data + i);
+ occ_num++;
+ }
+ else
+ PE("%2lu. empty -- la %u %p\n", i, data[i].max_lookahead,
+ data + i);
+
+ }
+#undef INDENT
+#define INDENT ""
+ PE("occupancy: %lu %f\n", occ_num, ((double) occ_num)/hm->s->size);
+}
diff --git a/libgfortran/nca/hashmap.h b/libgfortran/nca/hashmap.h
new file mode 100644
index 0000000..4d999e3
--- /dev/null
+++ b/libgfortran/nca/hashmap.h
@@ -0,0 +1,70 @@
+#ifndef HASHMAP_H
+
+#include "shared_memory.h"
+#include "allocator.h"
+
+#include <stdint.h>
+#include <stddef.h>
+
+
+/* Data structures and variables:
+
+ memid is a unique identifier for the coarray, the address of its
+ descriptor (which is unique in the program). */
+typedef intptr_t memid;
+
+typedef struct {
+ shared_mem_ptr data;
+ size_t size;
+ int bitnum;
+} hashmap_shared;
+
+typedef struct hashmap
+{
+ hashmap_shared *s;
+ shared_memory *sm;
+ allocator *a;
+} hashmap;
+
+typedef struct {
+ shared_mem_ptr p;
+ size_t size;
+ ssize_t res_offset;
+} hashmap_search_result;
+
+void hashmap_init (hashmap *, hashmap_shared *, allocator *a, shared_memory *);
+
+/* Look up memid in the hashmap. The result can be inspected via the
+ hm_search_result_* functions. */
+
+hashmap_search_result hashmap_get (hashmap *, memid);
+
+/* Given a search result, returns the size. */
+size_t hm_search_result_size (hashmap_search_result *);
+
+/* Given a search result, returns the pointer. */
+shared_mem_ptr hm_search_result_ptr (hashmap_search_result *);
+
+/* Given a search result, returns whether something was found. */
+bool hm_search_result_contains (hashmap_search_result *);
+
+/* Sets the hashmap entry for memid to shared_mem_ptr and
+ size_t. Optionally, if a hashmap_search_result is supplied, it is
+ used to make the lookup faster. */
+
+void hashmap_set (hashmap *, memid, hashmap_search_result *, shared_mem_ptr p,
+ size_t);
+
+/* Increments the hashmap entry for memid. Optionally, if a
+ hashmap_search_result is supplied, it is used to make the lookup
+ faster. */
+
+void hashmap_inc (hashmap *, memid, hashmap_search_result *);
+
+/* Same, but decrement. */
+int hashmap_dec (hashmap *, memid, hashmap_search_result *);
+
+void dump_hm (hashmap *hm);
+
+#define HASHMAP_H
+#endif
diff --git a/libgfortran/nca/libcoarraynative.h b/libgfortran/nca/libcoarraynative.h
new file mode 100644
index 0000000..507de0c
--- /dev/null
+++ b/libgfortran/nca/libcoarraynative.h
@@ -0,0 +1,103 @@
+/* Copyright (C) 2019-2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef LIBGFOR_H
+#error "Include libgfortran.h before libcoarraynative.h"
+#endif
+
+#ifndef COARRAY_NATIVE_HDR
+#define COARRAY_NATIVE_HDR
+
+#include "libgfortran.h"
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <stdio.h>
+
+
+/* This is to create a _nca_gfortrani_ prefix for all variables and
+   functions used only by nca. */
+#if 0
+#define NUM_ADDR_BITS (8 * sizeof (int *))
+#endif
+
+#define DEBUG_NATIVE_COARRAY 1
+
+#ifdef DEBUG_NATIVE_COARRAY
+#define DEBUG_PRINTF(...) dprintf (2,__VA_ARGS__)
+#else
+#define DEBUG_PRINTF(...) do {} while(0)
+#endif
+
+#include "allocator.h"
+#include "hashmap.h"
+#include "sync.h"
+#include "lock.h"
+#include "collective_subroutine.h"
+
+typedef struct {
+ pthread_barrier_t barrier;
+ int maximg;
+} ipcollsub;
+
+typedef enum {
+ IMAGE_UNKNOWN = 0,
+ IMAGE_OK,
+ IMAGE_FAILED
+} image_status;
+
+typedef struct {
+ image_status status;
+ pid_t pid;
+} image_tracker;
+
+typedef struct {
+ int has_failed_image;
+ image_tracker images[];
+} master;
+
+typedef struct {
+ int image_num;
+ master *m;
+} image;
+
+extern image this_image;
+
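+/* Per-process view of the shared runtime state: the number of images, the
+   shared memory segment, and the allocator, collective-subroutine and
+   synchronization interfaces built on top of it.  */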
+typedef struct {
+ int num_images;
+ shared_memory sm;
+ alloc_iface ai;
+ collsub_iface ci;
+ sync_iface si;
+} nca_local_data;
+
+extern nca_local_data *local;
+internal_proto (local);
+void ensure_initialization(void);
+internal_proto(ensure_initialization);
+
+void nca_master(void (*)(void));
+export_proto (nca_master);
+
+#endif
diff --git a/libgfortran/nca/lock.h b/libgfortran/nca/lock.h
new file mode 100644
index 0000000..4697395
--- /dev/null
+++ b/libgfortran/nca/lock.h
@@ -0,0 +1,37 @@
+/* Copyright (C) 2019-2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#ifndef COARRAY_LOCK_HDR
+#define COARRAY_LOCK_HDR
+
+#include <pthread.h>
+
+typedef struct {
+ int owner;
+ int initialized;
+ pthread_mutex_t arr[];
+} lock_array;
+
+#endif
diff --git a/libgfortran/nca/shared_memory.c b/libgfortran/nca/shared_memory.c
new file mode 100644
index 0000000..bc3093d
--- /dev/null
+++ b/libgfortran/nca/shared_memory.c
@@ -0,0 +1,221 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "libgfortran.h"
+#include "libcoarraynative.h"
+#include "util.h"
+#include <sys/mman.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "shared_memory.h"
+
+/* This implements shared memory based on POSIX mmap.  We start with a
+   memory block the size of the global shared memory metadata, rounded
+   up to one page, and enlarge it as needed.
+
+   We address the memory via a shared_mem_ptr, which is an offset into
+   the shared memory block.  The metadata is situated at offset 0.
+
+ In order to be able to resize the memory and to keep pointers
+ valid, we keep the old mapping around, so the memory is actually
+ visible several times to the process. Thus, pointers returned by
+ shared_memory_get_mem_with_alignment remain valid even when
+ resizing. */
+
+/* Global metadata for shared memory, always kept at offset 0. */
+
+typedef struct
+{
+ size_t size;
+ size_t used;
+ int fd;
+} global_shared_memory_meta;
+
+/* Type realization for opaque type shared_memory. */
+
+typedef struct shared_memory_act
+{
+ global_shared_memory_meta *meta;
+ void *header;
+ size_t last_seen_size;
+
+ /* We don't need to free these. We probably also don't need to keep
+ track of them, but it is much more future proof if we do. */
+
+ size_t num_local_allocs;
+
+ struct local_alloc {
+ void *base;
+ size_t size;
+ } allocs[];
+
+} shared_memory_act;
+
+/* Convenience wrapper for mmap. */
+
+static inline void *
+map_memory (int fd, size_t size, off_t offset)
+{
+ void *ret = mmap (NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset);
+ if (ret == MAP_FAILED)
+ {
+ perror("mmap failed");
+ exit(1);
+ }
+ return ret;
+}
+
+/* Returns the size of shared_memory_act. */
+
+static inline size_t
+get_shared_memory_act_size (int nallocs)
+{
+ return sizeof(shared_memory_act) + nallocs*sizeof(struct local_alloc);
+}
+
+/* When the shared memory block is enlarged, we need to map it into
+ virtual memory again. */
+
+static inline shared_memory_act *
+new_base_mapping (shared_memory_act *mem)
+{
+ shared_memory_act *newmem;
+ /* We need another entry in the alloc table. */
+ mem->num_local_allocs++;
+ newmem = realloc (mem, get_shared_memory_act_size (mem->num_local_allocs));
+ newmem->allocs[newmem->num_local_allocs - 1]
+ = ((struct local_alloc)
+ {.base = map_memory (newmem->meta->fd, newmem->meta->size, 0),
+ .size = newmem->meta->size});
+ newmem->last_seen_size = newmem->meta->size;
+ return newmem;
+}
+
+/* Return the most recently allocated base pointer. */
+
+static inline void *
+last_base (shared_memory_act *mem)
+{
+ return mem->allocs[mem->num_local_allocs - 1].base;
+}
+
+/* Get a pointer into the shared memory block with the given alignment
+   (works similarly to sbrk). */
+
+shared_mem_ptr
+shared_memory_get_mem_with_alignment (shared_memory_act **pmem, size_t size,
+ size_t align)
+{
+ shared_memory_act *mem = *pmem;
+ size_t new_size;
+ size_t orig_used;
+
+ /* Offset into memory block with alignment. */
+ size_t used_wa = alignto (mem->meta->used, align);
+
+ if (used_wa + size <= mem->meta->size)
+ {
+ memset(last_base(mem) + mem->meta->used, 0xCA, used_wa - mem->meta->used);
+ memset(last_base(mem) + used_wa, 0x42, size);
+ mem->meta->used = used_wa + size;
+
+ DEBUG_PRINTF ("Shared Memory: New memory of size %#lx requested, returned %#lx\n", size, used_wa);
+ return (shared_mem_ptr) {.offset = used_wa};
+ }
+
+ /* We need to enlarge the memory segment. Double the size if that
+ is big enough, otherwise get what's needed. */
+
+  if (mem->meta->size * 2 >= used_wa + size)
+    new_size = mem->meta->size * 2;
+  else
+    new_size = round_to_pagesize (used_wa + size);
+
+ orig_used = mem->meta->used;
+ mem->meta->size = new_size;
+ mem->meta->used = used_wa + size;
+ ftruncate (mem->meta->fd, mem->meta->size);
+ /* This also sets the new base pointer where the shared memory
+ can be found in the address space. */
+
+ mem = new_base_mapping (mem);
+
+ *pmem = mem;
+ assert(used_wa != 0);
+
+ dprintf(2, "Shared Memory: New memory of size %#lx requested, returned %#lx\n", size, used_wa);
+ memset(last_base(mem) + orig_used, 0xCA, used_wa - orig_used);
+ memset(last_base(mem) + used_wa, 0x42, size);
+
+ return (shared_mem_ptr) {.offset = used_wa};
+}
+
+/* If another image changed the size, update the size accordingly. */
+
+void
+shared_memory_prepare (shared_memory_act **pmem)
+{
+ shared_memory_act *mem = *pmem;
+ if (mem->meta->size == mem->last_seen_size)
+ return;
+ mem = new_base_mapping(mem);
+ *pmem = mem;
+}
+
+/* Initialize the memory with one page, the shared metadata of the
+ shared memory is stored at the beginning. */
+
+void
+shared_memory_init (shared_memory_act **pmem)
+{
+ shared_memory_act *mem;
+ int fd;
+ size_t initial_size = round_to_pagesize (sizeof (global_shared_memory_meta));
+
+ mem = malloc (get_shared_memory_act_size(1));
+ fd = get_shmem_fd();
+
+ ftruncate(fd, initial_size);
+ mem->meta = map_memory (fd, initial_size, 0);
+ *mem->meta = ((global_shared_memory_meta) {.size = initial_size,
+ .used = sizeof(global_shared_memory_meta),
+ .fd = fd});
+ mem->last_seen_size = initial_size;
+ mem->num_local_allocs = 1;
+ mem->allocs[0] = ((struct local_alloc) {.base = mem->meta,
+ .size = initial_size});
+
+ *pmem = mem;
+}
+
+/* Convert a shared memory pointer (i.e. an offset into the shared
+ memory block) to a pointer. */
+
+void *
+shared_mem_ptr_to_void_ptr(shared_memory_act **pmem, shared_mem_ptr smp)
+{
+ return last_base(*pmem) + smp.offset;
+}
+
diff --git a/libgfortran/nca/shared_memory.h b/libgfortran/nca/shared_memory.h
new file mode 100644
index 0000000..4adc104
--- /dev/null
+++ b/libgfortran/nca/shared_memory.h
@@ -0,0 +1,78 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef SHARED_MEMORY_H
+#include <stdbool.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <sys/types.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <limits.h>
+
+/* A struct to serve as an opaque shared memory object. */
+
+struct shared_memory_act;
+typedef struct shared_memory_act * shared_memory;
+
+#define SHMPTR_NULL ((shared_mem_ptr) {.offset = -1})
+#define SHMPTR_IS_NULL(x) (x.offset == -1)
+
+#define SHMPTR_DEREF(x, s, sm) \
+  ((x) = *(__typeof(x) *) shared_mem_ptr_to_void_ptr (sm, s))
+#define SHMPTR_AS(t, s, sm) ((t) shared_mem_ptr_to_void_ptr(sm, s))
+#define SHMPTR_SET(v, s, sm) (v = SHMPTR_AS(__typeof(v), s, sm))
+#define SHMPTR_EQUALS(s1, s2) (s1.offset == s2.offset)
+
+#define SHARED_MEMORY_RAW_ALLOC(mem, t, n) \
+ shared_memory_get_mem_with_alignment(mem, sizeof(t)*n, __alignof__(t))
+
+#define SHARED_MEMORY_RAW_ALLOC_PTR(mem, t) \
+ SHMPTR_AS (t *, SHARED_MEMORY_RAW_ALLOC (mem, t, 1), mem)
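+
+/* Typical use (as in collsub_iface_init): allocate a shared object and get a
+   process-local pointer to it in one step:
+
+     collsub_iface_shared *s
+       = SHARED_MEMORY_RAW_ALLOC_PTR (sm, collsub_iface_shared);  */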
+
+/* A shared-memory pointer is implemented as an offset into the shared
+ memory region. */
+
+typedef struct shared_mem_ptr
+{
+ ssize_t offset;
+} shared_mem_ptr;
+
+void shared_memory_init (shared_memory *);
+internal_proto (shared_memory_init);
+
+void shared_memory_prepare (shared_memory *);
+internal_proto (shared_memory_prepare);
+
+shared_mem_ptr shared_memory_get_mem_with_alignment (shared_memory *mem,
+ size_t size, size_t align);
+internal_proto (shared_memory_get_mem_with_alignment);
+
+void *shared_mem_ptr_to_void_ptr (shared_memory *, shared_mem_ptr);
+internal_proto (shared_mem_ptr_to_void_ptr);
+
+#define SHARED_MEMORY_H
+#endif
diff --git a/libgfortran/nca/sync.c b/libgfortran/nca/sync.c
new file mode 100644
index 0000000..6d7f7ca
--- /dev/null
+++ b/libgfortran/nca/sync.c
@@ -0,0 +1,156 @@
+/* Copyright (C) 2019-2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include <string.h>
+
+#include "libgfortran.h"
+#include "libcoarraynative.h"
+#include "sync.h"
+#include "util.h"
+
+static void
+sync_all_init (pthread_barrier_t *b)
+{
+ pthread_barrierattr_t battr;
+ pthread_barrierattr_init (&battr);
+ pthread_barrierattr_setpshared (&battr, PTHREAD_PROCESS_SHARED);
+ pthread_barrier_init (b, &battr, local->num_images);
+ pthread_barrierattr_destroy (&battr);
+}
+
+static inline void
+lock_table (sync_iface *si)
+{
+ pthread_mutex_lock (&si->cis->table_lock);
+}
+
+static inline void
+unlock_table (sync_iface *si)
+{
+ pthread_mutex_unlock (&si->cis->table_lock);
+}
+
+static inline void
+wait_table_cond (sync_iface *si, pthread_cond_t *cond)
+{
+ pthread_cond_wait (cond,&si->cis->table_lock);
+}
+
+/* The initialization of the table has to be delayed, since we might not
+   know the number of images when the library is initialized.  */
+
+static int *
+get_locked_table (sync_iface *si)
+{
+ lock_table(si);
+ return si->table;
+ /*
+ if (si->table)
+ return si->table;
+ else if (!SHMPTR_IS_NULL(si->cis->table))
+ {
+ si->table = SHMPTR_AS(int *, si->cis->table, si->sm);
+ si->triggers = SHMPTR_AS(pthread_cond_t *, si->cis->triggers, si->sm);
+ return si->table;
+ }
+
+ si->cis->table =
+ shared_malloc(si->a, sizeof(int)*local->num_images * local->num_images);
+ si->cis->triggers =
+ shared_malloc(si->a, sizeof(int)*local->num_images);
+
+ si->table = SHMPTR_AS(int *, si->cis->table, si->sm);
+ si->triggers = SHMPTR_AS(pthread_cond_t *, si->cis->triggers, si->sm);
+
+ for (int i = 0; i < local->num_images; i++)
+ initialize_shared_condition (&si->triggers[i]);
+
+ return si->table;
+ */
+}
+
+void
+sync_iface_init (sync_iface *si, alloc_iface *ai, shared_memory *sm)
+{
+  si->cis = SHMPTR_AS (sync_iface_shared *,
+		       shared_malloc (get_allocator (ai),
+				      sizeof (sync_iface_shared)),
+		       sm);
+ DEBUG_PRINTF ("%s: num_images is %d\n", __PRETTY_FUNCTION__, local->num_images);
+
+ sync_all_init (&si->cis->sync_all);
+ initialize_shared_mutex (&si->cis->table_lock);
+ si->sm = sm;
+ si->a = get_allocator(ai);
+
+ si->cis->table =
+ shared_malloc(si->a, sizeof(int)*local->num_images * local->num_images);
+ si->cis->triggers =
+ shared_malloc(si->a, sizeof(pthread_cond_t)*local->num_images);
+
+ si->table = SHMPTR_AS(int *, si->cis->table, si->sm);
+ si->triggers = SHMPTR_AS(pthread_cond_t *, si->cis->triggers, si->sm);
+
+ for (int i = 0; i < local->num_images; i++)
+ initialize_shared_condition (&si->triggers[i]);
+}
+
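+/* Implement SYNC IMAGES.  The table is a num_images x num_images matrix of
+   counters: table[q + num_images * p] counts how often image p has entered a
+   sync with image q.  An image first increments its own counters for all
+   images in IMAGES and signals their condition variables, then waits until,
+   for every partner, its counter towards that partner equals the partner's
+   counter towards it.  */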
+void
+sync_table (sync_iface *si, int *images, size_t size)
+{
+#ifdef DEBUG_NATIVE_COARRAY
+  dprintf (2, "Image %d waiting for these %zu images: ", this_image.image_num + 1, size);
+  for (size_t d_i = 0; d_i < size; d_i++)
+ dprintf (2, "%d ", images[d_i]);
+ dprintf (2, "\n");
+#endif
+ size_t i;
+ int done;
+ int *table = get_locked_table(si);
+ for (i = 0; i < size; i++)
+ {
+ table[images[i] - 1 + local->num_images*this_image.image_num]++;
+ pthread_cond_signal (&si->triggers[images[i] - 1]);
+ }
+ for (;;)
+ {
+ done = 1;
+ for (i = 0; i < size; i++)
+ done &= si->table[images[i] - 1 + this_image.image_num*local->num_images]
+ == si->table[this_image.image_num + (images[i] - 1)*local->num_images];
+ if (done)
+ break;
+ wait_table_cond (si, &si->triggers[this_image.image_num]);
+ }
+ unlock_table (si);
+}
+
+void
+sync_all (sync_iface *si)
+{
+
+ DEBUG_PRINTF("Syncing all\n");
+
+ pthread_barrier_wait (&si->cis->sync_all);
+}
diff --git a/libgfortran/nca/sync.h b/libgfortran/nca/sync.h
new file mode 100644
index 0000000..4b49441
--- /dev/null
+++ b/libgfortran/nca/sync.h
@@ -0,0 +1,56 @@
+/* Copyright (C) 2019-2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef IPSYNC_HDR
+#define IPSYNC_HDR
+
+#include "shared_memory.h"
+#include "alloc.h"
+#include<pthread.h>
+
+typedef struct {
+ pthread_barrier_t sync_all;
+ pthread_mutex_t table_lock;
+ shared_mem_ptr table;
+ shared_mem_ptr triggers;
+} sync_iface_shared;
+
+typedef struct {
+ sync_iface_shared *cis;
+ shared_memory *sm;
+ allocator *a;
+ int *table; // we can cache the table and the trigger pointers here
+ pthread_cond_t *triggers;
+} sync_iface;
+
+void sync_iface_init (sync_iface *, alloc_iface *, shared_memory *);
+internal_proto (sync_iface_init);
+
+void sync_all (sync_iface *);
+internal_proto (sync_all);
+
+void sync_table (sync_iface *, int *, size_t);
+internal_proto (sync_table);
+
+#endif
diff --git a/libgfortran/nca/util.c b/libgfortran/nca/util.c
new file mode 100644
index 0000000..5805218
--- /dev/null
+++ b/libgfortran/nca/util.c
@@ -0,0 +1,197 @@
+#include "libgfortran.h"
+#include "util.h"
+#include <string.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#define MEMOBJ_NAME "/gfortran_coarray_memfd"
+
+size_t
+alignto (size_t size, size_t align)
+{
+  return align * ((size + align - 1) / align);
+}
+
+size_t pagesize;
+
+size_t
+round_to_pagesize (size_t s)
+{
+  return alignto (s, pagesize);
+}
+
+size_t
+next_power_of_two (size_t size)
+{
+  /* FIXME: undefined for size <= 1, since __builtin_clzl (0) is undefined.  */
+  return (size_t) 1 << (PTR_BITS - __builtin_clzl (size - 1));
+}
+
+void
+initialize_shared_mutex (pthread_mutex_t *mutex)
+{
+ pthread_mutexattr_t mattr;
+ pthread_mutexattr_init (&mattr);
+ pthread_mutexattr_setpshared (&mattr, PTHREAD_PROCESS_SHARED);
+ pthread_mutex_init (mutex, &mattr);
+ pthread_mutexattr_destroy (&mattr);
+}
+
+void
+initialize_shared_condition (pthread_cond_t *cond)
+{
+ pthread_condattr_t cattr;
+ pthread_condattr_init (&cattr);
+ pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED);
+ pthread_cond_init (cond, &cattr);
+ pthread_condattr_destroy (&cattr);
+}
+
+int
+get_shmem_fd (void)
+{
+ char buffer[1<<10];
+ int fd, id;
+ id = random ();
+ do
+ {
+ snprintf (buffer, sizeof (buffer),
+ MEMOBJ_NAME "_%u_%d", (unsigned int) getpid (), id++);
+ fd = shm_open (buffer, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
+ }
+ while (fd == -1);
+ shm_unlink (buffer);
+ return fd;
+}
+
+bool
+pack_array_prepare (pack_info * restrict pi, const gfc_array_char * restrict source)
+{
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type type_size;
+ index_type ssize;
+
+ dim = GFC_DESCRIPTOR_RANK (source);
+ type_size = GFC_DESCRIPTOR_SIZE (source);
+ ssize = type_size;
+
+ pi->num_elem = 1;
+ packed = true;
+ span = source->span != 0 ? source->span : type_size;
+ for (index_type n = 0; n < dim; n++)
+ {
+ pi->stride[n] = GFC_DESCRIPTOR_STRIDE (source,n) * span;
+ pi->extent[n] = GFC_DESCRIPTOR_EXTENT (source,n);
+      if (pi->extent[n] <= 0)
+	{
+	  /* Empty array, nothing to copy.  */
+	  packed = true;
+	  pi->num_elem = 0;
+	  break;
+	}
+
+      if (ssize != pi->stride[n])
+	packed = false;
+
+ pi->num_elem *= pi->extent[n];
+ ssize *= pi->extent[n];
+ }
+
+ return packed;
+}
+
+void
+pack_array_finish (pack_info * const restrict pi, const gfc_array_char * const restrict source,
+ char * restrict dest)
+{
+ index_type dim;
+ const char *restrict src;
+
+ index_type size;
+ index_type stride0;
+ index_type count[GFC_MAX_DIMENSIONS];
+
+ dim = GFC_DESCRIPTOR_RANK (source);
+ src = source->base_addr;
+ stride0 = pi->stride[0];
+ size = GFC_DESCRIPTOR_SIZE (source);
+
+ memset (count, 0, sizeof(count));
+ while (src)
+ {
+ /* Copy the data. */
+ memcpy(dest, src, size);
+ /* Advance to the next element. */
+ dest += size;
+ src += stride0;
+ count[0]++;
+ /* Advance to the next source element. */
+ index_type n = 0;
+ while (count[n] == pi->extent[n])
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ /* We could precalculate these products, but this is a less
+ frequently used path so probably not worth it. */
+ src -= pi->stride[n] * pi->extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += pi->stride[n];
+ }
+ }
+ }
+}
+
+void
+unpack_array_finish (pack_info * const restrict pi,
+ const gfc_array_char * restrict d,
+ const char *restrict src)
+{
+ index_type stride0;
+ char * restrict dest;
+ index_type size;
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type dim;
+
+ size = GFC_DESCRIPTOR_SIZE (d);
+ stride0 = pi->stride[0];
+ dest = d->base_addr;
+ dim = GFC_DESCRIPTOR_RANK (d);
+
+  memset (count, 0, sizeof (count));
+  while (dest)
+ {
+ memcpy (dest, src, size);
+ src += size;
+ dest += stride0;
+ count[0]++;
+ index_type n = 0;
+ while (count[n] == pi->extent[n])
+ {
+ count[n] = 0;
+ dest -= pi->stride[n] * pi->extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n] ++;
+ dest += pi->stride[n];
+ }
+ }
+ }
+}
diff --git a/libgfortran/nca/util.h b/libgfortran/nca/util.h
new file mode 100644
index 0000000..9abd7ad
--- /dev/null
+++ b/libgfortran/nca/util.h
@@ -0,0 +1,86 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef UTIL_HDR
+#define UTIL_HDR
+
+#include <stdint.h>
+#include <stddef.h>
+#include <limits.h>
+#include <pthread.h>
+
+#define PTR_BITS (CHAR_BIT*sizeof(void *))
+
+size_t alignto (size_t, size_t);
+internal_proto (alignto);
+
+size_t round_to_pagesize (size_t);
+internal_proto (round_to_pagesize);
+
+size_t next_power_of_two (size_t);
+internal_proto (next_power_of_two);
+
+int get_shmem_fd (void);
+internal_proto (get_shmem_fd);
+
+void initialize_shared_mutex (pthread_mutex_t *);
+internal_proto (initialize_shared_mutex);
+
+void initialize_shared_condition (pthread_cond_t *);
+internal_proto (initialize_shared_condition);
+
+extern size_t pagesize;
+internal_proto (pagesize);
+
+/* Usage:
+ pack_info pi;
+ packed = pack_array_prepare (&pi, source);
+
+ // Awesome allocation of destptr using pi.num_elem
+ if (packed)
+ memcpy (...);
+ else
+ pack_array_finish (&pi, source, destptr);
+
+ This could also be used in in_pack_generic.c. Additionally, since
+ pack_array_prepare is the same for all type sizes, we would only have to
+ specialize pack_array_finish, saving on code size. */
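+
+/* A concrete sketch of that pattern, assuming src is a gfc_array_char
+   descriptor whose contents should end up contiguous in a freshly
+   malloc'ed buffer (error handling omitted):
+
+     pack_info pi;
+     bool packed = pack_array_prepare (&pi, src);
+     char *buf = malloc (pi.num_elem * GFC_DESCRIPTOR_SIZE (src));
+     if (packed)
+       memcpy (buf, GFC_DESCRIPTOR_DATA (src),
+               pi.num_elem * GFC_DESCRIPTOR_SIZE (src));
+     else
+       pack_array_finish (&pi, src, buf);  */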
+
+typedef struct
+{
+ index_type num_elem;
+ index_type extent[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS]; /* Stride is byte-based. */
+} pack_info;
+
+bool pack_array_prepare (pack_info *restrict, const gfc_array_char * restrict);
+internal_proto (pack_array_prepare);
+
+void pack_array_finish (pack_info * const restrict, const gfc_array_char * const restrict,
+ char * restrict);
+
+internal_proto (pack_array_finish);
+
+void unpack_array_finish (pack_info * const restrict, const gfc_array_char * const,
+ const char * restrict);
+#endif
diff --git a/libgfortran/nca/wrapper.c b/libgfortran/nca/wrapper.c
new file mode 100644
index 0000000..eeb64d3
--- /dev/null
+++ b/libgfortran/nca/wrapper.c
@@ -0,0 +1,258 @@
+/* Copyright (C) 2019-2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <string.h>
+#include "libgfortran.h"
+#include "libcoarraynative.h"
+#include "sync.h"
+#include "lock.h"
+#include "util.h"
+#include "collective_subroutine.h"
+
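+/* Integer division rounding up, e.g. div_ru (7, 2) == 4.  Used to compute
+   the extent of the last codimension from the number of images.  */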
+static inline int
+div_ru (int dividend, int divisor)
+{
+  return (dividend + divisor - 1) / divisor;
+}
+
+enum gfc_coarray_allocation_type {
+ GFC_NCA_NORMAL_COARRAY = 3,
+ GFC_NCA_LOCK_COARRAY,
+ GFC_NCA_EVENT_COARRAY,
+};
+
+void nca_coarray_alloc (gfc_array_void *, int, int, int);
+export_proto (nca_coarray_alloc);
+
+void
+nca_coarray_free (gfc_array_void *, int);
+export_proto (nca_coarray_free);
+
+int nca_coarray_this_image (int);
+export_proto (nca_coarray_this_image);
+
+int nca_coarray_num_images (int);
+export_proto (nca_coarray_num_images);
+
+void nca_coarray_sync_all (int *);
+export_proto (nca_coarray_sync_all);
+
+void nca_sync_images (size_t, int *, int*, char *, size_t);
+export_proto (nca_sync_images);
+
+void nca_lock (void *);
+export_proto (nca_lock);
+
+void nca_unlock (void *);
+export_proto (nca_unlock);
+
+void nca_collsub_reduce_array (gfc_array_char *, void (*) (void *, void *),
+ int *);
+export_proto (nca_collsub_reduce_array);
+
+void nca_collsub_reduce_scalar (void *, index_type, void (*) (void *, void *),
+ int *);
+export_proto (nca_collsub_reduce_scalar);
+
+void nca_collsub_broadcast_array (gfc_array_char * restrict, int/*, int *, char *,
+ size_t*/);
+export_proto (nca_collsub_broadcast_array);
+
+void nca_collsub_broadcast_scalar (void * restrict, size_t, int/*, int *, char *,
+ size_t*/);
+export_proto(nca_collsub_broadcast_scalar);
+
+void
+nca_coarray_alloc (gfc_array_void *desc, int elem_size, int corank,
+ int alloc_type)
+{
+ int i, last_rank_index;
+  int num_coarray_elems, num_elems; /* Both exclude the last codimension,
+				       whose extent is determined below.  */
+ int extent_last_codimen;
+ size_t last_lbound;
+ size_t size_in_bytes;
+
+  ensure_initialization (); /* This might be the first library function to
+			       be called, e.g. from a constructor. */
+
+ if (alloc_type == GFC_NCA_LOCK_COARRAY)
+ elem_size = sizeof (pthread_mutex_t);
+ else if (alloc_type == GFC_NCA_EVENT_COARRAY)
+ elem_size = sizeof(char); /* replace with proper type. */
+
+ last_rank_index = GFC_DESCRIPTOR_RANK(desc) + corank -1;
+
+ num_elems = 1;
+ num_coarray_elems = 1;
+ for (i = 0; i < GFC_DESCRIPTOR_RANK(desc); i++)
+ num_elems *= GFC_DESCRIPTOR_EXTENT(desc, i);
+ for (i = GFC_DESCRIPTOR_RANK(desc); i < last_rank_index; i++)
+ {
+ num_elems *= GFC_DESCRIPTOR_EXTENT(desc, i);
+ num_coarray_elems *= GFC_DESCRIPTOR_EXTENT(desc, i);
+ }
+
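+  /* Example: for a coarray declared with codimensions [2,*] and 7 images,
+     num_coarray_elems is 2 and the last codimension gets extent
+     div_ru (7, 2) == 4.  */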
+ extent_last_codimen = div_ru (local->num_images, num_coarray_elems);
+
+ last_lbound = GFC_DIMENSION_LBOUND(desc->dim[last_rank_index]);
+ GFC_DIMENSION_SET(desc->dim[last_rank_index], last_lbound,
+ last_lbound + extent_last_codimen - 1,
+ num_elems);
+
+ size_in_bytes = elem_size * num_elems * extent_last_codimen;
+ if (alloc_type == GFC_NCA_LOCK_COARRAY)
+ {
+ lock_array *addr;
+ int expected = 0;
+      /* Allocate enough space for the metadata in front of the lock
+	 array. */
+ addr = get_memory_by_id_zero (&local->ai, size_in_bytes
+ + sizeof (lock_array),
+ (intptr_t) desc);
+
+      /* Use a traditional spin lock to avoid race conditions during the
+	 initialization of the mutexes.  We could alternatively put a
+	 global lock around allocation, but that would probably be
+	 slower. */
+ while (!__atomic_compare_exchange_n (&addr->owner, &expected,
+ this_image.image_num + 1,
+ false, __ATOMIC_SEQ_CST,
+ __ATOMIC_SEQ_CST));
+ if (!addr->initialized++)
+ {
+ for (i = 0; i < local->num_images; i++)
+ initialize_shared_mutex (&addr->arr[i]);
+ }
+ __atomic_store_n (&addr->owner, 0, __ATOMIC_SEQ_CST);
+ desc->base_addr = &addr->arr;
+ }
+ else if (alloc_type == GFC_NCA_EVENT_COARRAY)
+ (void) 0; // TODO
+ else
+ desc->base_addr = get_memory_by_id (&local->ai, size_in_bytes,
+ (intptr_t) desc);
+ dprintf(2, "Base address of desc for image %d: %p\n", this_image.image_num + 1, desc->base_addr);
+}
+
+void
+nca_coarray_free (gfc_array_void *desc, int alloc_type)
+{
+ int i;
+ if (alloc_type == GFC_NCA_LOCK_COARRAY)
+ {
+ lock_array *la;
+ int expected = 0;
+ la = desc->base_addr - offsetof (lock_array, arr);
+ while (!__atomic_compare_exchange_n (&la->owner, &expected,
+ this_image.image_num+1,
+ false, __ATOMIC_SEQ_CST,
+ __ATOMIC_SEQ_CST));
+ if (!--la->initialized)
+ {
+ /* Coarray locks can be removed and just normal
+ pthread_mutex can be used. */
+ for (i = 0; i < local->num_images; i++)
+ pthread_mutex_destroy (&la->arr[i]);
+ }
+ __atomic_store_n (&la->owner, 0, __ATOMIC_SEQ_CST);
+ }
+ else if (alloc_type == GFC_NCA_EVENT_COARRAY)
+ (void) 0; //TODO
+
+ free_memory_with_id (&local->ai, (intptr_t) desc);
+ desc->base_addr = NULL;
+}
+
+int
+nca_coarray_this_image (int distance __attribute__((unused)))
+{
+ return this_image.image_num + 1;
+}
+
+int
+nca_coarray_num_images (int distance __attribute__((unused)))
+{
+ return local->num_images;
+}
+
+void
+nca_coarray_sync_all (int *stat __attribute__((unused)))
+{
+ sync_all (&local->si);
+}
+
+void
+nca_sync_images (size_t s, int *images,
+ int *stat __attribute__((unused)),
+ char *error __attribute__((unused)),
+ size_t err_size __attribute__((unused)))
+{
+ sync_table (&local->si, images, s);
+}
+
+void
+nca_lock (void *lock)
+{
+ pthread_mutex_lock (lock);
+}
+
+void
+nca_unlock (void *lock)
+{
+ pthread_mutex_unlock (lock);
+}
+
+void
+nca_collsub_reduce_array (gfc_array_char *desc, void (*assign_function) (void *, void *),
+ int *result_image)
+{
+ collsub_reduce_array (&local->ci, desc, result_image, assign_function);
+}
+
+void
+nca_collsub_reduce_scalar (void *obj, index_type elem_size,
+ void (*assign_function) (void *, void *),
+ int *result_image)
+{
+ collsub_reduce_scalar (&local->ci, obj, elem_size, result_image, assign_function);
+}
+
+void
+nca_collsub_broadcast_array (gfc_array_char * restrict a, int source_image
+ /* , int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ size_t errmsg_len __attribute__ ((unused))*/)
+{
+ collsub_broadcast_array (&local->ci, a, source_image - 1);
+}
+
+void
+nca_collsub_broadcast_scalar (void * restrict obj, size_t size, int source_image/*,
+ int *stat __attribute__((unused)),
+ char *errmsg __attribute__ ((unused)),
+ size_t errmsg_len __attribute__ ((unused))*/)
+{
+ collsub_broadcast_scalar (&local->ci, obj, size, source_image - 1);
+}