aboutsummaryrefslogtreecommitdiff
path: root/libgfortran/generated
diff options
context:
space:
mode:
authorI'm not telling you my name, idiot <git_is@stupid.com>2020-09-23 18:14:20 +0200
committerI'm not telling you my name, idiot <git_is@stupid.com>2020-09-23 18:14:20 +0200
commitbef0a39f2f8e87780f990d12fa71f5ed0039267a (patch)
treece8138e924d9bfff3ce0b63aa4c29397fbfaa1a5 /libgfortran/generated
parent9044db88d634c631920eaa9f66c0275adf18fdf5 (diff)
downloadgcc-bef0a39f2f8e87780f990d12fa71f5ed0039267a.zip
gcc-bef0a39f2f8e87780f990d12fa71f5ed0039267a.tar.gz
gcc-bef0a39f2f8e87780f990d12fa71f5ed0039267a.tar.bz2
Initial commit of coarray_native branch.
Diffstat (limited to 'libgfortran/generated')
-rw-r--r--libgfortran/generated/nca_minmax_i1.c653
-rw-r--r--libgfortran/generated/nca_minmax_i16.c653
-rw-r--r--libgfortran/generated/nca_minmax_i2.c653
-rw-r--r--libgfortran/generated/nca_minmax_i4.c653
-rw-r--r--libgfortran/generated/nca_minmax_i8.c653
-rw-r--r--libgfortran/generated/nca_minmax_r10.c653
-rw-r--r--libgfortran/generated/nca_minmax_r16.c653
-rw-r--r--libgfortran/generated/nca_minmax_r4.c653
-rw-r--r--libgfortran/generated/nca_minmax_r8.c653
-rw-r--r--libgfortran/generated/nca_minmax_s1.c494
-rw-r--r--libgfortran/generated/nca_minmax_s4.c494
11 files changed, 6865 insertions, 0 deletions
diff --git a/libgfortran/generated/nca_minmax_i1.c b/libgfortran/generated/nca_minmax_i1.c
new file mode 100644
index 0000000..3bc9a2b
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_i1.c
@@ -0,0 +1,653 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_INTEGER_1)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_i1);
+
+void
+nca_collsub_max_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_1 *a, *b;
+ GFC_INTEGER_1 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_i1);
+
+void
+nca_collsub_min_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_1 *a, *b;
+ GFC_INTEGER_1 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_i1);
+
+void
+nca_collsub_sum_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_1 *a, *b;
+ GFC_INTEGER_1 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_i1 (gfc_array_i1 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_i1);
+
+void
+nca_collsub_max_array_i1 (gfc_array_i1 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_1 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_1 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_1);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_1);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_1);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_1 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_1 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_1 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_1 *a;
+ GFC_INTEGER_1 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_1 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_1 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_i1 (gfc_array_i1 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_i1);
+
+void
+nca_collsub_min_array_i1 (gfc_array_i1 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_1 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_1 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_1);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_1);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_1);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_1 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_1 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_1 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_1 *a;
+ GFC_INTEGER_1 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_1 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_1 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_i1 (gfc_array_i1 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_i1);
+
+void
+nca_collsub_sum_array_i1 (gfc_array_i1 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_1 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_1 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_1);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_1);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_1);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_1 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_1 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_1 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_1 *a;
+ GFC_INTEGER_1 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_1 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_1 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_i16.c b/libgfortran/generated/nca_minmax_i16.c
new file mode 100644
index 0000000..8fbb948
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_i16.c
@@ -0,0 +1,653 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_INTEGER_16)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_i16);
+
+void
+nca_collsub_max_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_16 *a, *b;
+ GFC_INTEGER_16 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_i16);
+
+void
+nca_collsub_min_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_16 *a, *b;
+ GFC_INTEGER_16 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_i16);
+
+void
+nca_collsub_sum_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_16 *a, *b;
+ GFC_INTEGER_16 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_i16 (gfc_array_i16 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_i16);
+
+void
+nca_collsub_max_array_i16 (gfc_array_i16 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_16 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_16);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_16);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_16);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_16 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_16 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_16 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_16 *a;
+ GFC_INTEGER_16 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_16 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_16 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_i16 (gfc_array_i16 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_i16);
+
+void
+nca_collsub_min_array_i16 (gfc_array_i16 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_16 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_16);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_16);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_16);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_16 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_16 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_16 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_16 *a;
+ GFC_INTEGER_16 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_16 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_16 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_i16 (gfc_array_i16 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_i16);
+
+void
+nca_collsub_sum_array_i16 (gfc_array_i16 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_16 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_16);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_16);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_16);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_16 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_16 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_16 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_16 *a;
+ GFC_INTEGER_16 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_16 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_16 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_i2.c b/libgfortran/generated/nca_minmax_i2.c
new file mode 100644
index 0000000..61908d6
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_i2.c
@@ -0,0 +1,653 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_INTEGER_2)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_i2);
+
+/* CO_MAX for a scalar GFC_INTEGER_2.  Each image deposits its value into
+   its slot of the shared collective buffer; the slots are then combined
+   pairwise in a binary tree, leaving the maximum in element 0, which is
+   read back into *obj.  STAT and ERRMSG are accepted but currently unused. */
+void
+nca_collsub_max_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_2 *a, *b;
+ GFC_INTEGER_2 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ /* One buffer slot per image; publish this image's value. */
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ /* Binary-tree reduction: in round cbit, a surviving image combines its
+    slot with the slot 2**cbit entries away. */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ /* Images that left the reduction early keep taking part in the remaining
+    barriers, so every image executes the same number of syncs. */
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ /* Read the result back everywhere, or only on RESULT_IMAGE if given. */
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_i2);
+
+/* CO_MIN for a scalar GFC_INTEGER_2.  Each image deposits its value into
+   its slot of the shared collective buffer; the slots are then combined
+   pairwise in a binary tree, leaving the minimum in element 0, which is
+   read back into *obj.  STAT and ERRMSG are accepted but currently unused. */
+void
+nca_collsub_min_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_2 *a, *b;
+ GFC_INTEGER_2 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ /* One buffer slot per image; publish this image's value. */
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ /* Binary-tree reduction: in round cbit, a surviving image combines its
+    slot with the slot 2**cbit entries away. */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ /* Images that left the reduction early keep taking part in the remaining
+    barriers, so every image executes the same number of syncs. */
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ /* Read the result back everywhere, or only on RESULT_IMAGE if given. */
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_i2);
+
+/* CO_SUM for a scalar GFC_INTEGER_2.  Each image deposits its value into
+   its slot of the shared collective buffer; the slots are then added
+   pairwise in a binary tree, leaving the sum in element 0, which is read
+   back into *obj.  STAT and ERRMSG are accepted but currently unused. */
+void
+nca_collsub_sum_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_2 *a, *b;
+ GFC_INTEGER_2 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ /* One buffer slot per image; publish this image's value. */
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ /* Binary-tree reduction: in round cbit, a surviving image combines its
+    slot with the slot 2**cbit entries away. */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ /* Images that left the reduction early keep taking part in the remaining
+    barriers, so every image executes the same number of syncs. */
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ /* Read the result back everywhere, or only on RESULT_IMAGE if given. */
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_i2 (gfc_array_i2 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_i2);
+
+/* CO_MAX for an array of GFC_INTEGER_2 of any rank.  Each image packs its
+   (possibly strided) array into its slot of the shared collective buffer,
+   the per-image copies are combined elementwise in a binary tree, and the
+   reduced data is copied back where required.  STAT and ERRMSG are
+   accepted but currently unused. */
+void
+nca_collsub_max_array_i2 (gfc_array_i2 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_2 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_2 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ /* Gather the descriptor's strides and extents (in bytes), and check
+    whether the array is contiguous so it can be memcpy'd as one chunk. */
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_2);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_2);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_2);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ /* Pack this image's data into its slot of the shared buffer. */
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_2 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_2 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_2 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_2 *a;
+ GFC_INTEGER_2 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ /* Take part in the remaining barriers so every image syncs equally often. */
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ /* Unpack the result, everywhere or only on RESULT_IMAGE if given. */
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_2 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_2 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_i2 (gfc_array_i2 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_i2);
+
+/* CO_MIN for an array of GFC_INTEGER_2 of any rank.  Each image packs its
+   (possibly strided) array into its slot of the shared collective buffer,
+   the per-image copies are combined elementwise in a binary tree, and the
+   reduced data is copied back where required.  STAT and ERRMSG are
+   accepted but currently unused. */
+void
+nca_collsub_min_array_i2 (gfc_array_i2 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_2 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_2 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ /* Gather the descriptor's strides and extents (in bytes), and check
+    whether the array is contiguous so it can be memcpy'd as one chunk. */
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_2);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_2);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_2);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ /* Pack this image's data into its slot of the shared buffer. */
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_2 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_2 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_2 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_2 *a;
+ GFC_INTEGER_2 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ /* Take part in the remaining barriers so every image syncs equally often. */
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ /* Unpack the result, everywhere or only on RESULT_IMAGE if given. */
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_2 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_2 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_i2 (gfc_array_i2 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_i2);
+
+/* CO_SUM for an array of GFC_INTEGER_2 of any rank.  Each image packs its
+   (possibly strided) array into its slot of the shared collective buffer,
+   the per-image copies are added elementwise in a binary tree, and the
+   reduced data is copied back where required.  STAT and ERRMSG are
+   accepted but currently unused. */
+void
+nca_collsub_sum_array_i2 (gfc_array_i2 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_2 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_2 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ /* Gather the descriptor's strides and extents (in bytes), and check
+    whether the array is contiguous so it can be memcpy'd as one chunk. */
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_2);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_2);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_2);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ /* Pack this image's data into its slot of the shared buffer. */
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_2 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_2 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_2 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_2 *a;
+ GFC_INTEGER_2 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ /* Take part in the remaining barriers so every image syncs equally often. */
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ /* Unpack the result, everywhere or only on RESULT_IMAGE if given. */
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_2 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_2 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_i4.c b/libgfortran/generated/nca_minmax_i4.c
new file mode 100644
index 0000000..5e37586
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_i4.c
@@ -0,0 +1,653 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_INTEGER_4)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_i4);
+
+/* CO_MAX for a scalar GFC_INTEGER_4.  Each image deposits its value into
+   its slot of the shared collective buffer; the slots are then combined
+   pairwise in a binary tree, leaving the maximum in element 0, which is
+   read back into *obj.  STAT and ERRMSG are accepted but currently unused. */
+void
+nca_collsub_max_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_4 *a, *b;
+ GFC_INTEGER_4 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ /* One buffer slot per image; publish this image's value. */
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ /* Binary-tree reduction: in round cbit, a surviving image combines its
+    slot with the slot 2**cbit entries away. */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ /* Images that left the reduction early keep taking part in the remaining
+    barriers, so every image executes the same number of syncs. */
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ /* Read the result back everywhere, or only on RESULT_IMAGE if given. */
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_i4);
+
+/* CO_MIN for a scalar GFC_INTEGER_4.  Each image deposits its value into
+   its slot of the shared collective buffer; the slots are then combined
+   pairwise in a binary tree, leaving the minimum in element 0, which is
+   read back into *obj.  STAT and ERRMSG are accepted but currently unused. */
+void
+nca_collsub_min_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_4 *a, *b;
+ GFC_INTEGER_4 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ /* One buffer slot per image; publish this image's value. */
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ /* Binary-tree reduction: in round cbit, a surviving image combines its
+    slot with the slot 2**cbit entries away. */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ /* Images that left the reduction early keep taking part in the remaining
+    barriers, so every image executes the same number of syncs. */
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ /* Read the result back everywhere, or only on RESULT_IMAGE if given. */
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_i4);
+
+/* CO_SUM for a scalar GFC_INTEGER_4.  Each image deposits its value into
+   its slot of the shared collective buffer; the slots are then added
+   pairwise in a binary tree, leaving the sum in element 0, which is read
+   back into *obj.  STAT and ERRMSG are accepted but currently unused. */
+void
+nca_collsub_sum_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_4 *a, *b;
+ GFC_INTEGER_4 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ /* One buffer slot per image; publish this image's value. */
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ /* Binary-tree reduction: in round cbit, a surviving image combines its
+    slot with the slot 2**cbit entries away. */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ /* Images that left the reduction early keep taking part in the remaining
+    barriers, so every image executes the same number of syncs. */
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ /* Read the result back everywhere, or only on RESULT_IMAGE if given. */
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_i4 (gfc_array_i4 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_i4);
+
+/* CO_MAX for an array of GFC_INTEGER_4 of any rank.  Each image packs its
+   (possibly strided) array into its slot of the shared collective buffer,
+   the per-image copies are combined elementwise in a binary tree, and the
+   reduced data is copied back where required.  STAT and ERRMSG are
+   accepted but currently unused. */
+void
+nca_collsub_max_array_i4 (gfc_array_i4 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_4 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ /* Gather the descriptor's strides and extents (in bytes), and check
+    whether the array is contiguous so it can be memcpy'd as one chunk. */
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_4);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_4);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_4);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ /* Pack this image's data into its slot of the shared buffer. */
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_4 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_4 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_4 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_4 *a;
+ GFC_INTEGER_4 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ /* Take part in the remaining barriers so every image syncs equally often. */
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ /* Unpack the result, everywhere or only on RESULT_IMAGE if given. */
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_4 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_4 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_i4 (gfc_array_i4 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_i4);
+
+/* CO_MIN for an array of GFC_INTEGER_4 of any rank.  Each image packs its
+   (possibly strided) array into its slot of the shared collective buffer,
+   the per-image copies are combined elementwise in a binary tree, and the
+   reduced data is copied back where required.  STAT and ERRMSG are
+   accepted but currently unused. */
+void
+nca_collsub_min_array_i4 (gfc_array_i4 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_4 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ /* Gather the descriptor's strides and extents (in bytes), and check
+    whether the array is contiguous so it can be memcpy'd as one chunk. */
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_4);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_4);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_4);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ /* Pack this image's data into its slot of the shared buffer. */
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_4 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_4 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_4 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_4 *a;
+ GFC_INTEGER_4 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ /* Take part in the remaining barriers so every image syncs equally often. */
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ /* Unpack the result, everywhere or only on RESULT_IMAGE if given. */
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_4 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_4 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_i4 (gfc_array_i4 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_i4);
+
+/* CO_SUM for an array of GFC_INTEGER_4 of any rank.  Each image packs its
+   (possibly strided) array into its slot of the shared collective buffer,
+   the per-image copies are added elementwise in a binary tree, and the
+   reduced data is copied back where required.  STAT and ERRMSG are
+   accepted but currently unused. */
+void
+nca_collsub_sum_array_i4 (gfc_array_i4 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_4 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ /* Gather the descriptor's strides and extents (in bytes), and check
+    whether the array is contiguous so it can be memcpy'd as one chunk. */
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_4);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_4);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_4);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ /* Pack this image's data into its slot of the shared buffer. */
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_4 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_4 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_4 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_4 *a;
+ GFC_INTEGER_4 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ /* Take part in the remaining barriers so every image syncs equally often. */
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ /* Unpack the result, everywhere or only on RESULT_IMAGE if given. */
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_4 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_4 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_i8.c b/libgfortran/generated/nca_minmax_i8.c
new file mode 100644
index 0000000..b3dc861
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_i8.c
@@ -0,0 +1,653 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_INTEGER_8)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_i8);
+
+/* Collective MAX reduction (CO_MAX) of a scalar of kind=8 integer.
+   Every image deposits its value into its own slot of a shared buffer;
+   a binary-tree reduction leaves the maximum in slot zero, which is
+   then copied back on the receiving image(s).  result_image is the
+   1-based image number that receives the result, or NULL if all images
+   receive it.  stat/errmsg are currently unused.  */
+void
+nca_collsub_max_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image,
+			    int *stat __attribute__ ((unused)),
+			    char *errmsg __attribute__ ((unused)),
+			    index_type errmsg_len __attribute__ ((unused)))
+{
+  collsub_iface *ci = &local->ci;
+  GFC_INTEGER_8 *buffer;
+  GFC_INTEGER_8 *my_slot;
+  int bit = 0;
+
+  buffer = get_collsub_buf (ci, sizeof (GFC_INTEGER_8) * local->num_images);
+  my_slot = buffer + this_image.image_num;
+  *my_slot = *obj;
+
+  collsub_sync (ci);
+
+  /* Tree reduction: in round 'bit', images whose low bits are all clear
+     fold in the value of the partner 2**bit slots away.  */
+  while (((this_image.image_num >> bit) & 1) == 0
+	 && (local->num_images >> bit) != 0)
+    {
+      int partner = this_image.image_num + (1 << bit);
+      if (partner < local->num_images)
+	{
+	  GFC_INTEGER_8 other = *(my_slot + (1 << bit));
+	  if (other > *my_slot)
+	    *my_slot = other;
+	}
+      collsub_sync (ci);
+      bit++;
+    }
+
+  /* Done reducing; keep hitting the remaining barriers so every image
+     executes the same number of collsub_sync calls.  */
+  while ((local->num_images >> bit) != 0)
+    {
+      collsub_sync (ci);
+      bit++;
+    }
+
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    *obj = *buffer;
+
+  finish_collective_subroutine (ci);
+}
+
+void nca_collsub_min_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_i8);
+
+/* Collective MIN reduction (CO_MIN) of a scalar of kind=8 integer.
+   Every image deposits its value into its own slot of a shared buffer;
+   a binary-tree reduction leaves the minimum in slot zero, which is
+   then copied back on the receiving image(s).  result_image is the
+   1-based image number that receives the result, or NULL if all images
+   receive it.  stat/errmsg are currently unused.  */
+void
+nca_collsub_min_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image,
+			    int *stat __attribute__ ((unused)),
+			    char *errmsg __attribute__ ((unused)),
+			    index_type errmsg_len __attribute__ ((unused)))
+{
+  collsub_iface *ci = &local->ci;
+  GFC_INTEGER_8 *buffer;
+  GFC_INTEGER_8 *my_slot;
+  int bit = 0;
+
+  buffer = get_collsub_buf (ci, sizeof (GFC_INTEGER_8) * local->num_images);
+  my_slot = buffer + this_image.image_num;
+  *my_slot = *obj;
+
+  collsub_sync (ci);
+
+  /* Tree reduction: in round 'bit', images whose low bits are all clear
+     fold in the value of the partner 2**bit slots away.  */
+  while (((this_image.image_num >> bit) & 1) == 0
+	 && (local->num_images >> bit) != 0)
+    {
+      int partner = this_image.image_num + (1 << bit);
+      if (partner < local->num_images)
+	{
+	  GFC_INTEGER_8 other = *(my_slot + (1 << bit));
+	  if (other < *my_slot)
+	    *my_slot = other;
+	}
+      collsub_sync (ci);
+      bit++;
+    }
+
+  /* Done reducing; keep hitting the remaining barriers so every image
+     executes the same number of collsub_sync calls.  */
+  while ((local->num_images >> bit) != 0)
+    {
+      collsub_sync (ci);
+      bit++;
+    }
+
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    *obj = *buffer;
+
+  finish_collective_subroutine (ci);
+}
+
+void nca_collsub_sum_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_i8);
+
+/* Collective SUM reduction (CO_SUM) of a scalar of kind=8 integer.
+   Every image deposits its value into its own slot of a shared buffer;
+   a binary-tree reduction accumulates the sum into slot zero, which is
+   then copied back on the receiving image(s).  result_image is the
+   1-based image number that receives the result, or NULL if all images
+   receive it.  stat/errmsg are currently unused.  */
+void
+nca_collsub_sum_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image,
+			    int *stat __attribute__ ((unused)),
+			    char *errmsg __attribute__ ((unused)),
+			    index_type errmsg_len __attribute__ ((unused)))
+{
+  collsub_iface *ci = &local->ci;
+  GFC_INTEGER_8 *buffer;
+  GFC_INTEGER_8 *my_slot;
+  int bit = 0;
+
+  buffer = get_collsub_buf (ci, sizeof (GFC_INTEGER_8) * local->num_images);
+  my_slot = buffer + this_image.image_num;
+  *my_slot = *obj;
+
+  collsub_sync (ci);
+
+  /* Tree reduction: in round 'bit', images whose low bits are all clear
+     add in the value of the partner 2**bit slots away.  */
+  while (((this_image.image_num >> bit) & 1) == 0
+	 && (local->num_images >> bit) != 0)
+    {
+      int partner = this_image.image_num + (1 << bit);
+      if (partner < local->num_images)
+	*my_slot += *(my_slot + (1 << bit));
+      collsub_sync (ci);
+      bit++;
+    }
+
+  /* Done reducing; keep hitting the remaining barriers so every image
+     executes the same number of collsub_sync calls.  */
+  while ((local->num_images >> bit) != 0)
+    {
+      collsub_sync (ci);
+      bit++;
+    }
+
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    *obj = *buffer;
+
+  finish_collective_subroutine (ci);
+}
+
+void nca_collsub_max_array_i8 (gfc_array_i8 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_i8);
+
+/* Collective MAX reduction (CO_MAX) over an array of kind=8 integers.
+   Each image packs its local array into its own slot of a shared
+   buffer, a binary-tree reduction combines the slots elementwise, and
+   the result -- left in image zero's slot at the start of the buffer --
+   is copied back into the local array on the receiving image(s).
+
+   array        - descriptor of the local array; overwritten with the
+		  result on the receiving image(s).
+   result_image - 1-based number of the image to receive the result, or
+		  NULL if every image receives it.
+   stat, errmsg, errmsg_len - currently unused.  */
+void
+nca_collsub_max_array_i8 (gfc_array_i8 * restrict array, int *result_image,
+			   int *stat __attribute__ ((unused)),
+			   char *errmsg __attribute__ ((unused)),
+			   index_type errmsg_len __attribute__ ((unused)))
+{
+  index_type count[GFC_MAX_DIMENSIONS];
+  index_type stride[GFC_MAX_DIMENSIONS];
+  index_type extent[GFC_MAX_DIMENSIONS];
+  GFC_INTEGER_8 *this_shared_ptr;  /* Points to the shared memory allocated to this image. */
+  GFC_INTEGER_8 *buffer;
+  index_type dim;
+  bool packed;
+  index_type span;
+  index_type ssize, num_elems;
+  int cbit = 0;
+  int imoffset;
+  collsub_iface *ci;
+
+  ci = &local->ci;
+
+  /* Gather per-dimension byte strides and extents, and determine whether
+     the array is contiguous ("packed") so it can be copied with a single
+     memcpy.  */
+  dim = GFC_DESCRIPTOR_RANK (array);
+  ssize = sizeof (GFC_INTEGER_8);
+  packed = true;
+  span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_8);
+  for (index_type n = 0; n < dim; n++)
+    {
+      count[n] = 0;
+      stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+      extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+      /* No-op for an empty array. */
+      if (extent[n] <= 0)
+	return;
+
+      if (ssize != stride[n])
+	packed = false;
+
+      ssize *= extent[n];
+    }
+
+  /* ssize is now the size of the whole array in bytes.  */
+  num_elems = ssize / sizeof (GFC_INTEGER_8);
+
+  /* The shared buffer holds one copy of the array for every image.  */
+  buffer = get_collsub_buf (ci, ssize * local->num_images);
+  this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+  if (packed)
+    memcpy (this_shared_ptr, array->base_addr, ssize);
+  else
+    {
+      /* Walk the non-contiguous source array in element order and pack
+	 it into the contiguous shared buffer.  */
+      char *src = (char *) array->base_addr;
+      GFC_INTEGER_8 *restrict dest = this_shared_ptr;
+      index_type stride0 = stride[0];
+
+      while (src)
+	{
+	  /* Copy the data. */
+	  *(dest++) = *((GFC_INTEGER_8 *) src);
+	  src += stride0;
+	  count[0] ++;
+	  /* Advance to the next source element. */
+	  for (index_type n = 0; count[n] == extent[n] ; )
+	    {
+	      /* When we get to the end of a dimension, reset it and increment
+		 the next dimension. */
+	      count[n] = 0;
+	      src -= stride[n] * extent[n];
+	      n++;
+	      if (n == dim)
+		{
+		  src = NULL;
+		  break;
+		}
+	      else
+		{
+		  count[n]++;
+		  src += stride[n];
+		}
+	    }
+	}
+    }
+
+  /* Wait until every image has filled its slot.  */
+  collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+      abababababab
+      a_b_a_b_a_b_
+      a___b___a___
+      a_______b___
+      r___________
+
+     In round cbit, images whose low cbit bits are zero take the
+     elementwise maximum with the slot of the image 2**cbit away.  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  GFC_INTEGER_8 * other_shared_ptr;  /* Points to the shared memory
+						allocated to another image. */
+	  GFC_INTEGER_8 *a;
+	  GFC_INTEGER_8 *b;
+
+	  other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+	  for (index_type i = 0; i < num_elems; i++)
+	    {
+	      a = this_shared_ptr + i;
+	      b = other_shared_ptr + i;
+	      if (*b > *a)
+		*a = *b;
+	    }
+	}
+      collsub_sync (ci);
+    }
+  /* This image is done reducing, but it must still take part in the
+     remaining barriers so every image executes the same number of
+     collsub_sync calls.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  /* Copy the result (image zero's slot, at the start of the buffer)
+     back into the local array where requested.  */
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    {
+      if (packed)
+	memcpy (array->base_addr, buffer, ssize);
+      else
+	{
+	  /* Unpack the contiguous result into the non-contiguous local
+	     array, mirroring the packing loop above.  */
+	  GFC_INTEGER_8 *src = buffer;
+	  char * restrict dest = (char *) array->base_addr;
+	  index_type stride0 = stride[0];
+
+	  for (index_type n = 0; n < dim; n++)
+	    count[n] = 0;
+
+	  while (dest)
+	    {
+	      *((GFC_INTEGER_8 * ) dest) = *src++;
+	      dest += stride0;
+	      count[0] ++;
+	      for (index_type n = 0; count[n] == extent[n] ;)
+		{
+		  /* When we get to the end of a dimension, reset it and increment
+		     the next dimension. */
+		  count[n] = 0;
+		  dest -= stride[n] * extent[n];
+		  n++;
+		  if (n == dim)
+		    {
+		      dest = NULL;
+		      break;
+		    }
+		  else
+		    {
+		      count[n]++;
+		      dest += stride[n];
+		    }
+		}
+	    }
+	}
+    }
+  finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_i8 (gfc_array_i8 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_i8);
+
+/* Collective MIN reduction (CO_MIN) over an array of kind=8 integers.
+   Each image packs its local array into its own slot of a shared
+   buffer, a binary-tree reduction combines the slots elementwise, and
+   the result -- left in image zero's slot at the start of the buffer --
+   is copied back into the local array on the receiving image(s).
+
+   array        - descriptor of the local array; overwritten with the
+		  result on the receiving image(s).
+   result_image - 1-based number of the image to receive the result, or
+		  NULL if every image receives it.
+   stat, errmsg, errmsg_len - currently unused.  */
+void
+nca_collsub_min_array_i8 (gfc_array_i8 * restrict array, int *result_image,
+			   int *stat __attribute__ ((unused)),
+			   char *errmsg __attribute__ ((unused)),
+			   index_type errmsg_len __attribute__ ((unused)))
+{
+  index_type count[GFC_MAX_DIMENSIONS];
+  index_type stride[GFC_MAX_DIMENSIONS];
+  index_type extent[GFC_MAX_DIMENSIONS];
+  GFC_INTEGER_8 *this_shared_ptr;  /* Points to the shared memory allocated to this image. */
+  GFC_INTEGER_8 *buffer;
+  index_type dim;
+  bool packed;
+  index_type span;
+  index_type ssize, num_elems;
+  int cbit = 0;
+  int imoffset;
+  collsub_iface *ci;
+
+  ci = &local->ci;
+
+  /* Gather per-dimension byte strides and extents, and determine whether
+     the array is contiguous ("packed") so it can be copied with a single
+     memcpy.  */
+  dim = GFC_DESCRIPTOR_RANK (array);
+  ssize = sizeof (GFC_INTEGER_8);
+  packed = true;
+  span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_8);
+  for (index_type n = 0; n < dim; n++)
+    {
+      count[n] = 0;
+      stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+      extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+      /* No-op for an empty array. */
+      if (extent[n] <= 0)
+	return;
+
+      if (ssize != stride[n])
+	packed = false;
+
+      ssize *= extent[n];
+    }
+
+  /* ssize is now the size of the whole array in bytes.  */
+  num_elems = ssize / sizeof (GFC_INTEGER_8);
+
+  /* The shared buffer holds one copy of the array for every image.  */
+  buffer = get_collsub_buf (ci, ssize * local->num_images);
+  this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+  if (packed)
+    memcpy (this_shared_ptr, array->base_addr, ssize);
+  else
+    {
+      /* Walk the non-contiguous source array in element order and pack
+	 it into the contiguous shared buffer.  */
+      char *src = (char *) array->base_addr;
+      GFC_INTEGER_8 *restrict dest = this_shared_ptr;
+      index_type stride0 = stride[0];
+
+      while (src)
+	{
+	  /* Copy the data. */
+	  *(dest++) = *((GFC_INTEGER_8 *) src);
+	  src += stride0;
+	  count[0] ++;
+	  /* Advance to the next source element. */
+	  for (index_type n = 0; count[n] == extent[n] ; )
+	    {
+	      /* When we get to the end of a dimension, reset it and increment
+		 the next dimension. */
+	      count[n] = 0;
+	      src -= stride[n] * extent[n];
+	      n++;
+	      if (n == dim)
+		{
+		  src = NULL;
+		  break;
+		}
+	      else
+		{
+		  count[n]++;
+		  src += stride[n];
+		}
+	    }
+	}
+    }
+
+  /* Wait until every image has filled its slot.  */
+  collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+      abababababab
+      a_b_a_b_a_b_
+      a___b___a___
+      a_______b___
+      r___________
+
+     In round cbit, images whose low cbit bits are zero take the
+     elementwise minimum with the slot of the image 2**cbit away.  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  GFC_INTEGER_8 * other_shared_ptr;  /* Points to the shared memory
+						allocated to another image. */
+	  GFC_INTEGER_8 *a;
+	  GFC_INTEGER_8 *b;
+
+	  other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+	  for (index_type i = 0; i < num_elems; i++)
+	    {
+	      a = this_shared_ptr + i;
+	      b = other_shared_ptr + i;
+	      if (*b < *a)
+		*a = *b;
+	    }
+	}
+      collsub_sync (ci);
+    }
+  /* This image is done reducing, but it must still take part in the
+     remaining barriers so every image executes the same number of
+     collsub_sync calls.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  /* Copy the result (image zero's slot, at the start of the buffer)
+     back into the local array where requested.  */
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    {
+      if (packed)
+	memcpy (array->base_addr, buffer, ssize);
+      else
+	{
+	  /* Unpack the contiguous result into the non-contiguous local
+	     array, mirroring the packing loop above.  */
+	  GFC_INTEGER_8 *src = buffer;
+	  char * restrict dest = (char *) array->base_addr;
+	  index_type stride0 = stride[0];
+
+	  for (index_type n = 0; n < dim; n++)
+	    count[n] = 0;
+
+	  while (dest)
+	    {
+	      *((GFC_INTEGER_8 * ) dest) = *src++;
+	      dest += stride0;
+	      count[0] ++;
+	      for (index_type n = 0; count[n] == extent[n] ;)
+		{
+		  /* When we get to the end of a dimension, reset it and increment
+		     the next dimension. */
+		  count[n] = 0;
+		  dest -= stride[n] * extent[n];
+		  n++;
+		  if (n == dim)
+		    {
+		      dest = NULL;
+		      break;
+		    }
+		  else
+		    {
+		      count[n]++;
+		      dest += stride[n];
+		    }
+		}
+	    }
+	}
+    }
+  finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_i8 (gfc_array_i8 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_i8);
+
+/* Collective SUM reduction (CO_SUM) over an array of kind=8 integers.
+   Each image packs its local array into its own slot of a shared
+   buffer, a binary-tree reduction adds the slots elementwise, and the
+   result -- left in image zero's slot at the start of the buffer -- is
+   copied back into the local array on the receiving image(s).
+
+   array        - descriptor of the local array; overwritten with the
+		  result on the receiving image(s).
+   result_image - 1-based number of the image to receive the result, or
+		  NULL if every image receives it.
+   stat, errmsg, errmsg_len - currently unused.  */
+void
+nca_collsub_sum_array_i8 (gfc_array_i8 * restrict array, int *result_image,
+			   int *stat __attribute__ ((unused)),
+			   char *errmsg __attribute__ ((unused)),
+			   index_type errmsg_len __attribute__ ((unused)))
+{
+  index_type count[GFC_MAX_DIMENSIONS];
+  index_type stride[GFC_MAX_DIMENSIONS];
+  index_type extent[GFC_MAX_DIMENSIONS];
+  GFC_INTEGER_8 *this_shared_ptr;  /* Points to the shared memory allocated to this image. */
+  GFC_INTEGER_8 *buffer;
+  index_type dim;
+  bool packed;
+  index_type span;
+  index_type ssize, num_elems;
+  int cbit = 0;
+  int imoffset;
+  collsub_iface *ci;
+
+  ci = &local->ci;
+
+  /* Gather per-dimension byte strides and extents, and determine whether
+     the array is contiguous ("packed") so it can be copied with a single
+     memcpy.  */
+  dim = GFC_DESCRIPTOR_RANK (array);
+  ssize = sizeof (GFC_INTEGER_8);
+  packed = true;
+  span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_8);
+  for (index_type n = 0; n < dim; n++)
+    {
+      count[n] = 0;
+      stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+      extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+      /* No-op for an empty array. */
+      if (extent[n] <= 0)
+	return;
+
+      if (ssize != stride[n])
+	packed = false;
+
+      ssize *= extent[n];
+    }
+
+  /* ssize is now the size of the whole array in bytes.  */
+  num_elems = ssize / sizeof (GFC_INTEGER_8);
+
+  /* The shared buffer holds one copy of the array for every image.  */
+  buffer = get_collsub_buf (ci, ssize * local->num_images);
+  this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+  if (packed)
+    memcpy (this_shared_ptr, array->base_addr, ssize);
+  else
+    {
+      /* Walk the non-contiguous source array in element order and pack
+	 it into the contiguous shared buffer.  */
+      char *src = (char *) array->base_addr;
+      GFC_INTEGER_8 *restrict dest = this_shared_ptr;
+      index_type stride0 = stride[0];
+
+      while (src)
+	{
+	  /* Copy the data. */
+	  *(dest++) = *((GFC_INTEGER_8 *) src);
+	  src += stride0;
+	  count[0] ++;
+	  /* Advance to the next source element. */
+	  for (index_type n = 0; count[n] == extent[n] ; )
+	    {
+	      /* When we get to the end of a dimension, reset it and increment
+		 the next dimension. */
+	      count[n] = 0;
+	      src -= stride[n] * extent[n];
+	      n++;
+	      if (n == dim)
+		{
+		  src = NULL;
+		  break;
+		}
+	      else
+		{
+		  count[n]++;
+		  src += stride[n];
+		}
+	    }
+	}
+    }
+
+  /* Wait until every image has filled its slot.  */
+  collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+      abababababab
+      a_b_a_b_a_b_
+      a___b___a___
+      a_______b___
+      r___________
+
+     In round cbit, images whose low cbit bits are zero add in the slot
+     of the image 2**cbit away.  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  GFC_INTEGER_8 * other_shared_ptr;  /* Points to the shared memory
+						allocated to another image. */
+	  GFC_INTEGER_8 *a;
+	  GFC_INTEGER_8 *b;
+
+	  other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+	  for (index_type i = 0; i < num_elems; i++)
+	    {
+	      a = this_shared_ptr + i;
+	      b = other_shared_ptr + i;
+	      *a += *b;
+	    }
+	}
+      collsub_sync (ci);
+    }
+  /* This image is done reducing, but it must still take part in the
+     remaining barriers so every image executes the same number of
+     collsub_sync calls.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  /* Copy the result (image zero's slot, at the start of the buffer)
+     back into the local array where requested.  */
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    {
+      if (packed)
+	memcpy (array->base_addr, buffer, ssize);
+      else
+	{
+	  /* Unpack the contiguous result into the non-contiguous local
+	     array, mirroring the packing loop above.  */
+	  GFC_INTEGER_8 *src = buffer;
+	  char * restrict dest = (char *) array->base_addr;
+	  index_type stride0 = stride[0];
+
+	  for (index_type n = 0; n < dim; n++)
+	    count[n] = 0;
+
+	  while (dest)
+	    {
+	      *((GFC_INTEGER_8 * ) dest) = *src++;
+	      dest += stride0;
+	      count[0] ++;
+	      for (index_type n = 0; count[n] == extent[n] ;)
+		{
+		  /* When we get to the end of a dimension, reset it and increment
+		     the next dimension. */
+		  count[n] = 0;
+		  dest -= stride[n] * extent[n];
+		  n++;
+		  if (n == dim)
+		    {
+		      dest = NULL;
+		      break;
+		    }
+		  else
+		    {
+		      count[n]++;
+		      dest += stride[n];
+		    }
+		}
+	    }
+	}
+    }
+  finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_r10.c b/libgfortran/generated/nca_minmax_r10.c
new file mode 100644
index 0000000..10f7324
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_r10.c
@@ -0,0 +1,653 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_REAL_10)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_r10 (GFC_REAL_10 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_r10);
+
+/* Collective MAX reduction (CO_MAX) of a scalar of kind=10 real.
+   Every image deposits its value into its own slot of a shared buffer;
+   a binary-tree reduction leaves the maximum in slot zero, which is
+   then copied back on the receiving image(s).  result_image is the
+   1-based image number that receives the result, or NULL if all images
+   receive it.  stat/errmsg are currently unused.  */
+void
+nca_collsub_max_scalar_r10 (GFC_REAL_10 *obj, int *result_image,
+			    int *stat __attribute__ ((unused)),
+			    char *errmsg __attribute__ ((unused)),
+			    index_type errmsg_len __attribute__ ((unused)))
+{
+  collsub_iface *ci = &local->ci;
+  GFC_REAL_10 *buffer;
+  GFC_REAL_10 *my_slot;
+  int bit = 0;
+
+  buffer = get_collsub_buf (ci, sizeof (GFC_REAL_10) * local->num_images);
+  my_slot = buffer + this_image.image_num;
+  *my_slot = *obj;
+
+  collsub_sync (ci);
+
+  /* Tree reduction: in round 'bit', images whose low bits are all clear
+     fold in the value of the partner 2**bit slots away.  */
+  while (((this_image.image_num >> bit) & 1) == 0
+	 && (local->num_images >> bit) != 0)
+    {
+      int partner = this_image.image_num + (1 << bit);
+      if (partner < local->num_images)
+	{
+	  GFC_REAL_10 other = *(my_slot + (1 << bit));
+	  if (other > *my_slot)
+	    *my_slot = other;
+	}
+      collsub_sync (ci);
+      bit++;
+    }
+
+  /* Done reducing; keep hitting the remaining barriers so every image
+     executes the same number of collsub_sync calls.  */
+  while ((local->num_images >> bit) != 0)
+    {
+      collsub_sync (ci);
+      bit++;
+    }
+
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    *obj = *buffer;
+
+  finish_collective_subroutine (ci);
+}
+
+void nca_collsub_min_scalar_r10 (GFC_REAL_10 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_r10);
+
+/* Collective MIN reduction (CO_MIN) of a scalar of kind=10 real.
+   Every image deposits its value into its own slot of a shared buffer;
+   a binary-tree reduction leaves the minimum in slot zero, which is
+   then copied back on the receiving image(s).  result_image is the
+   1-based image number that receives the result, or NULL if all images
+   receive it.  stat/errmsg are currently unused.  */
+void
+nca_collsub_min_scalar_r10 (GFC_REAL_10 *obj, int *result_image,
+			    int *stat __attribute__ ((unused)),
+			    char *errmsg __attribute__ ((unused)),
+			    index_type errmsg_len __attribute__ ((unused)))
+{
+  collsub_iface *ci = &local->ci;
+  GFC_REAL_10 *buffer;
+  GFC_REAL_10 *my_slot;
+  int bit = 0;
+
+  buffer = get_collsub_buf (ci, sizeof (GFC_REAL_10) * local->num_images);
+  my_slot = buffer + this_image.image_num;
+  *my_slot = *obj;
+
+  collsub_sync (ci);
+
+  /* Tree reduction: in round 'bit', images whose low bits are all clear
+     fold in the value of the partner 2**bit slots away.  */
+  while (((this_image.image_num >> bit) & 1) == 0
+	 && (local->num_images >> bit) != 0)
+    {
+      int partner = this_image.image_num + (1 << bit);
+      if (partner < local->num_images)
+	{
+	  GFC_REAL_10 other = *(my_slot + (1 << bit));
+	  if (other < *my_slot)
+	    *my_slot = other;
+	}
+      collsub_sync (ci);
+      bit++;
+    }
+
+  /* Done reducing; keep hitting the remaining barriers so every image
+     executes the same number of collsub_sync calls.  */
+  while ((local->num_images >> bit) != 0)
+    {
+      collsub_sync (ci);
+      bit++;
+    }
+
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    *obj = *buffer;
+
+  finish_collective_subroutine (ci);
+}
+
+void nca_collsub_sum_scalar_r10 (GFC_REAL_10 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_r10);
+
+/* Collective SUM reduction (CO_SUM) of a scalar of kind=10 real.
+   Every image deposits its value into its own slot of a shared buffer;
+   a binary-tree reduction accumulates the sum into slot zero (in the
+   same deterministic pairwise order as the original loop), which is
+   then copied back on the receiving image(s).  result_image is the
+   1-based image number that receives the result, or NULL if all images
+   receive it.  stat/errmsg are currently unused.  */
+void
+nca_collsub_sum_scalar_r10 (GFC_REAL_10 *obj, int *result_image,
+			    int *stat __attribute__ ((unused)),
+			    char *errmsg __attribute__ ((unused)),
+			    index_type errmsg_len __attribute__ ((unused)))
+{
+  collsub_iface *ci = &local->ci;
+  GFC_REAL_10 *buffer;
+  GFC_REAL_10 *my_slot;
+  int bit = 0;
+
+  buffer = get_collsub_buf (ci, sizeof (GFC_REAL_10) * local->num_images);
+  my_slot = buffer + this_image.image_num;
+  *my_slot = *obj;
+
+  collsub_sync (ci);
+
+  /* Tree reduction: in round 'bit', images whose low bits are all clear
+     add in the value of the partner 2**bit slots away.  */
+  while (((this_image.image_num >> bit) & 1) == 0
+	 && (local->num_images >> bit) != 0)
+    {
+      int partner = this_image.image_num + (1 << bit);
+      if (partner < local->num_images)
+	*my_slot += *(my_slot + (1 << bit));
+      collsub_sync (ci);
+      bit++;
+    }
+
+  /* Done reducing; keep hitting the remaining barriers so every image
+     executes the same number of collsub_sync calls.  */
+  while ((local->num_images >> bit) != 0)
+    {
+      collsub_sync (ci);
+      bit++;
+    }
+
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    *obj = *buffer;
+
+  finish_collective_subroutine (ci);
+}
+
+void nca_collsub_max_array_r10 (gfc_array_r10 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_r10);
+
+/* Collective MAX reduction (CO_MAX) over an array of kind=10 reals.
+   Each image packs its local array into its own slot of a shared
+   buffer, a binary-tree reduction combines the slots elementwise, and
+   the result -- left in image zero's slot at the start of the buffer --
+   is copied back into the local array on the receiving image(s).
+
+   array        - descriptor of the local array; overwritten with the
+		  result on the receiving image(s).
+   result_image - 1-based number of the image to receive the result, or
+		  NULL if every image receives it.
+   stat, errmsg, errmsg_len - currently unused.  */
+void
+nca_collsub_max_array_r10 (gfc_array_r10 * restrict array, int *result_image,
+			   int *stat __attribute__ ((unused)),
+			   char *errmsg __attribute__ ((unused)),
+			   index_type errmsg_len __attribute__ ((unused)))
+{
+  index_type count[GFC_MAX_DIMENSIONS];
+  index_type stride[GFC_MAX_DIMENSIONS];
+  index_type extent[GFC_MAX_DIMENSIONS];
+  GFC_REAL_10 *this_shared_ptr;  /* Points to the shared memory allocated to this image. */
+  GFC_REAL_10 *buffer;
+  index_type dim;
+  bool packed;
+  index_type span;
+  index_type ssize, num_elems;
+  int cbit = 0;
+  int imoffset;
+  collsub_iface *ci;
+
+  ci = &local->ci;
+
+  /* Gather per-dimension byte strides and extents, and determine whether
+     the array is contiguous ("packed") so it can be copied with a single
+     memcpy.  */
+  dim = GFC_DESCRIPTOR_RANK (array);
+  ssize = sizeof (GFC_REAL_10);
+  packed = true;
+  span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_10);
+  for (index_type n = 0; n < dim; n++)
+    {
+      count[n] = 0;
+      stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+      extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+      /* No-op for an empty array. */
+      if (extent[n] <= 0)
+	return;
+
+      if (ssize != stride[n])
+	packed = false;
+
+      ssize *= extent[n];
+    }
+
+  /* ssize is now the size of the whole array in bytes.  */
+  num_elems = ssize / sizeof (GFC_REAL_10);
+
+  /* The shared buffer holds one copy of the array for every image.  */
+  buffer = get_collsub_buf (ci, ssize * local->num_images);
+  this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+  if (packed)
+    memcpy (this_shared_ptr, array->base_addr, ssize);
+  else
+    {
+      /* Walk the non-contiguous source array in element order and pack
+	 it into the contiguous shared buffer.  */
+      char *src = (char *) array->base_addr;
+      GFC_REAL_10 *restrict dest = this_shared_ptr;
+      index_type stride0 = stride[0];
+
+      while (src)
+	{
+	  /* Copy the data. */
+	  *(dest++) = *((GFC_REAL_10 *) src);
+	  src += stride0;
+	  count[0] ++;
+	  /* Advance to the next source element. */
+	  for (index_type n = 0; count[n] == extent[n] ; )
+	    {
+	      /* When we get to the end of a dimension, reset it and increment
+		 the next dimension. */
+	      count[n] = 0;
+	      src -= stride[n] * extent[n];
+	      n++;
+	      if (n == dim)
+		{
+		  src = NULL;
+		  break;
+		}
+	      else
+		{
+		  count[n]++;
+		  src += stride[n];
+		}
+	    }
+	}
+    }
+
+  /* Wait until every image has filled its slot.  */
+  collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+      abababababab
+      a_b_a_b_a_b_
+      a___b___a___
+      a_______b___
+      r___________
+
+     In round cbit, images whose low cbit bits are zero take the
+     elementwise maximum with the slot of the image 2**cbit away.  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  GFC_REAL_10 * other_shared_ptr;  /* Points to the shared memory
+					      allocated to another image. */
+	  GFC_REAL_10 *a;
+	  GFC_REAL_10 *b;
+
+	  other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+	  for (index_type i = 0; i < num_elems; i++)
+	    {
+	      a = this_shared_ptr + i;
+	      b = other_shared_ptr + i;
+	      if (*b > *a)
+		*a = *b;
+	    }
+	}
+      collsub_sync (ci);
+    }
+  /* This image is done reducing, but it must still take part in the
+     remaining barriers so every image executes the same number of
+     collsub_sync calls.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  /* Copy the result (image zero's slot, at the start of the buffer)
+     back into the local array where requested.  */
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    {
+      if (packed)
+	memcpy (array->base_addr, buffer, ssize);
+      else
+	{
+	  /* Unpack the contiguous result into the non-contiguous local
+	     array, mirroring the packing loop above.  */
+	  GFC_REAL_10 *src = buffer;
+	  char * restrict dest = (char *) array->base_addr;
+	  index_type stride0 = stride[0];
+
+	  for (index_type n = 0; n < dim; n++)
+	    count[n] = 0;
+
+	  while (dest)
+	    {
+	      *((GFC_REAL_10 * ) dest) = *src++;
+	      dest += stride0;
+	      count[0] ++;
+	      for (index_type n = 0; count[n] == extent[n] ;)
+		{
+		  /* When we get to the end of a dimension, reset it and increment
+		     the next dimension. */
+		  count[n] = 0;
+		  dest -= stride[n] * extent[n];
+		  n++;
+		  if (n == dim)
+		    {
+		      dest = NULL;
+		      break;
+		    }
+		  else
+		    {
+		      count[n]++;
+		      dest += stride[n];
+		    }
+		}
+	    }
+	}
+    }
+  finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_r10 (gfc_array_r10 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_r10);
+
+/* Collective MIN reduction (CO_MIN) over an array of kind=10 reals.
+   Each image packs its local array into its own slot of a shared
+   buffer, a binary-tree reduction combines the slots elementwise, and
+   the result -- left in image zero's slot at the start of the buffer --
+   is copied back into the local array on the receiving image(s).
+
+   array        - descriptor of the local array; overwritten with the
+		  result on the receiving image(s).
+   result_image - 1-based number of the image to receive the result, or
+		  NULL if every image receives it.
+   stat, errmsg, errmsg_len - currently unused.  */
+void
+nca_collsub_min_array_r10 (gfc_array_r10 * restrict array, int *result_image,
+			   int *stat __attribute__ ((unused)),
+			   char *errmsg __attribute__ ((unused)),
+			   index_type errmsg_len __attribute__ ((unused)))
+{
+  index_type count[GFC_MAX_DIMENSIONS];
+  index_type stride[GFC_MAX_DIMENSIONS];
+  index_type extent[GFC_MAX_DIMENSIONS];
+  GFC_REAL_10 *this_shared_ptr;  /* Points to the shared memory allocated to this image. */
+  GFC_REAL_10 *buffer;
+  index_type dim;
+  bool packed;
+  index_type span;
+  index_type ssize, num_elems;
+  int cbit = 0;
+  int imoffset;
+  collsub_iface *ci;
+
+  ci = &local->ci;
+
+  /* Gather per-dimension byte strides and extents, and determine whether
+     the array is contiguous ("packed") so it can be copied with a single
+     memcpy.  */
+  dim = GFC_DESCRIPTOR_RANK (array);
+  ssize = sizeof (GFC_REAL_10);
+  packed = true;
+  span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_10);
+  for (index_type n = 0; n < dim; n++)
+    {
+      count[n] = 0;
+      stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+      extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+      /* No-op for an empty array. */
+      if (extent[n] <= 0)
+	return;
+
+      if (ssize != stride[n])
+	packed = false;
+
+      ssize *= extent[n];
+    }
+
+  /* ssize is now the size of the whole array in bytes.  */
+  num_elems = ssize / sizeof (GFC_REAL_10);
+
+  /* The shared buffer holds one copy of the array for every image.  */
+  buffer = get_collsub_buf (ci, ssize * local->num_images);
+  this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+  if (packed)
+    memcpy (this_shared_ptr, array->base_addr, ssize);
+  else
+    {
+      /* Walk the non-contiguous source array in element order and pack
+	 it into the contiguous shared buffer.  */
+      char *src = (char *) array->base_addr;
+      GFC_REAL_10 *restrict dest = this_shared_ptr;
+      index_type stride0 = stride[0];
+
+      while (src)
+	{
+	  /* Copy the data. */
+	  *(dest++) = *((GFC_REAL_10 *) src);
+	  src += stride0;
+	  count[0] ++;
+	  /* Advance to the next source element. */
+	  for (index_type n = 0; count[n] == extent[n] ; )
+	    {
+	      /* When we get to the end of a dimension, reset it and increment
+		 the next dimension. */
+	      count[n] = 0;
+	      src -= stride[n] * extent[n];
+	      n++;
+	      if (n == dim)
+		{
+		  src = NULL;
+		  break;
+		}
+	      else
+		{
+		  count[n]++;
+		  src += stride[n];
+		}
+	    }
+	}
+    }
+
+  /* Wait until every image has filled its slot.  */
+  collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+      abababababab
+      a_b_a_b_a_b_
+      a___b___a___
+      a_______b___
+      r___________
+
+     In round cbit, images whose low cbit bits are zero take the
+     elementwise minimum with the slot of the image 2**cbit away.  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  GFC_REAL_10 * other_shared_ptr;  /* Points to the shared memory
+					      allocated to another image. */
+	  GFC_REAL_10 *a;
+	  GFC_REAL_10 *b;
+
+	  other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+	  for (index_type i = 0; i < num_elems; i++)
+	    {
+	      a = this_shared_ptr + i;
+	      b = other_shared_ptr + i;
+	      if (*b < *a)
+		*a = *b;
+	    }
+	}
+      collsub_sync (ci);
+    }
+  /* This image is done reducing, but it must still take part in the
+     remaining barriers so every image executes the same number of
+     collsub_sync calls.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  /* Copy the result (image zero's slot, at the start of the buffer)
+     back into the local array where requested.  */
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    {
+      if (packed)
+	memcpy (array->base_addr, buffer, ssize);
+      else
+	{
+	  /* Unpack the contiguous result into the non-contiguous local
+	     array, mirroring the packing loop above.  */
+	  GFC_REAL_10 *src = buffer;
+	  char * restrict dest = (char *) array->base_addr;
+	  index_type stride0 = stride[0];
+
+	  for (index_type n = 0; n < dim; n++)
+	    count[n] = 0;
+
+	  while (dest)
+	    {
+	      *((GFC_REAL_10 * ) dest) = *src++;
+	      dest += stride0;
+	      count[0] ++;
+	      for (index_type n = 0; count[n] == extent[n] ;)
+		{
+		  /* When we get to the end of a dimension, reset it and increment
+		     the next dimension. */
+		  count[n] = 0;
+		  dest -= stride[n] * extent[n];
+		  n++;
+		  if (n == dim)
+		    {
+		      dest = NULL;
+		      break;
+		    }
+		  else
+		    {
+		      count[n]++;
+		      dest += stride[n];
+		    }
+		}
+	    }
+	}
+    }
+  finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_r10 (gfc_array_r10 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_r10);
+
+/* Collective SUM reduction (CO_SUM) over an array of kind=10 reals.
+   Each image packs its local array into its own slot of a shared
+   buffer, a binary-tree reduction adds the slots elementwise, and the
+   result -- left in image zero's slot at the start of the buffer -- is
+   copied back into the local array on the receiving image(s).
+
+   array        - descriptor of the local array; overwritten with the
+		  result on the receiving image(s).
+   result_image - 1-based number of the image to receive the result, or
+		  NULL if every image receives it.
+   stat, errmsg, errmsg_len - currently unused.  */
+void
+nca_collsub_sum_array_r10 (gfc_array_r10 * restrict array, int *result_image,
+			   int *stat __attribute__ ((unused)),
+			   char *errmsg __attribute__ ((unused)),
+			   index_type errmsg_len __attribute__ ((unused)))
+{
+  index_type count[GFC_MAX_DIMENSIONS];
+  index_type stride[GFC_MAX_DIMENSIONS];
+  index_type extent[GFC_MAX_DIMENSIONS];
+  GFC_REAL_10 *this_shared_ptr;  /* Points to the shared memory allocated to this image. */
+  GFC_REAL_10 *buffer;
+  index_type dim;
+  bool packed;
+  index_type span;
+  index_type ssize, num_elems;
+  int cbit = 0;
+  int imoffset;
+  collsub_iface *ci;
+
+  ci = &local->ci;
+
+  /* Gather per-dimension byte strides and extents, and determine whether
+     the array is contiguous ("packed") so it can be copied with a single
+     memcpy.  */
+  dim = GFC_DESCRIPTOR_RANK (array);
+  ssize = sizeof (GFC_REAL_10);
+  packed = true;
+  span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_10);
+  for (index_type n = 0; n < dim; n++)
+    {
+      count[n] = 0;
+      stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+      extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+      /* No-op for an empty array. */
+      if (extent[n] <= 0)
+	return;
+
+      if (ssize != stride[n])
+	packed = false;
+
+      ssize *= extent[n];
+    }
+
+  /* ssize is now the size of the whole array in bytes.  */
+  num_elems = ssize / sizeof (GFC_REAL_10);
+
+  /* The shared buffer holds one copy of the array for every image.  */
+  buffer = get_collsub_buf (ci, ssize * local->num_images);
+  this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+  if (packed)
+    memcpy (this_shared_ptr, array->base_addr, ssize);
+  else
+    {
+      /* Walk the non-contiguous source array in element order and pack
+	 it into the contiguous shared buffer.  */
+      char *src = (char *) array->base_addr;
+      GFC_REAL_10 *restrict dest = this_shared_ptr;
+      index_type stride0 = stride[0];
+
+      while (src)
+	{
+	  /* Copy the data. */
+	  *(dest++) = *((GFC_REAL_10 *) src);
+	  src += stride0;
+	  count[0] ++;
+	  /* Advance to the next source element. */
+	  for (index_type n = 0; count[n] == extent[n] ; )
+	    {
+	      /* When we get to the end of a dimension, reset it and increment
+		 the next dimension. */
+	      count[n] = 0;
+	      src -= stride[n] * extent[n];
+	      n++;
+	      if (n == dim)
+		{
+		  src = NULL;
+		  break;
+		}
+	      else
+		{
+		  count[n]++;
+		  src += stride[n];
+		}
+	    }
+	}
+    }
+
+  /* Wait until every image has filled its slot.  */
+  collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+      abababababab
+      a_b_a_b_a_b_
+      a___b___a___
+      a_______b___
+      r___________
+
+     In round cbit, images whose low cbit bits are zero add in the slot
+     of the image 2**cbit away.  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  GFC_REAL_10 * other_shared_ptr;  /* Points to the shared memory
+					      allocated to another image. */
+	  GFC_REAL_10 *a;
+	  GFC_REAL_10 *b;
+
+	  other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+	  for (index_type i = 0; i < num_elems; i++)
+	    {
+	      a = this_shared_ptr + i;
+	      b = other_shared_ptr + i;
+	      *a += *b;
+	    }
+	}
+      collsub_sync (ci);
+    }
+  /* This image is done reducing, but it must still take part in the
+     remaining barriers so every image executes the same number of
+     collsub_sync calls.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  /* Copy the result (image zero's slot, at the start of the buffer)
+     back into the local array where requested.  */
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    {
+      if (packed)
+	memcpy (array->base_addr, buffer, ssize);
+      else
+	{
+	  /* Unpack the contiguous result into the non-contiguous local
+	     array, mirroring the packing loop above.  */
+	  GFC_REAL_10 *src = buffer;
+	  char * restrict dest = (char *) array->base_addr;
+	  index_type stride0 = stride[0];
+
+	  for (index_type n = 0; n < dim; n++)
+	    count[n] = 0;
+
+	  while (dest)
+	    {
+	      *((GFC_REAL_10 * ) dest) = *src++;
+	      dest += stride0;
+	      count[0] ++;
+	      for (index_type n = 0; count[n] == extent[n] ;)
+		{
+		  /* When we get to the end of a dimension, reset it and increment
+		     the next dimension. */
+		  count[n] = 0;
+		  dest -= stride[n] * extent[n];
+		  n++;
+		  if (n == dim)
+		    {
+		      dest = NULL;
+		      break;
+		    }
+		  else
+		    {
+		      count[n]++;
+		      dest += stride[n];
+		    }
+		}
+	    }
+	}
+    }
+  finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_r16.c b/libgfortran/generated/nca_minmax_r16.c
new file mode 100644
index 0000000..a0a0a51
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_r16.c
@@ -0,0 +1,653 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_REAL_16)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+/* CO_MAX for a scalar GFC_REAL_16.  Every image publishes its value in
+   a shared buffer slot indexed by its image number; a binary-tree
+   reduction then leaves the maximum in buffer[0].  The result is copied
+   back on the image named by RESULT_IMAGE (1-based), or on all images
+   when RESULT_IMAGE is NULL.  STAT, ERRMSG and ERRMSG_LEN belong to the
+   Fortran ABI but are not used yet.  */
+void nca_collsub_max_scalar_r16 (GFC_REAL_16 *obj, int *result_image,
+         int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_r16);
+
+void
+nca_collsub_max_scalar_r16 (GFC_REAL_16 *obj, int *result_image,
+        int *stat __attribute__ ((unused)),
+        char *errmsg __attribute__ ((unused)),
+        index_type errmsg_len __attribute__ ((unused)))
+{
+  int cbit = 0;   /* Current round of the tree reduction.  */
+  int imoffset;   /* Distance (in slots) to the partner image.  */
+  GFC_REAL_16 *a, *b;
+  GFC_REAL_16 *buffer, *this_image_buf;
+  collsub_iface *ci;
+
+  /* Collective-subroutine state shared by the images on this node.  */
+  ci = &local->ci;
+
+  /* One slot per image; publish this image's value.  */
+  buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->num_images);
+  this_image_buf = buffer + this_image.image_num;
+  *this_image_buf = *obj;
+
+  collsub_sync (ci);
+  /* Tree reduction: in round CBIT an image whose low CBIT+1 bits are
+     all zero folds in the slot of the partner 1 << CBIT away, halving
+     the number of active images each round until buffer[0] holds the
+     global maximum.  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  a = this_image_buf;
+	  b = this_image_buf + imoffset;
+	  if (*b > *a)
+	    *a = *b;
+	}
+      collsub_sync (ci);
+    }
+  /* Images that retired early still join the remaining barriers so that
+     every image executes the same number of collsub_sync calls.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    *obj = *buffer;
+
+  finish_collective_subroutine (ci);
+
+}
+
+/* CO_MIN for a scalar GFC_REAL_16.  Same tree-reduction scheme as the
+   CO_MAX variant above, but keeping the smaller value; the minimum ends
+   up in buffer[0] and is copied back on the image named by RESULT_IMAGE
+   (1-based), or on all images when RESULT_IMAGE is NULL.  STAT, ERRMSG
+   and ERRMSG_LEN belong to the Fortran ABI but are not used yet.  */
+void nca_collsub_min_scalar_r16 (GFC_REAL_16 *obj, int *result_image,
+         int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_r16);
+
+void
+nca_collsub_min_scalar_r16 (GFC_REAL_16 *obj, int *result_image,
+        int *stat __attribute__ ((unused)),
+        char *errmsg __attribute__ ((unused)),
+        index_type errmsg_len __attribute__ ((unused)))
+{
+  int cbit = 0;   /* Current round of the tree reduction.  */
+  int imoffset;   /* Distance (in slots) to the partner image.  */
+  GFC_REAL_16 *a, *b;
+  GFC_REAL_16 *buffer, *this_image_buf;
+  collsub_iface *ci;
+
+  /* Collective-subroutine state shared by the images on this node.  */
+  ci = &local->ci;
+
+  /* One slot per image; publish this image's value.  */
+  buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->num_images);
+  this_image_buf = buffer + this_image.image_num;
+  *this_image_buf = *obj;
+
+  collsub_sync (ci);
+  /* Tree reduction; see the CO_MAX variant for the pattern.  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  a = this_image_buf;
+	  b = this_image_buf + imoffset;
+	  if (*b < *a)
+	    *a = *b;
+	}
+      collsub_sync (ci);
+    }
+  /* Keep the barrier count of retired images in step.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    *obj = *buffer;
+
+  finish_collective_subroutine (ci);
+
+}
+
+/* CO_SUM for a scalar GFC_REAL_16.  Same tree-reduction scheme as the
+   CO_MAX variant above, but accumulating a sum; the total ends up in
+   buffer[0] and is copied back on the image named by RESULT_IMAGE
+   (1-based), or on all images when RESULT_IMAGE is NULL.  STAT, ERRMSG
+   and ERRMSG_LEN belong to the Fortran ABI but are not used yet.  */
+void nca_collsub_sum_scalar_r16 (GFC_REAL_16 *obj, int *result_image,
+         int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_r16);
+
+void
+nca_collsub_sum_scalar_r16 (GFC_REAL_16 *obj, int *result_image,
+        int *stat __attribute__ ((unused)),
+        char *errmsg __attribute__ ((unused)),
+        index_type errmsg_len __attribute__ ((unused)))
+{
+  int cbit = 0;   /* Current round of the tree reduction.  */
+  int imoffset;   /* Distance (in slots) to the partner image.  */
+  GFC_REAL_16 *a, *b;
+  GFC_REAL_16 *buffer, *this_image_buf;
+  collsub_iface *ci;
+
+  /* Collective-subroutine state shared by the images on this node.  */
+  ci = &local->ci;
+
+  /* One slot per image; publish this image's value.  */
+  buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->num_images);
+  this_image_buf = buffer + this_image.image_num;
+  *this_image_buf = *obj;
+
+  collsub_sync (ci);
+  /* Tree reduction; see the CO_MAX variant for the pattern.  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  a = this_image_buf;
+	  b = this_image_buf + imoffset;
+	  *a += *b;
+	}
+      collsub_sync (ci);
+    }
+  /* Keep the barrier count of retired images in step.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    *obj = *buffer;
+
+  finish_collective_subroutine (ci);
+
+}
+
+/* CO_MAX for an array of GFC_REAL_16.  Each image packs its (possibly
+   non-contiguous) array into its slot of one large shared buffer, a
+   binary-tree reduction forms the elementwise maximum in image zero's
+   slot (the start of the buffer), and the result is copied back on the
+   image named by RESULT_IMAGE (1-based), or on all images when
+   RESULT_IMAGE is NULL.  STAT, ERRMSG and ERRMSG_LEN belong to the
+   Fortran ABI but are not used yet.  */
+void nca_collsub_max_array_r16 (gfc_array_r16 * restrict array, int *result_image,
+        int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_r16);
+
+void
+nca_collsub_max_array_r16 (gfc_array_r16 * restrict array, int *result_image,
+       int *stat __attribute__ ((unused)),
+       char *errmsg __attribute__ ((unused)),
+       index_type errmsg_len __attribute__ ((unused)))
+{
+  index_type count[GFC_MAX_DIMENSIONS];   /* Odometer position per dimension.  */
+  index_type stride[GFC_MAX_DIMENSIONS];  /* Strides in bytes, not elements.  */
+  index_type extent[GFC_MAX_DIMENSIONS];
+  GFC_REAL_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+  GFC_REAL_16 *buffer;
+  index_type dim;     /* Rank of ARRAY.  */
+  bool packed;        /* True if ARRAY is contiguous in memory.  */
+  index_type span;
+  index_type ssize, num_elems;  /* One image's data: bytes / elements.  */
+  int cbit = 0;
+  int imoffset;
+  collsub_iface *ci;
+
+  ci = &local->ci;
+
+  dim = GFC_DESCRIPTOR_RANK (array);
+  ssize = sizeof (GFC_REAL_16);
+  packed = true;
+  span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_16);
+  for (index_type n = 0; n < dim; n++)
+    {
+      count[n] = 0;
+      stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+      extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+      /* No-op for an empty array. */
+      /* NOTE(review): this returns before any collsub_sync; it assumes
+	 every image sees the same (empty) shape, otherwise the barrier
+	 counts across images would not match -- confirm with callers.  */
+      if (extent[n] <= 0)
+	return;
+
+      /* Contiguous iff each stride equals the combined size of all
+	 lower dimensions.  */
+      if (ssize != stride[n])
+	packed = false;
+
+      ssize *= extent[n];
+    }
+
+  num_elems = ssize / sizeof (GFC_REAL_16);
+
+  /* The shared buffer holds one copy of the array per image.  */
+  buffer = get_collsub_buf (ci, ssize * local->num_images);
+  this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+  if (packed)
+    memcpy (this_shared_ptr, array->base_addr, ssize);
+  else
+    {
+      /* Pack the non-contiguous source into the shared buffer with an
+	 odometer walk over all dimensions.  */
+      char *src = (char *) array->base_addr;
+      GFC_REAL_16 *restrict dest = this_shared_ptr;
+      index_type stride0 = stride[0];
+
+      while (src)
+	{
+	  /* Copy the data. */
+	  *(dest++) = *((GFC_REAL_16 *) src);
+	  src += stride0;
+	  count[0] ++;
+	  /* Advance to the next source element. */
+	  for (index_type n = 0; count[n] == extent[n] ; )
+	    {
+	      /* When we get to the end of a dimension, reset it and increment
+		 the next dimension. */
+	      count[n] = 0;
+	      src -= stride[n] * extent[n];
+	      n++;
+	      if (n == dim)
+		{
+		  /* Wrapped past the last dimension: the walk is done.  */
+		  src = NULL;
+		  break;
+		}
+	      else
+		{
+		  count[n]++;
+		  src += stride[n];
+		}
+	    }
+	}
+    }
+
+  collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here the general scheme:
+
+     abababababab
+     a_b_a_b_a_b_
+     a___b___a___
+     a_______b___
+     r___________
+  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  GFC_REAL_16 * other_shared_ptr; /* Points to the shared memory
+					     allocated to another image. */
+	  GFC_REAL_16 *a;
+	  GFC_REAL_16 *b;
+
+	  other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+	  /* Elementwise maximum of this image's slot and the partner's.  */
+	  for (index_type i = 0; i < num_elems; i++)
+	    {
+	      a = this_shared_ptr + i;
+	      b = other_shared_ptr + i;
+	      if (*b > *a)
+		*a = *b;
+	    }
+	}
+      collsub_sync (ci);
+    }
+  /* Keep the barrier count of retired images in step.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    {
+      if (packed)
+	memcpy (array->base_addr, buffer, ssize);
+      else
+	{
+	  /* Unpack the reduced result (image zero's slot at the start of
+	     BUFFER) back into the non-contiguous array.  */
+	  GFC_REAL_16 *src = buffer;
+	  char * restrict dest = (char *) array->base_addr;
+	  index_type stride0 = stride[0];
+
+	  for (index_type n = 0; n < dim; n++)
+	    count[n] = 0;
+
+	  while (dest)
+	    {
+	      *((GFC_REAL_16 * ) dest) = *src++;
+	      dest += stride0;
+	      count[0] ++;
+	      for (index_type n = 0; count[n] == extent[n] ;)
+		{
+		  /* When we get to the end of a dimension, reset it and increment
+		     the next dimension. */
+		  count[n] = 0;
+		  dest -= stride[n] * extent[n];
+		  n++;
+		  if (n == dim)
+		    {
+		      dest = NULL;
+		      break;
+		    }
+		  else
+		    {
+		      count[n]++;
+		      dest += stride[n];
+		    }
+		}
+	    }
+	}
+    }
+  finish_collective_subroutine (ci);
+}
+/* CO_MIN for an array of GFC_REAL_16.  Identical structure to the
+   CO_MAX array variant above, but keeping the smaller element; the
+   elementwise minimum is reduced into image zero's buffer slot and
+   copied back on the image named by RESULT_IMAGE (1-based), or on all
+   images when RESULT_IMAGE is NULL.  STAT, ERRMSG and ERRMSG_LEN belong
+   to the Fortran ABI but are not used yet.  */
+void nca_collsub_min_array_r16 (gfc_array_r16 * restrict array, int *result_image,
+        int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_r16);
+
+void
+nca_collsub_min_array_r16 (gfc_array_r16 * restrict array, int *result_image,
+       int *stat __attribute__ ((unused)),
+       char *errmsg __attribute__ ((unused)),
+       index_type errmsg_len __attribute__ ((unused)))
+{
+  index_type count[GFC_MAX_DIMENSIONS];   /* Odometer position per dimension.  */
+  index_type stride[GFC_MAX_DIMENSIONS];  /* Strides in bytes, not elements.  */
+  index_type extent[GFC_MAX_DIMENSIONS];
+  GFC_REAL_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+  GFC_REAL_16 *buffer;
+  index_type dim;     /* Rank of ARRAY.  */
+  bool packed;        /* True if ARRAY is contiguous in memory.  */
+  index_type span;
+  index_type ssize, num_elems;  /* One image's data: bytes / elements.  */
+  int cbit = 0;
+  int imoffset;
+  collsub_iface *ci;
+
+  ci = &local->ci;
+
+  dim = GFC_DESCRIPTOR_RANK (array);
+  ssize = sizeof (GFC_REAL_16);
+  packed = true;
+  span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_16);
+  for (index_type n = 0; n < dim; n++)
+    {
+      count[n] = 0;
+      stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+      extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+      /* No-op for an empty array. */
+      /* NOTE(review): this returns before any collsub_sync; it assumes
+	 every image sees the same (empty) shape, otherwise the barrier
+	 counts across images would not match -- confirm with callers.  */
+      if (extent[n] <= 0)
+	return;
+
+      /* Contiguous iff each stride equals the combined size of all
+	 lower dimensions.  */
+      if (ssize != stride[n])
+	packed = false;
+
+      ssize *= extent[n];
+    }
+
+  num_elems = ssize / sizeof (GFC_REAL_16);
+
+  /* The shared buffer holds one copy of the array per image.  */
+  buffer = get_collsub_buf (ci, ssize * local->num_images);
+  this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+  if (packed)
+    memcpy (this_shared_ptr, array->base_addr, ssize);
+  else
+    {
+      /* Pack the non-contiguous source into the shared buffer with an
+	 odometer walk over all dimensions.  */
+      char *src = (char *) array->base_addr;
+      GFC_REAL_16 *restrict dest = this_shared_ptr;
+      index_type stride0 = stride[0];
+
+      while (src)
+	{
+	  /* Copy the data. */
+	  *(dest++) = *((GFC_REAL_16 *) src);
+	  src += stride0;
+	  count[0] ++;
+	  /* Advance to the next source element. */
+	  for (index_type n = 0; count[n] == extent[n] ; )
+	    {
+	      /* When we get to the end of a dimension, reset it and increment
+		 the next dimension. */
+	      count[n] = 0;
+	      src -= stride[n] * extent[n];
+	      n++;
+	      if (n == dim)
+		{
+		  /* Wrapped past the last dimension: the walk is done.  */
+		  src = NULL;
+		  break;
+		}
+	      else
+		{
+		  count[n]++;
+		  src += stride[n];
+		}
+	    }
+	}
+    }
+
+  collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here the general scheme:
+
+     abababababab
+     a_b_a_b_a_b_
+     a___b___a___
+     a_______b___
+     r___________
+  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  GFC_REAL_16 * other_shared_ptr; /* Points to the shared memory
+					     allocated to another image. */
+	  GFC_REAL_16 *a;
+	  GFC_REAL_16 *b;
+
+	  other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+	  /* Elementwise minimum of this image's slot and the partner's.  */
+	  for (index_type i = 0; i < num_elems; i++)
+	    {
+	      a = this_shared_ptr + i;
+	      b = other_shared_ptr + i;
+	      if (*b < *a)
+		*a = *b;
+	    }
+	}
+      collsub_sync (ci);
+    }
+  /* Keep the barrier count of retired images in step.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    {
+      if (packed)
+	memcpy (array->base_addr, buffer, ssize);
+      else
+	{
+	  /* Unpack the reduced result (image zero's slot at the start of
+	     BUFFER) back into the non-contiguous array.  */
+	  GFC_REAL_16 *src = buffer;
+	  char * restrict dest = (char *) array->base_addr;
+	  index_type stride0 = stride[0];
+
+	  for (index_type n = 0; n < dim; n++)
+	    count[n] = 0;
+
+	  while (dest)
+	    {
+	      *((GFC_REAL_16 * ) dest) = *src++;
+	      dest += stride0;
+	      count[0] ++;
+	      for (index_type n = 0; count[n] == extent[n] ;)
+		{
+		  /* When we get to the end of a dimension, reset it and increment
+		     the next dimension. */
+		  count[n] = 0;
+		  dest -= stride[n] * extent[n];
+		  n++;
+		  if (n == dim)
+		    {
+		      dest = NULL;
+		      break;
+		    }
+		  else
+		    {
+		      count[n]++;
+		      dest += stride[n];
+		    }
+		}
+	    }
+	}
+    }
+  finish_collective_subroutine (ci);
+}
+/* CO_SUM for an array of GFC_REAL_16.  Identical structure to the
+   CO_MAX array variant above, but accumulating an elementwise sum into
+   image zero's buffer slot; the result is copied back on the image
+   named by RESULT_IMAGE (1-based), or on all images when RESULT_IMAGE
+   is NULL.  STAT, ERRMSG and ERRMSG_LEN belong to the Fortran ABI but
+   are not used yet.  */
+void nca_collsub_sum_array_r16 (gfc_array_r16 * restrict array, int *result_image,
+        int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_r16);
+
+void
+nca_collsub_sum_array_r16 (gfc_array_r16 * restrict array, int *result_image,
+       int *stat __attribute__ ((unused)),
+       char *errmsg __attribute__ ((unused)),
+       index_type errmsg_len __attribute__ ((unused)))
+{
+  index_type count[GFC_MAX_DIMENSIONS];   /* Odometer position per dimension.  */
+  index_type stride[GFC_MAX_DIMENSIONS];  /* Strides in bytes, not elements.  */
+  index_type extent[GFC_MAX_DIMENSIONS];
+  GFC_REAL_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+  GFC_REAL_16 *buffer;
+  index_type dim;     /* Rank of ARRAY.  */
+  bool packed;        /* True if ARRAY is contiguous in memory.  */
+  index_type span;
+  index_type ssize, num_elems;  /* One image's data: bytes / elements.  */
+  int cbit = 0;
+  int imoffset;
+  collsub_iface *ci;
+
+  ci = &local->ci;
+
+  dim = GFC_DESCRIPTOR_RANK (array);
+  ssize = sizeof (GFC_REAL_16);
+  packed = true;
+  span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_16);
+  for (index_type n = 0; n < dim; n++)
+    {
+      count[n] = 0;
+      stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+      extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+      /* No-op for an empty array. */
+      /* NOTE(review): this returns before any collsub_sync; it assumes
+	 every image sees the same (empty) shape, otherwise the barrier
+	 counts across images would not match -- confirm with callers.  */
+      if (extent[n] <= 0)
+	return;
+
+      /* Contiguous iff each stride equals the combined size of all
+	 lower dimensions.  */
+      if (ssize != stride[n])
+	packed = false;
+
+      ssize *= extent[n];
+    }
+
+  num_elems = ssize / sizeof (GFC_REAL_16);
+
+  /* The shared buffer holds one copy of the array per image.  */
+  buffer = get_collsub_buf (ci, ssize * local->num_images);
+  this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+  if (packed)
+    memcpy (this_shared_ptr, array->base_addr, ssize);
+  else
+    {
+      /* Pack the non-contiguous source into the shared buffer with an
+	 odometer walk over all dimensions.  */
+      char *src = (char *) array->base_addr;
+      GFC_REAL_16 *restrict dest = this_shared_ptr;
+      index_type stride0 = stride[0];
+
+      while (src)
+	{
+	  /* Copy the data. */
+	  *(dest++) = *((GFC_REAL_16 *) src);
+	  src += stride0;
+	  count[0] ++;
+	  /* Advance to the next source element. */
+	  for (index_type n = 0; count[n] == extent[n] ; )
+	    {
+	      /* When we get to the end of a dimension, reset it and increment
+		 the next dimension. */
+	      count[n] = 0;
+	      src -= stride[n] * extent[n];
+	      n++;
+	      if (n == dim)
+		{
+		  /* Wrapped past the last dimension: the walk is done.  */
+		  src = NULL;
+		  break;
+		}
+	      else
+		{
+		  count[n]++;
+		  src += stride[n];
+		}
+	    }
+	}
+    }
+
+  collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here the general scheme:
+
+     abababababab
+     a_b_a_b_a_b_
+     a___b___a___
+     a_______b___
+     r___________
+  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  GFC_REAL_16 * other_shared_ptr; /* Points to the shared memory
+					     allocated to another image. */
+	  GFC_REAL_16 *a;
+	  GFC_REAL_16 *b;
+
+	  other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+	  /* Elementwise sum of this image's slot and the partner's.  */
+	  for (index_type i = 0; i < num_elems; i++)
+	    {
+	      a = this_shared_ptr + i;
+	      b = other_shared_ptr + i;
+	      *a += *b;
+	    }
+	}
+      collsub_sync (ci);
+    }
+  /* Keep the barrier count of retired images in step.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    {
+      if (packed)
+	memcpy (array->base_addr, buffer, ssize);
+      else
+	{
+	  /* Unpack the reduced result (image zero's slot at the start of
+	     BUFFER) back into the non-contiguous array.  */
+	  GFC_REAL_16 *src = buffer;
+	  char * restrict dest = (char *) array->base_addr;
+	  index_type stride0 = stride[0];
+
+	  for (index_type n = 0; n < dim; n++)
+	    count[n] = 0;
+
+	  while (dest)
+	    {
+	      *((GFC_REAL_16 * ) dest) = *src++;
+	      dest += stride0;
+	      count[0] ++;
+	      for (index_type n = 0; count[n] == extent[n] ;)
+		{
+		  /* When we get to the end of a dimension, reset it and increment
+		     the next dimension. */
+		  count[n] = 0;
+		  dest -= stride[n] * extent[n];
+		  n++;
+		  if (n == dim)
+		    {
+		      dest = NULL;
+		      break;
+		    }
+		  else
+		    {
+		      count[n]++;
+		      dest += stride[n];
+		    }
+		}
+	    }
+	}
+    }
+  finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_r4.c b/libgfortran/generated/nca_minmax_r4.c
new file mode 100644
index 0000000..0eb3f1b
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_r4.c
@@ -0,0 +1,653 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_REAL_4)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+/* CO_MAX for a scalar GFC_REAL_4.  Every image publishes its value in a
+   shared buffer slot indexed by its image number; a binary-tree
+   reduction then leaves the maximum in buffer[0].  The result is copied
+   back on the image named by RESULT_IMAGE (1-based), or on all images
+   when RESULT_IMAGE is NULL.  STAT, ERRMSG and ERRMSG_LEN belong to the
+   Fortran ABI but are not used yet.  */
+void nca_collsub_max_scalar_r4 (GFC_REAL_4 *obj, int *result_image,
+        int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_r4);
+
+void
+nca_collsub_max_scalar_r4 (GFC_REAL_4 *obj, int *result_image,
+       int *stat __attribute__ ((unused)),
+       char *errmsg __attribute__ ((unused)),
+       index_type errmsg_len __attribute__ ((unused)))
+{
+  int cbit = 0;   /* Current round of the tree reduction.  */
+  int imoffset;   /* Distance (in slots) to the partner image.  */
+  GFC_REAL_4 *a, *b;
+  GFC_REAL_4 *buffer, *this_image_buf;
+  collsub_iface *ci;
+
+  /* Collective-subroutine state shared by the images on this node.  */
+  ci = &local->ci;
+
+  /* One slot per image; publish this image's value.  */
+  buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->num_images);
+  this_image_buf = buffer + this_image.image_num;
+  *this_image_buf = *obj;
+
+  collsub_sync (ci);
+  /* Tree reduction: in round CBIT an image whose low CBIT+1 bits are
+     all zero folds in the slot of the partner 1 << CBIT away, halving
+     the number of active images each round until buffer[0] holds the
+     global maximum.  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  a = this_image_buf;
+	  b = this_image_buf + imoffset;
+	  if (*b > *a)
+	    *a = *b;
+	}
+      collsub_sync (ci);
+    }
+  /* Images that retired early still join the remaining barriers so that
+     every image executes the same number of collsub_sync calls.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    *obj = *buffer;
+
+  finish_collective_subroutine (ci);
+
+}
+
+/* CO_MIN for a scalar GFC_REAL_4.  Same tree-reduction scheme as the
+   CO_MAX variant above, but keeping the smaller value; the minimum ends
+   up in buffer[0] and is copied back on the image named by RESULT_IMAGE
+   (1-based), or on all images when RESULT_IMAGE is NULL.  STAT, ERRMSG
+   and ERRMSG_LEN belong to the Fortran ABI but are not used yet.  */
+void nca_collsub_min_scalar_r4 (GFC_REAL_4 *obj, int *result_image,
+        int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_r4);
+
+void
+nca_collsub_min_scalar_r4 (GFC_REAL_4 *obj, int *result_image,
+       int *stat __attribute__ ((unused)),
+       char *errmsg __attribute__ ((unused)),
+       index_type errmsg_len __attribute__ ((unused)))
+{
+  int cbit = 0;   /* Current round of the tree reduction.  */
+  int imoffset;   /* Distance (in slots) to the partner image.  */
+  GFC_REAL_4 *a, *b;
+  GFC_REAL_4 *buffer, *this_image_buf;
+  collsub_iface *ci;
+
+  /* Collective-subroutine state shared by the images on this node.  */
+  ci = &local->ci;
+
+  /* One slot per image; publish this image's value.  */
+  buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->num_images);
+  this_image_buf = buffer + this_image.image_num;
+  *this_image_buf = *obj;
+
+  collsub_sync (ci);
+  /* Tree reduction; see the CO_MAX variant for the pattern.  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  a = this_image_buf;
+	  b = this_image_buf + imoffset;
+	  if (*b < *a)
+	    *a = *b;
+	}
+      collsub_sync (ci);
+    }
+  /* Keep the barrier count of retired images in step.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    *obj = *buffer;
+
+  finish_collective_subroutine (ci);
+
+}
+
+/* CO_SUM for a scalar GFC_REAL_4.  Same tree-reduction scheme as the
+   CO_MAX variant above, but accumulating a sum; the total ends up in
+   buffer[0] and is copied back on the image named by RESULT_IMAGE
+   (1-based), or on all images when RESULT_IMAGE is NULL.  STAT, ERRMSG
+   and ERRMSG_LEN belong to the Fortran ABI but are not used yet.  */
+void nca_collsub_sum_scalar_r4 (GFC_REAL_4 *obj, int *result_image,
+        int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_r4);
+
+void
+nca_collsub_sum_scalar_r4 (GFC_REAL_4 *obj, int *result_image,
+       int *stat __attribute__ ((unused)),
+       char *errmsg __attribute__ ((unused)),
+       index_type errmsg_len __attribute__ ((unused)))
+{
+  int cbit = 0;   /* Current round of the tree reduction.  */
+  int imoffset;   /* Distance (in slots) to the partner image.  */
+  GFC_REAL_4 *a, *b;
+  GFC_REAL_4 *buffer, *this_image_buf;
+  collsub_iface *ci;
+
+  /* Collective-subroutine state shared by the images on this node.  */
+  ci = &local->ci;
+
+  /* One slot per image; publish this image's value.  */
+  buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->num_images);
+  this_image_buf = buffer + this_image.image_num;
+  *this_image_buf = *obj;
+
+  collsub_sync (ci);
+  /* Tree reduction; see the CO_MAX variant for the pattern.  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  a = this_image_buf;
+	  b = this_image_buf + imoffset;
+	  *a += *b;
+	}
+      collsub_sync (ci);
+    }
+  /* Keep the barrier count of retired images in step.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    *obj = *buffer;
+
+  finish_collective_subroutine (ci);
+
+}
+
+/* CO_MAX for an array of GFC_REAL_4.  Each image packs its (possibly
+   non-contiguous) array into its slot of one large shared buffer, a
+   binary-tree reduction forms the elementwise maximum in image zero's
+   slot (the start of the buffer), and the result is copied back on the
+   image named by RESULT_IMAGE (1-based), or on all images when
+   RESULT_IMAGE is NULL.  STAT, ERRMSG and ERRMSG_LEN belong to the
+   Fortran ABI but are not used yet.  */
+void nca_collsub_max_array_r4 (gfc_array_r4 * restrict array, int *result_image,
+       int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_r4);
+
+void
+nca_collsub_max_array_r4 (gfc_array_r4 * restrict array, int *result_image,
+      int *stat __attribute__ ((unused)),
+      char *errmsg __attribute__ ((unused)),
+      index_type errmsg_len __attribute__ ((unused)))
+{
+  index_type count[GFC_MAX_DIMENSIONS];   /* Odometer position per dimension.  */
+  index_type stride[GFC_MAX_DIMENSIONS];  /* Strides in bytes, not elements.  */
+  index_type extent[GFC_MAX_DIMENSIONS];
+  GFC_REAL_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+  GFC_REAL_4 *buffer;
+  index_type dim;     /* Rank of ARRAY.  */
+  bool packed;        /* True if ARRAY is contiguous in memory.  */
+  index_type span;
+  index_type ssize, num_elems;  /* One image's data: bytes / elements.  */
+  int cbit = 0;
+  int imoffset;
+  collsub_iface *ci;
+
+  ci = &local->ci;
+
+  dim = GFC_DESCRIPTOR_RANK (array);
+  ssize = sizeof (GFC_REAL_4);
+  packed = true;
+  span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_4);
+  for (index_type n = 0; n < dim; n++)
+    {
+      count[n] = 0;
+      stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+      extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+      /* No-op for an empty array. */
+      /* NOTE(review): this returns before any collsub_sync; it assumes
+	 every image sees the same (empty) shape, otherwise the barrier
+	 counts across images would not match -- confirm with callers.  */
+      if (extent[n] <= 0)
+	return;
+
+      /* Contiguous iff each stride equals the combined size of all
+	 lower dimensions.  */
+      if (ssize != stride[n])
+	packed = false;
+
+      ssize *= extent[n];
+    }
+
+  num_elems = ssize / sizeof (GFC_REAL_4);
+
+  /* The shared buffer holds one copy of the array per image.  */
+  buffer = get_collsub_buf (ci, ssize * local->num_images);
+  this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+  if (packed)
+    memcpy (this_shared_ptr, array->base_addr, ssize);
+  else
+    {
+      /* Pack the non-contiguous source into the shared buffer with an
+	 odometer walk over all dimensions.  */
+      char *src = (char *) array->base_addr;
+      GFC_REAL_4 *restrict dest = this_shared_ptr;
+      index_type stride0 = stride[0];
+
+      while (src)
+	{
+	  /* Copy the data. */
+	  *(dest++) = *((GFC_REAL_4 *) src);
+	  src += stride0;
+	  count[0] ++;
+	  /* Advance to the next source element. */
+	  for (index_type n = 0; count[n] == extent[n] ; )
+	    {
+	      /* When we get to the end of a dimension, reset it and increment
+		 the next dimension. */
+	      count[n] = 0;
+	      src -= stride[n] * extent[n];
+	      n++;
+	      if (n == dim)
+		{
+		  /* Wrapped past the last dimension: the walk is done.  */
+		  src = NULL;
+		  break;
+		}
+	      else
+		{
+		  count[n]++;
+		  src += stride[n];
+		}
+	    }
+	}
+    }
+
+  collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here the general scheme:
+
+     abababababab
+     a_b_a_b_a_b_
+     a___b___a___
+     a_______b___
+     r___________
+  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  GFC_REAL_4 * other_shared_ptr; /* Points to the shared memory
+					    allocated to another image. */
+	  GFC_REAL_4 *a;
+	  GFC_REAL_4 *b;
+
+	  other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+	  /* Elementwise maximum of this image's slot and the partner's.  */
+	  for (index_type i = 0; i < num_elems; i++)
+	    {
+	      a = this_shared_ptr + i;
+	      b = other_shared_ptr + i;
+	      if (*b > *a)
+		*a = *b;
+	    }
+	}
+      collsub_sync (ci);
+    }
+  /* Keep the barrier count of retired images in step.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    {
+      if (packed)
+	memcpy (array->base_addr, buffer, ssize);
+      else
+	{
+	  /* Unpack the reduced result (image zero's slot at the start of
+	     BUFFER) back into the non-contiguous array.  */
+	  GFC_REAL_4 *src = buffer;
+	  char * restrict dest = (char *) array->base_addr;
+	  index_type stride0 = stride[0];
+
+	  for (index_type n = 0; n < dim; n++)
+	    count[n] = 0;
+
+	  while (dest)
+	    {
+	      *((GFC_REAL_4 * ) dest) = *src++;
+	      dest += stride0;
+	      count[0] ++;
+	      for (index_type n = 0; count[n] == extent[n] ;)
+		{
+		  /* When we get to the end of a dimension, reset it and increment
+		     the next dimension. */
+		  count[n] = 0;
+		  dest -= stride[n] * extent[n];
+		  n++;
+		  if (n == dim)
+		    {
+		      dest = NULL;
+		      break;
+		    }
+		  else
+		    {
+		      count[n]++;
+		      dest += stride[n];
+		    }
+		}
+	    }
+	}
+    }
+  finish_collective_subroutine (ci);
+}
+/* CO_MIN for an array of GFC_REAL_4.  Identical structure to the CO_MAX
+   array variant above, but keeping the smaller element; the elementwise
+   minimum is reduced into image zero's buffer slot and copied back on
+   the image named by RESULT_IMAGE (1-based), or on all images when
+   RESULT_IMAGE is NULL.  STAT, ERRMSG and ERRMSG_LEN belong to the
+   Fortran ABI but are not used yet.  */
+void nca_collsub_min_array_r4 (gfc_array_r4 * restrict array, int *result_image,
+       int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_r4);
+
+void
+nca_collsub_min_array_r4 (gfc_array_r4 * restrict array, int *result_image,
+      int *stat __attribute__ ((unused)),
+      char *errmsg __attribute__ ((unused)),
+      index_type errmsg_len __attribute__ ((unused)))
+{
+  index_type count[GFC_MAX_DIMENSIONS];   /* Odometer position per dimension.  */
+  index_type stride[GFC_MAX_DIMENSIONS];  /* Strides in bytes, not elements.  */
+  index_type extent[GFC_MAX_DIMENSIONS];
+  GFC_REAL_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+  GFC_REAL_4 *buffer;
+  index_type dim;     /* Rank of ARRAY.  */
+  bool packed;        /* True if ARRAY is contiguous in memory.  */
+  index_type span;
+  index_type ssize, num_elems;  /* One image's data: bytes / elements.  */
+  int cbit = 0;
+  int imoffset;
+  collsub_iface *ci;
+
+  ci = &local->ci;
+
+  dim = GFC_DESCRIPTOR_RANK (array);
+  ssize = sizeof (GFC_REAL_4);
+  packed = true;
+  span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_4);
+  for (index_type n = 0; n < dim; n++)
+    {
+      count[n] = 0;
+      stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+      extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+      /* No-op for an empty array. */
+      /* NOTE(review): this returns before any collsub_sync; it assumes
+	 every image sees the same (empty) shape, otherwise the barrier
+	 counts across images would not match -- confirm with callers.  */
+      if (extent[n] <= 0)
+	return;
+
+      /* Contiguous iff each stride equals the combined size of all
+	 lower dimensions.  */
+      if (ssize != stride[n])
+	packed = false;
+
+      ssize *= extent[n];
+    }
+
+  num_elems = ssize / sizeof (GFC_REAL_4);
+
+  /* The shared buffer holds one copy of the array per image.  */
+  buffer = get_collsub_buf (ci, ssize * local->num_images);
+  this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+  if (packed)
+    memcpy (this_shared_ptr, array->base_addr, ssize);
+  else
+    {
+      /* Pack the non-contiguous source into the shared buffer with an
+	 odometer walk over all dimensions.  */
+      char *src = (char *) array->base_addr;
+      GFC_REAL_4 *restrict dest = this_shared_ptr;
+      index_type stride0 = stride[0];
+
+      while (src)
+	{
+	  /* Copy the data. */
+	  *(dest++) = *((GFC_REAL_4 *) src);
+	  src += stride0;
+	  count[0] ++;
+	  /* Advance to the next source element. */
+	  for (index_type n = 0; count[n] == extent[n] ; )
+	    {
+	      /* When we get to the end of a dimension, reset it and increment
+		 the next dimension. */
+	      count[n] = 0;
+	      src -= stride[n] * extent[n];
+	      n++;
+	      if (n == dim)
+		{
+		  /* Wrapped past the last dimension: the walk is done.  */
+		  src = NULL;
+		  break;
+		}
+	      else
+		{
+		  count[n]++;
+		  src += stride[n];
+		}
+	    }
+	}
+    }
+
+  collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here the general scheme:
+
+     abababababab
+     a_b_a_b_a_b_
+     a___b___a___
+     a_______b___
+     r___________
+  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  GFC_REAL_4 * other_shared_ptr; /* Points to the shared memory
+					    allocated to another image. */
+	  GFC_REAL_4 *a;
+	  GFC_REAL_4 *b;
+
+	  other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+	  /* Elementwise minimum of this image's slot and the partner's.  */
+	  for (index_type i = 0; i < num_elems; i++)
+	    {
+	      a = this_shared_ptr + i;
+	      b = other_shared_ptr + i;
+	      if (*b < *a)
+		*a = *b;
+	    }
+	}
+      collsub_sync (ci);
+    }
+  /* Keep the barrier count of retired images in step.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    {
+      if (packed)
+	memcpy (array->base_addr, buffer, ssize);
+      else
+	{
+	  /* Unpack the reduced result (image zero's slot at the start of
+	     BUFFER) back into the non-contiguous array.  */
+	  GFC_REAL_4 *src = buffer;
+	  char * restrict dest = (char *) array->base_addr;
+	  index_type stride0 = stride[0];
+
+	  for (index_type n = 0; n < dim; n++)
+	    count[n] = 0;
+
+	  while (dest)
+	    {
+	      *((GFC_REAL_4 * ) dest) = *src++;
+	      dest += stride0;
+	      count[0] ++;
+	      for (index_type n = 0; count[n] == extent[n] ;)
+		{
+		  /* When we get to the end of a dimension, reset it and increment
+		     the next dimension. */
+		  count[n] = 0;
+		  dest -= stride[n] * extent[n];
+		  n++;
+		  if (n == dim)
+		    {
+		      dest = NULL;
+		      break;
+		    }
+		  else
+		    {
+		      count[n]++;
+		      dest += stride[n];
+		    }
+		}
+	    }
+	}
+    }
+  finish_collective_subroutine (ci);
+}
+/* CO_SUM for an array of GFC_REAL_4.  Identical structure to the CO_MAX
+   array variant above, but accumulating an elementwise sum into image
+   zero's buffer slot; the result is copied back on the image named by
+   RESULT_IMAGE (1-based), or on all images when RESULT_IMAGE is NULL.
+   STAT, ERRMSG and ERRMSG_LEN belong to the Fortran ABI but are not
+   used yet.  */
+void nca_collsub_sum_array_r4 (gfc_array_r4 * restrict array, int *result_image,
+       int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_r4);
+
+void
+nca_collsub_sum_array_r4 (gfc_array_r4 * restrict array, int *result_image,
+      int *stat __attribute__ ((unused)),
+      char *errmsg __attribute__ ((unused)),
+      index_type errmsg_len __attribute__ ((unused)))
+{
+  index_type count[GFC_MAX_DIMENSIONS];   /* Odometer position per dimension.  */
+  index_type stride[GFC_MAX_DIMENSIONS];  /* Strides in bytes, not elements.  */
+  index_type extent[GFC_MAX_DIMENSIONS];
+  GFC_REAL_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+  GFC_REAL_4 *buffer;
+  index_type dim;     /* Rank of ARRAY.  */
+  bool packed;        /* True if ARRAY is contiguous in memory.  */
+  index_type span;
+  index_type ssize, num_elems;  /* One image's data: bytes / elements.  */
+  int cbit = 0;
+  int imoffset;
+  collsub_iface *ci;
+
+  ci = &local->ci;
+
+  dim = GFC_DESCRIPTOR_RANK (array);
+  ssize = sizeof (GFC_REAL_4);
+  packed = true;
+  span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_4);
+  for (index_type n = 0; n < dim; n++)
+    {
+      count[n] = 0;
+      stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+      extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+      /* No-op for an empty array. */
+      /* NOTE(review): this returns before any collsub_sync; it assumes
+	 every image sees the same (empty) shape, otherwise the barrier
+	 counts across images would not match -- confirm with callers.  */
+      if (extent[n] <= 0)
+	return;
+
+      /* Contiguous iff each stride equals the combined size of all
+	 lower dimensions.  */
+      if (ssize != stride[n])
+	packed = false;
+
+      ssize *= extent[n];
+    }
+
+  num_elems = ssize / sizeof (GFC_REAL_4);
+
+  /* The shared buffer holds one copy of the array per image.  */
+  buffer = get_collsub_buf (ci, ssize * local->num_images);
+  this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+  if (packed)
+    memcpy (this_shared_ptr, array->base_addr, ssize);
+  else
+    {
+      /* Pack the non-contiguous source into the shared buffer with an
+	 odometer walk over all dimensions.  */
+      char *src = (char *) array->base_addr;
+      GFC_REAL_4 *restrict dest = this_shared_ptr;
+      index_type stride0 = stride[0];
+
+      while (src)
+	{
+	  /* Copy the data. */
+	  *(dest++) = *((GFC_REAL_4 *) src);
+	  src += stride0;
+	  count[0] ++;
+	  /* Advance to the next source element. */
+	  for (index_type n = 0; count[n] == extent[n] ; )
+	    {
+	      /* When we get to the end of a dimension, reset it and increment
+		 the next dimension. */
+	      count[n] = 0;
+	      src -= stride[n] * extent[n];
+	      n++;
+	      if (n == dim)
+		{
+		  /* Wrapped past the last dimension: the walk is done.  */
+		  src = NULL;
+		  break;
+		}
+	      else
+		{
+		  count[n]++;
+		  src += stride[n];
+		}
+	    }
+	}
+    }
+
+  collsub_sync (ci);
+
+  /* Reduce the array to image zero. Here the general scheme:
+
+     abababababab
+     a_b_a_b_a_b_
+     a___b___a___
+     a_______b___
+     r___________
+  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  GFC_REAL_4 * other_shared_ptr; /* Points to the shared memory
+					    allocated to another image. */
+	  GFC_REAL_4 *a;
+	  GFC_REAL_4 *b;
+
+	  other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+	  /* Elementwise sum of this image's slot and the partner's.  */
+	  for (index_type i = 0; i < num_elems; i++)
+	    {
+	      a = this_shared_ptr + i;
+	      b = other_shared_ptr + i;
+	      *a += *b;
+	    }
+	}
+      collsub_sync (ci);
+    }
+  /* Keep the barrier count of retired images in step.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    {
+      if (packed)
+	memcpy (array->base_addr, buffer, ssize);
+      else
+	{
+	  /* Unpack the reduced result (image zero's slot at the start of
+	     BUFFER) back into the non-contiguous array.  */
+	  GFC_REAL_4 *src = buffer;
+	  char * restrict dest = (char *) array->base_addr;
+	  index_type stride0 = stride[0];
+
+	  for (index_type n = 0; n < dim; n++)
+	    count[n] = 0;
+
+	  while (dest)
+	    {
+	      *((GFC_REAL_4 * ) dest) = *src++;
+	      dest += stride0;
+	      count[0] ++;
+	      for (index_type n = 0; count[n] == extent[n] ;)
+		{
+		  /* When we get to the end of a dimension, reset it and increment
+		     the next dimension. */
+		  count[n] = 0;
+		  dest -= stride[n] * extent[n];
+		  n++;
+		  if (n == dim)
+		    {
+		      dest = NULL;
+		      break;
+		    }
+		  else
+		    {
+		      count[n]++;
+		      dest += stride[n];
+		    }
+		}
+	    }
+	}
+    }
+  finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_r8.c b/libgfortran/generated/nca_minmax_r8.c
new file mode 100644
index 0000000..3b3e962
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_r8.c
@@ -0,0 +1,653 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_REAL_8)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_r8 (GFC_REAL_8 *obj, int *result_image,
+			 int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_r8);
+
+/* CO_MAX for a scalar GFC_REAL_8.  Each image deposits its value into a
+   per-image slot of a shared buffer, the slots are combined pairwise in a
+   binary-tree reduction, and the result (left in slot 0) is copied back on
+   the requested image(s).  stat, errmsg and errmsg_len are accepted for
+   the Fortran interface but ignored here.  */
+void
+nca_collsub_max_scalar_r8 (GFC_REAL_8 *obj, int *result_image,
+		    int *stat __attribute__ ((unused)),
+		    char *errmsg __attribute__ ((unused)),
+		    index_type errmsg_len __attribute__ ((unused)))
+{
+  int cbit = 0;
+  int imoffset;
+  GFC_REAL_8 *a, *b;
+  GFC_REAL_8 *buffer, *this_image_buf;
+  collsub_iface *ci;
+
+  ci = &local->ci;
+
+  /* One slot per image; write this image's contribution.  */
+  buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->num_images);
+  this_image_buf = buffer + this_image.image_num;
+  *this_image_buf = *obj;
+
+  collsub_sync (ci);
+  /* Tree reduction: in round cbit, an image whose low cbit+1 bits of
+     image_num are zero takes the maximum of its slot and the slot
+     2**cbit away.  Every image calls collsub_sync once per round so all
+     images stay in lockstep.  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  a = this_image_buf;
+	  b = this_image_buf + imoffset;
+	  if (*b > *a)
+	    *a = *b;
+	}
+      collsub_sync (ci);
+    }
+  /* Images that dropped out of the reduction early still take part in the
+     remaining synchronizations so the sync counts match everywhere.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  /* result_image is 1-based; image_num is 0-based.  Without result_image,
+     every image receives the result.  */
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    *obj = *buffer;
+
+  finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_r8 (GFC_REAL_8 *obj, int *result_image,
+			 int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_r8);
+
+/* CO_MIN for a scalar GFC_REAL_8.  Same binary-tree reduction as the
+   CO_MAX variant above, but keeping the smaller of each slot pair.
+   stat, errmsg and errmsg_len are ignored.  */
+void
+nca_collsub_min_scalar_r8 (GFC_REAL_8 *obj, int *result_image,
+		    int *stat __attribute__ ((unused)),
+		    char *errmsg __attribute__ ((unused)),
+		    index_type errmsg_len __attribute__ ((unused)))
+{
+  int cbit = 0;
+  int imoffset;
+  GFC_REAL_8 *a, *b;
+  GFC_REAL_8 *buffer, *this_image_buf;
+  collsub_iface *ci;
+
+  ci = &local->ci;
+
+  /* One slot per image; write this image's contribution.  */
+  buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->num_images);
+  this_image_buf = buffer + this_image.image_num;
+  *this_image_buf = *obj;
+
+  collsub_sync (ci);
+  /* Pairwise tree reduction; one collsub_sync per round on every image.  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  a = this_image_buf;
+	  b = this_image_buf + imoffset;
+	  if (*b < *a)
+	    *a = *b;
+	}
+      collsub_sync (ci);
+    }
+  /* Drain the remaining rounds so sync counts match across images.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  /* result_image is 1-based; image_num is 0-based.  */
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    *obj = *buffer;
+
+  finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_r8 (GFC_REAL_8 *obj, int *result_image,
+			 int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_r8);
+
+/* CO_SUM for a scalar GFC_REAL_8.  Same binary-tree reduction as the
+   CO_MAX variant above, but accumulating the sum of the slots.
+   stat, errmsg and errmsg_len are ignored.  */
+void
+nca_collsub_sum_scalar_r8 (GFC_REAL_8 *obj, int *result_image,
+		    int *stat __attribute__ ((unused)),
+		    char *errmsg __attribute__ ((unused)),
+		    index_type errmsg_len __attribute__ ((unused)))
+{
+  int cbit = 0;
+  int imoffset;
+  GFC_REAL_8 *a, *b;
+  GFC_REAL_8 *buffer, *this_image_buf;
+  collsub_iface *ci;
+
+  ci = &local->ci;
+
+  /* One slot per image; write this image's contribution.  */
+  buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->num_images);
+  this_image_buf = buffer + this_image.image_num;
+  *this_image_buf = *obj;
+
+  collsub_sync (ci);
+  /* Pairwise tree reduction; one collsub_sync per round on every image.
+     NOTE(review): the pairing order depends on num_images, so floating-
+     point rounding of the sum may differ from a serial left-to-right
+     sum — presumably acceptable for CO_SUM; confirm against the spec.  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  a = this_image_buf;
+	  b = this_image_buf + imoffset;
+	  *a += *b;
+	}
+      collsub_sync (ci);
+    }
+  /* Drain the remaining rounds so sync counts match across images.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  /* result_image is 1-based; image_num is 0-based.  */
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    *obj = *buffer;
+
+  finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_r8 (gfc_array_r8 * restrict array, int *result_image,
+			int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_r8);
+
+/* CO_MAX for an array of GFC_REAL_8.  Each image packs its (possibly
+   strided) array into a contiguous per-image slot of a shared buffer,
+   the slots are reduced elementwise into slot 0 by a binary-tree
+   reduction, and the result is copied back into ARRAY on the requested
+   image(s).  stat, errmsg and errmsg_len are ignored.  */
+void
+nca_collsub_max_array_r8 (gfc_array_r8 * restrict array, int *result_image,
+		   int *stat __attribute__ ((unused)),
+		   char *errmsg __attribute__ ((unused)),
+		   index_type errmsg_len __attribute__ ((unused)))
+{
+  index_type count[GFC_MAX_DIMENSIONS];
+  index_type stride[GFC_MAX_DIMENSIONS];
+  index_type extent[GFC_MAX_DIMENSIONS];
+  GFC_REAL_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+  GFC_REAL_8 *buffer;
+  index_type dim;
+  bool packed;
+  index_type span;
+  index_type ssize, num_elems;
+  int cbit = 0;
+  int imoffset;
+  collsub_iface *ci;
+
+  ci = &local->ci;
+
+  /* Gather byte-based strides and extents; packed becomes false as soon
+     as one dimension is not contiguous with the previous ones.  */
+  dim = GFC_DESCRIPTOR_RANK (array);
+  ssize = sizeof (GFC_REAL_8);
+  packed = true;
+  span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_8);
+  for (index_type n = 0; n < dim; n++)
+    {
+      count[n] = 0;
+      stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+      extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+      /* No-op for an empty array. */
+      if (extent[n] <= 0)
+	return;
+
+      if (ssize != stride[n])
+	packed = false;
+
+      ssize *= extent[n];
+    }
+
+  num_elems = ssize / sizeof (GFC_REAL_8);
+
+  buffer = get_collsub_buf (ci, ssize * local->num_images);
+  this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+  /* Pack this image's data into its contiguous buffer slot.  */
+  if (packed)
+    memcpy (this_shared_ptr, array->base_addr, ssize);
+  else
+    {
+      char *src = (char *) array->base_addr;
+      GFC_REAL_8 *restrict dest = this_shared_ptr;
+      index_type stride0 = stride[0];
+
+      while (src)
+	{
+	  /* Copy the data. */
+	  *(dest++) = *((GFC_REAL_8 *) src);
+	  src += stride0;
+	  count[0] ++;
+	  /* Advance to the next source element. */
+	  for (index_type n = 0; count[n] == extent[n] ; )
+	    {
+	      /* When we get to the end of a dimension, reset it and increment
+		 the next dimension. */
+	      count[n] = 0;
+	      src -= stride[n] * extent[n];
+	      n++;
+	      if (n == dim)
+		{
+		  src = NULL;
+		  break;
+		}
+	      else
+		{
+		  count[n]++;
+		  src += stride[n];
+		}
+	    }
+	}
+    }
+
+  collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here the general scheme:
+
+      abababababab
+      a_b_a_b_a_b_
+      a___b___a___
+      a_______b___
+      r___________
+  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  GFC_REAL_8 * other_shared_ptr;  /* Points to the shared memory
+					     allocated to another image. */
+	  GFC_REAL_8 *a;
+	  GFC_REAL_8 *b;
+
+	  other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+	  for (index_type i = 0; i < num_elems; i++)
+	    {
+	      a = this_shared_ptr + i;
+	      b = other_shared_ptr + i;
+	      if (*b > *a)
+		*a = *b;
+	    }
+	}
+      collsub_sync (ci);
+    }
+  /* Drain the remaining rounds so sync counts match across images.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  /* Copy the reduced data (slot 0) back into ARRAY, unpacking along the
+     original strides if necessary.  result_image is 1-based.  */
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    {
+      if (packed)
+	memcpy (array->base_addr, buffer, ssize);
+      else
+	{
+	  GFC_REAL_8 *src = buffer;
+	  char * restrict dest = (char *) array->base_addr;
+	  index_type stride0 = stride[0];
+
+	  for (index_type n = 0; n < dim; n++)
+	    count[n] = 0;
+
+	  while (dest)
+	    {
+	      *((GFC_REAL_8 * ) dest) = *src++;
+	      dest += stride0;
+	      count[0] ++;
+	      for (index_type n = 0; count[n] == extent[n] ;)
+		{
+		  /* When we get to the end of a dimension, reset it and increment
+		     the next dimension. */
+		  count[n] = 0;
+		  dest -= stride[n] * extent[n];
+		  n++;
+		  if (n == dim)
+		    {
+		      dest = NULL;
+		      break;
+		    }
+		  else
+		    {
+		      count[n]++;
+		      dest += stride[n];
+		    }
+		}
+	    }
+	}
+    }
+  finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_r8 (gfc_array_r8 * restrict array, int *result_image,
+			int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_r8);
+
+/* CO_MIN for an array of GFC_REAL_8.  Identical structure to the CO_MAX
+   variant above: pack into a shared buffer slot, tree-reduce elementwise
+   into slot 0 (keeping the smaller value), copy back.  stat, errmsg and
+   errmsg_len are ignored.  */
+void
+nca_collsub_min_array_r8 (gfc_array_r8 * restrict array, int *result_image,
+		   int *stat __attribute__ ((unused)),
+		   char *errmsg __attribute__ ((unused)),
+		   index_type errmsg_len __attribute__ ((unused)))
+{
+  index_type count[GFC_MAX_DIMENSIONS];
+  index_type stride[GFC_MAX_DIMENSIONS];
+  index_type extent[GFC_MAX_DIMENSIONS];
+  GFC_REAL_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+  GFC_REAL_8 *buffer;
+  index_type dim;
+  bool packed;
+  index_type span;
+  index_type ssize, num_elems;
+  int cbit = 0;
+  int imoffset;
+  collsub_iface *ci;
+
+  ci = &local->ci;
+
+  /* Gather byte-based strides and extents; detect contiguity.  */
+  dim = GFC_DESCRIPTOR_RANK (array);
+  ssize = sizeof (GFC_REAL_8);
+  packed = true;
+  span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_8);
+  for (index_type n = 0; n < dim; n++)
+    {
+      count[n] = 0;
+      stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+      extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+      /* No-op for an empty array. */
+      if (extent[n] <= 0)
+	return;
+
+      if (ssize != stride[n])
+	packed = false;
+
+      ssize *= extent[n];
+    }
+
+  num_elems = ssize / sizeof (GFC_REAL_8);
+
+  buffer = get_collsub_buf (ci, ssize * local->num_images);
+  this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+  /* Pack this image's data into its contiguous buffer slot.  */
+  if (packed)
+    memcpy (this_shared_ptr, array->base_addr, ssize);
+  else
+    {
+      char *src = (char *) array->base_addr;
+      GFC_REAL_8 *restrict dest = this_shared_ptr;
+      index_type stride0 = stride[0];
+
+      while (src)
+	{
+	  /* Copy the data. */
+	  *(dest++) = *((GFC_REAL_8 *) src);
+	  src += stride0;
+	  count[0] ++;
+	  /* Advance to the next source element. */
+	  for (index_type n = 0; count[n] == extent[n] ; )
+	    {
+	      /* When we get to the end of a dimension, reset it and increment
+		 the next dimension. */
+	      count[n] = 0;
+	      src -= stride[n] * extent[n];
+	      n++;
+	      if (n == dim)
+		{
+		  src = NULL;
+		  break;
+		}
+	      else
+		{
+		  count[n]++;
+		  src += stride[n];
+		}
+	    }
+	}
+    }
+
+  collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here the general scheme:
+
+      abababababab
+      a_b_a_b_a_b_
+      a___b___a___
+      a_______b___
+      r___________
+  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  GFC_REAL_8 * other_shared_ptr;  /* Points to the shared memory
+					     allocated to another image. */
+	  GFC_REAL_8 *a;
+	  GFC_REAL_8 *b;
+
+	  other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+	  for (index_type i = 0; i < num_elems; i++)
+	    {
+	      a = this_shared_ptr + i;
+	      b = other_shared_ptr + i;
+	      if (*b < *a)
+		*a = *b;
+	    }
+	}
+      collsub_sync (ci);
+    }
+  /* Drain the remaining rounds so sync counts match across images.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  /* Copy the reduced data (slot 0) back, unpacking if necessary.  */
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    {
+      if (packed)
+	memcpy (array->base_addr, buffer, ssize);
+      else
+	{
+	  GFC_REAL_8 *src = buffer;
+	  char * restrict dest = (char *) array->base_addr;
+	  index_type stride0 = stride[0];
+
+	  for (index_type n = 0; n < dim; n++)
+	    count[n] = 0;
+
+	  while (dest)
+	    {
+	      *((GFC_REAL_8 * ) dest) = *src++;
+	      dest += stride0;
+	      count[0] ++;
+	      for (index_type n = 0; count[n] == extent[n] ;)
+		{
+		  /* When we get to the end of a dimension, reset it and increment
+		     the next dimension. */
+		  count[n] = 0;
+		  dest -= stride[n] * extent[n];
+		  n++;
+		  if (n == dim)
+		    {
+		      dest = NULL;
+		      break;
+		    }
+		  else
+		    {
+		      count[n]++;
+		      dest += stride[n];
+		    }
+		}
+	    }
+	}
+    }
+  finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_r8 (gfc_array_r8 * restrict array, int *result_image,
+			int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_r8);
+
+/* CO_SUM for an array of GFC_REAL_8.  Identical structure to the CO_MAX
+   variant above: pack into a shared buffer slot, tree-reduce elementwise
+   into slot 0 (adding the values), copy back.  stat, errmsg and
+   errmsg_len are ignored.  */
+void
+nca_collsub_sum_array_r8 (gfc_array_r8 * restrict array, int *result_image,
+		   int *stat __attribute__ ((unused)),
+		   char *errmsg __attribute__ ((unused)),
+		   index_type errmsg_len __attribute__ ((unused)))
+{
+  index_type count[GFC_MAX_DIMENSIONS];
+  index_type stride[GFC_MAX_DIMENSIONS];
+  index_type extent[GFC_MAX_DIMENSIONS];
+  GFC_REAL_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+  GFC_REAL_8 *buffer;
+  index_type dim;
+  bool packed;
+  index_type span;
+  index_type ssize, num_elems;
+  int cbit = 0;
+  int imoffset;
+  collsub_iface *ci;
+
+  ci = &local->ci;
+
+  /* Gather byte-based strides and extents; detect contiguity.  */
+  dim = GFC_DESCRIPTOR_RANK (array);
+  ssize = sizeof (GFC_REAL_8);
+  packed = true;
+  span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_8);
+  for (index_type n = 0; n < dim; n++)
+    {
+      count[n] = 0;
+      stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+      extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+      /* No-op for an empty array. */
+      if (extent[n] <= 0)
+	return;
+
+      if (ssize != stride[n])
+	packed = false;
+
+      ssize *= extent[n];
+    }
+
+  num_elems = ssize / sizeof (GFC_REAL_8);
+
+  buffer = get_collsub_buf (ci, ssize * local->num_images);
+  this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+  /* Pack this image's data into its contiguous buffer slot.  */
+  if (packed)
+    memcpy (this_shared_ptr, array->base_addr, ssize);
+  else
+    {
+      char *src = (char *) array->base_addr;
+      GFC_REAL_8 *restrict dest = this_shared_ptr;
+      index_type stride0 = stride[0];
+
+      while (src)
+	{
+	  /* Copy the data. */
+	  *(dest++) = *((GFC_REAL_8 *) src);
+	  src += stride0;
+	  count[0] ++;
+	  /* Advance to the next source element. */
+	  for (index_type n = 0; count[n] == extent[n] ; )
+	    {
+	      /* When we get to the end of a dimension, reset it and increment
+		 the next dimension. */
+	      count[n] = 0;
+	      src -= stride[n] * extent[n];
+	      n++;
+	      if (n == dim)
+		{
+		  src = NULL;
+		  break;
+		}
+	      else
+		{
+		  count[n]++;
+		  src += stride[n];
+		}
+	    }
+	}
+    }
+
+  collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here the general scheme:
+
+      abababababab
+      a_b_a_b_a_b_
+      a___b___a___
+      a_______b___
+      r___________
+  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  GFC_REAL_8 * other_shared_ptr;  /* Points to the shared memory
+					     allocated to another image. */
+	  GFC_REAL_8 *a;
+	  GFC_REAL_8 *b;
+
+	  other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+	  for (index_type i = 0; i < num_elems; i++)
+	    {
+	      a = this_shared_ptr + i;
+	      b = other_shared_ptr + i;
+	      *a += *b;
+	    }
+	}
+      collsub_sync (ci);
+    }
+  /* Drain the remaining rounds so sync counts match across images.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  /* Copy the reduced data (slot 0) back, unpacking if necessary.  */
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    {
+      if (packed)
+	memcpy (array->base_addr, buffer, ssize);
+      else
+	{
+	  GFC_REAL_8 *src = buffer;
+	  char * restrict dest = (char *) array->base_addr;
+	  index_type stride0 = stride[0];
+
+	  for (index_type n = 0; n < dim; n++)
+	    count[n] = 0;
+
+	  while (dest)
+	    {
+	      *((GFC_REAL_8 * ) dest) = *src++;
+	      dest += stride0;
+	      count[0] ++;
+	      for (index_type n = 0; count[n] == extent[n] ;)
+		{
+		  /* When we get to the end of a dimension, reset it and increment
+		     the next dimension. */
+		  count[n] = 0;
+		  dest -= stride[n] * extent[n];
+		  n++;
+		  if (n == dim)
+		    {
+		      dest = NULL;
+		      break;
+		    }
+		  else
+		    {
+		      count[n]++;
+		      dest += stride[n];
+		    }
+		}
+	    }
+	}
+    }
+  finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_s1.c b/libgfortran/generated/nca_minmax_s1.c
new file mode 100644
index 0000000..b081452
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_s1.c
@@ -0,0 +1,494 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_UINTEGER_1)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+/* For this 1-byte character kind the guard below is false, so memcmp4 is
+   compiled out and plain memcmp is used for comparisons instead.  The
+   template emits it unconditionally for all character kinds.  */
+#if 1 == 4
+
+/* Compare wide character types, which are handled internally as
+   unsigned 4-byte integers. */
+static inline int
+memcmp4 (const void *a, const void *b, size_t len)
+{
+  const GFC_UINTEGER_4 *pa = a;
+  const GFC_UINTEGER_4 *pb = b;
+  /* LEN counts 4-byte elements, not bytes; return the sign of the first
+     difference, like memcmp.  */
+  while (len-- > 0)
+    {
+      if (*pa != *pb)
+	return *pa < *pb ? -1 : 1;
+      pa ++;
+      pb ++;
+    }
+  return 0;
+}
+
+#endif
+void nca_collsub_max_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image,
+			 int *stat, char *errmsg, index_type char_len, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_s1);
+
+/* CO_MAX for a scalar CHARACTER(kind=1) of length CHAR_LEN.  Each image
+   deposits its string into a per-image slot of a shared buffer, the slots
+   are combined pairwise (keeping the lexicographically larger string) in
+   a binary-tree reduction, and the result in slot 0 is copied back on the
+   requested image(s).  stat, errmsg and errmsg_len are ignored.  */
+void
+nca_collsub_max_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image,
+		    int *stat __attribute__ ((unused)),
+		    char *errmsg __attribute__ ((unused)),
+		    index_type char_len,
+		    index_type errmsg_len __attribute__ ((unused)))
+{
+  int cbit = 0;
+  int imoffset;
+  GFC_UINTEGER_1 *a, *b;
+  GFC_UINTEGER_1 *buffer, *this_image_buf;
+  collsub_iface *ci;
+  index_type type_size;
+
+  ci = &local->ci;
+
+  /* One char_len-sized slot per image; deposit this image's string.  */
+  type_size = char_len * sizeof (GFC_UINTEGER_1);
+  buffer = get_collsub_buf (ci, type_size * local->num_images);
+  this_image_buf = buffer + this_image.image_num * char_len;
+  memcpy (this_image_buf, obj, type_size);
+
+  collsub_sync (ci);
+  /* Pairwise tree reduction; one collsub_sync per round on every image.
+     For this 1-byte kind char_len equals type_size, so memcmp compares
+     the whole string.  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0
+       && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  a = this_image_buf;
+	  b = this_image_buf + imoffset * char_len;
+	  if (memcmp (b, a, char_len) > 0)
+	    memcpy (a, b, type_size);
+	}
+      collsub_sync (ci);
+    }
+  /* Drain the remaining rounds so sync counts match across images.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  /* result_image is 1-based; image_num is 0-based.  */
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    memcpy (obj, buffer, type_size);
+
+  finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image,
+			 int *stat, char *errmsg, index_type char_len, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_s1);
+
+/* CO_MIN for a scalar CHARACTER(kind=1) of length CHAR_LEN.  Same tree
+   reduction as the CO_MAX variant above, but keeping the
+   lexicographically smaller string.  stat, errmsg and errmsg_len are
+   ignored.  */
+void
+nca_collsub_min_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image,
+		    int *stat __attribute__ ((unused)),
+		    char *errmsg __attribute__ ((unused)),
+		    index_type char_len,
+		    index_type errmsg_len __attribute__ ((unused)))
+{
+  int cbit = 0;
+  int imoffset;
+  GFC_UINTEGER_1 *a, *b;
+  GFC_UINTEGER_1 *buffer, *this_image_buf;
+  collsub_iface *ci;
+  index_type type_size;
+
+  ci = &local->ci;
+
+  /* One char_len-sized slot per image; deposit this image's string.  */
+  type_size = char_len * sizeof (GFC_UINTEGER_1);
+  buffer = get_collsub_buf (ci, type_size * local->num_images);
+  this_image_buf = buffer + this_image.image_num * char_len;
+  memcpy (this_image_buf, obj, type_size);
+
+  collsub_sync (ci);
+  /* Pairwise tree reduction; one collsub_sync per round on every image.  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0
+       && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  a = this_image_buf;
+	  b = this_image_buf + imoffset * char_len;
+	  if (memcmp (b, a, char_len) < 0)
+	    memcpy (a, b, type_size);
+	}
+      collsub_sync (ci);
+    }
+  /* Drain the remaining rounds so sync counts match across images.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  /* result_image is 1-based; image_num is 0-based.  */
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    memcpy (obj, buffer, type_size);
+
+  finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_s1 (gfc_array_s1 * restrict array, int *result_image,
+			int *stat, char *errmsg, index_type char_len,
+			index_type errmsg_len);
+export_proto (nca_collsub_max_array_s1);
+
+/* CO_MAX for an array of CHARACTER(kind=1) strings of length CHAR_LEN.
+   Each image packs its array into a contiguous per-image slot of a
+   shared buffer (entries type_size bytes apart), the slots are reduced
+   elementwise into slot 0 keeping the lexicographically larger string,
+   and the result is copied back into ARRAY on the requested image(s).
+   stat, errmsg and errmsg_len are ignored.  */
+void
+nca_collsub_max_array_s1 (gfc_array_s1 * restrict array, int *result_image,
+		   int *stat __attribute__ ((unused)),
+		   char *errmsg __attribute__ ((unused)),
+		   index_type char_len,
+		   index_type errmsg_len __attribute__ ((unused)))
+{
+  index_type count[GFC_MAX_DIMENSIONS];
+  index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */
+  index_type extent[GFC_MAX_DIMENSIONS];
+  char *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+  char *buffer;
+  index_type dim;
+  bool packed;
+  index_type span;
+  index_type ssize, num_elems;
+  int cbit = 0;
+  int imoffset;
+  index_type type_size;
+  collsub_iface *ci;
+
+  ci = &local->ci;
+
+  /* Gather byte-based strides and extents; detect contiguity.  */
+  type_size = char_len * sizeof (GFC_UINTEGER_1);
+  dim = GFC_DESCRIPTOR_RANK (array);
+  num_elems = 1;
+  ssize = type_size;
+  packed = true;
+  span = array->span != 0 ? array->span : type_size;
+  for (index_type n = 0; n < dim; n++)
+    {
+      count[n] = 0;
+      stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+      extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+      /* No-op for an empty array. */
+      if (extent[n] <= 0)
+	return;
+
+      if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n))
+	packed = false;
+
+      num_elems *= extent[n];
+    }
+
+  ssize = num_elems * type_size;
+  buffer = get_collsub_buf (ci, ssize * local->num_images);
+  this_shared_ptr = buffer + this_image.image_num * ssize;
+
+  /* Pack this image's data; buffer entries are type_size bytes apart.  */
+  if (packed)
+    memcpy (this_shared_ptr, array->base_addr, ssize);
+  else
+    {
+      char *src = (char *) array->base_addr;
+      char *restrict dest = this_shared_ptr;
+      index_type stride0 = stride[0];
+
+      while (src)
+	{
+	  /* Copy the data. */
+
+	  memcpy (dest, src, type_size);
+	  dest += type_size;
+	  src += stride0;
+	  count[0] ++;
+	  /* Advance to the next source element. */
+	  for (index_type n = 0; count[n] == extent[n] ; )
+	    {
+	      /* When we get to the end of a dimension, reset it and increment
+		 the next dimension. */
+	      count[n] = 0;
+	      src -= stride[n] * extent[n];
+	      n++;
+	      if (n == dim)
+		{
+		  src = NULL;
+		  break;
+		}
+	      else
+		{
+		  count[n]++;
+		  src += stride[n];
+		}
+	    }
+	}
+    }
+
+  collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here the general scheme:
+
+      abababababab
+      a_b_a_b_a_b_
+      a___b___a___
+      a_______b___
+      r___________
+  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0
+       && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  char *other_shared_ptr;  /* Points to the shared memory
+				      allocated to another image. */
+	  GFC_UINTEGER_1 *a;
+	  GFC_UINTEGER_1 *b;
+
+	  other_shared_ptr = this_shared_ptr + imoffset * ssize;
+	  for (index_type i = 0; i < num_elems; i++)
+	    {
+	      a = (GFC_UINTEGER_1 *) (this_shared_ptr + i * type_size);
+	      b = (GFC_UINTEGER_1 *) (other_shared_ptr + i * type_size);
+	      if (memcmp (b, a, char_len) > 0)
+		memcpy (a, b, type_size);
+	    }
+	}
+      collsub_sync (ci);
+    }
+  /* Drain the remaining rounds so sync counts match across images.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  /* Copy the reduced data (slot 0) back, unpacking if necessary.  */
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    {
+      if (packed)
+	memcpy (array->base_addr, buffer, ssize);
+      else
+	{
+	  char *src = buffer;
+	  char *restrict dest = (char *) array->base_addr;
+	  index_type stride0 = stride[0];
+
+	  for (index_type n = 0; n < dim; n++)
+	    count[n] = 0;
+
+	  while (dest)
+	    {
+	      memcpy (dest, src, type_size);
+	      /* Buffer entries were packed type_size bytes apart (see the
+		 packing loop above), so advance by type_size, not by the
+		 array's span, which may be larger.  */
+	      src += type_size;
+	      dest += stride0;
+	      count[0] ++;
+	      for (index_type n = 0; count[n] == extent[n] ;)
+		{
+		  /* When we get to the end of a dimension, reset it and increment
+		     the next dimension. */
+		  count[n] = 0;
+		  dest -= stride[n] * extent[n];
+		  n++;
+		  if (n == dim)
+		    {
+		      dest = NULL;
+		      break;
+		    }
+		  else
+		    {
+		      count[n]++;
+		      dest += stride[n];
+		    }
+		}
+	    }
+	}
+    }
+  finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_s1 (gfc_array_s1 * restrict array, int *result_image,
+			int *stat, char *errmsg, index_type char_len,
+			index_type errmsg_len);
+export_proto (nca_collsub_min_array_s1);
+
+/* CO_MIN for an array of CHARACTER(kind=1) strings of length CHAR_LEN.
+   Same structure as the CO_MAX variant above, keeping the
+   lexicographically smaller string elementwise.  stat, errmsg and
+   errmsg_len are ignored.  */
+void
+nca_collsub_min_array_s1 (gfc_array_s1 * restrict array, int *result_image,
+		   int *stat __attribute__ ((unused)),
+		   char *errmsg __attribute__ ((unused)),
+		   index_type char_len,
+		   index_type errmsg_len __attribute__ ((unused)))
+{
+  index_type count[GFC_MAX_DIMENSIONS];
+  index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */
+  index_type extent[GFC_MAX_DIMENSIONS];
+  char *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+  char *buffer;
+  index_type dim;
+  bool packed;
+  index_type span;
+  index_type ssize, num_elems;
+  int cbit = 0;
+  int imoffset;
+  index_type type_size;
+  collsub_iface *ci;
+
+  ci = &local->ci;
+
+  /* Gather byte-based strides and extents; detect contiguity.  */
+  type_size = char_len * sizeof (GFC_UINTEGER_1);
+  dim = GFC_DESCRIPTOR_RANK (array);
+  num_elems = 1;
+  ssize = type_size;
+  packed = true;
+  span = array->span != 0 ? array->span : type_size;
+  for (index_type n = 0; n < dim; n++)
+    {
+      count[n] = 0;
+      stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+      extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+      /* No-op for an empty array. */
+      if (extent[n] <= 0)
+	return;
+
+      if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n))
+	packed = false;
+
+      num_elems *= extent[n];
+    }
+
+  ssize = num_elems * type_size;
+  buffer = get_collsub_buf (ci, ssize * local->num_images);
+  this_shared_ptr = buffer + this_image.image_num * ssize;
+
+  /* Pack this image's data; buffer entries are type_size bytes apart.  */
+  if (packed)
+    memcpy (this_shared_ptr, array->base_addr, ssize);
+  else
+    {
+      char *src = (char *) array->base_addr;
+      char *restrict dest = this_shared_ptr;
+      index_type stride0 = stride[0];
+
+      while (src)
+	{
+	  /* Copy the data. */
+
+	  memcpy (dest, src, type_size);
+	  dest += type_size;
+	  src += stride0;
+	  count[0] ++;
+	  /* Advance to the next source element. */
+	  for (index_type n = 0; count[n] == extent[n] ; )
+	    {
+	      /* When we get to the end of a dimension, reset it and increment
+		 the next dimension. */
+	      count[n] = 0;
+	      src -= stride[n] * extent[n];
+	      n++;
+	      if (n == dim)
+		{
+		  src = NULL;
+		  break;
+		}
+	      else
+		{
+		  count[n]++;
+		  src += stride[n];
+		}
+	    }
+	}
+    }
+
+  collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here the general scheme:
+
+      abababababab
+      a_b_a_b_a_b_
+      a___b___a___
+      a_______b___
+      r___________
+  */
+  for (; ((this_image.image_num >> cbit) & 1) == 0
+       && (local->num_images >> cbit) != 0; cbit++)
+    {
+      imoffset = 1 << cbit;
+      if (this_image.image_num + imoffset < local->num_images)
+	{
+	  char *other_shared_ptr;  /* Points to the shared memory
+				      allocated to another image. */
+	  GFC_UINTEGER_1 *a;
+	  GFC_UINTEGER_1 *b;
+
+	  other_shared_ptr = this_shared_ptr + imoffset * ssize;
+	  for (index_type i = 0; i < num_elems; i++)
+	    {
+	      a = (GFC_UINTEGER_1 *) (this_shared_ptr + i * type_size);
+	      b = (GFC_UINTEGER_1 *) (other_shared_ptr + i * type_size);
+	      if (memcmp (b, a, char_len) < 0)
+		memcpy (a, b, type_size);
+	    }
+	}
+      collsub_sync (ci);
+    }
+  /* Drain the remaining rounds so sync counts match across images.  */
+  for ( ; (local->num_images >> cbit) != 0; cbit++)
+    collsub_sync (ci);
+
+  /* Copy the reduced data (slot 0) back, unpacking if necessary.  */
+  if (!result_image || (*result_image - 1) == this_image.image_num)
+    {
+      if (packed)
+	memcpy (array->base_addr, buffer, ssize);
+      else
+	{
+	  char *src = buffer;
+	  char *restrict dest = (char *) array->base_addr;
+	  index_type stride0 = stride[0];
+
+	  for (index_type n = 0; n < dim; n++)
+	    count[n] = 0;
+
+	  while (dest)
+	    {
+	      memcpy (dest, src, type_size);
+	      /* Buffer entries were packed type_size bytes apart (see the
+		 packing loop above), so advance by type_size, not by the
+		 array's span, which may be larger.  */
+	      src += type_size;
+	      dest += stride0;
+	      count[0] ++;
+	      for (index_type n = 0; count[n] == extent[n] ;)
+		{
+		  /* When we get to the end of a dimension, reset it and increment
+		     the next dimension. */
+		  count[n] = 0;
+		  dest -= stride[n] * extent[n];
+		  n++;
+		  if (n == dim)
+		    {
+		      dest = NULL;
+		      break;
+		    }
+		  else
+		    {
+		      count[n]++;
+		      dest += stride[n];
+		    }
+		}
+	    }
+	}
+    }
+  finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_s4.c b/libgfortran/generated/nca_minmax_s4.c
new file mode 100644
index 0000000..b202fda
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_s4.c
@@ -0,0 +1,494 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_UINTEGER_4)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+#if 4 == 4
+
+/* Compare wide character types, which are handled internally as
+ unsigned 4-byte integers. */
+static inline int
+memcmp4 (const void *a, const void *b, size_t len)
+{
+ const GFC_UINTEGER_4 *pa = a;
+ const GFC_UINTEGER_4 *pb = b;
+ while (len-- > 0)
+ {
+ if (*pa != *pb)
+ return *pa < *pb ? -1 : 1;
+ pa ++;
+ pb ++;
+ }
+ return 0;
+}
+
+#endif
+void nca_collsub_max_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image,
+ int *stat, char *errmsg, index_type char_len, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_s4);
+
+void
+nca_collsub_max_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_UINTEGER_4 *a, *b;
+ GFC_UINTEGER_4 *buffer, *this_image_buf;
+ collsub_iface *ci;
+ index_type type_size;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof (GFC_UINTEGER_4);
+ buffer = get_collsub_buf (ci, type_size * local->num_images);
+ this_image_buf = buffer + this_image.image_num * char_len;
+ memcpy (this_image_buf, obj, type_size);
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset * char_len;
+ if (memcmp4 (b, a, char_len) > 0)
+ memcpy (a, b, type_size);
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ memcpy (obj, buffer, type_size);
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image,
+ int *stat, char *errmsg, index_type char_len, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_s4);
+
+void
+nca_collsub_min_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_UINTEGER_4 *a, *b;
+ GFC_UINTEGER_4 *buffer, *this_image_buf;
+ collsub_iface *ci;
+ index_type type_size;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof (GFC_UINTEGER_4);
+ buffer = get_collsub_buf (ci, type_size * local->num_images);
+ this_image_buf = buffer + this_image.image_num * char_len;
+ memcpy (this_image_buf, obj, type_size);
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset * char_len;
+ if (memcmp4 (b, a, char_len) < 0)
+ memcpy (a, b, type_size);
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ memcpy (obj, buffer, type_size);
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_s4 (gfc_array_s4 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type char_len,
+ index_type errmsg_len);
+export_proto (nca_collsub_max_array_s4);
+
+void
+nca_collsub_max_array_s4 (gfc_array_s4 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */
+ index_type extent[GFC_MAX_DIMENSIONS];
+ char *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ char *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ index_type type_size;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof (GFC_UINTEGER_4);
+ dim = GFC_DESCRIPTOR_RANK (array);
+ num_elems = 1;
+ ssize = type_size;
+ packed = true;
+ span = array->span != 0 ? array->span : type_size;
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n))
+ packed = false;
+
+ num_elems *= extent[n];
+ }
+
+ ssize = num_elems * type_size;
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * ssize;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ char *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+
+ memcpy (dest, src, type_size);
+ dest += type_size;
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ char *other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_UINTEGER_4 *a;
+ GFC_UINTEGER_4 *b;
+
+ other_shared_ptr = this_shared_ptr + imoffset * ssize;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = (GFC_UINTEGER_4 *) (this_shared_ptr + i * type_size);
+ b = (GFC_UINTEGER_4 *) (other_shared_ptr + i * type_size);
+ if (memcmp4 (b, a, char_len) > 0)
+ memcpy (a, b, type_size);
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ char *src = buffer;
+ char *restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ memcpy (dest, src, type_size);
+ src += span;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_s4 (gfc_array_s4 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type char_len,
+ index_type errmsg_len);
+export_proto (nca_collsub_min_array_s4);
+
+void
+nca_collsub_min_array_s4 (gfc_array_s4 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */
+ index_type extent[GFC_MAX_DIMENSIONS];
+ char *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ char *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ index_type type_size;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof (GFC_UINTEGER_4);
+ dim = GFC_DESCRIPTOR_RANK (array);
+ num_elems = 1;
+ ssize = type_size;
+ packed = true;
+ span = array->span != 0 ? array->span : type_size;
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n))
+ packed = false;
+
+ num_elems *= extent[n];
+ }
+
+ ssize = num_elems * type_size;
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * ssize;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ char *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+
+ memcpy (dest, src, type_size);
+ dest += type_size;
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ char *other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_UINTEGER_4 *a;
+ GFC_UINTEGER_4 *b;
+
+ other_shared_ptr = this_shared_ptr + imoffset * ssize;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = (GFC_UINTEGER_4 *) (this_shared_ptr + i * type_size);
+ b = (GFC_UINTEGER_4 *) (other_shared_ptr + i * type_size);
+ if (memcmp4 (b, a, char_len) < 0)
+ memcpy (a, b, type_size);
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ char *src = buffer;
+ char *restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ memcpy (dest, src, type_size);
+ src += span;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+