diff options
author | I'm not telling you my name, idiot <git_is@stupid.com> | 2020-09-23 18:14:20 +0200 |
---|---|---|
committer | I'm not telling you my name, idiot <git_is@stupid.com> | 2020-09-23 18:14:20 +0200 |
commit | bef0a39f2f8e87780f990d12fa71f5ed0039267a (patch) | |
tree | ce8138e924d9bfff3ce0b63aa4c29397fbfaa1a5 /libgfortran/generated | |
parent | 9044db88d634c631920eaa9f66c0275adf18fdf5 (diff) | |
download | gcc-bef0a39f2f8e87780f990d12fa71f5ed0039267a.zip gcc-bef0a39f2f8e87780f990d12fa71f5ed0039267a.tar.gz gcc-bef0a39f2f8e87780f990d12fa71f5ed0039267a.tar.bz2 |
Initial commit of coarray_native branch.
Diffstat (limited to 'libgfortran/generated')
-rw-r--r-- | libgfortran/generated/nca_minmax_i1.c | 653 | ||||
-rw-r--r-- | libgfortran/generated/nca_minmax_i16.c | 653 | ||||
-rw-r--r-- | libgfortran/generated/nca_minmax_i2.c | 653 | ||||
-rw-r--r-- | libgfortran/generated/nca_minmax_i4.c | 653 | ||||
-rw-r--r-- | libgfortran/generated/nca_minmax_i8.c | 653 | ||||
-rw-r--r-- | libgfortran/generated/nca_minmax_r10.c | 653 | ||||
-rw-r--r-- | libgfortran/generated/nca_minmax_r16.c | 653 | ||||
-rw-r--r-- | libgfortran/generated/nca_minmax_r4.c | 653 | ||||
-rw-r--r-- | libgfortran/generated/nca_minmax_r8.c | 653 | ||||
-rw-r--r-- | libgfortran/generated/nca_minmax_s1.c | 494 | ||||
-rw-r--r-- | libgfortran/generated/nca_minmax_s4.c | 494 |
11 files changed, 6865 insertions, 0 deletions
diff --git a/libgfortran/generated/nca_minmax_i1.c b/libgfortran/generated/nca_minmax_i1.c new file mode 100644 index 0000000..3bc9a2b --- /dev/null +++ b/libgfortran/generated/nca_minmax_i1.c @@ -0,0 +1,653 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_INTEGER_1) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +void nca_collsub_max_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_i1); + +void +nca_collsub_max_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_1 *a, *b; + GFC_INTEGER_1 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b > *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_i1); + +void +nca_collsub_min_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_1 *a, *b; + GFC_INTEGER_1 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b < *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_sum_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_sum_scalar_i1); + +void +nca_collsub_sum_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_1 *a, *b; + GFC_INTEGER_1 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + *a += *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_i1 (gfc_array_i1 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_max_array_i1); + +void +nca_collsub_max_array_i1 (gfc_array_i1 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_1 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_1 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_1); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_1); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_1); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_1 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_INTEGER_1 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_1 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_1 *a; + GFC_INTEGER_1 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b > *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_1 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_1 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_i1 (gfc_array_i1 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_min_array_i1); + +void +nca_collsub_min_array_i1 (gfc_array_i1 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_1 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_1 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_1); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_1); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_1); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_1 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_INTEGER_1 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_1 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_1 *a; + GFC_INTEGER_1 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b < *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_1 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_1 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_sum_array_i1 (gfc_array_i1 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_sum_array_i1); + +void +nca_collsub_sum_array_i1 (gfc_array_i1 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_1 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_1 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_1); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_1); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_1); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_1 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_INTEGER_1 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_1 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_1 *a; + GFC_INTEGER_1 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + *a += *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_1 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_1 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + diff --git a/libgfortran/generated/nca_minmax_i16.c b/libgfortran/generated/nca_minmax_i16.c new file mode 100644 index 0000000..8fbb948 --- /dev/null +++ b/libgfortran/generated/nca_minmax_i16.c @@ -0,0 +1,653 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_INTEGER_16) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +void nca_collsub_max_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_i16); + +void +nca_collsub_max_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_16 *a, *b; + GFC_INTEGER_16 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b > *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_i16); + +void +nca_collsub_min_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_16 *a, *b; + GFC_INTEGER_16 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b < *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_sum_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_sum_scalar_i16); + +void +nca_collsub_sum_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_16 *a, *b; + GFC_INTEGER_16 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + *a += *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_i16 (gfc_array_i16 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_max_array_i16); + +void +nca_collsub_max_array_i16 (gfc_array_i16 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_16 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_16); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_16); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_16); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_16 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_INTEGER_16 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_16 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_16 *a; + GFC_INTEGER_16 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b > *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_16 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_16 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_i16 (gfc_array_i16 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_min_array_i16); + +void +nca_collsub_min_array_i16 (gfc_array_i16 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_16 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_16); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_16); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_16); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_16 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_INTEGER_16 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_16 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_16 *a; + GFC_INTEGER_16 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b < *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_16 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_16 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_sum_array_i16 (gfc_array_i16 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_sum_array_i16); + +void +nca_collsub_sum_array_i16 (gfc_array_i16 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_16 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_16); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_16); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_16); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_16 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_INTEGER_16 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_16 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_16 *a; + GFC_INTEGER_16 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + *a += *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_16 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_16 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + diff --git a/libgfortran/generated/nca_minmax_i2.c b/libgfortran/generated/nca_minmax_i2.c new file mode 100644 index 0000000..61908d6 --- /dev/null +++ b/libgfortran/generated/nca_minmax_i2.c @@ -0,0 +1,653 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_INTEGER_2) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +void nca_collsub_max_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_i2); + +void +nca_collsub_max_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_2 *a, *b; + GFC_INTEGER_2 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b > *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_i2); + +void +nca_collsub_min_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_2 *a, *b; + GFC_INTEGER_2 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b < *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_sum_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_sum_scalar_i2); + +void +nca_collsub_sum_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_2 *a, *b; + GFC_INTEGER_2 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + *a += *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_i2 (gfc_array_i2 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_max_array_i2); + +void +nca_collsub_max_array_i2 (gfc_array_i2 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_2 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_2 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_2); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_2); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_2); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_2 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_INTEGER_2 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_2 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_2 *a; + GFC_INTEGER_2 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b > *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_2 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_2 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_i2 (gfc_array_i2 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_min_array_i2); + +void +nca_collsub_min_array_i2 (gfc_array_i2 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_2 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_2 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_2); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_2); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_2); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_2 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_INTEGER_2 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_2 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_2 *a; + GFC_INTEGER_2 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b < *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_2 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_2 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_sum_array_i2 (gfc_array_i2 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_sum_array_i2); + +void +nca_collsub_sum_array_i2 (gfc_array_i2 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_2 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_2 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_2); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_2); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_2); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_2 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_INTEGER_2 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_2 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_2 *a; + GFC_INTEGER_2 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + *a += *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_2 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_2 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + diff --git a/libgfortran/generated/nca_minmax_i4.c b/libgfortran/generated/nca_minmax_i4.c new file mode 100644 index 0000000..5e37586 --- /dev/null +++ b/libgfortran/generated/nca_minmax_i4.c @@ -0,0 +1,653 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_INTEGER_4) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +void nca_collsub_max_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_i4); + +void +nca_collsub_max_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_4 *a, *b; + GFC_INTEGER_4 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b > *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_i4); + +void +nca_collsub_min_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_4 *a, *b; + GFC_INTEGER_4 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b < *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_sum_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_sum_scalar_i4); + +void +nca_collsub_sum_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_4 *a, *b; + GFC_INTEGER_4 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + *a += *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_i4 (gfc_array_i4 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_max_array_i4); + +void +nca_collsub_max_array_i4 (gfc_array_i4 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_4 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_4); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_4); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_4); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_4 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_INTEGER_4 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_4 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_4 *a; + GFC_INTEGER_4 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b > *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_4 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_4 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_i4 (gfc_array_i4 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_min_array_i4); + +void +nca_collsub_min_array_i4 (gfc_array_i4 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_4 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_4); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_4); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_4); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_4 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_INTEGER_4 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_4 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_4 *a; + GFC_INTEGER_4 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b < *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_4 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_4 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_sum_array_i4 (gfc_array_i4 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_sum_array_i4); + +void +nca_collsub_sum_array_i4 (gfc_array_i4 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_4 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_4); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_4); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_4); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_4 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_INTEGER_4 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_4 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_4 *a; + GFC_INTEGER_4 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + *a += *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_4 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_4 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + diff --git a/libgfortran/generated/nca_minmax_i8.c b/libgfortran/generated/nca_minmax_i8.c new file mode 100644 index 0000000..b3dc861 --- /dev/null +++ b/libgfortran/generated/nca_minmax_i8.c @@ -0,0 +1,653 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_INTEGER_8) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +void nca_collsub_max_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_i8); + +void +nca_collsub_max_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_8 *a, *b; + GFC_INTEGER_8 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b > *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_i8); + +void +nca_collsub_min_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_8 *a, *b; + GFC_INTEGER_8 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b < *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_sum_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_sum_scalar_i8); + +void +nca_collsub_sum_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_8 *a, *b; + GFC_INTEGER_8 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + *a += *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_i8 (gfc_array_i8 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_max_array_i8); + +void +nca_collsub_max_array_i8 (gfc_array_i8 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_8 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_8); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_8); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_8); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_8 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_INTEGER_8 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_8 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_8 *a; + GFC_INTEGER_8 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b > *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_8 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_8 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_i8 (gfc_array_i8 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_min_array_i8); + +void +nca_collsub_min_array_i8 (gfc_array_i8 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_8 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_8); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_8); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_8); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_8 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_INTEGER_8 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_8 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_8 *a; + GFC_INTEGER_8 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b < *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_8 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_8 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_sum_array_i8 (gfc_array_i8 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_sum_array_i8); + +void +nca_collsub_sum_array_i8 (gfc_array_i8 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_8 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_8); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_8); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_8); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_8 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_INTEGER_8 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_8 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_8 *a; + GFC_INTEGER_8 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + *a += *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_8 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_8 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + diff --git a/libgfortran/generated/nca_minmax_r10.c b/libgfortran/generated/nca_minmax_r10.c new file mode 100644 index 0000000..10f7324 --- /dev/null +++ b/libgfortran/generated/nca_minmax_r10.c @@ -0,0 +1,653 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_REAL_10) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +void nca_collsub_max_scalar_r10 (GFC_REAL_10 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_r10); + +void +nca_collsub_max_scalar_r10 (GFC_REAL_10 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_10 *a, *b; + GFC_REAL_10 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b > *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_r10 (GFC_REAL_10 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_r10); + +void +nca_collsub_min_scalar_r10 (GFC_REAL_10 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_10 *a, *b; + GFC_REAL_10 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b < *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_sum_scalar_r10 (GFC_REAL_10 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_sum_scalar_r10); + +void +nca_collsub_sum_scalar_r10 (GFC_REAL_10 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_10 *a, *b; + GFC_REAL_10 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + *a += *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_r10 (gfc_array_r10 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_max_array_r10); + +void +nca_collsub_max_array_r10 (gfc_array_r10 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_10 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_10 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_10); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_10); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_10); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_10 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_REAL_10 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_10 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_10 *a; + GFC_REAL_10 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b > *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_10 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_10 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_r10 (gfc_array_r10 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_min_array_r10); + +void +nca_collsub_min_array_r10 (gfc_array_r10 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_10 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_10 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_10); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_10); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_10); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_10 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_REAL_10 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_10 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_10 *a; + GFC_REAL_10 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b < *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_10 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_10 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_sum_array_r10 (gfc_array_r10 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_sum_array_r10); + +void +nca_collsub_sum_array_r10 (gfc_array_r10 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_10 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_10 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_10); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_10); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_10); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_10 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_REAL_10 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_10 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_10 *a; + GFC_REAL_10 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + *a += *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_10 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_10 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + diff --git a/libgfortran/generated/nca_minmax_r16.c b/libgfortran/generated/nca_minmax_r16.c new file mode 100644 index 0000000..a0a0a51 --- /dev/null +++ b/libgfortran/generated/nca_minmax_r16.c @@ -0,0 +1,653 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_REAL_16) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +void nca_collsub_max_scalar_r16 (GFC_REAL_16 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_r16); + +void +nca_collsub_max_scalar_r16 (GFC_REAL_16 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_16 *a, *b; + GFC_REAL_16 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b > *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_r16 (GFC_REAL_16 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_r16); + +void +nca_collsub_min_scalar_r16 (GFC_REAL_16 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_16 *a, *b; + GFC_REAL_16 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b < *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_sum_scalar_r16 (GFC_REAL_16 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_sum_scalar_r16); + +void +nca_collsub_sum_scalar_r16 (GFC_REAL_16 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_16 *a, *b; + GFC_REAL_16 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + *a += *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_r16 (gfc_array_r16 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_max_array_r16); + +void +nca_collsub_max_array_r16 (gfc_array_r16 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_16 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_16); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_16); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_16); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_16 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_REAL_16 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_16 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_16 *a; + GFC_REAL_16 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b > *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_16 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_16 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_r16 (gfc_array_r16 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_min_array_r16); + +void +nca_collsub_min_array_r16 (gfc_array_r16 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_16 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_16); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_16); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_16); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_16 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_REAL_16 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_16 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_16 *a; + GFC_REAL_16 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b < *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_16 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_16 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_sum_array_r16 (gfc_array_r16 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_sum_array_r16); + +void +nca_collsub_sum_array_r16 (gfc_array_r16 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_16 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_16); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_16); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_16); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_16 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_REAL_16 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_16 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_16 *a; + GFC_REAL_16 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + *a += *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_16 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_16 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + diff --git a/libgfortran/generated/nca_minmax_r4.c b/libgfortran/generated/nca_minmax_r4.c new file mode 100644 index 0000000..0eb3f1b --- /dev/null +++ b/libgfortran/generated/nca_minmax_r4.c @@ -0,0 +1,653 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_REAL_4) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +void nca_collsub_max_scalar_r4 (GFC_REAL_4 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_r4); + +void +nca_collsub_max_scalar_r4 (GFC_REAL_4 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_4 *a, *b; + GFC_REAL_4 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b > *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_r4 (GFC_REAL_4 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_r4); + +void +nca_collsub_min_scalar_r4 (GFC_REAL_4 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_4 *a, *b; + GFC_REAL_4 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b < *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_sum_scalar_r4 (GFC_REAL_4 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_sum_scalar_r4); + +void +nca_collsub_sum_scalar_r4 (GFC_REAL_4 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_4 *a, *b; + GFC_REAL_4 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + *a += *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_r4 (gfc_array_r4 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_max_array_r4); + +void +nca_collsub_max_array_r4 (gfc_array_r4 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_4 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_4); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_4); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_4); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_4 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_REAL_4 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_4 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_4 *a; + GFC_REAL_4 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b > *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_4 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_4 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_r4 (gfc_array_r4 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_min_array_r4); + +void +nca_collsub_min_array_r4 (gfc_array_r4 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_4 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_4); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_4); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_4); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_4 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_REAL_4 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_4 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_4 *a; + GFC_REAL_4 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b < *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_4 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_4 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_sum_array_r4 (gfc_array_r4 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_sum_array_r4); + +void +nca_collsub_sum_array_r4 (gfc_array_r4 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_4 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_4); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_4); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_4); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_4 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_REAL_4 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_4 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_4 *a; + GFC_REAL_4 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + *a += *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_4 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_4 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + diff --git a/libgfortran/generated/nca_minmax_r8.c b/libgfortran/generated/nca_minmax_r8.c new file mode 100644 index 0000000..3b3e962 --- /dev/null +++ b/libgfortran/generated/nca_minmax_r8.c @@ -0,0 +1,653 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_REAL_8) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +void nca_collsub_max_scalar_r8 (GFC_REAL_8 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_r8); + +void +nca_collsub_max_scalar_r8 (GFC_REAL_8 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_8 *a, *b; + GFC_REAL_8 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b > *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_r8 (GFC_REAL_8 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_r8); + +void +nca_collsub_min_scalar_r8 (GFC_REAL_8 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_8 *a, *b; + GFC_REAL_8 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b < *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_sum_scalar_r8 (GFC_REAL_8 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_sum_scalar_r8); + +void +nca_collsub_sum_scalar_r8 (GFC_REAL_8 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_8 *a, *b; + GFC_REAL_8 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + *a += *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_r8 (gfc_array_r8 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_max_array_r8); + +void +nca_collsub_max_array_r8 (gfc_array_r8 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_8 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_8); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_8); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_8); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_8 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_REAL_8 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_8 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_8 *a; + GFC_REAL_8 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b > *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_8 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_8 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_r8 (gfc_array_r8 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_min_array_r8); + +void +nca_collsub_min_array_r8 (gfc_array_r8 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_8 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_8); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_8); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_8); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_8 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_REAL_8 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_8 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_8 *a; + GFC_REAL_8 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b < *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_8 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_8 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_sum_array_r8 (gfc_array_r8 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_sum_array_r8); + +void +nca_collsub_sum_array_r8 (gfc_array_r8 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_8 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_8); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_8); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_8); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_8 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_REAL_8 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_8 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_8 *a; + GFC_REAL_8 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + *a += *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_8 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_8 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + diff --git a/libgfortran/generated/nca_minmax_s1.c b/libgfortran/generated/nca_minmax_s1.c new file mode 100644 index 0000000..b081452 --- /dev/null +++ b/libgfortran/generated/nca_minmax_s1.c @@ -0,0 +1,494 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_UINTEGER_1) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +#if 1 == 4 + +/* Compare wide character types, which are handled internally as + unsigned 4-byte integers. */ +static inline int +memcmp4 (const void *a, const void *b, size_t len) +{ + const GFC_UINTEGER_4 *pa = a; + const GFC_UINTEGER_4 *pb = b; + while (len-- > 0) + { + if (*pa != *pb) + return *pa < *pb ? -1 : 1; + pa ++; + pb ++; + } + return 0; +} + +#endif +void nca_collsub_max_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image, + int *stat, char *errmsg, index_type char_len, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_s1); + +void +nca_collsub_max_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type char_len, + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_UINTEGER_1 *a, *b; + GFC_UINTEGER_1 *buffer, *this_image_buf; + collsub_iface *ci; + index_type type_size; + + ci = &local->ci; + + type_size = char_len * sizeof (GFC_UINTEGER_1); + buffer = get_collsub_buf (ci, type_size * local->num_images); + this_image_buf = buffer + this_image.image_num * char_len; + memcpy (this_image_buf, obj, type_size); + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 + && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset * char_len; + if (memcmp (b, a, char_len) > 0) + memcpy (a, b, type_size); + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + memcpy (obj, buffer, type_size); + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image, + int *stat, char *errmsg, index_type char_len, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_s1); + +void +nca_collsub_min_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type char_len, + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_UINTEGER_1 *a, *b; + GFC_UINTEGER_1 *buffer, *this_image_buf; + collsub_iface *ci; + index_type type_size; + + ci = &local->ci; + + type_size = char_len * sizeof (GFC_UINTEGER_1); + buffer = get_collsub_buf (ci, type_size * local->num_images); + this_image_buf = buffer + this_image.image_num * char_len; + memcpy (this_image_buf, obj, type_size); + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 + && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset * char_len; + if (memcmp (b, a, char_len) < 0) + memcpy (a, b, type_size); + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + memcpy (obj, buffer, type_size); + + finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_s1 (gfc_array_s1 * restrict array, int *result_image, + int *stat, char *errmsg, index_type char_len, + index_type errmsg_len); +export_proto (nca_collsub_max_array_s1); + +void +nca_collsub_max_array_s1 (gfc_array_s1 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type char_len, + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */ + index_type extent[GFC_MAX_DIMENSIONS]; + char *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + char *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + index_type type_size; + collsub_iface *ci; + + ci = &local->ci; + + type_size = char_len * sizeof (GFC_UINTEGER_1); + dim = GFC_DESCRIPTOR_RANK (array); + num_elems = 1; + ssize = type_size; + packed = true; + span = array->span != 0 ? array->span : type_size; + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n)) + packed = false; + + num_elems *= extent[n]; + } + + ssize = num_elems * type_size; + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * ssize; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + char *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + + memcpy (dest, src, type_size); + dest += type_size; + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 + && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + char *other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_UINTEGER_1 *a; + GFC_UINTEGER_1 *b; + + other_shared_ptr = this_shared_ptr + imoffset * ssize; + for (index_type i = 0; i < num_elems; i++) + { + a = (GFC_UINTEGER_1 *) (this_shared_ptr + i * type_size); + b = (GFC_UINTEGER_1 *) (other_shared_ptr + i * type_size); + if (memcmp (b, a, char_len) > 0) + memcpy (a, b, type_size); + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + char *src = buffer; + char *restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + memcpy (dest, src, type_size); + src += span; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_s1 (gfc_array_s1 * restrict array, int *result_image, + int *stat, char *errmsg, index_type char_len, + index_type errmsg_len); +export_proto (nca_collsub_min_array_s1); + +void +nca_collsub_min_array_s1 (gfc_array_s1 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type char_len, + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */ + index_type extent[GFC_MAX_DIMENSIONS]; + char *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + char *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + index_type type_size; + collsub_iface *ci; + + ci = &local->ci; + + type_size = char_len * sizeof (GFC_UINTEGER_1); + dim = GFC_DESCRIPTOR_RANK (array); + num_elems = 1; + ssize = type_size; + packed = true; + span = array->span != 0 ? array->span : type_size; + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n)) + packed = false; + + num_elems *= extent[n]; + } + + ssize = num_elems * type_size; + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * ssize; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + char *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + + memcpy (dest, src, type_size); + dest += type_size; + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 + && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + char *other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_UINTEGER_1 *a; + GFC_UINTEGER_1 *b; + + other_shared_ptr = this_shared_ptr + imoffset * ssize; + for (index_type i = 0; i < num_elems; i++) + { + a = (GFC_UINTEGER_1 *) (this_shared_ptr + i * type_size); + b = (GFC_UINTEGER_1 *) (other_shared_ptr + i * type_size); + if (memcmp (b, a, char_len) < 0) + memcpy (a, b, type_size); + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + char *src = buffer; + char *restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + memcpy (dest, src, type_size); + src += span; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + diff --git a/libgfortran/generated/nca_minmax_s4.c b/libgfortran/generated/nca_minmax_s4.c new file mode 100644 index 0000000..b202fda --- /dev/null +++ b/libgfortran/generated/nca_minmax_s4.c @@ -0,0 +1,494 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_UINTEGER_4) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +#if 4 == 4 + +/* Compare wide character types, which are handled internally as + unsigned 4-byte integers. */ +static inline int +memcmp4 (const void *a, const void *b, size_t len) +{ + const GFC_UINTEGER_4 *pa = a; + const GFC_UINTEGER_4 *pb = b; + while (len-- > 0) + { + if (*pa != *pb) + return *pa < *pb ? -1 : 1; + pa ++; + pb ++; + } + return 0; +} + +#endif +void nca_collsub_max_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image, + int *stat, char *errmsg, index_type char_len, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_s4); + +void +nca_collsub_max_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type char_len, + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_UINTEGER_4 *a, *b; + GFC_UINTEGER_4 *buffer, *this_image_buf; + collsub_iface *ci; + index_type type_size; + + ci = &local->ci; + + type_size = char_len * sizeof (GFC_UINTEGER_4); + buffer = get_collsub_buf (ci, type_size * local->num_images); + this_image_buf = buffer + this_image.image_num * char_len; + memcpy (this_image_buf, obj, type_size); + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 + && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset * char_len; + if (memcmp4 (b, a, char_len) > 0) + memcpy (a, b, type_size); + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + memcpy (obj, buffer, type_size); + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image, + int *stat, char *errmsg, index_type char_len, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_s4); + +void +nca_collsub_min_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type char_len, + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_UINTEGER_4 *a, *b; + GFC_UINTEGER_4 *buffer, *this_image_buf; + collsub_iface *ci; + index_type type_size; + + ci = &local->ci; + + type_size = char_len * sizeof (GFC_UINTEGER_4); + buffer = get_collsub_buf (ci, type_size * local->num_images); + this_image_buf = buffer + this_image.image_num * char_len; + memcpy (this_image_buf, obj, type_size); + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 + && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset * char_len; + if (memcmp4 (b, a, char_len) < 0) + memcpy (a, b, type_size); + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + memcpy (obj, buffer, type_size); + + finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_s4 (gfc_array_s4 * restrict array, int *result_image, + int *stat, char *errmsg, index_type char_len, + index_type errmsg_len); +export_proto (nca_collsub_max_array_s4); + +void +nca_collsub_max_array_s4 (gfc_array_s4 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type char_len, + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */ + index_type extent[GFC_MAX_DIMENSIONS]; + char *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + char *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + index_type type_size; + collsub_iface *ci; + + ci = &local->ci; + + type_size = char_len * sizeof (GFC_UINTEGER_4); + dim = GFC_DESCRIPTOR_RANK (array); + num_elems = 1; + ssize = type_size; + packed = true; + span = array->span != 0 ? array->span : type_size; + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n)) + packed = false; + + num_elems *= extent[n]; + } + + ssize = num_elems * type_size; + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * ssize; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + char *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + + memcpy (dest, src, type_size); + dest += type_size; + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 + && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + char *other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_UINTEGER_4 *a; + GFC_UINTEGER_4 *b; + + other_shared_ptr = this_shared_ptr + imoffset * ssize; + for (index_type i = 0; i < num_elems; i++) + { + a = (GFC_UINTEGER_4 *) (this_shared_ptr + i * type_size); + b = (GFC_UINTEGER_4 *) (other_shared_ptr + i * type_size); + if (memcmp4 (b, a, char_len) > 0) + memcpy (a, b, type_size); + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + char *src = buffer; + char *restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + memcpy (dest, src, type_size); + src += span; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_s4 (gfc_array_s4 * restrict array, int *result_image, + int *stat, char *errmsg, index_type char_len, + index_type errmsg_len); +export_proto (nca_collsub_min_array_s4); + +void +nca_collsub_min_array_s4 (gfc_array_s4 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type char_len, + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */ + index_type extent[GFC_MAX_DIMENSIONS]; + char *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + char *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + index_type type_size; + collsub_iface *ci; + + ci = &local->ci; + + type_size = char_len * sizeof (GFC_UINTEGER_4); + dim = GFC_DESCRIPTOR_RANK (array); + num_elems = 1; + ssize = type_size; + packed = true; + span = array->span != 0 ? array->span : type_size; + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n)) + packed = false; + + num_elems *= extent[n]; + } + + ssize = num_elems * type_size; + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * ssize; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + char *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + + memcpy (dest, src, type_size); + dest += type_size; + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 + && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + char *other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_UINTEGER_4 *a; + GFC_UINTEGER_4 *b; + + other_shared_ptr = this_shared_ptr + imoffset * ssize; + for (index_type i = 0; i < num_elems; i++) + { + a = (GFC_UINTEGER_4 *) (this_shared_ptr + i * type_size); + b = (GFC_UINTEGER_4 *) (other_shared_ptr + i * type_size); + if (memcmp4 (b, a, char_len) < 0) + memcpy (a, b, type_size); + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + char *src = buffer; + char *restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + memcpy (dest, src, type_size); + src += span; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + |