aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/flag-types.h3
-rw-r--r--gcc/fortran/dump-parse-tree.c2
-rw-r--r--gcc/fortran/frontend-passes.c122
-rw-r--r--gcc/fortran/gfortran.h1
-rw-r--r--gcc/fortran/intrinsic.c10
-rw-r--r--gcc/fortran/intrinsic.h6
-rw-r--r--gcc/fortran/iresolve.c98
-rw-r--r--gcc/fortran/lang.opt5
-rw-r--r--gcc/fortran/resolve.c67
-rw-r--r--gcc/fortran/trans-array.c288
-rw-r--r--gcc/fortran/trans-array.h13
-rw-r--r--gcc/fortran/trans-decl.c296
-rw-r--r--gcc/fortran/trans-expr.c22
-rw-r--r--gcc/fortran/trans-intrinsic.c181
-rw-r--r--gcc/fortran/trans-stmt.c100
-rw-r--r--gcc/fortran/trans-types.c51
-rw-r--r--gcc/fortran/trans.c22
-rw-r--r--gcc/fortran/trans.h18
-rw-r--r--libgfortran/Makefile.am55
-rw-r--r--libgfortran/Makefile.in311
-rw-r--r--libgfortran/config.h.in15
-rwxr-xr-xlibgfortran/configure198
-rw-r--r--libgfortran/configure.ac37
-rw-r--r--libgfortran/generated/nca_minmax_i1.c653
-rw-r--r--libgfortran/generated/nca_minmax_i16.c653
-rw-r--r--libgfortran/generated/nca_minmax_i2.c653
-rw-r--r--libgfortran/generated/nca_minmax_i4.c653
-rw-r--r--libgfortran/generated/nca_minmax_i8.c653
-rw-r--r--libgfortran/generated/nca_minmax_r10.c653
-rw-r--r--libgfortran/generated/nca_minmax_r16.c653
-rw-r--r--libgfortran/generated/nca_minmax_r4.c653
-rw-r--r--libgfortran/generated/nca_minmax_r8.c653
-rw-r--r--libgfortran/generated/nca_minmax_s1.c494
-rw-r--r--libgfortran/generated/nca_minmax_s4.c494
-rw-r--r--libgfortran/libgfortran.h1
-rw-r--r--libgfortran/m4/nca-minmax-s.m4289
-rw-r--r--libgfortran/m4/nca_minmax.m4259
-rw-r--r--libgfortran/nca/.tags275
-rw-r--r--libgfortran/nca/alloc.c152
-rw-r--r--libgfortran/nca/alloc.h67
-rw-r--r--libgfortran/nca/allocator.c90
-rw-r--r--libgfortran/nca/allocator.h21
-rw-r--r--libgfortran/nca/coarraynative.c145
-rw-r--r--libgfortran/nca/collective_inline.h42
-rw-r--r--libgfortran/nca/collective_subroutine.c416
-rw-r--r--libgfortran/nca/collective_subroutine.h44
-rw-r--r--libgfortran/nca/hashmap.c447
-rw-r--r--libgfortran/nca/hashmap.h70
-rw-r--r--libgfortran/nca/libcoarraynative.h103
-rw-r--r--libgfortran/nca/lock.h37
-rw-r--r--libgfortran/nca/shared_memory.c221
-rw-r--r--libgfortran/nca/shared_memory.h78
-rw-r--r--libgfortran/nca/sync.c156
-rw-r--r--libgfortran/nca/sync.h56
-rw-r--r--libgfortran/nca/util.c197
-rw-r--r--libgfortran/nca/util.h86
-rw-r--r--libgfortran/nca/wrapper.c258
57 files changed, 12113 insertions, 183 deletions
diff --git a/gcc/flag-types.h b/gcc/flag-types.h
index 852ea76..51e698d 100644
--- a/gcc/flag-types.h
+++ b/gcc/flag-types.h
@@ -346,7 +346,8 @@ enum gfc_fcoarray
{
GFC_FCOARRAY_NONE = 0,
GFC_FCOARRAY_SINGLE,
- GFC_FCOARRAY_LIB
+ GFC_FCOARRAY_LIB,
+ GFC_FCOARRAY_NATIVE
};
diff --git a/gcc/fortran/dump-parse-tree.c b/gcc/fortran/dump-parse-tree.c
index 6e265f4..acff75a 100644
--- a/gcc/fortran/dump-parse-tree.c
+++ b/gcc/fortran/dump-parse-tree.c
@@ -1060,7 +1060,7 @@ show_symbol (gfc_symbol *sym)
if (sym == NULL)
return;
- fprintf (dumpfile, "|| symbol: '%s' ", sym->name);
+ fprintf (dumpfile, "|| symbol: '%s' %p ", sym->name, (void *) &(sym->backend_decl));
len = strlen (sym->name);
for (i=len; i<12; i++)
fputc(' ', dumpfile);
diff --git a/gcc/fortran/frontend-passes.c b/gcc/fortran/frontend-passes.c
index 83f6fd8..c573731 100644
--- a/gcc/fortran/frontend-passes.c
+++ b/gcc/fortran/frontend-passes.c
@@ -57,6 +57,7 @@ static int call_external_blas (gfc_code **, int *, void *);
static int matmul_temp_args (gfc_code **, int *,void *data);
static int index_interchange (gfc_code **, int*, void *);
static bool is_fe_temp (gfc_expr *e);
+static void rewrite_co_reduce (gfc_namespace *);
#ifdef CHECKING_P
static void check_locus (gfc_namespace *);
@@ -179,6 +180,9 @@ gfc_run_passes (gfc_namespace *ns)
if (flag_realloc_lhs)
realloc_strings (ns);
+
+ if (flag_coarray == GFC_FCOARRAY_NATIVE)
+ rewrite_co_reduce (ns);
}
#ifdef CHECKING_P
@@ -5895,3 +5899,121 @@ gfc_fix_implicit_pure (gfc_namespace *ns)
return changed;
}
+
+/* Callback function. Create a wrapper around VALUE functions. */
+
+static int
+co_reduce_code (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED, void *data)
+{
+ gfc_code *co = *c;
+ gfc_expr *oper;
+ gfc_symbol *op_sym;
+ gfc_symbol *arg1, *arg2;
+ gfc_namespace *parent_ns;
+ gfc_namespace *proc_ns;
+ gfc_symbol *proc_sym;
+ gfc_symtree *f1t, *f2t;
+ gfc_symbol *f1, *f2;
+ gfc_code *assign;
+ gfc_expr *e1, *e2;
+ char name[GFC_MAX_SYMBOL_LEN + 1];
+ static int num;
+
+ if (co->op != EXEC_CALL || co->resolved_isym == NULL
+ || co->resolved_isym->id != GFC_ISYM_CO_REDUCE)
+ return 0;
+
+ oper = co->ext.actual->next->expr;
+ op_sym = oper->symtree->n.sym;
+ arg1 = op_sym->formal->sym;
+ arg2 = op_sym->formal->next->sym;
+
+ parent_ns = (gfc_namespace *) data;
+
+ /* Generate the wrapper around the function. */
+ proc_ns = gfc_get_namespace (parent_ns, 0);
+ snprintf (name, GFC_MAX_SYMBOL_LEN, "__coreduce_%d_%s", num++, op_sym->name);
+ gfc_get_symbol (name, proc_ns, &proc_sym);
+ proc_sym->attr.flavor = FL_PROCEDURE;
+ proc_sym->attr.subroutine = 1;
+ proc_sym->attr.referenced = 1;
+ proc_sym->attr.access = ACCESS_PRIVATE;
+ gfc_commit_symbol (proc_sym);
+ proc_ns->proc_name = proc_sym;
+
+ /* Make up the formal arguments. */
+ gfc_get_sym_tree (arg1->name, proc_ns, &f1t, false);
+ f1 = f1t->n.sym;
+ f1->ts = arg1->ts;
+ f1->attr.flavor = FL_VARIABLE;
+ f1->attr.dummy = 1;
+ f1->attr.intent = INTENT_INOUT;
+ f1->attr.fe_temp = 1;
+ f1->declared_at = arg1->declared_at;
+ f1->attr.referenced = 1;
+ proc_sym->formal = gfc_get_formal_arglist ();
+ proc_sym->formal->sym = f1;
+ gfc_commit_symbol (f1);
+
+ gfc_get_sym_tree (arg2->name, proc_ns, &f2t, false);
+ f2 = f2t->n.sym;
+ f2->ts = arg2->ts;
+ f2->attr.flavor = FL_VARIABLE;
+ f2->attr.dummy = 1;
+ f2->attr.intent = INTENT_IN;
+ f2->attr.fe_temp = 1;
+ f2->declared_at = arg2->declared_at;
+ f2->attr.referenced = 1;
+ proc_sym->formal->next = gfc_get_formal_arglist ();
+ proc_sym->formal->next->sym = f2;
+ gfc_commit_symbol (f2);
+
+ /* Generate the assignment statement. */
+ assign = gfc_get_code (EXEC_ASSIGN);
+
+ e1 = gfc_lval_expr_from_sym (f1);
+ e2 = gfc_get_expr ();
+ e2->where = proc_sym->declared_at;
+ e2->expr_type = EXPR_FUNCTION;
+ e2->symtree = f2t;
+ e2->ts = arg1->ts;
+ e2->value.function.esym = op_sym;
+ e2->value.function.actual = gfc_get_actual_arglist ();
+ e2->value.function.actual->expr = gfc_lval_expr_from_sym (f1);
+ e2->value.function.actual->next = gfc_get_actual_arglist ();
+ e2->value.function.actual->next->expr = gfc_lval_expr_from_sym (f2);
+ assign->expr1 = e1;
+ assign->expr2 = e2;
+ assign->loc = proc_sym->declared_at;
+
+ proc_ns->code = assign;
+
+ /* And hang it into the sibling list. */
+ proc_ns->sibling = parent_ns->contained;
+ parent_ns->contained = proc_ns;
+
+ /* ... and finally replace the call in the statement. */
+
+ oper->symtree->n.sym = proc_sym;
+ proc_sym->refs ++;
+ return 0;
+}
+
+/* Rewrite functions for co_reduce for a consistent calling
+ signature. This is only necessary if any of the functions
+ has a VALUE argument. */
+
+static void
+rewrite_co_reduce (gfc_namespace *global_ns)
+{
+ gfc_namespace *ns;
+
+ gfc_code_walker (&global_ns->code, co_reduce_code, dummy_expr_callback,
+ (void *) global_ns);
+
+ for (ns = global_ns->contained; ns; ns = ns->sibling)
+ gfc_code_walker (&ns->code, co_reduce_code, dummy_expr_callback,
+ (void *) global_ns);
+
+ return;
+}
diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index d0cea83..6940c24 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -2010,6 +2010,7 @@ typedef struct gfc_array_ref
int dimen; /* # of components in the reference */
int codimen;
bool in_allocate; /* For coarray checks. */
+ bool native_coarray_argument;
gfc_expr *team;
gfc_expr *stat;
locus where;
diff --git a/gcc/fortran/intrinsic.c b/gcc/fortran/intrinsic.c
index ef33587..6a5b3e9 100644
--- a/gcc/fortran/intrinsic.c
+++ b/gcc/fortran/intrinsic.c
@@ -3734,7 +3734,7 @@ add_subroutines (void)
/* Coarray collectives. */
add_sym_4s ("co_broadcast", GFC_ISYM_CO_BROADCAST, CLASS_IMPURE,
BT_UNKNOWN, 0, GFC_STD_F2018,
- gfc_check_co_broadcast, NULL, NULL,
+ gfc_check_co_broadcast, NULL, gfc_resolve_co_broadcast,
a, BT_REAL, dr, REQUIRED, INTENT_INOUT,
"source_image", BT_INTEGER, di, REQUIRED, INTENT_IN,
stat, BT_INTEGER, di, OPTIONAL, INTENT_OUT,
@@ -3742,7 +3742,7 @@ add_subroutines (void)
add_sym_4s ("co_max", GFC_ISYM_CO_MAX, CLASS_IMPURE,
BT_UNKNOWN, 0, GFC_STD_F2018,
- gfc_check_co_minmax, NULL, NULL,
+ gfc_check_co_minmax, NULL, gfc_resolve_co_max,
a, BT_REAL, dr, REQUIRED, INTENT_INOUT,
result_image, BT_INTEGER, di, OPTIONAL, INTENT_IN,
stat, BT_INTEGER, di, OPTIONAL, INTENT_OUT,
@@ -3750,7 +3750,7 @@ add_subroutines (void)
add_sym_4s ("co_min", GFC_ISYM_CO_MIN, CLASS_IMPURE,
BT_UNKNOWN, 0, GFC_STD_F2018,
- gfc_check_co_minmax, NULL, NULL,
+ gfc_check_co_minmax, NULL, gfc_resolve_co_min,
a, BT_REAL, dr, REQUIRED, INTENT_INOUT,
result_image, BT_INTEGER, di, OPTIONAL, INTENT_IN,
stat, BT_INTEGER, di, OPTIONAL, INTENT_OUT,
@@ -3758,7 +3758,7 @@ add_subroutines (void)
add_sym_4s ("co_sum", GFC_ISYM_CO_SUM, CLASS_IMPURE,
BT_UNKNOWN, 0, GFC_STD_F2018,
- gfc_check_co_sum, NULL, NULL,
+ gfc_check_co_sum, NULL, gfc_resolve_co_sum,
a, BT_REAL, dr, REQUIRED, INTENT_INOUT,
result_image, BT_INTEGER, di, OPTIONAL, INTENT_IN,
stat, BT_INTEGER, di, OPTIONAL, INTENT_OUT,
@@ -3766,7 +3766,7 @@ add_subroutines (void)
add_sym_5s ("co_reduce", GFC_ISYM_CO_REDUCE, CLASS_IMPURE,
BT_UNKNOWN, 0, GFC_STD_F2018,
- gfc_check_co_reduce, NULL, NULL,
+ gfc_check_co_reduce, NULL, gfc_resolve_co_reduce,
a, BT_REAL, dr, REQUIRED, INTENT_INOUT,
"operator", BT_INTEGER, di, REQUIRED, INTENT_IN,
result_image, BT_INTEGER, di, OPTIONAL, INTENT_IN,
diff --git a/gcc/fortran/intrinsic.h b/gcc/fortran/intrinsic.h
index 166ae79..2ca566c 100644
--- a/gcc/fortran/intrinsic.h
+++ b/gcc/fortran/intrinsic.h
@@ -677,7 +677,11 @@ void gfc_resolve_system_sub (gfc_code *);
void gfc_resolve_ttynam_sub (gfc_code *);
void gfc_resolve_umask_sub (gfc_code *);
void gfc_resolve_unlink_sub (gfc_code *);
-
+void gfc_resolve_co_sum (gfc_code *);
+void gfc_resolve_co_min (gfc_code *);
+void gfc_resolve_co_max (gfc_code *);
+void gfc_resolve_co_reduce (gfc_code *);
+void gfc_resolve_co_broadcast (gfc_code *);
/* The findloc() subroutine requires the most arguments: six. */
diff --git a/gcc/fortran/iresolve.c b/gcc/fortran/iresolve.c
index 7376961..844891e 100644
--- a/gcc/fortran/iresolve.c
+++ b/gcc/fortran/iresolve.c
@@ -36,6 +36,7 @@ along with GCC; see the file COPYING3. If not see
#include "constructor.h"
#include "arith.h"
#include "trans.h"
+#include "options.h"
/* Given printf-like arguments, return a stable version of the result string.
@@ -4030,3 +4031,100 @@ gfc_resolve_unlink_sub (gfc_code *c)
name = gfc_get_string (PREFIX ("unlink_i%d_sub"), kind);
c->resolved_sym = gfc_get_intrinsic_sub_symbol (name);
}
+
+/* Resolve the CO_SUM et al. intrinsic subroutines. */
+
+static void
+gfc_resolve_co_collective (gfc_code *c, const char *oper)
+{
+ int kind;
+ gfc_expr *e;
+ const char *name;
+
+ if (flag_coarray != GFC_FCOARRAY_NATIVE)
+ name = gfc_get_string (PREFIX ("caf_co_sum"));
+ else
+ {
+ e = c->ext.actual->expr;
+ kind = e->ts.kind;
+
+ name = gfc_get_string (PREFIX ("nca_collsub_%s_%s_%c%d"), oper,
+ e->rank ? "array" : "scalar",
+ gfc_type_letter (e->ts.type), kind);
+ }
+
+ c->resolved_sym = gfc_get_intrinsic_sub_symbol (name);
+}
+
+/* Resolve CO_SUM. */
+
+void
+gfc_resolve_co_sum (gfc_code *c)
+{
+ gfc_resolve_co_collective (c, "sum");
+}
+
+/* Resolve CO_MIN. */
+
+void
+gfc_resolve_co_min (gfc_code *c)
+{
+ gfc_resolve_co_collective (c, "min");
+}
+
+/* Resolve CO_MAX. */
+
+void
+gfc_resolve_co_max (gfc_code *c)
+{
+ gfc_resolve_co_collective (c, "max");
+}
+
+/* Resolve CO_REDUCE. */
+
+void
+gfc_resolve_co_reduce (gfc_code *c)
+{
+ gfc_expr *e;
+ const char *name;
+
+ if (flag_coarray != GFC_FCOARRAY_NATIVE)
+ name = gfc_get_string (PREFIX ("caf_co_reduce"));
+
+ else
+ {
+ e = c->ext.actual->expr;
+ if (e->ts.type == BT_CHARACTER)
+ name = gfc_get_string (PREFIX ("nca_collsub_reduce_%s%c%d"),
+ e->rank ? "array" : "scalar",
+ gfc_type_letter (e->ts.type), e->ts.kind);
+ else
+ name = gfc_get_string (PREFIX ("nca_collsub_reduce_%s"),
+ e->rank ? "array" : "scalar" );
+ }
+
+ c->resolved_sym = gfc_get_intrinsic_sub_symbol (name);
+}
+
+void
+gfc_resolve_co_broadcast (gfc_code * c)
+{
+ gfc_expr *e;
+ const char *name;
+
+ if (flag_coarray != GFC_FCOARRAY_NATIVE)
+ name = gfc_get_string (PREFIX ("caf_co_broadcast"));
+ else
+ {
+ e = c->ext.actual->expr;
+ if (e->ts.type == BT_CHARACTER)
+ name = gfc_get_string (PREFIX ("nca_collsub_broadcast_%s%c%d"),
+ e->rank ? "array" : "scalar",
+ gfc_type_letter (e->ts.type), e->ts.kind);
+ else
+ name = gfc_get_string (PREFIX ("nca_collsub_broadcast_%s"),
+ e->rank ? "array" : "scalar" );
+ }
+
+ c->resolved_sym = gfc_get_intrinsic_sub_symbol (name);
+}
diff --git a/gcc/fortran/lang.opt b/gcc/fortran/lang.opt
index da4b1aa..e267fb0 100644
--- a/gcc/fortran/lang.opt
+++ b/gcc/fortran/lang.opt
@@ -761,7 +761,7 @@ Copy array sections into a contiguous block on procedure entry.
fcoarray=
Fortran RejectNegative Joined Enum(gfc_fcoarray) Var(flag_coarray) Init(GFC_FCOARRAY_NONE)
--fcoarray=<none|single|lib> Specify which coarray parallelization should be used.
+-fcoarray=<none|single|lib|native> Specify which coarray parallelization should be used.
Enum
Name(gfc_fcoarray) Type(enum gfc_fcoarray) UnknownError(Unrecognized option: %qs)
@@ -775,6 +775,9 @@ Enum(gfc_fcoarray) String(single) Value(GFC_FCOARRAY_SINGLE)
EnumValue
Enum(gfc_fcoarray) String(lib) Value(GFC_FCOARRAY_LIB)
+EnumValue
+Enum(gfc_fcoarray) String(native) Value(GFC_FCOARRAY_NATIVE)
+
fcheck=
Fortran RejectNegative JoinedOrMissing
-fcheck=[...] Specify which runtime checks are to be performed.
diff --git a/gcc/fortran/resolve.c b/gcc/fortran/resolve.c
index f4ce49f..6d6984b 100644
--- a/gcc/fortran/resolve.c
+++ b/gcc/fortran/resolve.c
@@ -3585,6 +3585,53 @@ resolve_specific_s (gfc_code *c)
return false;
}
+/* Fix up references to native coarrays in call - element references
+ have to be converted to full references if the coarray has to be
+ passed fully. */
+
+static void
+fixup_coarray_args (gfc_symbol *sym, gfc_actual_arglist *actual)
+{
+ gfc_formal_arglist *formal, *f;
+ gfc_actual_arglist *a;
+
+ formal = gfc_sym_get_dummy_args (sym);
+
+ if (formal == NULL)
+ return;
+
+ for (a = actual, f = formal; a && f; a = a->next, f = f->next)
+ {
+ if (a->expr == NULL || f->sym == NULL)
+ continue;
+ if (a->expr->expr_type == EXPR_VARIABLE
+ && a->expr->symtree->n.sym->attr.codimension
+ && f->sym->attr.codimension)
+ {
+ gfc_ref *r;
+ for (r = a->expr->ref; r; r = r->next)
+ {
+ if (r->type == REF_ARRAY && r->u.ar.codimen)
+ {
+ gfc_array_ref *ar = &r->u.ar;
+ int i, eff_dimen = ar->dimen + ar->codimen;
+
+ for (i = ar->dimen; i < eff_dimen; i++)
+ {
+ ar->dimen_type[i] = DIMEN_RANGE;
+ gcc_assert (ar->start[i] == NULL);
+ gcc_assert (ar->end[i] == NULL);
+ }
+
+ if (ar->type == AR_ELEMENT)
+ ar->type = !ar->dimen ? AR_FULL : AR_SECTION;
+
+ ar->native_coarray_argument = true;
+ }
+ }
+ }
+ }
+}
/* Resolve a subroutine call not known to be generic nor specific. */
@@ -3615,7 +3662,7 @@ resolve_unknown_s (gfc_code *c)
found:
gfc_procedure_use (sym, &c->ext.actual, &c->loc);
-
+
c->resolved_sym = sym;
return pure_subroutine (sym, sym->name, &c->loc);
@@ -3740,6 +3787,9 @@ resolve_call (gfc_code *c)
/* Typebound procedure: Assume the worst. */
gfc_current_ns->proc_name->attr.array_outer_dependency = 1;
+ if (flag_coarray == GFC_FCOARRAY_NATIVE)
+ fixup_coarray_args (csym, c->ext.actual);
+
return t;
}
@@ -10117,7 +10167,7 @@ resolve_critical (gfc_code *code)
char name[GFC_MAX_SYMBOL_LEN];
static int serial = 0;
- if (flag_coarray != GFC_FCOARRAY_LIB)
+ if (flag_coarray != GFC_FCOARRAY_LIB && flag_coarray != GFC_FCOARRAY_NATIVE)
return;
symtree = gfc_find_symtree (gfc_current_ns->sym_root,
@@ -10154,6 +10204,19 @@ resolve_critical (gfc_code *code)
symtree->n.sym->as->lower[0] = gfc_get_int_expr (gfc_default_integer_kind,
NULL, 1);
gfc_commit_symbols();
+
+ if (flag_coarray == GFC_FCOARRAY_NATIVE)
+ {
+ gfc_ref *r = gfc_get_ref ();
+ r->type = REF_ARRAY;
+ r->u.ar.type = AR_ELEMENT;
+ r->u.ar.as = code->resolved_sym->as;
+ for (int i = 0; i < code->resolved_sym->as->corank; i++)
+ r->u.ar.dimen_type [i] = DIMEN_THIS_IMAGE;
+
+ code->expr1 = gfc_lval_expr_from_sym (code->resolved_sym);
+ code->expr1->ref = r;
+ }
}
diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c
index 6566c47..9013f19 100644
--- a/gcc/fortran/trans-array.c
+++ b/gcc/fortran/trans-array.c
@@ -2940,6 +2940,60 @@ gfc_add_loop_ss_code (gfc_loopinfo * loop, gfc_ss * ss, bool subscript,
gfc_add_loop_ss_code (nested_loop, nested_loop->ss, subscript, where);
}
+static tree
+gfc_add_strides (tree expr, tree desc, int beg, int end)
+{
+ int i;
+ tree tmp, stride;
+ tmp = gfc_index_zero_node;
+ for (i = beg; i < end; i++)
+ {
+ stride = gfc_conv_array_stride (desc, i);
+ tmp = fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE(tmp),
+ tmp, stride);
+ }
+ return fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE(expr),
+ expr, tmp);
+}
+
+/* This function calculates the new offset via
+ new_offset = offset + this_image ()
+ * array.stride[first_codimension]
+ + sum (remaining codimension offsets)
+ If offset is a pointer, we also need to multiply it by the size. */
+static tree
+gfc_native_coarray_add_this_image_offset (tree offset, tree desc,
+ gfc_array_ref *ar, int is_pointer,
+ int subtract)
+{
+ tree tmp, off;
+ /* Calculate the actual offset. */
+ tmp = build_call_expr_loc (input_location, gfor_fndecl_nca_this_image,
+ 1, integer_zero_node);
+ tmp = convert (TREE_TYPE(gfc_index_zero_node), tmp);
+ tmp = fold_build2_loc (input_location, MINUS_EXPR, TREE_TYPE(tmp), tmp,
+ build_int_cst (TREE_TYPE(tmp), subtract));
+ tmp = fold_build2_loc (input_location, MULT_EXPR, TREE_TYPE(tmp),
+ gfc_conv_array_stride (desc, ar->dimen), tmp);
+ /* We also need to add the missing strides once to compensate for the
+ offset, that is to large now. The loop starts at sym->as.rank+1
+ because we need to skip the first corank stride */
+ off = gfc_add_strides (tmp, desc, ar->as->rank + 1,
+ ar->as->rank + ar->as->corank);
+ if (is_pointer)
+ {
+ /* Remove pointer and array from type in order to get the raw base type. */
+ tmp = TREE_TYPE(TREE_TYPE(TREE_TYPE(offset)));
+ /* And get the size of that base type. */
+ tmp = convert (TREE_TYPE(off), size_in_bytes_loc (input_location, tmp));
+ tmp = fold_build2_loc (input_location, MULT_EXPR, TREE_TYPE(off),
+ off, tmp);
+ return fold_build_pointer_plus_loc (input_location, offset, tmp);
+ }
+ else
+ return fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE(offset),
+ offset, off);
+}
/* Translate expressions for the descriptor and data pointer of a SS. */
/*GCC ARRAYS*/
@@ -2951,6 +3005,7 @@ gfc_conv_ss_descriptor (stmtblock_t * block, gfc_ss * ss, int base)
gfc_ss_info *ss_info;
gfc_array_info *info;
tree tmp;
+ gfc_ref *ref;
ss_info = ss->info;
info = &ss_info->data.array;
@@ -2982,10 +3037,18 @@ gfc_conv_ss_descriptor (stmtblock_t * block, gfc_ss * ss, int base)
}
/* Also the data pointer. */
tmp = gfc_conv_array_data (se.expr);
+ /* If we have a native coarray with implied this_image (), add the
+ appropriate offset to the data pointer. */
+ ref = ss_info->expr->ref;
+ if (flag_coarray == GFC_FCOARRAY_NATIVE && ref
+ && ref->u.ar.dimen_type[ref->u.ar.dimen + ref->u.ar.codimen - 1]
+ == DIMEN_THIS_IMAGE)
+ tmp = gfc_native_coarray_add_this_image_offset (tmp, se.expr, &ref->u.ar, 1, 1);
/* If this is a variable or address of a variable we use it directly.
Otherwise we must evaluate it now to avoid breaking dependency
analysis by pulling the expressions for elemental array indices
inside the loop. */
+
if (!(DECL_P (tmp)
|| (TREE_CODE (tmp) == ADDR_EXPR
&& DECL_P (TREE_OPERAND (tmp, 0)))))
@@ -2993,6 +3056,15 @@ gfc_conv_ss_descriptor (stmtblock_t * block, gfc_ss * ss, int base)
info->data = tmp;
tmp = gfc_conv_array_offset (se.expr);
+ /* If we have a native coarray, adjust the offset to remove the
+ offset for the codimensions. */
+ // TODO: check whether the recipient is a coarray, if it is, disable
+ // all of this
+ if (flag_coarray == GFC_FCOARRAY_NATIVE && ref
+ && ref->u.ar.dimen_type[ref->u.ar.dimen + ref->u.ar.codimen - 1]
+ == DIMEN_THIS_IMAGE)
+ tmp = gfc_add_strides (tmp, se.expr, ref->u.ar.as->rank,
+ ref->u.ar.as->rank + ref->u.ar.as->corank);
info->offset = gfc_evaluate_now (tmp, block);
/* Make absolutely sure that the saved_offset is indeed saved
@@ -3593,6 +3665,7 @@ build_array_ref (tree desc, tree offset, tree decl, tree vptr)
}
+
/* Build an array reference. se->expr already holds the array descriptor.
This should be either a variable, indirect variable reference or component
reference. For arrays which do not have a descriptor, se->expr will be
@@ -3612,8 +3685,20 @@ gfc_conv_array_ref (gfc_se * se, gfc_array_ref * ar, gfc_expr *expr,
gfc_se tmpse;
gfc_symbol * sym = expr->symtree->n.sym;
char *var_name = NULL;
+ bool need_impl_this_image;
+ int eff_dimen;
+
+ need_impl_this_image =
+ ar->dimen_type[ar->dimen + ar->codimen - 1] == DIMEN_THIS_IMAGE;
+
+ if (flag_coarray == GFC_FCOARRAY_NATIVE
+ && !need_impl_this_image)
+ eff_dimen = ar->dimen + ar->codimen - 1;
+ else
+ eff_dimen = ar->dimen - 1;
- if (ar->dimen == 0)
+
+ if (flag_coarray != GFC_FCOARRAY_NATIVE && ar->dimen == 0)
{
gcc_assert (ar->codimen || sym->attr.select_rank_temporary
|| (ar->as && ar->as->corank));
@@ -3681,7 +3766,7 @@ gfc_conv_array_ref (gfc_se * se, gfc_array_ref * ar, gfc_expr *expr,
/* Calculate the offsets from all the dimensions. Make sure to associate
the final offset so that we form a chain of loop invariant summands. */
- for (n = ar->dimen - 1; n >= 0; n--)
+ for (n = eff_dimen; n >= 0; n--)
{
/* Calculate the index for this dimension. */
gfc_init_se (&indexse, se);
@@ -3753,6 +3838,9 @@ gfc_conv_array_ref (gfc_se * se, gfc_array_ref * ar, gfc_expr *expr,
add_to_offset (&cst_offset, &offset, tmp);
}
+ if (flag_coarray == GFC_FCOARRAY_NATIVE && need_impl_this_image)
+ offset = gfc_native_coarray_add_this_image_offset (offset, se->expr, ar, 0, 0);
+
if (!integer_zerop (cst_offset))
offset = fold_build2_loc (input_location, PLUS_EXPR,
gfc_array_index_type, offset, cst_offset);
@@ -5423,7 +5511,7 @@ gfc_conv_descriptor_cosize (tree desc, int rank, int corank)
} */
/*GCC ARRAYS*/
-static tree
+tree
gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset,
gfc_expr ** lower, gfc_expr ** upper, stmtblock_t * pblock,
stmtblock_t * descriptor_block, tree * overflow,
@@ -5441,6 +5529,8 @@ gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset,
tree elsecase;
tree cond;
tree var;
+ tree conv_lbound;
+ tree conv_ubound;
stmtblock_t thenblock;
stmtblock_t elseblock;
gfc_expr *ubound;
@@ -5454,7 +5544,7 @@ gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset,
/* Set the dtype before the alloc, because registration of coarrays needs
it initialized. */
- if (expr->ts.type == BT_CHARACTER
+ if (expr && expr->ts.type == BT_CHARACTER
&& expr->ts.deferred
&& VAR_P (expr->ts.u.cl->backend_decl))
{
@@ -5462,7 +5552,7 @@ gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset,
tmp = gfc_conv_descriptor_dtype (descriptor);
gfc_add_modify (pblock, tmp, gfc_get_dtype_rank_type (rank, type));
}
- else if (expr->ts.type == BT_CHARACTER
+ else if (expr && expr->ts.type == BT_CHARACTER
&& expr->ts.deferred
&& TREE_CODE (descriptor) == COMPONENT_REF)
{
@@ -5494,9 +5584,6 @@ gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset,
for (n = 0; n < rank; n++)
{
- tree conv_lbound;
- tree conv_ubound;
-
/* We have 3 possibilities for determining the size of the array:
lower == NULL => lbound = 1, ubound = upper[n]
upper[n] = NULL => lbound = 1, ubound = lower[n]
@@ -5646,6 +5733,15 @@ gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset,
}
gfc_conv_descriptor_lbound_set (descriptor_block, descriptor,
gfc_rank_cst[n], se.expr);
+ conv_lbound = se.expr;
+ if (flag_coarray == GFC_FCOARRAY_NATIVE)
+ {
+
+ tmp = fold_build2_loc (input_location, MULT_EXPR, gfc_array_index_type,
+ se.expr, stride);
+ offset = fold_build2_loc (input_location, MINUS_EXPR,
+ gfc_array_index_type, offset, tmp);
+ }
if (n < rank + corank - 1)
{
@@ -5655,6 +5751,18 @@ gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset,
gfc_add_block_to_block (pblock, &se.pre);
gfc_conv_descriptor_ubound_set (descriptor_block, descriptor,
gfc_rank_cst[n], se.expr);
+ gfc_conv_descriptor_stride_set (descriptor_block, descriptor,
+ gfc_rank_cst[n], stride);
+ conv_ubound = se.expr;
+ if (flag_coarray == GFC_FCOARRAY_NATIVE)
+ {
+ size = gfc_conv_array_extent_dim (conv_lbound, conv_ubound,
+ &or_expr);
+ size = gfc_evaluate_now (size, descriptor_block);
+ stride = fold_build2_loc (input_location, MULT_EXPR,
+ gfc_array_index_type, stride, size);
+ stride = gfc_evaluate_now (stride, descriptor_block);
+ }
}
}
@@ -5688,7 +5796,7 @@ gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset,
/* Convert to size_t. */
*element_size = fold_convert (size_type_node, tmp);
- if (rank == 0)
+ if (rank == 0 && !(flag_coarray == GFC_FCOARRAY_NATIVE && corank))
return *element_size;
*nelems = gfc_evaluate_now (stride, pblock);
@@ -5773,6 +5881,38 @@ retrieve_last_ref (gfc_ref **ref_in, gfc_ref **prev_ref_in)
return true;
}
+int
+gfc_native_coarray_get_allocation_type (gfc_symbol * sym)
+{
+ bool is_lock_type, is_event_type;
+ is_lock_type = sym->ts.type == BT_DERIVED
+ && sym->ts.u.derived->from_intmod == INTMOD_ISO_FORTRAN_ENV
+ && sym->ts.u.derived->intmod_sym_id == ISOFORTRAN_LOCK_TYPE;
+
+ is_event_type = sym->ts.type == BT_DERIVED
+ && sym->ts.u.derived->from_intmod == INTMOD_ISO_FORTRAN_ENV
+ && sym->ts.u.derived->intmod_sym_id == ISOFORTRAN_EVENT_TYPE;
+
+ if (is_lock_type)
+ return GFC_NCA_LOCK_COARRAY;
+ else if (is_event_type)
+ return GFC_NCA_EVENT_COARRAY;
+ else
+ return GFC_NCA_NORMAL_COARRAY;
+}
+
+void
+gfc_allocate_native_coarray (stmtblock_t *b, tree decl, tree size, int corank,
+ int alloc_type)
+{
+ gfc_add_expr_to_block (b,
+ build_call_expr_loc (input_location, gfor_fndecl_nca_coarray_allocate,
+ 4, gfc_build_addr_expr (pvoid_type_node, decl),
+ size, build_int_cst (integer_type_node, corank),
+ build_int_cst (integer_type_node, alloc_type)));
+
+}
+
/* Initializes the descriptor and generates a call to _gfor_allocate. Does
the work for an ALLOCATE statement. */
/*GCC ARRAYS*/
@@ -5784,6 +5924,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg,
bool e3_has_nodescriptor)
{
tree tmp;
+ tree allocation;
tree pointer;
tree offset = NULL_TREE;
tree token = NULL_TREE;
@@ -5914,7 +6055,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg,
expr3_elem_size, nelems, expr3, e3_arr_desc,
e3_has_nodescriptor, expr, &element_size);
- if (dimension)
+ if (dimension || (flag_coarray == GFC_FCOARRAY_NATIVE && coarray))
{
var_overflow = gfc_create_var (integer_type_node, "overflow");
gfc_add_modify (&se->pre, var_overflow, overflow);
@@ -5956,7 +6097,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg,
pointer = gfc_conv_descriptor_data_get (se->expr);
STRIP_NOPS (pointer);
- if (allocatable)
+ if (allocatable && !(flag_coarray == GFC_FCOARRAY_NATIVE && coarray))
{
not_prev_allocated = gfc_create_var (logical_type_node,
"not_prev_allocated");
@@ -5969,8 +6110,17 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg,
gfc_start_block (&elseblock);
+ if (coarray && flag_coarray == GFC_FCOARRAY_NATIVE)
+ {
+ tree elem_size
+ = size_in_bytes (gfc_get_element_type (TREE_TYPE(se->expr)));
+ int alloc_type
+ = gfc_native_coarray_get_allocation_type (expr->symtree->n.sym);
+ gfc_allocate_native_coarray (&elseblock, se->expr, elem_size,
+ ref->u.ar.as->corank, alloc_type);
+ }
/* The allocatable variant takes the old pointer as first argument. */
- if (allocatable)
+ else if (allocatable)
gfc_allocate_allocatable (&elseblock, pointer, size, token,
status, errmsg, errlen, label_finish, expr,
coref != NULL ? coref->u.ar.as->corank : 0);
@@ -5987,13 +6137,12 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg,
cond = gfc_unlikely (fold_build2_loc (input_location, NE_EXPR,
logical_type_node, var_overflow, integer_zero_node),
PRED_FORTRAN_OVERFLOW);
- tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, cond,
+ allocation = fold_build3_loc (input_location, COND_EXPR, void_type_node, cond,
error, gfc_finish_block (&elseblock));
}
else
- tmp = gfc_finish_block (&elseblock);
+ allocation = gfc_finish_block (&elseblock);
- gfc_add_expr_to_block (&se->pre, tmp);
/* Update the array descriptor with the offset and the span. */
if (dimension)
@@ -6004,6 +6153,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg,
}
set_descriptor = gfc_finish_block (&set_descriptor_block);
+
if (status != NULL_TREE)
{
cond = fold_build2_loc (input_location, EQ_EXPR,
@@ -6014,14 +6164,25 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg,
cond = fold_build2_loc (input_location, TRUTH_OR_EXPR,
logical_type_node, cond, not_prev_allocated);
- gfc_add_expr_to_block (&se->pre,
- fold_build3_loc (input_location, COND_EXPR, void_type_node,
+ set_descriptor = fold_build3_loc (input_location, COND_EXPR, void_type_node,
cond,
set_descriptor,
- build_empty_stmt (input_location)));
+ build_empty_stmt (input_location));
+ }
+
+ // For native coarrays, the size must be set before the allocation routine
+ // can be called.
+ if (coarray && flag_coarray == GFC_FCOARRAY_NATIVE)
+ {
+ gfc_add_expr_to_block (&se->pre, set_descriptor);
+ gfc_add_expr_to_block (&se->pre, allocation);
}
else
+ {
+ gfc_add_expr_to_block (&se->pre, allocation);
gfc_add_expr_to_block (&se->pre, set_descriptor);
+ }
+
return true;
}
@@ -6524,6 +6685,7 @@ gfc_trans_dummy_array_bias (gfc_symbol * sym, tree tmpdesc,
bool optional_arg;
gfc_array_spec *as;
bool is_classarray = IS_CLASS_ARRAY (sym);
+ int eff_dimen;
/* Do nothing for pointer and allocatable arrays. */
if ((sym->ts.type != BT_CLASS && sym->attr.pointer)
@@ -6638,8 +6800,13 @@ gfc_trans_dummy_array_bias (gfc_symbol * sym, tree tmpdesc,
offset = gfc_index_zero_node;
size = gfc_index_one_node;
+ if (flag_coarray == GFC_FCOARRAY_NATIVE)
+ eff_dimen = as->rank + as->corank;
+ else
+ eff_dimen = as->rank;
+
/* Evaluate the bounds of the array. */
- for (n = 0; n < as->rank; n++)
+ for (n = 0; n < eff_dimen; n++)
{
if (checkparm || !as->upper[n])
{
@@ -6724,7 +6891,7 @@ gfc_trans_dummy_array_bias (gfc_symbol * sym, tree tmpdesc,
gfc_array_index_type, offset, tmp);
/* The size of this dimension, and the stride of the next. */
- if (n + 1 < as->rank)
+ if (n + 1 < eff_dimen)
{
stride = GFC_TYPE_ARRAY_STRIDE (type, n + 1);
@@ -6879,20 +7046,35 @@ gfc_get_dataptr_offset (stmtblock_t *block, tree parm, tree desc, tree offset,
return;
}
+ /* If it's a coarray with implicit this_image, add that to the offset. */
+ ref = expr->ref;
+ if (flag_coarray == GFC_FCOARRAY_NATIVE && ref && ref->type == REF_ARRAY
+ && ref->u.ar.dimen_type[ref->u.ar.dimen + ref->u.ar.codimen - 1]
+ == DIMEN_THIS_IMAGE
+ && !ref->u.ar.native_coarray_argument)
+ offset = gfc_native_coarray_add_this_image_offset (offset, desc,
+ &ref->u.ar, 0, 1);
+
tmp = build_array_ref (desc, offset, NULL, NULL);
/* Offset the data pointer for pointer assignments from arrays with
subreferences; e.g. my_integer => my_type(:)%integer_component. */
if (subref)
{
- /* Go past the array reference. */
+ /* Go past the array reference. */
for (ref = expr->ref; ref; ref = ref->next)
- if (ref->type == REF_ARRAY &&
- ref->u.ar.type != AR_ELEMENT)
- {
- ref = ref->next;
- break;
- }
+ {
+ if (ref->type == REF_ARRAY &&
+ ref->u.ar.type != AR_ELEMENT)
+ {
+ ref = ref->next;
+ break;
+ }
+ else if (flag_coarray == GFC_FCOARRAY_NATIVE && ref->type == REF_ARRAY &&
+ ref->u.ar.dimen_type[ref->u.ar.dimen +ref->u.ar.codimen -1]
+ == DIMEN_THIS_IMAGE)
+ tmp = gfc_native_coarray_add_this_image_offset (tmp, desc, &ref->u.ar, 0, 1);
+ }
/* Calculate the offset for each subsequent subreference. */
for (; ref; ref = ref->next)
@@ -6955,7 +7137,10 @@ gfc_get_dataptr_offset (stmtblock_t *block, tree parm, tree desc, tree offset,
gfc_array_index_type, stride, itmp);
stride = gfc_evaluate_now (stride, block);
}
-
+ if (flag_coarray == GFC_FCOARRAY_NATIVE &&
+ ref->u.ar.dimen_type[ref->u.ar.dimen +ref->u.ar.codimen -1]
+ == DIMEN_THIS_IMAGE)
+ tmp = gfc_native_coarray_add_this_image_offset (tmp, desc, &ref->u.ar, 0, 1);
/* Apply the index to obtain the array element. */
tmp = gfc_build_array_ref (tmp, index, NULL);
break;
@@ -7306,6 +7491,13 @@ gfc_conv_expr_descriptor (gfc_se *se, gfc_expr *expr)
else
full = gfc_full_array_ref_p (info->ref, NULL);
+ if (flag_coarray == GFC_FCOARRAY_NATIVE &&
+ info->ref->type == REF_ARRAY &&
+ info->ref->u.ar.dimen_type[info->ref->u.ar.dimen
+ + info->ref->u.ar.codimen - 1] ==
+ DIMEN_THIS_IMAGE)
+ full = 0;
+
if (full && !transposed_dims (ss))
{
if (se->direct_byref && !se->byref_noassign)
@@ -7540,9 +7732,19 @@ gfc_conv_expr_descriptor (gfc_se *se, gfc_expr *expr)
tree to;
tree base;
tree offset;
-
+#if 0 /* TK */
ndim = info->ref ? info->ref->u.ar.dimen : ss->dimen;
-
+#else
+ if (info->ref)
+ {
+ if (info->ref->u.ar.native_coarray_argument)
+ ndim = info->ref->u.ar.dimen + info->ref->u.ar.codimen;
+ else
+ ndim = info->ref->u.ar.dimen;
+ }
+ else
+ ndim = ss->dimen;
+#endif
if (se->want_coarray)
{
gfc_array_ref *ar = &info->ref->u.ar;
@@ -7911,7 +8113,15 @@ gfc_conv_array_parameter (gfc_se * se, gfc_expr * expr, bool g77,
expr->ts.u.cl->backend_decl = tmp;
se->string_length = tmp;
}
-
+#if 0
+ if (flag_coarray == GFC_FCOARRAY_NATIVE && fsym && fsym->attr.codimension && sym)
+ {
+ gfc_init_se (se, NULL);
+ tmp = gfc_get_symbol_decl (sym);
+ se->expr = gfc_build_addr_expr (NULL_TREE, tmp);
+ return;
+ }
+#endif
/* Is this the result of the enclosing procedure? */
this_array_result = (full_array_var && sym->attr.flavor == FL_PROCEDURE);
if (this_array_result
@@ -7919,6 +8129,10 @@ gfc_conv_array_parameter (gfc_se * se, gfc_expr * expr, bool g77,
&& (sym->backend_decl != parent))
this_array_result = false;
+#if 1 /* TK */
+ if (flag_coarray == GFC_FCOARRAY_NATIVE && fsym && fsym->attr.codimension)
+ g77 = false;
+#endif
/* Passing address of the array if it is not pointer or assumed-shape. */
if (full_array_var && g77 && !this_array_result
&& sym->ts.type != BT_DERIVED && sym->ts.type != BT_CLASS)
@@ -8053,8 +8267,8 @@ gfc_conv_array_parameter (gfc_se * se, gfc_expr * expr, bool g77,
{
/* Every other type of array. */
se->want_pointer = 1;
- gfc_conv_expr_descriptor (se, expr);
+ gfc_conv_expr_descriptor (se, expr);
if (size)
array_parameter_size (build_fold_indirect_ref_loc (input_location,
se->expr),
@@ -10869,9 +11083,15 @@ gfc_walk_array_ref (gfc_ss * ss, gfc_expr * expr, gfc_ref * ref)
case AR_SECTION:
newss = gfc_get_array_ss (ss, expr, 0, GFC_SS_SECTION);
newss->info->data.array.ref = ref;
-
+#if 1 /* TK */
+ int eff_dimen;
+ if (ar->native_coarray_argument)
+ eff_dimen = ar->dimen + ar->codimen;
+ else
+ eff_dimen = ar->dimen;
+#endif
/* We add SS chains for all the subscripts in the section. */
- for (n = 0; n < ar->dimen; n++)
+ for (n = 0; n < eff_dimen; n++)
{
gfc_ss *indexss;
diff --git a/gcc/fortran/trans-array.h b/gcc/fortran/trans-array.h
index e561605..0bfd1b0 100644
--- a/gcc/fortran/trans-array.h
+++ b/gcc/fortran/trans-array.h
@@ -23,6 +23,15 @@ along with GCC; see the file COPYING3. If not see
bool gfc_array_allocate (gfc_se *, gfc_expr *, tree, tree, tree, tree,
tree, tree *, gfc_expr *, tree, bool);
+enum gfc_coarray_allocation_type {
+ GFC_NCA_NORMAL_COARRAY = 3,
+ GFC_NCA_LOCK_COARRAY,
+ GFC_NCA_EVENT_COARRAY
+};
+int gfc_native_coarray_get_allocation_type (gfc_symbol *);
+
+void gfc_allocate_native_coarray (stmtblock_t *, tree, tree, int, int);
+
/* Allow the bounds of a loop to be set from a callee's array spec. */
void gfc_set_loop_bounds_from_array_spec (gfc_interface_mapping *,
gfc_se *, gfc_array_spec *);
@@ -57,6 +66,10 @@ tree gfc_bcast_alloc_comp (gfc_symbol *, gfc_expr *, int, tree,
tree gfc_deallocate_alloc_comp_no_caf (gfc_symbol *, tree, int);
tree gfc_reassign_alloc_comp_caf (gfc_symbol *, tree, tree);
+tree gfc_array_init_size (tree, int, int, tree *, gfc_expr **, gfc_expr **,
+ stmtblock_t *, stmtblock_t *, tree *, tree, tree *,
+ gfc_expr *, tree, bool, gfc_expr *, tree *);
+
tree gfc_copy_alloc_comp (gfc_symbol *, tree, tree, int, int);
tree gfc_copy_only_alloc_comp (gfc_symbol *, tree, tree, int);
diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c
index 9224277..1973c4d 100644
--- a/gcc/fortran/trans-decl.c
+++ b/gcc/fortran/trans-decl.c
@@ -170,6 +170,21 @@ tree gfor_fndecl_co_reduce;
tree gfor_fndecl_co_sum;
tree gfor_fndecl_caf_is_present;
+/* Native coarray functions. */
+
+tree gfor_fndecl_nca_master;
+tree gfor_fndecl_nca_coarray_allocate;
+tree gfor_fndecl_nca_coarray_free;
+tree gfor_fndecl_nca_this_image;
+tree gfor_fndecl_nca_num_images;
+tree gfor_fndecl_nca_sync_all;
+tree gfor_fndecl_nca_sync_images;
+tree gfor_fndecl_nca_lock;
+tree gfor_fndecl_nca_unlock;
+tree gfor_fndecl_nca_reduce_scalar;
+tree gfor_fndecl_nca_reduce_array;
+tree gfor_fndecl_nca_broadcast_scalar;
+tree gfor_fndecl_nca_broadcast_array;
/* Math functions. Many other math functions are handled in
trans-intrinsic.c. */
@@ -961,6 +976,7 @@ gfc_build_qualified_array (tree decl, gfc_symbol * sym)
tree type;
int dim;
int nest;
+ int eff_dimen;
gfc_namespace* procns;
symbol_attribute *array_attr;
gfc_array_spec *as;
@@ -1031,8 +1047,12 @@ gfc_build_qualified_array (tree decl, gfc_symbol * sym)
else
gfc_add_decl_to_function (token);
}
+
+ eff_dimen = flag_coarray == GFC_FCOARRAY_NATIVE
+ ? GFC_TYPE_ARRAY_RANK (type) + GFC_TYPE_ARRAY_CORANK (type)
+ : GFC_TYPE_ARRAY_RANK (type);
- for (dim = 0; dim < GFC_TYPE_ARRAY_RANK (type); dim++)
+ for (dim = 0; dim < eff_dimen; dim++)
{
if (GFC_TYPE_ARRAY_LBOUND (type, dim) == NULL_TREE)
{
@@ -1054,22 +1074,30 @@ gfc_build_qualified_array (tree decl, gfc_symbol * sym)
TREE_NO_WARNING (GFC_TYPE_ARRAY_STRIDE (type, dim)) = 1;
}
}
- for (dim = GFC_TYPE_ARRAY_RANK (type);
- dim < GFC_TYPE_ARRAY_RANK (type) + GFC_TYPE_ARRAY_CORANK (type); dim++)
- {
- if (GFC_TYPE_ARRAY_LBOUND (type, dim) == NULL_TREE)
- {
- GFC_TYPE_ARRAY_LBOUND (type, dim) = create_index_var ("lbound", nest);
- TREE_NO_WARNING (GFC_TYPE_ARRAY_LBOUND (type, dim)) = 1;
- }
- /* Don't try to use the unknown ubound for the last coarray dimension. */
- if (GFC_TYPE_ARRAY_UBOUND (type, dim) == NULL_TREE
- && dim < GFC_TYPE_ARRAY_RANK (type) + GFC_TYPE_ARRAY_CORANK (type) - 1)
- {
- GFC_TYPE_ARRAY_UBOUND (type, dim) = create_index_var ("ubound", nest);
- TREE_NO_WARNING (GFC_TYPE_ARRAY_UBOUND (type, dim)) = 1;
- }
- }
+
+ if (flag_coarray != GFC_FCOARRAY_NATIVE)
+ for (dim = GFC_TYPE_ARRAY_RANK (type);
+ dim < GFC_TYPE_ARRAY_RANK (type) + GFC_TYPE_ARRAY_CORANK (type);
+ dim++)
+ {
+ if (GFC_TYPE_ARRAY_LBOUND (type, dim) == NULL_TREE)
+ {
+ GFC_TYPE_ARRAY_LBOUND (type, dim)
+ = create_index_var ("lbound", nest);
+ TREE_NO_WARNING (GFC_TYPE_ARRAY_LBOUND (type, dim)) = 1;
+ }
+ /* Don't try to use the unknown ubound for the last coarray
+ dimension. */
+ if (GFC_TYPE_ARRAY_UBOUND (type, dim) == NULL_TREE
+ && dim < GFC_TYPE_ARRAY_RANK (type)
+ + GFC_TYPE_ARRAY_CORANK (type) - 1)
+ {
+ GFC_TYPE_ARRAY_UBOUND (type, dim)
+ = create_index_var ("ubound", nest);
+ TREE_NO_WARNING (GFC_TYPE_ARRAY_UBOUND (type, dim)) = 1;
+ }
+ }
+
if (GFC_TYPE_ARRAY_OFFSET (type) == NULL_TREE)
{
GFC_TYPE_ARRAY_OFFSET (type) = gfc_create_var_np (gfc_array_index_type,
@@ -1202,6 +1230,10 @@ gfc_build_dummy_array_decl (gfc_symbol * sym, tree dummy)
|| (as && as->type == AS_ASSUMED_RANK))
return dummy;
+ if (flag_coarray == GFC_FCOARRAY_NATIVE && sym->attr.codimension
+ && sym->attr.allocatable)
+ return dummy;
+
/* Add to list of variables if not a fake result variable.
These symbols are set on the symbol only, not on the class component. */
if (sym->attr.result || sym->attr.dummy)
@@ -1504,7 +1536,6 @@ add_attributes_to_decl (symbol_attribute sym_attr, tree list)
static void build_function_decl (gfc_symbol * sym, bool global);
-
/* Return the decl for a gfc_symbol, create it if it doesn't already
exist. */
@@ -1820,7 +1851,7 @@ gfc_get_symbol_decl (gfc_symbol * sym)
}
/* Remember this variable for allocation/cleanup. */
- if (sym->attr.dimension || sym->attr.allocatable || sym->attr.codimension
+ if (sym->attr.dimension || sym->attr.codimension || sym->attr.allocatable
|| (sym->ts.type == BT_CLASS &&
(CLASS_DATA (sym)->attr.dimension
|| CLASS_DATA (sym)->attr.allocatable))
@@ -1869,6 +1900,9 @@ gfc_get_symbol_decl (gfc_symbol * sym)
gcc_assert (!sym->value || sym->value->expr_type == EXPR_NULL);
}
+ if (flag_coarray == GFC_FCOARRAY_NATIVE && sym->attr.codimension)
+ TREE_STATIC(decl) = 1;
+
gfc_finish_var_decl (decl, sym);
if (sym->ts.type == BT_CHARACTER)
@@ -3693,6 +3727,7 @@ void
gfc_build_builtin_function_decls (void)
{
tree gfc_int8_type_node = gfc_get_int_type (8);
+ tree pint_type = build_pointer_type (integer_type_node);
gfor_fndecl_stop_numeric = gfc_build_library_function_decl (
get_identifier (PREFIX("stop_numeric")),
@@ -3820,9 +3855,8 @@ gfc_build_builtin_function_decls (void)
/* Coarray library calls. */
if (flag_coarray == GFC_FCOARRAY_LIB)
{
- tree pint_type, pppchar_type;
+ tree pppchar_type;
- pint_type = build_pointer_type (integer_type_node);
pppchar_type
= build_pointer_type (build_pointer_type (pchar_type_node));
@@ -4062,6 +4096,64 @@ gfc_build_builtin_function_decls (void)
integer_type_node, 3, pvoid_type_node, integer_type_node,
pvoid_type_node);
}
+ else if (flag_coarray == GFC_FCOARRAY_NATIVE)
+ {
+ gfor_fndecl_nca_master = gfc_build_library_function_decl_with_spec (
+ get_identifier (PREFIX("nca_master")), ".r", integer_type_node, 1,
+ build_pointer_type (build_function_type_list (void_type_node, NULL_TREE)));
+ gfor_fndecl_nca_coarray_allocate = gfc_build_library_function_decl_with_spec (
+ get_identifier (PREFIX("nca_coarray_alloc")), "..RRR", integer_type_node, 4,
+ pvoid_type_node, integer_type_node, integer_type_node, integer_type_node,
+ NULL_TREE);
+ gfor_fndecl_nca_coarray_free = gfc_build_library_function_decl_with_spec (
+ get_identifier (PREFIX("nca_coarray_free")), "..RR", integer_type_node, 3,
+ pvoid_type_node, integer_type_node, integer_type_node, NULL_TREE);
+ gfor_fndecl_nca_this_image = gfc_build_library_function_decl_with_spec (
+ get_identifier (PREFIX("nca_coarray_this_image")), ".X", integer_type_node, 1,
+ integer_type_node, NULL_TREE);
+ DECL_PURE_P (gfor_fndecl_nca_this_image) = 1;
+ gfor_fndecl_nca_num_images = gfc_build_library_function_decl_with_spec (
+ get_identifier (PREFIX("nca_coarray_num_images")), ".X", integer_type_node, 1,
+ integer_type_node, NULL_TREE);
+ DECL_PURE_P (gfor_fndecl_nca_num_images) = 1;
+ gfor_fndecl_nca_sync_all = gfc_build_library_function_decl_with_spec (
+ get_identifier (PREFIX("nca_coarray_sync_all")), ".X", void_type_node, 1,
+ build_pointer_type (integer_type_node), NULL_TREE);
+ gfor_fndecl_nca_sync_images = gfc_build_library_function_decl_with_spec (
+ get_identifier (PREFIX("nca_sync_images")), ".RRXXX", void_type_node,
+ 5, integer_type_node, pint_type, pint_type,
+ pchar_type_node, size_type_node, NULL_TREE);
+ gfor_fndecl_nca_lock = gfc_build_library_function_decl_with_spec (
+ get_identifier (PREFIX("nca_lock")), ".w", void_type_node, 1,
+ pvoid_type_node, NULL_TREE);
+ gfor_fndecl_nca_unlock = gfc_build_library_function_decl_with_spec (
+ get_identifier (PREFIX("nca_unlock")), ".w", void_type_node, 1,
+ pvoid_type_node, NULL_TREE);
+
+ gfor_fndecl_nca_reduce_scalar =
+ gfc_build_library_function_decl_with_spec (
+ get_identifier (PREFIX("nca_collsub_reduce_scalar")), ".wrW",
+ void_type_node, 3, pvoid_type_node,
+ build_pointer_type (build_function_type_list (void_type_node,
+ pvoid_type_node, pvoid_type_node, NULL_TREE)),
+ pint_type, NULL_TREE);
+
+ gfor_fndecl_nca_reduce_array =
+ gfc_build_library_function_decl_with_spec (
+ get_identifier (PREFIX("nca_collsub_reduce_array")), ".wrWR",
+ void_type_node, 4, pvoid_type_node,
+ build_pointer_type (build_function_type_list (void_type_node,
+ pvoid_type_node, pvoid_type_node, NULL_TREE)),
+ pint_type, integer_type_node, NULL_TREE);
+
+ gfor_fndecl_nca_broadcast_scalar = gfc_build_library_function_decl_with_spec (
+ get_identifier (PREFIX ("nca_collsub_broadcast_scalar")), ".w..",
+ void_type_node, 3, pvoid_type_node, size_type_node, integer_type_node);
+ gfor_fndecl_nca_broadcast_array = gfc_build_library_function_decl_with_spec (
+ get_identifier (PREFIX ("nca_collsub_broadcast_array")), ".W.",
+ void_type_node, 2, pvoid_type_node, integer_type_node);
+ }
+
gfc_build_intrinsic_function_decls ();
gfc_build_intrinsic_lib_fndecls ();
@@ -4538,6 +4630,76 @@ get_proc_result (gfc_symbol* sym)
}
+void
+gfc_trans_native_coarray (stmtblock_t * init, stmtblock_t *cleanup, gfc_symbol * sym)
+{
+ tree tmp, decl;
+ tree overflow = build_int_cst (integer_type_node, 0), nelems, element_size; /* All unused.  */
+ tree offset;
+ tree elem_size;
+ int alloc_type;
+
+ decl = sym->backend_decl;
+
+ TREE_STATIC(decl) = 1;
+
+ /* Tell the library to handle arrays of locks and event types separately.  */
+ alloc_type = gfc_native_coarray_get_allocation_type (sym);
+
+ if (init)
+ {
+ gfc_array_init_size (decl, sym->as->rank, sym->as->corank, &offset,
+ sym->as->lower, sym->as->upper, init,
+ init, &overflow,
+ NULL_TREE, &nelems, NULL,
+ NULL_TREE, true, NULL, &element_size);
+ gfc_conv_descriptor_offset_set (init, decl, offset);
+ elem_size = size_in_bytes (gfc_get_element_type (TREE_TYPE(decl)));
+ gfc_allocate_native_coarray (init, decl, elem_size, sym->as->corank,
+ alloc_type);
+ }
+
+ if (cleanup)
+ {
+ tmp = build_call_expr_loc (input_location, gfor_fndecl_nca_coarray_free,
+ 2, gfc_build_addr_expr (pvoid_type_node, decl),
+ build_int_cst (integer_type_node, alloc_type),
+ build_int_cst (integer_type_node,
+ sym->as->corank));
+ gfc_add_expr_to_block (cleanup, tmp);
+ }
+}
+
+static void
+finish_coarray_constructor_function (tree *, tree *);
+
+static void
+generate_coarray_constructor_function (tree *, tree *);
+
+static void
+gfc_trans_native_coarray_static (gfc_symbol * sym)
+{
+ tree save_fn_decl, fndecl;
+ generate_coarray_constructor_function (&save_fn_decl, &fndecl);
+ gfc_trans_native_coarray (&caf_init_block, NULL, sym);
+ finish_coarray_constructor_function (&save_fn_decl, &fndecl);
+}
+
+static void
+gfc_trans_native_coarray_inline (gfc_wrapped_block * block, gfc_symbol * sym)
+{
+ stmtblock_t init, cleanup;
+
+ gfc_init_block (&init);
+ gfc_init_block (&cleanup);
+
+ gfc_trans_native_coarray (&init, &cleanup, sym);
+
+ gfc_add_init_cleanup (block, gfc_finish_block (&init), gfc_finish_block (&cleanup));
+}
+
+
+
/* Generate function entry and exit code, and add it to the function body.
This includes:
Allocation and initialization of array variables.
@@ -4833,7 +4995,8 @@ gfc_trans_deferred_vars (gfc_symbol * proc_sym, gfc_wrapped_block * block)
gfc_trans_deferred_array (sym, block);
}
}
- else if (sym->attr.codimension
+ else if (flag_coarray != GFC_FCOARRAY_NATIVE
+ && sym->attr.codimension
&& TREE_STATIC (sym->backend_decl))
{
gfc_init_block (&tmpblock);
@@ -4843,6 +5006,11 @@ gfc_trans_deferred_vars (gfc_symbol * proc_sym, gfc_wrapped_block * block)
NULL_TREE);
continue;
}
+ else if (flag_coarray == GFC_FCOARRAY_NATIVE
+ && sym->attr.codimension)
+ {
+ gfc_trans_native_coarray_inline (block, sym);
+ }
else
{
gfc_save_backend_locus (&loc);
@@ -5333,6 +5501,10 @@ gfc_create_module_variable (gfc_symbol * sym)
&& sym->fn_result_spec));
DECL_CONTEXT (decl) = sym->ns->proc_name->backend_decl;
rest_of_decl_compilation (decl, 1, 0);
+
+ if (flag_coarray == GFC_FCOARRAY_NATIVE && sym->attr.codimension)
+ gfc_trans_native_coarray_static (sym);
+
gfc_module_add_decl (cur_module, decl);
/* Also add length of strings. */
@@ -5730,64 +5902,82 @@ generate_coarray_sym_init (gfc_symbol *sym)
}
-/* Generate constructor function to initialize static, nonallocatable
- coarrays. */
static void
-generate_coarray_init (gfc_namespace * ns __attribute((unused)))
+generate_coarray_constructor_function (tree *save_fn_decl, tree *fndecl)
{
- tree fndecl, tmp, decl, save_fn_decl;
+ tree tmp, decl;
- save_fn_decl = current_function_decl;
+ *save_fn_decl = current_function_decl;
push_function_context ();
tmp = build_function_type_list (void_type_node, NULL_TREE);
- fndecl = build_decl (input_location, FUNCTION_DECL,
- create_tmp_var_name ("_caf_init"), tmp);
+ *fndecl = build_decl (input_location, FUNCTION_DECL,
+ create_tmp_var_name (flag_coarray == GFC_FCOARRAY_LIB ? "_caf_init" : "_nca_init"), tmp);
- DECL_STATIC_CONSTRUCTOR (fndecl) = 1;
- SET_DECL_INIT_PRIORITY (fndecl, DEFAULT_INIT_PRIORITY);
+ DECL_STATIC_CONSTRUCTOR (*fndecl) = 1;
+ SET_DECL_INIT_PRIORITY (*fndecl, DEFAULT_INIT_PRIORITY);
decl = build_decl (input_location, RESULT_DECL, NULL_TREE, void_type_node);
DECL_ARTIFICIAL (decl) = 1;
DECL_IGNORED_P (decl) = 1;
- DECL_CONTEXT (decl) = fndecl;
- DECL_RESULT (fndecl) = decl;
+ DECL_CONTEXT (decl) = *fndecl;
+ DECL_RESULT (*fndecl) = decl;
- pushdecl (fndecl);
- current_function_decl = fndecl;
- announce_function (fndecl);
+ pushdecl (*fndecl);
+ current_function_decl = *fndecl;
+ announce_function (*fndecl);
- rest_of_decl_compilation (fndecl, 0, 0);
- make_decl_rtl (fndecl);
- allocate_struct_function (fndecl, false);
+ rest_of_decl_compilation (*fndecl, 0, 0);
+ make_decl_rtl (*fndecl);
+ allocate_struct_function (*fndecl, false);
pushlevel ();
gfc_init_block (&caf_init_block);
+}
- gfc_traverse_ns (ns, generate_coarray_sym_init);
+static void
+finish_coarray_constructor_function (tree *save_fn_decl, tree *fndecl)
+{
+ tree decl;
- DECL_SAVED_TREE (fndecl) = gfc_finish_block (&caf_init_block);
+ DECL_SAVED_TREE (*fndecl) = gfc_finish_block (&caf_init_block);
decl = getdecls ();
poplevel (1, 1);
- BLOCK_SUPERCONTEXT (DECL_INITIAL (fndecl)) = fndecl;
+ BLOCK_SUPERCONTEXT (DECL_INITIAL (*fndecl)) = *fndecl;
- DECL_SAVED_TREE (fndecl)
- = build3_v (BIND_EXPR, decl, DECL_SAVED_TREE (fndecl),
- DECL_INITIAL (fndecl));
- dump_function (TDI_original, fndecl);
+ DECL_SAVED_TREE (*fndecl)
+ = build3_v (BIND_EXPR, decl, DECL_SAVED_TREE (*fndecl),
+ DECL_INITIAL (*fndecl));
+ dump_function (TDI_original, *fndecl);
cfun->function_end_locus = input_location;
set_cfun (NULL);
- if (decl_function_context (fndecl))
- (void) cgraph_node::create (fndecl);
+ if (decl_function_context (*fndecl))
+ (void) cgraph_node::create (*fndecl);
else
- cgraph_node::finalize_function (fndecl, true);
+ cgraph_node::finalize_function (*fndecl, true);
pop_function_context ();
- current_function_decl = save_fn_decl;
+ current_function_decl = *save_fn_decl;
+}
+
+/* Generate constructor function to initialize static, nonallocatable
+ coarrays. */
+
+static void
+generate_coarray_init (gfc_namespace * ns)
+{
+ tree save_fn_decl, fndecl;
+
+ generate_coarray_constructor_function (&save_fn_decl, &fndecl);
+
+ gfc_traverse_ns (ns, generate_coarray_sym_init);
+
+ finish_coarray_constructor_function (&save_fn_decl, &fndecl);
+
}
@@ -6470,7 +6660,11 @@ create_main_function (tree fndecl)
}
/* Call MAIN__(). */
- tmp = build_call_expr_loc (input_location,
+ if (flag_coarray == GFC_FCOARRAY_NATIVE)
+ tmp = build_call_expr_loc (input_location, gfor_fndecl_nca_master, 1,
+ gfc_build_addr_expr (NULL, fndecl));
+ else
+ tmp = build_call_expr_loc (input_location,
fndecl, 0);
gfc_add_expr_to_block (&body, tmp);
diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c
index 36ff9b5..9979980 100644
--- a/gcc/fortran/trans-expr.c
+++ b/gcc/fortran/trans-expr.c
@@ -2622,8 +2622,14 @@ gfc_maybe_dereference_var (gfc_symbol *sym, tree var, bool descriptor_only_p,
}
else if (!sym->attr.value)
{
+
+ /* Do not dereference native coarray dummies.  */
+ if (false && flag_coarray == GFC_FCOARRAY_NATIVE
+ && sym->attr.codimension && sym->attr.dummy)
+ return var;
+
/* Dereference temporaries for class array dummy arguments. */
- if (sym->attr.dummy && is_classarray
+ else if (sym->attr.dummy && is_classarray
&& GFC_ARRAY_TYPE_P (TREE_TYPE (var)))
{
if (!descriptor_only_p)
@@ -2635,6 +2641,7 @@ gfc_maybe_dereference_var (gfc_symbol *sym, tree var, bool descriptor_only_p,
/* Dereference non-character scalar dummy arguments. */
if (sym->attr.dummy && !sym->attr.dimension
&& !(sym->attr.codimension && sym->attr.allocatable)
+ && !(sym->attr.codimension && flag_coarray == GFC_FCOARRAY_NATIVE)
&& (sym->ts.type != BT_CLASS
|| (!CLASS_DATA (sym)->attr.dimension
&& !(CLASS_DATA (sym)->attr.codimension
@@ -2670,6 +2677,7 @@ gfc_maybe_dereference_var (gfc_symbol *sym, tree var, bool descriptor_only_p,
|| CLASS_DATA (sym)->attr.allocatable
|| CLASS_DATA (sym)->attr.class_pointer))
var = build_fold_indirect_ref_loc (input_location, var);
+
/* And the case where a non-dummy, non-result, non-function,
non-allotable and non-pointer classarray is present. This case was
previously covered by the first if, but with introducing the
@@ -5528,7 +5536,10 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym,
nodesc_arg = nodesc_arg || !comp->attr.always_explicit;
else
nodesc_arg = nodesc_arg || !sym->attr.always_explicit;
-
+#if 0
+ if (flag_coarray == GFC_FCOARRAY_NATIVE && fsym->attr.codimension)
+ nodesc_arg = false;
+#endif
/* Class array expressions are sometimes coming completely unadorned
with either arrayspec or _data component. Correct that here.
OOP-TODO: Move this to the frontend. */
@@ -5720,7 +5731,10 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym,
parmse.want_coarray = 1;
scalar = false;
}
-
+#if 0
+ if (flag_coarray == GFC_FCOARRAY_NATIVE && fsym->attr.codimension)
+ scalar = false;
+#endif
/* A scalar or transformational function. */
if (scalar)
{
@@ -6233,7 +6247,7 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym,
else
gfc_conv_array_parameter (&parmse, e, nodesc_arg, fsym,
sym->name, NULL);
-
+
/* Unallocated allocatable arrays and unassociated pointer arrays
need their dtype setting if they are argument associated with
assumed rank dummies. */
diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c
index 32fe988..b418321 100644
--- a/gcc/fortran/trans-intrinsic.c
+++ b/gcc/fortran/trans-intrinsic.c
@@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see
#include "trans-array.h"
#include "dependency.h" /* For CAF array alias analysis. */
/* Only for gfc_trans_assign and gfc_trans_pointer_assign. */
+#include "trans-stmt.h"
/* This maps Fortran intrinsic math functions to external library or GCC
builtin functions. */
@@ -2363,7 +2364,6 @@ conv_caf_send (gfc_code *code) {
return gfc_finish_block (&block);
}
-
static void
trans_this_image (gfc_se * se, gfc_expr *expr)
{
@@ -2394,14 +2394,18 @@ trans_this_image (gfc_se * se, gfc_expr *expr)
}
else
tmp = integer_zero_node;
- tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_this_image, 1,
- tmp);
+ tmp = build_call_expr_loc (input_location,
+ flag_coarray == GFC_FCOARRAY_NATIVE ?
+ gfor_fndecl_nca_this_image :
+ gfor_fndecl_caf_this_image,
+ 1, tmp);
se->expr = fold_convert (gfc_get_int_type (gfc_default_integer_kind),
tmp);
return;
}
/* Coarray-argument version: THIS_IMAGE(coarray [, dim]). */
+ /* TODO: NCA handle native coarrays. */
type = gfc_get_int_type (gfc_default_integer_kind);
corank = gfc_get_corank (expr->value.function.actual->expr);
@@ -2490,8 +2494,11 @@ trans_this_image (gfc_se * se, gfc_expr *expr)
*/
/* this_image () - 1. */
- tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_this_image, 1,
- integer_zero_node);
+ tmp = build_call_expr_loc (input_location,
+ flag_coarray == GFC_FCOARRAY_NATIVE
+ ? gfor_fndecl_nca_this_image
+ : gfor_fndecl_caf_this_image,
+ 1, integer_zero_node);
tmp = fold_build2_loc (input_location, MINUS_EXPR, type,
fold_convert (type, tmp), build_int_cst (type, 1));
if (corank == 1)
@@ -2774,7 +2781,10 @@ trans_image_index (gfc_se * se, gfc_expr *expr)
num_images = build_int_cst (type, 1);
else
{
- tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_num_images, 2,
+ tmp = build_call_expr_loc (input_location,
+ flag_coarray == GFC_FCOARRAY_NATIVE
+ ? gfor_fndecl_nca_num_images
+ : gfor_fndecl_caf_num_images, 2,
integer_zero_node,
build_int_cst (integer_type_node, -1));
num_images = fold_convert (type, tmp);
@@ -2819,8 +2829,13 @@ trans_num_images (gfc_se * se, gfc_expr *expr)
}
else
failed = build_int_cst (integer_type_node, -1);
- tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_num_images, 2,
- distance, failed);
+
+ if (flag_coarray == GFC_FCOARRAY_NATIVE)
+ tmp = build_call_expr_loc (input_location, gfor_fndecl_nca_num_images, 1,
+ distance);
+ else
+ tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_num_images, 2,
+ distance, failed);
se->expr = fold_convert (gfc_get_int_type (gfc_default_integer_kind), tmp);
}
@@ -3264,7 +3279,10 @@ conv_intrinsic_cobound (gfc_se * se, gfc_expr * expr)
tree cosize;
cosize = gfc_conv_descriptor_cosize (desc, arg->expr->rank, corank);
- tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_num_images,
+ tmp = build_call_expr_loc (input_location,
+ flag_coarray == GFC_FCOARRAY_NATIVE
+ ? gfor_fndecl_nca_num_images
+ : gfor_fndecl_caf_num_images,
2, integer_zero_node,
build_int_cst (integer_type_node, -1));
tmp = fold_build2_loc (input_location, MINUS_EXPR,
@@ -3280,7 +3298,9 @@ conv_intrinsic_cobound (gfc_se * se, gfc_expr * expr)
else if (flag_coarray != GFC_FCOARRAY_SINGLE)
{
/* ubound = lbound + num_images() - 1. */
- tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_num_images,
+ tmp = build_call_expr_loc (input_location,
+ flag_coarray == GFC_FCOARRAY_NATIVE ? gfor_fndecl_nca_num_images :
+ gfor_fndecl_caf_num_images,
2, integer_zero_node,
build_int_cst (integer_type_node, -1));
tmp = fold_build2_loc (input_location, MINUS_EXPR,
@@ -11004,6 +11024,136 @@ gfc_walk_intrinsic_function (gfc_ss * ss, gfc_expr * expr,
}
}
+/* Helper function - advance to the next argument. */
+
+static tree
+trans_argument (gfc_actual_arglist **curr_al, stmtblock_t *blk,
+ stmtblock_t *postblk, gfc_se *argse, tree def)
+{
+ if (!(*curr_al)->expr)
+ return def;
+ if ((*curr_al)->expr->rank > 0)
+ gfc_conv_expr_descriptor (argse, (*curr_al)->expr);
+ else
+ gfc_conv_expr (argse, (*curr_al)->expr);
+ gfc_add_block_to_block (blk, &argse->pre);
+ gfc_add_block_to_block (postblk, &argse->post);
+ *curr_al = (*curr_al)->next;
+ return argse->expr;
+}
+
+/* Convert CO_REDUCE for native coarrays. */
+
+static tree
+conv_nca_reduce (gfc_code *code, stmtblock_t *blk, stmtblock_t *postblk)
+{
+ gfc_actual_arglist *curr_al;
+ tree var, reduce_op, result_image, elem_size;
+ gfc_se argse;
+ int is_array;
+
+ curr_al = code->ext.actual;
+
+ gfc_init_se (&argse, NULL);
+ argse.want_pointer = 1;
+ is_array = curr_al->expr->rank > 0;
+ var = trans_argument (&curr_al, blk, postblk, &argse, NULL_TREE);
+
+ gfc_init_se (&argse, NULL);
+ argse.want_pointer = 1;
+ reduce_op = trans_argument (&curr_al, blk, postblk, &argse, NULL_TREE);
+
+ gfc_init_se (&argse, NULL);
+ argse.want_pointer = 1;
+ result_image = trans_argument (&curr_al, blk, postblk, &argse,
+ null_pointer_node);
+
+ if (is_array)
+ return build_call_expr_loc (input_location, gfor_fndecl_nca_reduce_array,
+ 3, var, reduce_op, result_image);
+
+ elem_size = size_in_bytes(TREE_TYPE(TREE_TYPE(var)));
+ return build_call_expr_loc (input_location, gfor_fndecl_nca_reduce_scalar, 4,
+ var, elem_size, reduce_op, result_image);
+}
+
+static tree
+conv_nca_broadcast (gfc_code *code, stmtblock_t *blk, stmtblock_t *postblk)
+{
+ gfc_actual_arglist *curr_al;
+ tree var, source_image, elem_size;
+ gfc_se argse;
+ int is_array;
+
+ curr_al = code->ext.actual;
+
+ gfc_init_se (&argse, NULL);
+ argse.want_pointer = 1;
+ is_array = curr_al->expr->rank > 0;
+ var = trans_argument (&curr_al, blk, postblk, &argse, NULL_TREE);
+
+ gfc_init_se (&argse, NULL);
+ argse.want_pointer = 0;
+ source_image = trans_argument (&curr_al, blk, postblk, &argse, NULL_TREE);
+
+ if (is_array)
+ return build_call_expr_loc (input_location, gfor_fndecl_nca_broadcast_array,
+ 2, var, source_image);
+
+ elem_size = size_in_bytes(TREE_TYPE(TREE_TYPE(var)));
+ return build_call_expr_loc (input_location, gfor_fndecl_nca_broadcast_scalar,
+ 3, var, elem_size, source_image);
+}
+
+static tree conv_co_collective (gfc_code *);
+
+/* Convert collective subroutines for native coarrays. */
+
+static tree
+conv_nca_collective (gfc_code *code)
+{
+
+ switch (code->resolved_isym->id)
+ {
+ case GFC_ISYM_CO_REDUCE:
+ {
+ stmtblock_t block, postblock;
+ tree fcall;
+
+ gfc_start_block (&block);
+ gfc_init_block (&postblock);
+ fcall = conv_nca_reduce (code, &block, &postblock);
+ gfc_add_expr_to_block (&block, fcall);
+ gfc_add_block_to_block (&block, &postblock);
+ return gfc_finish_block (&block);
+ }
+ case GFC_ISYM_CO_SUM:
+ case GFC_ISYM_CO_MIN:
+ case GFC_ISYM_CO_MAX:
+ return gfc_trans_call (code, false, NULL_TREE, NULL_TREE, false);
+
+ case GFC_ISYM_CO_BROADCAST:
+ {
+ stmtblock_t block, postblock;
+ tree fcall;
+
+ gfc_start_block (&block);
+ gfc_init_block (&postblock);
+ fcall = conv_nca_broadcast (code, &block, &postblock);
+ gfc_add_expr_to_block (&block, fcall);
+ gfc_add_block_to_block (&block, &postblock);
+ return gfc_finish_block (&block);
+ }
+#if 0
+ case GFC_ISYM_CO_BROADCAST:
+ return conv_co_collective (code);
+#endif
+ default:
+ gfc_internal_error ("Invalid or unsupported isym");
+ break;
+ }
+}
+
static tree
conv_co_collective (gfc_code *code)
{
@@ -11111,7 +11261,13 @@ conv_co_collective (gfc_code *code)
errmsg_len = build_zero_cst (size_type_node);
}
+ /* For native coarrays, we only come here for CO_BROADCAST. */
+
+ gcc_assert (code->resolved_isym->id == GFC_ISYM_CO_BROADCAST
+ || flag_coarray != GFC_FCOARRAY_NATIVE);
+
/* Generate the function call. */
+
switch (code->resolved_isym->id)
{
case GFC_ISYM_CO_BROADCAST:
@@ -12104,7 +12260,10 @@ gfc_conv_intrinsic_subroutine (gfc_code *code)
case GFC_ISYM_CO_MAX:
case GFC_ISYM_CO_REDUCE:
case GFC_ISYM_CO_SUM:
- res = conv_co_collective (code);
+ if (flag_coarray == GFC_FCOARRAY_NATIVE)
+ res = conv_nca_collective (code);
+ else
+ res = conv_co_collective (code);
break;
case GFC_ISYM_FREE:
diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c
index 1f183b9..4897fa1 100644
--- a/gcc/fortran/trans-stmt.c
+++ b/gcc/fortran/trans-stmt.c
@@ -830,7 +830,9 @@ gfc_trans_lock_unlock (gfc_code *code, gfc_exec_op op)
/* Short cut: For single images without STAT= or LOCK_ACQUIRED
return early. (ERRMSG= is always untouched for -fcoarray=single.) */
- if (!code->expr2 && !code->expr4 && flag_coarray != GFC_FCOARRAY_LIB)
+ if (!code->expr2 && !code->expr4
+ && !(flag_coarray == GFC_FCOARRAY_LIB
+ || flag_coarray == GFC_FCOARRAY_NATIVE))
return NULL_TREE;
if (code->expr2)
@@ -990,6 +992,29 @@ gfc_trans_lock_unlock (gfc_code *code, gfc_exec_op op)
return gfc_finish_block (&se.pre);
}
+ else if (flag_coarray == GFC_FCOARRAY_NATIVE)
+ {
+ gfc_se arg;
+ stmtblock_t res;
+ tree call;
+ tree tmp;
+
+ gfc_init_se (&arg, NULL);
+ gfc_start_block (&res);
+ gfc_conv_expr (&arg, code->expr1);
+ gfc_add_block_to_block (&res, &arg.pre);
+ call = build_call_expr_loc (input_location, op == EXEC_LOCK ?
+ gfor_fndecl_nca_lock
+ : gfor_fndecl_nca_unlock,
+ 1, fold_convert (pvoid_type_node,
+ gfc_build_addr_expr (NULL, arg.expr)));
+ gfc_add_expr_to_block (&res, call);
+ gfc_add_block_to_block (&res, &arg.post);
+ tmp = gfc_trans_memory_barrier ();
+ gfc_add_expr_to_block (&res, tmp);
+
+ return gfc_finish_block (&res);
+ }
if (stat != NULL_TREE)
gfc_add_modify (&se.pre, stat, build_int_cst (TREE_TYPE (stat), 0));
@@ -1183,7 +1208,8 @@ gfc_trans_sync (gfc_code *code, gfc_exec_op type)
/* Short cut: For single images without bound checking or without STAT=,
return early. (ERRMSG= is always untouched for -fcoarray=single.) */
if (!code->expr2 && !(gfc_option.rtcheck & GFC_RTCHECK_BOUNDS)
- && flag_coarray != GFC_FCOARRAY_LIB)
+ && flag_coarray != GFC_FCOARRAY_LIB
+ && flag_coarray != GFC_FCOARRAY_NATIVE)
return NULL_TREE;
gfc_init_se (&se, NULL);
@@ -1206,7 +1232,7 @@ gfc_trans_sync (gfc_code *code, gfc_exec_op type)
else
stat = null_pointer_node;
- if (code->expr3 && flag_coarray == GFC_FCOARRAY_LIB)
+ if (code->expr3 && (flag_coarray == GFC_FCOARRAY_LIB || flag_coarray == GFC_FCOARRAY_NATIVE))
{
gcc_assert (code->expr3->expr_type == EXPR_VARIABLE);
gfc_init_se (&argse, NULL);
@@ -1216,7 +1242,7 @@ gfc_trans_sync (gfc_code *code, gfc_exec_op type)
errmsg = gfc_build_addr_expr (NULL, argse.expr);
errmsglen = fold_convert (size_type_node, argse.string_length);
}
- else if (flag_coarray == GFC_FCOARRAY_LIB)
+ else if (flag_coarray == GFC_FCOARRAY_LIB || flag_coarray == GFC_FCOARRAY_NATIVE)
{
errmsg = null_pointer_node;
errmsglen = build_int_cst (size_type_node, 0);
@@ -1229,7 +1255,7 @@ gfc_trans_sync (gfc_code *code, gfc_exec_op type)
{
tree images2 = fold_convert (integer_type_node, images);
tree cond;
- if (flag_coarray != GFC_FCOARRAY_LIB)
+ if (flag_coarray != GFC_FCOARRAY_LIB && flag_coarray != GFC_FCOARRAY_NATIVE)
cond = fold_build2_loc (input_location, NE_EXPR, logical_type_node,
images, build_int_cst (TREE_TYPE (images), 1));
else
@@ -1253,17 +1279,13 @@ gfc_trans_sync (gfc_code *code, gfc_exec_op type)
/* Per F2008, 8.5.1, a SYNC MEMORY is implied by calling the
image control statements SYNC IMAGES and SYNC ALL. */
- if (flag_coarray == GFC_FCOARRAY_LIB)
+ if (flag_coarray == GFC_FCOARRAY_LIB || flag_coarray == GFC_FCOARRAY_NATIVE)
{
- tmp = gfc_build_string_const (strlen ("memory")+1, "memory"),
- tmp = build5_loc (input_location, ASM_EXPR, void_type_node,
- gfc_build_string_const (1, ""), NULL_TREE, NULL_TREE,
- tree_cons (NULL_TREE, tmp, NULL_TREE), NULL_TREE);
- ASM_VOLATILE_P (tmp) = 1;
+ tmp = gfc_trans_memory_barrier ();
gfc_add_expr_to_block (&se.pre, tmp);
}
- if (flag_coarray != GFC_FCOARRAY_LIB)
+ if (flag_coarray != GFC_FCOARRAY_LIB && flag_coarray != GFC_FCOARRAY_NATIVE)
{
/* Set STAT to zero. */
if (code->expr2)
@@ -1285,8 +1307,14 @@ gfc_trans_sync (gfc_code *code, gfc_exec_op type)
tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_sync_memory,
3, stat, errmsg, errmsglen);
else
- tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_sync_all,
- 3, stat, errmsg, errmsglen);
+ {
+ if (flag_coarray == GFC_FCOARRAY_LIB)
+ tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_sync_all,
+ 3, stat, errmsg, errmsglen);
+ else
+ tmp = build_call_expr_loc (input_location, gfor_fndecl_nca_sync_all,
+ 1, stat);
+ }
gfc_add_expr_to_block (&se.pre, tmp);
}
@@ -1351,7 +1379,10 @@ gfc_trans_sync (gfc_code *code, gfc_exec_op type)
if (TREE_TYPE (stat) == integer_type_node)
stat = gfc_build_addr_expr (NULL, stat);
- tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_sync_images,
+ tmp = build_call_expr_loc (input_location,
+ flag_coarray == GFC_FCOARRAY_NATIVE
+ ? gfor_fndecl_nca_sync_images
+ : gfor_fndecl_caf_sync_images,
5, fold_convert (integer_type_node, len),
images, stat, errmsg, errmsglen);
gfc_add_expr_to_block (&se.pre, tmp);
@@ -1360,7 +1391,10 @@ gfc_trans_sync (gfc_code *code, gfc_exec_op type)
{
tree tmp_stat = gfc_create_var (integer_type_node, "stat");
- tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_sync_images,
+ tmp = build_call_expr_loc (input_location,
+ flag_coarray == GFC_FCOARRAY_NATIVE
+ ? gfor_fndecl_nca_sync_images
+ : gfor_fndecl_caf_sync_images,
5, fold_convert (integer_type_node, len),
images, gfc_build_addr_expr (NULL, tmp_stat),
errmsg, errmsglen);
@@ -1596,6 +1630,11 @@ gfc_trans_critical (gfc_code *code)
gfc_add_expr_to_block (&block, tmp);
}
+ else if (flag_coarray == GFC_FCOARRAY_NATIVE)
+ {
+ tmp = gfc_trans_lock_unlock (code, EXEC_LOCK);
+ gfc_add_expr_to_block (&block, tmp);
+ }
tmp = gfc_trans_code (code->block->next);
gfc_add_expr_to_block (&block, tmp);
@@ -1620,6 +1659,11 @@ gfc_trans_critical (gfc_code *code)
gfc_add_expr_to_block (&block, tmp);
}
+ else if (flag_coarray == GFC_FCOARRAY_NATIVE)
+ {
+ tmp = gfc_trans_lock_unlock (code, EXEC_UNLOCK);
+ gfc_add_expr_to_block (&block, tmp);
+ }
return gfc_finish_block (&block);
}
@@ -7169,6 +7213,7 @@ gfc_trans_deallocate (gfc_code *code)
tree apstat, pstat, stat, errmsg, errlen, tmp;
tree label_finish, label_errmsg;
stmtblock_t block;
+ bool is_native_coarray = false;
pstat = apstat = stat = errmsg = errlen = tmp = NULL_TREE;
label_finish = label_errmsg = NULL_TREE;
@@ -7254,8 +7299,27 @@ gfc_trans_deallocate (gfc_code *code)
? GFC_STRUCTURE_CAF_MODE_DEALLOC_ONLY : 0);
}
}
+ else if (flag_coarray == GFC_FCOARRAY_NATIVE)
+ {
+ gfc_ref *ref, *last;
- if (expr->rank || is_coarray_array)
+ for (ref = expr->ref, last = ref; ref; last = ref, ref = ref->next);
+ ref = last;
+ if (ref->type == REF_ARRAY && ref->u.ar.codimen)
+ {
+ gfc_symbol *sym = expr->symtree->n.sym;
+ int alloc_type = gfc_native_coarray_get_allocation_type (sym);
+ tmp = build_call_expr_loc (input_location,
+ gfor_fndecl_nca_coarray_free,
+ 2, gfc_build_addr_expr (pvoid_type_node, se.expr),
+ build_int_cst (integer_type_node,
+ alloc_type));
+ gfc_add_expr_to_block (&block, tmp);
+ is_native_coarray = true;
+ }
+ }
+
+ if ((expr->rank || is_coarray_array) && !is_native_coarray)
{
gfc_ref *ref;
@@ -7344,7 +7408,7 @@ gfc_trans_deallocate (gfc_code *code)
gfc_reset_len (&se.pre, al->expr);
}
}
- else
+ else if (!is_native_coarray)
{
tmp = gfc_deallocate_scalar_with_status (se.expr, pstat, label_finish,
false, al->expr,
diff --git a/gcc/fortran/trans-types.c b/gcc/fortran/trans-types.c
index 26fdb28..f100d34 100644
--- a/gcc/fortran/trans-types.c
+++ b/gcc/fortran/trans-types.c
@@ -1345,6 +1345,10 @@ gfc_is_nodesc_array (gfc_symbol * sym)
gcc_assert (array_attr->dimension || array_attr->codimension);
+ /* We need a descriptor for native coarrays. */
+ if (flag_coarray == GFC_FCOARRAY_NATIVE && sym->as && sym->as->corank)
+ return 0;
+
/* We only want local arrays. */
if ((sym->ts.type != BT_CLASS && sym->attr.pointer)
|| (sym->ts.type == BT_CLASS && CLASS_DATA (sym)->attr.class_pointer)
@@ -1381,12 +1385,18 @@ gfc_build_array_type (tree type, gfc_array_spec * as,
tree ubound[GFC_MAX_DIMENSIONS];
int n, corank;
- /* Assumed-shape arrays do not have codimension information stored in the
- descriptor. */
- corank = MAX (as->corank, codim);
- if (as->type == AS_ASSUMED_SHAPE ||
- (as->type == AS_ASSUMED_RANK && akind == GFC_ARRAY_ALLOCATABLE))
- corank = codim;
+ /* For -fcoarray=lib, assumed-shape arrays do not have codimension
+ information stored in the descriptor. */
+ if (flag_coarray != GFC_FCOARRAY_NATIVE)
+ {
+ corank = MAX (as->corank, codim);
+
+ if (as->type == AS_ASSUMED_SHAPE ||
+ (as->type == AS_ASSUMED_RANK && akind == GFC_ARRAY_ALLOCATABLE))
+ corank = codim;
+ }
+ else
+ corank = as->corank;
if (as->type == AS_ASSUMED_RANK)
for (n = 0; n < GFC_MAX_DIMENSIONS; n++)
@@ -1427,7 +1437,7 @@ gfc_build_array_type (tree type, gfc_array_spec * as,
corank, lbound, ubound, 0, akind,
restricted);
}
-
+
/* Returns the struct descriptor_dimension type. */
static tree
@@ -1598,7 +1608,7 @@ gfc_get_nodesc_array_type (tree etype, gfc_array_spec * as, gfc_packed packed,
/* We don't use build_array_type because this does not include
lang-specific information (i.e. the bounds of the array) when checking
for duplicates. */
- if (as->rank)
+ if (as->rank || (flag_coarray == GFC_FCOARRAY_NATIVE && as->corank))
type = make_node (ARRAY_TYPE);
else
type = build_variant_type_copy (etype);
@@ -1665,6 +1675,7 @@ gfc_get_nodesc_array_type (tree etype, gfc_array_spec * as, gfc_packed packed,
if (packed == PACKED_NO || packed == PACKED_PARTIAL)
known_stride = 0;
}
+
for (n = as->rank; n < as->rank + as->corank; n++)
{
expr = as->lower[n];
@@ -1672,7 +1683,7 @@ gfc_get_nodesc_array_type (tree etype, gfc_array_spec * as, gfc_packed packed,
tmp = gfc_conv_mpz_to_tree (expr->value.integer,
gfc_index_integer_kind);
else
- tmp = NULL_TREE;
+ tmp = NULL_TREE;
GFC_TYPE_ARRAY_LBOUND (type, n) = tmp;
expr = as->upper[n];
@@ -1680,16 +1691,16 @@ gfc_get_nodesc_array_type (tree etype, gfc_array_spec * as, gfc_packed packed,
tmp = gfc_conv_mpz_to_tree (expr->value.integer,
gfc_index_integer_kind);
else
- tmp = NULL_TREE;
+ tmp = NULL_TREE;
if (n < as->rank + as->corank - 1)
- GFC_TYPE_ARRAY_UBOUND (type, n) = tmp;
+ GFC_TYPE_ARRAY_UBOUND (type, n) = tmp;
}
- if (known_offset)
- {
- GFC_TYPE_ARRAY_OFFSET (type) =
- gfc_conv_mpz_to_tree (offset, gfc_index_integer_kind);
- }
+ if (flag_coarray == GFC_FCOARRAY_NATIVE && as->rank == 0 && as->corank != 0)
+ GFC_TYPE_ARRAY_OFFSET (type) = NULL_TREE;
+ else if (known_offset)
+ GFC_TYPE_ARRAY_OFFSET (type) =
+ gfc_conv_mpz_to_tree (offset, gfc_index_integer_kind);
else
GFC_TYPE_ARRAY_OFFSET (type) = NULL_TREE;
@@ -1714,7 +1725,7 @@ gfc_get_nodesc_array_type (tree etype, gfc_array_spec * as, gfc_packed packed,
build_qualified_type (GFC_TYPE_ARRAY_DATAPTR_TYPE (type),
TYPE_QUAL_RESTRICT);
- if (as->rank == 0)
+ if (as->rank == 0 && (flag_coarray != GFC_FCOARRAY_NATIVE || as->corank == 0))
{
if (packed != PACKED_STATIC || flag_coarray == GFC_FCOARRAY_LIB)
{
@@ -1982,7 +1993,7 @@ gfc_get_array_type_bounds (tree etype, int dimen, int codimen, tree * lbound,
/* TODO: known offsets for descriptors. */
GFC_TYPE_ARRAY_OFFSET (fat_type) = NULL_TREE;
- if (dimen == 0)
+ if (flag_coarray != GFC_FCOARRAY_NATIVE && dimen == 0)
{
arraytype = build_pointer_type (etype);
if (restricted)
@@ -2281,6 +2292,10 @@ gfc_sym_type (gfc_symbol * sym)
: GFC_ARRAY_POINTER;
else if (sym->attr.allocatable)
akind = GFC_ARRAY_ALLOCATABLE;
+
+      /* FIXME: For normal coarrays, we pass a bool where an int is
+	 expected here.  Is this really intended?  */
+
type = gfc_build_array_type (type, sym->as, akind, restricted,
sym->attr.contiguous, false);
}
diff --git a/gcc/fortran/trans.c b/gcc/fortran/trans.c
index ed05426..2b60550 100644
--- a/gcc/fortran/trans.c
+++ b/gcc/fortran/trans.c
@@ -47,6 +47,21 @@ static gfc_file *gfc_current_backend_file;
const char gfc_msg_fault[] = N_("Array reference out of bounds");
const char gfc_msg_wrong_return[] = N_("Incorrect function return value");
+/* Insert a memory barrier into the code. */
+
+tree
+gfc_trans_memory_barrier (void)
+{
+ tree tmp;
+
+  tmp = gfc_build_string_const (strlen ("memory")+1, "memory");
+ tmp = build5_loc (input_location, ASM_EXPR, void_type_node,
+ gfc_build_string_const (1, ""), NULL_TREE, NULL_TREE,
+ tree_cons (NULL_TREE, tmp, NULL_TREE), NULL_TREE);
+ ASM_VOLATILE_P (tmp) = 1;
+
+ return tmp;
+}
/* Return a location_t suitable for 'tree' for a gfortran locus. The way the
parser works in gfortran, loc->lb->location contains only the line number
@@ -403,15 +418,16 @@ gfc_build_array_ref (tree base, tree offset, tree decl, tree vptr)
tree tmp;
tree span = NULL_TREE;
- if (GFC_ARRAY_TYPE_P (type) && GFC_TYPE_ARRAY_RANK (type) == 0)
+ if (GFC_ARRAY_TYPE_P (type) && GFC_TYPE_ARRAY_RANK (type) == 0
+ && flag_coarray != GFC_FCOARRAY_NATIVE)
{
gcc_assert (GFC_TYPE_ARRAY_CORANK (type) > 0);
return fold_convert (TYPE_MAIN_VARIANT (type), base);
}
- /* Scalar coarray, there is nothing to do. */
- if (TREE_CODE (type) != ARRAY_TYPE)
+ /* Scalar library coarray, there is nothing to do. */
+ if (TREE_CODE (type) != ARRAY_TYPE && flag_coarray != GFC_FCOARRAY_NATIVE)
{
gcc_assert (decl == NULL_TREE);
gcc_assert (integer_zerop (offset));
diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h
index d257963..974785f 100644
--- a/gcc/fortran/trans.h
+++ b/gcc/fortran/trans.h
@@ -501,6 +501,9 @@ void gfc_conv_expr_reference (gfc_se * se, gfc_expr * expr,
bool add_clobber = false);
void gfc_conv_expr_type (gfc_se * se, gfc_expr *, tree);
+/* Insert a memory barrier into the code. */
+
+tree gfc_trans_memory_barrier (void);
/* trans-expr.c */
tree gfc_conv_scalar_to_descriptor (gfc_se *, tree, symbol_attribute);
@@ -890,6 +893,21 @@ extern GTY(()) tree gfor_fndecl_co_reduce;
extern GTY(()) tree gfor_fndecl_co_sum;
extern GTY(()) tree gfor_fndecl_caf_is_present;
+
+/* Native coarray library function decls. */
+extern GTY(()) tree gfor_fndecl_nca_this_image;
+extern GTY(()) tree gfor_fndecl_nca_num_images;
+extern GTY(()) tree gfor_fndecl_nca_coarray_allocate;
+extern GTY(()) tree gfor_fndecl_nca_coarray_free;
+extern GTY(()) tree gfor_fndecl_nca_sync_images;
+extern GTY(()) tree gfor_fndecl_nca_sync_all;
+extern GTY(()) tree gfor_fndecl_nca_lock;
+extern GTY(()) tree gfor_fndecl_nca_unlock;
+extern GTY(()) tree gfor_fndecl_nca_reduce_scalar;
+extern GTY(()) tree gfor_fndecl_nca_reduce_array;
+extern GTY(()) tree gfor_fndecl_nca_broadcast_scalar;
+extern GTY(()) tree gfor_fndecl_nca_broadcast_array;
+
/* Math functions. Many other math functions are handled in
trans-intrinsic.c. */
diff --git a/libgfortran/Makefile.am b/libgfortran/Makefile.am
index 61bf05d..683fd04 100644
--- a/libgfortran/Makefile.am
+++ b/libgfortran/Makefile.am
@@ -36,14 +36,21 @@ gfor_cdir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include
LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS)) \
$(lt_host_flags)
+if LIBGFOR_NATIVE_COARRAY
+COARRAY_LIBS=$(PTHREAD_LIBS) $(RT_LIBS)
+else
+COARRAY_LIBS=
+endif
+
toolexeclib_LTLIBRARIES = libgfortran.la
toolexeclib_DATA = libgfortran.spec
libgfortran_la_LINK = $(LINK) $(libgfortran_la_LDFLAGS)
libgfortran_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \
$(LTLDFLAGS) $(LIBQUADLIB) ../libbacktrace/libbacktrace.la \
$(HWCAP_LDFLAGS) \
- -lm $(extra_ldflags_libgfortran) \
+ -lm $(COARRAY_LIBS) $(extra_ldflags_libgfortran) \
$(version_arg) -Wc,-shared-libgcc
+
libgfortran_la_DEPENDENCIES = $(version_dep) libgfortran.spec $(LIBQUADLIB_DEP)
cafexeclib_LTLIBRARIES = libcaf_single.la
@@ -53,6 +60,37 @@ libcaf_single_la_LDFLAGS = -static
libcaf_single_la_DEPENDENCIES = caf/libcaf.h
libcaf_single_la_LINK = $(LINK) $(libcaf_single_la_LDFLAGS)
+i_nca_minmax_c = \
+ $(srcdir)/generated/nca_minmax_i1.c \
+ $(srcdir)/generated/nca_minmax_i2.c \
+ $(srcdir)/generated/nca_minmax_i4.c \
+ $(srcdir)/generated/nca_minmax_i8.c \
+ $(srcdir)/generated/nca_minmax_i16.c \
+ $(srcdir)/generated/nca_minmax_r4.c \
+ $(srcdir)/generated/nca_minmax_r8.c \
+ $(srcdir)/generated/nca_minmax_r10.c \
+ $(srcdir)/generated/nca_minmax_r16.c
+
+i_nca_minmax_s_c = \
+ $(srcdir)/generated/nca_minmax_s1.c \
+ $(srcdir)/generated/nca_minmax_s4.c
+
+if LIBGFOR_NATIVE_COARRAY
+
+mylib_LTLIBRARIES = libnca.la
+mylibdir = $(libdir)/gcc/$(target_alias)/$(gcc_version)$(MULTISUBDIR)
+libnca_la_SOURCES = nca/alloc.c nca/allocator.c nca/coarraynative.c \
+ nca/hashmap.c \
+ nca/sync.c nca/util.c nca/wrapper.c nca/collective_subroutine.c \
+ nca/shared_memory.c \
+ $(i_nca_minmax_c) $(i_nca_minmax_s_c)
+libnca_la_DEPENDENCIES = nca/alloc.h nca/allocator.h nca/hashmap.h \
+	nca/libcoarraynative.h nca/sync.h nca/shared_memory.h \
+	nca/util.h nca/lock.h nca/collective_subroutine.h \
+	nca/collective_inline.h
+libnca_la_LINK = $(LINK) $(libnca_la_LDFLAGS)
+endif
+
if IEEE_SUPPORT
fincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)$(MULTISUBDIR)/finclude
nodist_finclude_HEADERS = ieee_arithmetic.mod ieee_exceptions.mod ieee_features.mod
@@ -83,6 +121,10 @@ if LIBGFOR_MINIMAL
AM_CFLAGS += -DLIBGFOR_MINIMAL
endif
+if LIBGFOR_NATIVE_COARRAY
+AM_CFLAGS += $(PTHREAD_CFLAGS)
+endif
+
gfor_io_src= \
io/size_from_kind.c
@@ -1231,9 +1273,20 @@ $(gfor_built_specific2_src): m4/specific2.m4 m4/head.m4
$(gfor_misc_specifics): m4/misc_specifics.m4 m4/head.m4
$(M4) -Dfile=$@ -I$(srcdir)/m4 misc_specifics.m4 > $@
+
+
+if LIBGFOR_NATIVE_COARRAY
+$(i_nca_minmax_c): m4/nca_minmax.m4 $(I_M4_DEPS)
+ $(M4) -Dfile=$@ -I$(srcdir)/m4 nca_minmax.m4 > $@
+
+$(i_nca_minmax_s_c): m4/nca-minmax-s.m4 $(I_M4_DEPS)
+ $(M4) -Dfile=$@ -I$(srcdir)/m4 nca-minmax-s.m4 > $@
+endif
+
## end of maintainer mode only rules
endif
+
EXTRA_DIST = $(m4_files)
# target overrides
diff --git a/libgfortran/Makefile.in b/libgfortran/Makefile.in
index 3d043aa..2379fe3 100644
--- a/libgfortran/Makefile.in
+++ b/libgfortran/Makefile.in
@@ -92,7 +92,8 @@ build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
@LIBGFOR_MINIMAL_TRUE@am__append_1 = -DLIBGFOR_MINIMAL
-@LIBGFOR_MINIMAL_FALSE@am__append_2 = \
+@LIBGFOR_NATIVE_COARRAY_TRUE@am__append_2 = $(PTHREAD_CFLAGS)
+@LIBGFOR_MINIMAL_FALSE@am__append_3 = \
@LIBGFOR_MINIMAL_FALSE@io/close.c \
@LIBGFOR_MINIMAL_FALSE@io/file_pos.c \
@LIBGFOR_MINIMAL_FALSE@io/format.c \
@@ -110,7 +111,7 @@ target_triplet = @target@
@LIBGFOR_MINIMAL_FALSE@io/fbuf.c \
@LIBGFOR_MINIMAL_FALSE@io/async.c
-@LIBGFOR_MINIMAL_FALSE@am__append_3 = \
+@LIBGFOR_MINIMAL_FALSE@am__append_4 = \
@LIBGFOR_MINIMAL_FALSE@intrinsics/access.c \
@LIBGFOR_MINIMAL_FALSE@intrinsics/c99_functions.c \
@LIBGFOR_MINIMAL_FALSE@intrinsics/chdir.c \
@@ -143,9 +144,9 @@ target_triplet = @target@
@LIBGFOR_MINIMAL_FALSE@intrinsics/umask.c \
@LIBGFOR_MINIMAL_FALSE@intrinsics/unlink.c
-@IEEE_SUPPORT_TRUE@am__append_4 = ieee/ieee_helper.c
-@LIBGFOR_MINIMAL_TRUE@am__append_5 = runtime/minimal.c
-@LIBGFOR_MINIMAL_FALSE@am__append_6 = \
+@IEEE_SUPPORT_TRUE@am__append_5 = ieee/ieee_helper.c
+@LIBGFOR_MINIMAL_TRUE@am__append_6 = runtime/minimal.c
+@LIBGFOR_MINIMAL_FALSE@am__append_7 = \
@LIBGFOR_MINIMAL_FALSE@runtime/backtrace.c \
@LIBGFOR_MINIMAL_FALSE@runtime/convert_char.c \
@LIBGFOR_MINIMAL_FALSE@runtime/environ.c \
@@ -157,7 +158,7 @@ target_triplet = @target@
# dummy sources for libtool
-@onestep_TRUE@am__append_7 = libgfortran_c.c libgfortran_f.f90
+@onestep_TRUE@am__append_8 = libgfortran_c.c libgfortran_f.f90
subdir = .
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/../config/depstand.m4 \
@@ -214,10 +215,11 @@ am__uninstall_files_from_dir = { \
|| { echo " ( cd '$$dir' && rm -f" $$files ")"; \
$(am__cd) "$$dir" && rm -f $$files; }; \
}
-am__installdirs = "$(DESTDIR)$(cafexeclibdir)" \
+am__installdirs = "$(DESTDIR)$(cafexeclibdir)" "$(DESTDIR)$(mylibdir)" \
"$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(toolexeclibdir)" \
"$(DESTDIR)$(gfor_cdir)" "$(DESTDIR)$(fincludedir)"
-LTLIBRARIES = $(cafexeclib_LTLIBRARIES) $(toolexeclib_LTLIBRARIES)
+LTLIBRARIES = $(cafexeclib_LTLIBRARIES) $(mylib_LTLIBRARIES) \
+ $(toolexeclib_LTLIBRARIES)
libcaf_single_la_LIBADD =
am_libcaf_single_la_OBJECTS = single.lo
libcaf_single_la_OBJECTS = $(am_libcaf_single_la_OBJECTS)
@@ -467,6 +469,21 @@ am__objects_65 = $(am__objects_3) $(am__objects_53) $(am__objects_55) \
@onestep_FALSE@am_libgfortran_la_OBJECTS = $(am__objects_65)
@onestep_TRUE@am_libgfortran_la_OBJECTS = libgfortran_c.lo
libgfortran_la_OBJECTS = $(am_libgfortran_la_OBJECTS)
+libnca_la_LIBADD =
+am__objects_66 = nca_minmax_i1.lo nca_minmax_i2.lo nca_minmax_i4.lo \
+ nca_minmax_i8.lo nca_minmax_i16.lo nca_minmax_r4.lo \
+ nca_minmax_r8.lo nca_minmax_r10.lo nca_minmax_r16.lo
+am__objects_67 = nca_minmax_s1.lo nca_minmax_s4.lo
+@LIBGFOR_NATIVE_COARRAY_TRUE@am_libnca_la_OBJECTS = alloc.lo \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ allocator.lo coarraynative.lo \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ hashmap.lo sync.lo util.lo \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ wrapper.lo \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ collective_subroutine.lo \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ shared_memory.lo \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ $(am__objects_66) \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ $(am__objects_67)
+libnca_la_OBJECTS = $(am_libnca_la_OBJECTS)
+@LIBGFOR_NATIVE_COARRAY_TRUE@am_libnca_la_rpath = -rpath $(mylibdir)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
@@ -530,7 +547,8 @@ AM_V_FC = $(am__v_FC_@AM_V@)
am__v_FC_ = $(am__v_FC_@AM_DEFAULT_V@)
am__v_FC_0 = @echo " FC " $@;
am__v_FC_1 =
-SOURCES = $(libcaf_single_la_SOURCES) $(libgfortran_la_SOURCES)
+SOURCES = $(libcaf_single_la_SOURCES) $(libgfortran_la_SOURCES) \
+ $(libnca_la_SOURCES)
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
@@ -569,7 +587,7 @@ AMTAR = @AMTAR@
# Some targets require additional compiler options for IEEE compatibility.
AM_CFLAGS = @AM_CFLAGS@ -fcx-fortran-rules $(SECTION_FLAGS) \
- $(IEEE_FLAGS) $(am__append_1)
+ $(IEEE_FLAGS) $(am__append_1) $(am__append_2)
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
AM_FCFLAGS = @AM_FCFLAGS@ $(IEEE_FLAGS)
AR = @AR@
@@ -637,7 +655,10 @@ PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
RANLIB = @RANLIB@
+RT_LIBS = @RT_LIBS@
SECTION_FLAGS = @SECTION_FLAGS@
SED = @SED@
SET_MAKE = @SET_MAKE@
@@ -698,6 +719,7 @@ pdfdir = @pdfdir@
prefix = @prefix@
program_transform_name = @program_transform_name@
psdir = @psdir@
+runstatedir = @runstatedir@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
srcdir = @srcdir@
@@ -728,13 +750,15 @@ gfor_cdir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include
LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS)) \
$(lt_host_flags)
+@LIBGFOR_NATIVE_COARRAY_FALSE@COARRAY_LIBS = 
+@LIBGFOR_NATIVE_COARRAY_TRUE@COARRAY_LIBS = $(PTHREAD_LIBS) $(RT_LIBS)
toolexeclib_LTLIBRARIES = libgfortran.la
toolexeclib_DATA = libgfortran.spec
libgfortran_la_LINK = $(LINK) $(libgfortran_la_LDFLAGS)
libgfortran_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \
$(LTLDFLAGS) $(LIBQUADLIB) ../libbacktrace/libbacktrace.la \
$(HWCAP_LDFLAGS) \
- -lm $(extra_ldflags_libgfortran) \
+ -lm $(COARRAY_LIBS) $(extra_ldflags_libgfortran) \
$(version_arg) -Wc,-shared-libgcc
libgfortran_la_DEPENDENCIES = $(version_dep) libgfortran.spec $(LIBQUADLIB_DEP)
@@ -744,6 +768,31 @@ libcaf_single_la_SOURCES = caf/single.c
libcaf_single_la_LDFLAGS = -static
libcaf_single_la_DEPENDENCIES = caf/libcaf.h
libcaf_single_la_LINK = $(LINK) $(libcaf_single_la_LDFLAGS)
+i_nca_minmax_c = \
+ $(srcdir)/generated/nca_minmax_i1.c \
+ $(srcdir)/generated/nca_minmax_i2.c \
+ $(srcdir)/generated/nca_minmax_i4.c \
+ $(srcdir)/generated/nca_minmax_i8.c \
+ $(srcdir)/generated/nca_minmax_i16.c \
+ $(srcdir)/generated/nca_minmax_r4.c \
+ $(srcdir)/generated/nca_minmax_r8.c \
+ $(srcdir)/generated/nca_minmax_r10.c \
+ $(srcdir)/generated/nca_minmax_r16.c
+
+i_nca_minmax_s_c = \
+ $(srcdir)/generated/nca_minmax_s1.c \
+ $(srcdir)/generated/nca_minmax_s4.c
+
+@LIBGFOR_NATIVE_COARRAY_TRUE@mylib_LTLIBRARIES = libnca.la
+@LIBGFOR_NATIVE_COARRAY_TRUE@mylibdir = $(libdir)/gcc/$(target_alias)/$(gcc_version)$(MULTISUBDIR)
+@LIBGFOR_NATIVE_COARRAY_TRUE@libnca_la_SOURCES = nca/alloc.c nca/allocator.c nca/coarraynative.c \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/hashmap.c \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/sync.c nca/util.c nca/wrapper.c nca/collective_subroutine.c \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/shared_memory.c \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ $(i_nca_minmax_c) $(i_nca_minmax_s_c)
+
+@LIBGFOR_NATIVE_COARRAY_TRUE@libnca_la_DEPENDENCIES = nca/alloc.h nca/allocator.h nca/hashmap.h nca/libcoarraynative.h nca/sync.h nca/shared_memory.h nca/util.h nca/lock.h nca/collective_subroutine.h nca/collective_inline.h
+@LIBGFOR_NATIVE_COARRAY_TRUE@libnca_la_LINK = $(LINK) $(libnca_la_LDFLAGS)
@IEEE_SUPPORT_TRUE@fincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)$(MULTISUBDIR)/finclude
@IEEE_SUPPORT_TRUE@nodist_finclude_HEADERS = ieee_arithmetic.mod ieee_exceptions.mod ieee_features.mod
AM_CPPFLAGS = -iquote$(srcdir)/io -I$(srcdir)/$(MULTISRCTOP)../gcc \
@@ -755,7 +804,7 @@ AM_CPPFLAGS = -iquote$(srcdir)/io -I$(srcdir)/$(MULTISRCTOP)../gcc \
-I$(MULTIBUILDTOP)../libbacktrace \
-I../libbacktrace
-gfor_io_src = io/size_from_kind.c $(am__append_2)
+gfor_io_src = io/size_from_kind.c $(am__append_3)
gfor_io_headers = \
io/io.h \
io/fbuf.h \
@@ -777,7 +826,7 @@ gfor_helper_src = intrinsics/associated.c intrinsics/abort.c \
intrinsics/selected_int_kind.f90 \
intrinsics/selected_real_kind.f90 intrinsics/trigd.c \
intrinsics/unpack_generic.c runtime/in_pack_generic.c \
- runtime/in_unpack_generic.c $(am__append_3) $(am__append_4)
+ runtime/in_unpack_generic.c $(am__append_4) $(am__append_5)
@IEEE_SUPPORT_FALSE@gfor_ieee_src =
@IEEE_SUPPORT_TRUE@gfor_ieee_src = \
@IEEE_SUPPORT_TRUE@ieee/ieee_arithmetic.F90 \
@@ -785,8 +834,8 @@ gfor_helper_src = intrinsics/associated.c intrinsics/abort.c \
@IEEE_SUPPORT_TRUE@ieee/ieee_features.F90
gfor_src = runtime/bounds.c runtime/compile_options.c runtime/memory.c \
- runtime/string.c runtime/select.c $(am__append_5) \
- $(am__append_6)
+ runtime/string.c runtime/select.c $(am__append_6) \
+ $(am__append_7)
i_all_c = \
$(srcdir)/generated/all_l1.c \
$(srcdir)/generated/all_l2.c \
@@ -1540,7 +1589,7 @@ intrinsics/random_init.f90
BUILT_SOURCES = $(gfor_built_src) $(gfor_built_specific_src) \
$(gfor_built_specific2_src) $(gfor_misc_specifics) \
- $(am__append_7)
+ $(am__append_8)
prereq_SRC = $(gfor_src) $(gfor_built_src) $(gfor_io_src) \
$(gfor_helper_src) $(gfor_ieee_src) $(gfor_io_headers) $(gfor_specific_src)
@@ -1668,6 +1717,41 @@ clean-cafexeclibLTLIBRARIES:
rm -f $${locs}; \
}
+install-mylibLTLIBRARIES: $(mylib_LTLIBRARIES)
+ @$(NORMAL_INSTALL)
+ @list='$(mylib_LTLIBRARIES)'; test -n "$(mylibdir)" || list=; \
+ list2=; for p in $$list; do \
+ if test -f $$p; then \
+ list2="$$list2 $$p"; \
+ else :; fi; \
+ done; \
+ test -z "$$list2" || { \
+ echo " $(MKDIR_P) '$(DESTDIR)$(mylibdir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(mylibdir)" || exit 1; \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(mylibdir)'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(mylibdir)"; \
+ }
+
+uninstall-mylibLTLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(mylib_LTLIBRARIES)'; test -n "$(mylibdir)" || list=; \
+ for p in $$list; do \
+ $(am__strip_dir) \
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(mylibdir)/$$f'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(mylibdir)/$$f"; \
+ done
+
+clean-mylibLTLIBRARIES:
+ -test -z "$(mylib_LTLIBRARIES)" || rm -f $(mylib_LTLIBRARIES)
+ @list='$(mylib_LTLIBRARIES)'; \
+ locs=`for p in $$list; do echo $$p; done | \
+ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+ sort -u`; \
+ test -z "$$locs" || { \
+ echo rm -f $${locs}; \
+ rm -f $${locs}; \
+ }
+
install-toolexeclibLTLIBRARIES: $(toolexeclib_LTLIBRARIES)
@$(NORMAL_INSTALL)
@list='$(toolexeclib_LTLIBRARIES)'; test -n "$(toolexeclibdir)" || list=; \
@@ -1709,6 +1793,9 @@ libcaf_single.la: $(libcaf_single_la_OBJECTS) $(libcaf_single_la_DEPENDENCIES) $
libgfortran.la: $(libgfortran_la_OBJECTS) $(libgfortran_la_DEPENDENCIES) $(EXTRA_libgfortran_la_DEPENDENCIES)
$(AM_V_GEN)$(libgfortran_la_LINK) -rpath $(toolexeclibdir) $(libgfortran_la_OBJECTS) $(libgfortran_la_LIBADD) $(LIBS)
+libnca.la: $(libnca_la_OBJECTS) $(libnca_la_DEPENDENCIES) $(EXTRA_libnca_la_DEPENDENCIES)
+ $(AM_V_GEN)$(libnca_la_LINK) $(am_libnca_la_rpath) $(libnca_la_OBJECTS) $(libnca_la_LIBADD) $(LIBS)
+
mostlyclean-compile:
-rm -f *.$(OBJEXT)
@@ -1723,6 +1810,8 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/all_l2.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/all_l4.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/all_l8.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alloc.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/allocator.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/any_l1.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/any_l16.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/any_l2.Plo@am__quote@
@@ -1742,6 +1831,8 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chmod.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/clock.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/close.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coarraynative.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collective_subroutine.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compile_options.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/convert_char.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/count_16_l.Plo@am__quote@
@@ -1866,6 +1957,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getXid.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getcwd.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getlog.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hashmap.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hostnm.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iall_i1.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iall_i16.Plo@am__quote@
@@ -2125,6 +2217,17 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minval_r8.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/move_alloc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mvbits.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_i1.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_i16.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_i2.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_i4.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_i8.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_r10.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_r16.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_r4.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_r8.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_s1.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_s4.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/norm2_r10.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/norm2_r16.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/norm2_r4.Plo@am__quote@
@@ -2218,6 +2321,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/shape_i2.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/shape_i4.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/shape_i8.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/shared_memory.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/signal.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/size.Plo@am__quote@
@@ -2255,6 +2359,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sum_r4.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sum_r8.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/symlnk.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sync.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/system.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/system_clock.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/time.Plo@am__quote@
@@ -2279,6 +2384,8 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unpack_r16.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unpack_r4.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unpack_r8.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/util.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/wrapper.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/write.Plo@am__quote@
.F90.o:
@@ -6679,6 +6786,146 @@ ieee_helper.lo: ieee/ieee_helper.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ieee_helper.lo `test -f 'ieee/ieee_helper.c' || echo '$(srcdir)/'`ieee/ieee_helper.c
+alloc.lo: nca/alloc.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT alloc.lo -MD -MP -MF $(DEPDIR)/alloc.Tpo -c -o alloc.lo `test -f 'nca/alloc.c' || echo '$(srcdir)/'`nca/alloc.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/alloc.Tpo $(DEPDIR)/alloc.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/alloc.c' object='alloc.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o alloc.lo `test -f 'nca/alloc.c' || echo '$(srcdir)/'`nca/alloc.c
+
+allocator.lo: nca/allocator.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT allocator.lo -MD -MP -MF $(DEPDIR)/allocator.Tpo -c -o allocator.lo `test -f 'nca/allocator.c' || echo '$(srcdir)/'`nca/allocator.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/allocator.Tpo $(DEPDIR)/allocator.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/allocator.c' object='allocator.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o allocator.lo `test -f 'nca/allocator.c' || echo '$(srcdir)/'`nca/allocator.c
+
+coarraynative.lo: nca/coarraynative.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT coarraynative.lo -MD -MP -MF $(DEPDIR)/coarraynative.Tpo -c -o coarraynative.lo `test -f 'nca/coarraynative.c' || echo '$(srcdir)/'`nca/coarraynative.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/coarraynative.Tpo $(DEPDIR)/coarraynative.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/coarraynative.c' object='coarraynative.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o coarraynative.lo `test -f 'nca/coarraynative.c' || echo '$(srcdir)/'`nca/coarraynative.c
+
+hashmap.lo: nca/hashmap.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT hashmap.lo -MD -MP -MF $(DEPDIR)/hashmap.Tpo -c -o hashmap.lo `test -f 'nca/hashmap.c' || echo '$(srcdir)/'`nca/hashmap.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/hashmap.Tpo $(DEPDIR)/hashmap.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/hashmap.c' object='hashmap.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o hashmap.lo `test -f 'nca/hashmap.c' || echo '$(srcdir)/'`nca/hashmap.c
+
+sync.lo: nca/sync.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT sync.lo -MD -MP -MF $(DEPDIR)/sync.Tpo -c -o sync.lo `test -f 'nca/sync.c' || echo '$(srcdir)/'`nca/sync.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/sync.Tpo $(DEPDIR)/sync.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/sync.c' object='sync.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sync.lo `test -f 'nca/sync.c' || echo '$(srcdir)/'`nca/sync.c
+
+util.lo: nca/util.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT util.lo -MD -MP -MF $(DEPDIR)/util.Tpo -c -o util.lo `test -f 'nca/util.c' || echo '$(srcdir)/'`nca/util.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/util.Tpo $(DEPDIR)/util.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/util.c' object='util.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o util.lo `test -f 'nca/util.c' || echo '$(srcdir)/'`nca/util.c
+
+wrapper.lo: nca/wrapper.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT wrapper.lo -MD -MP -MF $(DEPDIR)/wrapper.Tpo -c -o wrapper.lo `test -f 'nca/wrapper.c' || echo '$(srcdir)/'`nca/wrapper.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/wrapper.Tpo $(DEPDIR)/wrapper.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/wrapper.c' object='wrapper.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o wrapper.lo `test -f 'nca/wrapper.c' || echo '$(srcdir)/'`nca/wrapper.c
+
+collective_subroutine.lo: nca/collective_subroutine.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT collective_subroutine.lo -MD -MP -MF $(DEPDIR)/collective_subroutine.Tpo -c -o collective_subroutine.lo `test -f 'nca/collective_subroutine.c' || echo '$(srcdir)/'`nca/collective_subroutine.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/collective_subroutine.Tpo $(DEPDIR)/collective_subroutine.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/collective_subroutine.c' object='collective_subroutine.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o collective_subroutine.lo `test -f 'nca/collective_subroutine.c' || echo '$(srcdir)/'`nca/collective_subroutine.c
+
+shared_memory.lo: nca/shared_memory.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT shared_memory.lo -MD -MP -MF $(DEPDIR)/shared_memory.Tpo -c -o shared_memory.lo `test -f 'nca/shared_memory.c' || echo '$(srcdir)/'`nca/shared_memory.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/shared_memory.Tpo $(DEPDIR)/shared_memory.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/shared_memory.c' object='shared_memory.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o shared_memory.lo `test -f 'nca/shared_memory.c' || echo '$(srcdir)/'`nca/shared_memory.c
+
+nca_minmax_i1.lo: $(srcdir)/generated/nca_minmax_i1.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_i1.lo -MD -MP -MF $(DEPDIR)/nca_minmax_i1.Tpo -c -o nca_minmax_i1.lo `test -f '$(srcdir)/generated/nca_minmax_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i1.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_i1.Tpo $(DEPDIR)/nca_minmax_i1.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_i1.c' object='nca_minmax_i1.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_i1.lo `test -f '$(srcdir)/generated/nca_minmax_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i1.c
+
+nca_minmax_i2.lo: $(srcdir)/generated/nca_minmax_i2.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_i2.lo -MD -MP -MF $(DEPDIR)/nca_minmax_i2.Tpo -c -o nca_minmax_i2.lo `test -f '$(srcdir)/generated/nca_minmax_i2.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i2.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_i2.Tpo $(DEPDIR)/nca_minmax_i2.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_i2.c' object='nca_minmax_i2.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_i2.lo `test -f '$(srcdir)/generated/nca_minmax_i2.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i2.c
+
+nca_minmax_i4.lo: $(srcdir)/generated/nca_minmax_i4.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_i4.lo -MD -MP -MF $(DEPDIR)/nca_minmax_i4.Tpo -c -o nca_minmax_i4.lo `test -f '$(srcdir)/generated/nca_minmax_i4.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i4.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_i4.Tpo $(DEPDIR)/nca_minmax_i4.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_i4.c' object='nca_minmax_i4.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_i4.lo `test -f '$(srcdir)/generated/nca_minmax_i4.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i4.c
+
+nca_minmax_i8.lo: $(srcdir)/generated/nca_minmax_i8.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_i8.lo -MD -MP -MF $(DEPDIR)/nca_minmax_i8.Tpo -c -o nca_minmax_i8.lo `test -f '$(srcdir)/generated/nca_minmax_i8.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i8.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_i8.Tpo $(DEPDIR)/nca_minmax_i8.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_i8.c' object='nca_minmax_i8.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_i8.lo `test -f '$(srcdir)/generated/nca_minmax_i8.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i8.c
+
+nca_minmax_i16.lo: $(srcdir)/generated/nca_minmax_i16.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_i16.lo -MD -MP -MF $(DEPDIR)/nca_minmax_i16.Tpo -c -o nca_minmax_i16.lo `test -f '$(srcdir)/generated/nca_minmax_i16.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i16.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_i16.Tpo $(DEPDIR)/nca_minmax_i16.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_i16.c' object='nca_minmax_i16.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_i16.lo `test -f '$(srcdir)/generated/nca_minmax_i16.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i16.c
+
+nca_minmax_r4.lo: $(srcdir)/generated/nca_minmax_r4.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_r4.lo -MD -MP -MF $(DEPDIR)/nca_minmax_r4.Tpo -c -o nca_minmax_r4.lo `test -f '$(srcdir)/generated/nca_minmax_r4.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r4.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_r4.Tpo $(DEPDIR)/nca_minmax_r4.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_r4.c' object='nca_minmax_r4.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_r4.lo `test -f '$(srcdir)/generated/nca_minmax_r4.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r4.c
+
+nca_minmax_r8.lo: $(srcdir)/generated/nca_minmax_r8.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_r8.lo -MD -MP -MF $(DEPDIR)/nca_minmax_r8.Tpo -c -o nca_minmax_r8.lo `test -f '$(srcdir)/generated/nca_minmax_r8.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r8.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_r8.Tpo $(DEPDIR)/nca_minmax_r8.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_r8.c' object='nca_minmax_r8.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_r8.lo `test -f '$(srcdir)/generated/nca_minmax_r8.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r8.c
+
+nca_minmax_r10.lo: $(srcdir)/generated/nca_minmax_r10.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_r10.lo -MD -MP -MF $(DEPDIR)/nca_minmax_r10.Tpo -c -o nca_minmax_r10.lo `test -f '$(srcdir)/generated/nca_minmax_r10.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r10.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_r10.Tpo $(DEPDIR)/nca_minmax_r10.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_r10.c' object='nca_minmax_r10.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_r10.lo `test -f '$(srcdir)/generated/nca_minmax_r10.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r10.c
+
+nca_minmax_r16.lo: $(srcdir)/generated/nca_minmax_r16.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_r16.lo -MD -MP -MF $(DEPDIR)/nca_minmax_r16.Tpo -c -o nca_minmax_r16.lo `test -f '$(srcdir)/generated/nca_minmax_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r16.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_r16.Tpo $(DEPDIR)/nca_minmax_r16.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_r16.c' object='nca_minmax_r16.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_r16.lo `test -f '$(srcdir)/generated/nca_minmax_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r16.c
+
+nca_minmax_s1.lo: $(srcdir)/generated/nca_minmax_s1.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_s1.lo -MD -MP -MF $(DEPDIR)/nca_minmax_s1.Tpo -c -o nca_minmax_s1.lo `test -f '$(srcdir)/generated/nca_minmax_s1.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_s1.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_s1.Tpo $(DEPDIR)/nca_minmax_s1.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_s1.c' object='nca_minmax_s1.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_s1.lo `test -f '$(srcdir)/generated/nca_minmax_s1.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_s1.c
+
+nca_minmax_s4.lo: $(srcdir)/generated/nca_minmax_s4.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_s4.lo -MD -MP -MF $(DEPDIR)/nca_minmax_s4.Tpo -c -o nca_minmax_s4.lo `test -f '$(srcdir)/generated/nca_minmax_s4.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_s4.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_s4.Tpo $(DEPDIR)/nca_minmax_s4.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_s4.c' object='nca_minmax_s4.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_s4.lo `test -f '$(srcdir)/generated/nca_minmax_s4.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_s4.c
+
.f90.o:
$(AM_V_FC)$(FCCOMPILE) -c -o $@ $<
@@ -6835,7 +7082,7 @@ check: $(BUILT_SOURCES)
$(MAKE) $(AM_MAKEFLAGS) check-am
all-am: Makefile $(LTLIBRARIES) $(DATA) $(HEADERS) config.h all-local
installdirs:
- for dir in "$(DESTDIR)$(cafexeclibdir)" "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(gfor_cdir)" "$(DESTDIR)$(fincludedir)"; do \
+ for dir in "$(DESTDIR)$(cafexeclibdir)" "$(DESTDIR)$(mylibdir)" "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(gfor_cdir)" "$(DESTDIR)$(fincludedir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
done
install: $(BUILT_SOURCES)
@@ -6873,7 +7120,8 @@ maintainer-clean-generic:
clean: clean-am
clean-am: clean-cafexeclibLTLIBRARIES clean-generic clean-libtool \
- clean-local clean-toolexeclibLTLIBRARIES mostlyclean-am
+ clean-local clean-mylibLTLIBRARIES \
+ clean-toolexeclibLTLIBRARIES mostlyclean-am
distclean: distclean-am
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
@@ -6894,7 +7142,8 @@ info: info-am
info-am:
-install-data-am: install-gfor_cHEADERS install-nodist_fincludeHEADERS
+install-data-am: install-gfor_cHEADERS install-mylibLTLIBRARIES \
+ install-nodist_fincludeHEADERS
install-dvi: install-dvi-am
@@ -6945,14 +7194,14 @@ ps: ps-am
ps-am:
uninstall-am: uninstall-cafexeclibLTLIBRARIES uninstall-gfor_cHEADERS \
- uninstall-nodist_fincludeHEADERS uninstall-toolexeclibDATA \
- uninstall-toolexeclibLTLIBRARIES
+ uninstall-mylibLTLIBRARIES uninstall-nodist_fincludeHEADERS \
+ uninstall-toolexeclibDATA uninstall-toolexeclibLTLIBRARIES
.MAKE: all check install install-am install-strip
.PHONY: CTAGS GTAGS TAGS all all-am all-local am--refresh check \
check-am clean clean-cafexeclibLTLIBRARIES clean-cscope \
- clean-generic clean-libtool clean-local \
+ clean-generic clean-libtool clean-local clean-mylibLTLIBRARIES \
clean-toolexeclibLTLIBRARIES cscope cscopelist-am ctags \
ctags-am distclean distclean-compile distclean-generic \
distclean-hdr distclean-libtool distclean-local distclean-tags \
@@ -6961,16 +7210,17 @@ uninstall-am: uninstall-cafexeclibLTLIBRARIES uninstall-gfor_cHEADERS \
install-dvi install-dvi-am install-exec install-exec-am \
install-exec-local install-gfor_cHEADERS install-html \
install-html-am install-info install-info-am install-man \
- install-nodist_fincludeHEADERS install-pdf install-pdf-am \
- install-ps install-ps-am install-strip install-toolexeclibDATA \
+ install-mylibLTLIBRARIES install-nodist_fincludeHEADERS \
+ install-pdf install-pdf-am install-ps install-ps-am \
+ install-strip install-toolexeclibDATA \
install-toolexeclibLTLIBRARIES installcheck installcheck-am \
installdirs maintainer-clean maintainer-clean-generic \
maintainer-clean-local mostlyclean mostlyclean-compile \
mostlyclean-generic mostlyclean-libtool mostlyclean-local pdf \
pdf-am ps ps-am tags tags-am uninstall uninstall-am \
uninstall-cafexeclibLTLIBRARIES uninstall-gfor_cHEADERS \
- uninstall-nodist_fincludeHEADERS uninstall-toolexeclibDATA \
- uninstall-toolexeclibLTLIBRARIES
+ uninstall-mylibLTLIBRARIES uninstall-nodist_fincludeHEADERS \
+ uninstall-toolexeclibDATA uninstall-toolexeclibLTLIBRARIES
.PRECIOUS: Makefile
@@ -6983,6 +7233,9 @@ uninstall-am: uninstall-cafexeclibLTLIBRARIES uninstall-gfor_cHEADERS \
@LIBGFOR_USE_SYMVER_SUN_TRUE@@LIBGFOR_USE_SYMVER_TRUE@ `echo $(libgfortran_la_LIBADD) | \
@LIBGFOR_USE_SYMVER_SUN_TRUE@@LIBGFOR_USE_SYMVER_TRUE@ sed 's,/\([^/.]*\)\.la,/.libs/\1.a,g'` \
@LIBGFOR_USE_SYMVER_SUN_TRUE@@LIBGFOR_USE_SYMVER_TRUE@ > $@ || (rm -f $@ ; exit 1)
+@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/libcoarraynative.h nca/sync.h shared_memory.h \
+@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/util.h nca/lock.h nca/collective_subroutine.h\
+@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/collective_inline.h
# Turn on vectorization and loop unrolling for matmul.
$(patsubst %.c,%.lo,$(notdir $(i_matmul_c))): AM_CFLAGS += -ffast-math -ftree-vectorize -funroll-loops --param max-unroll-times=4
@@ -7193,6 +7446,12 @@ fpu-target.inc: fpu-target.h $(srcdir)/libgfortran.h
@MAINTAINER_MODE_TRUE@$(gfor_misc_specifics): m4/misc_specifics.m4 m4/head.m4
@MAINTAINER_MODE_TRUE@ $(M4) -Dfile=$@ -I$(srcdir)/m4 misc_specifics.m4 > $@
+@LIBGFOR_NATIVE_COARRAY_TRUE@@MAINTAINER_MODE_TRUE@$(i_nca_minmax_c): m4/nca_minmax.m4 $(I_M4_DEPS)
+@LIBGFOR_NATIVE_COARRAY_TRUE@@MAINTAINER_MODE_TRUE@ $(M4) -Dfile=$@ -I$(srcdir)/m4 nca_minmax.m4 > $@
+
+@LIBGFOR_NATIVE_COARRAY_TRUE@@MAINTAINER_MODE_TRUE@$(i_nca_minmax_s_c): m4/nca-minmax-s.m4 $(I_M4_DEPS)
+@LIBGFOR_NATIVE_COARRAY_TRUE@@MAINTAINER_MODE_TRUE@ $(M4) -Dfile=$@ -I$(srcdir)/m4 nca-minmax-s.m4 > $@
+
# target overrides
-include $(tmake_file)
diff --git a/libgfortran/config.h.in b/libgfortran/config.h.in
index 2d58188..795c9fe 100644
--- a/libgfortran/config.h.in
+++ b/libgfortran/config.h.in
@@ -657,6 +657,15 @@
/* Define to 1 if you have the `powf' function. */
#undef HAVE_POWF
+/* Define to 1 if you have the `pthread_barrierattr_setpshared' function. */
+#undef HAVE_PTHREAD_BARRIERATTR_SETPSHARED
+
+/* Define to 1 if you have the `pthread_condattr_setpshared' function. */
+#undef HAVE_PTHREAD_CONDATTR_SETPSHARED
+
+/* Define to 1 if you have the `pthread_mutexattr_setpshared' function. */
+#undef HAVE_PTHREAD_MUTEXATTR_SETPSHARED
+
/* Define to 1 if the system has the type `ptrdiff_t'. */
#undef HAVE_PTRDIFF_T
@@ -687,6 +696,12 @@
/* Define to 1 if you have the `setmode' function. */
#undef HAVE_SETMODE
+/* Define to 1 if you have the `shm_open' function. */
+#undef HAVE_SHM_OPEN
+
+/* Define to 1 if you have the `shm_unlink' function. */
+#undef HAVE_SHM_UNLINK
+
/* Define to 1 if you have the `sigaction' function. */
#undef HAVE_SIGACTION
diff --git a/libgfortran/configure b/libgfortran/configure
index 99cca96..d703fda 100755
--- a/libgfortran/configure
+++ b/libgfortran/configure
@@ -637,6 +637,11 @@ am__EXEEXT_TRUE
LTLIBOBJS
LIBOBJS
get_gcc_base_ver
+LIBGFOR_NATIVE_COARRAY_FALSE
+LIBGFOR_NATIVE_COARRAY_TRUE
+RT_LIBS
+PTHREAD_LIBS
+PTHREAD_CFLAGS
HAVE_AVX128_FALSE
HAVE_AVX128_TRUE
tmake_file
@@ -783,6 +788,7 @@ infodir
docdir
oldincludedir
includedir
+runstatedir
localstatedir
sharedstatedir
sysconfdir
@@ -874,6 +880,7 @@ datadir='${datarootdir}'
sysconfdir='${prefix}/etc'
sharedstatedir='${prefix}/com'
localstatedir='${prefix}/var'
+runstatedir='${localstatedir}/run'
includedir='${prefix}/include'
oldincludedir='/usr/include'
docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
@@ -1126,6 +1133,15 @@ do
| -silent | --silent | --silen | --sile | --sil)
silent=yes ;;
+ -runstatedir | --runstatedir | --runstatedi | --runstated \
+ | --runstate | --runstat | --runsta | --runst | --runs \
+ | --run | --ru | --r)
+ ac_prev=runstatedir ;;
+ -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
+ | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
+ | --run=* | --ru=* | --r=*)
+ runstatedir=$ac_optarg ;;
+
-sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
ac_prev=sbindir ;;
-sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
@@ -1263,7 +1279,7 @@ fi
for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \
datadir sysconfdir sharedstatedir localstatedir includedir \
oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
- libdir localedir mandir
+ libdir localedir mandir runstatedir
do
eval ac_val=\$$ac_var
# Remove trailing slashes.
@@ -1416,6 +1432,7 @@ Fine tuning of the installation directories:
--sysconfdir=DIR read-only single-machine data [PREFIX/etc]
--sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
--localstatedir=DIR modifiable single-machine data [PREFIX/var]
+ --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run]
--libdir=DIR object code libraries [EPREFIX/lib]
--includedir=DIR C header files [PREFIX/include]
--oldincludedir=DIR C header files for non-gcc [/usr/include]
@@ -16099,7 +16116,7 @@ else
We can't simply define LARGE_OFF_T to be 9223372036854775807,
since some C++ compilers masquerading as C compilers
incorrectly reject 9223372036854775807. */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
&& LARGE_OFF_T % 2147483647 == 1)
? 1 : -1];
@@ -16145,7 +16162,7 @@ else
We can't simply define LARGE_OFF_T to be 9223372036854775807,
since some C++ compilers masquerading as C compilers
incorrectly reject 9223372036854775807. */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
&& LARGE_OFF_T % 2147483647 == 1)
? 1 : -1];
@@ -16169,7 +16186,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
We can't simply define LARGE_OFF_T to be 9223372036854775807,
since some C++ compilers masquerading as C compilers
incorrectly reject 9223372036854775807. */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
&& LARGE_OFF_T % 2147483647 == 1)
? 1 : -1];
@@ -16214,7 +16231,7 @@ else
We can't simply define LARGE_OFF_T to be 9223372036854775807,
since some C++ compilers masquerading as C compilers
incorrectly reject 9223372036854775807. */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
&& LARGE_OFF_T % 2147483647 == 1)
? 1 : -1];
@@ -16238,7 +16255,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
We can't simply define LARGE_OFF_T to be 9223372036854775807,
since some C++ compilers masquerading as C compilers
incorrectly reject 9223372036854775807. */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
&& LARGE_OFF_T % 2147483647 == 1)
? 1 : -1];
@@ -27147,6 +27164,167 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
CFLAGS="$ac_save_CFLAGS"
+# Tests related to native coarrays
+# Test whether the compiler supports the -pthread option.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -pthread is supported" >&5
+$as_echo_n "checking whether -pthread is supported... " >&6; }
+if ${libgfortran_cv_lib_pthread+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ CFLAGS_hold=$CFLAGS
+CFLAGS="$CFLAGS -pthread -L../libatomic/.libs"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+int i;
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ libgfortran_cv_lib_pthread=yes
+else
+ libgfortran_cv_lib_pthread=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+CFLAGS=$CFLAGS_hold
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgfortran_cv_lib_pthread" >&5
+$as_echo "$libgfortran_cv_lib_pthread" >&6; }
+PTHREAD_CFLAGS=
+if test "$libgfortran_cv_lib_pthread" = yes; then
+ # RISC-V apparently adds -latomic when using -pthread.
+ PTHREAD_CFLAGS="-pthread -L../libatomic/.libs"
+fi
+
+
+# Test for the -lpthread library.
+PTHREAD_LIBS=
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5
+$as_echo_n "checking for pthread_create in -lpthread... " >&6; }
+if ${ac_cv_lib_pthread_pthread_create+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lpthread $LIBS"
+if test x$gcc_no_link = xyes; then
+ as_fn_error $? "Link tests are not allowed after GCC_NO_EXECUTABLES." "$LINENO" 5
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char pthread_create ();
+int
+main ()
+{
+return pthread_create ();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_lib_pthread_pthread_create=yes
+else
+ ac_cv_lib_pthread_pthread_create=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthread_pthread_create" >&5
+$as_echo "$ac_cv_lib_pthread_pthread_create" >&6; }
+if test "x$ac_cv_lib_pthread_pthread_create" = xyes; then :
+ PTHREAD_LIBS=-lpthread
+fi
+
+
+
+# Test if -lrt is required
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for shm_open in -lrt" >&5
+$as_echo_n "checking for shm_open in -lrt... " >&6; }
+if ${ac_cv_lib_rt_shm_open+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lrt $LIBS"
+if test x$gcc_no_link = xyes; then
+ as_fn_error $? "Link tests are not allowed after GCC_NO_EXECUTABLES." "$LINENO" 5
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char shm_open ();
+int
+main ()
+{
+return shm_open ();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_lib_rt_shm_open=yes
+else
+ ac_cv_lib_rt_shm_open=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_rt_shm_open" >&5
+$as_echo "$ac_cv_lib_rt_shm_open" >&6; }
+if test "x$ac_cv_lib_rt_shm_open" = xyes; then :
+ RT_LIBS=-lrt
+fi
+
+
+
+CFLAGS_hold="$CFLAGS"
+CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+LIBS_hold="$LIBS"
+LIBS="$LIBS $PTHREAD_LIBS $RT_LIBS"
+
+# Find the functions that we need
+for ac_func in pthread_condattr_setpshared pthread_mutexattr_setpshared pthread_barrierattr_setpshared shm_open shm_unlink
+do :
+ as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
+ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
+if eval test \"x\$"$as_ac_var"\" = x"yes"; then :
+ cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+ if true; then
+ LIBGFOR_NATIVE_COARRAY_TRUE=
+ LIBGFOR_NATIVE_COARRAY_FALSE='#'
+else
+ LIBGFOR_NATIVE_COARRAY_TRUE='#'
+ LIBGFOR_NATIVE_COARRAY_FALSE=
+fi
+
+else
+ if false; then
+ LIBGFOR_NATIVE_COARRAY_TRUE=
+ LIBGFOR_NATIVE_COARRAY_FALSE='#'
+else
+ LIBGFOR_NATIVE_COARRAY_TRUE='#'
+ LIBGFOR_NATIVE_COARRAY_FALSE=
+fi
+
+fi
+done
+
+
+CFLAGS="$CFLAGS_hold"
+LIBS="$LIBS_hold"
+
# Determine what GCC version number to use in filesystem paths.
get_gcc_base_ver="cat"
@@ -27438,6 +27616,14 @@ if test -z "${HAVE_AVX128_TRUE}" && test -z "${HAVE_AVX128_FALSE}"; then
as_fn_error $? "conditional \"HAVE_AVX128\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
+if test -z "${LIBGFOR_NATIVE_COARRAY_TRUE}" && test -z "${LIBGFOR_NATIVE_COARRAY_FALSE}"; then
+ as_fn_error $? "conditional \"LIBGFOR_NATIVE_COARRAY\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${LIBGFOR_NATIVE_COARRAY_TRUE}" && test -z "${LIBGFOR_NATIVE_COARRAY_FALSE}"; then
+ as_fn_error $? "conditional \"LIBGFOR_NATIVE_COARRAY\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
: "${CONFIG_STATUS=./config.status}"
ac_write_fail=0
diff --git a/libgfortran/configure.ac b/libgfortran/configure.ac
index 8961e31..1aac140 100644
--- a/libgfortran/configure.ac
+++ b/libgfortran/configure.ac
@@ -683,6 +683,43 @@ LIBGFOR_CHECK_FMA4
# Check if AVX128 works
LIBGFOR_CHECK_AVX128
+# Tests related to native coarrays
+# Test whether the compiler supports the -pthread option.
+AC_CACHE_CHECK([whether -pthread is supported],
+[libgfortran_cv_lib_pthread],
+[CFLAGS_hold=$CFLAGS
+CFLAGS="$CFLAGS -pthread -L../libatomic/.libs"
+AC_COMPILE_IFELSE([AC_LANG_SOURCE([int i;])],
+[libgfortran_cv_lib_pthread=yes],
+[libgfortran_cv_lib_pthread=no])
+CFLAGS=$CFLAGS_hold])
+PTHREAD_CFLAGS=
+if test "$libgfortran_cv_lib_pthread" = yes; then
+ # RISC-V apparently adds -latomic when using -pthread.
+ PTHREAD_CFLAGS="-pthread -L../libatomic/.libs"
+fi
+AC_SUBST(PTHREAD_CFLAGS)
+
+# Test for the -lpthread library.
+PTHREAD_LIBS=
+AC_CHECK_LIB([pthread], [pthread_create], PTHREAD_LIBS=-lpthread)
+AC_SUBST(PTHREAD_LIBS)
+
+# Test if -lrt is required
+AC_CHECK_LIB([rt], [shm_open], RT_LIBS=-lrt)
+AC_SUBST(RT_LIBS)
+
+CFLAGS_hold="$CFLAGS"
+CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+LIBS_hold="$LIBS"
+LIBS="$LIBS $PTHREAD_LIBS $RT_LIBS"
+
+# Find the functions that we need
+AC_CHECK_FUNCS([pthread_condattr_setpshared pthread_mutexattr_setpshared pthread_barrierattr_setpshared shm_open shm_unlink],[AM_CONDITIONAL(LIBGFOR_NATIVE_COARRAY,true)],[AM_CONDITIONAL(LIBGFOR_NATIVE_COARRAY,false)])
+
+CFLAGS="$CFLAGS_hold"
+LIBS="$LIBS_hold"
+
# Determine what GCC version number to use in filesystem paths.
GCC_BASE_VER
diff --git a/libgfortran/generated/nca_minmax_i1.c b/libgfortran/generated/nca_minmax_i1.c
new file mode 100644
index 0000000..3bc9a2b
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_i1.c
@@ -0,0 +1,653 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_INTEGER_1)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_i1);
+
+void
+nca_collsub_max_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_1 *a, *b;
+ GFC_INTEGER_1 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_i1);
+
+void
+nca_collsub_min_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_1 *a, *b;
+ GFC_INTEGER_1 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_i1);
+
+void
+nca_collsub_sum_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_1 *a, *b;
+ GFC_INTEGER_1 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_i1 (gfc_array_i1 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_i1);
+
+void
+nca_collsub_max_array_i1 (gfc_array_i1 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_1 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_1 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_1);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_1);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_1);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_1 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_1 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_1 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_1 *a;
+ GFC_INTEGER_1 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_1 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_1 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_i1 (gfc_array_i1 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_i1);
+
+void
+nca_collsub_min_array_i1 (gfc_array_i1 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_1 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_1 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_1);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_1);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_1);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_1 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_1 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_1 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_1 *a;
+ GFC_INTEGER_1 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_1 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_1 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_i1 (gfc_array_i1 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_i1);
+
+void
+nca_collsub_sum_array_i1 (gfc_array_i1 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_1 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_1 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_1);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_1);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_1);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_1 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_1 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_1 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_1 *a;
+ GFC_INTEGER_1 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_1 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_1 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_i16.c b/libgfortran/generated/nca_minmax_i16.c
new file mode 100644
index 0000000..8fbb948
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_i16.c
@@ -0,0 +1,653 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_INTEGER_16)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_i16);
+
+void
+nca_collsub_max_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_16 *a, *b;
+ GFC_INTEGER_16 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_i16);
+
+void
+nca_collsub_min_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_16 *a, *b;
+ GFC_INTEGER_16 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_i16);
+
+void
+nca_collsub_sum_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_16 *a, *b;
+ GFC_INTEGER_16 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_i16 (gfc_array_i16 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_i16);
+
+void
+nca_collsub_max_array_i16 (gfc_array_i16 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_16 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_16);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_16);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_16);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_16 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_16 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_16 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_16 *a;
+ GFC_INTEGER_16 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_16 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_16 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_i16 (gfc_array_i16 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_i16);
+
+void
+nca_collsub_min_array_i16 (gfc_array_i16 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_16 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_16);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_16);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_16);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_16 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_16 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_16 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_16 *a;
+ GFC_INTEGER_16 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_16 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_16 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_i16 (gfc_array_i16 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_i16);
+
+void
+nca_collsub_sum_array_i16 (gfc_array_i16 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_16 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_16);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_16);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_16);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_16 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_16 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_16 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_16 *a;
+ GFC_INTEGER_16 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_16 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_16 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_i2.c b/libgfortran/generated/nca_minmax_i2.c
new file mode 100644
index 0000000..61908d6
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_i2.c
@@ -0,0 +1,653 @@
+/* Implementation of the minmax collective subroutines.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_INTEGER_2)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_i2);
+
+void
+nca_collsub_max_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_2 *a, *b;
+ GFC_INTEGER_2 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_i2);
+
+void
+nca_collsub_min_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_2 *a, *b;
+ GFC_INTEGER_2 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_i2);
+
+void
+nca_collsub_sum_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_2 *a, *b;
+ GFC_INTEGER_2 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_i2 (gfc_array_i2 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_i2);
+
+void
+nca_collsub_max_array_i2 (gfc_array_i2 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_2 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_2 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_2);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_2);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_2);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_2 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_2 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_2 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_2 *a;
+ GFC_INTEGER_2 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_2 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_2 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_i2 (gfc_array_i2 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_i2);
+
+void
+nca_collsub_min_array_i2 (gfc_array_i2 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_2 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_2 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_2);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_2);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_2);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_2 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_2 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_2 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_2 *a;
+ GFC_INTEGER_2 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_2 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_2 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_i2 (gfc_array_i2 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_i2);
+
+void
+nca_collsub_sum_array_i2 (gfc_array_i2 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_2 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_2 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_2);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_2);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_2);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_2 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_2 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_2 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_2 *a;
+ GFC_INTEGER_2 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_2 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_2 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_i4.c b/libgfortran/generated/nca_minmax_i4.c
new file mode 100644
index 0000000..5e37586
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_i4.c
@@ -0,0 +1,653 @@
+/* Implementation of the minmax collective subroutines.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_INTEGER_4)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_i4);
+
+void
+nca_collsub_max_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_4 *a, *b;
+ GFC_INTEGER_4 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_i4);
+
+void
+nca_collsub_min_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_4 *a, *b;
+ GFC_INTEGER_4 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_i4);
+
+void
+nca_collsub_sum_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_4 *a, *b;
+ GFC_INTEGER_4 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_i4 (gfc_array_i4 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_i4);
+
+void
+nca_collsub_max_array_i4 (gfc_array_i4 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_4 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_4);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_4);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_4);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_4 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_4 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_4 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_4 *a;
+ GFC_INTEGER_4 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_4 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_4 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_i4 (gfc_array_i4 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_i4);
+
+void
+nca_collsub_min_array_i4 (gfc_array_i4 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_4 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_4);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_4);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_4);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_4 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_4 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_4 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_4 *a;
+ GFC_INTEGER_4 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_4 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_4 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_i4 (gfc_array_i4 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_i4);
+
+void
+nca_collsub_sum_array_i4 (gfc_array_i4 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_4 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_4);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_4);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_4);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_4 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_4 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_4 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_4 *a;
+ GFC_INTEGER_4 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_4 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_4 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_i8.c b/libgfortran/generated/nca_minmax_i8.c
new file mode 100644
index 0000000..b3dc861
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_i8.c
@@ -0,0 +1,653 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_INTEGER_8)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_i8);
+
+void
+nca_collsub_max_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_8 *a, *b;
+ GFC_INTEGER_8 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_i8);
+
+void
+nca_collsub_min_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_8 *a, *b;
+ GFC_INTEGER_8 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_i8);
+
+void
+nca_collsub_sum_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_INTEGER_8 *a, *b;
+ GFC_INTEGER_8 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_i8 (gfc_array_i8 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_i8);
+
+void
+nca_collsub_max_array_i8 (gfc_array_i8 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_8 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_8);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_8);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_8);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_8 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_8 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_8 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_8 *a;
+ GFC_INTEGER_8 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_8 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_8 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_i8 (gfc_array_i8 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_i8);
+
+void
+nca_collsub_min_array_i8 (gfc_array_i8 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_8 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_8);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_8);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_8);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_8 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_8 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_8 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_8 *a;
+ GFC_INTEGER_8 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_8 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_8 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_i8 (gfc_array_i8 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_i8);
+
+void
+nca_collsub_sum_array_i8 (gfc_array_i8 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_INTEGER_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_INTEGER_8 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_INTEGER_8);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_8);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_INTEGER_8);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_INTEGER_8 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_INTEGER_8 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_INTEGER_8 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_INTEGER_8 *a;
+ GFC_INTEGER_8 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_INTEGER_8 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_INTEGER_8 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_r10.c b/libgfortran/generated/nca_minmax_r10.c
new file mode 100644
index 0000000..10f7324
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_r10.c
@@ -0,0 +1,653 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_REAL_10)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_r10 (GFC_REAL_10 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_r10);
+
+void
+nca_collsub_max_scalar_r10 (GFC_REAL_10 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_REAL_10 *a, *b;
+ GFC_REAL_10 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_r10 (GFC_REAL_10 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_r10);
+
+void
+nca_collsub_min_scalar_r10 (GFC_REAL_10 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_REAL_10 *a, *b;
+ GFC_REAL_10 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_r10 (GFC_REAL_10 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_r10);
+
+void
+nca_collsub_sum_scalar_r10 (GFC_REAL_10 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_REAL_10 *a, *b;
+ GFC_REAL_10 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_r10 (gfc_array_r10 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_r10);
+
+void
+nca_collsub_max_array_r10 (gfc_array_r10 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_10 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_10 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_10);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_10);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_10);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_10 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_10 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_10 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_10 *a;
+ GFC_REAL_10 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_REAL_10 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_10 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_r10 (gfc_array_r10 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_r10);
+/* CO_MIN for a GFC_REAL_10 array: element-wise butterfly minimum across all images. */
+void
+nca_collsub_min_array_r10 (gfc_array_r10 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];  /* Per-dimension traversal counters.  */
+ index_type stride[GFC_MAX_DIMENSIONS];  /* Per-dimension strides in bytes.  */
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_10 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_10 *buffer;
+ index_type dim;
+ bool packed;  /* True if the array data is contiguous.  */
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;  /* Butterfly stage index.  */
+ int imoffset;  /* Distance to the partner image at the current stage.  */
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_10);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_10);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_10);  /* Total number of elements.  */
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);  /* One array-sized slot per image.  */
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_10 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_10 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;  /* Whole array traversed; terminate.  */
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_10 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_10 *a;
+ GFC_REAL_10 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)  /* Match the barrier count of images still reducing.  */
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);  /* Result lives in image 0's slot.  */
+ else
+ {
+ GFC_REAL_10 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_10 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_r10 (gfc_array_r10 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_r10);
+/* CO_SUM for a GFC_REAL_10 array: element-wise butterfly summation across all images. */
+void
+nca_collsub_sum_array_r10 (gfc_array_r10 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];  /* Per-dimension traversal counters.  */
+ index_type stride[GFC_MAX_DIMENSIONS];  /* Per-dimension strides in bytes.  */
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_10 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_10 *buffer;
+ index_type dim;
+ bool packed;  /* True if the array data is contiguous.  */
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;  /* Butterfly stage index.  */
+ int imoffset;  /* Distance to the partner image at the current stage.  */
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_10);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_10);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_10);  /* Total number of elements.  */
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);  /* One array-sized slot per image.  */
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_10 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_10 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;  /* Whole array traversed; terminate.  */
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_10 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_10 *a;
+ GFC_REAL_10 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)  /* Match the barrier count of images still reducing.  */
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);  /* Result lives in image 0's slot.  */
+ else
+ {
+ GFC_REAL_10 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_10 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_r16.c b/libgfortran/generated/nca_minmax_r16.c
new file mode 100644
index 0000000..a0a0a51
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_r16.c
@@ -0,0 +1,653 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_REAL_16)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_r16 (GFC_REAL_16 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_r16);
+/* CO_MAX for a scalar GFC_REAL_16: butterfly maximum reduction across all images. */
+void
+nca_collsub_max_scalar_r16 (GFC_REAL_16 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;  /* Butterfly stage index.  */
+ int imoffset;  /* Distance to the partner image at the current stage.  */
+ GFC_REAL_16 *a, *b;
+ GFC_REAL_16 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->num_images);  /* One slot per image.  */
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)  /* Match the barrier count of images still reducing.  */
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;  /* Result was reduced into image 0's slot.  */
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_r16 (GFC_REAL_16 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_r16);
+/* CO_MIN for a scalar GFC_REAL_16: butterfly minimum reduction across all images. */
+void
+nca_collsub_min_scalar_r16 (GFC_REAL_16 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;  /* Butterfly stage index.  */
+ int imoffset;  /* Distance to the partner image at the current stage.  */
+ GFC_REAL_16 *a, *b;
+ GFC_REAL_16 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->num_images);  /* One slot per image.  */
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)  /* Match the barrier count of images still reducing.  */
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;  /* Result was reduced into image 0's slot.  */
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_r16 (GFC_REAL_16 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_r16);
+/* CO_SUM for a scalar GFC_REAL_16: butterfly summation across all images. */
+void
+nca_collsub_sum_scalar_r16 (GFC_REAL_16 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;  /* Butterfly stage index.  */
+ int imoffset;  /* Distance to the partner image at the current stage.  */
+ GFC_REAL_16 *a, *b;
+ GFC_REAL_16 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->num_images);  /* One slot per image.  */
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)  /* Match the barrier count of images still reducing.  */
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;  /* Result was reduced into image 0's slot.  */
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_r16 (gfc_array_r16 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_r16);
+/* CO_MAX for a GFC_REAL_16 array: element-wise butterfly maximum across all images. */
+void
+nca_collsub_max_array_r16 (gfc_array_r16 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];  /* Per-dimension traversal counters.  */
+ index_type stride[GFC_MAX_DIMENSIONS];  /* Per-dimension strides in bytes.  */
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_16 *buffer;
+ index_type dim;
+ bool packed;  /* True if the array data is contiguous.  */
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;  /* Butterfly stage index.  */
+ int imoffset;  /* Distance to the partner image at the current stage.  */
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_16);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_16);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_16);  /* Total number of elements.  */
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);  /* One array-sized slot per image.  */
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_16 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_16 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;  /* Whole array traversed; terminate.  */
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_16 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_16 *a;
+ GFC_REAL_16 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)  /* Match the barrier count of images still reducing.  */
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);  /* Result lives in image 0's slot.  */
+ else
+ {
+ GFC_REAL_16 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_16 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_r16 (gfc_array_r16 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_r16);
+/* CO_MIN for a GFC_REAL_16 array: element-wise butterfly minimum across all images. */
+void
+nca_collsub_min_array_r16 (gfc_array_r16 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];  /* Per-dimension traversal counters.  */
+ index_type stride[GFC_MAX_DIMENSIONS];  /* Per-dimension strides in bytes.  */
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_16 *buffer;
+ index_type dim;
+ bool packed;  /* True if the array data is contiguous.  */
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;  /* Butterfly stage index.  */
+ int imoffset;  /* Distance to the partner image at the current stage.  */
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_16);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_16);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_16);  /* Total number of elements.  */
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);  /* One array-sized slot per image.  */
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_16 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_16 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;  /* Whole array traversed; terminate.  */
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_16 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_16 *a;
+ GFC_REAL_16 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)  /* Match the barrier count of images still reducing.  */
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);  /* Result lives in image 0's slot.  */
+ else
+ {
+ GFC_REAL_16 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_16 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_r16 (gfc_array_r16 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_r16);
+/* CO_SUM for a GFC_REAL_16 array: element-wise butterfly summation across all images. */
+void
+nca_collsub_sum_array_r16 (gfc_array_r16 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];  /* Per-dimension traversal counters.  */
+ index_type stride[GFC_MAX_DIMENSIONS];  /* Per-dimension strides in bytes.  */
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_16 *buffer;
+ index_type dim;
+ bool packed;  /* True if the array data is contiguous.  */
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;  /* Butterfly stage index.  */
+ int imoffset;  /* Distance to the partner image at the current stage.  */
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_16);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_16);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_16);  /* Total number of elements.  */
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);  /* One array-sized slot per image.  */
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_16 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_16 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;  /* Whole array traversed; terminate.  */
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_16 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_16 *a;
+ GFC_REAL_16 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)  /* Match the barrier count of images still reducing.  */
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);  /* Result lives in image 0's slot.  */
+ else
+ {
+ GFC_REAL_16 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_16 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_r4.c b/libgfortran/generated/nca_minmax_r4.c
new file mode 100644
index 0000000..0eb3f1b
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_r4.c
@@ -0,0 +1,653 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_REAL_4)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_r4 (GFC_REAL_4 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_r4);
+/* CO_MAX for a scalar GFC_REAL_4: butterfly maximum reduction across all images. */
+void
+nca_collsub_max_scalar_r4 (GFC_REAL_4 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;  /* Butterfly stage index.  */
+ int imoffset;  /* Distance to the partner image at the current stage.  */
+ GFC_REAL_4 *a, *b;
+ GFC_REAL_4 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->num_images);  /* One slot per image.  */
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)  /* Match the barrier count of images still reducing.  */
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;  /* Result was reduced into image 0's slot.  */
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_r4 (GFC_REAL_4 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_r4);
+/* CO_MIN for a scalar GFC_REAL_4: butterfly minimum reduction across all images. */
+void
+nca_collsub_min_scalar_r4 (GFC_REAL_4 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;  /* Butterfly stage index.  */
+ int imoffset;  /* Distance to the partner image at the current stage.  */
+ GFC_REAL_4 *a, *b;
+ GFC_REAL_4 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->num_images);  /* One slot per image.  */
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)  /* Match the barrier count of images still reducing.  */
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;  /* Result was reduced into image 0's slot.  */
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_r4 (GFC_REAL_4 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_r4);
+/* CO_SUM for a scalar GFC_REAL_4: butterfly summation across all images. */
+void
+nca_collsub_sum_scalar_r4 (GFC_REAL_4 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;  /* Butterfly stage index.  */
+ int imoffset;  /* Distance to the partner image at the current stage.  */
+ GFC_REAL_4 *a, *b;
+ GFC_REAL_4 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->num_images);  /* One slot per image.  */
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)  /* Match the barrier count of images still reducing.  */
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;  /* Result was reduced into image 0's slot.  */
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_r4 (gfc_array_r4 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_r4);
+/* CO_MAX for a GFC_REAL_4 array: element-wise butterfly maximum across all images. */
+void
+nca_collsub_max_array_r4 (gfc_array_r4 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];  /* Per-dimension traversal counters.  */
+ index_type stride[GFC_MAX_DIMENSIONS];  /* Per-dimension strides in bytes.  */
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_4 *buffer;
+ index_type dim;
+ bool packed;  /* True if the array data is contiguous.  */
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;  /* Butterfly stage index.  */
+ int imoffset;  /* Distance to the partner image at the current stage.  */
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_4);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_4);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_4);  /* Total number of elements.  */
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);  /* One array-sized slot per image.  */
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_4 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_4 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;  /* Whole array traversed; terminate.  */
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+ /* Reduce the array to image zero. Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_4 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_4 *a;
+ GFC_REAL_4 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)  /* Match the barrier count of images still reducing.  */
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);  /* Result lives in image 0's slot.  */
+ else
+ {
+ GFC_REAL_4 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_4 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_r4 (gfc_array_r4 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_r4);
+
+void
+nca_collsub_min_array_r4 (gfc_array_r4 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_4 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_4);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_4);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_4);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_4 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_4 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_4 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_4 *a;
+ GFC_REAL_4 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_REAL_4 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_4 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_r4 (gfc_array_r4 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_r4);
+
+void
+nca_collsub_sum_array_r4 (gfc_array_r4 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_4 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_4);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_4);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_4);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_4 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_4 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_4 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_4 *a;
+ GFC_REAL_4 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_REAL_4 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_4 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_r8.c b/libgfortran/generated/nca_minmax_r8.c
new file mode 100644
index 0000000..3b3e962
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_r8.c
@@ -0,0 +1,653 @@
+/* Implementation of the minmax collective subroutines.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_REAL_8)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+void nca_collsub_max_scalar_r8 (GFC_REAL_8 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_r8);
+
+void
+nca_collsub_max_scalar_r8 (GFC_REAL_8 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_REAL_8 *a, *b;
+ GFC_REAL_8 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b > *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_r8 (GFC_REAL_8 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_r8);
+
+void
+nca_collsub_min_scalar_r8 (GFC_REAL_8 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_REAL_8 *a, *b;
+ GFC_REAL_8 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ if (*b < *a)
+ *a = *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_sum_scalar_r8 (GFC_REAL_8 *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_sum_scalar_r8);
+
+void
+nca_collsub_sum_scalar_r8 (GFC_REAL_8 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_REAL_8 *a, *b;
+ GFC_REAL_8 *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ *a += *b;
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_r8 (gfc_array_r8 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_max_array_r8);
+
+void
+nca_collsub_max_array_r8 (gfc_array_r8 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_8 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_8);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_8);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_8);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_8 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_8 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_8 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_8 *a;
+ GFC_REAL_8 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b > *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_REAL_8 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_8 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_r8 (gfc_array_r8 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_min_array_r8);
+
+void
+nca_collsub_min_array_r8 (gfc_array_r8 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_8 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_8);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_8);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_8);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_8 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_8 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_8 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_8 *a;
+ GFC_REAL_8 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ if (*b < *a)
+ *a = *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_REAL_8 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_8 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_sum_array_r8 (gfc_array_r8 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_sum_array_r8);
+
+void
+nca_collsub_sum_array_r8 (gfc_array_r8 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ GFC_REAL_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ GFC_REAL_8 *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof (GFC_REAL_8);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_8);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof (GFC_REAL_8);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ GFC_REAL_8 *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *((GFC_REAL_8 *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ GFC_REAL_8 * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_REAL_8 *a;
+ GFC_REAL_8 *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ *a += *b;
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ GFC_REAL_8 *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *((GFC_REAL_8 * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_s1.c b/libgfortran/generated/nca_minmax_s1.c
new file mode 100644
index 0000000..b081452
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_s1.c
@@ -0,0 +1,494 @@
+/* Implementation of the minmax collective subroutines.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_UINTEGER_1)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+#if 1 == 4
+
+/* Compare wide character types, which are handled internally as
+ unsigned 4-byte integers. */
+static inline int
+memcmp4 (const void *a, const void *b, size_t len)
+{
+ const GFC_UINTEGER_4 *pa = a;
+ const GFC_UINTEGER_4 *pb = b;
+ while (len-- > 0)
+ {
+ if (*pa != *pb)
+ return *pa < *pb ? -1 : 1;
+ pa ++;
+ pb ++;
+ }
+ return 0;
+}
+
+#endif
+void nca_collsub_max_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image,
+ int *stat, char *errmsg, index_type char_len, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_s1);
+
+void
+nca_collsub_max_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_UINTEGER_1 *a, *b;
+ GFC_UINTEGER_1 *buffer, *this_image_buf;
+ collsub_iface *ci;
+ index_type type_size;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof (GFC_UINTEGER_1);
+ buffer = get_collsub_buf (ci, type_size * local->num_images);
+ this_image_buf = buffer + this_image.image_num * char_len;
+ memcpy (this_image_buf, obj, type_size);
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset * char_len;
+ if (memcmp (b, a, char_len) > 0)
+ memcpy (a, b, type_size);
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ memcpy (obj, buffer, type_size);
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image,
+ int *stat, char *errmsg, index_type char_len, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_s1);
+
+void
+nca_collsub_min_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_UINTEGER_1 *a, *b;
+ GFC_UINTEGER_1 *buffer, *this_image_buf;
+ collsub_iface *ci;
+ index_type type_size;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof (GFC_UINTEGER_1);
+ buffer = get_collsub_buf (ci, type_size * local->num_images);
+ this_image_buf = buffer + this_image.image_num * char_len;
+ memcpy (this_image_buf, obj, type_size);
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset * char_len;
+ if (memcmp (b, a, char_len) < 0)
+ memcpy (a, b, type_size);
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ memcpy (obj, buffer, type_size);
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_s1 (gfc_array_s1 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type char_len,
+ index_type errmsg_len);
+export_proto (nca_collsub_max_array_s1);
+
+void
+nca_collsub_max_array_s1 (gfc_array_s1 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+  index_type stride[GFC_MAX_DIMENSIONS];  /* Strides are in bytes here.  */
+ index_type extent[GFC_MAX_DIMENSIONS];
+ char *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ char *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ index_type type_size;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof (GFC_UINTEGER_1);
+ dim = GFC_DESCRIPTOR_RANK (array);
+ num_elems = 1;
+ ssize = type_size;
+ packed = true;
+ span = array->span != 0 ? array->span : type_size;
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n))
+ packed = false;
+
+ num_elems *= extent[n];
+ }
+
+ ssize = num_elems * type_size;
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * ssize;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ char *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+
+ memcpy (dest, src, type_size);
+ dest += type_size;
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ char *other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_UINTEGER_1 *a;
+ GFC_UINTEGER_1 *b;
+
+ other_shared_ptr = this_shared_ptr + imoffset * ssize;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = (GFC_UINTEGER_1 *) (this_shared_ptr + i * type_size);
+ b = (GFC_UINTEGER_1 *) (other_shared_ptr + i * type_size);
+ if (memcmp (b, a, char_len) > 0)
+ memcpy (a, b, type_size);
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ char *src = buffer;
+ char *restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ memcpy (dest, src, type_size);
+ src += span;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_s1 (gfc_array_s1 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type char_len,
+ index_type errmsg_len);
+export_proto (nca_collsub_min_array_s1);
+
+void
+nca_collsub_min_array_s1 (gfc_array_s1 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */
+ index_type extent[GFC_MAX_DIMENSIONS];
+ char *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ char *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ index_type type_size;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof (GFC_UINTEGER_1);
+ dim = GFC_DESCRIPTOR_RANK (array);
+ num_elems = 1;
+ ssize = type_size;
+ packed = true;
+ span = array->span != 0 ? array->span : type_size;
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n))
+ packed = false;
+
+ num_elems *= extent[n];
+ }
+
+ ssize = num_elems * type_size;
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * ssize;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ char *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+
+ memcpy (dest, src, type_size);
+ dest += type_size;
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ char *other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_UINTEGER_1 *a;
+ GFC_UINTEGER_1 *b;
+
+ other_shared_ptr = this_shared_ptr + imoffset * ssize;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = (GFC_UINTEGER_1 *) (this_shared_ptr + i * type_size);
+ b = (GFC_UINTEGER_1 *) (other_shared_ptr + i * type_size);
+ if (memcmp (b, a, char_len) < 0)
+ memcpy (a, b, type_size);
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ char *src = buffer;
+ char *restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ memcpy (dest, src, type_size);
+ src += span;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/generated/nca_minmax_s4.c b/libgfortran/generated/nca_minmax_s4.c
new file mode 100644
index 0000000..b202fda
--- /dev/null
+++ b/libgfortran/generated/nca_minmax_s4.c
@@ -0,0 +1,494 @@
+/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+
+#include "libgfortran.h"
+
+#if defined (HAVE_GFC_UINTEGER_4)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+#if 4 == 4
+
+/* Compare wide character types, which are handled internally as
+ unsigned 4-byte integers. */
+static inline int
+memcmp4 (const void *a, const void *b, size_t len)
+{
+ const GFC_UINTEGER_4 *pa = a;
+ const GFC_UINTEGER_4 *pb = b;
+ while (len-- > 0)
+ {
+ if (*pa != *pb)
+ return *pa < *pb ? -1 : 1;
+ pa ++;
+ pb ++;
+ }
+ return 0;
+}
+
+#endif
+void nca_collsub_max_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image,
+ int *stat, char *errmsg, index_type char_len, index_type errmsg_len);
+export_proto(nca_collsub_max_scalar_s4);
+
+void
+nca_collsub_max_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_UINTEGER_4 *a, *b;
+ GFC_UINTEGER_4 *buffer, *this_image_buf;
+ collsub_iface *ci;
+ index_type type_size;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof (GFC_UINTEGER_4);
+ buffer = get_collsub_buf (ci, type_size * local->num_images);
+ this_image_buf = buffer + this_image.image_num * char_len;
+ memcpy (this_image_buf, obj, type_size);
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset * char_len;
+ if (memcmp4 (b, a, char_len) > 0)
+ memcpy (a, b, type_size);
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ memcpy (obj, buffer, type_size);
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_min_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image,
+ int *stat, char *errmsg, index_type char_len, index_type errmsg_len);
+export_proto(nca_collsub_min_scalar_s4);
+
+void
+nca_collsub_min_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ GFC_UINTEGER_4 *a, *b;
+ GFC_UINTEGER_4 *buffer, *this_image_buf;
+ collsub_iface *ci;
+ index_type type_size;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof (GFC_UINTEGER_4);
+ buffer = get_collsub_buf (ci, type_size * local->num_images);
+ this_image_buf = buffer + this_image.image_num * char_len;
+ memcpy (this_image_buf, obj, type_size);
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset * char_len;
+ if (memcmp4 (b, a, char_len) < 0)
+ memcpy (a, b, type_size);
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ memcpy (obj, buffer, type_size);
+
+ finish_collective_subroutine (ci);
+
+}
+
+void nca_collsub_max_array_s4 (gfc_array_s4 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type char_len,
+ index_type errmsg_len);
+export_proto (nca_collsub_max_array_s4);
+
+void
+nca_collsub_max_array_s4 (gfc_array_s4 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */
+ index_type extent[GFC_MAX_DIMENSIONS];
+ char *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ char *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ index_type type_size;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof (GFC_UINTEGER_4);
+ dim = GFC_DESCRIPTOR_RANK (array);
+ num_elems = 1;
+ ssize = type_size;
+ packed = true;
+ span = array->span != 0 ? array->span : type_size;
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n))
+ packed = false;
+
+ num_elems *= extent[n];
+ }
+
+ ssize = num_elems * type_size;
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * ssize;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ char *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+
+ memcpy (dest, src, type_size);
+ dest += type_size;
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ char *other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_UINTEGER_4 *a;
+ GFC_UINTEGER_4 *b;
+
+ other_shared_ptr = this_shared_ptr + imoffset * ssize;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = (GFC_UINTEGER_4 *) (this_shared_ptr + i * type_size);
+ b = (GFC_UINTEGER_4 *) (other_shared_ptr + i * type_size);
+ if (memcmp4 (b, a, char_len) > 0)
+ memcpy (a, b, type_size);
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ char *src = buffer;
+ char *restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ memcpy (dest, src, type_size);
+ src += span;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+void nca_collsub_min_array_s4 (gfc_array_s4 * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type char_len,
+ index_type errmsg_len);
+export_proto (nca_collsub_min_array_s4);
+
+void
+nca_collsub_min_array_s4 (gfc_array_s4 * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */
+ index_type extent[GFC_MAX_DIMENSIONS];
+ char *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ char *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ index_type type_size;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof (GFC_UINTEGER_4);
+ dim = GFC_DESCRIPTOR_RANK (array);
+ num_elems = 1;
+ ssize = type_size;
+ packed = true;
+ span = array->span != 0 ? array->span : type_size;
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n))
+ packed = false;
+
+ num_elems *= extent[n];
+ }
+
+ ssize = num_elems * type_size;
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * ssize;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ char *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+
+ memcpy (dest, src, type_size);
+ dest += type_size;
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ char *other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ GFC_UINTEGER_4 *a;
+ GFC_UINTEGER_4 *b;
+
+ other_shared_ptr = this_shared_ptr + imoffset * ssize;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = (GFC_UINTEGER_4 *) (this_shared_ptr + i * type_size);
+ b = (GFC_UINTEGER_4 *) (other_shared_ptr + i * type_size);
+ if (memcmp4 (b, a, char_len) < 0)
+ memcpy (a, b, type_size);
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ char *src = buffer;
+ char *restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ memcpy (dest, src, type_size);
+ src += span;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+
+#endif
+
diff --git a/libgfortran/libgfortran.h b/libgfortran/libgfortran.h
index 8c539e0..a6b0d5a 100644
--- a/libgfortran/libgfortran.h
+++ b/libgfortran/libgfortran.h
@@ -403,6 +403,7 @@ struct {\
}
typedef GFC_FULL_ARRAY_DESCRIPTOR (GFC_MAX_DIMENSIONS, GFC_INTEGER_4) gfc_full_array_i4;
+typedef GFC_FULL_ARRAY_DESCRIPTOR (GFC_MAX_DIMENSIONS, char) gfc_full_array_char;
#define GFC_DESCRIPTOR_RANK(desc) ((desc)->dtype.rank)
#define GFC_DESCRIPTOR_TYPE(desc) ((desc)->dtype.type)
diff --git a/libgfortran/m4/nca-minmax-s.m4 b/libgfortran/m4/nca-minmax-s.m4
new file mode 100644
index 0000000..2d8891f
--- /dev/null
+++ b/libgfortran/m4/nca-minmax-s.m4
@@ -0,0 +1,289 @@
+dnl Support macro file for intrinsic functions.
+dnl Contains the generic sections of gfortran functions.
+dnl This file is part of the GNU Fortran Runtime Library (libgfortran)
+dnl Distributed under the GNU GPL with exception. See COPYING for details.
+dnl
+`/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */'
+
+include(iparm.m4)dnl
+define(`compare_fcn',`ifelse(rtype_kind,1,memcmp,memcmp4)')dnl
+define(SCALAR_FUNCTION,`void nca_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image,
+ int *stat, char *errmsg, index_type char_len, index_type errmsg_len);
+export_proto(nca_collsub_'$1`_scalar_'rtype_code`);
+
+void
+nca_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ 'rtype_name` *a, *b;
+ 'rtype_name` *buffer, *this_image_buf;
+ collsub_iface *ci;
+ index_type type_size;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof ('rtype_name`);
+ buffer = get_collsub_buf (ci, type_size * local->num_images);
+ this_image_buf = buffer + this_image.image_num * char_len;
+ memcpy (this_image_buf, obj, type_size);
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset * char_len;
+ if ('compare_fcn` (b, a, char_len) '$2` 0)
+ memcpy (a, b, type_size);
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ memcpy (obj, buffer, type_size);
+
+ finish_collective_subroutine (ci);
+
+}
+
+')dnl
+define(ARRAY_FUNCTION,dnl
+`void nca_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type char_len,
+ index_type errmsg_len);
+export_proto (nca_collsub_'$1`_array_'rtype_code`);
+
+void
+nca_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type char_len,
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */
+ index_type extent[GFC_MAX_DIMENSIONS];
+ char *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ char *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ index_type type_size;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ type_size = char_len * sizeof ('rtype_name`);
+ dim = GFC_DESCRIPTOR_RANK (array);
+ num_elems = 1;
+ ssize = type_size;
+ packed = true;
+ span = array->span != 0 ? array->span : type_size;
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n))
+ packed = false;
+
+ num_elems *= extent[n];
+ }
+
+ ssize = num_elems * type_size;
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * ssize;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ char *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+
+ memcpy (dest, src, type_size);
+ dest += type_size;
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0
+ && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ char *other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ 'rtype_name` *a;
+ 'rtype_name` *b;
+
+ other_shared_ptr = this_shared_ptr + imoffset * ssize;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = ('rtype_name` *) (this_shared_ptr + i * type_size);
+ b = ('rtype_name` *) (other_shared_ptr + i * type_size);
+ if ('compare_fcn` (b, a, char_len) '$2` 0)
+ memcpy (a, b, type_size);
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ char *src = buffer;
+ char *restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ memcpy (dest, src, type_size);
+ src += span;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+')
+`
+#include "libgfortran.h"
+
+#if defined (HAVE_'rtype_name`)
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+#if 'rtype_kind` == 4
+
+/* Compare wide character types, which are handled internally as
+ unsigned 4-byte integers. */
+static inline int
+memcmp4 (const void *a, const void *b, size_t len)
+{
+ const GFC_UINTEGER_4 *pa = a;
+ const GFC_UINTEGER_4 *pb = b;
+ while (len-- > 0)
+ {
+ if (*pa != *pb)
+ return *pa < *pb ? -1 : 1;
+ pa ++;
+ pb ++;
+ }
+ return 0;
+}
+
+#endif
+'SCALAR_FUNCTION(`max',`>')dnl
+SCALAR_FUNCTION(`min',`<')dnl
+ARRAY_FUNCTION(`max',`>')dnl
+ARRAY_FUNCTION(`min',`<')dnl
+`
+#endif
+'
diff --git a/libgfortran/m4/nca_minmax.m4 b/libgfortran/m4/nca_minmax.m4
new file mode 100644
index 0000000..76070c1
--- /dev/null
+++ b/libgfortran/m4/nca_minmax.m4
@@ -0,0 +1,259 @@
+dnl Support macro file for intrinsic functions.
+dnl Contains the generic sections of gfortran functions.
+dnl This file is part of the GNU Fortran Runtime Library (libgfortran)
+dnl Distributed under the GNU GPL with exception. See COPYING for details.
+dnl
+`/* Implementation of collective subroutines minmax.
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>.
+
+This file is part of the GNU Fortran runtime library (libgfortran).
+
+Libgfortran is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public
+License as published by the Free Software Foundation; either
+version 3 of the License, or (at your option) any later version.
+
+Libgfortran is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */'
+
+include(iparm.m4)dnl
+define(SCALAR_FUNCTION,`void nca_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto(nca_collsub_'$1`_scalar_'rtype_code`);
+
+void
+nca_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ int cbit = 0;
+ int imoffset;
+ 'rtype_name` *a, *b;
+ 'rtype_name` *buffer, *this_image_buf;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ buffer = get_collsub_buf (ci, sizeof('rtype_name`) * local->num_images);
+ this_image_buf = buffer + this_image.image_num;
+ *this_image_buf = *obj;
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ a = this_image_buf;
+ b = this_image_buf + imoffset;
+ '$2`
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ *obj = *buffer;
+
+ finish_collective_subroutine (ci);
+
+}
+
+')dnl
+define(ARRAY_FUNCTION,dnl
+`void nca_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image,
+ int *stat, char *errmsg, index_type errmsg_len);
+export_proto (nca_collsub_'$1`_array_'rtype_code`);
+
+void
+nca_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image,
+ int *stat __attribute__ ((unused)),
+ char *errmsg __attribute__ ((unused)),
+ index_type errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ 'rtype_name` *this_shared_ptr; /* Points to the shared memory allocated to this image. */
+ 'rtype_name` *buffer;
+ index_type dim;
+ bool packed;
+ index_type span;
+ index_type ssize, num_elems;
+ int cbit = 0;
+ int imoffset;
+ collsub_iface *ci;
+
+ ci = &local->ci;
+
+ dim = GFC_DESCRIPTOR_RANK (array);
+ ssize = sizeof ('rtype_name`);
+ packed = true;
+ span = array->span != 0 ? array->span : (index_type) sizeof ('rtype_name`);
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (array, n);
+
+ /* No-op for an empty array. */
+ if (extent[n] <= 0)
+ return;
+
+ if (ssize != stride[n])
+ packed = false;
+
+ ssize *= extent[n];
+ }
+
+ num_elems = ssize / sizeof ('rtype_name`);
+
+ buffer = get_collsub_buf (ci, ssize * local->num_images);
+ this_shared_ptr = buffer + this_image.image_num * num_elems;
+
+ if (packed)
+ memcpy (this_shared_ptr, array->base_addr, ssize);
+ else
+ {
+ char *src = (char *) array->base_addr;
+ 'rtype_name` *restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+ *(dest++) = *(('rtype_name` *) src);
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+
+ collsub_sync (ci);
+
+  /* Reduce the array to image zero.  Here is the general scheme:
+
+ abababababab
+ a_b_a_b_a_b_
+ a___b___a___
+ a_______b___
+ r___________
+ */
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ {
+ 'rtype_name` * other_shared_ptr; /* Points to the shared memory
+ allocated to another image. */
+ 'rtype_name` *a;
+ 'rtype_name` *b;
+
+ other_shared_ptr = this_shared_ptr + num_elems * imoffset;
+ for (index_type i = 0; i < num_elems; i++)
+ {
+ a = this_shared_ptr + i;
+ b = other_shared_ptr + i;
+ '$2`
+ }
+ }
+ collsub_sync (ci);
+ }
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || (*result_image - 1) == this_image.image_num)
+ {
+ if (packed)
+ memcpy (array->base_addr, buffer, ssize);
+ else
+ {
+ 'rtype_name` *src = buffer;
+ char * restrict dest = (char *) array->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ *(('rtype_name` * ) dest) = *src++;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci);
+}
+')
+`#include "libgfortran.h"
+
+#if defined (HAVE_'rtype_name`)'
+#include <string.h>
+#include "../nca/libcoarraynative.h"
+#include "../nca/collective_subroutine.h"
+#include "../nca/collective_inline.h"
+
+SCALAR_FUNCTION(`max',`if (*b > *a)
+ *a = *b;')dnl
+SCALAR_FUNCTION(`min',`if (*b < *a)
+ *a = *b;')dnl
+SCALAR_FUNCTION(`sum',`*a += *b;')dnl
+ARRAY_FUNCTION(`max',`if (*b > *a)
+ *a = *b;')dnl
+ARRAY_FUNCTION(`min',`if (*b < *a)
+ *a = *b;')dnl
+ARRAY_FUNCTION(`sum',`*a += *b;')dnl
+`
+#endif
+'
diff --git a/libgfortran/nca/.tags b/libgfortran/nca/.tags
new file mode 100644
index 0000000..07d260d
--- /dev/null
+++ b/libgfortran/nca/.tags
@@ -0,0 +1,275 @@
+!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/
+!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/
+!_TAG_PROGRAM_AUTHOR Darren Hiebert /dhiebert@users.sourceforge.net/
+!_TAG_PROGRAM_NAME Exuberant Ctags //
+!_TAG_PROGRAM_URL http://ctags.sourceforge.net /official site/
+!_TAG_PROGRAM_VERSION 5.9~svn20110310 //
+ALIGN_TO serialize.c 47;" d file:
+ALLOC_H alloc.h 26;" d
+COARRAY_LOCK_HDR lock.h 27;" d
+COARRAY_NATIVE_HDR libcoarraynative.h 30;" d
+COLLECTIVE_SUBROUTINE_HDR collective_subroutine.h 3;" d
+CRITICAL_LOOKAHEAD hashmap.c 30;" d file:
+DEBUG_PRINTF libcoarraynative.h 45;" d
+DEBUG_PRINTF libcoarraynative.h 47;" d
+FILL_VALUE serialize.c 57;" d file:
+GFC_NCA_EVENT_COARRAY wrapper.c /^ GFC_NCA_EVENT_COARRAY,$/;" e enum:gfc_coarray_allocation_type file:
+GFC_NCA_LOCK_COARRAY wrapper.c /^ GFC_NCA_LOCK_COARRAY,$/;" e enum:gfc_coarray_allocation_type file:
+GFC_NCA_NORMAL_COARRAY wrapper.c /^ GFC_NCA_NORMAL_COARRAY = 3,$/;" e enum:gfc_coarray_allocation_type file:
+GFORTRAN_ENV_NUM_IMAGES coarraynative.c 37;" d file:
+HASHMAP_H hashmap.h 69;" d
+IMAGE_FAILED libcoarraynative.h /^ IMAGE_FAILED$/;" e enum:__anon7
+IMAGE_FAILED master.c /^ IMAGE_FAILED$/;" e enum:__anon5 file:
+IMAGE_OK libcoarraynative.h /^ IMAGE_OK,$/;" e enum:__anon7
+IMAGE_OK master.c /^ IMAGE_OK,$/;" e enum:__anon5 file:
+IMAGE_UNKNOWN libcoarraynative.h /^ IMAGE_UNKNOWN = 0,$/;" e enum:__anon7
+IMAGE_UNKNOWN master.c /^ IMAGE_UNKNOWN = 0,$/;" e enum:__anon5 file:
+INDENT hashmap.c 403;" d file:
+INDENT hashmap.c 414;" d file:
+INDENT hashmap.c 415;" d file:
+INDENT hashmap.c 429;" d file:
+INDENT hashmap.c 430;" d file:
+INITIAL_BITNUM hashmap.c 28;" d file:
+INITIAL_SIZE hashmap.c 29;" d file:
+IPSYNC_HDR sync.h 26;" d
+ITER malloc_test.c 13;" d file:
+MAX_ALIGN allocator.c 50;" d file:
+MAX_NUM malloc_test.c 10;" d file:
+MEMOBJ_NAME util.c 11;" d file:
+MIN_NUM malloc_test.c 11;" d file:
+NUM_BITS malloc_test.c 8;" d file:
+NUM_SIZES malloc_test.c 9;" d file:
+PE hashmap.c 402;" d file:
+PTR_BITS util.h 32;" d
+SHARED_ALLOCATOR_HDR allocator.h 2;" d
+SHARED_MEMORY_H shared_memory.h 77;" d
+SHARED_MEMORY_RAW_ALLOC shared_memory.h 50;" d
+SHARED_MEMORY_RAW_ALLOC_PTR shared_memory.h 53;" d
+SHMPTR_AS shared_memory.h 46;" d
+SHMPTR_DEREF shared_memory.h 44;" d
+SHMPTR_EQUALS shared_memory.h 48;" d
+SHMPTR_IS_NULL shared_memory.h 42;" d
+SHMPTR_NULL shared_memory.h 41;" d
+SHMPTR_SET shared_memory.h 47;" d
+SZ malloc_test.c 12;" d file:
+UTIL_HDR util.h 26;" d
+a hashmap.h /^ allocator *a;$/;" m struct:hashmap
+a sync.h /^ allocator *a;$/;" m struct:__anon3
+ai libcoarraynative.h /^ alloc_iface ai;$/;" m struct:__anon11
+alignto util.c /^alignto(size_t size, size_t align) {$/;" f
+alloc alloc.h /^ allocator alloc;$/;" m struct:alloc_iface
+alloc_iface alloc.h /^typedef struct alloc_iface$/;" s
+alloc_iface alloc.h /^} alloc_iface;$/;" t typeref:struct:alloc_iface
+alloc_iface_init alloc.c /^alloc_iface_init (alloc_iface *iface, shared_memory *mem)$/;" f
+alloc_iface_init alloc.h /^internal_proto (alloc_iface_init);$/;" v
+alloc_iface_shared alloc.h /^typedef struct alloc_iface_shared$/;" s
+alloc_iface_shared alloc.h /^} alloc_iface_shared;$/;" t typeref:struct:alloc_iface_shared
+allocator allocator.h /^} allocator;$/;" t typeref:struct:__anon17
+allocator_init allocator.c /^allocator_init (allocator *a, allocator_shared *s, shared_memory *sm)$/;" f
+allocator_s alloc.h /^ allocator_shared allocator_s;$/;" m struct:alloc_iface_shared
+allocator_shared allocator.h /^} allocator_shared;$/;" t typeref:struct:__anon16
+allocs shared_memory.c /^ } allocs[];$/;" m struct:shared_memory_act typeref:struct:shared_memory_act::local_alloc file:
+arr lock.h /^ pthread_mutex_t arr[];$/;" m struct:__anon12
+as alloc.h /^ alloc_iface_shared *as;$/;" m struct:alloc_iface
+barrier collective_subroutine.h /^ pthread_barrier_t barrier;$/;" m struct:collsub_iface_shared
+barrier libcoarraynative.h /^ pthread_barrier_t barrier;$/;" m struct:__anon6
+base shared_memory.c /^ void *base;$/;" m struct:shared_memory_act::local_alloc file:
+bitnum hashmap.h /^ int bitnum;$/;" m struct:__anon14
+bucket allocator.c /^} bucket;$/;" t typeref:struct:__anon1 file:
+ci libcoarraynative.h /^ collsub_iface ci;$/;" m struct:__anon11
+cis sync.h /^ sync_iface_shared *cis;$/;" m struct:__anon3
+collsub_broadcast collective_subroutine.h /^internal_proto (collsub_broadcast);$/;" v
+collsub_buf collective_subroutine.c /^void *collsub_buf = NULL;$/;" v
+collsub_buf collective_subroutine.h /^ shared_mem_ptr collsub_buf;$/;" m struct:collsub_iface_shared
+collsub_buf collective_subroutine.h /^ void *collsub_buf; \/* Cached pointer to shared collsub_buf. *\/$/;" m struct:collsub_iface
+collsub_buf collective_subroutine.h /^internal_proto (collsub_buf);$/;" v
+collsub_iface collective_subroutine.h /^typedef struct collsub_iface$/;" s
+collsub_iface collective_subroutine.h /^} collsub_iface;$/;" t typeref:struct:collsub_iface
+collsub_iface_shared collective_subroutine.h /^typedef struct collsub_iface_shared $/;" s
+collsub_iface_shared collective_subroutine.h /^} collsub_iface_shared;$/;" t typeref:struct:collsub_iface_shared
+collsub_reduce collective_subroutine.c /^collsub_reduce (void *obj, size_t nobjs, int *result_image, size_t size, $/;" f
+collsub_reduce collective_subroutine.h /^internal_proto (collsub_reduce);$/;" v
+collsub_sync collective_subroutine.c /^collsub_sync (void) {$/;" f
+collsub_sync collective_subroutine.h /^internal_proto (collsub_sync);$/;" v
+copy_from collective_inline.h /^copy_from (int image) $/;" f
+copy_in collective_inline.h /^copy_in (void *obj) {$/;" f
+copy_out collective_inline.h /^copy_out (void *obj, int image)$/;" f
+copy_to collective_inline.h /^copy_to (void *obj, int image)$/;" f
+curr_size collective_subroutine.c /^size_t curr_size = 0;$/;" v
+curr_size collective_subroutine.h /^ size_t curr_size;$/;" m struct:collsub_iface_shared
+curr_size collective_subroutine.h /^internal_proto (curr_size);$/;" v
+data hashmap.h /^ shared_mem_ptr data;$/;" m struct:__anon14
+deserialize_memory serialize.c /^export_proto (deserialize_memory);$/;" v
+div_ru wrapper.c /^div_ru (int divident, int divisor)$/;" f file:
+dump_hm hashmap.c /^dump_hm(hashmap *hm) {$/;" f
+enlarge_hashmap_mem hashmap.c /^enlarge_hashmap_mem (hashmap *hm, hashmap_entry **data, bool f)$/;" f file:
+ensure_initialization coarraynative.c /^ensure_initialization(void) {$/;" f
+fd shared_memory.c /^ int fd;$/;" m struct:__anon18 file:
+finish_collective_subroutine collective_inline.h /^finish_collective_subroutine (void) $/;" f
+free_bucket_head allocator.h /^ shared_mem_ptr free_bucket_head[PTR_BITS];$/;" m struct:__anon16
+free_memory_with_id alloc.c /^free_memory_with_id (alloc_iface* iface, memid id)$/;" f
+free_memory_with_id alloc.h /^internal_proto (free_memory_with_id);$/;" v
+gen_mask hashmap.c /^gen_mask (hashmap *hm)$/;" f file:
+get_allocator alloc.c /^get_allocator (alloc_iface * iface)$/;" f
+get_allocator alloc.h /^internal_proto (get_allocator);$/;" v
+get_data hashmap.c /^get_data(hashmap *hm)$/;" f file:
+get_environ_image_num coarraynative.c /^get_environ_image_num (void)$/;" f file:
+get_expected_offset hashmap.c /^get_expected_offset (hashmap *hm, memid id)$/;" f file:
+get_locked_table sync.c /^get_locked_table(sync_iface *si) { \/\/ The initialization of the table has to $/;" f file:
+get_master coarraynative.c /^get_master (void) {$/;" f file:
+get_memory_by_id alloc.c /^get_memory_by_id (alloc_iface *iface, size_t size, memid id)$/;" f
+get_memory_by_id alloc.h /^internal_proto (get_memory_by_id);$/;" v
+get_obj_ptr collective_inline.h /^get_obj_ptr (int image) $/;" f
+get_shared_memory_act_size shared_memory.c /^get_shared_memory_act_size (int nallocs)$/;" f file:
+get_shmem_fd util.c /^get_shmem_fd (void)$/;" f
+gfc_coarray_allocation_type wrapper.c /^enum gfc_coarray_allocation_type {$/;" g file:
+global_shared_memory_meta shared_memory.c /^} global_shared_memory_meta;$/;" t typeref:struct:__anon18 file:
+has_failed_image libcoarraynative.h /^ int has_failed_image;$/;" m struct:__anon9
+has_failed_image master.c /^ int has_failed_image = 0;$/;" m struct:__anon4 file:
+hash hashmap.c /^hash (uint64_t key)$/;" f file:
+hashmap hashmap.h /^typedef struct hashmap$/;" s
+hashmap hashmap.h /^} hashmap;$/;" t typeref:struct:hashmap
+hashmap_change_refcnt hashmap.c /^hashmap_change_refcnt (hashmap *hm, memid id, hashmap_search_result *res,$/;" f file:
+hashmap_dec hashmap.c /^hashmap_dec (hashmap *hm, memid id, hashmap_search_result * res)$/;" f
+hashmap_entry hashmap.c /^} hashmap_entry;$/;" t typeref:struct:__anon13 file:
+hashmap_get hashmap.c /^hashmap_get (hashmap *hm, memid id)$/;" f
+hashmap_inc hashmap.c /^hashmap_inc (hashmap *hm, memid id, hashmap_search_result * res)$/;" f
+hashmap_init hashmap.c /^hashmap_init (hashmap *hm, hashmap_shared *hs, allocator *a,$/;" f
+hashmap_search_result hashmap.h /^} hashmap_search_result;$/;" t typeref:struct:__anon15
+hashmap_set hashmap.c /^hashmap_set (hashmap *hm, memid id, hashmap_search_result *hsr,$/;" f
+hashmap_shared hashmap.h /^} hashmap_shared;$/;" t typeref:struct:__anon14
+header shared_memory.c /^ void *header;$/;" m struct:shared_memory_act file:
+hm alloc.h /^ hashmap hm;$/;" m struct:alloc_iface
+hm_search_result_contains hashmap.c /^hm_search_result_contains (hashmap_search_result *res)$/;" f
+hm_search_result_ptr hashmap.c /^hm_search_result_ptr (hashmap_search_result *res)$/;" f
+hm_search_result_size hashmap.c /^hm_search_result_size (hashmap_search_result *res)$/;" f
+hmiadd hashmap.c /^hmiadd (hashmap *hm, size_t s, ssize_t o) {$/;" f file:
+hms alloc.h /^ hashmap_shared hms;$/;" m struct:alloc_iface_shared
+id hashmap.c /^ memid id;$/;" m struct:__anon13 file:
+image libcoarraynative.h /^} image;$/;" t typeref:struct:__anon10
+image_main_wrapper master.c /^image_main_wrapper (void (*image_main) (void), int this_image_num)$/;" f file:
+image_num libcoarraynative.h /^ int image_num;$/;" m struct:__anon10
+image_status libcoarraynative.h /^} image_status;$/;" t typeref:enum:__anon7
+image_tracker libcoarraynative.h /^} image_tracker;$/;" t typeref:struct:__anon8
+images libcoarraynative.h /^ image_tracker images[];$/;" m struct:__anon9
+images master.c /^ struct image_status * images;$/;" m struct:__anon4 typeref:struct:__anon4::image_status file:
+init_collsub collective_subroutine.c /^init_collsub (void) {$/;" f
+init_collsub collective_subroutine.h /^internal_proto (init_collsub);$/;" v
+initialize_shared_mutex util.c /^initialize_shared_mutex (pthread_mutex_t *mutex)$/;" f
+initialized lock.h /^ int initialized;$/;" m struct:__anon12
+ipcollsub libcoarraynative.h /^} ipcollsub;$/;" t typeref:struct:__anon6
+last_base shared_memory.c /^last_base (shared_memory_act *mem)$/;" f file:
+last_seen_size shared_memory.c /^ size_t last_seen_size;$/;" m struct:shared_memory_act file:
+local coarraynative.c /^nca_local_data *local = NULL;$/;" v
+local_alloc shared_memory.c /^ struct local_alloc {$/;" s struct:shared_memory_act file:
+lock alloc.h /^ pthread_mutex_t lock;$/;" m struct:alloc_iface_shared
+lock_array lock.h /^} lock_array;$/;" t typeref:struct:__anon12
+lock_table sync.c /^lock_table (sync_iface *si)$/;" f file:
+m libcoarraynative.h /^ master *m;$/;" m struct:__anon10
+main malloc_test.c /^int main()$/;" f
+map_memory shared_memory.c /^map_memory (int fd, size_t size, off_t offset)$/;" f file:
+master libcoarraynative.h /^} master;$/;" t typeref:struct:__anon9
+master master.c /^} master;$/;" t typeref:struct:__anon4 file:
+max_lookahead hashmap.c /^ int max_lookahead; $/;" m struct:__anon13 file:
+maximg libcoarraynative.h /^ int maximg;$/;" m struct:__anon6
+mem alloc.h /^ shared_memory *mem;$/;" m struct:alloc_iface
+memid hashmap.h /^typedef intptr_t memid;$/;" t
+meta shared_memory.c /^ global_shared_memory_meta *meta;$/;" m struct:shared_memory_act file:
+n_ent hashmap.c /^static ssize_t n_ent;$/;" v file:
+nca_co_broadcast collective_subroutine.c /^export_proto (nca_co_broadcast);$/;" v
+nca_co_broadcast collective_subroutine.c /^nca_co_broadcast (gfc_array_char * restrict a, int source_image,$/;" f
+nca_coarray_alloc wrapper.c /^export_proto (nca_coarray_alloc);$/;" v
+nca_coarray_alloc wrapper.c /^nca_coarray_alloc (gfc_array_void *desc, int elem_size, int corank,$/;" f
+nca_coarray_free wrapper.c /^export_proto (nca_coarray_free);$/;" v
+nca_coarray_free wrapper.c /^nca_coarray_free (gfc_array_void *desc, int alloc_type)$/;" f
+nca_coarray_num_images wrapper.c /^export_proto (nca_coarray_num_images);$/;" v
+nca_coarray_num_images wrapper.c /^nca_coarray_num_images (int distance __attribute__((unused)))$/;" f
+nca_coarray_sync_all wrapper.c /^export_proto (nca_coarray_sync_all);$/;" v
+nca_coarray_sync_all wrapper.c /^nca_coarray_sync_all (int *stat __attribute__((unused)))$/;" f
+nca_coarray_this_image wrapper.c /^export_proto (nca_coarray_this_image);$/;" v
+nca_coarray_this_image wrapper.c /^nca_coarray_this_image (int distance __attribute__((unused)))$/;" f
+nca_collsub_reduce_array wrapper.c /^export_proto (nca_collsub_reduce_array);$/;" v
+nca_collsub_reduce_array wrapper.c /^nca_collsub_reduce_array (gfc_array_void *desc, void (*assign_function) (void *, void *),$/;" f
+nca_collsub_reduce_scalar wrapper.c /^export_proto (nca_collsub_reduce_scalar);$/;" v
+nca_collsub_reduce_scalar wrapper.c /^nca_collsub_reduce_scalar (void *obj, index_type elem_size,$/;" f
+nca_local_data libcoarraynative.h /^} nca_local_data;$/;" t typeref:struct:__anon11
+nca_lock wrapper.c /^export_proto (nca_lock);$/;" v
+nca_lock wrapper.c /^nca_lock (void *lock)$/;" f
+nca_master coarraynative.c /^nca_master (void (*image_main) (void)) {$/;" f
+nca_master master.c /^nca_master (void (*image_main) (void)) {$/;" f
+nca_master master.c /^nca_master (void (*image_main) (void))$/;" f
+nca_sync_images wrapper.c /^export_proto (nca_sync_images);$/;" v
+nca_sync_images wrapper.c /^nca_sync_images (size_t s, int *images,$/;" f
+nca_unlock wrapper.c /^export_proto (nca_unlock);$/;" v
+nca_unlock wrapper.c /^nca_unlock (void *lock)$/;" f
+new_base_mapping shared_memory.c /^new_base_mapping (shared_memory_act *mem)$/;" f file:
+next allocator.c /^ shared_mem_ptr next;$/;" m struct:__anon1 file:
+next_power_of_two util.c /^next_power_of_two(size_t size) {$/;" f
+num_entries hashmap.c /^num_entries (hashmap_entry *data, size_t size)$/;" f file:
+num_images libcoarraynative.h /^ int num_images;$/;" m struct:__anon11
+num_local_allocs shared_memory.c /^ size_t num_local_allocs;$/;" m struct:shared_memory_act file:
+offset shared_memory.h /^ ssize_t offset;$/;" m struct:shared_mem_ptr
+owner lock.h /^ int owner;$/;" m struct:__anon12
+p hashmap.c /^ shared_mem_ptr p; \/* If p == SHMPTR_NULL, the entry is empty. *\/$/;" m struct:__anon13 file:
+p hashmap.h /^ shared_mem_ptr p;$/;" m struct:__anon15
+pagesize util.c /^size_t pagesize = 1<<17;$/;" v
+pid libcoarraynative.h /^ pid_t pid;$/;" m struct:__anon8
+prepare_collective_subroutine collective_subroutine.c /^prepare_collective_subroutine (size_t size)$/;" f
+prepare_collective_subroutine collective_subroutine.h /^internal_proto (prepare_collective_subroutine);$/;" v
+refcnt hashmap.c /^ int refcnt;$/;" m struct:__anon13 file:
+res_offset hashmap.h /^ ssize_t res_offset;$/;" m struct:__anon15
+resize_hm hashmap.c /^resize_hm (hashmap *hm, hashmap_entry **data)$/;" f file:
+round_to_pagesize util.c /^round_to_pagesize(size_t s) {$/;" f
+s allocator.h /^ allocator_shared *s;$/;" m struct:__anon17
+s collective_subroutine.h /^ collsub_iface_shared *s;$/;" m struct:collsub_iface
+s hashmap.c /^ size_t s;$/;" m struct:__anon13 file:
+s hashmap.h /^ hashmap_shared *s;$/;" m struct:hashmap
+scan_empty hashmap.c /^scan_empty (hashmap *hm, ssize_t expected_off, memid id)$/;" f file:
+scan_inside_lookahead hashmap.c /^scan_inside_lookahead (hashmap *hm, ssize_t expected_off, memid id)$/;" f file:
+serialize_memory serialize.c /^export_proto (serialize_memory);$/;" v
+serialize_memory serialize.c /^serialize_memory (gfc_array_char * const restrict source, char *d)$/;" f
+shared_free allocator.c /^shared_free (allocator *a, shared_mem_ptr p, size_t size) {$/;" f
+shared_malloc allocator.c /^shared_malloc (allocator *a, size_t size)$/;" f
+shared_mem_ptr shared_memory.h /^typedef struct shared_mem_ptr$/;" s
+shared_mem_ptr shared_memory.h /^} shared_mem_ptr;$/;" t typeref:struct:shared_mem_ptr
+shared_mem_ptr_to_void_ptr shared_memory.c /^shared_mem_ptr_to_void_ptr(shared_memory_act **pmem, shared_mem_ptr smp)$/;" f
+shared_mem_ptr_to_void_ptr shared_memory.h /^internal_proto (shared_mem_ptr_to_void_ptr);$/;" v
+shared_memory shared_memory.h /^typedef struct shared_memory_act * shared_memory;$/;" t typeref:struct:shared_memory_act
+shared_memory_act shared_memory.c /^typedef struct shared_memory_act$/;" s file:
+shared_memory_act shared_memory.c /^} shared_memory_act;$/;" t typeref:struct:shared_memory_act file:
+shared_memory_get_mem_with_alignment shared_memory.c /^shared_memory_get_mem_with_alignment (shared_memory_act **pmem, size_t size,$/;" f
+shared_memory_get_mem_with_alignment shared_memory.h /^internal_proto (shared_memory_get_mem_with_alignment);$/;" v
+shared_memory_init shared_memory.c /^shared_memory_init (shared_memory_act **pmem)$/;" f
+shared_memory_init shared_memory.h /^internal_proto (shared_memory_init);$/;" v
+shared_memory_prepare shared_memory.c /^shared_memory_prepare (shared_memory_act **pmem)$/;" f
+shared_memory_prepare shared_memory.h /^internal_proto (shared_memory_prepare);$/;" v
+shm allocator.h /^ shared_memory *shm;$/;" m struct:__anon17
+si libcoarraynative.h /^ sync_iface si;$/;" m struct:__anon11
+size hashmap.h /^ size_t size;$/;" m struct:__anon14
+size hashmap.h /^ size_t size;$/;" m struct:__anon15
+size shared_memory.c /^ size_t size;$/;" m struct:shared_memory_act::local_alloc file:
+size shared_memory.c /^ size_t size;$/;" m struct:__anon18 file:
+sm collective_subroutine.h /^ shared_memory *sm;$/;" m struct:collsub_iface
+sm hashmap.h /^ shared_memory *sm;$/;" m struct:hashmap
+sm libcoarraynative.h /^ shared_memory sm;$/;" m struct:__anon11
+sm sync.h /^ shared_memory *sm;$/;" m struct:__anon3
+status libcoarraynative.h /^ image_status status;$/;" m struct:__anon8
+sync_all sync.c /^sync_all (sync_iface *si)$/;" f
+sync_all sync.h /^ pthread_barrier_t sync_all;$/;" m struct:__anon2
+sync_all sync.h /^internal_proto (sync_all);$/;" v
+sync_all_init sync.c /^sync_all_init (pthread_barrier_t *b)$/;" f file:
+sync_iface sync.h /^} sync_iface;$/;" t typeref:struct:__anon3
+sync_iface_init sync.c /^sync_iface_init (sync_iface *si, alloc_iface *ai, shared_memory *sm)$/;" f
+sync_iface_init sync.h /^internal_proto (sync_iface_init);$/;" v
+sync_iface_shared sync.h /^} sync_iface_shared;$/;" t typeref:struct:__anon2
+sync_table sync.c /^sync_table (sync_iface *si, int *images, size_t size)$/;" f
+sync_table sync.h /^internal_proto (sync_table);$/;" v
+table sync.h /^ int *table; \/\/ we can cache the table and the trigger pointers here$/;" m struct:__anon3
+table sync.h /^ shared_mem_ptr table;$/;" m struct:__anon2
+table_lock sync.h /^ pthread_mutex_t table_lock;$/;" m struct:__anon2
+this_image coarraynative.c /^image this_image;$/;" v
+triggers sync.h /^ pthread_cond_t *triggers;$/;" m struct:__anon3
+triggers sync.h /^ shared_mem_ptr triggers;$/;" m struct:__anon2
+unlock_table sync.c /^unlock_table (sync_iface *si)$/;" f file:
+used shared_memory.c /^ size_t used;$/;" m struct:__anon18 file:
+wait_table_cond sync.c /^wait_table_cond (sync_iface *si, pthread_cond_t *cond)$/;" f file:
diff --git a/libgfortran/nca/alloc.c b/libgfortran/nca/alloc.c
new file mode 100644
index 0000000..174fe33
--- /dev/null
+++ b/libgfortran/nca/alloc.c
@@ -0,0 +1,152 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Thomas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* This provides the coarray-specific features (like IDs etc) for
+ allocator.c, in turn calling routines from shared_memory.c.
+*/
+
+#include "libgfortran.h"
+#include "shared_memory.h"
+#include "allocator.h"
+#include "hashmap.h"
+#include "alloc.h"
+
+#include <string.h>
+
+/* Return a local pointer into a shared memory object identified by
+ id. If the object is already found, it has been allocated before,
+ so just increase the reference counter.
+
+ The pointers returned by this function remain valid even if the
+ size of the memory allocation changes (see shared_memory.c). */
+
+static void *
+get_memory_by_id_internal (alloc_iface *iface, size_t size, memid id,
+ bool zero_mem)
+{
+ hashmap_search_result res;
+ shared_mem_ptr shared_ptr;
+ void *ret;
+
+ pthread_mutex_lock (&iface->as->lock);
+ shared_memory_prepare(iface->mem);
+
+ res = hashmap_get (&iface->hm, id);
+
+ if (hm_search_result_contains (&res))
+ {
+ size_t found_size;
+ found_size = hm_search_result_size (&res);
+ if (found_size != size)
+ {
+ dprintf (2, "Size mismatch for coarray allocation id %p: "
+ "found = %lu != size = %lu\n", (void *) id, found_size, size);
+ pthread_mutex_unlock (&iface->as->lock);
+ exit (1);
+ }
+ shared_ptr = hm_search_result_ptr (&res);
+ hashmap_inc (&iface->hm, id, &res);
+ }
+ else
+ {
+ shared_ptr = shared_malloc (&iface->alloc, size);
+ hashmap_set (&iface->hm, id, NULL, shared_ptr, size);
+ }
+
+ ret = SHMPTR_AS (void *, shared_ptr, iface->mem);
+ if (zero_mem)
+ memset (ret, '\0', size);
+
+ pthread_mutex_unlock (&iface->as->lock);
+ return ret;
+}
+
+/* Public entry point: look up or create the shared object identified
+   by ID, leaving the memory contents untouched.  */
+
+void *
+get_memory_by_id (alloc_iface *iface, size_t size, memid id)
+{
+  void *p = get_memory_by_id_internal (iface, size, id, 0);
+  return p;
+}
+
+/* Public entry point: like get_memory_by_id, but the returned memory
+   is additionally zeroed.  */
+
+void *
+get_memory_by_id_zero (alloc_iface *iface, size_t size, memid id)
+{
+  void *p = get_memory_by_id_internal (iface, size, id, 1);
+  return p;
+}
+
+/* Free memory with id.  Free it if this is the last image which
+   holds that memory segment, decrease the reference count otherwise.
+   Exits the image if ID is not currently allocated.  */
+
+void
+free_memory_with_id (alloc_iface* iface, memid id)
+{
+  hashmap_search_result res;
+  int entries_left;
+
+  pthread_mutex_lock (&iface->as->lock);
+  shared_memory_prepare (iface->mem);
+
+  res = hashmap_get (&iface->hm, id);
+  if (!hm_search_result_contains (&res))
+    {
+      pthread_mutex_unlock (&iface->as->lock);
+      char buffer[100];
+      snprintf (buffer, sizeof (buffer), "Error in free_memory_with_id: "
+		"%p not found", (void *) id);
+      /* Pass the message as an argument, never as the format string; a
+	 '%' in the rendered id would otherwise be undefined behavior.  */
+      dprintf (2, "%s\n", buffer);
+      // internal_error (NULL, buffer);
+      exit (1);
+    }
+
+  entries_left = hashmap_dec (&iface->hm, id, &res);
+  assert (entries_left >= 0);
+
+  /* The last image holding the segment returns it to the allocator.  */
+  if (entries_left == 0)
+    shared_free (&iface->alloc, hm_search_result_ptr (&res),
+		 hm_search_result_size (&res));
+
+  pthread_mutex_unlock (&iface->as->lock);
+}
+
+/* Allocate the shared memory interface. This is called before we have
+ multiple images. */
+
+void
+alloc_iface_init (alloc_iface *iface, shared_memory *mem)
+{
+
+ /* Carve the shared part of the interface directly out of the raw
+ shared memory segment. */
+ iface->as = SHARED_MEMORY_RAW_ALLOC_PTR (mem, alloc_iface_shared);
+ iface->mem = mem;
+ /* NOTE(review): presumably initializes a process-shared mutex, since
+ images appear to be separate processes — confirm in util.c. */
+ initialize_shared_mutex (&iface->as->lock);
+ allocator_init (&iface->alloc, &iface->as->allocator_s, mem);
+ hashmap_init (&iface->hm, &iface->as->hms, &iface->alloc, mem);
+}
+
+/* Return the low-level allocator embedded in the interface IFACE. */
+
+allocator *
+get_allocator (alloc_iface * iface)
+{
+ return &iface->alloc;
+}
diff --git a/libgfortran/nca/alloc.h b/libgfortran/nca/alloc.h
new file mode 100644
index 0000000..f65121c
--- /dev/null
+++ b/libgfortran/nca/alloc.h
@@ -0,0 +1,67 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef ALLOC_H
+#define ALLOC_H
+
+#include "allocator.h"
+#include "hashmap.h"
+
+/* High-level interface for shared memory allocation. */
+
+/* This part of the alloc interface goes into shared memory. */
+
+typedef struct alloc_iface_shared
+{
+ allocator_shared allocator_s;
+ hashmap_shared hms;
+ pthread_mutex_t lock; /* Serializes all allocation and freeing. */
+} alloc_iface_shared;
+
+/* This is the local part. */
+
+typedef struct alloc_iface
+{
+ alloc_iface_shared *as; /* Shared part, lives in shared memory. */
+ shared_memory *mem; /* Underlying shared memory segment. */
+ allocator alloc; /* Low-level bucket allocator. */
+ hashmap hm; /* Maps memids to their allocations. */
+} alloc_iface;
+
+/* Look up or create the object identified by a memid; the _zero
+ variant additionally clears the returned memory. */
+
+void *get_memory_by_id (alloc_iface *, size_t, memid);
+internal_proto (get_memory_by_id);
+
+void *get_memory_by_id_zero (alloc_iface *, size_t, memid);
+internal_proto (get_memory_by_id_zero);
+
+/* Drop one reference to the object; frees it on the last reference. */
+
+void free_memory_with_id (alloc_iface *, memid);
+internal_proto (free_memory_with_id);
+
+/* Set up the interface; called before images are spawned. */
+
+void alloc_iface_init (alloc_iface *, shared_memory *);
+internal_proto (alloc_iface_init);
+
+allocator *get_allocator (alloc_iface *);
+internal_proto (get_allocator);
+
+#endif
diff --git a/libgfortran/nca/allocator.c b/libgfortran/nca/allocator.c
new file mode 100644
index 0000000..e7aa9fd
--- /dev/null
+++ b/libgfortran/nca/allocator.c
@@ -0,0 +1,90 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* A malloc() - and free() - like interface, but for shared memory
+ pointers, except that we pass the size to free as well. */
+
+#include "libgfortran.h"
+#include "shared_memory.h"
+#include "allocator.h"
+
+/* Free-list node, stored in place inside each freed block; hence every
+ allocation is at least sizeof (bucket) bytes. */
+
+typedef struct {
+ shared_mem_ptr next;
+} bucket;
+
+/* Initialize the allocator. */
+
+void
+allocator_init (allocator *a, allocator_shared *s, shared_memory *sm)
+{
+ a->s = s;
+ a->shm = sm;
+ /* One (initially empty) free list per power-of-two size class. */
+ for (int i = 0; i < PTR_BITS; i++)
+ s->free_bucket_head[i] = SHMPTR_NULL;
+}
+
+/* Main allocation routine, works like malloc. Round up allocations
+ to the next power of two and keep free lists in buckets. */
+
+#define MAX_ALIGN 16
+
+shared_mem_ptr
+shared_malloc (allocator *a, size_t size)
+{
+ shared_mem_ptr ret;
+ size_t sz;
+ size_t act_size;
+ int bucket_list_index;
+
+ /* NOTE(review): no locking here — assumes the caller serializes
+ access (alloc.c holds iface->as->lock); confirm for other callers. */
+
+ /* Round up to a power of two, but never below sizeof (bucket),
+ because a freed block must be able to hold a free-list node. */
+ sz = next_power_of_two (size);
+ act_size = sz > sizeof (bucket) ? sz : sizeof (bucket);
+ /* The size class is the leading-zero count of the (power-of-two)
+ size, so each class gets its own free list. */
+ bucket_list_index = __builtin_clzl(act_size);
+
+ /* Empty free list for this class: carve fresh memory out of the
+ shared memory segment. */
+ if (SHMPTR_IS_NULL (a->s->free_bucket_head[bucket_list_index]))
+ return shared_memory_get_mem_with_alignment (a->shm, act_size, MAX_ALIGN);
+
+ /* Otherwise pop the head of the free list and reuse it. */
+ ret = a->s->free_bucket_head[bucket_list_index];
+ a->s->free_bucket_head[bucket_list_index]
+ = (SHMPTR_AS (bucket *, ret, a->shm)->next);
+ assert(ret.offset != 0);
+ return ret;
+}
+
+/* Return the block P of SIZE bytes to the allocator by pushing it onto
+   the free list of its power-of-two size class.  Memory is never handed
+   back to the operating system.  */
+
+void
+shared_free (allocator *a, shared_mem_ptr p, size_t size)
+{
+  size_t rounded = next_power_of_two (size);
+  size_t class_size = rounded > sizeof (bucket) ? rounded : sizeof (bucket);
+  int class_index = __builtin_clzl (class_size);
+
+  /* The free-list link is stored inside the freed block itself.  */
+  bucket *node = SHMPTR_AS (bucket *, p, a->shm);
+  node->next = a->s->free_bucket_head[class_index];
+  a->s->free_bucket_head[class_index] = p;
+}
diff --git a/libgfortran/nca/allocator.h b/libgfortran/nca/allocator.h
new file mode 100644
index 0000000..306022a
--- /dev/null
+++ b/libgfortran/nca/allocator.h
@@ -0,0 +1,21 @@
+/* Low-level bucket allocator for shared memory; see allocator.c. */
+
+#ifndef SHARED_ALLOCATOR_HDR
+#define SHARED_ALLOCATOR_HDR
+
+#include "util.h"
+#include "shared_memory.h"
+
+/* Shared part: one free-list head per power-of-two size class. */
+
+typedef struct {
+ shared_mem_ptr free_bucket_head[PTR_BITS];
+} allocator_shared;
+
+/* Per-image handle referencing the shared state. */
+
+typedef struct {
+ allocator_shared *s;
+ shared_memory *shm;
+} allocator;
+
+void allocator_init (allocator *, allocator_shared *, shared_memory *);
+
+/* malloc/free-like interface; note that the caller must pass the
+ allocation size to shared_free as well. */
+
+shared_mem_ptr shared_malloc (allocator *, size_t size);
+void shared_free (allocator *, shared_mem_ptr, size_t size);
+
+#endif
diff --git a/libgfortran/nca/coarraynative.c b/libgfortran/nca/coarraynative.c
new file mode 100644
index 0000000..c9d13ee
--- /dev/null
+++ b/libgfortran/nca/coarraynative.c
@@ -0,0 +1,145 @@
+/* Copyright (C) 2019-2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include "libgfortran.h"
+#include "libcoarraynative.h"
+#include "allocator.h"
+#include "hashmap.h"
+#include "util.h"
+#include "lock.h"
+#include "collective_subroutine.h"
+
+#include <unistd.h>
+#include <sys/mman.h>
+// #include <stdlib.h>
+#include <sys/wait.h>
+
+#define GFORTRAN_ENV_NUM_IMAGES "GFORTRAN_NUM_IMAGES"
+
+/* Per-process pointer to this image's local bookkeeping; NULL until
+ ensure_initialization has run. */
+nca_local_data *local = NULL;
+
+/* Identity of the current image, set in the child by
+ image_main_wrapper. */
+image this_image;
+
+/* Determine the number of images: from GFORTRAN_NUM_IMAGES if set,
+ otherwise the number of online processors. */
+static int
+get_environ_image_num (void)
+{
+ char *num_images_char;
+ int nimages;
+ num_images_char = getenv (GFORTRAN_ENV_NUM_IMAGES);
+ if (!num_images_char)
+ return sysconf (_SC_NPROCESSORS_ONLN); /* TODO: Make portable. */
+ /* TODO: Error checking. */
+ nimages = atoi (num_images_char);
+ return nimages;
+}
+
+/* Idempotent one-time setup: allocate the local state and initialize
+ the shared-memory region and all sub-interfaces. Called before any
+ coarray operation. */
+void
+ensure_initialization(void)
+{
+ if (local)
+ return;
+
+ local = malloc(sizeof(nca_local_data)); // Is malloc already init'ed at that
+ // point? Maybe use mmap(MAP_ANON)
+ // instead
+ pagesize = sysconf (_SC_PAGE_SIZE);
+ local->num_images = get_environ_image_num ();
+ shared_memory_init (&local->sm);
+ shared_memory_prepare (&local->sm);
+ alloc_iface_init (&local->ai, &local->sm);
+ collsub_iface_init (&local->ci, &local->ai, &local->sm);
+ sync_iface_init (&local->si, &local->ai, &local->sm);
+}
+
+/* Runs in the forked child: record this image's identity, wait for
+ all images at the initial barrier, run the user program, and exit.
+ Never returns. */
+static void __attribute__((noreturn))
+image_main_wrapper (void (*image_main) (void), image *this)
+{
+ this_image = *this;
+
+ sync_all(&local->si);
+
+ image_main ();
+
+ exit (0);
+}
+
+/* Allocate the master bookkeeping structure (shared by all images)
+ with a trailing per-image status array.
+ NOTE(review): the tail is sized sizeof(image_status) per image, but
+ images[] entries are written with both .pid and .status below,
+ which suggests the element type is image_tracker (status + pid) —
+ if so this under-allocates; verify against the master definition. */
+static master *
+get_master (void) {
+ master *m;
+ m = SHMPTR_AS (master *,
+ shared_memory_get_mem_with_alignment
+ (&local->sm,
+ sizeof (master) + sizeof(image_status) * local->num_images,
+ __alignof__(master)), &local->sm);
+ m->has_failed_image = 0;
+ return m;
+}
+
+/* This is called from main, with a pointer to the user's program as
+ argument. It forks the images and waits for their completion. */
+
+void
+nca_master (void (*image_main) (void)) {
+ master *m;
+ int i, j;
+ pid_t new;
+ image im;
+ int exit_code = 0;
+ int chstatus;
+ ensure_initialization();
+ m = get_master();
+
+ im.m = m;
+
+ /* Fork one child per image; the parent records pid and status.
+ NOTE(review): on fork failure (new == -1) the loop still stores
+ pid = -1 with status IMAGE_OK and keeps forking; consider
+ bailing out instead. */
+ for (im.image_num = 0; im.image_num < local->num_images; im.image_num++)
+ {
+ if ((new = fork()))
+ {
+ if (new == -1)
+ {
+ dprintf(2, "error spawning child\n");
+ exit_code = 1;
+ }
+ m->images[im.image_num].pid = new;
+ m->images[im.image_num].status = IMAGE_OK;
+ }
+ else
+ image_main_wrapper(image_main, &im);
+ }
+ /* Reap all children; mark any abnormal exit as a failed image.
+ NOTE(review): if wait() returns a pid not found in images[] the
+ search loop leaves j == num_images and the following stores are
+ out of bounds — add a bounds check. */
+ for (i = 0; i < local->num_images; i++)
+ {
+ new = wait (&chstatus);
+ if (!WIFEXITED (chstatus) || WEXITSTATUS (chstatus))
+ {
+ j = 0;
+ for (; j < local->num_images && m->images[j].pid != new; j++);
+ m->images[j].status = IMAGE_FAILED;
+ m->has_failed_image++; //FIXME: Needs to be atomic, probably
+ dprintf (2, "ERROR: Image %d(%#x) failed\n", j, new);
+ exit_code = 1;
+ }
+ }
+ exit (exit_code);
+}
diff --git a/libgfortran/nca/collective_inline.h b/libgfortran/nca/collective_inline.h
new file mode 100644
index 0000000..4e7107b
--- /dev/null
+++ b/libgfortran/nca/collective_inline.h
@@ -0,0 +1,42 @@
+#include "collective_subroutine.h"
+
+/* Leave a collective subroutine: all images synchronize on the
+ collsub barrier so the shared buffer can be reused. */
+static inline void
+finish_collective_subroutine (collsub_iface *ci)
+{
+ collsub_sync (ci);
+}
+
+/* Disabled legacy helpers from an earlier buffer layout; kept for
+ reference only. NOTE(review): get_obj_ptr below is broken
+ ("(char *) + curr_size * image" drops the base pointer) and the
+ call arities do not match the prototypes — do not re-enable
+ without fixing. */
+#if 0
+static inline void *
+get_obj_ptr (void *buffer, int image)
+{
+ return (char *) + curr_size * image;
+}
+
+/* If obj is NULL, copy the object from the entry in this image. */
+static inline void
+copy_to (void *buffer, void *obj, int image)
+{
+ if (obj == 0)
+ obj = get_obj_ptr (this_image.image_num);
+ memcpy (get_obj_ptr (image), obj, curr_size);
+}
+
+static inline void
+copy_out (void *buffer, void *obj, int image)
+{
+ memcpy (obj, get_obj_ptr (image), curr_size);
+}
+
+static inline void
+copy_from (void *buffer, int image)
+{
+ copy_out (get_obj_ptr (this_image.image_num), image);
+}
+
+static inline void
+copy_in (void *buffer, void *obj)
+{
+ copy_to (obj, this_image.image_num);
+}
+#endif
diff --git a/libgfortran/nca/collective_subroutine.c b/libgfortran/nca/collective_subroutine.c
new file mode 100644
index 0000000..8a8a7d6
--- /dev/null
+++ b/libgfortran/nca/collective_subroutine.c
@@ -0,0 +1,416 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <string.h>
+#include "libgfortran.h"
+#include "libcoarraynative.h"
+#include "collective_subroutine.h"
+#include "collective_inline.h"
+#include "allocator.h"
+
+/* Return a pointer to the shared collective-subroutine buffer, first
+ growing it to at least SIZE bytes. Growth frees the old buffer
+ back to the shared allocator and allocates a new one; the whole
+ operation is serialized by the interface mutex. */
+void *
+get_collsub_buf (collsub_iface *ci, size_t size)
+{
+ void *ret;
+
+ pthread_mutex_lock (&ci->s->mutex);
+ if (size > ci->s->curr_size)
+ {
+ shared_free (ci->a, ci->s->collsub_buf, ci->s->curr_size);
+ ci->s->collsub_buf = shared_malloc (ci->a, size);
+ ci->s->curr_size = size;
+ }
+
+ ret = SHMPTR_AS (void *, ci->s->collsub_buf, ci->sm);
+ pthread_mutex_unlock (&ci->s->mutex);
+ return ret;
+}
+
+/* It appears as if glibc's barrier implementation does not spin (at
+ least that is what I got from a quick glance at the source code),
+ so performance would be improved quite a bit if we spun a few times
+ here so we don't run into the futex syscall. */
+
+/* Barrier used between phases of a collective subroutine; every
+ image must reach it the same number of times. */
+void
+collsub_sync (collsub_iface *ci)
+{
+ //dprintf (2, "Calling collsub_sync %d times\n", ++called);
+ pthread_barrier_wait (&ci->s->barrier);
+}
+
+/* assign_function is needed since we only know how to assign the type inside
+ the compiler. It should be implemented as follows:
+
+ void assign_function (void *a, void *b)
+ {
+ *((t *) a) = reduction_operation ((t *) a, (t *) b);
+ }
+
+ */
+
+/* Reduce the array DESC element-wise across all images using a
+ binomial tree: in round cbit, each image whose bit cbit is 0
+ combines with its partner 2^cbit images above it; the final result
+ accumulates in image 0's slot and is copied back on RESULT_IMAGE
+ (or on every image when RESULT_IMAGE is NULL). Non-contiguous
+ arrays are packed into the shared buffer first and unpacked after.
+ NOTE(review): the early return for num_elem == 0 skips every
+ barrier; if images could disagree on the extent this would
+ deadlock — confirm all images always see the same shape. */
+void
+collsub_reduce_array (collsub_iface *ci, gfc_array_char *desc, int *result_image,
+ void (*assign_function) (void *, void *))
+{
+ void *buffer;
+ pack_info pi;
+ bool packed;
+ int cbit = 0;
+ int imoffset;
+ index_type elem_size;
+ index_type this_image_size_bytes;
+ char *this_image_buf;
+
+ packed = pack_array_prepare (&pi, desc);
+ if (pi.num_elem == 0)
+ return;
+
+ elem_size = GFC_DESCRIPTOR_SIZE (desc);
+ this_image_size_bytes = elem_size * pi.num_elem;
+
+ /* Each image owns a contiguous slot of the shared buffer. */
+ buffer = get_collsub_buf (ci, this_image_size_bytes * local->num_images);
+ this_image_buf = buffer + this_image_size_bytes * this_image.image_num;
+
+ if (packed)
+ memcpy (this_image_buf, GFC_DESCRIPTOR_DATA (desc), this_image_size_bytes);
+ else
+ pack_array_finish (&pi, desc, this_image_buf);
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ /* Reduce arrays elementwise. */
+ for (size_t i = 0; i < pi.num_elem; i++)
+ assign_function (this_image_buf + elem_size * i,
+ this_image_buf + this_image_size_bytes * imoffset + elem_size * i);
+
+ collsub_sync (ci);
+ }
+ /* Images that dropped out early must still hit the remaining
+ barriers so every image performs the same number of syncs. */
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || *result_image == this_image.image_num)
+ {
+ if (packed)
+ memcpy (GFC_DESCRIPTOR_DATA (desc), buffer, this_image_size_bytes);
+ else
+ unpack_array_finish(&pi, desc, buffer);
+ }
+
+ finish_collective_subroutine (ci);
+}
+
+/* Scalar counterpart of collsub_reduce_array: same binomial-tree
+ reduction over one object of ELEM_SIZE bytes per image; the result
+ ends up in image 0's slot and is copied out on RESULT_IMAGE (or on
+ all images when RESULT_IMAGE is NULL). */
+void
+collsub_reduce_scalar (collsub_iface *ci, void *obj, index_type elem_size,
+ int *result_image,
+ void (*assign_function) (void *, void *))
+{
+ void *buffer;
+ int cbit = 0;
+ int imoffset;
+ char *this_image_buf;
+
+ buffer = get_collsub_buf (ci, elem_size * local->num_images);
+ this_image_buf = buffer + elem_size * this_image.image_num;
+
+ memcpy (this_image_buf, obj, elem_size);
+
+ collsub_sync (ci);
+ for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++)
+ {
+ imoffset = 1 << cbit;
+ if (this_image.image_num + imoffset < local->num_images)
+ /* Combine with the partner 2^cbit slots above. */
+ assign_function (this_image_buf, this_image_buf + elem_size*imoffset);
+
+ collsub_sync (ci);
+ }
+ /* Keep the barrier count identical on every image. */
+ for ( ; (local->num_images >> cbit) != 0; cbit++)
+ collsub_sync (ci);
+
+ if (!result_image || *result_image == this_image.image_num)
+ memcpy(obj, buffer, elem_size);
+
+ finish_collective_subroutine (ci);
+}
+
+/* Do not use sync_all(), because the program should deadlock in the case that
+ * some images are on a sync_all barrier while others are in a collective
+ * subroutine. */
+
+/* Initialize the collective-subroutine interface: allocate an initial
+ shared buffer (one double per image), and set up the process-shared
+ barrier and mutex. Must run before any image forks.
+ NOTE(review): local variable p is unused — remove. */
+void
+collsub_iface_init (collsub_iface *ci, alloc_iface *ai, shared_memory *sm)
+{
+ pthread_barrierattr_t attr;
+ shared_mem_ptr p;
+ ci->s = SHARED_MEMORY_RAW_ALLOC_PTR(sm, collsub_iface_shared);
+
+ ci->s->collsub_buf = shared_malloc(get_allocator(ai), sizeof(double)*local->num_images);
+ ci->s->curr_size = sizeof(double)*local->num_images;
+ ci->sm = sm;
+ ci->a = get_allocator(ai);
+
+ /* The barrier must be usable across processes, not just threads. */
+ pthread_barrierattr_init (&attr);
+ pthread_barrierattr_setpshared (&attr, PTHREAD_PROCESS_SHARED);
+ pthread_barrier_init (&ci->s->barrier, &attr, local->num_images);
+ pthread_barrierattr_destroy(&attr);
+
+ initialize_shared_mutex (&ci->s->mutex);
+}
+
+/* Broadcast one scalar of ELEM_SIZE bytes from SOURCE_IMAGE to all
+ images: the source copies into the shared buffer before the
+ barrier, everyone else copies out after it.
+ NOTE(review): leftover debug dprintf below — remove before
+ release. */
+void
+collsub_broadcast_scalar (collsub_iface *ci, void *obj, index_type elem_size,
+ int source_image /* Adjusted in the wrapper. */)
+{
+ void *buffer;
+
+ buffer = get_collsub_buf (ci, elem_size);
+
+ dprintf(2, "Source image: %d\n", source_image);
+
+ if (source_image == this_image.image_num)
+ {
+ memcpy (buffer, obj, elem_size);
+ collsub_sync (ci);
+ }
+ else
+ {
+ collsub_sync (ci);
+ memcpy (obj, buffer, elem_size);
+ }
+
+ finish_collective_subroutine (ci);
+}
+
+/* Broadcast the array DESC from SOURCE_IMAGE to all images. The
+ source (un)packs into the shared buffer before the barrier; the
+ receivers copy or unpack after it.
+ NOTE(review): the early return for num_elem == 0 skips the barrier
+ and finish_collective_subroutine — safe only if every image sees
+ the same (empty) shape; confirm. */
+void
+collsub_broadcast_array (collsub_iface *ci, gfc_array_char *desc,
+ int source_image)
+{
+ void *buffer;
+ pack_info pi;
+ bool packed;
+ index_type elem_size;
+ index_type size_bytes;
+ char *this_image_buf;
+
+ packed = pack_array_prepare (&pi, desc);
+ if (pi.num_elem == 0)
+ return;
+
+ elem_size = GFC_DESCRIPTOR_SIZE (desc);
+ size_bytes = elem_size * pi.num_elem;
+
+ buffer = get_collsub_buf (ci, size_bytes);
+
+ if (source_image == this_image.image_num)
+ {
+ if (packed)
+ memcpy (buffer, GFC_DESCRIPTOR_DATA (desc), size_bytes);
+ else
+ pack_array_finish (&pi, desc, buffer);
+ collsub_sync (ci);
+ }
+ else
+ {
+ collsub_sync (ci);
+ if (packed)
+ memcpy (GFC_DESCRIPTOR_DATA (desc), buffer, size_bytes);
+ else
+ unpack_array_finish(&pi, desc, buffer);
+ }
+
+ finish_collective_subroutine (ci);
+}
+
+#if 0
+
+void nca_co_broadcast (gfc_array_char *, int, int*, char *, size_t);
+export_proto (nca_co_broadcast);
+
+void
+nca_co_broadcast (gfc_array_char * restrict a, int source_image,
+ int *stat, char *errmsg __attribute__ ((unused)),
+ size_t errmsg_len __attribute__ ((unused)))
+{
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS];
+ index_type extent[GFC_MAX_DIMENSIONS];
+ index_type type_size;
+ index_type dim;
+ index_type span;
+ bool packed, empty;
+ index_type num_elems;
+ index_type ssize, ssize_bytes;
+ char *this_shared_ptr, *other_shared_ptr;
+
+ if (stat)
+ *stat = 0;
+
+ dim = GFC_DESCRIPTOR_RANK (a);
+ type_size = GFC_DESCRIPTOR_SIZE (a);
+
+ /* Source image, gather. */
+ if (source_image - 1 == image_num)
+ {
+ num_elems = 1;
+ if (dim > 0)
+ {
+ span = a->span != 0 ? a->span : type_size;
+ packed = true;
+ empty = false;
+ for (index_type n = 0; n < dim; n++)
+ {
+ count[n] = 0;
+ stride[n] = GFC_DESCRIPTOR_STRIDE (a, n) * span;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (a, n);
+
+ empty = empty || extent[n] <= 0;
+
+ if (num_elems != GFC_DESCRIPTOR_STRIDE (a, n))
+ packed = false;
+
+ num_elems *= extent[n];
+ }
+ ssize_bytes = num_elems * type_size;
+ }
+ else
+ {
+ ssize_bytes = type_size;
+ packed = true;
+ empty = false;
+ }
+
+ prepare_collective_subroutine (ssize_bytes); // broadcast barrier 1
+ this_shared_ptr = get_obj_ptr (image_num);
+ if (packed)
+ memcpy (this_shared_ptr, a->base_addr, ssize_bytes);
+ else
+ {
+ char *src = (char *) a->base_addr;
+ char * restrict dest = this_shared_ptr;
+ index_type stride0 = stride[0];
+
+ while (src)
+ {
+ /* Copy the data. */
+
+ memcpy (dest, src, type_size);
+ dest += type_size;
+ src += stride0;
+ count[0] ++;
+ /* Advance to the next source element. */
+ for (index_type n = 0; count[n] == extent[n] ; )
+ {
+ /* When we get to the end of a dimension, reset it
+ and increment the next dimension. */
+ count[n] = 0;
+ src -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ src = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ src += stride[n];
+ }
+ }
+ }
+ }
+ collsub_sync (ci); /* Broadcast barrier 2. */
+ }
+ else /* Target image, scatter. */
+ {
+ collsub_sync (ci); /* Broadcast barrier 1. */
+ packed = 1;
+ num_elems = 1;
+ span = a->span != 0 ? a->span : type_size;
+
+ for (index_type n = 0; n < dim; n++)
+ {
+ index_type stride_n;
+ count[n] = 0;
+ stride_n = GFC_DESCRIPTOR_STRIDE (a, n);
+ stride[n] = stride_n * type_size;
+ extent[n] = GFC_DESCRIPTOR_EXTENT (a, n);
+ if (extent[n] <= 0)
+ {
+ packed = true;
+ num_elems = 0;
+ break;
+ }
+ if (num_elems != stride_n)
+ packed = false;
+
+ num_elems *= extent[n];
+ }
+ ssize = num_elems * type_size;
+ prepare_collective_subroutine (ssize); /* Broadcaset barrier 2. */
+ other_shared_ptr = get_obj_ptr (source_image - 1);
+ if (packed)
+ memcpy (a->base_addr, other_shared_ptr, ssize);
+ else
+ {
+ char *src = other_shared_ptr;
+ char * restrict dest = (char *) a->base_addr;
+ index_type stride0 = stride[0];
+
+ for (index_type n = 0; n < dim; n++)
+ count[n] = 0;
+
+ while (dest)
+ {
+ memcpy (dest, src, type_size);
+ src += span;
+ dest += stride0;
+ count[0] ++;
+ for (index_type n = 0; count[n] == extent[n] ;)
+ {
+ /* When we get to the end of a dimension, reset it and increment
+ the next dimension. */
+ count[n] = 0;
+ dest -= stride[n] * extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n]++;
+ dest += stride[n];
+ }
+ }
+ }
+ }
+ }
+ finish_collective_subroutine (ci); /* Broadcast barrier 3. */
+}
+
+#endif
diff --git a/libgfortran/nca/collective_subroutine.h b/libgfortran/nca/collective_subroutine.h
new file mode 100644
index 0000000..6147dd6
--- /dev/null
+++ b/libgfortran/nca/collective_subroutine.h
@@ -0,0 +1,44 @@
+
+#ifndef COLLECTIVE_SUBROUTINE_HDR
+#define COLLECTIVE_SUBROUTINE_HDR
+
+#include "shared_memory.h"
+
+typedef struct collsub_iface_shared
+{
+ size_t curr_size;
+ shared_mem_ptr collsub_buf;
+ pthread_barrier_t barrier;
+ pthread_mutex_t mutex;
+} collsub_iface_shared;
+
+typedef struct collsub_iface
+{
+ collsub_iface_shared *s;
+ allocator *a;
+ shared_memory *sm;
+} collsub_iface;
+
+void collsub_broadcast_scalar (collsub_iface *, void *, index_type, int);
+internal_proto (collsub_broadcast_scalar);
+
+void collsub_broadcast_array (collsub_iface *, gfc_array_char *, int);
+internal_proto (collsub_broadcast_array);
+
+void collsub_reduce_array (collsub_iface *, gfc_array_char *, int *,
+ void (*) (void *, void *));
+internal_proto (collsub_reduce_array);
+
+void collsub_reduce_scalar (collsub_iface *, void *, index_type, int *,
+ void (*) (void *, void *));
+internal_proto (collsub_reduce_scalar);
+
+void collsub_sync (collsub_iface *);
+internal_proto (collsub_sync);
+
+void collsub_iface_init (collsub_iface *, alloc_iface *, shared_memory *);
+internal_proto (collsub_iface_init);
+
+void * get_collsub_buf (collsub_iface *ci, size_t size);
+internal_proto (get_collsub_buf);
+#endif
diff --git a/libgfortran/nca/hashmap.c b/libgfortran/nca/hashmap.c
new file mode 100644
index 0000000..61f5487
--- /dev/null
+++ b/libgfortran/nca/hashmap.c
@@ -0,0 +1,447 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "libgfortran.h"
+#include "hashmap.h"
+#include <string.h>
+
+/* Initial table size (2^INITIAL_BITNUM slots) and the maximum linear
+ probe distance before the table is resized. */
+#define INITIAL_BITNUM (5)
+#define INITIAL_SIZE (1<<INITIAL_BITNUM)
+#define CRITICAL_LOOKAHEAD (16)
+
+/* Per-process debug counter of live entries (maintained in
+ hashmap_set / hashmap_change_refcnt; not shared between images). */
+static ssize_t n_ent;
+
+typedef struct {
+ memid id;
+ shared_mem_ptr p; /* If p == SHMPTR_NULL, the entry is empty. */
+ size_t s;
+ int max_lookahead;
+ int refcnt;
+} hashmap_entry;
+
+/* Count occupied slots in DATA (debug/statistics helper).
+ NOTE(review): i is ssize_t compared against size_t SIZE — fine for
+ realistic sizes, but a signed/unsigned compare warning waiting to
+ happen. */
+static ssize_t
+num_entries (hashmap_entry *data, size_t size)
+{
+ ssize_t i;
+ ssize_t ret = 0;
+ for (i = 0; i < size; i++)
+ {
+ if (!SHMPTR_IS_NULL (data[i].p))
+ ret ++;
+ }
+ return ret;
+}
+
+/* 64 bit to 64 bit hash function. */
+
+/*
+static inline uint64_t
+hash (uint64_t x)
+{
+ return x * 11400714819323198485lu;
+}
+*/
+
+#define ASSERT_HM(hm, cond) assert_hashmap(hm, cond, #cond)
+
+/* Like assert, but dumps the hashmap before aborting so the failure
+ state can be inspected.
+ NOTE(review): COND is passed to dprintf as the format string — a
+ stringified condition containing '%' would be misinterpreted; use
+ dprintf(2, "%s", cond) instead (CERT FIO30-C). */
+static void
+assert_hashmap(hashmap *hm, bool asserted, const char *cond)
+{
+ if (!asserted)
+ {
+ dprintf(2, cond);
+ dump_hm(hm);
+ }
+ assert(asserted);
+}
+
+/* 64-bit finalizer-style mixer (splitmix64 constants): spreads key
+ bits so the top bits used by get_expected_offset are well mixed. */
+static inline uint64_t
+hash (uint64_t key)
+{
+ key ^= (key >> 30);
+ key *= 0xbf58476d1ce4e5b9ul;
+ key ^= (key >> 27);
+ key *= 0x94d049bb133111ebul;
+ key ^= (key >> 31);
+
+ return key;
+}
+
+/* Gets a pointer to the current data in the hashmap. */
+static inline hashmap_entry *
+get_data(hashmap *hm)
+{
+ return SHMPTR_AS (hashmap_entry *, hm->s->data, hm->sm);
+}
+
+/* Generate mask from current number of bits.
+ NOTE(review): the shift is done in int; once bitnum reaches 31
+ this overflows — use ((intptr_t) 1 << bitnum) if tables that large
+ are possible. */
+
+static inline intptr_t
+gen_mask (hashmap *hm)
+{
+ return (1 << hm->s->bitnum) - 1;
+}
+
+/* Add with wrap-around at hashmap size. */
+
+static inline size_t
+hmiadd (hashmap *hm, size_t s, ssize_t o) {
+ return (s + o) & gen_mask (hm);
+}
+
+/* Get the expected offset for entry id: the top bitnum bits of the
+ mixed hash, i.e. the slot the id hashes to before probing. */
+
+static inline ssize_t
+get_expected_offset (hashmap *hm, memid id)
+{
+ return hash(id) >> (PTR_BITS - hm->s->bitnum);
+}
+
+/* Initialize the hashmap. Allocates the initial table from the
+ shared allocator and marks every slot empty. The explicit
+ SHMPTR_NULL loop after the memset covers the case where the null
+ shared pointer is not all-zero bits. */
+
+void
+hashmap_init (hashmap *hm, hashmap_shared *hs, allocator *a,
+ shared_memory *mem)
+{
+ hashmap_entry *data;
+ hm->s = hs;
+ hm->sm = mem;
+ hm->s->data = shared_malloc (a, INITIAL_SIZE * sizeof(hashmap_entry));
+ data = get_data (hm);
+ memset(data, '\0', INITIAL_SIZE*sizeof(hashmap_entry));
+
+ for (int i = 0; i < INITIAL_SIZE; i++)
+ data[i].p = SHMPTR_NULL;
+
+ hm->s->size = INITIAL_SIZE;
+ hm->s->bitnum = INITIAL_BITNUM;
+ hm->a = a;
+}
+
+/* This checks if the entry id exists in that range the range between
+ the expected position and the maximum lookahead. Returns the
+ ABSOLUTE table index on success, -1 if not found. (Contrast with
+ scan_empty below, which returns a RELATIVE offset.) */
+
+static ssize_t
+scan_inside_lookahead (hashmap *hm, ssize_t expected_off, memid id)
+{
+ ssize_t lookahead;
+ hashmap_entry *data;
+
+ data = get_data (hm);
+ lookahead = data[expected_off].max_lookahead;
+ ASSERT_HM (hm, lookahead < CRITICAL_LOOKAHEAD);
+
+ for (int i = 0; i <= lookahead; i++) /* For performance, this could
+ iterate backwards. */
+ if (data[hmiadd (hm, expected_off, i)].id == id)
+ return hmiadd (hm, expected_off, i);
+
+ return -1;
+}
+
+/* Scan for the next empty slot we can use. Returns offset relative
+ to the expected position, or -1 if no slot is free within
+ CRITICAL_LOOKAHEAD (caller must resize). */
+
+static ssize_t
+scan_empty (hashmap *hm, ssize_t expected_off, memid id)
+{
+ hashmap_entry *data;
+
+ data = get_data(hm);
+ for (int i = 0; i < CRITICAL_LOOKAHEAD; i++)
+ if (SHMPTR_IS_NULL (data[hmiadd (hm, expected_off, i)].p))
+ return i;
+
+ return -1;
+}
+
+/* Search the hashmap for id. On a miss only .p is set (to
+ SHMPTR_NULL); .size and .res_offset are left indeterminate, so
+ callers must check hm_search_result_contains first. */
+
+hashmap_search_result
+hashmap_get (hashmap *hm, memid id)
+{
+ hashmap_search_result ret;
+ hashmap_entry *data;
+ size_t expected_offset;
+ ssize_t res;
+
+ data = get_data (hm);
+ expected_offset = get_expected_offset (hm, id);
+ res = scan_inside_lookahead (hm, expected_offset, id);
+
+ if (res != -1)
+ ret = ((hashmap_search_result)
+ { .p = data[res].p, .size=data[res].s, .res_offset = res });
+ else
+ ret.p = SHMPTR_NULL;
+
+ return ret;
+}
+
+/* Return size of a hashmap search result. */
+size_t
+hm_search_result_size (hashmap_search_result *res)
+{
+ return res->size;
+}
+
+/* Return pointer of a hashmap search result. */
+
+shared_mem_ptr
+hm_search_result_ptr (hashmap_search_result *res)
+{
+ return res->p;
+}
+
+/* Return whether a hashmap search result found an entry. */
+
+bool
+hm_search_result_contains (hashmap_search_result *res)
+{
+ return !SHMPTR_IS_NULL(res->p);
+}
+
+/* Enlarge hashmap memory: double hm->s->size, allocate a fresh
+ (empty) table, and hand *DATA back pointing at it. If F, the old
+ table is released to the shared allocator.
+ NOTE(review): shared_free is given OLD_SIZE, an entry COUNT, while
+ the matching shared_malloc was given a byte count — the block goes
+ into the wrong size bucket; should be
+ old_size * sizeof (hashmap_entry).
+ NOTE(review): leftover debug fprintf below. */
+
+static void
+enlarge_hashmap_mem (hashmap *hm, hashmap_entry **data, bool f)
+{
+ shared_mem_ptr old_data_p;
+ size_t old_size;
+
+ old_data_p = hm->s->data;
+ old_size = hm->s->size;
+
+ hm->s->data = shared_malloc (hm->a, (hm->s->size *= 2)*sizeof(hashmap_entry));
+ fprintf (stderr,"enlarge_hashmap_mem: %ld\n", hm->s->data.offset);
+ hm->s->bitnum++;
+
+ /* Mark every slot of the new, larger table empty. */
+ *data = get_data(hm);
+ for (size_t i = 0; i < hm->s->size; i++)
+ (*data)[i] = ((hashmap_entry) { .id = 0, .p = SHMPTR_NULL, .s=0,
+ .max_lookahead = 0, .refcnt=0 });
+
+ if (f)
+ shared_free(hm->a, old_data_p, old_size);
+}
+
+/* Resize hashmap: grow the table and rehash every live entry into
+ it. If some entry still cannot be placed within
+ CRITICAL_LOOKAHEAD, grow again and restart the rehash from the old
+ table (retry_resize).
+ NOTE(review): the final shared_free passes OLD_SIZE (an entry
+ count) where shared_malloc was given bytes — same wrong-bucket bug
+ as in enlarge_hashmap_mem.
+ NOTE(review): debug fprintf calls left enabled. */
+
+static void
+resize_hm (hashmap *hm, hashmap_entry **data)
+{
+ shared_mem_ptr old_data_p;
+ hashmap_entry *old_data, *new_data;
+ size_t old_size;
+ ssize_t new_offset, inital_index, new_index;
+ memid id;
+ ssize_t max_lookahead;
+ ssize_t old_count, new_count;
+
+ /* old_data points to the old block containing the hashmap. We
+ redistribute the data from there into the new block. */
+
+ old_data_p = hm->s->data;
+ old_data = *data;
+ old_size = hm->s->size;
+ old_count = num_entries (old_data, old_size);
+
+ fprintf(stderr, "Occupancy at resize: %f\n", ((double) old_count)/old_size);
+
+ //fprintf (stderr,"\n====== Resizing hashmap =========\n\nOld map:\n\n");
+ //dump_hm (hm);
+ enlarge_hashmap_mem (hm, &new_data, false);
+ //fprintf (stderr,"old_data: %p new_data: %p\n", old_data, new_data);
+ retry_resize:
+ for (size_t i = 0; i < old_size; i++)
+ {
+ if (SHMPTR_IS_NULL (old_data[i].p))
+ continue;
+
+ id = old_data[i].id;
+ inital_index = get_expected_offset (hm, id);
+ new_offset = scan_empty (hm, inital_index, id);
+
+ /* If we didn't find a free slot, just resize the hashmap
+ again. */
+ if (new_offset == -1)
+ {
+ enlarge_hashmap_mem (hm, &new_data, true);
+ //fprintf (stderr,"\n====== AGAIN Resizing hashmap =========\n\n");
+ //fprintf (stderr,"old_data: %p new_data %p\n", old_data, new_data);
+ goto retry_resize; /* Sue me. */
+ }
+
+ ASSERT_HM (hm, new_offset < CRITICAL_LOOKAHEAD);
+ new_index = hmiadd (hm, inital_index, new_offset);
+ /* Keep the home slot's probe bound up to date. */
+ max_lookahead = new_data[inital_index].max_lookahead;
+ new_data[inital_index].max_lookahead
+ = new_offset > max_lookahead ? new_offset : max_lookahead;
+
+ new_data[new_index] = ((hashmap_entry) {.id = id, .p = old_data[i].p,
+ .s = old_data[i].s,
+ .max_lookahead = new_data[new_index].max_lookahead,
+ .refcnt = old_data[i].refcnt});
+ }
+ new_count = num_entries (new_data, hm->s->size);
+ //fprintf (stderr,"Number of elements: %ld to %ld\n", old_count, new_count);
+ //fprintf (stderr,"============ After resizing: =======\n\n");
+ //dump_hm (hm);
+
+ shared_free (hm->a, old_data_p, old_size);
+ *data = new_data;
+}
+
+/* Set an entry in the hashmap. If HSR (a prior lookup for ID) is
+ given, update that slot in place; otherwise probe for a free slot,
+ resizing until one is found within CRITICAL_LOOKAHEAD. New entries
+ start with refcnt 1.
+ NOTE(review): leftover debug fprintf below. */
+
+void
+hashmap_set (hashmap *hm, memid id, hashmap_search_result *hsr,
+ shared_mem_ptr p, size_t size)
+{
+ hashmap_entry *data;
+ ssize_t expected_offset, lookahead;
+ ssize_t empty_offset;
+ ssize_t delta;
+
+ // //fprintf (stderr,"hashmap_set: id = %-16p\n", (void *) id);
+ data = get_data(hm);
+
+ /* Fast path: overwrite the already-located slot. */
+ if (hsr) {
+ data[hsr->res_offset].s = size;
+ data[hsr->res_offset].p = p;
+ return;
+ }
+
+ expected_offset = get_expected_offset (hm, id);
+ while ((delta = scan_empty (hm, expected_offset, id)) == -1)
+ {
+ resize_hm (hm, &data);
+ expected_offset = get_expected_offset (hm, id);
+ }
+
+ empty_offset = hmiadd (hm, expected_offset, delta);
+ /* Remember how far entries for this home slot may have probed. */
+ lookahead = data[expected_offset].max_lookahead;
+ data[expected_offset].max_lookahead = delta > lookahead ? delta : lookahead;
+ data[empty_offset] = ((hashmap_entry) {.id = id, .p = p, .s = size,
+ .max_lookahead = data[empty_offset].max_lookahead,
+ .refcnt = 1});
+
+ n_ent ++;
+ fprintf (stderr,"hashmap_set: Setting %p at %p, n_ent = %ld\n", (void *) id, data + empty_offset,
+ n_ent);
+ // dump_hm (hm);
+ // fprintf(stderr, "--------------------------------------------------\n");
+ /* TODO: Shouldn't reset refcnt, but this doesn't matter at the
+ moment because of the way the function is used. */
+}
+
+/* Change the refcount of a hashmap entry by DELTA, removing the entry
+ when it drops to zero; returns the new count. RES, if given, skips
+ the lookup.
+ NOTE(review): if ID is absent, hashmap_get leaves res_offset
+ indeterminate and this dereferences it — callers must guarantee
+ the entry exists. */
+
+static int
+hashmap_change_refcnt (hashmap *hm, memid id, hashmap_search_result *res,
+ int delta)
+{
+ hashmap_entry *data;
+ hashmap_search_result r;
+ hashmap_search_result *pr;
+ int ret;
+ hashmap_entry *entry;
+
+ data = get_data (hm);
+
+ if (res)
+ pr = res;
+ else
+ {
+ r = hashmap_get (hm, id);
+ pr = &r;
+ }
+
+ entry = &data[pr->res_offset];
+ ret = (entry->refcnt += delta);
+ if (ret == 0)
+ {
+ /* Last reference gone: clear the slot so probes see it empty. */
+ n_ent --;
+ //fprintf (stderr, "hashmap_change_refcnt: removing %p at %p, n_ent = %ld\n",
+ // (void *) id, entry, n_ent);
+ entry->id = 0;
+ entry->p = SHMPTR_NULL;
+ entry->s = 0;
+ }
+
+ return ret;
+}
+
+/* Increase hashmap entry refcount. */
+
+void
+hashmap_inc (hashmap *hm, memid id, hashmap_search_result * res)
+{
+ int ret;
+ ret = hashmap_change_refcnt (hm, id, res, 1);
+ ASSERT_HM (hm, ret > 0);
+}
+
+/* Decrease hashmap entry refcount. Returns the remaining count (0
+ means the entry was removed). */
+
+int
+hashmap_dec (hashmap *hm, memid id, hashmap_search_result * res)
+{
+ int ret;
+ ret = hashmap_change_refcnt (hm, id, res, -1);
+ ASSERT_HM (hm, ret >= 0);
+ return ret;
+}
+
+/* Debug printer: PE prints with the current INDENT prefix, which is
+ redefined around the per-slot loop below. */
+#define PE(str, ...) fprintf(stderr, INDENT str, ##__VA_ARGS__)
+#define INDENT ""
+
+/* Dump every slot of HM to stderr: expected slot, id, pointer, size,
+ lookahead and refcount, plus an occupancy summary.
+ NOTE(review): %lu/%lx are used for size_t/offset values — prefer
+ %zu / PRIxPTR for portability to LLP64 targets. */
+void
+dump_hm(hashmap *hm) {
+ hashmap_entry *data;
+ size_t exp;
+ size_t occ_num = 0;
+ PE("h %p (size: %lu, bitnum: %d)\n", hm, hm->s->size, hm->s->bitnum);
+ data = get_data (hm);
+ fprintf (stderr,"offset = %lx data = %p\n", (unsigned long) hm->s->data.offset, data);
+
+#undef INDENT
+#define INDENT " "
+ for (size_t i = 0; i < hm->s->size; i++) {
+ exp = get_expected_offset(hm, data[i].id);
+ if (!SHMPTR_IS_NULL(data[i].p)) {
+ PE("%2lu. (exp: %2lu w la %d) id %#-16lx p %#-14lx s %-7lu -- la %u ref %u %-16p\n",
+ i, exp, data[exp].max_lookahead, data[i].id, data[i].p.offset, data[i].s,
+ data[i].max_lookahead, data[i].refcnt, data + i);
+ occ_num++;
+ }
+ else
+ PE("%2lu. empty -- la %u %p\n", i, data[i].max_lookahead,
+ data + i);
+
+ }
+#undef INDENT
+#define INDENT ""
+ PE("occupancy: %lu %f\n", occ_num, ((double) occ_num)/hm->s->size);
+}
diff --git a/libgfortran/nca/hashmap.h b/libgfortran/nca/hashmap.h
new file mode 100644
index 0000000..4d999e3
--- /dev/null
+++ b/libgfortran/nca/hashmap.h
@@ -0,0 +1,70 @@
+#ifndef HASHMAP_H
+
+#include "shared_memory.h"
+#include "allocator.h"
+
+#include <stdint.h>
+#include <stddef.h>
+
+
+/* Data structures and variables:
+
+ memid is a unique identifier for the coarray, the address of its
+ descriptor (which is unique in the program). */
+typedef intptr_t memid;
+
+/* Table state that lives in shared memory. */
+typedef struct {
+ shared_mem_ptr data;
+ size_t size;
+ int bitnum;
+} hashmap_shared;
+
+/* Per-process handle: shared state plus the memory region and
+ allocator used to resolve/resize it. */
+typedef struct hashmap
+{
+ hashmap_shared *s;
+ shared_memory *sm;
+ allocator *a;
+} hashmap;
+
+/* Result of hashmap_get; valid only until the next resize. On a
+ miss, only p is meaningful (see hm_search_result_contains). */
+typedef struct {
+ shared_mem_ptr p;
+ size_t size;
+ ssize_t res_offset;
+} hashmap_search_result;
+
+void hashmap_init (hashmap *, hashmap_shared *, allocator *a, shared_memory *);
+
+/* Look up memid in the hashmap. The result can be inspected via the
+ hm_search_result_* functions. */
+
+hashmap_search_result hashmap_get (hashmap *, memid);
+
+/* Given a search result, returns the size. */
+size_t hm_search_result_size (hashmap_search_result *);
+
+/* Given a search result, returns the pointer. */
+shared_mem_ptr hm_search_result_ptr (hashmap_search_result *);
+
+/* Given a search result, returns whether something was found. */
+bool hm_search_result_contains (hashmap_search_result *);
+
+/* Sets the hashmap entry for memid to shared_mem_ptr and
+ size_t. Optionally, if a hashmap_search_result is supplied, it is
+ used to make the lookup faster. */
+
+void hashmap_set (hashmap *, memid, hashmap_search_result *, shared_mem_ptr p,
+ size_t);
+
+/* Increments the hashmap entry for memid. Optionally, if a
+ hashmap_search_result is supplied, it is used to make the lookup
+ faster. */
+
+void hashmap_inc (hashmap *, memid, hashmap_search_result *);
+
+/* Same, but decrement. */
+int hashmap_dec (hashmap *, memid, hashmap_search_result *);
+
+void dump_hm (hashmap *hm);
+
+/* NOTE(review): the guard macro is defined at the end of the header;
+ this works, but defining it at the top is the usual convention and
+ protects against recursive includes. */
+#define HASHMAP_H
+#endif
diff --git a/libgfortran/nca/libcoarraynative.h b/libgfortran/nca/libcoarraynative.h
new file mode 100644
index 0000000..507de0c
--- /dev/null
+++ b/libgfortran/nca/libcoarraynative.h
@@ -0,0 +1,103 @@
/* Copyright (C) 2019-2020 Free Software Foundation, Inc.
   Contributed by Nicolas Koenig

This file is part of the GNU Fortran Native Coarray Library (libnca).

Libnca is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

Libnca is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */

#ifndef LIBGFOR_H
#error "Include libgfortran.h before libcoarraynative.h"
#endif

#ifndef COARRAY_NATIVE_HDR
#define COARRAY_NATIVE_HDR

#include "libgfortran.h"

#include <sys/types.h>
#include <stdint.h>
#include <stdio.h>


/* This is to create a _nca_gfortrani_ prefix for all variables and
   function used only by nca. */
/* NOTE(review): the comment above does not match the disabled code
   below (which defines NUM_ADDR_BITS) -- confirm intent.  */
#if 0
#define NUM_ADDR_BITS (8 * sizeof (int *))
#endif

/* NOTE(review): debug output is unconditionally enabled here;
   presumably this should be commented out for release builds.  */
#define DEBUG_NATIVE_COARRAY 1

/* Debug tracing to stderr, compiled out unless DEBUG_NATIVE_COARRAY
   is defined.  */
#ifdef DEBUG_NATIVE_COARRAY
#define DEBUG_PRINTF(...) dprintf (2,__VA_ARGS__)
#else
#define DEBUG_PRINTF(...) do {} while(0)
#endif

#include "allocator.h"
#include "hashmap.h"
#include "sync.h"
#include "lock.h"
#include "collective_subroutine.h"

/* Barrier shared by all images, sized for maximg participants.  */
typedef struct {
  pthread_barrier_t barrier;
  int maximg;
} ipcollsub;

/* Liveness state of one image process.  */
typedef enum {
  IMAGE_UNKNOWN = 0,
  IMAGE_OK,
  IMAGE_FAILED
} image_status;

/* Status and PID of one image, tracked by the master process.  */
typedef struct {
  image_status status;
  pid_t pid;
} image_tracker;

/* State kept by the master process: one image_tracker per image
   (flexible array member).  */
typedef struct {
  int has_failed_image;
  image_tracker images[];
} master;

/* Identity of the current image and a pointer to the master state.  */
typedef struct {
  int image_num;
  master *m;
} image;

extern image this_image;

/* Process-local handles to all shared-memory interfaces.  */
typedef struct {
  int num_images;
  shared_memory sm;
  alloc_iface ai;
  collsub_iface ci;
  sync_iface si;
} nca_local_data;

extern nca_local_data *local;
internal_proto (local);
void ensure_initialization(void);
internal_proto(ensure_initialization);

void nca_master(void (*)(void));
export_proto (nca_master);

#endif
diff --git a/libgfortran/nca/lock.h b/libgfortran/nca/lock.h
new file mode 100644
index 0000000..4697395
--- /dev/null
+++ b/libgfortran/nca/lock.h
@@ -0,0 +1,37 @@
/* Copyright (C) 2019-2020 Free Software Foundation, Inc.
   Contributed by Nicolas Koenig

This file is part of the GNU Fortran Native Coarray Library (libnca).

Libnca is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

Libnca is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */


#ifndef COARRAY_LOCK_HDR
#define COARRAY_LOCK_HDR

#include <pthread.h>

/* Backing store for Fortran LOCK_TYPE coarrays: a flexible array of
   process-shared mutexes.  NOTE(review): the semantics of 'owner'
   (presumably the image currently holding the lock) are not visible
   in this header -- confirm against the implementation.  */
typedef struct {
  int owner;
  int initialized;
  pthread_mutex_t arr[];
} lock_array;

#endif
diff --git a/libgfortran/nca/shared_memory.c b/libgfortran/nca/shared_memory.c
new file mode 100644
index 0000000..bc3093d
--- /dev/null
+++ b/libgfortran/nca/shared_memory.c
@@ -0,0 +1,221 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "libgfortran.h"
+#include "libcoarraynative.h"
+#include "util.h"
+#include <sys/mman.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "shared_memory.h"
+
+/* This implements shared memory based on POSIX mmap. We start with
+ memory block of the size of the global shared memory data, rounded
+ up to one pagesize, and enlarge as needed.
+
+ We address the memory via a shared_memory_ptr, which is an offset into
+ the shared memory block. The metadata is situated at offset 0.
+
+ In order to be able to resize the memory and to keep pointers
+ valid, we keep the old mapping around, so the memory is actually
+ visible several times to the process. Thus, pointers returned by
+ shared_memory_get_mem_with_alignment remain valid even when
+ resizing. */
+
+/* Global metadata for shared memory, always kept at offset 0. */
+
typedef struct
{
  size_t size;	/* Current size of the shared memory segment, in bytes.  */
  size_t used;	/* Allocation high-water mark (bytes handed out).  */
  int fd;	/* File descriptor backing the shared mapping.  */
} global_shared_memory_meta;
+
+/* Type realization for opaque type shared_memory. */
+
typedef struct shared_memory_act
{
  global_shared_memory_meta *meta;  /* Always mapped at offset 0.  */
  void *header;			    /* NOTE(review): unused in this
				       file -- confirm purpose.  */
  size_t last_seen_size;	    /* Segment size at our most recent
				       mapping; compared against
				       meta->size to detect growth by
				       other images.  */

  /* We don't need to free these. We probably also don't need to keep
     track of them, but it is much more future proof if we do. */

  size_t num_local_allocs;	    /* Number of entries in allocs[].  */

  struct local_alloc {
    void *base;			    /* Base address of one mapping.  */
    size_t size;		    /* Its size in bytes.  */
  } allocs[];			    /* One entry per mapping made so far.  */

} shared_memory_act;
+
+/* Convenience wrapper for mmap. */
+
/* Convenience wrapper for mmap: map SIZE bytes of FD at OFFSET,
   shared and read-write.  Aborts the process on failure.  */

static inline void *
map_memory (int fd, size_t size, off_t offset)
{
  void *p;

  p = mmap (NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset);
  if (p == MAP_FAILED)
    {
      perror ("mmap failed");
      exit (1);
    }

  return p;
}
+
+/* Returns the size of shared_memory_act. */
+
+static inline size_t
+get_shared_memory_act_size (int nallocs)
+{
+ return sizeof(shared_memory_act) + nallocs*sizeof(struct local_alloc);
+}
+
+/* When the shared memory block is enlarged, we need to map it into
+ virtual memory again. */
+
+static inline shared_memory_act *
+new_base_mapping (shared_memory_act *mem)
+{
+ shared_memory_act *newmem;
+ /* We need another entry in the alloc table. */
+ mem->num_local_allocs++;
+ newmem = realloc (mem, get_shared_memory_act_size (mem->num_local_allocs));
+ newmem->allocs[newmem->num_local_allocs - 1]
+ = ((struct local_alloc)
+ {.base = map_memory (newmem->meta->fd, newmem->meta->size, 0),
+ .size = newmem->meta->size});
+ newmem->last_seen_size = newmem->meta->size;
+ return newmem;
+}
+
+/* Return the most recently allocated base pointer. */
+
+static inline void *
+last_base (shared_memory_act *mem)
+{
+ return mem->allocs[mem->num_local_allocs - 1].base;
+}
+
+/* Get a pointer into the shared memory block with alignemnt
+ (works similar to sbrk). */
+
+shared_mem_ptr
+shared_memory_get_mem_with_alignment (shared_memory_act **pmem, size_t size,
+ size_t align)
+{
+ shared_memory_act *mem = *pmem;
+ size_t new_size;
+ size_t orig_used;
+
+ /* Offset into memory block with alignment. */
+ size_t used_wa = alignto (mem->meta->used, align);
+
+ if (used_wa + size <= mem->meta->size)
+ {
+ memset(last_base(mem) + mem->meta->used, 0xCA, used_wa - mem->meta->used);
+ memset(last_base(mem) + used_wa, 0x42, size);
+ mem->meta->used = used_wa + size;
+
+ DEBUG_PRINTF ("Shared Memory: New memory of size %#lx requested, returned %#lx\n", size, used_wa);
+ return (shared_mem_ptr) {.offset = used_wa};
+ }
+
+ /* We need to enlarge the memory segment. Double the size if that
+ is big enough, otherwise get what's needed. */
+
+ if (mem->meta->size * 2 < used_wa + size)
+ new_size = mem->meta->size * 2;
+ else
+ new_size = round_to_pagesize (used_wa + size);
+
+ orig_used = mem->meta->used;
+ mem->meta->size = new_size;
+ mem->meta->used = used_wa + size;
+ ftruncate (mem->meta->fd, mem->meta->size);
+ /* This also sets the new base pointer where the shared memory
+ can be found in the address space. */
+
+ mem = new_base_mapping (mem);
+
+ *pmem = mem;
+ assert(used_wa != 0);
+
+ dprintf(2, "Shared Memory: New memory of size %#lx requested, returned %#lx\n", size, used_wa);
+ memset(last_base(mem) + orig_used, 0xCA, used_wa - orig_used);
+ memset(last_base(mem) + used_wa, 0x42, size);
+
+ return (shared_mem_ptr) {.offset = used_wa};
+}
+
+/* If another image changed the size, update the size accordingly. */
+
+void
+shared_memory_prepare (shared_memory_act **pmem)
+{
+ shared_memory_act *mem = *pmem;
+ if (mem->meta->size == mem->last_seen_size)
+ return;
+ mem = new_base_mapping(mem);
+ *pmem = mem;
+}
+
+/* Initialize the memory with one page, the shared metadata of the
+ shared memory is stored at the beginning. */
+
+void
+shared_memory_init (shared_memory_act **pmem)
+{
+ shared_memory_act *mem;
+ int fd;
+ size_t initial_size = round_to_pagesize (sizeof (global_shared_memory_meta));
+
+ mem = malloc (get_shared_memory_act_size(1));
+ fd = get_shmem_fd();
+
+ ftruncate(fd, initial_size);
+ mem->meta = map_memory (fd, initial_size, 0);
+ *mem->meta = ((global_shared_memory_meta) {.size = initial_size,
+ .used = sizeof(global_shared_memory_meta),
+ .fd = fd});
+ mem->last_seen_size = initial_size;
+ mem->num_local_allocs = 1;
+ mem->allocs[0] = ((struct local_alloc) {.base = mem->meta,
+ .size = initial_size});
+
+ *pmem = mem;
+}
+
+/* Convert a shared memory pointer (i.e. an offset into the shared
+ memory block) to a pointer. */
+
+void *
+shared_mem_ptr_to_void_ptr(shared_memory_act **pmem, shared_mem_ptr smp)
+{
+ return last_base(*pmem) + smp.offset;
+}
+
diff --git a/libgfortran/nca/shared_memory.h b/libgfortran/nca/shared_memory.h
new file mode 100644
index 0000000..4adc104
--- /dev/null
+++ b/libgfortran/nca/shared_memory.h
@@ -0,0 +1,78 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
#ifndef SHARED_MEMORY_H
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>
#include <sys/types.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <limits.h>

/* A struct to serve as an opaque shared memory object. */

struct shared_memory_act;
typedef struct shared_memory_act * shared_memory;

/* Null value and test for shared-memory pointers.  */
#define SHMPTR_NULL ((shared_mem_ptr) {.offset = -1})
#define SHMPTR_IS_NULL(x) ((x).offset == -1)

/* Assign the object that S (in shared memory SM) designates to X.
   Fixed: the original expansion had an unbalanced parenthesis and a
   stray semicolon, so any use failed to compile.  */
#define SHMPTR_DEREF(x, s, sm) \
  ((x) = *(__typeof(x) *) shared_mem_ptr_to_void_ptr (sm, s))
/* Convert shared-memory pointer S to a local pointer of type T.  */
#define SHMPTR_AS(t, s, sm) ((t) shared_mem_ptr_to_void_ptr (sm, s))
#define SHMPTR_SET(v, s, sm) ((v) = SHMPTR_AS (__typeof(v), s, sm))
#define SHMPTR_EQUALS(s1, s2) ((s1).offset == (s2).offset)

/* Allocate room for N objects of type T; returns a shared_mem_ptr.  */
#define SHARED_MEMORY_RAW_ALLOC(mem, t, n) \
  shared_memory_get_mem_with_alignment (mem, sizeof (t) * (n), __alignof__(t))

/* Allocate one object of type T and return a local pointer to it.  */
#define SHARED_MEMORY_RAW_ALLOC_PTR(mem, t) \
  SHMPTR_AS (t *, SHARED_MEMORY_RAW_ALLOC (mem, t, 1), mem)

/* A shared-memory pointer is implemented as an offset into the shared
   memory region. */

typedef struct shared_mem_ptr
{
  ssize_t offset;
} shared_mem_ptr;

void shared_memory_init (shared_memory *);
internal_proto (shared_memory_init);

void shared_memory_prepare (shared_memory *);
internal_proto (shared_memory_prepare);

shared_mem_ptr shared_memory_get_mem_with_alignment (shared_memory *mem,
						     size_t size, size_t align);
internal_proto (shared_memory_get_mem_with_alignment);

void *shared_mem_ptr_to_void_ptr (shared_memory *, shared_mem_ptr);
internal_proto (shared_mem_ptr_to_void_ptr);

#define SHARED_MEMORY_H
#endif
diff --git a/libgfortran/nca/sync.c b/libgfortran/nca/sync.c
new file mode 100644
index 0000000..6d7f7ca
--- /dev/null
+++ b/libgfortran/nca/sync.c
@@ -0,0 +1,156 @@
+/* Copyright (C) 2019-2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+
+#include <string.h>
+
+#include "libgfortran.h"
+#include "libcoarraynative.h"
+#include "sync.h"
+#include "util.h"
+
+static void
+sync_all_init (pthread_barrier_t *b)
+{
+ pthread_barrierattr_t battr;
+ pthread_barrierattr_init (&battr);
+ pthread_barrierattr_setpshared (&battr, PTHREAD_PROCESS_SHARED);
+ pthread_barrier_init (b, &battr, local->num_images);
+ pthread_barrierattr_destroy (&battr);
+}
+
+static inline void
+lock_table (sync_iface *si)
+{
+ pthread_mutex_lock (&si->cis->table_lock);
+}
+
+static inline void
+unlock_table (sync_iface *si)
+{
+ pthread_mutex_unlock (&si->cis->table_lock);
+}
+
+static inline void
+wait_table_cond (sync_iface *si, pthread_cond_t *cond)
+{
+ pthread_cond_wait (cond,&si->cis->table_lock);
+}
+
+static int *
+get_locked_table(sync_iface *si) { // The initialization of the table has to
+ // be delayed, since we might not know the
+ // number of images when the library is
+ // initialized
+ lock_table(si);
+ return si->table;
+ /*
+ if (si->table)
+ return si->table;
+ else if (!SHMPTR_IS_NULL(si->cis->table))
+ {
+ si->table = SHMPTR_AS(int *, si->cis->table, si->sm);
+ si->triggers = SHMPTR_AS(pthread_cond_t *, si->cis->triggers, si->sm);
+ return si->table;
+ }
+
+ si->cis->table =
+ shared_malloc(si->a, sizeof(int)*local->num_images * local->num_images);
+ si->cis->triggers =
+ shared_malloc(si->a, sizeof(int)*local->num_images);
+
+ si->table = SHMPTR_AS(int *, si->cis->table, si->sm);
+ si->triggers = SHMPTR_AS(pthread_cond_t *, si->cis->triggers, si->sm);
+
+ for (int i = 0; i < local->num_images; i++)
+ initialize_shared_condition (&si->triggers[i]);
+
+ return si->table;
+ */
+}
+
+void
+sync_iface_init (sync_iface *si, alloc_iface *ai, shared_memory *sm)
+{
+ si->cis = SHMPTR_AS (sync_iface_shared *,
+ shared_malloc (get_allocator(ai),
+ sizeof(collsub_iface_shared)),
+ sm);
+ DEBUG_PRINTF ("%s: num_images is %d\n", __PRETTY_FUNCTION__, local->num_images);
+
+ sync_all_init (&si->cis->sync_all);
+ initialize_shared_mutex (&si->cis->table_lock);
+ si->sm = sm;
+ si->a = get_allocator(ai);
+
+ si->cis->table =
+ shared_malloc(si->a, sizeof(int)*local->num_images * local->num_images);
+ si->cis->triggers =
+ shared_malloc(si->a, sizeof(pthread_cond_t)*local->num_images);
+
+ si->table = SHMPTR_AS(int *, si->cis->table, si->sm);
+ si->triggers = SHMPTR_AS(pthread_cond_t *, si->cis->triggers, si->sm);
+
+ for (int i = 0; i < local->num_images; i++)
+ initialize_shared_condition (&si->triggers[i]);
+}
+
+void
+sync_table (sync_iface *si, int *images, size_t size)
+{
+#ifdef DEBUG_NATIVE_COARRAY
+ dprintf (2, "Image %d waiting for these %ld images: ", this_image.image_num + 1, size);
+ for (int d_i = 0; d_i < size; d_i++)
+ dprintf (2, "%d ", images[d_i]);
+ dprintf (2, "\n");
+#endif
+ size_t i;
+ int done;
+ int *table = get_locked_table(si);
+ for (i = 0; i < size; i++)
+ {
+ table[images[i] - 1 + local->num_images*this_image.image_num]++;
+ pthread_cond_signal (&si->triggers[images[i] - 1]);
+ }
+ for (;;)
+ {
+ done = 1;
+ for (i = 0; i < size; i++)
+ done &= si->table[images[i] - 1 + this_image.image_num*local->num_images]
+ == si->table[this_image.image_num + (images[i] - 1)*local->num_images];
+ if (done)
+ break;
+ wait_table_cond (si, &si->triggers[this_image.image_num]);
+ }
+ unlock_table (si);
+}
+
+void
+sync_all (sync_iface *si)
+{
+
+ DEBUG_PRINTF("Syncing all\n");
+
+ pthread_barrier_wait (&si->cis->sync_all);
+}
diff --git a/libgfortran/nca/sync.h b/libgfortran/nca/sync.h
new file mode 100644
index 0000000..4b49441
--- /dev/null
+++ b/libgfortran/nca/sync.h
@@ -0,0 +1,56 @@
+/* Copyright (C) 2019-2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
#ifndef IPSYNC_HDR
#define IPSYNC_HDR

#include "shared_memory.h"
#include "alloc.h"
#include<pthread.h>

/* Shared (inter-image) part of the synchronization interface,
   allocated in shared memory by sync_iface_init.  */
typedef struct {
  pthread_barrier_t sync_all;	/* Barrier used by SYNC ALL.  */
  pthread_mutex_t table_lock;	/* Protects table and triggers.  */
  shared_mem_ptr table;		/* num_images * num_images int counters.  */
  shared_mem_ptr triggers;	/* One pthread_cond_t per image.  */
} sync_iface_shared;

/* Process-local view of the synchronization interface.  */
typedef struct {
  sync_iface_shared *cis;
  shared_memory *sm;
  allocator *a;
  int *table; // we can cache the table and the trigger pointers here
  pthread_cond_t *triggers;
} sync_iface;

void sync_iface_init (sync_iface *, alloc_iface *, shared_memory *);
internal_proto (sync_iface_init);

void sync_all (sync_iface *);
internal_proto (sync_all);

void sync_table (sync_iface *, int *, size_t);
internal_proto (sync_table);

#endif
diff --git a/libgfortran/nca/util.c b/libgfortran/nca/util.c
new file mode 100644
index 0000000..5805218
--- /dev/null
+++ b/libgfortran/nca/util.c
@@ -0,0 +1,197 @@
#include "libgfortran.h"
#include "util.h"
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
+
+#define MEMOBJ_NAME "/gfortran_coarray_memfd"
+
/* Round SIZE up to the next multiple of ALIGN.  */

size_t
alignto (size_t size, size_t align)
{
  size_t blocks = (size + align - 1) / align;
  return blocks * align;
}

/* System page size; set during library initialization.  */

size_t pagesize;

/* Round S up to a whole number of pages.  */

size_t
round_to_pagesize (size_t s)
{
  return alignto (s, pagesize);
}
+
/* Return the smallest power of two that is >= SIZE.  Fixes the
   FIXME'd off-by-one worries in the original: size <= 1 invoked
   __builtin_clzl (0), which is undefined, and the literal 1 was
   shifted as a plain int, overflowing for results wider than int.
   The shift is now done in size_t width (which matches the operand
   width of __builtin_clzl on LP64, unlike the pointer-width PTR_BITS
   the original used).  */

size_t
next_power_of_two (size_t size)
{
  if (size <= 1)
    return 1;
  return (size_t) 1 << (sizeof (size_t) * CHAR_BIT
			- __builtin_clzl (size - 1));
}
+
/* Initialize MUTEX so it can be used across process boundaries.  */

void
initialize_shared_mutex (pthread_mutex_t *mutex)
{
  pthread_mutexattr_t attr;

  pthread_mutexattr_init (&attr);
  pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED);
  pthread_mutex_init (mutex, &attr);
  pthread_mutexattr_destroy (&attr);
}
+
/* Initialize COND so it can be used across process boundaries.  */

void
initialize_shared_condition (pthread_cond_t *cond)
{
  pthread_condattr_t attr;

  pthread_condattr_init (&attr);
  pthread_condattr_setpshared (&attr, PTHREAD_PROCESS_SHARED);
  pthread_cond_init (cond, &attr);
  pthread_condattr_destroy (&attr);
}
+
+int
+get_shmem_fd (void)
+{
+ char buffer[1<<10];
+ int fd, id;
+ id = random ();
+ do
+ {
+ snprintf (buffer, sizeof (buffer),
+ MEMOBJ_NAME "_%u_%d", (unsigned int) getpid (), id++);
+ fd = shm_open (buffer, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
+ }
+ while (fd == -1);
+ shm_unlink (buffer);
+ return fd;
+}
+
/* Compute packing information for SOURCE into *PI: per-dimension
   extents, byte strides, and the total element count.  Returns true
   if SOURCE is contiguous (the caller can then use a single memcpy);
   returns false if the elements must be gathered with
   pack_array_finish.  */

bool
pack_array_prepare (pack_info * restrict pi, const gfc_array_char * restrict source)
{
  index_type dim;
  bool packed;
  index_type span;
  index_type type_size;
  index_type ssize;

  dim = GFC_DESCRIPTOR_RANK (source);
  type_size = GFC_DESCRIPTOR_SIZE (source);
  ssize = type_size;

  pi->num_elem = 1;
  packed = true;
  /* A zero span means elements are spaced by their own size.  */
  span = source->span != 0 ? source->span : type_size;
  for (index_type n = 0; n < dim; n++)
    {
      pi->stride[n] = GFC_DESCRIPTOR_STRIDE (source,n) * span;
      pi->extent[n] = GFC_DESCRIPTOR_EXTENT (source,n);
      if (pi->extent[n] <= 0)
	{
	  /* Empty array: report it as packed with zero elements, so
	     the caller's memcpy degenerates to a no-op.  */
	  packed = 1;
	  pi->num_elem = 0;
	  break;
	}

      /* Contiguous so far only if this dimension's byte stride equals
	 the accumulated size of the previous dimensions.  */
      if (ssize != pi->stride[n])
	packed = 0;

      pi->num_elem *= pi->extent[n];
      ssize *= pi->extent[n];
    }

  return packed;
}
+
/* Gather the (possibly strided) array SOURCE into the contiguous
   buffer DEST, using the strides and extents previously computed by
   pack_array_prepare into *PI.  DEST must hold pi->num_elem
   elements.  */

void
pack_array_finish (pack_info * const restrict pi, const gfc_array_char * const restrict source,
		   char * restrict dest)
{
  index_type dim;
  const char *restrict src;

  index_type size;
  index_type stride0;
  index_type count[GFC_MAX_DIMENSIONS];  /* Per-dimension position.  */

  dim = GFC_DESCRIPTOR_RANK (source);
  src = source->base_addr;
  stride0 = pi->stride[0];
  size = GFC_DESCRIPTOR_SIZE (source);

  memset (count, 0, sizeof(count));
  while (src)
    {
      /* Copy the data. */
      memcpy(dest, src, size);
      /* Advance to the next element. */
      dest += size;
      src += stride0;
      count[0]++;
      /* Advance to the next source element. */
      index_type n = 0;
      while (count[n] == pi->extent[n])
	{
	  /* When we get to the end of a dimension, reset it and increment
	     the next dimension. */
	  count[n] = 0;
	  /* We could precalculate these products, but this is a less
	     frequently used path so probably not worth it. */
	  src -= pi->stride[n] * pi->extent[n];
	  n++;
	  if (n == dim)
	    {
	      /* Past the last dimension: the whole array is copied.  */
	      src = NULL;
	      break;
	    }
	  else
	    {
	      count[n]++;
	      src += pi->stride[n];
	    }
	}
    }
}
+
+void
+unpack_array_finish (pack_info * const restrict pi,
+ const gfc_array_char * restrict d,
+ const char *restrict src)
+{
+ index_type stride0;
+ char * restrict dest;
+ index_type size;
+ index_type count[GFC_MAX_DIMENSIONS];
+ index_type dim;
+
+ size = GFC_DESCRIPTOR_SIZE (d);
+ stride0 = pi->stride[0];
+ dest = d->base_addr;
+ dim = GFC_DESCRIPTOR_RANK (d);
+
+ while (dest)
+ {
+ memcpy (dest, src, size);
+ src += size;
+ dest += stride0;
+ count[0]++;
+ index_type n = 0;
+ while (count[n] == pi->extent[n])
+ {
+ count[n] = 0;
+ dest -= pi->stride[n] * pi->extent[n];
+ n++;
+ if (n == dim)
+ {
+ dest = NULL;
+ break;
+ }
+ else
+ {
+ count[n] ++;
+ dest += pi->stride[n];
+ }
+ }
+ }
+}
diff --git a/libgfortran/nca/util.h b/libgfortran/nca/util.h
new file mode 100644
index 0000000..9abd7ad
--- /dev/null
+++ b/libgfortran/nca/util.h
@@ -0,0 +1,86 @@
+/* Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef UTIL_HDR
+#define UTIL_HDR
+
+#include <stdint.h>
+#include <stddef.h>
+#include <pthread.h>
+
+#define PTR_BITS (CHAR_BIT*sizeof(void *))
+
+size_t alignto (size_t, size_t);
+internal_proto (alignto);
+
+size_t round_to_pagesize (size_t);
+internal_proto (round_to_pagesize);
+
+size_t next_power_of_two (size_t);
+internal_proto (next_power_of_two);
+
+int get_shmem_fd (void);
+internal_proto (get_shmem_fd);
+
+void initialize_shared_mutex (pthread_mutex_t *);
+internal_proto (initialize_shared_mutex);
+
+void initialize_shared_condition (pthread_cond_t *);
+internal_proto (initialize_shared_condition);
+
+extern size_t pagesize;
+internal_proto (pagesize);
+
+/* Usage:
+ pack_info pi;
+ packed = pack_array_prepare (&pi, source);
+
+ // Awesome allocation of destptr using pi.num_elem
+ if (packed)
+ memcpy (...);
+ else
+ pack_array_finish (&pi, source, destptr);
+
+ This could also be used in in_pack_generic.c. Additionally, since
+ pack_array_prepare is the same for all type sizes, we would only have to
+ specialize pack_array_finish, saving on code size. */
+
+typedef struct
+{
+ index_type num_elem;
+ index_type extent[GFC_MAX_DIMENSIONS];
+ index_type stride[GFC_MAX_DIMENSIONS]; /* Stride is byte-based. */
+} pack_info;
+
+bool pack_array_prepare (pack_info *restrict, const gfc_array_char * restrict);
+internal_proto (pack_array_prepare);
+
+void pack_array_finish (pack_info * const restrict, const gfc_array_char * const restrict,
+ char * restrict);
+
+internal_proto (pack_array_finish);
+
+void unpack_array_finish (pack_info * const restrict, const gfc_array_char * const,
+ const char * restrict);
+#endif
diff --git a/libgfortran/nca/wrapper.c b/libgfortran/nca/wrapper.c
new file mode 100644
index 0000000..eeb64d3
--- /dev/null
+++ b/libgfortran/nca/wrapper.c
@@ -0,0 +1,258 @@
+/* Copyright (C) 2019-2020 Free Software Foundation, Inc.
+ Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <string.h>
+#include "libgfortran.h"
+#include "libcoarraynative.h"
+#include "sync.h"
+#include "lock.h"
+#include "util.h"
+#include "collective_subroutine.h"
+
+/* Return DIVIDEND / DIVISOR rounded up (ceiling division).  Both
+   arguments must be positive; the intermediate sum must not overflow
+   int.  */
+static inline int
+div_ru (int dividend, int divisor)
+{
+  return (dividend + divisor - 1) / divisor;
+}
+
+/* Kind of coarray being allocated.  NOTE(review): the values start at
+   3 -- presumably to match constants emitted by the Fortran front
+   end; confirm against the front end's definition.  */
+enum gfc_coarray_allocation_type {
+  GFC_NCA_NORMAL_COARRAY = 3,
+  GFC_NCA_LOCK_COARRAY,
+  GFC_NCA_EVENT_COARRAY,
+};
+
+/* Public entry points of the native coarray runtime.  Each prototype
+   is followed by export_proto, which publishes it under the library's
+   external ABI name.  Definitions follow below.  */
+void nca_coarray_alloc (gfc_array_void *, int, int, int);
+export_proto (nca_coarray_alloc);
+
+void
+nca_coarray_free (gfc_array_void *, int);
+export_proto (nca_coarray_free);
+
+int nca_coarray_this_image (int);
+export_proto (nca_coarray_this_image);
+
+int nca_coarray_num_images (int);
+export_proto (nca_coarray_num_images);
+
+void nca_coarray_sync_all (int *);
+export_proto (nca_coarray_sync_all);
+
+void nca_sync_images (size_t, int *, int*, char *, size_t);
+export_proto (nca_sync_images);
+
+void nca_lock (void *);
+export_proto (nca_lock);
+
+void nca_unlock (void *);
+export_proto (nca_unlock);
+
+void nca_collsub_reduce_array (gfc_array_char *, void (*) (void *, void *),
+			       int *);
+export_proto (nca_collsub_reduce_array);
+
+void nca_collsub_reduce_scalar (void *, index_type, void (*) (void *, void *),
+				int *);
+export_proto (nca_collsub_reduce_scalar);
+
+/* The commented-out stat/errmsg parameters are the planned full
+   interface; they are not implemented yet.  */
+void nca_collsub_broadcast_array (gfc_array_char * restrict, int/*, int *, char *,
+				  size_t*/);
+export_proto (nca_collsub_broadcast_array);
+
+void nca_collsub_broadcast_scalar (void * restrict, size_t, int/*, int *, char *,
+				   size_t*/);
+export_proto(nca_collsub_broadcast_scalar);
+
+/* Allocate the shared storage for a coarray described by DESC.
+   ELEM_SIZE is the element size in bytes (overridden for lock and
+   event coarrays, which use a fixed element type), CORANK the number
+   of codimensions and ALLOC_TYPE one of enum
+   gfc_coarray_allocation_type.  The extent of the last codimension is
+   derived from the number of images; the backing memory is obtained
+   from the shared allocator, keyed by the descriptor address.  */
+void
+nca_coarray_alloc (gfc_array_void *desc, int elem_size, int corank,
+		   int alloc_type)
+{
+  int i, last_rank_index;
+  int num_coarray_elems, num_elems; /* Excludes the last dimension, because it
+				       will have to be determined later. */
+  int extent_last_codimen;
+  size_t last_lbound;
+  size_t size_in_bytes;
+
+  ensure_initialization(); /* This function might be the first one to be
+			      called, if it is called in a constructor. */
+
+  if (alloc_type == GFC_NCA_LOCK_COARRAY)
+    elem_size = sizeof (pthread_mutex_t);
+  else if (alloc_type == GFC_NCA_EVENT_COARRAY)
+    elem_size = sizeof(char); /* TODO: replace with the proper event type. */
+
+  last_rank_index = GFC_DESCRIPTOR_RANK(desc) + corank -1;
+
+  /* Element count of the array dimensions and of all codimensions but
+     the last; the last codimension absorbs the remaining images.  */
+  num_elems = 1;
+  num_coarray_elems = 1;
+  for (i = 0; i < GFC_DESCRIPTOR_RANK(desc); i++)
+    num_elems *= GFC_DESCRIPTOR_EXTENT(desc, i);
+  for (i = GFC_DESCRIPTOR_RANK(desc); i < last_rank_index; i++)
+    {
+      num_elems *= GFC_DESCRIPTOR_EXTENT(desc, i);
+      num_coarray_elems *= GFC_DESCRIPTOR_EXTENT(desc, i);
+    }
+
+  extent_last_codimen = div_ru (local->num_images, num_coarray_elems);
+
+  last_lbound = GFC_DIMENSION_LBOUND(desc->dim[last_rank_index]);
+  GFC_DIMENSION_SET(desc->dim[last_rank_index], last_lbound,
+		    last_lbound + extent_last_codimen - 1,
+		    num_elems);
+
+  size_in_bytes = elem_size * num_elems * extent_last_codimen;
+  if (alloc_type == GFC_NCA_LOCK_COARRAY)
+    {
+      lock_array *addr;
+      int expected = 0;
+      /* Allocate enough space for the metadata in front of the lock
+	 array. */
+      addr = get_memory_by_id_zero (&local->ai, size_in_bytes
+				    + sizeof (lock_array),
+				    (intptr_t) desc);
+
+      /* Use of a traditional spin lock to avoid race conditions with
+	 the initialization of the mutex. We could alternatively put a
+	 global lock around allocate, but that would probably be
+	 slower. */
+      while (!__atomic_compare_exchange_n (&addr->owner, &expected,
+					   this_image.image_num + 1,
+					   false, __ATOMIC_SEQ_CST,
+					   __ATOMIC_SEQ_CST));
+      /* Only the first image to get here initializes the mutexes;
+	 'initialized' counts how many images hold the allocation.  */
+      if (!addr->initialized++)
+	{
+	  for (i = 0; i < local->num_images; i++)
+	    initialize_shared_mutex (&addr->arr[i]);
+	}
+      __atomic_store_n (&addr->owner, 0, __ATOMIC_SEQ_CST);
+      desc->base_addr = &addr->arr;
+    }
+  else if (alloc_type == GFC_NCA_EVENT_COARRAY)
+    (void) 0; // TODO
+  else
+    desc->base_addr = get_memory_by_id (&local->ai, size_in_bytes,
+					(intptr_t) desc);
+  /* NOTE(review): leftover debugging output to stderr; remove or put
+     behind a debug flag before committing.  */
+  dprintf(2, "Base address of desc for image %d: %p\n", this_image.image_num + 1, desc->base_addr);
+}
+
+/* Release a coarray previously set up by nca_coarray_alloc.  For lock
+   coarrays the mutexes are destroyed -- under the same owner spin
+   lock used during initialization -- once the last image holding the
+   allocation frees it.  */
+void
+nca_coarray_free (gfc_array_void *desc, int alloc_type)
+{
+  int i;
+  if (alloc_type == GFC_NCA_LOCK_COARRAY)
+    {
+      lock_array *la;
+      int expected = 0;
+      /* Recover the lock_array header that sits in front of the
+	 user-visible mutex array.  */
+      la = desc->base_addr - offsetof (lock_array, arr);
+      while (!__atomic_compare_exchange_n (&la->owner, &expected,
+					   this_image.image_num+1,
+					   false, __ATOMIC_SEQ_CST,
+					   __ATOMIC_SEQ_CST));
+      if (!--la->initialized)
+	{
+	  /* TODO: Coarray locks can be removed and just normal
+	     pthread_mutex can be used. */
+	  for (i = 0; i < local->num_images; i++)
+	    pthread_mutex_destroy (&la->arr[i]);
+	}
+      __atomic_store_n (&la->owner, 0, __ATOMIC_SEQ_CST);
+    }
+  else if (alloc_type == GFC_NCA_EVENT_COARRAY)
+    (void) 0; //TODO
+
+  free_memory_with_id (&local->ai, (intptr_t) desc);
+  desc->base_addr = NULL;
+}
+
+/* Implement THIS_IMAGE (): the 1-based number of the executing image.
+   DISTANCE is accepted for conformance with the intrinsic's interface
+   but ignored.  */
+int
+nca_coarray_this_image (int distance __attribute__((unused)))
+{
+  /* Internal image numbering is 0-based; Fortran's is 1-based.  */
+  return 1 + this_image.image_num;
+}
+
+/* Implement NUM_IMAGES (): the total number of images.  DISTANCE is
+   accepted for conformance with the intrinsic's interface but
+   ignored.  */
+int
+nca_coarray_num_images (int distance __attribute__((unused)))
+{
+  return local->num_images;
+}
+
+/* Implement SYNC ALL via the shared synchronization structure.  STAT
+   is accepted but not yet reported back to the caller.  */
+void
+nca_coarray_sync_all (int *stat __attribute__((unused)))
+{
+  sync_all (&local->si);
+}
+
+/* Implement SYNC IMAGES for the S image numbers in IMAGES.  STAT,
+   ERROR and ERR_SIZE are accepted but not yet used.  */
+void
+nca_sync_images (size_t s, int *images,
+		 int *stat __attribute__((unused)),
+		 char *error __attribute__((unused)),
+		 size_t err_size __attribute__((unused)))
+{
+  sync_table (&local->si, images, s);
+}
+
+/* Acquire a coarray lock; LOCK points at a process-shared pthread
+   mutex inside a lock coarray.  */
+void
+nca_lock (void *lock)
+{
+  pthread_mutex_lock (lock);
+}
+
+/* Release a coarray lock previously acquired with nca_lock.  */
+void
+nca_unlock (void *lock)
+{
+  pthread_mutex_unlock (lock);
+}
+
+/* Collective reduction of array DESC across all images, combining
+   elements with ASSIGN_FUNCTION.  If RESULT_IMAGE is non-NULL, only
+   that image receives the result.  */
+void
+nca_collsub_reduce_array (gfc_array_char *desc, void (*assign_function) (void *, void *),
+			  int *result_image)
+{
+  collsub_reduce_array (&local->ci, desc, result_image, assign_function);
+}
+
+/* Collective reduction of the scalar at OBJ (ELEM_SIZE bytes) across
+   all images, combining values with ASSIGN_FUNCTION.  If RESULT_IMAGE
+   is non-NULL, only that image receives the result.  */
+void
+nca_collsub_reduce_scalar (void *obj, index_type elem_size,
+			   void (*assign_function) (void *, void *),
+			   int *result_image)
+{
+  collsub_reduce_scalar (&local->ci, obj, elem_size, result_image,
+			 assign_function);
+}
+
+/* Implement CO_BROADCAST for an array: copy A from image SOURCE_IMAGE
+   (1-based) to every other image.  The stat/errmsg arguments of the
+   intrinsic are not implemented yet.  */
+void
+nca_collsub_broadcast_array (gfc_array_char * restrict a, int source_image)
+{
+  /* The collective layer uses 0-based image numbers.  */
+  collsub_broadcast_array (&local->ci, a, source_image - 1);
+}
+
+/* Implement CO_BROADCAST for a scalar: copy SIZE bytes at OBJ from
+   image SOURCE_IMAGE (1-based) to every other image.  The stat/errmsg
+   arguments of the intrinsic are not implemented yet.  */
+void
+nca_collsub_broadcast_scalar (void * restrict obj, size_t size, int source_image)
+{
+  /* The collective layer uses 0-based image numbers.  */
+  collsub_broadcast_scalar (&local->ci, obj, size, source_image - 1);
+}