diff options
57 files changed, 12113 insertions, 183 deletions
diff --git a/gcc/flag-types.h b/gcc/flag-types.h index 852ea76..51e698d 100644 --- a/gcc/flag-types.h +++ b/gcc/flag-types.h @@ -346,7 +346,8 @@ enum gfc_fcoarray { GFC_FCOARRAY_NONE = 0, GFC_FCOARRAY_SINGLE, - GFC_FCOARRAY_LIB + GFC_FCOARRAY_LIB, + GFC_FCOARRAY_NATIVE }; diff --git a/gcc/fortran/dump-parse-tree.c b/gcc/fortran/dump-parse-tree.c index 6e265f4..acff75a 100644 --- a/gcc/fortran/dump-parse-tree.c +++ b/gcc/fortran/dump-parse-tree.c @@ -1060,7 +1060,7 @@ show_symbol (gfc_symbol *sym) if (sym == NULL) return; - fprintf (dumpfile, "|| symbol: '%s' ", sym->name); + fprintf (dumpfile, "|| symbol: '%s' %p ", sym->name, (void *) &(sym->backend_decl)); len = strlen (sym->name); for (i=len; i<12; i++) fputc(' ', dumpfile); diff --git a/gcc/fortran/frontend-passes.c b/gcc/fortran/frontend-passes.c index 83f6fd8..c573731 100644 --- a/gcc/fortran/frontend-passes.c +++ b/gcc/fortran/frontend-passes.c @@ -57,6 +57,7 @@ static int call_external_blas (gfc_code **, int *, void *); static int matmul_temp_args (gfc_code **, int *,void *data); static int index_interchange (gfc_code **, int*, void *); static bool is_fe_temp (gfc_expr *e); +static void rewrite_co_reduce (gfc_namespace *); #ifdef CHECKING_P static void check_locus (gfc_namespace *); @@ -179,6 +180,9 @@ gfc_run_passes (gfc_namespace *ns) if (flag_realloc_lhs) realloc_strings (ns); + + if (flag_coarray == GFC_FCOARRAY_NATIVE) + rewrite_co_reduce (ns); } #ifdef CHECKING_P @@ -5895,3 +5899,121 @@ gfc_fix_implicit_pure (gfc_namespace *ns) return changed; } + +/* Callback function. Create a wrapper around VALUE functions. 
*/ + +static int +co_reduce_code (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED, void *data) +{ + gfc_code *co = *c; + gfc_expr *oper; + gfc_symbol *op_sym; + gfc_symbol *arg1, *arg2; + gfc_namespace *parent_ns; + gfc_namespace *proc_ns; + gfc_symbol *proc_sym; + gfc_symtree *f1t, *f2t; + gfc_symbol *f1, *f2; + gfc_code *assign; + gfc_expr *e1, *e2; + char name[GFC_MAX_SYMBOL_LEN + 1]; + static int num; + + if (co->op != EXEC_CALL || co->resolved_isym == NULL + || co->resolved_isym->id != GFC_ISYM_CO_REDUCE) + return 0; + + oper = co->ext.actual->next->expr; + op_sym = oper->symtree->n.sym; + arg1 = op_sym->formal->sym; + arg2 = op_sym->formal->next->sym; + + parent_ns = (gfc_namespace *) data; + + /* Generate the wrapper around the function. */ + proc_ns = gfc_get_namespace (parent_ns, 0); + snprintf (name, GFC_MAX_SYMBOL_LEN, "__coreduce_%d_%s", num++, op_sym->name); + gfc_get_symbol (name, proc_ns, &proc_sym); + proc_sym->attr.flavor = FL_PROCEDURE; + proc_sym->attr.subroutine = 1; + proc_sym->attr.referenced = 1; + proc_sym->attr.access = ACCESS_PRIVATE; + gfc_commit_symbol (proc_sym); + proc_ns->proc_name = proc_sym; + + /* Make up the formal arguments. */ + gfc_get_sym_tree (arg1->name, proc_ns, &f1t, false); + f1 = f1t->n.sym; + f1->ts = arg1->ts; + f1->attr.flavor = FL_VARIABLE; + f1->attr.dummy = 1; + f1->attr.intent = INTENT_INOUT; + f1->attr.fe_temp = 1; + f1->declared_at = arg1->declared_at; + f1->attr.referenced = 1; + proc_sym->formal = gfc_get_formal_arglist (); + proc_sym->formal->sym = f1; + gfc_commit_symbol (f1); + + gfc_get_sym_tree (arg2->name, proc_ns, &f2t, false); + f2 = f2t->n.sym; + f2->ts = arg2->ts; + f2->attr.flavor = FL_VARIABLE; + f2->attr.dummy = 1; + f2->attr.intent = INTENT_IN; + f2->attr.fe_temp = 1; + f2->declared_at = arg2->declared_at; + f2->attr.referenced = 1; + proc_sym->formal->next = gfc_get_formal_arglist (); + proc_sym->formal->next->sym = f2; + gfc_commit_symbol (f2); + + /* Generate the assignment statement. 
*/ + assign = gfc_get_code (EXEC_ASSIGN); + + e1 = gfc_lval_expr_from_sym (f1); + e2 = gfc_get_expr (); + e2->where = proc_sym->declared_at; + e2->expr_type = EXPR_FUNCTION; + e2->symtree = f2t; + e2->ts = arg1->ts; + e2->value.function.esym = op_sym; + e2->value.function.actual = gfc_get_actual_arglist (); + e2->value.function.actual->expr = gfc_lval_expr_from_sym (f1); + e2->value.function.actual->next = gfc_get_actual_arglist (); + e2->value.function.actual->next->expr = gfc_lval_expr_from_sym (f2); + assign->expr1 = e1; + assign->expr2 = e2; + assign->loc = proc_sym->declared_at; + + proc_ns->code = assign; + + /* And hang it into the sibling list. */ + proc_ns->sibling = parent_ns->contained; + parent_ns->contained = proc_ns; + + /* ... and finally replace the call in the statement. */ + + oper->symtree->n.sym = proc_sym; + proc_sym->refs ++; + return 0; +} + +/* Rewrite functions for co_reduce for a consistent calling + signature. This is only necessary if any of the functions + has a VALUE argument. */ + +static void +rewrite_co_reduce (gfc_namespace *global_ns) +{ + gfc_namespace *ns; + + gfc_code_walker (&global_ns->code, co_reduce_code, dummy_expr_callback, + (void *) global_ns); + + for (ns = global_ns->contained; ns; ns = ns->sibling) + gfc_code_walker (&ns->code, co_reduce_code, dummy_expr_callback, + (void *) global_ns); + + return; +} diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h index d0cea83..6940c24 100644 --- a/gcc/fortran/gfortran.h +++ b/gcc/fortran/gfortran.h @@ -2010,6 +2010,7 @@ typedef struct gfc_array_ref int dimen; /* # of components in the reference */ int codimen; bool in_allocate; /* For coarray checks. */ + bool native_coarray_argument; gfc_expr *team; gfc_expr *stat; locus where; diff --git a/gcc/fortran/intrinsic.c b/gcc/fortran/intrinsic.c index ef33587..6a5b3e9 100644 --- a/gcc/fortran/intrinsic.c +++ b/gcc/fortran/intrinsic.c @@ -3734,7 +3734,7 @@ add_subroutines (void) /* Coarray collectives. 
*/ add_sym_4s ("co_broadcast", GFC_ISYM_CO_BROADCAST, CLASS_IMPURE, BT_UNKNOWN, 0, GFC_STD_F2018, - gfc_check_co_broadcast, NULL, NULL, + gfc_check_co_broadcast, NULL, gfc_resolve_co_broadcast, a, BT_REAL, dr, REQUIRED, INTENT_INOUT, "source_image", BT_INTEGER, di, REQUIRED, INTENT_IN, stat, BT_INTEGER, di, OPTIONAL, INTENT_OUT, @@ -3742,7 +3742,7 @@ add_subroutines (void) add_sym_4s ("co_max", GFC_ISYM_CO_MAX, CLASS_IMPURE, BT_UNKNOWN, 0, GFC_STD_F2018, - gfc_check_co_minmax, NULL, NULL, + gfc_check_co_minmax, NULL, gfc_resolve_co_max, a, BT_REAL, dr, REQUIRED, INTENT_INOUT, result_image, BT_INTEGER, di, OPTIONAL, INTENT_IN, stat, BT_INTEGER, di, OPTIONAL, INTENT_OUT, @@ -3750,7 +3750,7 @@ add_subroutines (void) add_sym_4s ("co_min", GFC_ISYM_CO_MIN, CLASS_IMPURE, BT_UNKNOWN, 0, GFC_STD_F2018, - gfc_check_co_minmax, NULL, NULL, + gfc_check_co_minmax, NULL, gfc_resolve_co_min, a, BT_REAL, dr, REQUIRED, INTENT_INOUT, result_image, BT_INTEGER, di, OPTIONAL, INTENT_IN, stat, BT_INTEGER, di, OPTIONAL, INTENT_OUT, @@ -3758,7 +3758,7 @@ add_subroutines (void) add_sym_4s ("co_sum", GFC_ISYM_CO_SUM, CLASS_IMPURE, BT_UNKNOWN, 0, GFC_STD_F2018, - gfc_check_co_sum, NULL, NULL, + gfc_check_co_sum, NULL, gfc_resolve_co_sum, a, BT_REAL, dr, REQUIRED, INTENT_INOUT, result_image, BT_INTEGER, di, OPTIONAL, INTENT_IN, stat, BT_INTEGER, di, OPTIONAL, INTENT_OUT, @@ -3766,7 +3766,7 @@ add_subroutines (void) add_sym_5s ("co_reduce", GFC_ISYM_CO_REDUCE, CLASS_IMPURE, BT_UNKNOWN, 0, GFC_STD_F2018, - gfc_check_co_reduce, NULL, NULL, + gfc_check_co_reduce, NULL, gfc_resolve_co_reduce, a, BT_REAL, dr, REQUIRED, INTENT_INOUT, "operator", BT_INTEGER, di, REQUIRED, INTENT_IN, result_image, BT_INTEGER, di, OPTIONAL, INTENT_IN, diff --git a/gcc/fortran/intrinsic.h b/gcc/fortran/intrinsic.h index 166ae79..2ca566c 100644 --- a/gcc/fortran/intrinsic.h +++ b/gcc/fortran/intrinsic.h @@ -677,7 +677,11 @@ void gfc_resolve_system_sub (gfc_code *); void gfc_resolve_ttynam_sub (gfc_code *); void 
gfc_resolve_umask_sub (gfc_code *); void gfc_resolve_unlink_sub (gfc_code *); - +void gfc_resolve_co_sum (gfc_code *); +void gfc_resolve_co_min (gfc_code *); +void gfc_resolve_co_max (gfc_code *); +void gfc_resolve_co_reduce (gfc_code *); +void gfc_resolve_co_broadcast (gfc_code *); /* The findloc() subroutine requires the most arguments: six. */ diff --git a/gcc/fortran/iresolve.c b/gcc/fortran/iresolve.c index 7376961..844891e 100644 --- a/gcc/fortran/iresolve.c +++ b/gcc/fortran/iresolve.c @@ -36,6 +36,7 @@ along with GCC; see the file COPYING3. If not see #include "constructor.h" #include "arith.h" #include "trans.h" +#include "options.h" /* Given printf-like arguments, return a stable version of the result string. @@ -4030,3 +4031,100 @@ gfc_resolve_unlink_sub (gfc_code *c) name = gfc_get_string (PREFIX ("unlink_i%d_sub"), kind); c->resolved_sym = gfc_get_intrinsic_sub_symbol (name); } + +/* Resolve the CO_SUM et al. intrinsic subroutines. */ + +static void +gfc_resolve_co_collective (gfc_code *c, const char *oper) +{ + int kind; + gfc_expr *e; + const char *name; + + if (flag_coarray != GFC_FCOARRAY_NATIVE) + name = gfc_get_string (PREFIX ("caf_co_sum")); + else + { + e = c->ext.actual->expr; + kind = e->ts.kind; + + name = gfc_get_string (PREFIX ("nca_collsub_%s_%s_%c%d"), oper, + e->rank ? "array" : "scalar", + gfc_type_letter (e->ts.type), kind); + } + + c->resolved_sym = gfc_get_intrinsic_sub_symbol (name); +} + +/* Resolve CO_SUM. */ + +void +gfc_resolve_co_sum (gfc_code *c) +{ + gfc_resolve_co_collective (c, "sum"); +} + +/* Resolve CO_MIN. */ + +void +gfc_resolve_co_min (gfc_code *c) +{ + gfc_resolve_co_collective (c, "min"); +} + +/* Resolve CO_MAX. */ + +void +gfc_resolve_co_max (gfc_code *c) +{ + gfc_resolve_co_collective (c, "max"); +} + +/* Resolve CO_REDUCE. 
*/ + +void +gfc_resolve_co_reduce (gfc_code *c) +{ + gfc_expr *e; + const char *name; + + if (flag_coarray != GFC_FCOARRAY_NATIVE) + name = gfc_get_string (PREFIX ("caf_co_reduce")); + + else + { + e = c->ext.actual->expr; + if (e->ts.type == BT_CHARACTER) + name = gfc_get_string (PREFIX ("nca_collsub_reduce_%s%c%d"), + e->rank ? "array" : "scalar", + gfc_type_letter (e->ts.type), e->ts.kind); + else + name = gfc_get_string (PREFIX ("nca_collsub_reduce_%s"), + e->rank ? "array" : "scalar" ); + } + + c->resolved_sym = gfc_get_intrinsic_sub_symbol (name); +} + +void +gfc_resolve_co_broadcast (gfc_code * c) +{ + gfc_expr *e; + const char *name; + + if (flag_coarray != GFC_FCOARRAY_NATIVE) + name = gfc_get_string (PREFIX ("caf_co_broadcast")); + else + { + e = c->ext.actual->expr; + if (e->ts.type == BT_CHARACTER) + name = gfc_get_string (PREFIX ("nca_collsub_broadcast_%s%c%d"), + e->rank ? "array" : "scalar", + gfc_type_letter (e->ts.type), e->ts.kind); + else + name = gfc_get_string (PREFIX ("nca_collsub_broadcast_%s"), + e->rank ? "array" : "scalar" ); + } + + c->resolved_sym = gfc_get_intrinsic_sub_symbol (name); +} diff --git a/gcc/fortran/lang.opt b/gcc/fortran/lang.opt index da4b1aa..e267fb0 100644 --- a/gcc/fortran/lang.opt +++ b/gcc/fortran/lang.opt @@ -761,7 +761,7 @@ Copy array sections into a contiguous block on procedure entry. fcoarray= Fortran RejectNegative Joined Enum(gfc_fcoarray) Var(flag_coarray) Init(GFC_FCOARRAY_NONE) --fcoarray=<none|single|lib> Specify which coarray parallelization should be used. +-fcoarray=<none|single|lib|native> Specify which coarray parallelization should be used. 
Enum Name(gfc_fcoarray) Type(enum gfc_fcoarray) UnknownError(Unrecognized option: %qs) @@ -775,6 +775,9 @@ Enum(gfc_fcoarray) String(single) Value(GFC_FCOARRAY_SINGLE) EnumValue Enum(gfc_fcoarray) String(lib) Value(GFC_FCOARRAY_LIB) +EnumValue +Enum(gfc_fcoarray) String(native) Value(GFC_FCOARRAY_NATIVE) + fcheck= Fortran RejectNegative JoinedOrMissing -fcheck=[...] Specify which runtime checks are to be performed. diff --git a/gcc/fortran/resolve.c b/gcc/fortran/resolve.c index f4ce49f..6d6984b 100644 --- a/gcc/fortran/resolve.c +++ b/gcc/fortran/resolve.c @@ -3585,6 +3585,53 @@ resolve_specific_s (gfc_code *c) return false; } +/* Fix up references to native coarrays in call - element references + have to be converted to full references if the coarray has to be + passed fully. */ + +static void +fixup_coarray_args (gfc_symbol *sym, gfc_actual_arglist *actual) +{ + gfc_formal_arglist *formal, *f; + gfc_actual_arglist *a; + + formal = gfc_sym_get_dummy_args (sym); + + if (formal == NULL) + return; + + for (a = actual, f = formal; a && f; a = a->next, f = f->next) + { + if (a->expr == NULL || f->sym == NULL) + continue; + if (a->expr->expr_type == EXPR_VARIABLE + && a->expr->symtree->n.sym->attr.codimension + && f->sym->attr.codimension) + { + gfc_ref *r; + for (r = a->expr->ref; r; r = r->next) + { + if (r->type == REF_ARRAY && r->u.ar.codimen) + { + gfc_array_ref *ar = &r->u.ar; + int i, eff_dimen = ar->dimen + ar->codimen; + + for (i = ar->dimen; i < eff_dimen; i++) + { + ar->dimen_type[i] = DIMEN_RANGE; + gcc_assert (ar->start[i] == NULL); + gcc_assert (ar->end[i] == NULL); + } + + if (ar->type == AR_ELEMENT) + ar->type = !ar->dimen ? AR_FULL : AR_SECTION; + + ar->native_coarray_argument = true; + } + } + } + } +} /* Resolve a subroutine call not known to be generic nor specific. 
*/ @@ -3615,7 +3662,7 @@ resolve_unknown_s (gfc_code *c) found: gfc_procedure_use (sym, &c->ext.actual, &c->loc); - + c->resolved_sym = sym; return pure_subroutine (sym, sym->name, &c->loc); @@ -3740,6 +3787,9 @@ resolve_call (gfc_code *c) /* Typebound procedure: Assume the worst. */ gfc_current_ns->proc_name->attr.array_outer_dependency = 1; + if (flag_coarray == GFC_FCOARRAY_NATIVE) + fixup_coarray_args (csym, c->ext.actual); + return t; } @@ -10117,7 +10167,7 @@ resolve_critical (gfc_code *code) char name[GFC_MAX_SYMBOL_LEN]; static int serial = 0; - if (flag_coarray != GFC_FCOARRAY_LIB) + if (flag_coarray != GFC_FCOARRAY_LIB && flag_coarray != GFC_FCOARRAY_NATIVE) return; symtree = gfc_find_symtree (gfc_current_ns->sym_root, @@ -10154,6 +10204,19 @@ resolve_critical (gfc_code *code) symtree->n.sym->as->lower[0] = gfc_get_int_expr (gfc_default_integer_kind, NULL, 1); gfc_commit_symbols(); + + if (flag_coarray == GFC_FCOARRAY_NATIVE) + { + gfc_ref *r = gfc_get_ref (); + r->type = REF_ARRAY; + r->u.ar.type = AR_ELEMENT; + r->u.ar.as = code->resolved_sym->as; + for (int i = 0; i < code->resolved_sym->as->corank; i++) + r->u.ar.dimen_type [i] = DIMEN_THIS_IMAGE; + + code->expr1 = gfc_lval_expr_from_sym (code->resolved_sym); + code->expr1->ref = r; + } } diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c index 6566c47..9013f19 100644 --- a/gcc/fortran/trans-array.c +++ b/gcc/fortran/trans-array.c @@ -2940,6 +2940,60 @@ gfc_add_loop_ss_code (gfc_loopinfo * loop, gfc_ss * ss, bool subscript, gfc_add_loop_ss_code (nested_loop, nested_loop->ss, subscript, where); } +static tree +gfc_add_strides (tree expr, tree desc, int beg, int end) +{ + int i; + tree tmp, stride; + tmp = gfc_index_zero_node; + for (i = beg; i < end; i++) + { + stride = gfc_conv_array_stride (desc, i); + tmp = fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE(tmp), + tmp, stride); + } + return fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE(expr), + expr, tmp); +} + +/* This 
function calculates the new offset via + new_offset = offset + this_image () + * arrray.stride[first_codimension] + + sum (remaining codimension offsets) + If offset is a pointer, we also need to multiply it by the size.*/ +static tree +gfc_native_coarray_add_this_image_offset (tree offset, tree desc, + gfc_array_ref *ar, int is_pointer, + int subtract) +{ + tree tmp, off; + /* Calculate the actual offset. */ + tmp = build_call_expr_loc (input_location, gfor_fndecl_nca_this_image, + 1, integer_zero_node); + tmp = convert (TREE_TYPE(gfc_index_zero_node), tmp); + tmp = fold_build2_loc (input_location, MINUS_EXPR, TREE_TYPE(tmp), tmp, + build_int_cst (TREE_TYPE(tmp), subtract)); + tmp = fold_build2_loc (input_location, MULT_EXPR, TREE_TYPE(tmp), + gfc_conv_array_stride (desc, ar->dimen), tmp); + /* We also need to add the missing strides once to compensate for the + offset, that is to large now. The loop starts at sym->as.rank+1 + because we need to skip the first corank stride */ + off = gfc_add_strides (tmp, desc, ar->as->rank + 1, + ar->as->rank + ar->as->corank); + if (is_pointer) + { + /* Remove pointer and array from type in order to get the raw base type. */ + tmp = TREE_TYPE(TREE_TYPE(TREE_TYPE(offset))); + /* And get the size of that base type. */ + tmp = convert (TREE_TYPE(off), size_in_bytes_loc (input_location, tmp)); + tmp = fold_build2_loc (input_location, MULT_EXPR, TREE_TYPE(off), + off, tmp); + return fold_build_pointer_plus_loc (input_location, offset, tmp); + } + else + return fold_build2_loc (input_location, PLUS_EXPR, TREE_TYPE(offset), + offset, off); +} /* Translate expressions for the descriptor and data pointer of a SS. 
*/ /*GCC ARRAYS*/ @@ -2951,6 +3005,7 @@ gfc_conv_ss_descriptor (stmtblock_t * block, gfc_ss * ss, int base) gfc_ss_info *ss_info; gfc_array_info *info; tree tmp; + gfc_ref *ref; ss_info = ss->info; info = &ss_info->data.array; @@ -2982,10 +3037,18 @@ gfc_conv_ss_descriptor (stmtblock_t * block, gfc_ss * ss, int base) } /* Also the data pointer. */ tmp = gfc_conv_array_data (se.expr); + /* If we have a native coarray with implied this_image (), add the + appropriate offset to the data pointer. */ + ref = ss_info->expr->ref; + if (flag_coarray == GFC_FCOARRAY_NATIVE && ref + && ref->u.ar.dimen_type[ref->u.ar.dimen + ref->u.ar.codimen - 1] + == DIMEN_THIS_IMAGE) + tmp = gfc_native_coarray_add_this_image_offset (tmp, se.expr, &ref->u.ar, 1, 1); /* If this is a variable or address of a variable we use it directly. Otherwise we must evaluate it now to avoid breaking dependency analysis by pulling the expressions for elemental array indices inside the loop. */ + if (!(DECL_P (tmp) || (TREE_CODE (tmp) == ADDR_EXPR && DECL_P (TREE_OPERAND (tmp, 0))))) @@ -2993,6 +3056,15 @@ gfc_conv_ss_descriptor (stmtblock_t * block, gfc_ss * ss, int base) info->data = tmp; tmp = gfc_conv_array_offset (se.expr); + /* If we have a native coarray, adjust the offset to remove the + offset for the codimensions. */ + // TODO: check whether the recipient is a coarray, if it is, disable + // all of this + if (flag_coarray == GFC_FCOARRAY_NATIVE && ref + && ref->u.ar.dimen_type[ref->u.ar.dimen + ref->u.ar.codimen - 1] + == DIMEN_THIS_IMAGE) + tmp = gfc_add_strides (tmp, se.expr, ref->u.ar.as->rank, + ref->u.ar.as->rank + ref->u.ar.as->corank); info->offset = gfc_evaluate_now (tmp, block); /* Make absolutely sure that the saved_offset is indeed saved @@ -3593,6 +3665,7 @@ build_array_ref (tree desc, tree offset, tree decl, tree vptr) } + /* Build an array reference. se->expr already holds the array descriptor. This should be either a variable, indirect variable reference or component reference. 
For arrays which do not have a descriptor, se->expr will be @@ -3612,8 +3685,20 @@ gfc_conv_array_ref (gfc_se * se, gfc_array_ref * ar, gfc_expr *expr, gfc_se tmpse; gfc_symbol * sym = expr->symtree->n.sym; char *var_name = NULL; + bool need_impl_this_image; + int eff_dimen; + + need_impl_this_image = + ar->dimen_type[ar->dimen + ar->codimen - 1] == DIMEN_THIS_IMAGE; + + if (flag_coarray == GFC_FCOARRAY_NATIVE + && !need_impl_this_image) + eff_dimen = ar->dimen + ar->codimen - 1; + else + eff_dimen = ar->dimen - 1; - if (ar->dimen == 0) + + if (flag_coarray != GFC_FCOARRAY_NATIVE && ar->dimen == 0) { gcc_assert (ar->codimen || sym->attr.select_rank_temporary || (ar->as && ar->as->corank)); @@ -3681,7 +3766,7 @@ gfc_conv_array_ref (gfc_se * se, gfc_array_ref * ar, gfc_expr *expr, /* Calculate the offsets from all the dimensions. Make sure to associate the final offset so that we form a chain of loop invariant summands. */ - for (n = ar->dimen - 1; n >= 0; n--) + for (n = eff_dimen; n >= 0; n--) { /* Calculate the index for this dimension. 
*/ gfc_init_se (&indexse, se); @@ -3753,6 +3838,9 @@ gfc_conv_array_ref (gfc_se * se, gfc_array_ref * ar, gfc_expr *expr, add_to_offset (&cst_offset, &offset, tmp); } + if (flag_coarray == GFC_FCOARRAY_NATIVE && need_impl_this_image) + offset = gfc_native_coarray_add_this_image_offset (offset, se->expr, ar, 0, 0); + if (!integer_zerop (cst_offset)) offset = fold_build2_loc (input_location, PLUS_EXPR, gfc_array_index_type, offset, cst_offset); @@ -5423,7 +5511,7 @@ gfc_conv_descriptor_cosize (tree desc, int rank, int corank) } */ /*GCC ARRAYS*/ -static tree +tree gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset, gfc_expr ** lower, gfc_expr ** upper, stmtblock_t * pblock, stmtblock_t * descriptor_block, tree * overflow, @@ -5441,6 +5529,8 @@ gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset, tree elsecase; tree cond; tree var; + tree conv_lbound; + tree conv_ubound; stmtblock_t thenblock; stmtblock_t elseblock; gfc_expr *ubound; @@ -5454,7 +5544,7 @@ gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset, /* Set the dtype before the alloc, because registration of coarrays needs it initialized. 
*/ - if (expr->ts.type == BT_CHARACTER + if (expr && expr->ts.type == BT_CHARACTER && expr->ts.deferred && VAR_P (expr->ts.u.cl->backend_decl)) { @@ -5462,7 +5552,7 @@ gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset, tmp = gfc_conv_descriptor_dtype (descriptor); gfc_add_modify (pblock, tmp, gfc_get_dtype_rank_type (rank, type)); } - else if (expr->ts.type == BT_CHARACTER + else if (expr && expr->ts.type == BT_CHARACTER && expr->ts.deferred && TREE_CODE (descriptor) == COMPONENT_REF) { @@ -5494,9 +5584,6 @@ gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset, for (n = 0; n < rank; n++) { - tree conv_lbound; - tree conv_ubound; - /* We have 3 possibilities for determining the size of the array: lower == NULL => lbound = 1, ubound = upper[n] upper[n] = NULL => lbound = 1, ubound = lower[n] @@ -5646,6 +5733,15 @@ gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset, } gfc_conv_descriptor_lbound_set (descriptor_block, descriptor, gfc_rank_cst[n], se.expr); + conv_lbound = se.expr; + if (flag_coarray == GFC_FCOARRAY_NATIVE) + { + + tmp = fold_build2_loc (input_location, MULT_EXPR, gfc_array_index_type, + se.expr, stride); + offset = fold_build2_loc (input_location, MINUS_EXPR, + gfc_array_index_type, offset, tmp); + } if (n < rank + corank - 1) { @@ -5655,6 +5751,18 @@ gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset, gfc_add_block_to_block (pblock, &se.pre); gfc_conv_descriptor_ubound_set (descriptor_block, descriptor, gfc_rank_cst[n], se.expr); + gfc_conv_descriptor_stride_set (descriptor_block, descriptor, + gfc_rank_cst[n], stride); + conv_ubound = se.expr; + if (flag_coarray == GFC_FCOARRAY_NATIVE) + { + size = gfc_conv_array_extent_dim (conv_lbound, conv_ubound, + &or_expr); + size = gfc_evaluate_now (size, descriptor_block); + stride = fold_build2_loc (input_location, MULT_EXPR, + gfc_array_index_type, stride, size); + stride = gfc_evaluate_now (stride, 
descriptor_block); + } } } @@ -5688,7 +5796,7 @@ gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset, /* Convert to size_t. */ *element_size = fold_convert (size_type_node, tmp); - if (rank == 0) + if (rank == 0 && !(flag_coarray == GFC_FCOARRAY_NATIVE && corank)) return *element_size; *nelems = gfc_evaluate_now (stride, pblock); @@ -5773,6 +5881,38 @@ retrieve_last_ref (gfc_ref **ref_in, gfc_ref **prev_ref_in) return true; } +int +gfc_native_coarray_get_allocation_type (gfc_symbol * sym) +{ + bool is_lock_type, is_event_type; + is_lock_type = sym->ts.type == BT_DERIVED + && sym->ts.u.derived->from_intmod == INTMOD_ISO_FORTRAN_ENV + && sym->ts.u.derived->intmod_sym_id == ISOFORTRAN_LOCK_TYPE; + + is_event_type = sym->ts.type == BT_DERIVED + && sym->ts.u.derived->from_intmod == INTMOD_ISO_FORTRAN_ENV + && sym->ts.u.derived->intmod_sym_id == ISOFORTRAN_EVENT_TYPE; + + if (is_lock_type) + return GFC_NCA_LOCK_COARRAY; + else if (is_event_type) + return GFC_NCA_EVENT_COARRAY; + else + return GFC_NCA_NORMAL_COARRAY; +} + +void +gfc_allocate_native_coarray (stmtblock_t *b, tree decl, tree size, int corank, + int alloc_type) +{ + gfc_add_expr_to_block (b, + build_call_expr_loc (input_location, gfor_fndecl_nca_coarray_allocate, + 4, gfc_build_addr_expr (pvoid_type_node, decl), + size, build_int_cst (integer_type_node, corank), + build_int_cst (integer_type_node, alloc_type))); + +} + /* Initializes the descriptor and generates a call to _gfor_allocate. Does the work for an ALLOCATE statement. 
*/ /*GCC ARRAYS*/ @@ -5784,6 +5924,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, bool e3_has_nodescriptor) { tree tmp; + tree allocation; tree pointer; tree offset = NULL_TREE; tree token = NULL_TREE; @@ -5914,7 +6055,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, expr3_elem_size, nelems, expr3, e3_arr_desc, e3_has_nodescriptor, expr, &element_size); - if (dimension) + if (dimension || (flag_coarray == GFC_FCOARRAY_NATIVE && coarray)) { var_overflow = gfc_create_var (integer_type_node, "overflow"); gfc_add_modify (&se->pre, var_overflow, overflow); @@ -5956,7 +6097,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, pointer = gfc_conv_descriptor_data_get (se->expr); STRIP_NOPS (pointer); - if (allocatable) + if (allocatable && !(flag_coarray == GFC_FCOARRAY_NATIVE && coarray)) { not_prev_allocated = gfc_create_var (logical_type_node, "not_prev_allocated"); @@ -5969,8 +6110,17 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, gfc_start_block (&elseblock); + if (coarray && flag_coarray == GFC_FCOARRAY_NATIVE) + { + tree elem_size + = size_in_bytes (gfc_get_element_type (TREE_TYPE(se->expr))); + int alloc_type + = gfc_native_coarray_get_allocation_type (expr->symtree->n.sym); + gfc_allocate_native_coarray (&elseblock, se->expr, elem_size, + ref->u.ar.as->corank, alloc_type); + } /* The allocatable variant takes the old pointer as first argument. */ - if (allocatable) + else if (allocatable) gfc_allocate_allocatable (&elseblock, pointer, size, token, status, errmsg, errlen, label_finish, expr, coref != NULL ? 
coref->u.ar.as->corank : 0); @@ -5987,13 +6137,12 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, cond = gfc_unlikely (fold_build2_loc (input_location, NE_EXPR, logical_type_node, var_overflow, integer_zero_node), PRED_FORTRAN_OVERFLOW); - tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, cond, + allocation = fold_build3_loc (input_location, COND_EXPR, void_type_node, cond, error, gfc_finish_block (&elseblock)); } else - tmp = gfc_finish_block (&elseblock); + allocation = gfc_finish_block (&elseblock); - gfc_add_expr_to_block (&se->pre, tmp); /* Update the array descriptor with the offset and the span. */ if (dimension) @@ -6004,6 +6153,7 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, } set_descriptor = gfc_finish_block (&set_descriptor_block); + if (status != NULL_TREE) { cond = fold_build2_loc (input_location, EQ_EXPR, @@ -6014,14 +6164,25 @@ gfc_array_allocate (gfc_se * se, gfc_expr * expr, tree status, tree errmsg, cond = fold_build2_loc (input_location, TRUTH_OR_EXPR, logical_type_node, cond, not_prev_allocated); - gfc_add_expr_to_block (&se->pre, - fold_build3_loc (input_location, COND_EXPR, void_type_node, + set_descriptor = fold_build3_loc (input_location, COND_EXPR, void_type_node, cond, set_descriptor, - build_empty_stmt (input_location))); + build_empty_stmt (input_location)); + } + + // For native coarrays, the size must be set before the allocation routine + // can be called. 
+ if (coarray && flag_coarray == GFC_FCOARRAY_NATIVE) + { + gfc_add_expr_to_block (&se->pre, set_descriptor); + gfc_add_expr_to_block (&se->pre, allocation); } else + { + gfc_add_expr_to_block (&se->pre, allocation); gfc_add_expr_to_block (&se->pre, set_descriptor); + } + return true; } @@ -6524,6 +6685,7 @@ gfc_trans_dummy_array_bias (gfc_symbol * sym, tree tmpdesc, bool optional_arg; gfc_array_spec *as; bool is_classarray = IS_CLASS_ARRAY (sym); + int eff_dimen; /* Do nothing for pointer and allocatable arrays. */ if ((sym->ts.type != BT_CLASS && sym->attr.pointer) @@ -6638,8 +6800,13 @@ gfc_trans_dummy_array_bias (gfc_symbol * sym, tree tmpdesc, offset = gfc_index_zero_node; size = gfc_index_one_node; + if (flag_coarray == GFC_FCOARRAY_NATIVE) + eff_dimen = as->rank + as->corank; + else + eff_dimen = as->rank; + /* Evaluate the bounds of the array. */ - for (n = 0; n < as->rank; n++) + for (n = 0; n < eff_dimen; n++) { if (checkparm || !as->upper[n]) { @@ -6724,7 +6891,7 @@ gfc_trans_dummy_array_bias (gfc_symbol * sym, tree tmpdesc, gfc_array_index_type, offset, tmp); /* The size of this dimension, and the stride of the next. */ - if (n + 1 < as->rank) + if (n + 1 < eff_dimen) { stride = GFC_TYPE_ARRAY_STRIDE (type, n + 1); @@ -6879,20 +7046,35 @@ gfc_get_dataptr_offset (stmtblock_t *block, tree parm, tree desc, tree offset, return; } + /* if it's a coarray with implicit this_image, add that to the offset. */ + ref = expr->ref; + if (flag_coarray == GFC_FCOARRAY_NATIVE && ref && ref->type == REF_ARRAY + && ref->u.ar.dimen_type[ref->u.ar.dimen + ref->u.ar.codimen - 1] + == DIMEN_THIS_IMAGE + && !ref->u.ar.native_coarray_argument) + offset = gfc_native_coarray_add_this_image_offset (offset, desc, + &ref->u.ar, 0, 1); + tmp = build_array_ref (desc, offset, NULL, NULL); /* Offset the data pointer for pointer assignments from arrays with subreferences; e.g. my_integer => my_type(:)%integer_component. */ if (subref) { - /* Go past the array reference. 
*/ + /* Go past the array reference. */ for (ref = expr->ref; ref; ref = ref->next) - if (ref->type == REF_ARRAY && - ref->u.ar.type != AR_ELEMENT) - { - ref = ref->next; - break; - } + { + if (ref->type == REF_ARRAY && + ref->u.ar.type != AR_ELEMENT) + { + ref = ref->next; + break; + } + else if (flag_coarray == GFC_FCOARRAY_NATIVE && ref->type == REF_ARRAY && + ref->u.ar.dimen_type[ref->u.ar.dimen +ref->u.ar.codimen -1] + == DIMEN_THIS_IMAGE) + tmp = gfc_native_coarray_add_this_image_offset (tmp, desc, &ref->u.ar, 0, 1); + } /* Calculate the offset for each subsequent subreference. */ for (; ref; ref = ref->next) @@ -6955,7 +7137,10 @@ gfc_get_dataptr_offset (stmtblock_t *block, tree parm, tree desc, tree offset, gfc_array_index_type, stride, itmp); stride = gfc_evaluate_now (stride, block); } - + if (flag_coarray == GFC_FCOARRAY_NATIVE && + ref->u.ar.dimen_type[ref->u.ar.dimen +ref->u.ar.codimen -1] + == DIMEN_THIS_IMAGE) + tmp = gfc_native_coarray_add_this_image_offset (tmp, desc, &ref->u.ar, 0, 1); /* Apply the index to obtain the array element. */ tmp = gfc_build_array_ref (tmp, index, NULL); break; @@ -7306,6 +7491,13 @@ gfc_conv_expr_descriptor (gfc_se *se, gfc_expr *expr) else full = gfc_full_array_ref_p (info->ref, NULL); + if (flag_coarray == GFC_FCOARRAY_NATIVE && + info->ref->type == REF_ARRAY && + info->ref->u.ar.dimen_type[info->ref->u.ar.dimen + + info->ref->u.ar.codimen - 1] == + DIMEN_THIS_IMAGE) + full = 0; + if (full && !transposed_dims (ss)) { if (se->direct_byref && !se->byref_noassign) @@ -7540,9 +7732,19 @@ gfc_conv_expr_descriptor (gfc_se *se, gfc_expr *expr) tree to; tree base; tree offset; - +#if 0 /* TK */ ndim = info->ref ? 
info->ref->u.ar.dimen : ss->dimen; - +#else + if (info->ref) + { + if (info->ref->u.ar.native_coarray_argument) + ndim = info->ref->u.ar.dimen + info->ref->u.ar.codimen; + else + ndim = info->ref->u.ar.dimen; + } + else + ndim = ss->dimen; +#endif if (se->want_coarray) { gfc_array_ref *ar = &info->ref->u.ar; @@ -7911,7 +8113,15 @@ gfc_conv_array_parameter (gfc_se * se, gfc_expr * expr, bool g77, expr->ts.u.cl->backend_decl = tmp; se->string_length = tmp; } - +#if 0 + if (flag_coarray == GFC_FCOARRAY_NATIVE && fsym && fsym->attr.codimension && sym) + { + gfc_init_se (se, NULL); + tmp = gfc_get_symbol_decl (sym); + se->expr = gfc_build_addr_expr (NULL_TREE, tmp); + return; + } +#endif /* Is this the result of the enclosing procedure? */ this_array_result = (full_array_var && sym->attr.flavor == FL_PROCEDURE); if (this_array_result @@ -7919,6 +8129,10 @@ gfc_conv_array_parameter (gfc_se * se, gfc_expr * expr, bool g77, && (sym->backend_decl != parent)) this_array_result = false; +#if 1 /* TK */ + if (flag_coarray == GFC_FCOARRAY_NATIVE && fsym && fsym->attr.codimension) + g77 = false; +#endif /* Passing address of the array if it is not pointer or assumed-shape. */ if (full_array_var && g77 && !this_array_result && sym->ts.type != BT_DERIVED && sym->ts.type != BT_CLASS) @@ -8053,8 +8267,8 @@ gfc_conv_array_parameter (gfc_se * se, gfc_expr * expr, bool g77, { /* Every other type of array. 
*/ se->want_pointer = 1; - gfc_conv_expr_descriptor (se, expr); + gfc_conv_expr_descriptor (se, expr); if (size) array_parameter_size (build_fold_indirect_ref_loc (input_location, se->expr), @@ -10869,9 +11083,15 @@ gfc_walk_array_ref (gfc_ss * ss, gfc_expr * expr, gfc_ref * ref) case AR_SECTION: newss = gfc_get_array_ss (ss, expr, 0, GFC_SS_SECTION); newss->info->data.array.ref = ref; - +#if 1 /* TK */ + int eff_dimen; + if (ar->native_coarray_argument) + eff_dimen = ar->dimen + ar->codimen; + else + eff_dimen = ar->dimen; +#endif /* We add SS chains for all the subscripts in the section. */ - for (n = 0; n < ar->dimen; n++) + for (n = 0; n < eff_dimen; n++) { gfc_ss *indexss; diff --git a/gcc/fortran/trans-array.h b/gcc/fortran/trans-array.h index e561605..0bfd1b0 100644 --- a/gcc/fortran/trans-array.h +++ b/gcc/fortran/trans-array.h @@ -23,6 +23,15 @@ along with GCC; see the file COPYING3. If not see bool gfc_array_allocate (gfc_se *, gfc_expr *, tree, tree, tree, tree, tree, tree *, gfc_expr *, tree, bool); +enum gfc_coarray_allocation_type { + GFC_NCA_NORMAL_COARRAY = 3, + GFC_NCA_LOCK_COARRAY, + GFC_NCA_EVENT_COARRAY +}; +int gfc_native_coarray_get_allocation_type (gfc_symbol *); + +void gfc_allocate_native_coarray (stmtblock_t *, tree, tree, int, int); + /* Allow the bounds of a loop to be set from a callee's array spec. 
*/ void gfc_set_loop_bounds_from_array_spec (gfc_interface_mapping *, gfc_se *, gfc_array_spec *); @@ -57,6 +66,10 @@ tree gfc_bcast_alloc_comp (gfc_symbol *, gfc_expr *, int, tree, tree gfc_deallocate_alloc_comp_no_caf (gfc_symbol *, tree, int); tree gfc_reassign_alloc_comp_caf (gfc_symbol *, tree, tree); +tree gfc_array_init_size (tree, int, int, tree *, gfc_expr **, gfc_expr **, + stmtblock_t *, stmtblock_t *, tree *, tree, tree *, + gfc_expr *, tree, bool, gfc_expr *, tree *); + tree gfc_copy_alloc_comp (gfc_symbol *, tree, tree, int, int); tree gfc_copy_only_alloc_comp (gfc_symbol *, tree, tree, int); diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c index 9224277..1973c4d 100644 --- a/gcc/fortran/trans-decl.c +++ b/gcc/fortran/trans-decl.c @@ -170,6 +170,21 @@ tree gfor_fndecl_co_reduce; tree gfor_fndecl_co_sum; tree gfor_fndecl_caf_is_present; +/* Native coarray functions. */ + +tree gfor_fndecl_nca_master; +tree gfor_fndecl_nca_coarray_allocate; +tree gfor_fndecl_nca_coarray_free; +tree gfor_fndecl_nca_this_image; +tree gfor_fndecl_nca_num_images; +tree gfor_fndecl_nca_sync_all; +tree gfor_fndecl_nca_sync_images; +tree gfor_fndecl_nca_lock; +tree gfor_fndecl_nca_unlock; +tree gfor_fndecl_nca_reduce_scalar; +tree gfor_fndecl_nca_reduce_array; +tree gfor_fndecl_nca_broadcast_scalar; +tree gfor_fndecl_nca_broadcast_array; /* Math functions. Many other math functions are handled in trans-intrinsic.c. */ @@ -961,6 +976,7 @@ gfc_build_qualified_array (tree decl, gfc_symbol * sym) tree type; int dim; int nest; + int eff_dimen; gfc_namespace* procns; symbol_attribute *array_attr; gfc_array_spec *as; @@ -1031,8 +1047,12 @@ gfc_build_qualified_array (tree decl, gfc_symbol * sym) else gfc_add_decl_to_function (token); } + + eff_dimen = flag_coarray == GFC_FCOARRAY_NATIVE + ? 
GFC_TYPE_ARRAY_RANK (type) + GFC_TYPE_ARRAY_CORANK (type) + : GFC_TYPE_ARRAY_RANK (type); - for (dim = 0; dim < GFC_TYPE_ARRAY_RANK (type); dim++) + for (dim = 0; dim < eff_dimen; dim++) { if (GFC_TYPE_ARRAY_LBOUND (type, dim) == NULL_TREE) { @@ -1054,22 +1074,30 @@ gfc_build_qualified_array (tree decl, gfc_symbol * sym) TREE_NO_WARNING (GFC_TYPE_ARRAY_STRIDE (type, dim)) = 1; } } - for (dim = GFC_TYPE_ARRAY_RANK (type); - dim < GFC_TYPE_ARRAY_RANK (type) + GFC_TYPE_ARRAY_CORANK (type); dim++) - { - if (GFC_TYPE_ARRAY_LBOUND (type, dim) == NULL_TREE) - { - GFC_TYPE_ARRAY_LBOUND (type, dim) = create_index_var ("lbound", nest); - TREE_NO_WARNING (GFC_TYPE_ARRAY_LBOUND (type, dim)) = 1; - } - /* Don't try to use the unknown ubound for the last coarray dimension. */ - if (GFC_TYPE_ARRAY_UBOUND (type, dim) == NULL_TREE - && dim < GFC_TYPE_ARRAY_RANK (type) + GFC_TYPE_ARRAY_CORANK (type) - 1) - { - GFC_TYPE_ARRAY_UBOUND (type, dim) = create_index_var ("ubound", nest); - TREE_NO_WARNING (GFC_TYPE_ARRAY_UBOUND (type, dim)) = 1; - } - } + + if (flag_coarray != GFC_FCOARRAY_NATIVE) + for (dim = GFC_TYPE_ARRAY_RANK (type); + dim < GFC_TYPE_ARRAY_RANK (type) + GFC_TYPE_ARRAY_CORANK (type); + dim++) + { + if (GFC_TYPE_ARRAY_LBOUND (type, dim) == NULL_TREE) + { + GFC_TYPE_ARRAY_LBOUND (type, dim) + = create_index_var ("lbound", nest); + TREE_NO_WARNING (GFC_TYPE_ARRAY_LBOUND (type, dim)) = 1; + } + /* Don't try to use the unknown ubound for the last coarray + dimension. 
*/ + if (GFC_TYPE_ARRAY_UBOUND (type, dim) == NULL_TREE + && dim < GFC_TYPE_ARRAY_RANK (type) + + GFC_TYPE_ARRAY_CORANK (type) - 1) + { + GFC_TYPE_ARRAY_UBOUND (type, dim) + = create_index_var ("ubound", nest); + TREE_NO_WARNING (GFC_TYPE_ARRAY_UBOUND (type, dim)) = 1; + } + } + if (GFC_TYPE_ARRAY_OFFSET (type) == NULL_TREE) { GFC_TYPE_ARRAY_OFFSET (type) = gfc_create_var_np (gfc_array_index_type, @@ -1202,6 +1230,10 @@ gfc_build_dummy_array_decl (gfc_symbol * sym, tree dummy) || (as && as->type == AS_ASSUMED_RANK)) return dummy; + if (flag_coarray == GFC_FCOARRAY_NATIVE && sym->attr.codimension + && sym->attr.allocatable) + return dummy; + /* Add to list of variables if not a fake result variable. These symbols are set on the symbol only, not on the class component. */ if (sym->attr.result || sym->attr.dummy) @@ -1504,7 +1536,6 @@ add_attributes_to_decl (symbol_attribute sym_attr, tree list) static void build_function_decl (gfc_symbol * sym, bool global); - /* Return the decl for a gfc_symbol, create it if it doesn't already exist. */ @@ -1820,7 +1851,7 @@ gfc_get_symbol_decl (gfc_symbol * sym) } /* Remember this variable for allocation/cleanup. 
*/ - if (sym->attr.dimension || sym->attr.allocatable || sym->attr.codimension + if (sym->attr.dimension || sym->attr.codimension || sym->attr.allocatable || (sym->ts.type == BT_CLASS && (CLASS_DATA (sym)->attr.dimension || CLASS_DATA (sym)->attr.allocatable)) @@ -1869,6 +1900,9 @@ gfc_get_symbol_decl (gfc_symbol * sym) gcc_assert (!sym->value || sym->value->expr_type == EXPR_NULL); } + if (flag_coarray == GFC_FCOARRAY_NATIVE && sym->attr.codimension) + TREE_STATIC(decl) = 1; + gfc_finish_var_decl (decl, sym); if (sym->ts.type == BT_CHARACTER) @@ -3693,6 +3727,7 @@ void gfc_build_builtin_function_decls (void) { tree gfc_int8_type_node = gfc_get_int_type (8); + tree pint_type = build_pointer_type (integer_type_node); gfor_fndecl_stop_numeric = gfc_build_library_function_decl ( get_identifier (PREFIX("stop_numeric")), @@ -3820,9 +3855,8 @@ gfc_build_builtin_function_decls (void) /* Coarray library calls. */ if (flag_coarray == GFC_FCOARRAY_LIB) { - tree pint_type, pppchar_type; + tree pppchar_type; - pint_type = build_pointer_type (integer_type_node); pppchar_type = build_pointer_type (build_pointer_type (pchar_type_node)); @@ -4062,6 +4096,64 @@ gfc_build_builtin_function_decls (void) integer_type_node, 3, pvoid_type_node, integer_type_node, pvoid_type_node); } + else if (flag_coarray == GFC_FCOARRAY_NATIVE) + { + gfor_fndecl_nca_master = gfc_build_library_function_decl_with_spec ( + get_identifier (PREFIX("nca_master")), ".r", integer_type_node, 1, + build_pointer_type (build_function_type_list (void_type_node, NULL_TREE))); + gfor_fndecl_nca_coarray_allocate = gfc_build_library_function_decl_with_spec ( + get_identifier (PREFIX("nca_coarray_alloc")), "..RRR", integer_type_node, 4, + pvoid_type_node, integer_type_node, integer_type_node, integer_type_node, + NULL_TREE); + gfor_fndecl_nca_coarray_free = gfc_build_library_function_decl_with_spec ( + get_identifier (PREFIX("nca_coarray_free")), "..RR", integer_type_node, 3, + pvoid_type_node, integer_type_node, 
integer_type_node, NULL_TREE); + gfor_fndecl_nca_this_image = gfc_build_library_function_decl_with_spec ( + get_identifier (PREFIX("nca_coarray_this_image")), ".X", integer_type_node, 1, + integer_type_node, NULL_TREE); + DECL_PURE_P (gfor_fndecl_nca_this_image) = 1; + gfor_fndecl_nca_num_images = gfc_build_library_function_decl_with_spec ( + get_identifier (PREFIX("nca_coarray_num_images")), ".X", integer_type_node, 1, + integer_type_node, NULL_TREE); + DECL_PURE_P (gfor_fndecl_nca_num_images) = 1; + gfor_fndecl_nca_sync_all = gfc_build_library_function_decl_with_spec ( + get_identifier (PREFIX("nca_coarray_sync_all")), ".X", void_type_node, 1, + build_pointer_type (integer_type_node), NULL_TREE); + gfor_fndecl_nca_sync_images = gfc_build_library_function_decl_with_spec ( + get_identifier (PREFIX("nca_sync_images")), ".RRXXX", void_type_node, + 5, integer_type_node, pint_type, pint_type, + pchar_type_node, size_type_node, NULL_TREE); + gfor_fndecl_nca_lock = gfc_build_library_function_decl_with_spec ( + get_identifier (PREFIX("nca_lock")), ".w", void_type_node, 1, + pvoid_type_node, NULL_TREE); + gfor_fndecl_nca_unlock = gfc_build_library_function_decl_with_spec ( + get_identifier (PREFIX("nca_unlock")), ".w", void_type_node, 1, + pvoid_type_node, NULL_TREE); + + gfor_fndecl_nca_reduce_scalar = + gfc_build_library_function_decl_with_spec ( + get_identifier (PREFIX("nca_collsub_reduce_scalar")), ".wrW", + void_type_node, 3, pvoid_type_node, + build_pointer_type (build_function_type_list (void_type_node, + pvoid_type_node, pvoid_type_node, NULL_TREE)), + pint_type, NULL_TREE); + + gfor_fndecl_nca_reduce_array = + gfc_build_library_function_decl_with_spec ( + get_identifier (PREFIX("nca_collsub_reduce_array")), ".wrWR", + void_type_node, 4, pvoid_type_node, + build_pointer_type (build_function_type_list (void_type_node, + pvoid_type_node, pvoid_type_node, NULL_TREE)), + pint_type, integer_type_node, NULL_TREE); + + gfor_fndecl_nca_broadcast_scalar = 
gfc_build_library_function_decl_with_spec ( + get_identifier (PREFIX ("nca_collsub_broadcast_scalar")), ".w..", + void_type_node, 3, pvoid_type_node, size_type_node, integer_type_node); + gfor_fndecl_nca_broadcast_array = gfc_build_library_function_decl_with_spec ( + get_identifier (PREFIX ("nca_collsub_broadcast_array")), ".W.", + void_type_node, 2, pvoid_type_node, integer_type_node); + } + gfc_build_intrinsic_function_decls (); gfc_build_intrinsic_lib_fndecls (); @@ -4538,6 +4630,76 @@ get_proc_result (gfc_symbol* sym) } +void +gfc_trans_native_coarray (stmtblock_t * init, stmtblock_t *cleanup, gfc_symbol * sym) +{ + tree tmp, decl; + tree overflow = build_int_cst (integer_type_node, 0), nelems, element_size; //All unused + tree offset; + tree elem_size; + int alloc_type; + + decl = sym->backend_decl; + + TREE_STATIC(decl) = 1; + + /* Tell the library to handle arrays of locks and event types separately. */ + alloc_type = gfc_native_coarray_get_allocation_type (sym); + + if (init) + { + gfc_array_init_size (decl, sym->as->rank, sym->as->corank, &offset, + sym->as->lower, sym->as->upper, init, + init, &overflow, + NULL_TREE, &nelems, NULL, + NULL_TREE, true, NULL, &element_size); + gfc_conv_descriptor_offset_set (init, decl, offset); + elem_size = size_in_bytes (gfc_get_element_type (TREE_TYPE(decl))); + gfc_allocate_native_coarray (init, decl, elem_size, sym->as->corank, + alloc_type); + } + + if (cleanup) + { + tmp = build_call_expr_loc (input_location, gfor_fndecl_nca_coarray_free, + 2, gfc_build_addr_expr (pvoid_type_node, decl), + build_int_cst (integer_type_node, alloc_type), + build_int_cst (integer_type_node, + sym->as->corank)); + gfc_add_expr_to_block (cleanup, tmp); + } +} + +static void +finish_coarray_constructor_function (tree *, tree *); + +static void +generate_coarray_constructor_function (tree *, tree *); + +static void +gfc_trans_native_coarray_static (gfc_symbol * sym) +{ + tree save_fn_decl, fndecl; + generate_coarray_constructor_function
(&save_fn_decl, &fndecl); + gfc_trans_native_coarray (&caf_init_block, NULL, sym); + finish_coarray_constructor_function (&save_fn_decl, &fndecl); +} + +static void +gfc_trans_native_coarray_inline (gfc_wrapped_block * block, gfc_symbol * sym) +{ + stmtblock_t init, cleanup; + + gfc_init_block (&init); + gfc_init_block (&cleanup); + + gfc_trans_native_coarray (&init, &cleanup, sym); + + gfc_add_init_cleanup (block, gfc_finish_block (&init), gfc_finish_block (&cleanup)); +} + + + /* Generate function entry and exit code, and add it to the function body. This includes: Allocation and initialization of array variables. @@ -4833,7 +4995,8 @@ gfc_trans_deferred_vars (gfc_symbol * proc_sym, gfc_wrapped_block * block) gfc_trans_deferred_array (sym, block); } } - else if (sym->attr.codimension + else if (flag_coarray != GFC_FCOARRAY_NATIVE + && sym->attr.codimension && TREE_STATIC (sym->backend_decl)) { gfc_init_block (&tmpblock); @@ -4843,6 +5006,11 @@ gfc_trans_deferred_vars (gfc_symbol * proc_sym, gfc_wrapped_block * block) NULL_TREE); continue; } + else if (flag_coarray == GFC_FCOARRAY_NATIVE + && sym->attr.codimension) + { + gfc_trans_native_coarray_inline (block, sym); + } else { gfc_save_backend_locus (&loc); @@ -5333,6 +5501,10 @@ gfc_create_module_variable (gfc_symbol * sym) && sym->fn_result_spec)); DECL_CONTEXT (decl) = sym->ns->proc_name->backend_decl; rest_of_decl_compilation (decl, 1, 0); + + if (flag_coarray == GFC_FCOARRAY_NATIVE && sym->attr.codimension) + gfc_trans_native_coarray_static (sym); + gfc_module_add_decl (cur_module, decl); /* Also add length of strings. */ @@ -5730,64 +5902,82 @@ generate_coarray_sym_init (gfc_symbol *sym) } -/* Generate constructor function to initialize static, nonallocatable - coarrays. 
*/ static void -generate_coarray_init (gfc_namespace * ns __attribute((unused))) +generate_coarray_constructor_function (tree *save_fn_decl, tree *fndecl) { - tree fndecl, tmp, decl, save_fn_decl; + tree tmp, decl; - save_fn_decl = current_function_decl; + *save_fn_decl = current_function_decl; push_function_context (); tmp = build_function_type_list (void_type_node, NULL_TREE); - fndecl = build_decl (input_location, FUNCTION_DECL, - create_tmp_var_name ("_caf_init"), tmp); + *fndecl = build_decl (input_location, FUNCTION_DECL, + create_tmp_var_name (flag_coarray == GFC_FCOARRAY_LIB ? "_caf_init" : "_nca_init"), tmp); - DECL_STATIC_CONSTRUCTOR (fndecl) = 1; - SET_DECL_INIT_PRIORITY (fndecl, DEFAULT_INIT_PRIORITY); + DECL_STATIC_CONSTRUCTOR (*fndecl) = 1; + SET_DECL_INIT_PRIORITY (*fndecl, DEFAULT_INIT_PRIORITY); decl = build_decl (input_location, RESULT_DECL, NULL_TREE, void_type_node); DECL_ARTIFICIAL (decl) = 1; DECL_IGNORED_P (decl) = 1; - DECL_CONTEXT (decl) = fndecl; - DECL_RESULT (fndecl) = decl; + DECL_CONTEXT (decl) = *fndecl; + DECL_RESULT (*fndecl) = decl; - pushdecl (fndecl); - current_function_decl = fndecl; - announce_function (fndecl); + pushdecl (*fndecl); + current_function_decl = *fndecl; + announce_function (*fndecl); - rest_of_decl_compilation (fndecl, 0, 0); - make_decl_rtl (fndecl); - allocate_struct_function (fndecl, false); + rest_of_decl_compilation (*fndecl, 0, 0); + make_decl_rtl (*fndecl); + allocate_struct_function (*fndecl, false); pushlevel (); gfc_init_block (&caf_init_block); +} - gfc_traverse_ns (ns, generate_coarray_sym_init); +static void +finish_coarray_constructor_function (tree *save_fn_decl, tree *fndecl) +{ + tree decl; - DECL_SAVED_TREE (fndecl) = gfc_finish_block (&caf_init_block); + DECL_SAVED_TREE (*fndecl) = gfc_finish_block (&caf_init_block); decl = getdecls (); poplevel (1, 1); - BLOCK_SUPERCONTEXT (DECL_INITIAL (fndecl)) = fndecl; + BLOCK_SUPERCONTEXT (DECL_INITIAL (*fndecl)) = *fndecl; - DECL_SAVED_TREE (fndecl) - = 
build3_v (BIND_EXPR, decl, DECL_SAVED_TREE (fndecl), - DECL_INITIAL (fndecl)); - dump_function (TDI_original, fndecl); + DECL_SAVED_TREE (*fndecl) + = build3_v (BIND_EXPR, decl, DECL_SAVED_TREE (*fndecl), + DECL_INITIAL (*fndecl)); + dump_function (TDI_original, *fndecl); cfun->function_end_locus = input_location; set_cfun (NULL); - if (decl_function_context (fndecl)) - (void) cgraph_node::create (fndecl); + if (decl_function_context (*fndecl)) + (void) cgraph_node::create (*fndecl); else - cgraph_node::finalize_function (fndecl, true); + cgraph_node::finalize_function (*fndecl, true); pop_function_context (); - current_function_decl = save_fn_decl; + current_function_decl = *save_fn_decl; +} + +/* Generate constructor function to initialize static, nonallocatable + coarrays. */ + +static void +generate_coarray_init (gfc_namespace * ns) +{ + tree save_fn_decl, fndecl; + + generate_coarray_constructor_function (&save_fn_decl, &fndecl); + + gfc_traverse_ns (ns, generate_coarray_sym_init); + + finish_coarray_constructor_function (&save_fn_decl, &fndecl); + } @@ -6470,7 +6660,11 @@ create_main_function (tree fndecl) } /* Call MAIN__(). */ - tmp = build_call_expr_loc (input_location, + if (flag_coarray == GFC_FCOARRAY_NATIVE) + tmp = build_call_expr_loc (input_location, gfor_fndecl_nca_master, 1, + gfc_build_addr_expr (NULL, fndecl)); + else + tmp = build_call_expr_loc (input_location, fndecl, 0); gfc_add_expr_to_block (&body, tmp); diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c index 36ff9b5..9979980 100644 --- a/gcc/fortran/trans-expr.c +++ b/gcc/fortran/trans-expr.c @@ -2622,8 +2622,14 @@ gfc_maybe_dereference_var (gfc_symbol *sym, tree var, bool descriptor_only_p, } else if (!sym->attr.value) { + + /* Do not dereference native coarray dummies. */ + if (false && flag_coarray == GFC_FCOARRAY_NATIVE + && sym->attr.codimension && sym->attr.dummy) + return var; + + /* Dereference temporaries for class array dummy arguments.
*/ - if (sym->attr.dummy && is_classarray + else if (sym->attr.dummy && is_classarray && GFC_ARRAY_TYPE_P (TREE_TYPE (var))) { if (!descriptor_only_p) @@ -2635,6 +2641,7 @@ gfc_maybe_dereference_var (gfc_symbol *sym, tree var, bool descriptor_only_p, /* Dereference non-character scalar dummy arguments. */ if (sym->attr.dummy && !sym->attr.dimension && !(sym->attr.codimension && sym->attr.allocatable) + && !(sym->attr.codimension && flag_coarray == GFC_FCOARRAY_NATIVE) && (sym->ts.type != BT_CLASS || (!CLASS_DATA (sym)->attr.dimension && !(CLASS_DATA (sym)->attr.codimension @@ -2670,6 +2677,7 @@ gfc_maybe_dereference_var (gfc_symbol *sym, tree var, bool descriptor_only_p, || CLASS_DATA (sym)->attr.allocatable || CLASS_DATA (sym)->attr.class_pointer)) var = build_fold_indirect_ref_loc (input_location, var); + /* And the case where a non-dummy, non-result, non-function, non-allotable and non-pointer classarray is present. This case was previously covered by the first if, but with introducing the @@ -5528,7 +5536,10 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym, nodesc_arg = nodesc_arg || !comp->attr.always_explicit; else nodesc_arg = nodesc_arg || !sym->attr.always_explicit; - +#if 0 + if (flag_coarray == GFC_FCOARRAY_NATIVE && fsym->attr.codimension) + nodesc_arg = false; +#endif /* Class array expressions are sometimes coming completely unadorned with either arrayspec or _data component. Correct that here. OOP-TODO: Move this to the frontend. */ @@ -5720,7 +5731,10 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym, parmse.want_coarray = 1; scalar = false; } - +#if 0 + if (flag_coarray == GFC_FCOARRAY_NATIVE && fsym->attr.codimension) + scalar = false; +#endif /* A scalar or transformational function. 
*/ if (scalar) { @@ -6233,7 +6247,7 @@ gfc_conv_procedure_call (gfc_se * se, gfc_symbol * sym, else gfc_conv_array_parameter (&parmse, e, nodesc_arg, fsym, sym->name, NULL); - + /* Unallocated allocatable arrays and unassociated pointer arrays need their dtype setting if they are argument associated with assumed rank dummies. */ diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c index 32fe988..b418321 100644 --- a/gcc/fortran/trans-intrinsic.c +++ b/gcc/fortran/trans-intrinsic.c @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see #include "trans-array.h" #include "dependency.h" /* For CAF array alias analysis. */ /* Only for gfc_trans_assign and gfc_trans_pointer_assign. */ +#include "trans-stmt.h" /* This maps Fortran intrinsic math functions to external library or GCC builtin functions. */ @@ -2363,7 +2364,6 @@ conv_caf_send (gfc_code *code) { return gfc_finish_block (&block); } - static void trans_this_image (gfc_se * se, gfc_expr *expr) { @@ -2394,14 +2394,18 @@ trans_this_image (gfc_se * se, gfc_expr *expr) } else tmp = integer_zero_node; - tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_this_image, 1, - tmp); + tmp = build_call_expr_loc (input_location, + flag_coarray == GFC_FCOARRAY_NATIVE ? + gfor_fndecl_nca_this_image : + gfor_fndecl_caf_this_image, + 1, tmp); se->expr = fold_convert (gfc_get_int_type (gfc_default_integer_kind), tmp); return; } /* Coarray-argument version: THIS_IMAGE(coarray [, dim]). */ + /* TODO: NCA handle native coarrays. */ type = gfc_get_int_type (gfc_default_integer_kind); corank = gfc_get_corank (expr->value.function.actual->expr); @@ -2490,8 +2494,11 @@ trans_this_image (gfc_se * se, gfc_expr *expr) */ /* this_image () - 1. */ - tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_this_image, 1, - integer_zero_node); + tmp = build_call_expr_loc (input_location, + flag_coarray == GFC_FCOARRAY_NATIVE + ? 
gfor_fndecl_nca_this_image + : gfor_fndecl_caf_this_image, + 1, integer_zero_node); tmp = fold_build2_loc (input_location, MINUS_EXPR, type, fold_convert (type, tmp), build_int_cst (type, 1)); if (corank == 1) @@ -2774,7 +2781,10 @@ trans_image_index (gfc_se * se, gfc_expr *expr) num_images = build_int_cst (type, 1); else { - tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_num_images, 2, + tmp = build_call_expr_loc (input_location, + flag_coarray == GFC_FCOARRAY_NATIVE + ? gfor_fndecl_nca_num_images + : gfor_fndecl_caf_num_images, 2, integer_zero_node, build_int_cst (integer_type_node, -1)); num_images = fold_convert (type, tmp); @@ -2819,8 +2829,13 @@ trans_num_images (gfc_se * se, gfc_expr *expr) } else failed = build_int_cst (integer_type_node, -1); - tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_num_images, 2, - distance, failed); + + if (flag_coarray == GFC_FCOARRAY_NATIVE) + tmp = build_call_expr_loc (input_location, gfor_fndecl_nca_num_images, 1, + distance); + else + tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_num_images, 2, + distance, failed); se->expr = fold_convert (gfc_get_int_type (gfc_default_integer_kind), tmp); } @@ -3264,7 +3279,10 @@ conv_intrinsic_cobound (gfc_se * se, gfc_expr * expr) tree cosize; cosize = gfc_conv_descriptor_cosize (desc, arg->expr->rank, corank); - tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_num_images, + tmp = build_call_expr_loc (input_location, + flag_coarray == GFC_FCOARRAY_NATIVE + ? gfor_fndecl_nca_num_images + : gfor_fndecl_caf_num_images, 2, integer_zero_node, build_int_cst (integer_type_node, -1)); tmp = fold_build2_loc (input_location, MINUS_EXPR, @@ -3280,7 +3298,9 @@ conv_intrinsic_cobound (gfc_se * se, gfc_expr * expr) else if (flag_coarray != GFC_FCOARRAY_SINGLE) { /* ubound = lbound + num_images() - 1. 
*/ - tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_num_images, + tmp = build_call_expr_loc (input_location, + flag_coarray == GFC_FCOARRAY_NATIVE ? gfor_fndecl_nca_num_images : + gfor_fndecl_caf_num_images, 2, integer_zero_node, build_int_cst (integer_type_node, -1)); tmp = fold_build2_loc (input_location, MINUS_EXPR, @@ -11004,6 +11024,136 @@ gfc_walk_intrinsic_function (gfc_ss * ss, gfc_expr * expr, } } +/* Helper function - advance to the next argument. */ + +static tree +trans_argument (gfc_actual_arglist **curr_al, stmtblock_t *blk, + stmtblock_t *postblk, gfc_se *argse, tree def) +{ + if (!(*curr_al)->expr) + return def; + if ((*curr_al)->expr->rank > 0) + gfc_conv_expr_descriptor (argse, (*curr_al)->expr); + else + gfc_conv_expr (argse, (*curr_al)->expr); + gfc_add_block_to_block (blk, &argse->pre); + gfc_add_block_to_block (postblk, &argse->post); + *curr_al = (*curr_al)->next; + return argse->expr; +} + +/* Convert CO_REDUCE for native coarrays. */ + +static tree +conv_nca_reduce (gfc_code *code, stmtblock_t *blk, stmtblock_t *postblk) +{ + gfc_actual_arglist *curr_al; + tree var, reduce_op, result_image, elem_size; + gfc_se argse; + int is_array; + + curr_al = code->ext.actual; + + gfc_init_se (&argse, NULL); + argse.want_pointer = 1; + is_array = curr_al->expr->rank > 0; + var = trans_argument (&curr_al, blk, postblk, &argse, NULL_TREE); + + gfc_init_se (&argse, NULL); + argse.want_pointer = 1; + reduce_op = trans_argument (&curr_al, blk, postblk, &argse, NULL_TREE); + + gfc_init_se (&argse, NULL); + argse.want_pointer = 1; + result_image = trans_argument (&curr_al, blk, postblk, &argse, + null_pointer_node); + + if (is_array) + return build_call_expr_loc (input_location, gfor_fndecl_nca_reduce_array, + 3, var, reduce_op, result_image); + + elem_size = size_in_bytes(TREE_TYPE(TREE_TYPE(var))); + return build_call_expr_loc (input_location, gfor_fndecl_nca_reduce_scalar, 4, + var, elem_size, reduce_op, result_image); +} + +static tree 
+conv_nca_broadcast (gfc_code *code, stmtblock_t *blk, stmtblock_t *postblk) +{ + gfc_actual_arglist *curr_al; + tree var, source_image, elem_size; + gfc_se argse; + int is_array; + + curr_al = code->ext.actual; + + gfc_init_se (&argse, NULL); + argse.want_pointer = 1; + is_array = curr_al->expr->rank > 0; + var = trans_argument (&curr_al, blk, postblk, &argse, NULL_TREE); + + gfc_init_se (&argse, NULL); + argse.want_pointer = 0; + source_image = trans_argument (&curr_al, blk, postblk, &argse, NULL_TREE); + + if (is_array) + return build_call_expr_loc (input_location, gfor_fndecl_nca_broadcast_array, + 2, var, source_image); + + elem_size = size_in_bytes(TREE_TYPE(TREE_TYPE(var))); + return build_call_expr_loc (input_location, gfor_fndecl_nca_broadcast_scalar, + 3, var, elem_size, source_image); +} + +static tree conv_co_collective (gfc_code *); + +/* Convert collective subroutines for native coarrays. */ + +static tree +conv_nca_collective (gfc_code *code) +{ + + switch (code->resolved_isym->id) + { + case GFC_ISYM_CO_REDUCE: + { + stmtblock_t block, postblock; + tree fcall; + + gfc_start_block (&block); + gfc_init_block (&postblock); + fcall = conv_nca_reduce (code, &block, &postblock); + gfc_add_expr_to_block (&block, fcall); + gfc_add_block_to_block (&block, &postblock); + return gfc_finish_block (&block); + } + case GFC_ISYM_CO_SUM: + case GFC_ISYM_CO_MIN: + case GFC_ISYM_CO_MAX: + return gfc_trans_call (code, false, NULL_TREE, NULL_TREE, false); + + case GFC_ISYM_CO_BROADCAST: + { + stmtblock_t block, postblock; + tree fcall; + + gfc_start_block (&block); + gfc_init_block (&postblock); + fcall = conv_nca_broadcast (code, &block, &postblock); + gfc_add_expr_to_block (&block, fcall); + gfc_add_block_to_block (&block, &postblock); + return gfc_finish_block (&block); + } +#if 0 + case GFC_ISYM_CO_BROADCAST: + return conv_co_collective (code); +#endif + default: + gfc_internal_error ("Invalid or unsupported isym"); + break; + } +} + static tree conv_co_collective 
(gfc_code *code) { @@ -11111,7 +11261,13 @@ conv_co_collective (gfc_code *code) errmsg_len = build_zero_cst (size_type_node); } + /* For native coarrays, we only come here for CO_BROADCAST. */ + + gcc_assert (code->resolved_isym->id == GFC_ISYM_CO_BROADCAST + || flag_coarray != GFC_FCOARRAY_NATIVE); + /* Generate the function call. */ + switch (code->resolved_isym->id) { case GFC_ISYM_CO_BROADCAST: @@ -12104,7 +12260,10 @@ gfc_conv_intrinsic_subroutine (gfc_code *code) case GFC_ISYM_CO_MAX: case GFC_ISYM_CO_REDUCE: case GFC_ISYM_CO_SUM: - res = conv_co_collective (code); + if (flag_coarray == GFC_FCOARRAY_NATIVE) + res = conv_nca_collective (code); + else + res = conv_co_collective (code); break; case GFC_ISYM_FREE: diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c index 1f183b9..4897fa1 100644 --- a/gcc/fortran/trans-stmt.c +++ b/gcc/fortran/trans-stmt.c @@ -830,7 +830,9 @@ gfc_trans_lock_unlock (gfc_code *code, gfc_exec_op op) /* Short cut: For single images without STAT= or LOCK_ACQUIRED return early. (ERRMSG= is always untouched for -fcoarray=single.) */ - if (!code->expr2 && !code->expr4 && flag_coarray != GFC_FCOARRAY_LIB) + if (!code->expr2 && !code->expr4 + && !(flag_coarray == GFC_FCOARRAY_LIB + || flag_coarray == GFC_FCOARRAY_NATIVE)) return NULL_TREE; if (code->expr2) @@ -990,6 +992,29 @@ gfc_trans_lock_unlock (gfc_code *code, gfc_exec_op op) return gfc_finish_block (&se.pre); } + else if (flag_coarray == GFC_FCOARRAY_NATIVE) + { + gfc_se arg; + stmtblock_t res; + tree call; + tree tmp; + + gfc_init_se (&arg, NULL); + gfc_start_block (&res); + gfc_conv_expr (&arg, code->expr1); + gfc_add_block_to_block (&res, &arg.pre); + call = build_call_expr_loc (input_location, op == EXEC_LOCK ? 
+ gfor_fndecl_nca_lock + : gfor_fndecl_nca_unlock, + 1, fold_convert (pvoid_type_node, + gfc_build_addr_expr (NULL, arg.expr))); + gfc_add_expr_to_block (&res, call); + gfc_add_block_to_block (&res, &arg.post); + tmp = gfc_trans_memory_barrier (); + gfc_add_expr_to_block (&res, tmp); + + return gfc_finish_block (&res); + } if (stat != NULL_TREE) gfc_add_modify (&se.pre, stat, build_int_cst (TREE_TYPE (stat), 0)); @@ -1183,7 +1208,8 @@ gfc_trans_sync (gfc_code *code, gfc_exec_op type) /* Short cut: For single images without bound checking or without STAT=, return early. (ERRMSG= is always untouched for -fcoarray=single.) */ if (!code->expr2 && !(gfc_option.rtcheck & GFC_RTCHECK_BOUNDS) - && flag_coarray != GFC_FCOARRAY_LIB) + && flag_coarray != GFC_FCOARRAY_LIB + && flag_coarray != GFC_FCOARRAY_NATIVE) return NULL_TREE; gfc_init_se (&se, NULL); @@ -1206,7 +1232,7 @@ gfc_trans_sync (gfc_code *code, gfc_exec_op type) else stat = null_pointer_node; - if (code->expr3 && flag_coarray == GFC_FCOARRAY_LIB) + if (code->expr3 && (flag_coarray == GFC_FCOARRAY_LIB || flag_coarray == GFC_FCOARRAY_NATIVE)) { gcc_assert (code->expr3->expr_type == EXPR_VARIABLE); gfc_init_se (&argse, NULL); @@ -1216,7 +1242,7 @@ gfc_trans_sync (gfc_code *code, gfc_exec_op type) errmsg = gfc_build_addr_expr (NULL, argse.expr); errmsglen = fold_convert (size_type_node, argse.string_length); } - else if (flag_coarray == GFC_FCOARRAY_LIB) + else if (flag_coarray == GFC_FCOARRAY_LIB || flag_coarray == GFC_FCOARRAY_NATIVE) { errmsg = null_pointer_node; errmsglen = build_int_cst (size_type_node, 0); @@ -1229,7 +1255,7 @@ gfc_trans_sync (gfc_code *code, gfc_exec_op type) { tree images2 = fold_convert (integer_type_node, images); tree cond; - if (flag_coarray != GFC_FCOARRAY_LIB) + if (flag_coarray != GFC_FCOARRAY_LIB && flag_coarray != GFC_FCOARRAY_NATIVE) cond = fold_build2_loc (input_location, NE_EXPR, logical_type_node, images, build_int_cst (TREE_TYPE (images), 1)); else @@ -1253,17 +1279,13 @@ 
gfc_trans_sync (gfc_code *code, gfc_exec_op type) /* Per F2008, 8.5.1, a SYNC MEMORY is implied by calling the image control statements SYNC IMAGES and SYNC ALL. */ - if (flag_coarray == GFC_FCOARRAY_LIB) + if (flag_coarray == GFC_FCOARRAY_LIB || flag_coarray == GFC_FCOARRAY_NATIVE) { - tmp = gfc_build_string_const (strlen ("memory")+1, "memory"), - tmp = build5_loc (input_location, ASM_EXPR, void_type_node, - gfc_build_string_const (1, ""), NULL_TREE, NULL_TREE, - tree_cons (NULL_TREE, tmp, NULL_TREE), NULL_TREE); - ASM_VOLATILE_P (tmp) = 1; + tmp = gfc_trans_memory_barrier (); gfc_add_expr_to_block (&se.pre, tmp); } - if (flag_coarray != GFC_FCOARRAY_LIB) + if (flag_coarray != GFC_FCOARRAY_LIB && flag_coarray != GFC_FCOARRAY_NATIVE) { /* Set STAT to zero. */ if (code->expr2) @@ -1285,8 +1307,14 @@ gfc_trans_sync (gfc_code *code, gfc_exec_op type) tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_sync_memory, 3, stat, errmsg, errmsglen); else - tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_sync_all, - 3, stat, errmsg, errmsglen); + { + if (flag_coarray == GFC_FCOARRAY_LIB) + tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_sync_all, + 3, stat, errmsg, errmsglen); + else + tmp = build_call_expr_loc (input_location, gfor_fndecl_nca_sync_all, + 1, stat); + } gfc_add_expr_to_block (&se.pre, tmp); } @@ -1351,7 +1379,10 @@ gfc_trans_sync (gfc_code *code, gfc_exec_op type) if (TREE_TYPE (stat) == integer_type_node) stat = gfc_build_addr_expr (NULL, stat); - tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_sync_images, + tmp = build_call_expr_loc (input_location, + flag_coarray == GFC_FCOARRAY_NATIVE + ? 
gfor_fndecl_nca_sync_images + : gfor_fndecl_caf_sync_images, 5, fold_convert (integer_type_node, len), images, stat, errmsg, errmsglen); gfc_add_expr_to_block (&se.pre, tmp); @@ -1360,7 +1391,10 @@ gfc_trans_sync (gfc_code *code, gfc_exec_op type) { tree tmp_stat = gfc_create_var (integer_type_node, "stat"); - tmp = build_call_expr_loc (input_location, gfor_fndecl_caf_sync_images, + tmp = build_call_expr_loc (input_location, + flag_coarray == GFC_FCOARRAY_NATIVE + ? gfor_fndecl_nca_sync_images + : gfor_fndecl_caf_sync_images, 5, fold_convert (integer_type_node, len), images, gfc_build_addr_expr (NULL, tmp_stat), errmsg, errmsglen); @@ -1596,6 +1630,11 @@ gfc_trans_critical (gfc_code *code) gfc_add_expr_to_block (&block, tmp); } + else if (flag_coarray == GFC_FCOARRAY_NATIVE) + { + tmp = gfc_trans_lock_unlock (code, EXEC_LOCK); + gfc_add_expr_to_block (&block, tmp); + } tmp = gfc_trans_code (code->block->next); gfc_add_expr_to_block (&block, tmp); @@ -1620,6 +1659,11 @@ gfc_trans_critical (gfc_code *code) gfc_add_expr_to_block (&block, tmp); } + else if (flag_coarray == GFC_FCOARRAY_NATIVE) + { + tmp = gfc_trans_lock_unlock (code, EXEC_UNLOCK); + gfc_add_expr_to_block (&block, tmp); + } return gfc_finish_block (&block); } @@ -7169,6 +7213,7 @@ gfc_trans_deallocate (gfc_code *code) tree apstat, pstat, stat, errmsg, errlen, tmp; tree label_finish, label_errmsg; stmtblock_t block; + bool is_native_coarray = false; pstat = apstat = stat = errmsg = errlen = tmp = NULL_TREE; label_finish = label_errmsg = NULL_TREE; @@ -7254,8 +7299,27 @@ gfc_trans_deallocate (gfc_code *code) ? 
GFC_STRUCTURE_CAF_MODE_DEALLOC_ONLY : 0); } } + else if (flag_coarray == GFC_FCOARRAY_NATIVE) + { + gfc_ref *ref, *last; - if (expr->rank || is_coarray_array) + for (ref = expr->ref, last = ref; ref; last = ref, ref = ref->next); + ref = last; + if (ref->type == REF_ARRAY && ref->u.ar.codimen) + { + gfc_symbol *sym = expr->symtree->n.sym; + int alloc_type = gfc_native_coarray_get_allocation_type (sym); + tmp = build_call_expr_loc (input_location, + gfor_fndecl_nca_coarray_free, + 2, gfc_build_addr_expr (pvoid_type_node, se.expr), + build_int_cst (integer_type_node, + alloc_type)); + gfc_add_expr_to_block (&block, tmp); + is_native_coarray = true; + } + } + + if ((expr->rank || is_coarray_array) && !is_native_coarray) { gfc_ref *ref; @@ -7344,7 +7408,7 @@ gfc_trans_deallocate (gfc_code *code) gfc_reset_len (&se.pre, al->expr); } } - else + else if (!is_native_coarray) { tmp = gfc_deallocate_scalar_with_status (se.expr, pstat, label_finish, false, al->expr, diff --git a/gcc/fortran/trans-types.c b/gcc/fortran/trans-types.c index 26fdb28..f100d34 100644 --- a/gcc/fortran/trans-types.c +++ b/gcc/fortran/trans-types.c @@ -1345,6 +1345,10 @@ gfc_is_nodesc_array (gfc_symbol * sym) gcc_assert (array_attr->dimension || array_attr->codimension); + /* We need a descriptor for native coarrays. */ + if (flag_coarray == GFC_FCOARRAY_NATIVE && sym->as && sym->as->corank) + return 0; + /* We only want local arrays. */ if ((sym->ts.type != BT_CLASS && sym->attr.pointer) || (sym->ts.type == BT_CLASS && CLASS_DATA (sym)->attr.class_pointer) @@ -1381,12 +1385,18 @@ gfc_build_array_type (tree type, gfc_array_spec * as, tree ubound[GFC_MAX_DIMENSIONS]; int n, corank; - /* Assumed-shape arrays do not have codimension information stored in the - descriptor. 
*/ - corank = MAX (as->corank, codim); - if (as->type == AS_ASSUMED_SHAPE || - (as->type == AS_ASSUMED_RANK && akind == GFC_ARRAY_ALLOCATABLE)) - corank = codim; + /* For -fcoarray=lib, assumed-shape arrays do not have codimension + information stored in the descriptor. */ + if (flag_coarray != GFC_FCOARRAY_NATIVE) + { + corank = MAX (as->corank, codim); + + if (as->type == AS_ASSUMED_SHAPE || + (as->type == AS_ASSUMED_RANK && akind == GFC_ARRAY_ALLOCATABLE)) + corank = codim; + } + else + corank = as->corank; if (as->type == AS_ASSUMED_RANK) for (n = 0; n < GFC_MAX_DIMENSIONS; n++) @@ -1427,7 +1437,7 @@ gfc_build_array_type (tree type, gfc_array_spec * as, corank, lbound, ubound, 0, akind, restricted); } - + /* Returns the struct descriptor_dimension type. */ static tree @@ -1598,7 +1608,7 @@ gfc_get_nodesc_array_type (tree etype, gfc_array_spec * as, gfc_packed packed, /* We don't use build_array_type because this does not include lang-specific information (i.e. the bounds of the array) when checking for duplicates. 
*/ - if (as->rank) + if (as->rank || (flag_coarray == GFC_FCOARRAY_NATIVE && as->corank)) type = make_node (ARRAY_TYPE); else type = build_variant_type_copy (etype); @@ -1665,6 +1675,7 @@ gfc_get_nodesc_array_type (tree etype, gfc_array_spec * as, gfc_packed packed, if (packed == PACKED_NO || packed == PACKED_PARTIAL) known_stride = 0; } + for (n = as->rank; n < as->rank + as->corank; n++) { expr = as->lower[n]; @@ -1672,7 +1683,7 @@ gfc_get_nodesc_array_type (tree etype, gfc_array_spec * as, gfc_packed packed, tmp = gfc_conv_mpz_to_tree (expr->value.integer, gfc_index_integer_kind); else - tmp = NULL_TREE; + tmp = NULL_TREE; GFC_TYPE_ARRAY_LBOUND (type, n) = tmp; expr = as->upper[n]; @@ -1680,16 +1691,16 @@ gfc_get_nodesc_array_type (tree etype, gfc_array_spec * as, gfc_packed packed, tmp = gfc_conv_mpz_to_tree (expr->value.integer, gfc_index_integer_kind); else - tmp = NULL_TREE; + tmp = NULL_TREE; if (n < as->rank + as->corank - 1) - GFC_TYPE_ARRAY_UBOUND (type, n) = tmp; + GFC_TYPE_ARRAY_UBOUND (type, n) = tmp; } - if (known_offset) - { - GFC_TYPE_ARRAY_OFFSET (type) = - gfc_conv_mpz_to_tree (offset, gfc_index_integer_kind); - } + if (flag_coarray == GFC_FCOARRAY_NATIVE && as->rank == 0 && as->corank != 0) + GFC_TYPE_ARRAY_OFFSET (type) = NULL_TREE; + else if (known_offset) + GFC_TYPE_ARRAY_OFFSET (type) = + gfc_conv_mpz_to_tree (offset, gfc_index_integer_kind); else GFC_TYPE_ARRAY_OFFSET (type) = NULL_TREE; @@ -1714,7 +1725,7 @@ gfc_get_nodesc_array_type (tree etype, gfc_array_spec * as, gfc_packed packed, build_qualified_type (GFC_TYPE_ARRAY_DATAPTR_TYPE (type), TYPE_QUAL_RESTRICT); - if (as->rank == 0) + if (as->rank == 0 && (flag_coarray != GFC_FCOARRAY_NATIVE || as->corank == 0)) { if (packed != PACKED_STATIC || flag_coarray == GFC_FCOARRAY_LIB) { @@ -1982,7 +1993,7 @@ gfc_get_array_type_bounds (tree etype, int dimen, int codimen, tree * lbound, /* TODO: known offsets for descriptors. 
*/ GFC_TYPE_ARRAY_OFFSET (fat_type) = NULL_TREE; - if (dimen == 0) + if (flag_coarray != GFC_FCOARRAY_NATIVE && dimen == 0) { arraytype = build_pointer_type (etype); if (restricted) @@ -2281,6 +2292,10 @@ gfc_sym_type (gfc_symbol * sym) : GFC_ARRAY_POINTER; else if (sym->attr.allocatable) akind = GFC_ARRAY_ALLOCATABLE; + + /* FIXME: For normal coarrays, we pass a bool to an int here. + Is this really intended? */ + type = gfc_build_array_type (type, sym->as, akind, restricted, sym->attr.contiguous, false); } diff --git a/gcc/fortran/trans.c b/gcc/fortran/trans.c index ed05426..2b60550 100644 --- a/gcc/fortran/trans.c +++ b/gcc/fortran/trans.c @@ -47,6 +47,21 @@ static gfc_file *gfc_current_backend_file; const char gfc_msg_fault[] = N_("Array reference out of bounds"); const char gfc_msg_wrong_return[] = N_("Incorrect function return value"); +/* Insert a memory barrier into the code. */ + +tree +gfc_trans_memory_barrier (void) +{ + tree tmp; + + tmp = gfc_build_string_const (strlen ("memory")+1, "memory"), + tmp = build5_loc (input_location, ASM_EXPR, void_type_node, + gfc_build_string_const (1, ""), NULL_TREE, NULL_TREE, + tree_cons (NULL_TREE, tmp, NULL_TREE), NULL_TREE); + ASM_VOLATILE_P (tmp) = 1; + + return tmp; +} /* Return a location_t suitable for 'tree' for a gfortran locus. The way the parser works in gfortran, loc->lb->location contains only the line number @@ -403,15 +418,16 @@ gfc_build_array_ref (tree base, tree offset, tree decl, tree vptr) tree tmp; tree span = NULL_TREE; - if (GFC_ARRAY_TYPE_P (type) && GFC_TYPE_ARRAY_RANK (type) == 0) + if (GFC_ARRAY_TYPE_P (type) && GFC_TYPE_ARRAY_RANK (type) == 0 + && flag_coarray != GFC_FCOARRAY_NATIVE) { gcc_assert (GFC_TYPE_ARRAY_CORANK (type) > 0); return fold_convert (TYPE_MAIN_VARIANT (type), base); } - /* Scalar coarray, there is nothing to do. */ - if (TREE_CODE (type) != ARRAY_TYPE) + /* Scalar library coarray, there is nothing to do. 
*/ + if (TREE_CODE (type) != ARRAY_TYPE && flag_coarray != GFC_FCOARRAY_NATIVE) { gcc_assert (decl == NULL_TREE); gcc_assert (integer_zerop (offset)); diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h index d257963..974785f 100644 --- a/gcc/fortran/trans.h +++ b/gcc/fortran/trans.h @@ -501,6 +501,9 @@ void gfc_conv_expr_reference (gfc_se * se, gfc_expr * expr, bool add_clobber = false); void gfc_conv_expr_type (gfc_se * se, gfc_expr *, tree); +/* Insert a memory barrier into the code. */ + +tree gfc_trans_memory_barrier (void); /* trans-expr.c */ tree gfc_conv_scalar_to_descriptor (gfc_se *, tree, symbol_attribute); @@ -890,6 +893,21 @@ extern GTY(()) tree gfor_fndecl_co_reduce; extern GTY(()) tree gfor_fndecl_co_sum; extern GTY(()) tree gfor_fndecl_caf_is_present; + +/* Native coarray library function decls. */ +extern GTY(()) tree gfor_fndecl_nca_this_image; +extern GTY(()) tree gfor_fndecl_nca_num_images; +extern GTY(()) tree gfor_fndecl_nca_coarray_allocate; +extern GTY(()) tree gfor_fndecl_nca_coarray_free; +extern GTY(()) tree gfor_fndecl_nca_sync_images; +extern GTY(()) tree gfor_fndecl_nca_sync_all; +extern GTY(()) tree gfor_fndecl_nca_lock; +extern GTY(()) tree gfor_fndecl_nca_unlock; +extern GTY(()) tree gfor_fndecl_nca_reduce_scalar; +extern GTY(()) tree gfor_fndecl_nca_reduce_array; +extern GTY(()) tree gfor_fndecl_nca_broadcast_scalar; +extern GTY(()) tree gfor_fndecl_nca_broadcast_array; + /* Math functions. Many other math functions are handled in trans-intrinsic.c. 
*/ diff --git a/libgfortran/Makefile.am b/libgfortran/Makefile.am index 61bf05d..683fd04 100644 --- a/libgfortran/Makefile.am +++ b/libgfortran/Makefile.am @@ -36,14 +36,21 @@ gfor_cdir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS)) \ $(lt_host_flags) +if LIBGFOR_NATIVE_COARRAY +COARRAY_LIBS=$(PTHREAD_LIBS) $(RT_LIBS) +else +COARRAY_LIBS="" +endif + toolexeclib_LTLIBRARIES = libgfortran.la toolexeclib_DATA = libgfortran.spec libgfortran_la_LINK = $(LINK) $(libgfortran_la_LDFLAGS) libgfortran_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \ $(LTLDFLAGS) $(LIBQUADLIB) ../libbacktrace/libbacktrace.la \ $(HWCAP_LDFLAGS) \ - -lm $(extra_ldflags_libgfortran) \ + -lm $(COARRAY_LIBS) $(extra_ldflags_libgfortran) \ $(version_arg) -Wc,-shared-libgcc + libgfortran_la_DEPENDENCIES = $(version_dep) libgfortran.spec $(LIBQUADLIB_DEP) cafexeclib_LTLIBRARIES = libcaf_single.la @@ -53,6 +60,37 @@ libcaf_single_la_LDFLAGS = -static libcaf_single_la_DEPENDENCIES = caf/libcaf.h libcaf_single_la_LINK = $(LINK) $(libcaf_single_la_LDFLAGS) +i_nca_minmax_c = \ + $(srcdir)/generated/nca_minmax_i1.c \ + $(srcdir)/generated/nca_minmax_i2.c \ + $(srcdir)/generated/nca_minmax_i4.c \ + $(srcdir)/generated/nca_minmax_i8.c \ + $(srcdir)/generated/nca_minmax_i16.c \ + $(srcdir)/generated/nca_minmax_r4.c \ + $(srcdir)/generated/nca_minmax_r8.c \ + $(srcdir)/generated/nca_minmax_r10.c \ + $(srcdir)/generated/nca_minmax_r16.c + +i_nca_minmax_s_c = \ + $(srcdir)/generated/nca_minmax_s1.c \ + $(srcdir)/generated/nca_minmax_s4.c + +if LIBGFOR_NATIVE_COARRAY + +mylib_LTLIBRARIES = libnca.la +mylibdir = $(libdir)/gcc/$(target_alias)/$(gcc_version)$(MULTISUBDIR) +libnca_la_SOURCES = nca/alloc.c nca/allocator.c nca/coarraynative.c \ + nca/hashmap.c \ + nca/sync.c nca/util.c nca/wrapper.c nca/collective_subroutine.c \ + nca/shared_memory.c \ + $(i_nca_minmax_c) $(i_nca_minmax_s_c) +libnca_la_DEPENDENCIES = 
nca/alloc.h nca/allocator.h nca/hashmap.h \ + nca/libcoarraynative.h nca/sync.h nca/shared_memory.h \ + nca/util.h nca/lock.h nca/collective_subroutine.h\ + nca/collective_inline.h +libnca_la_LINK = $(LINK) $(libnca_la_LDFLAGS) +endif + if IEEE_SUPPORT fincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)$(MULTISUBDIR)/finclude nodist_finclude_HEADERS = ieee_arithmetic.mod ieee_exceptions.mod ieee_features.mod @@ -83,6 +121,10 @@ if LIBGFOR_MINIMAL AM_CFLAGS += -DLIBGFOR_MINIMAL endif +if LIBGFOR_NATIVE_COARRAY +AM_CFLAGS += $(PTHREAD_CFLAGS) +endif + gfor_io_src= \ io/size_from_kind.c @@ -1231,9 +1273,20 @@ $(gfor_built_specific2_src): m4/specific2.m4 m4/head.m4 $(gfor_misc_specifics): m4/misc_specifics.m4 m4/head.m4 $(M4) -Dfile=$@ -I$(srcdir)/m4 misc_specifics.m4 > $@ + + +if LIBGFOR_NATIVE_COARRAY +$(i_nca_minmax_c): m4/nca_minmax.m4 $(I_M4_DEPS) + $(M4) -Dfile=$@ -I$(srcdir)/m4 nca_minmax.m4 > $@ + +$(i_nca_minmax_s_c): m4/nca-minmax-s.m4 $(I_M4_DEPS) + $(M4) -Dfile=$@ -I$(srcdir)/m4 nca-minmax-s.m4 > $@ +endif + ## end of maintainer mode only rules endif + EXTRA_DIST = $(m4_files) # target overrides diff --git a/libgfortran/Makefile.in b/libgfortran/Makefile.in index 3d043aa..2379fe3 100644 --- a/libgfortran/Makefile.in +++ b/libgfortran/Makefile.in @@ -92,7 +92,8 @@ build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ @LIBGFOR_MINIMAL_TRUE@am__append_1 = -DLIBGFOR_MINIMAL -@LIBGFOR_MINIMAL_FALSE@am__append_2 = \ +@LIBGFOR_NATIVE_COARRAY_TRUE@am__append_2 = $(PTHREAD_CFLAGS) +@LIBGFOR_MINIMAL_FALSE@am__append_3 = \ @LIBGFOR_MINIMAL_FALSE@io/close.c \ @LIBGFOR_MINIMAL_FALSE@io/file_pos.c \ @LIBGFOR_MINIMAL_FALSE@io/format.c \ @@ -110,7 +111,7 @@ target_triplet = @target@ @LIBGFOR_MINIMAL_FALSE@io/fbuf.c \ @LIBGFOR_MINIMAL_FALSE@io/async.c -@LIBGFOR_MINIMAL_FALSE@am__append_3 = \ +@LIBGFOR_MINIMAL_FALSE@am__append_4 = \ @LIBGFOR_MINIMAL_FALSE@intrinsics/access.c \ @LIBGFOR_MINIMAL_FALSE@intrinsics/c99_functions.c \ 
@LIBGFOR_MINIMAL_FALSE@intrinsics/chdir.c \ @@ -143,9 +144,9 @@ target_triplet = @target@ @LIBGFOR_MINIMAL_FALSE@intrinsics/umask.c \ @LIBGFOR_MINIMAL_FALSE@intrinsics/unlink.c -@IEEE_SUPPORT_TRUE@am__append_4 = ieee/ieee_helper.c -@LIBGFOR_MINIMAL_TRUE@am__append_5 = runtime/minimal.c -@LIBGFOR_MINIMAL_FALSE@am__append_6 = \ +@IEEE_SUPPORT_TRUE@am__append_5 = ieee/ieee_helper.c +@LIBGFOR_MINIMAL_TRUE@am__append_6 = runtime/minimal.c +@LIBGFOR_MINIMAL_FALSE@am__append_7 = \ @LIBGFOR_MINIMAL_FALSE@runtime/backtrace.c \ @LIBGFOR_MINIMAL_FALSE@runtime/convert_char.c \ @LIBGFOR_MINIMAL_FALSE@runtime/environ.c \ @@ -157,7 +158,7 @@ target_triplet = @target@ # dummy sources for libtool -@onestep_TRUE@am__append_7 = libgfortran_c.c libgfortran_f.f90 +@onestep_TRUE@am__append_8 = libgfortran_c.c libgfortran_f.f90 subdir = . ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/../config/depstand.m4 \ @@ -214,10 +215,11 @@ am__uninstall_files_from_dir = { \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } -am__installdirs = "$(DESTDIR)$(cafexeclibdir)" \ +am__installdirs = "$(DESTDIR)$(cafexeclibdir)" "$(DESTDIR)$(mylibdir)" \ "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(toolexeclibdir)" \ "$(DESTDIR)$(gfor_cdir)" "$(DESTDIR)$(fincludedir)" -LTLIBRARIES = $(cafexeclib_LTLIBRARIES) $(toolexeclib_LTLIBRARIES) +LTLIBRARIES = $(cafexeclib_LTLIBRARIES) $(mylib_LTLIBRARIES) \ + $(toolexeclib_LTLIBRARIES) libcaf_single_la_LIBADD = am_libcaf_single_la_OBJECTS = single.lo libcaf_single_la_OBJECTS = $(am_libcaf_single_la_OBJECTS) @@ -467,6 +469,21 @@ am__objects_65 = $(am__objects_3) $(am__objects_53) $(am__objects_55) \ @onestep_FALSE@am_libgfortran_la_OBJECTS = $(am__objects_65) @onestep_TRUE@am_libgfortran_la_OBJECTS = libgfortran_c.lo libgfortran_la_OBJECTS = $(am_libgfortran_la_OBJECTS) +libnca_la_LIBADD = +am__objects_66 = nca_minmax_i1.lo nca_minmax_i2.lo nca_minmax_i4.lo \ + nca_minmax_i8.lo nca_minmax_i16.lo 
nca_minmax_r4.lo \ + nca_minmax_r8.lo nca_minmax_r10.lo nca_minmax_r16.lo +am__objects_67 = nca_minmax_s1.lo nca_minmax_s4.lo +@LIBGFOR_NATIVE_COARRAY_TRUE@am_libnca_la_OBJECTS = alloc.lo \ +@LIBGFOR_NATIVE_COARRAY_TRUE@ allocator.lo coarraynative.lo \ +@LIBGFOR_NATIVE_COARRAY_TRUE@ hashmap.lo sync.lo util.lo \ +@LIBGFOR_NATIVE_COARRAY_TRUE@ wrapper.lo \ +@LIBGFOR_NATIVE_COARRAY_TRUE@ collective_subroutine.lo \ +@LIBGFOR_NATIVE_COARRAY_TRUE@ shared_memory.lo \ +@LIBGFOR_NATIVE_COARRAY_TRUE@ $(am__objects_66) \ +@LIBGFOR_NATIVE_COARRAY_TRUE@ $(am__objects_67) +libnca_la_OBJECTS = $(am_libnca_la_OBJECTS) +@LIBGFOR_NATIVE_COARRAY_TRUE@am_libnca_la_rpath = -rpath $(mylibdir) AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false @@ -530,7 +547,8 @@ AM_V_FC = $(am__v_FC_@AM_V@) am__v_FC_ = $(am__v_FC_@AM_DEFAULT_V@) am__v_FC_0 = @echo " FC " $@; am__v_FC_1 = -SOURCES = $(libcaf_single_la_SOURCES) $(libgfortran_la_SOURCES) +SOURCES = $(libcaf_single_la_SOURCES) $(libgfortran_la_SOURCES) \ + $(libnca_la_SOURCES) am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ @@ -569,7 +587,7 @@ AMTAR = @AMTAR@ # Some targets require additional compiler options for IEEE compatibility. 
AM_CFLAGS = @AM_CFLAGS@ -fcx-fortran-rules $(SECTION_FLAGS) \ - $(IEEE_FLAGS) $(am__append_1) + $(IEEE_FLAGS) $(am__append_1) $(am__append_2) AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ AM_FCFLAGS = @AM_FCFLAGS@ $(IEEE_FLAGS) AR = @AR@ @@ -637,7 +655,10 @@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ RANLIB = @RANLIB@ +RT_LIBS = @RT_LIBS@ SECTION_FLAGS = @SECTION_FLAGS@ SED = @SED@ SET_MAKE = @SET_MAKE@ @@ -698,6 +719,7 @@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ +runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ @@ -728,13 +750,15 @@ gfor_cdir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS)) \ $(lt_host_flags) +@LIBGFOR_NATIVE_COARRAY_FALSE@COARRAY_LIBS = "" +@LIBGFOR_NATIVE_COARRAY_TRUE@COARRAY_LIBS = $(PTHREAD_LIBS) $(RT_LIBS) toolexeclib_LTLIBRARIES = libgfortran.la toolexeclib_DATA = libgfortran.spec libgfortran_la_LINK = $(LINK) $(libgfortran_la_LDFLAGS) libgfortran_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \ $(LTLDFLAGS) $(LIBQUADLIB) ../libbacktrace/libbacktrace.la \ $(HWCAP_LDFLAGS) \ - -lm $(extra_ldflags_libgfortran) \ + -lm $(COARRAY_LIBS) $(extra_ldflags_libgfortran) \ $(version_arg) -Wc,-shared-libgcc libgfortran_la_DEPENDENCIES = $(version_dep) libgfortran.spec $(LIBQUADLIB_DEP) @@ -744,6 +768,31 @@ libcaf_single_la_SOURCES = caf/single.c libcaf_single_la_LDFLAGS = -static libcaf_single_la_DEPENDENCIES = caf/libcaf.h libcaf_single_la_LINK = $(LINK) $(libcaf_single_la_LDFLAGS) +i_nca_minmax_c = \ + $(srcdir)/generated/nca_minmax_i1.c \ + $(srcdir)/generated/nca_minmax_i2.c \ + $(srcdir)/generated/nca_minmax_i4.c \ + $(srcdir)/generated/nca_minmax_i8.c \ + 
$(srcdir)/generated/nca_minmax_i16.c \ + $(srcdir)/generated/nca_minmax_r4.c \ + $(srcdir)/generated/nca_minmax_r8.c \ + $(srcdir)/generated/nca_minmax_r10.c \ + $(srcdir)/generated/nca_minmax_r16.c + +i_nca_minmax_s_c = \ + $(srcdir)/generated/nca_minmax_s1.c \ + $(srcdir)/generated/nca_minmax_s4.c + +@LIBGFOR_NATIVE_COARRAY_TRUE@mylib_LTLIBRARIES = libnca.la +@LIBGFOR_NATIVE_COARRAY_TRUE@mylibdir = $(libdir)/gcc/$(target_alias)/$(gcc_version)$(MULTISUBDIR) +@LIBGFOR_NATIVE_COARRAY_TRUE@libnca_la_SOURCES = nca/alloc.c nca/allocator.c nca/coarraynative.c \ +@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/hashmap.c \ +@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/sync.c nca/util.c nca/wrapper.c nca/collective_subroutine.c \ +@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/shared_memory.c \ +@LIBGFOR_NATIVE_COARRAY_TRUE@ $(i_nca_minmax_c) $(i_nca_minmax_s_c) + +@LIBGFOR_NATIVE_COARRAY_TRUE@libnca_la_DEPENDENCIES = nca/alloc.h nca/allocator.h nca/hashmap.h +@LIBGFOR_NATIVE_COARRAY_TRUE@libnca_la_LINK = $(LINK) $(libnca_la_LDFLAGS) @IEEE_SUPPORT_TRUE@fincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)$(MULTISUBDIR)/finclude @IEEE_SUPPORT_TRUE@nodist_finclude_HEADERS = ieee_arithmetic.mod ieee_exceptions.mod ieee_features.mod AM_CPPFLAGS = -iquote$(srcdir)/io -I$(srcdir)/$(MULTISRCTOP)../gcc \ @@ -755,7 +804,7 @@ AM_CPPFLAGS = -iquote$(srcdir)/io -I$(srcdir)/$(MULTISRCTOP)../gcc \ -I$(MULTIBUILDTOP)../libbacktrace \ -I../libbacktrace -gfor_io_src = io/size_from_kind.c $(am__append_2) +gfor_io_src = io/size_from_kind.c $(am__append_3) gfor_io_headers = \ io/io.h \ io/fbuf.h \ @@ -777,7 +826,7 @@ gfor_helper_src = intrinsics/associated.c intrinsics/abort.c \ intrinsics/selected_int_kind.f90 \ intrinsics/selected_real_kind.f90 intrinsics/trigd.c \ intrinsics/unpack_generic.c runtime/in_pack_generic.c \ - runtime/in_unpack_generic.c $(am__append_3) $(am__append_4) + runtime/in_unpack_generic.c $(am__append_4) $(am__append_5) @IEEE_SUPPORT_FALSE@gfor_ieee_src = @IEEE_SUPPORT_TRUE@gfor_ieee_src = \ 
@IEEE_SUPPORT_TRUE@ieee/ieee_arithmetic.F90 \ @@ -785,8 +834,8 @@ gfor_helper_src = intrinsics/associated.c intrinsics/abort.c \ @IEEE_SUPPORT_TRUE@ieee/ieee_features.F90 gfor_src = runtime/bounds.c runtime/compile_options.c runtime/memory.c \ - runtime/string.c runtime/select.c $(am__append_5) \ - $(am__append_6) + runtime/string.c runtime/select.c $(am__append_6) \ + $(am__append_7) i_all_c = \ $(srcdir)/generated/all_l1.c \ $(srcdir)/generated/all_l2.c \ @@ -1540,7 +1589,7 @@ intrinsics/random_init.f90 BUILT_SOURCES = $(gfor_built_src) $(gfor_built_specific_src) \ $(gfor_built_specific2_src) $(gfor_misc_specifics) \ - $(am__append_7) + $(am__append_8) prereq_SRC = $(gfor_src) $(gfor_built_src) $(gfor_io_src) \ $(gfor_helper_src) $(gfor_ieee_src) $(gfor_io_headers) $(gfor_specific_src) @@ -1668,6 +1717,41 @@ clean-cafexeclibLTLIBRARIES: rm -f $${locs}; \ } +install-mylibLTLIBRARIES: $(mylib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(mylib_LTLIBRARIES)'; test -n "$(mylibdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(mylibdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(mylibdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(mylibdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(mylibdir)"; \ + } + +uninstall-mylibLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(mylib_LTLIBRARIES)'; test -n "$(mylibdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(mylibdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(mylibdir)/$$f"; \ + done + +clean-mylibLTLIBRARIES: + -test -z "$(mylib_LTLIBRARIES)" || rm -f $(mylib_LTLIBRARIES) + 
@list='$(mylib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + install-toolexeclibLTLIBRARIES: $(toolexeclib_LTLIBRARIES) @$(NORMAL_INSTALL) @list='$(toolexeclib_LTLIBRARIES)'; test -n "$(toolexeclibdir)" || list=; \ @@ -1709,6 +1793,9 @@ libcaf_single.la: $(libcaf_single_la_OBJECTS) $(libcaf_single_la_DEPENDENCIES) $ libgfortran.la: $(libgfortran_la_OBJECTS) $(libgfortran_la_DEPENDENCIES) $(EXTRA_libgfortran_la_DEPENDENCIES) $(AM_V_GEN)$(libgfortran_la_LINK) -rpath $(toolexeclibdir) $(libgfortran_la_OBJECTS) $(libgfortran_la_LIBADD) $(LIBS) +libnca.la: $(libnca_la_OBJECTS) $(libnca_la_DEPENDENCIES) $(EXTRA_libnca_la_DEPENDENCIES) + $(AM_V_GEN)$(libnca_la_LINK) $(am_libnca_la_rpath) $(libnca_la_OBJECTS) $(libnca_la_LIBADD) $(LIBS) + mostlyclean-compile: -rm -f *.$(OBJEXT) @@ -1723,6 +1810,8 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/all_l2.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/all_l4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/all_l8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alloc.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/allocator.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/any_l1.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/any_l16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/any_l2.Plo@am__quote@ @@ -1742,6 +1831,8 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chmod.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/clock.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/close.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/coarraynative.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collective_subroutine.Plo@am__quote@ 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/compile_options.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/convert_char.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/count_16_l.Plo@am__quote@ @@ -1866,6 +1957,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getXid.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getcwd.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getlog.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hashmap.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hostnm.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iall_i1.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iall_i16.Plo@am__quote@ @@ -2125,6 +2217,17 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minval_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/move_alloc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mvbits.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_i1.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_i16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_i2.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_i4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_i8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_r10.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_r4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_r8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_s1.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nca_minmax_s4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/norm2_r10.Plo@am__quote@ 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/norm2_r16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/norm2_r4.Plo@am__quote@ @@ -2218,6 +2321,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/shape_i2.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/shape_i4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/shape_i8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/shared_memory.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/signal.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/size.Plo@am__quote@ @@ -2255,6 +2359,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sum_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sum_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/symlnk.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sync.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/system.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/system_clock.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/time.Plo@am__quote@ @@ -2279,6 +2384,8 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unpack_r16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unpack_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unpack_r8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/util.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/wrapper.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/write.Plo@am__quote@ .F90.o: @@ -6679,6 +6786,146 @@ ieee_helper.lo: ieee/ieee_helper.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile 
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o ieee_helper.lo `test -f 'ieee/ieee_helper.c' || echo '$(srcdir)/'`ieee/ieee_helper.c +alloc.lo: nca/alloc.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT alloc.lo -MD -MP -MF $(DEPDIR)/alloc.Tpo -c -o alloc.lo `test -f 'nca/alloc.c' || echo '$(srcdir)/'`nca/alloc.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/alloc.Tpo $(DEPDIR)/alloc.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/alloc.c' object='alloc.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o alloc.lo `test -f 'nca/alloc.c' || echo '$(srcdir)/'`nca/alloc.c + +allocator.lo: nca/allocator.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT allocator.lo -MD -MP -MF $(DEPDIR)/allocator.Tpo -c -o allocator.lo `test -f 'nca/allocator.c' || echo '$(srcdir)/'`nca/allocator.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/allocator.Tpo $(DEPDIR)/allocator.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/allocator.c' object='allocator.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(AM_CFLAGS) $(CFLAGS) -c -o allocator.lo `test -f 'nca/allocator.c' || echo '$(srcdir)/'`nca/allocator.c + +coarraynative.lo: nca/coarraynative.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT coarraynative.lo -MD -MP -MF $(DEPDIR)/coarraynative.Tpo -c -o coarraynative.lo `test -f 'nca/coarraynative.c' || echo '$(srcdir)/'`nca/coarraynative.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/coarraynative.Tpo $(DEPDIR)/coarraynative.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/coarraynative.c' object='coarraynative.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o coarraynative.lo `test -f 'nca/coarraynative.c' || echo '$(srcdir)/'`nca/coarraynative.c + +hashmap.lo: nca/hashmap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT hashmap.lo -MD -MP -MF $(DEPDIR)/hashmap.Tpo -c -o hashmap.lo `test -f 'nca/hashmap.c' || echo '$(srcdir)/'`nca/hashmap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/hashmap.Tpo $(DEPDIR)/hashmap.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/hashmap.c' object='hashmap.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) 
$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o hashmap.lo `test -f 'nca/hashmap.c' || echo '$(srcdir)/'`nca/hashmap.c + +sync.lo: nca/sync.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT sync.lo -MD -MP -MF $(DEPDIR)/sync.Tpo -c -o sync.lo `test -f 'nca/sync.c' || echo '$(srcdir)/'`nca/sync.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/sync.Tpo $(DEPDIR)/sync.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/sync.c' object='sync.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sync.lo `test -f 'nca/sync.c' || echo '$(srcdir)/'`nca/sync.c + +util.lo: nca/util.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT util.lo -MD -MP -MF $(DEPDIR)/util.Tpo -c -o util.lo `test -f 'nca/util.c' || echo '$(srcdir)/'`nca/util.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/util.Tpo $(DEPDIR)/util.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/util.c' object='util.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o util.lo `test -f 'nca/util.c' || echo '$(srcdir)/'`nca/util.c + +wrapper.lo: nca/wrapper.c +@am__fastdepCC_TRUE@ 
$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT wrapper.lo -MD -MP -MF $(DEPDIR)/wrapper.Tpo -c -o wrapper.lo `test -f 'nca/wrapper.c' || echo '$(srcdir)/'`nca/wrapper.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/wrapper.Tpo $(DEPDIR)/wrapper.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/wrapper.c' object='wrapper.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o wrapper.lo `test -f 'nca/wrapper.c' || echo '$(srcdir)/'`nca/wrapper.c + +collective_subroutine.lo: nca/collective_subroutine.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT collective_subroutine.lo -MD -MP -MF $(DEPDIR)/collective_subroutine.Tpo -c -o collective_subroutine.lo `test -f 'nca/collective_subroutine.c' || echo '$(srcdir)/'`nca/collective_subroutine.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/collective_subroutine.Tpo $(DEPDIR)/collective_subroutine.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/collective_subroutine.c' object='collective_subroutine.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o collective_subroutine.lo `test -f 
'nca/collective_subroutine.c' || echo '$(srcdir)/'`nca/collective_subroutine.c + +shared_memory.lo: nca/shared_memory.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT shared_memory.lo -MD -MP -MF $(DEPDIR)/shared_memory.Tpo -c -o shared_memory.lo `test -f 'nca/shared_memory.c' || echo '$(srcdir)/'`nca/shared_memory.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/shared_memory.Tpo $(DEPDIR)/shared_memory.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='nca/shared_memory.c' object='shared_memory.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o shared_memory.lo `test -f 'nca/shared_memory.c' || echo '$(srcdir)/'`nca/shared_memory.c + +nca_minmax_i1.lo: $(srcdir)/generated/nca_minmax_i1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_i1.lo -MD -MP -MF $(DEPDIR)/nca_minmax_i1.Tpo -c -o nca_minmax_i1.lo `test -f '$(srcdir)/generated/nca_minmax_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i1.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_i1.Tpo $(DEPDIR)/nca_minmax_i1.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_i1.c' object='nca_minmax_i1.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_i1.lo `test -f '$(srcdir)/generated/nca_minmax_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i1.c + +nca_minmax_i2.lo: $(srcdir)/generated/nca_minmax_i2.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_i2.lo -MD -MP -MF $(DEPDIR)/nca_minmax_i2.Tpo -c -o nca_minmax_i2.lo `test -f '$(srcdir)/generated/nca_minmax_i2.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i2.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_i2.Tpo $(DEPDIR)/nca_minmax_i2.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_i2.c' object='nca_minmax_i2.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_i2.lo `test -f '$(srcdir)/generated/nca_minmax_i2.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i2.c + +nca_minmax_i4.lo: $(srcdir)/generated/nca_minmax_i4.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_i4.lo -MD -MP -MF $(DEPDIR)/nca_minmax_i4.Tpo -c -o nca_minmax_i4.lo `test -f '$(srcdir)/generated/nca_minmax_i4.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i4.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_i4.Tpo $(DEPDIR)/nca_minmax_i4.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='$(srcdir)/generated/nca_minmax_i4.c' object='nca_minmax_i4.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_i4.lo `test -f '$(srcdir)/generated/nca_minmax_i4.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i4.c + +nca_minmax_i8.lo: $(srcdir)/generated/nca_minmax_i8.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_i8.lo -MD -MP -MF $(DEPDIR)/nca_minmax_i8.Tpo -c -o nca_minmax_i8.lo `test -f '$(srcdir)/generated/nca_minmax_i8.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i8.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_i8.Tpo $(DEPDIR)/nca_minmax_i8.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_i8.c' object='nca_minmax_i8.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_i8.lo `test -f '$(srcdir)/generated/nca_minmax_i8.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i8.c + +nca_minmax_i16.lo: $(srcdir)/generated/nca_minmax_i16.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_i16.lo -MD -MP -MF 
$(DEPDIR)/nca_minmax_i16.Tpo -c -o nca_minmax_i16.lo `test -f '$(srcdir)/generated/nca_minmax_i16.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i16.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_i16.Tpo $(DEPDIR)/nca_minmax_i16.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_i16.c' object='nca_minmax_i16.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_i16.lo `test -f '$(srcdir)/generated/nca_minmax_i16.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_i16.c + +nca_minmax_r4.lo: $(srcdir)/generated/nca_minmax_r4.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_r4.lo -MD -MP -MF $(DEPDIR)/nca_minmax_r4.Tpo -c -o nca_minmax_r4.lo `test -f '$(srcdir)/generated/nca_minmax_r4.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r4.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_r4.Tpo $(DEPDIR)/nca_minmax_r4.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_r4.c' object='nca_minmax_r4.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_r4.lo `test -f '$(srcdir)/generated/nca_minmax_r4.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r4.c + 
+nca_minmax_r8.lo: $(srcdir)/generated/nca_minmax_r8.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_r8.lo -MD -MP -MF $(DEPDIR)/nca_minmax_r8.Tpo -c -o nca_minmax_r8.lo `test -f '$(srcdir)/generated/nca_minmax_r8.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r8.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_r8.Tpo $(DEPDIR)/nca_minmax_r8.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_r8.c' object='nca_minmax_r8.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_r8.lo `test -f '$(srcdir)/generated/nca_minmax_r8.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r8.c + +nca_minmax_r10.lo: $(srcdir)/generated/nca_minmax_r10.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_r10.lo -MD -MP -MF $(DEPDIR)/nca_minmax_r10.Tpo -c -o nca_minmax_r10.lo `test -f '$(srcdir)/generated/nca_minmax_r10.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r10.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_r10.Tpo $(DEPDIR)/nca_minmax_r10.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_r10.c' object='nca_minmax_r10.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) 
$(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_r10.lo `test -f '$(srcdir)/generated/nca_minmax_r10.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r10.c + +nca_minmax_r16.lo: $(srcdir)/generated/nca_minmax_r16.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_r16.lo -MD -MP -MF $(DEPDIR)/nca_minmax_r16.Tpo -c -o nca_minmax_r16.lo `test -f '$(srcdir)/generated/nca_minmax_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r16.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_r16.Tpo $(DEPDIR)/nca_minmax_r16.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_r16.c' object='nca_minmax_r16.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_r16.lo `test -f '$(srcdir)/generated/nca_minmax_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_r16.c + +nca_minmax_s1.lo: $(srcdir)/generated/nca_minmax_s1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_s1.lo -MD -MP -MF $(DEPDIR)/nca_minmax_s1.Tpo -c -o nca_minmax_s1.lo `test -f '$(srcdir)/generated/nca_minmax_s1.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_s1.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_s1.Tpo $(DEPDIR)/nca_minmax_s1.Plo 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_s1.c' object='nca_minmax_s1.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_s1.lo `test -f '$(srcdir)/generated/nca_minmax_s1.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_s1.c + +nca_minmax_s4.lo: $(srcdir)/generated/nca_minmax_s4.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT nca_minmax_s4.lo -MD -MP -MF $(DEPDIR)/nca_minmax_s4.Tpo -c -o nca_minmax_s4.lo `test -f '$(srcdir)/generated/nca_minmax_s4.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_s4.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/nca_minmax_s4.Tpo $(DEPDIR)/nca_minmax_s4.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/nca_minmax_s4.c' object='nca_minmax_s4.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o nca_minmax_s4.lo `test -f '$(srcdir)/generated/nca_minmax_s4.c' || echo '$(srcdir)/'`$(srcdir)/generated/nca_minmax_s4.c + .f90.o: $(AM_V_FC)$(FCCOMPILE) -c -o $@ $< @@ -6835,7 +7082,7 @@ check: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) check-am all-am: Makefile $(LTLIBRARIES) $(DATA) $(HEADERS) config.h all-local installdirs: - for dir in "$(DESTDIR)$(cafexeclibdir)" 
"$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(gfor_cdir)" "$(DESTDIR)$(fincludedir)"; do \ + for dir in "$(DESTDIR)$(cafexeclibdir)" "$(DESTDIR)$(mylibdir)" "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(gfor_cdir)" "$(DESTDIR)$(fincludedir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: $(BUILT_SOURCES) @@ -6873,7 +7120,8 @@ maintainer-clean-generic: clean: clean-am clean-am: clean-cafexeclibLTLIBRARIES clean-generic clean-libtool \ - clean-local clean-toolexeclibLTLIBRARIES mostlyclean-am + clean-local clean-mylibLTLIBRARIES \ + clean-toolexeclibLTLIBRARIES mostlyclean-am distclean: distclean-am -rm -f $(am__CONFIG_DISTCLEAN_FILES) @@ -6894,7 +7142,8 @@ info: info-am info-am: -install-data-am: install-gfor_cHEADERS install-nodist_fincludeHEADERS +install-data-am: install-gfor_cHEADERS install-mylibLTLIBRARIES \ + install-nodist_fincludeHEADERS install-dvi: install-dvi-am @@ -6945,14 +7194,14 @@ ps: ps-am ps-am: uninstall-am: uninstall-cafexeclibLTLIBRARIES uninstall-gfor_cHEADERS \ - uninstall-nodist_fincludeHEADERS uninstall-toolexeclibDATA \ - uninstall-toolexeclibLTLIBRARIES + uninstall-mylibLTLIBRARIES uninstall-nodist_fincludeHEADERS \ + uninstall-toolexeclibDATA uninstall-toolexeclibLTLIBRARIES .MAKE: all check install install-am install-strip .PHONY: CTAGS GTAGS TAGS all all-am all-local am--refresh check \ check-am clean clean-cafexeclibLTLIBRARIES clean-cscope \ - clean-generic clean-libtool clean-local \ + clean-generic clean-libtool clean-local clean-mylibLTLIBRARIES \ clean-toolexeclibLTLIBRARIES cscope cscopelist-am ctags \ ctags-am distclean distclean-compile distclean-generic \ distclean-hdr distclean-libtool distclean-local distclean-tags \ @@ -6961,16 +7210,17 @@ uninstall-am: uninstall-cafexeclibLTLIBRARIES uninstall-gfor_cHEADERS \ install-dvi install-dvi-am install-exec install-exec-am \ install-exec-local install-gfor_cHEADERS install-html \ install-html-am install-info 
install-info-am install-man \ - install-nodist_fincludeHEADERS install-pdf install-pdf-am \ - install-ps install-ps-am install-strip install-toolexeclibDATA \ + install-mylibLTLIBRARIES install-nodist_fincludeHEADERS \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip install-toolexeclibDATA \ install-toolexeclibLTLIBRARIES installcheck installcheck-am \ installdirs maintainer-clean maintainer-clean-generic \ maintainer-clean-local mostlyclean mostlyclean-compile \ mostlyclean-generic mostlyclean-libtool mostlyclean-local pdf \ pdf-am ps ps-am tags tags-am uninstall uninstall-am \ uninstall-cafexeclibLTLIBRARIES uninstall-gfor_cHEADERS \ - uninstall-nodist_fincludeHEADERS uninstall-toolexeclibDATA \ - uninstall-toolexeclibLTLIBRARIES + uninstall-mylibLTLIBRARIES uninstall-nodist_fincludeHEADERS \ + uninstall-toolexeclibDATA uninstall-toolexeclibLTLIBRARIES .PRECIOUS: Makefile @@ -6983,6 +7233,9 @@ uninstall-am: uninstall-cafexeclibLTLIBRARIES uninstall-gfor_cHEADERS \ @LIBGFOR_USE_SYMVER_SUN_TRUE@@LIBGFOR_USE_SYMVER_TRUE@ `echo $(libgfortran_la_LIBADD) | \ @LIBGFOR_USE_SYMVER_SUN_TRUE@@LIBGFOR_USE_SYMVER_TRUE@ sed 's,/\([^/.]*\)\.la,/.libs/\1.a,g'` \ @LIBGFOR_USE_SYMVER_SUN_TRUE@@LIBGFOR_USE_SYMVER_TRUE@ > $@ || (rm -f $@ ; exit 1) +@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/libcoarraynative.h nca/sync.h shared_memory.h \ +@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/util.h nca/lock.h nca/collective_subroutine.h\ +@LIBGFOR_NATIVE_COARRAY_TRUE@ nca/collective_inline.h # Turn on vectorization and loop unrolling for matmul. 
$(patsubst %.c,%.lo,$(notdir $(i_matmul_c))): AM_CFLAGS += -ffast-math -ftree-vectorize -funroll-loops --param max-unroll-times=4 @@ -7193,6 +7446,12 @@ fpu-target.inc: fpu-target.h $(srcdir)/libgfortran.h @MAINTAINER_MODE_TRUE@$(gfor_misc_specifics): m4/misc_specifics.m4 m4/head.m4 @MAINTAINER_MODE_TRUE@ $(M4) -Dfile=$@ -I$(srcdir)/m4 misc_specifics.m4 > $@ +@LIBGFOR_NATIVE_COARRAY_TRUE@@MAINTAINER_MODE_TRUE@$(i_nca_minmax_c): m4/nca_minmax.m4 $(I_M4_DEPS) +@LIBGFOR_NATIVE_COARRAY_TRUE@@MAINTAINER_MODE_TRUE@ $(M4) -Dfile=$@ -I$(srcdir)/m4 nca_minmax.m4 > $@ + +@LIBGFOR_NATIVE_COARRAY_TRUE@@MAINTAINER_MODE_TRUE@$(i_nca_minmax_s_c): m4/nca-minmax-s.m4 $(I_M4_DEPS) +@LIBGFOR_NATIVE_COARRAY_TRUE@@MAINTAINER_MODE_TRUE@ $(M4) -Dfile=$@ -I$(srcdir)/m4 nca-minmax-s.m4 > $@ + # target overrides -include $(tmake_file) diff --git a/libgfortran/config.h.in b/libgfortran/config.h.in index 2d58188..795c9fe 100644 --- a/libgfortran/config.h.in +++ b/libgfortran/config.h.in @@ -657,6 +657,15 @@ /* Define to 1 if you have the `powf' function. */ #undef HAVE_POWF +/* Define to 1 if you have the `pthread_barrierattr_setpshared' function. */ +#undef HAVE_PTHREAD_BARRIERATTR_SETPSHARED + +/* Define to 1 if you have the `pthread_condattr_setpshared' function. */ +#undef HAVE_PTHREAD_CONDATTR_SETPSHARED + +/* Define to 1 if you have the `pthread_mutexattr_setpshared' function. */ +#undef HAVE_PTHREAD_MUTEXATTR_SETPSHARED + /* Define to 1 if the system has the type `ptrdiff_t'. */ #undef HAVE_PTRDIFF_T @@ -687,6 +696,12 @@ /* Define to 1 if you have the `setmode' function. */ #undef HAVE_SETMODE +/* Define to 1 if you have the `shm_open' function. */ +#undef HAVE_SHM_OPEN + +/* Define to 1 if you have the `shm_unlink' function. */ +#undef HAVE_SHM_UNLINK + /* Define to 1 if you have the `sigaction' function. 
*/ #undef HAVE_SIGACTION diff --git a/libgfortran/configure b/libgfortran/configure index 99cca96..d703fda 100755 --- a/libgfortran/configure +++ b/libgfortran/configure @@ -637,6 +637,11 @@ am__EXEEXT_TRUE LTLIBOBJS LIBOBJS get_gcc_base_ver +LIBGFOR_NATIVE_COARRAY_FALSE +LIBGFOR_NATIVE_COARRAY_TRUE +RT_LIBS +PTHREAD_LIBS +PTHREAD_CFLAGS HAVE_AVX128_FALSE HAVE_AVX128_TRUE tmake_file @@ -783,6 +788,7 @@ infodir docdir oldincludedir includedir +runstatedir localstatedir sharedstatedir sysconfdir @@ -874,6 +880,7 @@ datadir='${datarootdir}' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' +runstatedir='${localstatedir}/run' includedir='${prefix}/include' oldincludedir='/usr/include' docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' @@ -1126,6 +1133,15 @@ do | -silent | --silent | --silen | --sile | --sil) silent=yes ;; + -runstatedir | --runstatedir | --runstatedi | --runstated \ + | --runstate | --runstat | --runsta | --runst | --runs \ + | --run | --ru | --r) + ac_prev=runstatedir ;; + -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \ + | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \ + | --run=* | --ru=* | --r=*) + runstatedir=$ac_optarg ;; + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ @@ -1263,7 +1279,7 @@ fi for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ datadir sysconfdir sharedstatedir localstatedir includedir \ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ - libdir localedir mandir + libdir localedir mandir runstatedir do eval ac_val=\$$ac_var # Remove trailing slashes. 
@@ -1416,6 +1432,7 @@ Fine tuning of the installation directories: --sysconfdir=DIR read-only single-machine data [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run] --libdir=DIR object code libraries [EPREFIX/lib] --includedir=DIR C header files [PREFIX/include] --oldincludedir=DIR C header files for non-gcc [/usr/include] @@ -16099,7 +16116,7 @@ else We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -16145,7 +16162,7 @@ else We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -16169,7 +16186,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 
1 : -1]; @@ -16214,7 +16231,7 @@ else We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -16238,7 +16255,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext We can't simply define LARGE_OFF_T to be 9223372036854775807, since some C++ compilers masquerading as C compilers incorrectly reject 9223372036854775807. */ -#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) +#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31)) int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 && LARGE_OFF_T % 2147483647 == 1) ? 1 : -1]; @@ -27147,6 +27164,167 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CFLAGS="$ac_save_CFLAGS" +# Tests related to native coarrays +# Test whether the compiler supports the -pthread option. +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -pthread is supported" >&5 +$as_echo_n "checking whether -pthread is supported... " >&6; } +if ${libgfortran_cv_lib_pthread+:} false; then : + $as_echo_n "(cached) " >&6 +else + CFLAGS_hold=$CFLAGS +CFLAGS="$CFLAGS -pthread -L../libatomic/.libs" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +int i; +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + libgfortran_cv_lib_pthread=yes +else + libgfortran_cv_lib_pthread=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +CFLAGS=$CFLAGS_hold +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgfortran_cv_lib_pthread" >&5 +$as_echo "$libgfortran_cv_lib_pthread" >&6; } +PTHREAD_CFLAGS= +if test "$libgfortran_cv_lib_pthread" = yes; then + # RISC-V apparently adds -latomic when using -pthread. 
+ PTHREAD_CFLAGS="-pthread -L../libatomic/.libs" +fi + + +# Test for the -lpthread library. +PTHREAD_LIBS= +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5 +$as_echo_n "checking for pthread_create in -lpthread... " >&6; } +if ${ac_cv_lib_pthread_pthread_create+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lpthread $LIBS" +if test x$gcc_no_link = xyes; then + as_fn_error $? "Link tests are not allowed after GCC_NO_EXECUTABLES." "$LINENO" 5 +fi +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char pthread_create (); +int +main () +{ +return pthread_create (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_pthread_pthread_create=yes +else + ac_cv_lib_pthread_pthread_create=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthread_pthread_create" >&5 +$as_echo "$ac_cv_lib_pthread_pthread_create" >&6; } +if test "x$ac_cv_lib_pthread_pthread_create" = xyes; then : + PTHREAD_LIBS=-lpthread +fi + + + +# Test if -lrt is required +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for shm_open in -lrt" >&5 +$as_echo_n "checking for shm_open in -lrt... " >&6; } +if ${ac_cv_lib_rt_shm_open+:} false; then : + $as_echo_n "(cached) " >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lrt $LIBS" +if test x$gcc_no_link = xyes; then + as_fn_error $? "Link tests are not allowed after GCC_NO_EXECUTABLES." "$LINENO" 5 +fi +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. 
+ Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char shm_open (); +int +main () +{ +return shm_open (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ac_cv_lib_rt_shm_open=yes +else + ac_cv_lib_rt_shm_open=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_rt_shm_open" >&5 +$as_echo "$ac_cv_lib_rt_shm_open" >&6; } +if test "x$ac_cv_lib_rt_shm_open" = xyes; then : + RT_LIBS=-lrt +fi + + + +CFLAGS_hold="$CFLAGS" +CFLAGS="$CFLAGS $PTHREAD_CFLAGS" +LIBS_hold="$LIBS" +LIBS="$LIBS $PTHREAD_LIBS $RT_LIBS" + +# Find the functions that we need +for ac_func in pthread_condattr_setpshared pthread_mutexattr_setpshared pthread_barrierattr_setpshared shm_open shm_unlink +do : + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +if eval test \"x\$"$as_ac_var"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + if true; then + LIBGFOR_NATIVE_COARRAY_TRUE= + LIBGFOR_NATIVE_COARRAY_FALSE='#' +else + LIBGFOR_NATIVE_COARRAY_TRUE='#' + LIBGFOR_NATIVE_COARRAY_FALSE= +fi + +else + if false; then + LIBGFOR_NATIVE_COARRAY_TRUE= + LIBGFOR_NATIVE_COARRAY_FALSE='#' +else + LIBGFOR_NATIVE_COARRAY_TRUE='#' + LIBGFOR_NATIVE_COARRAY_FALSE= +fi + +fi +done + + +CFLAGS="$CFLAGS_hold" +LIBS="$LIBS_hold" + # Determine what GCC version number to use in filesystem paths. get_gcc_base_ver="cat" @@ -27438,6 +27616,14 @@ if test -z "${HAVE_AVX128_TRUE}" && test -z "${HAVE_AVX128_FALSE}"; then as_fn_error $? "conditional \"HAVE_AVX128\" was never defined. Usually this means the macro was only invoked conditionally." 
"$LINENO" 5 fi +if test -z "${LIBGFOR_NATIVE_COARRAY_TRUE}" && test -z "${LIBGFOR_NATIVE_COARRAY_FALSE}"; then + as_fn_error $? "conditional \"LIBGFOR_NATIVE_COARRAY\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi +if test -z "${LIBGFOR_NATIVE_COARRAY_TRUE}" && test -z "${LIBGFOR_NATIVE_COARRAY_FALSE}"; then + as_fn_error $? "conditional \"LIBGFOR_NATIVE_COARRAY\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi : "${CONFIG_STATUS=./config.status}" ac_write_fail=0 diff --git a/libgfortran/configure.ac b/libgfortran/configure.ac index 8961e31..1aac140 100644 --- a/libgfortran/configure.ac +++ b/libgfortran/configure.ac @@ -683,6 +683,43 @@ LIBGFOR_CHECK_FMA4 # Check if AVX128 works LIBGFOR_CHECK_AVX128 +# Tests related to native coarrays +# Test whether the compiler supports the -pthread option. +AC_CACHE_CHECK([whether -pthread is supported], +[libgfortran_cv_lib_pthread], +[CFLAGS_hold=$CFLAGS +CFLAGS="$CFLAGS -pthread -L../libatomic/.libs" +AC_COMPILE_IFELSE([AC_LANG_SOURCE([int i;])], +[libgfortran_cv_lib_pthread=yes], +[libgfortran_cv_lib_pthread=no]) +CFLAGS=$CFLAGS_hold]) +PTHREAD_CFLAGS= +if test "$libgfortran_cv_lib_pthread" = yes; then + # RISC-V apparently adds -latomic when using -pthread. + PTHREAD_CFLAGS="-pthread -L../libatomic/.libs" +fi +AC_SUBST(PTHREAD_CFLAGS) + +# Test for the -lpthread library. 
+PTHREAD_LIBS= +AC_CHECK_LIB([pthread], [pthread_create], PTHREAD_LIBS=-lpthread) +AC_SUBST(PTHREAD_LIBS) + +# Test if -lrt is required +AC_CHECK_LIB([rt], [shm_open], RT_LIBS=-lrt) +AC_SUBST(RT_LIBS) + +CFLAGS_hold="$CFLAGS" +CFLAGS="$CFLAGS $PTHREAD_CFLAGS" +LIBS_hold="$LIBS" +LIBS="$LIBS $PTHREAD_LIBS $RT_LIBS" + +# Find the functions that we need +AC_CHECK_FUNCS([pthread_condattr_setpshared pthread_mutexattr_setpshared pthread_barrierattr_setpshared shm_open shm_unlink],[AM_CONDITIONAL(LIBGFOR_NATIVE_COARRAY,true)],[AM_CONDITIONAL(LIBGFOR_NATIVE_COARRAY,false)]) + +CFLAGS="$CFLAGS_hold" +LIBS="$LIBS_hold" + # Determine what GCC version number to use in filesystem paths. GCC_BASE_VER diff --git a/libgfortran/generated/nca_minmax_i1.c b/libgfortran/generated/nca_minmax_i1.c new file mode 100644 index 0000000..3bc9a2b --- /dev/null +++ b/libgfortran/generated/nca_minmax_i1.c @@ -0,0 +1,653 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_INTEGER_1) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +void nca_collsub_max_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_i1); + +void +nca_collsub_max_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_1 *a, *b; + GFC_INTEGER_1 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b > *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_i1); + +void +nca_collsub_min_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len 
__attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_1 *a, *b; + GFC_INTEGER_1 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b < *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_sum_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_sum_scalar_i1); + +void +nca_collsub_sum_scalar_i1 (GFC_INTEGER_1 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_1 *a, *b; + GFC_INTEGER_1 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_1) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + *a += *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + 
finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_i1 (gfc_array_i1 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_max_array_i1); + +void +nca_collsub_max_array_i1 (gfc_array_i1 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_1 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_1 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_1); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_1); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_1); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_1 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_INTEGER_1 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_1 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_1 *a; + GFC_INTEGER_1 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b > *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_1 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_1 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_i1 (gfc_array_i1 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_min_array_i1); + +void +nca_collsub_min_array_i1 (gfc_array_i1 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_1 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_1 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_1); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_1); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_1); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_1 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. 
*/ + *(dest++) = *((GFC_INTEGER_1 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_1 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_1 *a; + GFC_INTEGER_1 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b < *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_1 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_1 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_sum_array_i1 (gfc_array_i1 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_sum_array_i1); + +void +nca_collsub_sum_array_i1 (gfc_array_i1 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_1 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_1 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_1); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_1); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_1); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_1 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. 
*/ + *(dest++) = *((GFC_INTEGER_1 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_1 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_1 *a; + GFC_INTEGER_1 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + *a += *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_1 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_1 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + diff --git a/libgfortran/generated/nca_minmax_i16.c b/libgfortran/generated/nca_minmax_i16.c new file mode 100644 index 0000000..8fbb948 --- /dev/null +++ b/libgfortran/generated/nca_minmax_i16.c @@ -0,0 +1,653 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. 
*/ + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_INTEGER_16) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +void nca_collsub_max_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_i16); + +void +nca_collsub_max_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_16 *a, *b; + GFC_INTEGER_16 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b > *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_i16); + +void +nca_collsub_min_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_16 *a, *b; + GFC_INTEGER_16 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * 
local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b < *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_sum_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_sum_scalar_i16); + +void +nca_collsub_sum_scalar_i16 (GFC_INTEGER_16 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_16 *a, *b; + GFC_INTEGER_16 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_16) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + *a += *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_i16 (gfc_array_i16 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_max_array_i16); + +void 
+nca_collsub_max_array_i16 (gfc_array_i16 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_16 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_16); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_16); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_16); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_16 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_INTEGER_16 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. 
Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_16 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_16 *a; + GFC_INTEGER_16 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b > *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_16 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_16 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_i16 (gfc_array_i16 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_min_array_i16); + +void +nca_collsub_min_array_i16 (gfc_array_i16 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_16 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_16); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_16); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_16); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_16 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. 
*/ + *(dest++) = *((GFC_INTEGER_16 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_16 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_16 *a; + GFC_INTEGER_16 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b < *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_16 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_16 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_sum_array_i16 (gfc_array_i16 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_sum_array_i16); + +void +nca_collsub_sum_array_i16 (gfc_array_i16 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_16 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_16); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_16); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_16); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_16 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. 
*/ + *(dest++) = *((GFC_INTEGER_16 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_16 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_16 *a; + GFC_INTEGER_16 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + *a += *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_16 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_16 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + diff --git a/libgfortran/generated/nca_minmax_i2.c b/libgfortran/generated/nca_minmax_i2.c new file mode 100644 index 0000000..61908d6 --- /dev/null +++ b/libgfortran/generated/nca_minmax_i2.c @@ -0,0 +1,653 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. 
*/ + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_INTEGER_2) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +void nca_collsub_max_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_i2); + +void +nca_collsub_max_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_2 *a, *b; + GFC_INTEGER_2 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b > *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_i2); + +void +nca_collsub_min_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_2 *a, *b; + GFC_INTEGER_2 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->num_images); + 
this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b < *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_sum_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_sum_scalar_i2); + +void +nca_collsub_sum_scalar_i2 (GFC_INTEGER_2 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_2 *a, *b; + GFC_INTEGER_2 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_2) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + *a += *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_i2 (gfc_array_i2 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_max_array_i2); + +void +nca_collsub_max_array_i2 
(gfc_array_i2 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_2 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_2 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_2); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_2); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_2); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_2 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_INTEGER_2 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. 
Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_2 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_2 *a; + GFC_INTEGER_2 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b > *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_2 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_2 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_i2 (gfc_array_i2 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_min_array_i2); + +void +nca_collsub_min_array_i2 (gfc_array_i2 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_2 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_2 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_2); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_2); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_2); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_2 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. 
*/ + *(dest++) = *((GFC_INTEGER_2 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_2 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_2 *a; + GFC_INTEGER_2 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b < *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_2 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_2 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_sum_array_i2 (gfc_array_i2 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_sum_array_i2); + +void +nca_collsub_sum_array_i2 (gfc_array_i2 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_2 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_2 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_2); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_2); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_2); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_2 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. 
*/ + *(dest++) = *((GFC_INTEGER_2 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_2 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_2 *a; + GFC_INTEGER_2 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + *a += *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_2 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_2 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + diff --git a/libgfortran/generated/nca_minmax_i4.c b/libgfortran/generated/nca_minmax_i4.c new file mode 100644 index 0000000..5e37586 --- /dev/null +++ b/libgfortran/generated/nca_minmax_i4.c @@ -0,0 +1,653 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. 
*/ + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_INTEGER_4) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +void nca_collsub_max_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_i4); + +void +nca_collsub_max_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_4 *a, *b; + GFC_INTEGER_4 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b > *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_i4); + +void +nca_collsub_min_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_4 *a, *b; + GFC_INTEGER_4 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->num_images); + 
this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b < *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_sum_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_sum_scalar_i4); + +void +nca_collsub_sum_scalar_i4 (GFC_INTEGER_4 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_4 *a, *b; + GFC_INTEGER_4 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_4) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + *a += *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_i4 (gfc_array_i4 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_max_array_i4); + +void +nca_collsub_max_array_i4 
(gfc_array_i4 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_4 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_4); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_4); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_4); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_4 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_INTEGER_4 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. 
Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_4 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_4 *a; + GFC_INTEGER_4 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b > *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_4 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_4 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_i4 (gfc_array_i4 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_min_array_i4); + +void +nca_collsub_min_array_i4 (gfc_array_i4 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_4 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_4); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_4); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_4); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_4 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. 
*/ + *(dest++) = *((GFC_INTEGER_4 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_4 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_4 *a; + GFC_INTEGER_4 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b < *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_4 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_4 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_sum_array_i4 (gfc_array_i4 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_sum_array_i4); + +void +nca_collsub_sum_array_i4 (gfc_array_i4 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_4 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_4); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_4); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_4); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_4 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. 
*/ + *(dest++) = *((GFC_INTEGER_4 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_4 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_4 *a; + GFC_INTEGER_4 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + *a += *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_4 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_4 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + diff --git a/libgfortran/generated/nca_minmax_i8.c b/libgfortran/generated/nca_minmax_i8.c new file mode 100644 index 0000000..b3dc861 --- /dev/null +++ b/libgfortran/generated/nca_minmax_i8.c @@ -0,0 +1,653 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. 
*/ + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_INTEGER_8) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +void nca_collsub_max_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_i8); + +void +nca_collsub_max_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_8 *a, *b; + GFC_INTEGER_8 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b > *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_i8); + +void +nca_collsub_min_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_8 *a, *b; + GFC_INTEGER_8 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->num_images); + 
this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b < *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_sum_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_sum_scalar_i8); + +void +nca_collsub_sum_scalar_i8 (GFC_INTEGER_8 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_INTEGER_8 *a, *b; + GFC_INTEGER_8 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_INTEGER_8) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + *a += *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_i8 (gfc_array_i8 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_max_array_i8); + +void +nca_collsub_max_array_i8 
(gfc_array_i8 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_8 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_8); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_8); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_8); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_8 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_INTEGER_8 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. 
Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_8 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_8 *a; + GFC_INTEGER_8 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b > *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_8 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_8 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_i8 (gfc_array_i8 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_min_array_i8); + +void +nca_collsub_min_array_i8 (gfc_array_i8 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_8 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_8); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_8); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_8); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_8 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. 
*/ + *(dest++) = *((GFC_INTEGER_8 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_8 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_8 *a; + GFC_INTEGER_8 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b < *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_8 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_8 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_sum_array_i8 (gfc_array_i8 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_sum_array_i8); + +void +nca_collsub_sum_array_i8 (gfc_array_i8 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_INTEGER_8 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_INTEGER_8); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_INTEGER_8); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_INTEGER_8); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_INTEGER_8 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. 
*/ + *(dest++) = *((GFC_INTEGER_8 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_INTEGER_8 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_INTEGER_8 *a; + GFC_INTEGER_8 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + *a += *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_INTEGER_8 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_INTEGER_8 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + diff --git a/libgfortran/generated/nca_minmax_r10.c b/libgfortran/generated/nca_minmax_r10.c new file mode 100644 index 0000000..10f7324 --- /dev/null +++ b/libgfortran/generated/nca_minmax_r10.c @@ -0,0 +1,653 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. 
*/ + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_REAL_10) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +void nca_collsub_max_scalar_r10 (GFC_REAL_10 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_r10); + +void +nca_collsub_max_scalar_r10 (GFC_REAL_10 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_10 *a, *b; + GFC_REAL_10 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b > *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_r10 (GFC_REAL_10 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_r10); + +void +nca_collsub_min_scalar_r10 (GFC_REAL_10 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_10 *a, *b; + GFC_REAL_10 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->num_images); + this_image_buf = 
buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b < *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_sum_scalar_r10 (GFC_REAL_10 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_sum_scalar_r10); + +void +nca_collsub_sum_scalar_r10 (GFC_REAL_10 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_10 *a, *b; + GFC_REAL_10 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_10) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + *a += *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_r10 (gfc_array_r10 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_max_array_r10); + +void +nca_collsub_max_array_r10 (gfc_array_r10 * restrict 
array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_10 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_10 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_10); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_10); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_10); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_10 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_REAL_10 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. 
Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_10 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_10 *a; + GFC_REAL_10 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b > *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_10 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_10 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_r10 (gfc_array_r10 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_min_array_r10); + +void +nca_collsub_min_array_r10 (gfc_array_r10 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_10 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_10 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_10); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_10); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_10); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_10 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. 
*/ + *(dest++) = *((GFC_REAL_10 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_10 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_10 *a; + GFC_REAL_10 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b < *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_10 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_10 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_sum_array_r10 (gfc_array_r10 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_sum_array_r10); + +void +nca_collsub_sum_array_r10 (gfc_array_r10 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_10 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_10 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_10); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_10); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_10); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_10 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. 
*/ + *(dest++) = *((GFC_REAL_10 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_10 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_10 *a; + GFC_REAL_10 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + *a += *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_10 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_10 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + diff --git a/libgfortran/generated/nca_minmax_r16.c b/libgfortran/generated/nca_minmax_r16.c new file mode 100644 index 0000000..a0a0a51 --- /dev/null +++ b/libgfortran/generated/nca_minmax_r16.c @@ -0,0 +1,653 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. 
*/ + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_REAL_16) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +void nca_collsub_max_scalar_r16 (GFC_REAL_16 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_r16); + +void +nca_collsub_max_scalar_r16 (GFC_REAL_16 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_16 *a, *b; + GFC_REAL_16 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b > *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_r16 (GFC_REAL_16 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_r16); + +void +nca_collsub_min_scalar_r16 (GFC_REAL_16 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_16 *a, *b; + GFC_REAL_16 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->num_images); + this_image_buf = 
buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b < *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_sum_scalar_r16 (GFC_REAL_16 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_sum_scalar_r16); + +void +nca_collsub_sum_scalar_r16 (GFC_REAL_16 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_16 *a, *b; + GFC_REAL_16 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_16) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + *a += *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_r16 (gfc_array_r16 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_max_array_r16); + +void +nca_collsub_max_array_r16 (gfc_array_r16 * restrict 
array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_16 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_16); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_16); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_16); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_16 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_REAL_16 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. 
Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_16 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_16 *a; + GFC_REAL_16 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b > *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_16 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_16 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_r16 (gfc_array_r16 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_min_array_r16); + +void +nca_collsub_min_array_r16 (gfc_array_r16 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_16 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_16); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_16); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_16); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_16 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. 
*/ + *(dest++) = *((GFC_REAL_16 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_16 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_16 *a; + GFC_REAL_16 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b < *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_16 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_16 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_sum_array_r16 (gfc_array_r16 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_sum_array_r16); + +void +nca_collsub_sum_array_r16 (gfc_array_r16 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_16 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_16 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_16); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_16); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_16); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_16 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. 
*/ + *(dest++) = *((GFC_REAL_16 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_16 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_16 *a; + GFC_REAL_16 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + *a += *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_16 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_16 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + diff --git a/libgfortran/generated/nca_minmax_r4.c b/libgfortran/generated/nca_minmax_r4.c new file mode 100644 index 0000000..0eb3f1b --- /dev/null +++ b/libgfortran/generated/nca_minmax_r4.c @@ -0,0 +1,653 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. 
*/ + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_REAL_4) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +void nca_collsub_max_scalar_r4 (GFC_REAL_4 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_r4); + +void +nca_collsub_max_scalar_r4 (GFC_REAL_4 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_4 *a, *b; + GFC_REAL_4 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b > *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_r4 (GFC_REAL_4 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_r4); + +void +nca_collsub_min_scalar_r4 (GFC_REAL_4 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_4 *a, *b; + GFC_REAL_4 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->num_images); + this_image_buf = buffer + 
this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b < *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_sum_scalar_r4 (GFC_REAL_4 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_sum_scalar_r4); + +void +nca_collsub_sum_scalar_r4 (GFC_REAL_4 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_4 *a, *b; + GFC_REAL_4 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_4) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + *a += *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_r4 (gfc_array_r4 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_max_array_r4); + +void +nca_collsub_max_array_r4 (gfc_array_r4 * restrict array, int *result_image, 
+ int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_4 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_4); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_4); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_4); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_4 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_REAL_4 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. 
Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_4 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_4 *a; + GFC_REAL_4 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b > *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_4 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_4 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_r4 (gfc_array_r4 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_min_array_r4); + +void +nca_collsub_min_array_r4 (gfc_array_r4 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_4 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_4); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_4); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_4); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_4 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_REAL_4 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. 
*/ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_4 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_4 *a; + GFC_REAL_4 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b < *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_4 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_4 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_sum_array_r4 (gfc_array_r4 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_sum_array_r4); + +void +nca_collsub_sum_array_r4 (gfc_array_r4 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_4 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_4 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_4); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_4); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_4); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_4 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_REAL_4 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. 
*/ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_4 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_4 *a; + GFC_REAL_4 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + *a += *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_4 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_4 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + diff --git a/libgfortran/generated/nca_minmax_r8.c b/libgfortran/generated/nca_minmax_r8.c new file mode 100644 index 0000000..3b3e962 --- /dev/null +++ b/libgfortran/generated/nca_minmax_r8.c @@ -0,0 +1,653 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. 
*/ + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_REAL_8) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +void nca_collsub_max_scalar_r8 (GFC_REAL_8 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_r8); + +void +nca_collsub_max_scalar_r8 (GFC_REAL_8 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_8 *a, *b; + GFC_REAL_8 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b > *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_r8 (GFC_REAL_8 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_r8); + +void +nca_collsub_min_scalar_r8 (GFC_REAL_8 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_8 *a, *b; + GFC_REAL_8 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->num_images); + this_image_buf = buffer + 
this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + if (*b < *a) + *a = *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_sum_scalar_r8 (GFC_REAL_8 *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_sum_scalar_r8); + +void +nca_collsub_sum_scalar_r8 (GFC_REAL_8 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_REAL_8 *a, *b; + GFC_REAL_8 *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof(GFC_REAL_8) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + *a += *b; + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_r8 (gfc_array_r8 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_max_array_r8); + +void +nca_collsub_max_array_r8 (gfc_array_r8 * restrict array, int *result_image, 
+ int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_8 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_8); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_8); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_8); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_8 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_REAL_8 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. 
Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_8 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_8 *a; + GFC_REAL_8 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b > *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_8 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_8 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_r8 (gfc_array_r8 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_min_array_r8); + +void +nca_collsub_min_array_r8 (gfc_array_r8 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_8 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_8); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_8); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_8); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_8 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_REAL_8 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. 
*/ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_8 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_8 *a; + GFC_REAL_8 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + if (*b < *a) + *a = *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_8 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_8 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_sum_array_r8 (gfc_array_r8 * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_sum_array_r8); + +void +nca_collsub_sum_array_r8 (gfc_array_r8 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + GFC_REAL_8 *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + GFC_REAL_8 *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof (GFC_REAL_8); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof (GFC_REAL_8); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof (GFC_REAL_8); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + GFC_REAL_8 *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *((GFC_REAL_8 *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. 
*/ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + GFC_REAL_8 * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_REAL_8 *a; + GFC_REAL_8 *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + *a += *b; + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + GFC_REAL_8 *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *((GFC_REAL_8 * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + diff --git a/libgfortran/generated/nca_minmax_s1.c b/libgfortran/generated/nca_minmax_s1.c new file mode 100644 index 0000000..b081452 --- /dev/null +++ b/libgfortran/generated/nca_minmax_s1.c @@ -0,0 +1,494 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_UINTEGER_1) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +#if 1 == 4 + +/* Compare wide character types, which are handled internally as + unsigned 4-byte integers. 
*/ +static inline int +memcmp4 (const void *a, const void *b, size_t len) +{ + const GFC_UINTEGER_4 *pa = a; + const GFC_UINTEGER_4 *pb = b; + while (len-- > 0) + { + if (*pa != *pb) + return *pa < *pb ? -1 : 1; + pa ++; + pb ++; + } + return 0; +} + +#endif +void nca_collsub_max_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image, + int *stat, char *errmsg, index_type char_len, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_s1); + +void +nca_collsub_max_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type char_len, + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_UINTEGER_1 *a, *b; + GFC_UINTEGER_1 *buffer, *this_image_buf; + collsub_iface *ci; + index_type type_size; + + ci = &local->ci; + + type_size = char_len * sizeof (GFC_UINTEGER_1); + buffer = get_collsub_buf (ci, type_size * local->num_images); + this_image_buf = buffer + this_image.image_num * char_len; + memcpy (this_image_buf, obj, type_size); + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 + && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset * char_len; + if (memcmp (b, a, char_len) > 0) + memcpy (a, b, type_size); + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + memcpy (obj, buffer, type_size); + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image, + int *stat, char *errmsg, index_type char_len, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_s1); + +void +nca_collsub_min_scalar_s1 (GFC_UINTEGER_1 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), 
+ index_type char_len, + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_UINTEGER_1 *a, *b; + GFC_UINTEGER_1 *buffer, *this_image_buf; + collsub_iface *ci; + index_type type_size; + + ci = &local->ci; + + type_size = char_len * sizeof (GFC_UINTEGER_1); + buffer = get_collsub_buf (ci, type_size * local->num_images); + this_image_buf = buffer + this_image.image_num * char_len; + memcpy (this_image_buf, obj, type_size); + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 + && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset * char_len; + if (memcmp (b, a, char_len) < 0) + memcpy (a, b, type_size); + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + memcpy (obj, buffer, type_size); + + finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_s1 (gfc_array_s1 * restrict array, int *result_image, + int *stat, char *errmsg, index_type char_len, + index_type errmsg_len); +export_proto (nca_collsub_max_array_s1); + +void +nca_collsub_max_array_s1 (gfc_array_s1 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type char_len, + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */ + index_type extent[GFC_MAX_DIMENSIONS]; + char *this_shared_ptr; /* Points to the shared memory allocated to this image. 
*/ + char *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + index_type type_size; + collsub_iface *ci; + + ci = &local->ci; + + type_size = char_len * sizeof (GFC_UINTEGER_1); + dim = GFC_DESCRIPTOR_RANK (array); + num_elems = 1; + ssize = type_size; + packed = true; + span = array->span != 0 ? array->span : type_size; + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n)) + packed = false; + + num_elems *= extent[n]; + } + + ssize = num_elems * type_size; + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * ssize; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + char *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + + memcpy (dest, src, type_size); + dest += type_size; + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. 
Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 + && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + char *other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_UINTEGER_1 *a; + GFC_UINTEGER_1 *b; + + other_shared_ptr = this_shared_ptr + imoffset * ssize; + for (index_type i = 0; i < num_elems; i++) + { + a = (GFC_UINTEGER_1 *) (this_shared_ptr + i * type_size); + b = (GFC_UINTEGER_1 *) (other_shared_ptr + i * type_size); + if (memcmp (b, a, char_len) > 0) + memcpy (a, b, type_size); + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + char *src = buffer; + char *restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + memcpy (dest, src, type_size); + src += span; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_s1 (gfc_array_s1 * restrict array, int *result_image, + int *stat, char *errmsg, index_type char_len, + index_type errmsg_len); +export_proto (nca_collsub_min_array_s1); + +void +nca_collsub_min_array_s1 (gfc_array_s1 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type char_len, + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */ + index_type extent[GFC_MAX_DIMENSIONS]; + char *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + char *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + index_type type_size; + collsub_iface *ci; + + ci = &local->ci; + + type_size = char_len * sizeof (GFC_UINTEGER_1); + dim = GFC_DESCRIPTOR_RANK (array); + num_elems = 1; + ssize = type_size; + packed = true; + span = array->span != 0 ? array->span : type_size; + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. 
*/ + if (extent[n] <= 0) + return; + + if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n)) + packed = false; + + num_elems *= extent[n]; + } + + ssize = num_elems * type_size; + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * ssize; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + char *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + + memcpy (dest, src, type_size); + dest += type_size; + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 + && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + char *other_shared_ptr; /* Points to the shared memory + allocated to another image. 
*/ + GFC_UINTEGER_1 *a; + GFC_UINTEGER_1 *b; + + other_shared_ptr = this_shared_ptr + imoffset * ssize; + for (index_type i = 0; i < num_elems; i++) + { + a = (GFC_UINTEGER_1 *) (this_shared_ptr + i * type_size); + b = (GFC_UINTEGER_1 *) (other_shared_ptr + i * type_size); + if (memcmp (b, a, char_len) < 0) + memcpy (a, b, type_size); + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + char *src = buffer; + char *restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + memcpy (dest, src, type_size); + src += span; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + diff --git a/libgfortran/generated/nca_minmax_s4.c b/libgfortran/generated/nca_minmax_s4.c new file mode 100644 index 0000000..b202fda --- /dev/null +++ b/libgfortran/generated/nca_minmax_s4.c @@ -0,0 +1,494 @@ +/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. 
+ +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + + +#include "libgfortran.h" + +#if defined (HAVE_GFC_UINTEGER_4) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +#if 4 == 4 + +/* Compare wide character types, which are handled internally as + unsigned 4-byte integers. */ +static inline int +memcmp4 (const void *a, const void *b, size_t len) +{ + const GFC_UINTEGER_4 *pa = a; + const GFC_UINTEGER_4 *pb = b; + while (len-- > 0) + { + if (*pa != *pb) + return *pa < *pb ? 
-1 : 1; + pa ++; + pb ++; + } + return 0; +} + +#endif +void nca_collsub_max_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image, + int *stat, char *errmsg, index_type char_len, index_type errmsg_len); +export_proto(nca_collsub_max_scalar_s4); + +void +nca_collsub_max_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type char_len, + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_UINTEGER_4 *a, *b; + GFC_UINTEGER_4 *buffer, *this_image_buf; + collsub_iface *ci; + index_type type_size; + + ci = &local->ci; + + type_size = char_len * sizeof (GFC_UINTEGER_4); + buffer = get_collsub_buf (ci, type_size * local->num_images); + this_image_buf = buffer + this_image.image_num * char_len; + memcpy (this_image_buf, obj, type_size); + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 + && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset * char_len; + if (memcmp4 (b, a, char_len) > 0) + memcpy (a, b, type_size); + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + memcpy (obj, buffer, type_size); + + finish_collective_subroutine (ci); + +} + +void nca_collsub_min_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image, + int *stat, char *errmsg, index_type char_len, index_type errmsg_len); +export_proto(nca_collsub_min_scalar_s4); + +void +nca_collsub_min_scalar_s4 (GFC_UINTEGER_4 *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type char_len, + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + GFC_UINTEGER_4 *a, *b; + GFC_UINTEGER_4 *buffer, *this_image_buf; + collsub_iface *ci; + 
index_type type_size; + + ci = &local->ci; + + type_size = char_len * sizeof (GFC_UINTEGER_4); + buffer = get_collsub_buf (ci, type_size * local->num_images); + this_image_buf = buffer + this_image.image_num * char_len; + memcpy (this_image_buf, obj, type_size); + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 + && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset * char_len; + if (memcmp4 (b, a, char_len) < 0) + memcpy (a, b, type_size); + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + memcpy (obj, buffer, type_size); + + finish_collective_subroutine (ci); + +} + +void nca_collsub_max_array_s4 (gfc_array_s4 * restrict array, int *result_image, + int *stat, char *errmsg, index_type char_len, + index_type errmsg_len); +export_proto (nca_collsub_max_array_s4); + +void +nca_collsub_max_array_s4 (gfc_array_s4 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type char_len, + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */ + index_type extent[GFC_MAX_DIMENSIONS]; + char *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + char *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + index_type type_size; + collsub_iface *ci; + + ci = &local->ci; + + type_size = char_len * sizeof (GFC_UINTEGER_4); + dim = GFC_DESCRIPTOR_RANK (array); + num_elems = 1; + ssize = type_size; + packed = true; + span = array->span != 0 ? 
array->span : type_size; + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n)) + packed = false; + + num_elems *= extent[n]; + } + + ssize = num_elems * type_size; + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * ssize; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + char *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + + memcpy (dest, src, type_size); + dest += type_size; + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 + && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + char *other_shared_ptr; /* Points to the shared memory + allocated to another image. 
*/ + GFC_UINTEGER_4 *a; + GFC_UINTEGER_4 *b; + + other_shared_ptr = this_shared_ptr + imoffset * ssize; + for (index_type i = 0; i < num_elems; i++) + { + a = (GFC_UINTEGER_4 *) (this_shared_ptr + i * type_size); + b = (GFC_UINTEGER_4 *) (other_shared_ptr + i * type_size); + if (memcmp4 (b, a, char_len) > 0) + memcpy (a, b, type_size); + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + char *src = buffer; + char *restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + memcpy (dest, src, type_size); + src += span; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +void nca_collsub_min_array_s4 (gfc_array_s4 * restrict array, int *result_image, + int *stat, char *errmsg, index_type char_len, + index_type errmsg_len); +export_proto (nca_collsub_min_array_s4); + +void +nca_collsub_min_array_s4 (gfc_array_s4 * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type char_len, + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */ + index_type extent[GFC_MAX_DIMENSIONS]; + char *this_shared_ptr; /* Points to the shared memory allocated to this image. 
*/ + char *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + index_type type_size; + collsub_iface *ci; + + ci = &local->ci; + + type_size = char_len * sizeof (GFC_UINTEGER_4); + dim = GFC_DESCRIPTOR_RANK (array); + num_elems = 1; + ssize = type_size; + packed = true; + span = array->span != 0 ? array->span : type_size; + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n)) + packed = false; + + num_elems *= extent[n]; + } + + ssize = num_elems * type_size; + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * ssize; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + char *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + + memcpy (dest, src, type_size); + dest += type_size; + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. 
Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 + && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + char *other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + GFC_UINTEGER_4 *a; + GFC_UINTEGER_4 *b; + + other_shared_ptr = this_shared_ptr + imoffset * ssize; + for (index_type i = 0; i < num_elems; i++) + { + a = (GFC_UINTEGER_4 *) (this_shared_ptr + i * type_size); + b = (GFC_UINTEGER_4 *) (other_shared_ptr + i * type_size); + if (memcmp4 (b, a, char_len) < 0) + memcpy (a, b, type_size); + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + char *src = buffer; + char *restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + memcpy (dest, src, type_size); + src += span; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} + +#endif + diff --git a/libgfortran/libgfortran.h b/libgfortran/libgfortran.h index 8c539e0..a6b0d5a 100644 --- a/libgfortran/libgfortran.h +++ b/libgfortran/libgfortran.h @@ -403,6 +403,7 @@ struct {\ } typedef GFC_FULL_ARRAY_DESCRIPTOR (GFC_MAX_DIMENSIONS, GFC_INTEGER_4) gfc_full_array_i4; +typedef GFC_FULL_ARRAY_DESCRIPTOR (GFC_MAX_DIMENSIONS, char) gfc_full_array_char; #define GFC_DESCRIPTOR_RANK(desc) ((desc)->dtype.rank) #define GFC_DESCRIPTOR_TYPE(desc) ((desc)->dtype.type) diff --git a/libgfortran/m4/nca-minmax-s.m4 b/libgfortran/m4/nca-minmax-s.m4 new file mode 100644 index 0000000..2d8891f --- /dev/null +++ b/libgfortran/m4/nca-minmax-s.m4 @@ -0,0 +1,289 @@ +dnl Support macro file for intrinsic functions. +dnl Contains the generic sections of gfortran functions. +dnl This file is part of the GNU Fortran Runtime Library (libgfortran) +dnl Distributed under the GNU GPL with exception. See COPYING for details. +dnl +`/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */' + +include(iparm.m4)dnl +define(`compare_fcn',`ifelse(rtype_kind,1,memcmp,memcmp4)')dnl +define(SCALAR_FUNCTION,`void nca_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image, + int *stat, char *errmsg, index_type char_len, index_type errmsg_len); +export_proto(nca_collsub_'$1`_scalar_'rtype_code`); + +void +nca_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type char_len, + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + 'rtype_name` *a, *b; + 'rtype_name` *buffer, *this_image_buf; + collsub_iface *ci; + index_type type_size; + + ci = &local->ci; + + type_size = char_len * sizeof ('rtype_name`); + buffer = get_collsub_buf (ci, type_size * local->num_images); + this_image_buf = buffer + this_image.image_num * char_len; + memcpy (this_image_buf, obj, type_size); + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 + && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset * char_len; + if ('compare_fcn` (b, a, char_len) '$2` 0) + memcpy (a, b, type_size); + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + memcpy (obj, buffer, type_size); + + finish_collective_subroutine (ci); + +} + +')dnl 
+define(ARRAY_FUNCTION,dnl +`void nca_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image, + int *stat, char *errmsg, index_type char_len, + index_type errmsg_len); +export_proto (nca_collsub_'$1`_array_'rtype_code`); + +void +nca_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type char_len, + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; /* stride is byte-based here. */ + index_type extent[GFC_MAX_DIMENSIONS]; + char *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + char *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + index_type type_size; + collsub_iface *ci; + + ci = &local->ci; + + type_size = char_len * sizeof ('rtype_name`); + dim = GFC_DESCRIPTOR_RANK (array); + num_elems = 1; + ssize = type_size; + packed = true; + span = array->span != 0 ? array->span : type_size; + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (num_elems != GFC_DESCRIPTOR_STRIDE (array,n)) + packed = false; + + num_elems *= extent[n]; + } + + ssize = num_elems * type_size; + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * ssize; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + char *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + + memcpy (dest, src, type_size); + dest += type_size; + src += stride0; + count[0] ++; + /* Advance to the next source element. 
*/ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 + && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + char *other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + 'rtype_name` *a; + 'rtype_name` *b; + + other_shared_ptr = this_shared_ptr + imoffset * ssize; + for (index_type i = 0; i < num_elems; i++) + { + a = ('rtype_name` *) (this_shared_ptr + i * type_size); + b = ('rtype_name` *) (other_shared_ptr + i * type_size); + if ('compare_fcn` (b, a, char_len) '$2` 0) + memcpy (a, b, type_size); + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + char *src = buffer; + char *restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + memcpy (dest, src, type_size); + src += span; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +') +` +#include "libgfortran.h" + +#if defined (HAVE_'rtype_name`) +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +#if 'rtype_kind` == 4 + +/* Compare wide character types, which are handled internally as + unsigned 4-byte integers. */ +static inline int +memcmp4 (const void *a, const void *b, size_t len) +{ + const GFC_UINTEGER_4 *pa = a; + const GFC_UINTEGER_4 *pb = b; + while (len-- > 0) + { + if (*pa != *pb) + return *pa < *pb ? -1 : 1; + pa ++; + pb ++; + } + return 0; +} + +#endif +'SCALAR_FUNCTION(`max',`>')dnl +SCALAR_FUNCTION(`min',`<')dnl +ARRAY_FUNCTION(`max',`>')dnl +ARRAY_FUNCTION(`min',`<')dnl +` +#endif +' diff --git a/libgfortran/m4/nca_minmax.m4 b/libgfortran/m4/nca_minmax.m4 new file mode 100644 index 0000000..76070c1 --- /dev/null +++ b/libgfortran/m4/nca_minmax.m4 @@ -0,0 +1,259 @@ +dnl Support macro file for intrinsic functions. +dnl Contains the generic sections of gfortran functions. +dnl This file is part of the GNU Fortran Runtime Library (libgfortran) +dnl Distributed under the GNU GPL with exception. See COPYING for details. +dnl +`/* Implementation of collective subroutines minmax. + Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. 
+ +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */' + +include(iparm.m4)dnl +define(SCALAR_FUNCTION,`void nca_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto(nca_collsub_'$1`_scalar_'rtype_code`); + +void +nca_collsub_'$1`_scalar_'rtype_code` ('rtype_name` *obj, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + int cbit = 0; + int imoffset; + 'rtype_name` *a, *b; + 'rtype_name` *buffer, *this_image_buf; + collsub_iface *ci; + + ci = &local->ci; + + buffer = get_collsub_buf (ci, sizeof('rtype_name`) * local->num_images); + this_image_buf = buffer + this_image.image_num; + *this_image_buf = *obj; + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + a = this_image_buf; + b = this_image_buf + imoffset; + '$2` + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + *obj = *buffer; + + finish_collective_subroutine (ci); + +} + +')dnl +define(ARRAY_FUNCTION,dnl +`void nca_collsub_'$1`_array_'rtype_code` 
('rtype` * restrict array, int *result_image, + int *stat, char *errmsg, index_type errmsg_len); +export_proto (nca_collsub_'$1`_array_'rtype_code`); + +void +nca_collsub_'$1`_array_'rtype_code` ('rtype` * restrict array, int *result_image, + int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + index_type errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + 'rtype_name` *this_shared_ptr; /* Points to the shared memory allocated to this image. */ + 'rtype_name` *buffer; + index_type dim; + bool packed; + index_type span; + index_type ssize, num_elems; + int cbit = 0; + int imoffset; + collsub_iface *ci; + + ci = &local->ci; + + dim = GFC_DESCRIPTOR_RANK (array); + ssize = sizeof ('rtype_name`); + packed = true; + span = array->span != 0 ? array->span : (index_type) sizeof ('rtype_name`); + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (array, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (array, n); + + /* No-op for an empty array. */ + if (extent[n] <= 0) + return; + + if (ssize != stride[n]) + packed = false; + + ssize *= extent[n]; + } + + num_elems = ssize / sizeof ('rtype_name`); + + buffer = get_collsub_buf (ci, ssize * local->num_images); + this_shared_ptr = buffer + this_image.image_num * num_elems; + + if (packed) + memcpy (this_shared_ptr, array->base_addr, ssize); + else + { + char *src = (char *) array->base_addr; + 'rtype_name` *restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + *(dest++) = *(('rtype_name` *) src); + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + + collsub_sync (ci); + + /* Reduce the array to image zero. Here the general scheme: + + abababababab + a_b_a_b_a_b_ + a___b___a___ + a_______b___ + r___________ + */ + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + { + 'rtype_name` * other_shared_ptr; /* Points to the shared memory + allocated to another image. */ + 'rtype_name` *a; + 'rtype_name` *b; + + other_shared_ptr = this_shared_ptr + num_elems * imoffset; + for (index_type i = 0; i < num_elems; i++) + { + a = this_shared_ptr + i; + b = other_shared_ptr + i; + '$2` + } + } + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || (*result_image - 1) == this_image.image_num) + { + if (packed) + memcpy (array->base_addr, buffer, ssize); + else + { + 'rtype_name` *src = buffer; + char * restrict dest = (char *) array->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + *(('rtype_name` * ) dest) = *src++; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. 
*/ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); +} +') +`#include "libgfortran.h" + +#if defined (HAVE_'rtype_name`)' +#include <string.h> +#include "../nca/libcoarraynative.h" +#include "../nca/collective_subroutine.h" +#include "../nca/collective_inline.h" + +SCALAR_FUNCTION(`max',`if (*b > *a) + *a = *b;')dnl +SCALAR_FUNCTION(`min',`if (*b < *a) + *a = *b;')dnl +SCALAR_FUNCTION(`sum',`*a += *b;')dnl +ARRAY_FUNCTION(`max',`if (*b > *a) + *a = *b;')dnl +ARRAY_FUNCTION(`min',`if (*b < *a) + *a = *b;')dnl +ARRAY_FUNCTION(`sum',`*a += *b;')dnl +` +#endif +' diff --git a/libgfortran/nca/.tags b/libgfortran/nca/.tags new file mode 100644 index 0000000..07d260d --- /dev/null +++ b/libgfortran/nca/.tags @@ -0,0 +1,275 @@ +!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/ +!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/ +!_TAG_PROGRAM_AUTHOR Darren Hiebert /dhiebert@users.sourceforge.net/ +!_TAG_PROGRAM_NAME Exuberant Ctags // +!_TAG_PROGRAM_URL http://ctags.sourceforge.net /official site/ +!_TAG_PROGRAM_VERSION 5.9~svn20110310 // +ALIGN_TO serialize.c 47;" d file: +ALLOC_H alloc.h 26;" d +COARRAY_LOCK_HDR lock.h 27;" d +COARRAY_NATIVE_HDR libcoarraynative.h 30;" d +COLLECTIVE_SUBROUTINE_HDR collective_subroutine.h 3;" d +CRITICAL_LOOKAHEAD hashmap.c 30;" d file: +DEBUG_PRINTF libcoarraynative.h 45;" d +DEBUG_PRINTF libcoarraynative.h 47;" d +FILL_VALUE serialize.c 57;" d file: +GFC_NCA_EVENT_COARRAY wrapper.c /^ GFC_NCA_EVENT_COARRAY,$/;" e enum:gfc_coarray_allocation_type file: +GFC_NCA_LOCK_COARRAY wrapper.c /^ GFC_NCA_LOCK_COARRAY,$/;" e enum:gfc_coarray_allocation_type file: +GFC_NCA_NORMAL_COARRAY wrapper.c /^ GFC_NCA_NORMAL_COARRAY = 3,$/;" e enum:gfc_coarray_allocation_type file: +GFORTRAN_ENV_NUM_IMAGES coarraynative.c 37;" d file: +HASHMAP_H hashmap.h 69;" d 
+IMAGE_FAILED libcoarraynative.h /^ IMAGE_FAILED$/;" e enum:__anon7 +IMAGE_FAILED master.c /^ IMAGE_FAILED$/;" e enum:__anon5 file: +IMAGE_OK libcoarraynative.h /^ IMAGE_OK,$/;" e enum:__anon7 +IMAGE_OK master.c /^ IMAGE_OK,$/;" e enum:__anon5 file: +IMAGE_UNKNOWN libcoarraynative.h /^ IMAGE_UNKNOWN = 0,$/;" e enum:__anon7 +IMAGE_UNKNOWN master.c /^ IMAGE_UNKNOWN = 0,$/;" e enum:__anon5 file: +INDENT hashmap.c 403;" d file: +INDENT hashmap.c 414;" d file: +INDENT hashmap.c 415;" d file: +INDENT hashmap.c 429;" d file: +INDENT hashmap.c 430;" d file: +INITIAL_BITNUM hashmap.c 28;" d file: +INITIAL_SIZE hashmap.c 29;" d file: +IPSYNC_HDR sync.h 26;" d +ITER malloc_test.c 13;" d file: +MAX_ALIGN allocator.c 50;" d file: +MAX_NUM malloc_test.c 10;" d file: +MEMOBJ_NAME util.c 11;" d file: +MIN_NUM malloc_test.c 11;" d file: +NUM_BITS malloc_test.c 8;" d file: +NUM_SIZES malloc_test.c 9;" d file: +PE hashmap.c 402;" d file: +PTR_BITS util.h 32;" d +SHARED_ALLOCATOR_HDR allocator.h 2;" d +SHARED_MEMORY_H shared_memory.h 77;" d +SHARED_MEMORY_RAW_ALLOC shared_memory.h 50;" d +SHARED_MEMORY_RAW_ALLOC_PTR shared_memory.h 53;" d +SHMPTR_AS shared_memory.h 46;" d +SHMPTR_DEREF shared_memory.h 44;" d +SHMPTR_EQUALS shared_memory.h 48;" d +SHMPTR_IS_NULL shared_memory.h 42;" d +SHMPTR_NULL shared_memory.h 41;" d +SHMPTR_SET shared_memory.h 47;" d +SZ malloc_test.c 12;" d file: +UTIL_HDR util.h 26;" d +a hashmap.h /^ allocator *a;$/;" m struct:hashmap +a sync.h /^ allocator *a;$/;" m struct:__anon3 +ai libcoarraynative.h /^ alloc_iface ai;$/;" m struct:__anon11 +alignto util.c /^alignto(size_t size, size_t align) {$/;" f +alloc alloc.h /^ allocator alloc;$/;" m struct:alloc_iface +alloc_iface alloc.h /^typedef struct alloc_iface$/;" s +alloc_iface alloc.h /^} alloc_iface;$/;" t typeref:struct:alloc_iface +alloc_iface_init alloc.c /^alloc_iface_init (alloc_iface *iface, shared_memory *mem)$/;" f +alloc_iface_init alloc.h /^internal_proto (alloc_iface_init);$/;" v 
+alloc_iface_shared alloc.h /^typedef struct alloc_iface_shared$/;" s +alloc_iface_shared alloc.h /^} alloc_iface_shared;$/;" t typeref:struct:alloc_iface_shared +allocator allocator.h /^} allocator;$/;" t typeref:struct:__anon17 +allocator_init allocator.c /^allocator_init (allocator *a, allocator_shared *s, shared_memory *sm)$/;" f +allocator_s alloc.h /^ allocator_shared allocator_s;$/;" m struct:alloc_iface_shared +allocator_shared allocator.h /^} allocator_shared;$/;" t typeref:struct:__anon16 +allocs shared_memory.c /^ } allocs[];$/;" m struct:shared_memory_act typeref:struct:shared_memory_act::local_alloc file: +arr lock.h /^ pthread_mutex_t arr[];$/;" m struct:__anon12 +as alloc.h /^ alloc_iface_shared *as;$/;" m struct:alloc_iface +barrier collective_subroutine.h /^ pthread_barrier_t barrier;$/;" m struct:collsub_iface_shared +barrier libcoarraynative.h /^ pthread_barrier_t barrier;$/;" m struct:__anon6 +base shared_memory.c /^ void *base;$/;" m struct:shared_memory_act::local_alloc file: +bitnum hashmap.h /^ int bitnum;$/;" m struct:__anon14 +bucket allocator.c /^} bucket;$/;" t typeref:struct:__anon1 file: +ci libcoarraynative.h /^ collsub_iface ci;$/;" m struct:__anon11 +cis sync.h /^ sync_iface_shared *cis;$/;" m struct:__anon3 +collsub_broadcast collective_subroutine.h /^internal_proto (collsub_broadcast);$/;" v +collsub_buf collective_subroutine.c /^void *collsub_buf = NULL;$/;" v +collsub_buf collective_subroutine.h /^ shared_mem_ptr collsub_buf;$/;" m struct:collsub_iface_shared +collsub_buf collective_subroutine.h /^ void *collsub_buf; \/* Cached pointer to shared collsub_buf. 
*\/$/;" m struct:collsub_iface +collsub_buf collective_subroutine.h /^internal_proto (collsub_buf);$/;" v +collsub_iface collective_subroutine.h /^typedef struct collsub_iface$/;" s +collsub_iface collective_subroutine.h /^} collsub_iface;$/;" t typeref:struct:collsub_iface +collsub_iface_shared collective_subroutine.h /^typedef struct collsub_iface_shared $/;" s +collsub_iface_shared collective_subroutine.h /^} collsub_iface_shared;$/;" t typeref:struct:collsub_iface_shared +collsub_reduce collective_subroutine.c /^collsub_reduce (void *obj, size_t nobjs, int *result_image, size_t size, $/;" f +collsub_reduce collective_subroutine.h /^internal_proto (collsub_reduce);$/;" v +collsub_sync collective_subroutine.c /^collsub_sync (void) {$/;" f +collsub_sync collective_subroutine.h /^internal_proto (collsub_sync);$/;" v +copy_from collective_inline.h /^copy_from (int image) $/;" f +copy_in collective_inline.h /^copy_in (void *obj) {$/;" f +copy_out collective_inline.h /^copy_out (void *obj, int image)$/;" f +copy_to collective_inline.h /^copy_to (void *obj, int image)$/;" f +curr_size collective_subroutine.c /^size_t curr_size = 0;$/;" v +curr_size collective_subroutine.h /^ size_t curr_size;$/;" m struct:collsub_iface_shared +curr_size collective_subroutine.h /^internal_proto (curr_size);$/;" v +data hashmap.h /^ shared_mem_ptr data;$/;" m struct:__anon14 +deserialize_memory serialize.c /^export_proto (deserialize_memory);$/;" v +div_ru wrapper.c /^div_ru (int divident, int divisor)$/;" f file: +dump_hm hashmap.c /^dump_hm(hashmap *hm) {$/;" f +enlarge_hashmap_mem hashmap.c /^enlarge_hashmap_mem (hashmap *hm, hashmap_entry **data, bool f)$/;" f file: +ensure_initialization coarraynative.c /^ensure_initialization(void) {$/;" f +fd shared_memory.c /^ int fd;$/;" m struct:__anon18 file: +finish_collective_subroutine collective_inline.h /^finish_collective_subroutine (void) $/;" f +free_bucket_head allocator.h /^ shared_mem_ptr free_bucket_head[PTR_BITS];$/;" m 
struct:__anon16 +free_memory_with_id alloc.c /^free_memory_with_id (alloc_iface* iface, memid id)$/;" f +free_memory_with_id alloc.h /^internal_proto (free_memory_with_id);$/;" v +gen_mask hashmap.c /^gen_mask (hashmap *hm)$/;" f file: +get_allocator alloc.c /^get_allocator (alloc_iface * iface)$/;" f +get_allocator alloc.h /^internal_proto (get_allocator);$/;" v +get_data hashmap.c /^get_data(hashmap *hm)$/;" f file: +get_environ_image_num coarraynative.c /^get_environ_image_num (void)$/;" f file: +get_expected_offset hashmap.c /^get_expected_offset (hashmap *hm, memid id)$/;" f file: +get_locked_table sync.c /^get_locked_table(sync_iface *si) { \/\/ The initialization of the table has to $/;" f file: +get_master coarraynative.c /^get_master (void) {$/;" f file: +get_memory_by_id alloc.c /^get_memory_by_id (alloc_iface *iface, size_t size, memid id)$/;" f +get_memory_by_id alloc.h /^internal_proto (get_memory_by_id);$/;" v +get_obj_ptr collective_inline.h /^get_obj_ptr (int image) $/;" f +get_shared_memory_act_size shared_memory.c /^get_shared_memory_act_size (int nallocs)$/;" f file: +get_shmem_fd util.c /^get_shmem_fd (void)$/;" f +gfc_coarray_allocation_type wrapper.c /^enum gfc_coarray_allocation_type {$/;" g file: +global_shared_memory_meta shared_memory.c /^} global_shared_memory_meta;$/;" t typeref:struct:__anon18 file: +has_failed_image libcoarraynative.h /^ int has_failed_image;$/;" m struct:__anon9 +has_failed_image master.c /^ int has_failed_image = 0;$/;" m struct:__anon4 file: +hash hashmap.c /^hash (uint64_t key)$/;" f file: +hashmap hashmap.h /^typedef struct hashmap$/;" s +hashmap hashmap.h /^} hashmap;$/;" t typeref:struct:hashmap +hashmap_change_refcnt hashmap.c /^hashmap_change_refcnt (hashmap *hm, memid id, hashmap_search_result *res,$/;" f file: +hashmap_dec hashmap.c /^hashmap_dec (hashmap *hm, memid id, hashmap_search_result * res)$/;" f +hashmap_entry hashmap.c /^} hashmap_entry;$/;" t typeref:struct:__anon13 file: +hashmap_get hashmap.c 
/^hashmap_get (hashmap *hm, memid id)$/;" f +hashmap_inc hashmap.c /^hashmap_inc (hashmap *hm, memid id, hashmap_search_result * res)$/;" f +hashmap_init hashmap.c /^hashmap_init (hashmap *hm, hashmap_shared *hs, allocator *a,$/;" f +hashmap_search_result hashmap.h /^} hashmap_search_result;$/;" t typeref:struct:__anon15 +hashmap_set hashmap.c /^hashmap_set (hashmap *hm, memid id, hashmap_search_result *hsr,$/;" f +hashmap_shared hashmap.h /^} hashmap_shared;$/;" t typeref:struct:__anon14 +header shared_memory.c /^ void *header;$/;" m struct:shared_memory_act file: +hm alloc.h /^ hashmap hm;$/;" m struct:alloc_iface +hm_search_result_contains hashmap.c /^hm_search_result_contains (hashmap_search_result *res)$/;" f +hm_search_result_ptr hashmap.c /^hm_search_result_ptr (hashmap_search_result *res)$/;" f +hm_search_result_size hashmap.c /^hm_search_result_size (hashmap_search_result *res)$/;" f +hmiadd hashmap.c /^hmiadd (hashmap *hm, size_t s, ssize_t o) {$/;" f file: +hms alloc.h /^ hashmap_shared hms;$/;" m struct:alloc_iface_shared +id hashmap.c /^ memid id;$/;" m struct:__anon13 file: +image libcoarraynative.h /^} image;$/;" t typeref:struct:__anon10 +image_main_wrapper master.c /^image_main_wrapper (void (*image_main) (void), int this_image_num)$/;" f file: +image_num libcoarraynative.h /^ int image_num;$/;" m struct:__anon10 +image_status libcoarraynative.h /^} image_status;$/;" t typeref:enum:__anon7 +image_tracker libcoarraynative.h /^} image_tracker;$/;" t typeref:struct:__anon8 +images libcoarraynative.h /^ image_tracker images[];$/;" m struct:__anon9 +images master.c /^ struct image_status * images;$/;" m struct:__anon4 typeref:struct:__anon4::image_status file: +init_collsub collective_subroutine.c /^init_collsub (void) {$/;" f +init_collsub collective_subroutine.h /^internal_proto (init_collsub);$/;" v +initialize_shared_mutex util.c /^initialize_shared_mutex (pthread_mutex_t *mutex)$/;" f +initialized lock.h /^ int initialized;$/;" m struct:__anon12 
+ipcollsub libcoarraynative.h /^} ipcollsub;$/;" t typeref:struct:__anon6 +last_base shared_memory.c /^last_base (shared_memory_act *mem)$/;" f file: +last_seen_size shared_memory.c /^ size_t last_seen_size;$/;" m struct:shared_memory_act file: +local coarraynative.c /^nca_local_data *local = NULL;$/;" v +local_alloc shared_memory.c /^ struct local_alloc {$/;" s struct:shared_memory_act file: +lock alloc.h /^ pthread_mutex_t lock;$/;" m struct:alloc_iface_shared +lock_array lock.h /^} lock_array;$/;" t typeref:struct:__anon12 +lock_table sync.c /^lock_table (sync_iface *si)$/;" f file: +m libcoarraynative.h /^ master *m;$/;" m struct:__anon10 +main malloc_test.c /^int main()$/;" f +map_memory shared_memory.c /^map_memory (int fd, size_t size, off_t offset)$/;" f file: +master libcoarraynative.h /^} master;$/;" t typeref:struct:__anon9 +master master.c /^} master;$/;" t typeref:struct:__anon4 file: +max_lookahead hashmap.c /^ int max_lookahead; $/;" m struct:__anon13 file: +maximg libcoarraynative.h /^ int maximg;$/;" m struct:__anon6 +mem alloc.h /^ shared_memory *mem;$/;" m struct:alloc_iface +memid hashmap.h /^typedef intptr_t memid;$/;" t +meta shared_memory.c /^ global_shared_memory_meta *meta;$/;" m struct:shared_memory_act file: +n_ent hashmap.c /^static ssize_t n_ent;$/;" v file: +nca_co_broadcast collective_subroutine.c /^export_proto (nca_co_broadcast);$/;" v +nca_co_broadcast collective_subroutine.c /^nca_co_broadcast (gfc_array_char * restrict a, int source_image,$/;" f +nca_coarray_alloc wrapper.c /^export_proto (nca_coarray_alloc);$/;" v +nca_coarray_alloc wrapper.c /^nca_coarray_alloc (gfc_array_void *desc, int elem_size, int corank,$/;" f +nca_coarray_free wrapper.c /^export_proto (nca_coarray_free);$/;" v +nca_coarray_free wrapper.c /^nca_coarray_free (gfc_array_void *desc, int alloc_type)$/;" f +nca_coarray_num_images wrapper.c /^export_proto (nca_coarray_num_images);$/;" v +nca_coarray_num_images wrapper.c /^nca_coarray_num_images (int distance 
__attribute__((unused)))$/;" f +nca_coarray_sync_all wrapper.c /^export_proto (nca_coarray_sync_all);$/;" v +nca_coarray_sync_all wrapper.c /^nca_coarray_sync_all (int *stat __attribute__((unused)))$/;" f +nca_coarray_this_image wrapper.c /^export_proto (nca_coarray_this_image);$/;" v +nca_coarray_this_image wrapper.c /^nca_coarray_this_image (int distance __attribute__((unused)))$/;" f +nca_collsub_reduce_array wrapper.c /^export_proto (nca_collsub_reduce_array);$/;" v +nca_collsub_reduce_array wrapper.c /^nca_collsub_reduce_array (gfc_array_void *desc, void (*assign_function) (void *, void *),$/;" f +nca_collsub_reduce_scalar wrapper.c /^export_proto (nca_collsub_reduce_scalar);$/;" v +nca_collsub_reduce_scalar wrapper.c /^nca_collsub_reduce_scalar (void *obj, index_type elem_size,$/;" f +nca_local_data libcoarraynative.h /^} nca_local_data;$/;" t typeref:struct:__anon11 +nca_lock wrapper.c /^export_proto (nca_lock);$/;" v +nca_lock wrapper.c /^nca_lock (void *lock)$/;" f +nca_master coarraynative.c /^nca_master (void (*image_main) (void)) {$/;" f +nca_master master.c /^nca_master (void (*image_main) (void)) {$/;" f +nca_master master.c /^nca_master (void (*image_main) (void))$/;" f +nca_sync_images wrapper.c /^export_proto (nca_sync_images);$/;" v +nca_sync_images wrapper.c /^nca_sync_images (size_t s, int *images,$/;" f +nca_unlock wrapper.c /^export_proto (nca_unlock);$/;" v +nca_unlock wrapper.c /^nca_unlock (void *lock)$/;" f +new_base_mapping shared_memory.c /^new_base_mapping (shared_memory_act *mem)$/;" f file: +next allocator.c /^ shared_mem_ptr next;$/;" m struct:__anon1 file: +next_power_of_two util.c /^next_power_of_two(size_t size) {$/;" f +num_entries hashmap.c /^num_entries (hashmap_entry *data, size_t size)$/;" f file: +num_images libcoarraynative.h /^ int num_images;$/;" m struct:__anon11 +num_local_allocs shared_memory.c /^ size_t num_local_allocs;$/;" m struct:shared_memory_act file: +offset shared_memory.h /^ ssize_t offset;$/;" m 
struct:shared_mem_ptr +owner lock.h /^ int owner;$/;" m struct:__anon12 +p hashmap.c /^ shared_mem_ptr p; \/* If p == SHMPTR_NULL, the entry is empty. *\/$/;" m struct:__anon13 file: +p hashmap.h /^ shared_mem_ptr p;$/;" m struct:__anon15 +pagesize util.c /^size_t pagesize = 1<<17;$/;" v +pid libcoarraynative.h /^ pid_t pid;$/;" m struct:__anon8 +prepare_collective_subroutine collective_subroutine.c /^prepare_collective_subroutine (size_t size)$/;" f +prepare_collective_subroutine collective_subroutine.h /^internal_proto (prepare_collective_subroutine);$/;" v +refcnt hashmap.c /^ int refcnt;$/;" m struct:__anon13 file: +res_offset hashmap.h /^ ssize_t res_offset;$/;" m struct:__anon15 +resize_hm hashmap.c /^resize_hm (hashmap *hm, hashmap_entry **data)$/;" f file: +round_to_pagesize util.c /^round_to_pagesize(size_t s) {$/;" f +s allocator.h /^ allocator_shared *s;$/;" m struct:__anon17 +s collective_subroutine.h /^ collsub_iface_shared *s;$/;" m struct:collsub_iface +s hashmap.c /^ size_t s;$/;" m struct:__anon13 file: +s hashmap.h /^ hashmap_shared *s;$/;" m struct:hashmap +scan_empty hashmap.c /^scan_empty (hashmap *hm, ssize_t expected_off, memid id)$/;" f file: +scan_inside_lookahead hashmap.c /^scan_inside_lookahead (hashmap *hm, ssize_t expected_off, memid id)$/;" f file: +serialize_memory serialize.c /^export_proto (serialize_memory);$/;" v +serialize_memory serialize.c /^serialize_memory (gfc_array_char * const restrict source, char *d)$/;" f +shared_free allocator.c /^shared_free (allocator *a, shared_mem_ptr p, size_t size) {$/;" f +shared_malloc allocator.c /^shared_malloc (allocator *a, size_t size)$/;" f +shared_mem_ptr shared_memory.h /^typedef struct shared_mem_ptr$/;" s +shared_mem_ptr shared_memory.h /^} shared_mem_ptr;$/;" t typeref:struct:shared_mem_ptr +shared_mem_ptr_to_void_ptr shared_memory.c /^shared_mem_ptr_to_void_ptr(shared_memory_act **pmem, shared_mem_ptr smp)$/;" f +shared_mem_ptr_to_void_ptr shared_memory.h /^internal_proto 
(shared_mem_ptr_to_void_ptr);$/;" v +shared_memory shared_memory.h /^typedef struct shared_memory_act * shared_memory;$/;" t typeref:struct:shared_memory_act +shared_memory_act shared_memory.c /^typedef struct shared_memory_act$/;" s file: +shared_memory_act shared_memory.c /^} shared_memory_act;$/;" t typeref:struct:shared_memory_act file: +shared_memory_get_mem_with_alignment shared_memory.c /^shared_memory_get_mem_with_alignment (shared_memory_act **pmem, size_t size,$/;" f +shared_memory_get_mem_with_alignment shared_memory.h /^internal_proto (shared_memory_get_mem_with_alignment);$/;" v +shared_memory_init shared_memory.c /^shared_memory_init (shared_memory_act **pmem)$/;" f +shared_memory_init shared_memory.h /^internal_proto (shared_memory_init);$/;" v +shared_memory_prepare shared_memory.c /^shared_memory_prepare (shared_memory_act **pmem)$/;" f +shared_memory_prepare shared_memory.h /^internal_proto (shared_memory_prepare);$/;" v +shm allocator.h /^ shared_memory *shm;$/;" m struct:__anon17 +si libcoarraynative.h /^ sync_iface si;$/;" m struct:__anon11 +size hashmap.h /^ size_t size;$/;" m struct:__anon14 +size hashmap.h /^ size_t size;$/;" m struct:__anon15 +size shared_memory.c /^ size_t size;$/;" m struct:shared_memory_act::local_alloc file: +size shared_memory.c /^ size_t size;$/;" m struct:__anon18 file: +sm collective_subroutine.h /^ shared_memory *sm;$/;" m struct:collsub_iface +sm hashmap.h /^ shared_memory *sm;$/;" m struct:hashmap +sm libcoarraynative.h /^ shared_memory sm;$/;" m struct:__anon11 +sm sync.h /^ shared_memory *sm;$/;" m struct:__anon3 +status libcoarraynative.h /^ image_status status;$/;" m struct:__anon8 +sync_all sync.c /^sync_all (sync_iface *si)$/;" f +sync_all sync.h /^ pthread_barrier_t sync_all;$/;" m struct:__anon2 +sync_all sync.h /^internal_proto (sync_all);$/;" v +sync_all_init sync.c /^sync_all_init (pthread_barrier_t *b)$/;" f file: +sync_iface sync.h /^} sync_iface;$/;" t typeref:struct:__anon3 +sync_iface_init sync.c 
/^sync_iface_init (sync_iface *si, alloc_iface *ai, shared_memory *sm)$/;" f +sync_iface_init sync.h /^internal_proto (sync_iface_init);$/;" v +sync_iface_shared sync.h /^} sync_iface_shared;$/;" t typeref:struct:__anon2 +sync_table sync.c /^sync_table (sync_iface *si, int *images, size_t size)$/;" f +sync_table sync.h /^internal_proto (sync_table);$/;" v +table sync.h /^ int *table; \/\/ we can cache the table and the trigger pointers here$/;" m struct:__anon3 +table sync.h /^ shared_mem_ptr table;$/;" m struct:__anon2 +table_lock sync.h /^ pthread_mutex_t table_lock;$/;" m struct:__anon2 +this_image coarraynative.c /^image this_image;$/;" v +triggers sync.h /^ pthread_cond_t *triggers;$/;" m struct:__anon3 +triggers sync.h /^ shared_mem_ptr triggers;$/;" m struct:__anon2 +unlock_table sync.c /^unlock_table (sync_iface *si)$/;" f file: +used shared_memory.c /^ size_t used;$/;" m struct:__anon18 file: +wait_table_cond sync.c /^wait_table_cond (sync_iface *si, pthread_cond_t *cond)$/;" f file: diff --git a/libgfortran/nca/alloc.c b/libgfortran/nca/alloc.c new file mode 100644 index 0000000..174fe33 --- /dev/null +++ b/libgfortran/nca/alloc.c @@ -0,0 +1,152 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Thomas Koenig + +This file is part of the GNU Fortran Native Coarray Library (libnca). + +Libnca is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +Libnca is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* This provides the coarray-specific features (like IDs etc) for + allocator.c, in turn calling routines from shared_memory.c. +*/ + +#include "libgfortran.h" +#include "shared_memory.h" +#include "allocator.h" +#include "hashmap.h" +#include "alloc.h" + +#include <string.h> + +/* Return a local pointer into a shared memory object identified by + id. If the object is already found, it has been allocated before, + so just increase the reference counter. + + The pointers returned by this function remain valid even if the + size of the memory allocation changes (see shared_memory.c). */ + +static void * +get_memory_by_id_internal (alloc_iface *iface, size_t size, memid id, + bool zero_mem) +{ + hashmap_search_result res; + shared_mem_ptr shared_ptr; + void *ret; + + pthread_mutex_lock (&iface->as->lock); + shared_memory_prepare(iface->mem); + + res = hashmap_get (&iface->hm, id); + + if (hm_search_result_contains (&res)) + { + size_t found_size; + found_size = hm_search_result_size (&res); + if (found_size != size) + { + dprintf (2, "Size mismatch for coarray allocation id %p: " + "found = %lu != size = %lu\n", (void *) id, found_size, size); + pthread_mutex_unlock (&iface->as->lock); + exit (1); + } + shared_ptr = hm_search_result_ptr (&res); + hashmap_inc (&iface->hm, id, &res); + } + else + { + shared_ptr = shared_malloc (&iface->alloc, size); + hashmap_set (&iface->hm, id, NULL, shared_ptr, size); + } + + ret = SHMPTR_AS (void *, shared_ptr, iface->mem); + if (zero_mem) + memset (ret, '\0', size); + + pthread_mutex_unlock (&iface->as->lock); + return ret; +} + +void * +get_memory_by_id (alloc_iface *iface, size_t size, memid id) +{ + return get_memory_by_id_internal (iface, size, id, 0); +} + +void * 
+get_memory_by_id_zero (alloc_iface *iface, size_t size, memid id) +{ + return get_memory_by_id_internal (iface, size, id, 1); +} + +/* Free memory with id. Free it if this is the last image which + holds that memory segment, decrease the reference count otherwise. */ + +void +free_memory_with_id (alloc_iface* iface, memid id) +{ + hashmap_search_result res; + int entries_left; + + pthread_mutex_lock (&iface->as->lock); + shared_memory_prepare(iface->mem); + + res = hashmap_get (&iface->hm, id); + if (!hm_search_result_contains (&res)) + { + pthread_mutex_unlock (&iface->as->lock); + char buffer[100]; + snprintf (buffer, sizeof(buffer), "Error in free_memory_with_id: " + "%p not found", (void *) id); + dprintf (2, buffer); + // internal_error (NULL, buffer); + exit (1); + } + + entries_left = hashmap_dec (&iface->hm, id, &res); + assert (entries_left >=0); + + if (entries_left == 0) + { + shared_free (&iface->alloc, hm_search_result_ptr (&res), + hm_search_result_size (&res)); + } + + pthread_mutex_unlock (&iface->as->lock); + return; +} + +/* Allocate the shared memory interface. This is called before we have + multiple images. */ + +void +alloc_iface_init (alloc_iface *iface, shared_memory *mem) +{ + + iface->as = SHARED_MEMORY_RAW_ALLOC_PTR (mem, alloc_iface_shared); + iface->mem = mem; + initialize_shared_mutex (&iface->as->lock); + allocator_init (&iface->alloc, &iface->as->allocator_s, mem); + hashmap_init (&iface->hm, &iface->as->hms, &iface->alloc, mem); +} + +allocator * +get_allocator (alloc_iface * iface) +{ + return &iface->alloc; +} diff --git a/libgfortran/nca/alloc.h b/libgfortran/nca/alloc.h new file mode 100644 index 0000000..f65121c --- /dev/null +++ b/libgfortran/nca/alloc.h @@ -0,0 +1,67 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Nicolas Koenig + +This file is part of the GNU Fortran Native Coarray Library (libnca). 
+ +Libnca is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +Libnca is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#ifndef ALLOC_H +#define ALLOC_H + +#include "allocator.h" +#include "hashmap.h" + +/* High-level interface for shared memory allocation. */ + +/* This part of the alloc interface goes into shared memory. */ + +typedef struct alloc_iface_shared +{ + allocator_shared allocator_s; + hashmap_shared hms; + pthread_mutex_t lock; +} alloc_iface_shared; + +/* This is the local part. 
*/ + +typedef struct alloc_iface +{ + alloc_iface_shared *as; + shared_memory *mem; + allocator alloc; + hashmap hm; +} alloc_iface; + +void *get_memory_by_id (alloc_iface *, size_t, memid); +internal_proto (get_memory_by_id); + +void *get_memory_by_id_zero (alloc_iface *, size_t, memid); +internal_proto (get_memory_by_id_zero); + +void free_memory_with_id (alloc_iface *, memid); +internal_proto (free_memory_with_id); + +void alloc_iface_init (alloc_iface *, shared_memory *); +internal_proto (alloc_iface_init); + +allocator *get_allocator (alloc_iface *); +internal_proto (get_allocator); + +#endif diff --git a/libgfortran/nca/allocator.c b/libgfortran/nca/allocator.c new file mode 100644 index 0000000..e7aa9fd --- /dev/null +++ b/libgfortran/nca/allocator.c @@ -0,0 +1,90 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Nicolas Koenig + +This file is part of the GNU Fortran Native Coarray Library (libnca). + +Libnca is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +Libnca is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* A malloc() - and free() - like interface, but for shared memory + pointers, except that we pass the size to free as well. 
*/ + +#include "libgfortran.h" +#include "shared_memory.h" +#include "allocator.h" + +typedef struct { + shared_mem_ptr next; +} bucket; + +/* Initialize the allocator. */ + +void +allocator_init (allocator *a, allocator_shared *s, shared_memory *sm) +{ + a->s = s; + a->shm = sm; + for (int i = 0; i < PTR_BITS; i++) + s->free_bucket_head[i] = SHMPTR_NULL; +} + +/* Main allocation routine, works like malloc. Round up allocations + to the next power of two and keep free lists in buckets. */ + +#define MAX_ALIGN 16 + +shared_mem_ptr +shared_malloc (allocator *a, size_t size) +{ + shared_mem_ptr ret; + size_t sz; + size_t act_size; + int bucket_list_index; + + sz = next_power_of_two (size); + act_size = sz > sizeof (bucket) ? sz : sizeof (bucket); + bucket_list_index = __builtin_clzl(act_size); + + if (SHMPTR_IS_NULL (a->s->free_bucket_head[bucket_list_index])) + return shared_memory_get_mem_with_alignment (a->shm, act_size, MAX_ALIGN); + + ret = a->s->free_bucket_head[bucket_list_index]; + a->s->free_bucket_head[bucket_list_index] + = (SHMPTR_AS (bucket *, ret, a->shm)->next); + assert(ret.offset != 0); + return ret; +} + +/* Free memory. */ + +void +shared_free (allocator *a, shared_mem_ptr p, size_t size) { + bucket *b; + size_t sz; + int bucket_list_index; + size_t act_size; + + sz = next_power_of_two (size); + act_size = sz > sizeof (bucket) ? 
sz : sizeof (bucket); + bucket_list_index = __builtin_clzl(act_size); + + b = SHMPTR_AS(bucket *, p, a->shm); + b->next = a->s->free_bucket_head[bucket_list_index]; + a->s->free_bucket_head[bucket_list_index] = p; +} diff --git a/libgfortran/nca/allocator.h b/libgfortran/nca/allocator.h new file mode 100644 index 0000000..306022a --- /dev/null +++ b/libgfortran/nca/allocator.h @@ -0,0 +1,21 @@ +#ifndef SHARED_ALLOCATOR_HDR +#define SHARED_ALLOCATOR_HDR + +#include "util.h" +#include "shared_memory.h" + +typedef struct { + shared_mem_ptr free_bucket_head[PTR_BITS]; +} allocator_shared; + +typedef struct { + allocator_shared *s; + shared_memory *shm; +} allocator; + +void allocator_init (allocator *, allocator_shared *, shared_memory *); + +shared_mem_ptr shared_malloc (allocator *, size_t size); +void shared_free (allocator *, shared_mem_ptr, size_t size); + +#endif diff --git a/libgfortran/nca/coarraynative.c b/libgfortran/nca/coarraynative.c new file mode 100644 index 0000000..c9d13ee --- /dev/null +++ b/libgfortran/nca/coarraynative.c @@ -0,0 +1,145 @@ +/* Copyright (C) 2019-2020 Free Software Foundation, Inc. + Contributed by Nicolas Koenig + +This file is part of the GNU Fortran Native Coarray Library (libnca). + +Libnca is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +Libnca is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +#include "libgfortran.h" +#include "libcoarraynative.h" +#include "allocator.h" +#include "hashmap.h" +#include "util.h" +#include "lock.h" +#include "collective_subroutine.h" + +#include <unistd.h> +#include <sys/mman.h> +// #include <stdlib.h> +#include <sys/wait.h> + +#define GFORTRAN_ENV_NUM_IMAGES "GFORTRAN_NUM_IMAGES" + +nca_local_data *local = NULL; + +image this_image; + +static int +get_environ_image_num (void) +{ + char *num_images_char; + int nimages; + num_images_char = getenv (GFORTRAN_ENV_NUM_IMAGES); + if (!num_images_char) + return sysconf (_SC_NPROCESSORS_ONLN); /* TODO: Make portable. */ + /* TODO: Error checking. */ + nimages = atoi (num_images_char); + return nimages; +} + +void +ensure_initialization(void) +{ + if (local) + return; + + local = malloc(sizeof(nca_local_data)); // Is malloc already init'ed at that + // point? 
Maybe use mmap(MAP_ANON) + // instead + pagesize = sysconf (_SC_PAGE_SIZE); + local->num_images = get_environ_image_num (); + shared_memory_init (&local->sm); + shared_memory_prepare (&local->sm); + alloc_iface_init (&local->ai, &local->sm); + collsub_iface_init (&local->ci, &local->ai, &local->sm); + sync_iface_init (&local->si, &local->ai, &local->sm); +} + +static void __attribute__((noreturn)) +image_main_wrapper (void (*image_main) (void), image *this) +{ + this_image = *this; + + sync_all(&local->si); + + image_main (); + + exit (0); +} + +static master * +get_master (void) { + master *m; + m = SHMPTR_AS (master *, + shared_memory_get_mem_with_alignment + (&local->sm, + sizeof (master) + sizeof(image_status) * local->num_images, + __alignof__(master)), &local->sm); + m->has_failed_image = 0; + return m; +} + +/* This is called from main, with a pointer to the user's program as + argument. It forks the images and waits for their completion. */ + +void +nca_master (void (*image_main) (void)) { + master *m; + int i, j; + pid_t new; + image im; + int exit_code = 0; + int chstatus; + ensure_initialization(); + m = get_master(); + + im.m = m; + + for (im.image_num = 0; im.image_num < local->num_images; im.image_num++) + { + if ((new = fork())) + { + if (new == -1) + { + dprintf(2, "error spawning child\n"); + exit_code = 1; + } + m->images[im.image_num].pid = new; + m->images[im.image_num].status = IMAGE_OK; + } + else + image_main_wrapper(image_main, &im); + } + for (i = 0; i < local->num_images; i++) + { + new = wait (&chstatus); + if (!WIFEXITED (chstatus) || WEXITSTATUS (chstatus)) + { + j = 0; + for (; j < local->num_images && m->images[j].pid != new; j++); + m->images[j].status = IMAGE_FAILED; + m->has_failed_image++; //FIXME: Needs to be atomic, probably + dprintf (2, "ERROR: Image %d(%#x) failed\n", j, new); + exit_code = 1; + } + } + exit (exit_code); +} diff --git a/libgfortran/nca/collective_inline.h b/libgfortran/nca/collective_inline.h new file mode 
100644 index 0000000..4e7107b --- /dev/null +++ b/libgfortran/nca/collective_inline.h @@ -0,0 +1,42 @@ +#include "collective_subroutine.h" + +static inline void +finish_collective_subroutine (collsub_iface *ci) +{ + collsub_sync (ci); +} + +#if 0 +static inline void * +get_obj_ptr (void *buffer, int image) +{ + return (char *) + curr_size * image; +} + +/* If obj is NULL, copy the object from the entry in this image. */ +static inline void +copy_to (void *buffer, void *obj, int image) +{ + if (obj == 0) + obj = get_obj_ptr (this_image.image_num); + memcpy (get_obj_ptr (image), obj, curr_size); +} + +static inline void +copy_out (void *buffer, void *obj, int image) +{ + memcpy (obj, get_obj_ptr (image), curr_size); +} + +static inline void +copy_from (void *buffer, int image) +{ + copy_out (get_obj_ptr (this_image.image_num), image); +} + +static inline void +copy_in (void *buffer, void *obj) +{ + copy_to (obj, this_image.image_num); +} +#endif diff --git a/libgfortran/nca/collective_subroutine.c b/libgfortran/nca/collective_subroutine.c new file mode 100644 index 0000000..8a8a7d6 --- /dev/null +++ b/libgfortran/nca/collective_subroutine.c @@ -0,0 +1,416 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Nicolas Koenig + +This file is part of the GNU Fortran Native Coarray Library (libnca). + +Libnca is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +Libnca is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include <string.h> +#include "libgfortran.h" +#include "libcoarraynative.h" +#include "collective_subroutine.h" +#include "collective_inline.h" +#include "allocator.h" + +void * +get_collsub_buf (collsub_iface *ci, size_t size) +{ + void *ret; + + pthread_mutex_lock (&ci->s->mutex); + if (size > ci->s->curr_size) + { + shared_free (ci->a, ci->s->collsub_buf, ci->s->curr_size); + ci->s->collsub_buf = shared_malloc (ci->a, size); + ci->s->curr_size = size; + } + + ret = SHMPTR_AS (void *, ci->s->collsub_buf, ci->sm); + pthread_mutex_unlock (&ci->s->mutex); + return ret; +} + +/* It appears as if glibc's barrier implementation does not spin (at + least that is what I got from a quick glance at the source code), + so performance would be improved quite a bit if we spun a few times + here so we don't run into the futex syscall. */ + +void +collsub_sync (collsub_iface *ci) +{ + //dprintf (2, "Calling collsub_sync %d times\n", ++called); + pthread_barrier_wait (&ci->s->barrier); +} + +/* assign_function is needed since we only know how to assign the type inside + the compiler. 
It should be implemented as follows: + + void assign_function (void *a, void *b) + { + *((t *) a) = reduction_operation ((t *) a, (t *) b); + } + + */ + +void +collsub_reduce_array (collsub_iface *ci, gfc_array_char *desc, int *result_image, + void (*assign_function) (void *, void *)) +{ + void *buffer; + pack_info pi; + bool packed; + int cbit = 0; + int imoffset; + index_type elem_size; + index_type this_image_size_bytes; + char *this_image_buf; + + packed = pack_array_prepare (&pi, desc); + if (pi.num_elem == 0) + return; + + elem_size = GFC_DESCRIPTOR_SIZE (desc); + this_image_size_bytes = elem_size * pi.num_elem; + + buffer = get_collsub_buf (ci, this_image_size_bytes * local->num_images); + this_image_buf = buffer + this_image_size_bytes * this_image.image_num; + + if (packed) + memcpy (this_image_buf, GFC_DESCRIPTOR_DATA (desc), this_image_size_bytes); + else + pack_array_finish (&pi, desc, this_image_buf); + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + /* Reduce arrays elementwise. 
*/ + for (size_t i = 0; i < pi.num_elem; i++) + assign_function (this_image_buf + elem_size * i, + this_image_buf + this_image_size_bytes * imoffset + elem_size * i); + + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || *result_image == this_image.image_num) + { + if (packed) + memcpy (GFC_DESCRIPTOR_DATA (desc), buffer, this_image_size_bytes); + else + unpack_array_finish(&pi, desc, buffer); + } + + finish_collective_subroutine (ci); +} + +void +collsub_reduce_scalar (collsub_iface *ci, void *obj, index_type elem_size, + int *result_image, + void (*assign_function) (void *, void *)) +{ + void *buffer; + int cbit = 0; + int imoffset; + char *this_image_buf; + + buffer = get_collsub_buf (ci, elem_size * local->num_images); + this_image_buf = buffer + elem_size * this_image.image_num; + + memcpy (this_image_buf, obj, elem_size); + + collsub_sync (ci); + for (; ((this_image.image_num >> cbit) & 1) == 0 && (local->num_images >> cbit) != 0; cbit++) + { + imoffset = 1 << cbit; + if (this_image.image_num + imoffset < local->num_images) + /* Reduce arrays elementwise. */ + assign_function (this_image_buf, this_image_buf + elem_size*imoffset); + + collsub_sync (ci); + } + for ( ; (local->num_images >> cbit) != 0; cbit++) + collsub_sync (ci); + + if (!result_image || *result_image == this_image.image_num) + memcpy(obj, buffer, elem_size); + + finish_collective_subroutine (ci); +} + +/* Do not use sync_all(), because the program should deadlock in the case that + * some images are on a sync_all barrier while others are in a collective + * subroutine. 
*/ + +void +collsub_iface_init (collsub_iface *ci, alloc_iface *ai, shared_memory *sm) +{ + pthread_barrierattr_t attr; + shared_mem_ptr p; + ci->s = SHARED_MEMORY_RAW_ALLOC_PTR(sm, collsub_iface_shared); + + ci->s->collsub_buf = shared_malloc(get_allocator(ai), sizeof(double)*local->num_images); + ci->s->curr_size = sizeof(double)*local->num_images; + ci->sm = sm; + ci->a = get_allocator(ai); + + pthread_barrierattr_init (&attr); + pthread_barrierattr_setpshared (&attr, PTHREAD_PROCESS_SHARED); + pthread_barrier_init (&ci->s->barrier, &attr, local->num_images); + pthread_barrierattr_destroy(&attr); + + initialize_shared_mutex (&ci->s->mutex); +} + +void +collsub_broadcast_scalar (collsub_iface *ci, void *obj, index_type elem_size, + int source_image /* Adjusted in the wrapper. */) +{ + void *buffer; + + buffer = get_collsub_buf (ci, elem_size); + + dprintf(2, "Source image: %d\n", source_image); + + if (source_image == this_image.image_num) + { + memcpy (buffer, obj, elem_size); + collsub_sync (ci); + } + else + { + collsub_sync (ci); + memcpy (obj, buffer, elem_size); + } + + finish_collective_subroutine (ci); +} + +void +collsub_broadcast_array (collsub_iface *ci, gfc_array_char *desc, + int source_image) +{ + void *buffer; + pack_info pi; + bool packed; + index_type elem_size; + index_type size_bytes; + char *this_image_buf; + + packed = pack_array_prepare (&pi, desc); + if (pi.num_elem == 0) + return; + + elem_size = GFC_DESCRIPTOR_SIZE (desc); + size_bytes = elem_size * pi.num_elem; + + buffer = get_collsub_buf (ci, size_bytes); + + if (source_image == this_image.image_num) + { + if (packed) + memcpy (buffer, GFC_DESCRIPTOR_DATA (desc), size_bytes); + else + pack_array_finish (&pi, desc, buffer); + collsub_sync (ci); + } + else + { + collsub_sync (ci); + if (packed) + memcpy (GFC_DESCRIPTOR_DATA (desc), buffer, size_bytes); + else + unpack_array_finish(&pi, desc, buffer); + } + + finish_collective_subroutine (ci); +} + +#if 0 + +void nca_co_broadcast 
(gfc_array_char *, int, int*, char *, size_t); +export_proto (nca_co_broadcast); + +void +nca_co_broadcast (gfc_array_char * restrict a, int source_image, + int *stat, char *errmsg __attribute__ ((unused)), + size_t errmsg_len __attribute__ ((unused))) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type type_size; + index_type dim; + index_type span; + bool packed, empty; + index_type num_elems; + index_type ssize, ssize_bytes; + char *this_shared_ptr, *other_shared_ptr; + + if (stat) + *stat = 0; + + dim = GFC_DESCRIPTOR_RANK (a); + type_size = GFC_DESCRIPTOR_SIZE (a); + + /* Source image, gather. */ + if (source_image - 1 == image_num) + { + num_elems = 1; + if (dim > 0) + { + span = a->span != 0 ? a->span : type_size; + packed = true; + empty = false; + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE (a, n) * span; + extent[n] = GFC_DESCRIPTOR_EXTENT (a, n); + + empty = empty || extent[n] <= 0; + + if (num_elems != GFC_DESCRIPTOR_STRIDE (a, n)) + packed = false; + + num_elems *= extent[n]; + } + ssize_bytes = num_elems * type_size; + } + else + { + ssize_bytes = type_size; + packed = true; + empty = false; + } + + prepare_collective_subroutine (ssize_bytes); // broadcast barrier 1 + this_shared_ptr = get_obj_ptr (image_num); + if (packed) + memcpy (this_shared_ptr, a->base_addr, ssize_bytes); + else + { + char *src = (char *) a->base_addr; + char * restrict dest = this_shared_ptr; + index_type stride0 = stride[0]; + + while (src) + { + /* Copy the data. */ + + memcpy (dest, src, type_size); + dest += type_size; + src += stride0; + count[0] ++; + /* Advance to the next source element. */ + for (index_type n = 0; count[n] == extent[n] ; ) + { + /* When we get to the end of a dimension, reset it + and increment the next dimension. 
*/ + count[n] = 0; + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + } + collsub_sync (ci); /* Broadcast barrier 2. */ + } + else /* Target image, scatter. */ + { + collsub_sync (ci); /* Broadcast barrier 1. */ + packed = 1; + num_elems = 1; + span = a->span != 0 ? a->span : type_size; + + for (index_type n = 0; n < dim; n++) + { + index_type stride_n; + count[n] = 0; + stride_n = GFC_DESCRIPTOR_STRIDE (a, n); + stride[n] = stride_n * type_size; + extent[n] = GFC_DESCRIPTOR_EXTENT (a, n); + if (extent[n] <= 0) + { + packed = true; + num_elems = 0; + break; + } + if (num_elems != stride_n) + packed = false; + + num_elems *= extent[n]; + } + ssize = num_elems * type_size; + prepare_collective_subroutine (ssize); /* Broadcaset barrier 2. */ + other_shared_ptr = get_obj_ptr (source_image - 1); + if (packed) + memcpy (a->base_addr, other_shared_ptr, ssize); + else + { + char *src = other_shared_ptr; + char * restrict dest = (char *) a->base_addr; + index_type stride0 = stride[0]; + + for (index_type n = 0; n < dim; n++) + count[n] = 0; + + while (dest) + { + memcpy (dest, src, type_size); + src += span; + dest += stride0; + count[0] ++; + for (index_type n = 0; count[n] == extent[n] ;) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } + } + } + finish_collective_subroutine (ci); /* Broadcast barrier 3. 
*/ +} + +#endif diff --git a/libgfortran/nca/collective_subroutine.h b/libgfortran/nca/collective_subroutine.h new file mode 100644 index 0000000..6147dd6 --- /dev/null +++ b/libgfortran/nca/collective_subroutine.h @@ -0,0 +1,44 @@ + +#ifndef COLLECTIVE_SUBROUTINE_HDR +#define COLLECTIVE_SUBROUTINE_HDR + +#include "shared_memory.h" + +typedef struct collsub_iface_shared +{ + size_t curr_size; + shared_mem_ptr collsub_buf; + pthread_barrier_t barrier; + pthread_mutex_t mutex; +} collsub_iface_shared; + +typedef struct collsub_iface +{ + collsub_iface_shared *s; + allocator *a; + shared_memory *sm; +} collsub_iface; + +void collsub_broadcast_scalar (collsub_iface *, void *, index_type, int); +internal_proto (collsub_broadcast_scalar); + +void collsub_broadcast_array (collsub_iface *, gfc_array_char *, int); +internal_proto (collsub_broadcast_array); + +void collsub_reduce_array (collsub_iface *, gfc_array_char *, int *, + void (*) (void *, void *)); +internal_proto (collsub_reduce_array); + +void collsub_reduce_scalar (collsub_iface *, void *, index_type, int *, + void (*) (void *, void *)); +internal_proto (collsub_reduce_scalar); + +void collsub_sync (collsub_iface *); +internal_proto (collsub_sync); + +void collsub_iface_init (collsub_iface *, alloc_iface *, shared_memory *); +internal_proto (collsub_iface_init); + +void * get_collsub_buf (collsub_iface *ci, size_t size); +internal_proto (get_collsub_buf); +#endif diff --git a/libgfortran/nca/hashmap.c b/libgfortran/nca/hashmap.c new file mode 100644 index 0000000..61f5487 --- /dev/null +++ b/libgfortran/nca/hashmap.c @@ -0,0 +1,447 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Nicolas Koenig + +This file is part of the GNU Fortran Native Coarray Library (libnca). + +Libnca is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. 
+ +Libnca is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include "hashmap.h" +#include <string.h> + +#define INITIAL_BITNUM (5) +#define INITIAL_SIZE (1<<INITIAL_BITNUM) +#define CRITICAL_LOOKAHEAD (16) + +static ssize_t n_ent; + +typedef struct { + memid id; + shared_mem_ptr p; /* If p == SHMPTR_NULL, the entry is empty. */ + size_t s; + int max_lookahead; + int refcnt; +} hashmap_entry; + +static ssize_t +num_entries (hashmap_entry *data, size_t size) +{ + ssize_t i; + ssize_t ret = 0; + for (i = 0; i < size; i++) + { + if (!SHMPTR_IS_NULL (data[i].p)) + ret ++; + } + return ret; +} + +/* 64 bit to 64 bit hash function. */ + +/* +static inline uint64_t +hash (uint64_t x) +{ + return x * 11400714819323198485lu; +} +*/ + +#define ASSERT_HM(hm, cond) assert_hashmap(hm, cond, #cond) + +static void +assert_hashmap(hashmap *hm, bool asserted, const char *cond) +{ + if (!asserted) + { + dprintf(2, cond); + dump_hm(hm); + } + assert(asserted); +} + +static inline uint64_t +hash (uint64_t key) +{ + key ^= (key >> 30); + key *= 0xbf58476d1ce4e5b9ul; + key ^= (key >> 27); + key *= 0x94d049bb133111ebul; + key ^= (key >> 31); + + return key; +} + +/* Gets a pointer to the current data in the hashmap. 
*/ +static inline hashmap_entry * +get_data(hashmap *hm) +{ + return SHMPTR_AS (hashmap_entry *, hm->s->data, hm->sm); +} + +/* Generate mask from current number of bits. */ + +static inline intptr_t +gen_mask (hashmap *hm) +{ + return (1 << hm->s->bitnum) - 1; +} + +/* Add with wrap-around at hashmap size. */ + +static inline size_t +hmiadd (hashmap *hm, size_t s, ssize_t o) { + return (s + o) & gen_mask (hm); +} + +/* Get the expected offset for entry id. */ + +static inline ssize_t +get_expected_offset (hashmap *hm, memid id) +{ + return hash(id) >> (PTR_BITS - hm->s->bitnum); +} + +/* Initialize the hashmap. */ + +void +hashmap_init (hashmap *hm, hashmap_shared *hs, allocator *a, + shared_memory *mem) +{ + hashmap_entry *data; + hm->s = hs; + hm->sm = mem; + hm->s->data = shared_malloc (a, INITIAL_SIZE * sizeof(hashmap_entry)); + data = get_data (hm); + memset(data, '\0', INITIAL_SIZE*sizeof(hashmap_entry)); + + for (int i = 0; i < INITIAL_SIZE; i++) + data[i].p = SHMPTR_NULL; + + hm->s->size = INITIAL_SIZE; + hm->s->bitnum = INITIAL_BITNUM; + hm->a = a; +} + +/* This checks if the entry id exists in that range the range between + the expected position and the maximum lookahead. */ + +static ssize_t +scan_inside_lookahead (hashmap *hm, ssize_t expected_off, memid id) +{ + ssize_t lookahead; + hashmap_entry *data; + + data = get_data (hm); + lookahead = data[expected_off].max_lookahead; + ASSERT_HM (hm, lookahead < CRITICAL_LOOKAHEAD); + + for (int i = 0; i <= lookahead; i++) /* For performance, this could + iterate backwards. */ + if (data[hmiadd (hm, expected_off, i)].id == id) + return hmiadd (hm, expected_off, i); + + return -1; +} + +/* Scan for the next empty slot we can use. Returns offset relative + to the expected position. 
*/ + +static ssize_t +scan_empty (hashmap *hm, ssize_t expected_off, memid id) +{ + hashmap_entry *data; + + data = get_data(hm); + for (int i = 0; i < CRITICAL_LOOKAHEAD; i++) + if (SHMPTR_IS_NULL (data[hmiadd (hm, expected_off, i)].p)) + return i; + + return -1; +} + +/* Search the hashmap for id. */ + +hashmap_search_result +hashmap_get (hashmap *hm, memid id) +{ + hashmap_search_result ret; + hashmap_entry *data; + size_t expected_offset; + ssize_t res; + + data = get_data (hm); + expected_offset = get_expected_offset (hm, id); + res = scan_inside_lookahead (hm, expected_offset, id); + + if (res != -1) + ret = ((hashmap_search_result) + { .p = data[res].p, .size=data[res].s, .res_offset = res }); + else + ret.p = SHMPTR_NULL; + + return ret; +} + +/* Return size of a hashmap search result. */ +size_t +hm_search_result_size (hashmap_search_result *res) +{ + return res->size; +} + +/* Return pointer of a hashmap search result. */ + +shared_mem_ptr +hm_search_result_ptr (hashmap_search_result *res) +{ + return res->p; +} + +/* Return pointer of a hashmap search result. */ + +bool +hm_search_result_contains (hashmap_search_result *res) +{ + return !SHMPTR_IS_NULL(res->p); +} + +/* Enlarge hashmap memory. */ + +static void +enlarge_hashmap_mem (hashmap *hm, hashmap_entry **data, bool f) +{ + shared_mem_ptr old_data_p; + size_t old_size; + + old_data_p = hm->s->data; + old_size = hm->s->size; + + hm->s->data = shared_malloc (hm->a, (hm->s->size *= 2)*sizeof(hashmap_entry)); + fprintf (stderr,"enlarge_hashmap_mem: %ld\n", hm->s->data.offset); + hm->s->bitnum++; + + *data = get_data(hm); + for (size_t i = 0; i < hm->s->size; i++) + (*data)[i] = ((hashmap_entry) { .id = 0, .p = SHMPTR_NULL, .s=0, + .max_lookahead = 0, .refcnt=0 }); + + if (f) + shared_free(hm->a, old_data_p, old_size); +} + +/* Resize hashmap. 
*/ + +static void +resize_hm (hashmap *hm, hashmap_entry **data) +{ + shared_mem_ptr old_data_p; + hashmap_entry *old_data, *new_data; + size_t old_size; + ssize_t new_offset, inital_index, new_index; + memid id; + ssize_t max_lookahead; + ssize_t old_count, new_count; + + /* old_data points to the old block containing the hashmap. We + redistribute the data from there into the new block. */ + + old_data_p = hm->s->data; + old_data = *data; + old_size = hm->s->size; + old_count = num_entries (old_data, old_size); + + fprintf(stderr, "Occupancy at resize: %f\n", ((double) old_count)/old_size); + + //fprintf (stderr,"\n====== Resizing hashmap =========\n\nOld map:\n\n"); + //dump_hm (hm); + enlarge_hashmap_mem (hm, &new_data, false); + //fprintf (stderr,"old_data: %p new_data: %p\n", old_data, new_data); + retry_resize: + for (size_t i = 0; i < old_size; i++) + { + if (SHMPTR_IS_NULL (old_data[i].p)) + continue; + + id = old_data[i].id; + inital_index = get_expected_offset (hm, id); + new_offset = scan_empty (hm, inital_index, id); + + /* If we didn't find a free slot, just resize the hashmap + again. */ + if (new_offset == -1) + { + enlarge_hashmap_mem (hm, &new_data, true); + //fprintf (stderr,"\n====== AGAIN Resizing hashmap =========\n\n"); + //fprintf (stderr,"old_data: %p new_data %p\n", old_data, new_data); + goto retry_resize; /* Sue me. */ + } + + ASSERT_HM (hm, new_offset < CRITICAL_LOOKAHEAD); + new_index = hmiadd (hm, inital_index, new_offset); + max_lookahead = new_data[inital_index].max_lookahead; + new_data[inital_index].max_lookahead + = new_offset > max_lookahead ? 
new_offset : max_lookahead; + + new_data[new_index] = ((hashmap_entry) {.id = id, .p = old_data[i].p, + .s = old_data[i].s, + .max_lookahead = new_data[new_index].max_lookahead, + .refcnt = old_data[i].refcnt}); + } + new_count = num_entries (new_data, hm->s->size); + //fprintf (stderr,"Number of elements: %ld to %ld\n", old_count, new_count); + //fprintf (stderr,"============ After resizing: =======\n\n"); + //dump_hm (hm); + + shared_free (hm->a, old_data_p, old_size); + *data = new_data; +} + +/* Set an entry in the hashmap. */ + +void +hashmap_set (hashmap *hm, memid id, hashmap_search_result *hsr, + shared_mem_ptr p, size_t size) +{ + hashmap_entry *data; + ssize_t expected_offset, lookahead; + ssize_t empty_offset; + ssize_t delta; + + // //fprintf (stderr,"hashmap_set: id = %-16p\n", (void *) id); + data = get_data(hm); + + if (hsr) { + data[hsr->res_offset].s = size; + data[hsr->res_offset].p = p; + return; + } + + expected_offset = get_expected_offset (hm, id); + while ((delta = scan_empty (hm, expected_offset, id)) == -1) + { + resize_hm (hm, &data); + expected_offset = get_expected_offset (hm, id); + } + + empty_offset = hmiadd (hm, expected_offset, delta); + lookahead = data[expected_offset].max_lookahead; + data[expected_offset].max_lookahead = delta > lookahead ? delta : lookahead; + data[empty_offset] = ((hashmap_entry) {.id = id, .p = p, .s = size, + .max_lookahead = data[empty_offset].max_lookahead, + .refcnt = 1}); + + n_ent ++; + fprintf (stderr,"hashmap_set: Setting %p at %p, n_ent = %ld\n", (void *) id, data + empty_offset, + n_ent); + // dump_hm (hm); + // fprintf(stderr, "--------------------------------------------------\n"); + /* TODO: Shouldn't reset refcnt, but this doesn't matter at the + moment because of the way the function is used. */ +} + +/* Change the refcount of a hashmap entry. 
*/ + +static int +hashmap_change_refcnt (hashmap *hm, memid id, hashmap_search_result *res, + int delta) +{ + hashmap_entry *data; + hashmap_search_result r; + hashmap_search_result *pr; + int ret; + hashmap_entry *entry; + + data = get_data (hm); + + if (res) + pr = res; + else + { + r = hashmap_get (hm, id); + pr = &r; + } + + entry = &data[pr->res_offset]; + ret = (entry->refcnt += delta); + if (ret == 0) + { + n_ent --; + //fprintf (stderr, "hashmap_change_refcnt: removing %p at %p, n_ent = %ld\n", + // (void *) id, entry, n_ent); + entry->id = 0; + entry->p = SHMPTR_NULL; + entry->s = 0; + } + + return ret; +} + +/* Increase hashmap entry refcount. */ + +void +hashmap_inc (hashmap *hm, memid id, hashmap_search_result * res) +{ + int ret; + ret = hashmap_change_refcnt (hm, id, res, 1); + ASSERT_HM (hm, ret > 0); +} + +/* Decrease hashmap entry refcount. */ + +int +hashmap_dec (hashmap *hm, memid id, hashmap_search_result * res) +{ + int ret; + ret = hashmap_change_refcnt (hm, id, res, -1); + ASSERT_HM (hm, ret >= 0); + return ret; +} + +#define PE(str, ...) fprintf(stderr, INDENT str, ##__VA_ARGS__) +#define INDENT "" + +void +dump_hm(hashmap *hm) { + hashmap_entry *data; + size_t exp; + size_t occ_num = 0; + PE("h %p (size: %lu, bitnum: %d)\n", hm, hm->s->size, hm->s->bitnum); + data = get_data (hm); + fprintf (stderr,"offset = %lx data = %p\n", (unsigned long) hm->s->data.offset, data); + +#undef INDENT +#define INDENT " " + for (size_t i = 0; i < hm->s->size; i++) { + exp = get_expected_offset(hm, data[i].id); + if (!SHMPTR_IS_NULL(data[i].p)) { + PE("%2lu. (exp: %2lu w la %d) id %#-16lx p %#-14lx s %-7lu -- la %u ref %u %-16p\n", + i, exp, data[exp].max_lookahead, data[i].id, data[i].p.offset, data[i].s, + data[i].max_lookahead, data[i].refcnt, data + i); + occ_num++; + } + else + PE("%2lu. 
empty -- la %u %p\n", i, data[i].max_lookahead, + data + i); + + } +#undef INDENT +#define INDENT "" + PE("occupancy: %lu %f\n", occ_num, ((double) occ_num)/hm->s->size); +} diff --git a/libgfortran/nca/hashmap.h b/libgfortran/nca/hashmap.h new file mode 100644 index 0000000..4d999e3 --- /dev/null +++ b/libgfortran/nca/hashmap.h @@ -0,0 +1,70 @@ +#ifndef HASHMAP_H + +#include "shared_memory.h" +#include "allocator.h" + +#include <stdint.h> +#include <stddef.h> + + +/* Data structures and variables: + + memid is a unique identifier for the coarray, the address of its + descriptor (which is unique in the program). */ +typedef intptr_t memid; + +typedef struct { + shared_mem_ptr data; + size_t size; + int bitnum; +} hashmap_shared; + +typedef struct hashmap +{ + hashmap_shared *s; + shared_memory *sm; + allocator *a; +} hashmap; + +typedef struct { + shared_mem_ptr p; + size_t size; + ssize_t res_offset; +} hashmap_search_result; + +void hashmap_init (hashmap *, hashmap_shared *, allocator *a, shared_memory *); + +/* Look up memid in the hashmap. The result can be inspected via the + hm_search_result_* functions. */ + +hashmap_search_result hashmap_get (hashmap *, memid); + +/* Given a search result, returns the size. */ +size_t hm_search_result_size (hashmap_search_result *); + +/* Given a search result, returns the pointer. */ +shared_mem_ptr hm_search_result_ptr (hashmap_search_result *); + +/* Given a search result, returns whether something was found. */ +bool hm_search_result_contains (hashmap_search_result *); + +/* Sets the hashmap entry for memid to shared_mem_ptr and + size_t. Optionally, if a hashmap_search_result is supplied, it is + used to make the lookup faster. */ + +void hashmap_set (hashmap *, memid, hashmap_search_result *, shared_mem_ptr p, + size_t); + +/* Increments the hashmap entry for memid. Optionally, if a + hashmap_search_result is supplied, it is used to make the lookup + faster. 
*/ + +void hashmap_inc (hashmap *, memid, hashmap_search_result *); + +/* Same, but decrement. */ +int hashmap_dec (hashmap *, memid, hashmap_search_result *); + +void dump_hm (hashmap *hm); + +#define HASHMAP_H +#endif diff --git a/libgfortran/nca/libcoarraynative.h b/libgfortran/nca/libcoarraynative.h new file mode 100644 index 0000000..507de0c --- /dev/null +++ b/libgfortran/nca/libcoarraynative.h @@ -0,0 +1,103 @@ +/* Copyright (C) 2019-2020 Free Software Foundation, Inc. + Contributed by Nicolas Koenig + +This file is part of the GNU Fortran Native Coarray Library (libnca). + +Libnca is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +Libnca is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#ifndef LIBGFOR_H +#error "Include libgfortran.h before libcoarraynative.h" +#endif + +#ifndef COARRAY_NATIVE_HDR +#define COARRAY_NATIVE_HDR + +#include "libgfortran.h" + +#include <sys/types.h> +#include <stdint.h> +#include <stdio.h> + + +/* This is to create a _nca_gfortrani_ prefix for all variables and + function used only by nca. */ +#if 0 +#define NUM_ADDR_BITS (8 * sizeof (int *)) +#endif + +#define DEBUG_NATIVE_COARRAY 1 + +#ifdef DEBUG_NATIVE_COARRAY +#define DEBUG_PRINTF(...) 
dprintf (2,__VA_ARGS__) +#else +#define DEBUG_PRINTF(...) do {} while(0) +#endif + +#include "allocator.h" +#include "hashmap.h" +#include "sync.h" +#include "lock.h" +#include "collective_subroutine.h" + +typedef struct { + pthread_barrier_t barrier; + int maximg; +} ipcollsub; + +typedef enum { + IMAGE_UNKNOWN = 0, + IMAGE_OK, + IMAGE_FAILED +} image_status; + +typedef struct { + image_status status; + pid_t pid; +} image_tracker; + +typedef struct { + int has_failed_image; + image_tracker images[]; +} master; + +typedef struct { + int image_num; + master *m; +} image; + +extern image this_image; + +typedef struct { + int num_images; + shared_memory sm; + alloc_iface ai; + collsub_iface ci; + sync_iface si; +} nca_local_data; + +extern nca_local_data *local; +internal_proto (local); +void ensure_initialization(void); +internal_proto(ensure_initialization); + +void nca_master(void (*)(void)); +export_proto (nca_master); + +#endif diff --git a/libgfortran/nca/lock.h b/libgfortran/nca/lock.h new file mode 100644 index 0000000..4697395 --- /dev/null +++ b/libgfortran/nca/lock.h @@ -0,0 +1,37 @@ +/* Copyright (C) 2019-2020 Free Software Foundation, Inc. + Contributed by Nicolas Koenig + +This file is part of the GNU Fortran Native Coarray Library (libnca). + +Libnca is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +Libnca is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + + +#ifndef COARRAY_LOCK_HDR +#define COARRAY_LOCK_HDR + +#include <pthread.h> + +typedef struct { + int owner; + int initialized; + pthread_mutex_t arr[]; +} lock_array; + +#endif diff --git a/libgfortran/nca/shared_memory.c b/libgfortran/nca/shared_memory.c new file mode 100644 index 0000000..bc3093d --- /dev/null +++ b/libgfortran/nca/shared_memory.c @@ -0,0 +1,221 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Nicolas Koenig + +This file is part of the GNU Fortran Native Coarray Library (libnca). + +Libnca is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +Libnca is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include "libcoarraynative.h" +#include "util.h" +#include <sys/mman.h> +#include <unistd.h> +#include <string.h> + +#include "shared_memory.h" + +/* This implements shared memory based on POSIX mmap. 
We start with + memory block of the size of the global shared memory data, rounded + up to one pagesize, and enlarge as needed. + + We address the memory via a shared_memory_ptr, which is an offset into + the shared memory block. The metadata is situated at offset 0. + + In order to be able to resize the memory and to keep pointers + valid, we keep the old mapping around, so the memory is actually + visible several times to the process. Thus, pointers returned by + shared_memory_get_mem_with_alignment remain valid even when + resizing. */ + +/* Global metadata for shared memory, always kept at offset 0. */ + +typedef struct +{ + size_t size; + size_t used; + int fd; +} global_shared_memory_meta; + +/* Type realization for opaque type shared_memory. */ + +typedef struct shared_memory_act +{ + global_shared_memory_meta *meta; + void *header; + size_t last_seen_size; + + /* We don't need to free these. We probably also don't need to keep + track of them, but it is much more future proof if we do. */ + + size_t num_local_allocs; + + struct local_alloc { + void *base; + size_t size; + } allocs[]; + +} shared_memory_act; + +/* Convenience wrapper for mmap. */ + +static inline void * +map_memory (int fd, size_t size, off_t offset) +{ + void *ret = mmap (NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, offset); + if (ret == MAP_FAILED) + { + perror("mmap failed"); + exit(1); + } + return ret; +} + +/* Returns the size of shared_memory_act. */ + +static inline size_t +get_shared_memory_act_size (int nallocs) +{ + return sizeof(shared_memory_act) + nallocs*sizeof(struct local_alloc); +} + +/* When the shared memory block is enlarged, we need to map it into + virtual memory again. */ + +static inline shared_memory_act * +new_base_mapping (shared_memory_act *mem) +{ + shared_memory_act *newmem; + /* We need another entry in the alloc table. 
*/ + mem->num_local_allocs++; + newmem = realloc (mem, get_shared_memory_act_size (mem->num_local_allocs)); + newmem->allocs[newmem->num_local_allocs - 1] + = ((struct local_alloc) + {.base = map_memory (newmem->meta->fd, newmem->meta->size, 0), + .size = newmem->meta->size}); + newmem->last_seen_size = newmem->meta->size; + return newmem; +} + +/* Return the most recently allocated base pointer. */ + +static inline void * +last_base (shared_memory_act *mem) +{ + return mem->allocs[mem->num_local_allocs - 1].base; +} + +/* Get a pointer into the shared memory block with alignemnt + (works similar to sbrk). */ + +shared_mem_ptr +shared_memory_get_mem_with_alignment (shared_memory_act **pmem, size_t size, + size_t align) +{ + shared_memory_act *mem = *pmem; + size_t new_size; + size_t orig_used; + + /* Offset into memory block with alignment. */ + size_t used_wa = alignto (mem->meta->used, align); + + if (used_wa + size <= mem->meta->size) + { + memset(last_base(mem) + mem->meta->used, 0xCA, used_wa - mem->meta->used); + memset(last_base(mem) + used_wa, 0x42, size); + mem->meta->used = used_wa + size; + + DEBUG_PRINTF ("Shared Memory: New memory of size %#lx requested, returned %#lx\n", size, used_wa); + return (shared_mem_ptr) {.offset = used_wa}; + } + + /* We need to enlarge the memory segment. Double the size if that + is big enough, otherwise get what's needed. */ + + if (mem->meta->size * 2 < used_wa + size) + new_size = mem->meta->size * 2; + else + new_size = round_to_pagesize (used_wa + size); + + orig_used = mem->meta->used; + mem->meta->size = new_size; + mem->meta->used = used_wa + size; + ftruncate (mem->meta->fd, mem->meta->size); + /* This also sets the new base pointer where the shared memory + can be found in the address space. 
*/ + + mem = new_base_mapping (mem); + + *pmem = mem; + assert(used_wa != 0); + + dprintf(2, "Shared Memory: New memory of size %#lx requested, returned %#lx\n", size, used_wa); + memset(last_base(mem) + orig_used, 0xCA, used_wa - orig_used); + memset(last_base(mem) + used_wa, 0x42, size); + + return (shared_mem_ptr) {.offset = used_wa}; +} + +/* If another image changed the size, update the size accordingly. */ + +void +shared_memory_prepare (shared_memory_act **pmem) +{ + shared_memory_act *mem = *pmem; + if (mem->meta->size == mem->last_seen_size) + return; + mem = new_base_mapping(mem); + *pmem = mem; +} + +/* Initialize the memory with one page, the shared metadata of the + shared memory is stored at the beginning. */ + +void +shared_memory_init (shared_memory_act **pmem) +{ + shared_memory_act *mem; + int fd; + size_t initial_size = round_to_pagesize (sizeof (global_shared_memory_meta)); + + mem = malloc (get_shared_memory_act_size(1)); + fd = get_shmem_fd(); + + ftruncate(fd, initial_size); + mem->meta = map_memory (fd, initial_size, 0); + *mem->meta = ((global_shared_memory_meta) {.size = initial_size, + .used = sizeof(global_shared_memory_meta), + .fd = fd}); + mem->last_seen_size = initial_size; + mem->num_local_allocs = 1; + mem->allocs[0] = ((struct local_alloc) {.base = mem->meta, + .size = initial_size}); + + *pmem = mem; +} + +/* Convert a shared memory pointer (i.e. an offset into the shared + memory block) to a pointer. */ + +void * +shared_mem_ptr_to_void_ptr(shared_memory_act **pmem, shared_mem_ptr smp) +{ + return last_base(*pmem) + smp.offset; +} + diff --git a/libgfortran/nca/shared_memory.h b/libgfortran/nca/shared_memory.h new file mode 100644 index 0000000..4adc104 --- /dev/null +++ b/libgfortran/nca/shared_memory.h @@ -0,0 +1,78 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Nicolas Koenig + +This file is part of the GNU Fortran Native Coarray Library (libnca). 
+ +Libnca is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +Libnca is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#ifndef SHARED_MEMORY_H +#include <stdbool.h> +#include <stdint.h> +#include <stddef.h> +#include <sys/types.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include <limits.h> + +/* A struct to serve as an opaque shared memory object. */ + +struct shared_memory_act; +typedef struct shared_memory_act * shared_memory; + +#define SHMPTR_NULL ((shared_mem_ptr) {.offset = -1}) +#define SHMPTR_IS_NULL(x) (x.offset == -1) + +#define SHMPTR_DEREF(x, s, sm) \ + ((x) = *(__typeof(x) *) shared_mem_ptr_to_void_ptr (sm, s); +#define SHMPTR_AS(t, s, sm) ((t) shared_mem_ptr_to_void_ptr(sm, s)) +#define SHMPTR_SET(v, s, sm) (v = SHMPTR_AS(__typeof(v), s, sm)) +#define SHMPTR_EQUALS(s1, s2) (s1.offset == s2.offset) + +#define SHARED_MEMORY_RAW_ALLOC(mem, t, n) \ + shared_memory_get_mem_with_alignment(mem, sizeof(t)*n, __alignof__(t)) + +#define SHARED_MEMORY_RAW_ALLOC_PTR(mem, t) \ + SHMPTR_AS (t *, SHARED_MEMORY_RAW_ALLOC (mem, t, 1), mem) + +/* A shared-memory pointer is implemented as an offset into the shared + memory region. 
*/ + +typedef struct shared_mem_ptr +{ + ssize_t offset; +} shared_mem_ptr; + +void shared_memory_init (shared_memory *); +internal_proto (shared_memory_init); + +void shared_memory_prepare (shared_memory *); +internal_proto (shared_memory_prepare); + +shared_mem_ptr shared_memory_get_mem_with_alignment (shared_memory *mem, + size_t size, size_t align); +internal_proto (shared_memory_get_mem_with_alignment); + +void *shared_mem_ptr_to_void_ptr (shared_memory *, shared_mem_ptr); +internal_proto (shared_mem_ptr_to_void_ptr); + +#define SHARED_MEMORY_H +#endif diff --git a/libgfortran/nca/sync.c b/libgfortran/nca/sync.c new file mode 100644 index 0000000..6d7f7ca --- /dev/null +++ b/libgfortran/nca/sync.c @@ -0,0 +1,156 @@ +/* Copyright (C) 2019-2020 Free Software Foundation, Inc. + Contributed by Nicolas Koenig + +This file is part of the GNU Fortran Native Coarray Library (libnca). + +Libnca is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +Libnca is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. 
*/ + + +#include <string.h> + +#include "libgfortran.h" +#include "libcoarraynative.h" +#include "sync.h" +#include "util.h" + +static void +sync_all_init (pthread_barrier_t *b) +{ + pthread_barrierattr_t battr; + pthread_barrierattr_init (&battr); + pthread_barrierattr_setpshared (&battr, PTHREAD_PROCESS_SHARED); + pthread_barrier_init (b, &battr, local->num_images); + pthread_barrierattr_destroy (&battr); +} + +static inline void +lock_table (sync_iface *si) +{ + pthread_mutex_lock (&si->cis->table_lock); +} + +static inline void +unlock_table (sync_iface *si) +{ + pthread_mutex_unlock (&si->cis->table_lock); +} + +static inline void +wait_table_cond (sync_iface *si, pthread_cond_t *cond) +{ + pthread_cond_wait (cond,&si->cis->table_lock); +} + +static int * +get_locked_table(sync_iface *si) { // The initialization of the table has to + // be delayed, since we might not know the + // number of images when the library is + // initialized + lock_table(si); + return si->table; + /* + if (si->table) + return si->table; + else if (!SHMPTR_IS_NULL(si->cis->table)) + { + si->table = SHMPTR_AS(int *, si->cis->table, si->sm); + si->triggers = SHMPTR_AS(pthread_cond_t *, si->cis->triggers, si->sm); + return si->table; + } + + si->cis->table = + shared_malloc(si->a, sizeof(int)*local->num_images * local->num_images); + si->cis->triggers = + shared_malloc(si->a, sizeof(int)*local->num_images); + + si->table = SHMPTR_AS(int *, si->cis->table, si->sm); + si->triggers = SHMPTR_AS(pthread_cond_t *, si->cis->triggers, si->sm); + + for (int i = 0; i < local->num_images; i++) + initialize_shared_condition (&si->triggers[i]); + + return si->table; + */ +} + +void +sync_iface_init (sync_iface *si, alloc_iface *ai, shared_memory *sm) +{ + si->cis = SHMPTR_AS (sync_iface_shared *, + shared_malloc (get_allocator(ai), + sizeof(collsub_iface_shared)), + sm); + DEBUG_PRINTF ("%s: num_images is %d\n", __PRETTY_FUNCTION__, local->num_images); + + sync_all_init (&si->cis->sync_all); + 
initialize_shared_mutex (&si->cis->table_lock); + si->sm = sm; + si->a = get_allocator(ai); + + si->cis->table = + shared_malloc(si->a, sizeof(int)*local->num_images * local->num_images); + si->cis->triggers = + shared_malloc(si->a, sizeof(pthread_cond_t)*local->num_images); + + si->table = SHMPTR_AS(int *, si->cis->table, si->sm); + si->triggers = SHMPTR_AS(pthread_cond_t *, si->cis->triggers, si->sm); + + for (int i = 0; i < local->num_images; i++) + initialize_shared_condition (&si->triggers[i]); +} + +void +sync_table (sync_iface *si, int *images, size_t size) +{ +#ifdef DEBUG_NATIVE_COARRAY + dprintf (2, "Image %d waiting for these %ld images: ", this_image.image_num + 1, size); + for (int d_i = 0; d_i < size; d_i++) + dprintf (2, "%d ", images[d_i]); + dprintf (2, "\n"); +#endif + size_t i; + int done; + int *table = get_locked_table(si); + for (i = 0; i < size; i++) + { + table[images[i] - 1 + local->num_images*this_image.image_num]++; + pthread_cond_signal (&si->triggers[images[i] - 1]); + } + for (;;) + { + done = 1; + for (i = 0; i < size; i++) + done &= si->table[images[i] - 1 + this_image.image_num*local->num_images] + == si->table[this_image.image_num + (images[i] - 1)*local->num_images]; + if (done) + break; + wait_table_cond (si, &si->triggers[this_image.image_num]); + } + unlock_table (si); +} + +void +sync_all (sync_iface *si) +{ + + DEBUG_PRINTF("Syncing all\n"); + + pthread_barrier_wait (&si->cis->sync_all); +} diff --git a/libgfortran/nca/sync.h b/libgfortran/nca/sync.h new file mode 100644 index 0000000..4b49441 --- /dev/null +++ b/libgfortran/nca/sync.h @@ -0,0 +1,56 @@ +/* Copyright (C) 2019-2020 Free Software Foundation, Inc. + Contributed by Nicolas Koenig + +This file is part of the GNU Fortran Native Coarray Library (libnca). 
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef IPSYNC_HDR
+#define IPSYNC_HDR
+
+#include "shared_memory.h"
+#include "alloc.h"
+#include<pthread.h>
+
+/* Synchronization state shared between all images (lives in shared
+   memory; see sync_iface_init in sync.c for allocation).  */
+typedef struct {
+  pthread_barrier_t sync_all;	/* Process-shared barrier for SYNC ALL.  */
+  pthread_mutex_t table_lock;	/* Protects table and triggers below.  */
+  shared_mem_ptr table;		/* num_images * num_images ints of pairwise
+				   sync counters (see sync_table).  */
+  shared_mem_ptr triggers;	/* One pthread_cond_t per image, signalled
+				   when a partner arrives.  */
+} sync_iface_shared;
+
+/* Per-image view of the synchronization interface.  */
+typedef struct {
+  sync_iface_shared *cis;
+  shared_memory *sm;
+  allocator *a;
+  int *table; /* We can cache the table and the trigger pointers here.  */
+  pthread_cond_t *triggers;
+} sync_iface;
+
+void sync_iface_init (sync_iface *, alloc_iface *, shared_memory *);
+internal_proto (sync_iface_init);
+
+void sync_all (sync_iface *);
+internal_proto (sync_all);
+
+void sync_table (sync_iface *, int *, size_t);
+internal_proto (sync_table);
+
+#endif
diff --git a/libgfortran/nca/util.c b/libgfortran/nca/util.c
new file mode 100644
index 0000000..5805218
--- /dev/null
+++ b/libgfortran/nca/util.c
+#include "libgfortran.h"
+#include "util.h"
+#include <string.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h> + +#define MEMOBJ_NAME "/gfortran_coarray_memfd" + +size_t +alignto(size_t size, size_t align) { + return align*((size + align - 1)/align); +} + +size_t pagesize; + +size_t +round_to_pagesize(size_t s) { + return alignto(s, pagesize); +} + +size_t +next_power_of_two(size_t size) { + return 1 << (PTR_BITS - __builtin_clzl(size-1)); //FIXME: There's an off-by-one error, I can feel it +} + +void +initialize_shared_mutex (pthread_mutex_t *mutex) +{ + pthread_mutexattr_t mattr; + pthread_mutexattr_init (&mattr); + pthread_mutexattr_setpshared (&mattr, PTHREAD_PROCESS_SHARED); + pthread_mutex_init (mutex, &mattr); + pthread_mutexattr_destroy (&mattr); +} + +void +initialize_shared_condition (pthread_cond_t *cond) +{ + pthread_condattr_t cattr; + pthread_condattr_init (&cattr); + pthread_condattr_setpshared (&cattr, PTHREAD_PROCESS_SHARED); + pthread_cond_init (cond, &cattr); + pthread_condattr_destroy (&cattr); +} + +int +get_shmem_fd (void) +{ + char buffer[1<<10]; + int fd, id; + id = random (); + do + { + snprintf (buffer, sizeof (buffer), + MEMOBJ_NAME "_%u_%d", (unsigned int) getpid (), id++); + fd = shm_open (buffer, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR); + } + while (fd == -1); + shm_unlink (buffer); + return fd; +} + +bool +pack_array_prepare (pack_info * restrict pi, const gfc_array_char * restrict source) +{ + index_type dim; + bool packed; + index_type span; + index_type type_size; + index_type ssize; + + dim = GFC_DESCRIPTOR_RANK (source); + type_size = GFC_DESCRIPTOR_SIZE (source); + ssize = type_size; + + pi->num_elem = 1; + packed = true; + span = source->span != 0 ? source->span : type_size; + for (index_type n = 0; n < dim; n++) + { + pi->stride[n] = GFC_DESCRIPTOR_STRIDE (source,n) * span; + pi->extent[n] = GFC_DESCRIPTOR_EXTENT (source,n); + if (pi->extent[n] <= 0) + { + /* Do nothing. 
*/ + packed = 1; + pi->num_elem = 0; + break; + } + + if (ssize != pi->stride[n]) + packed = 0; + + pi->num_elem *= pi->extent[n]; + ssize *= pi->extent[n]; + } + + return packed; +} + +void +pack_array_finish (pack_info * const restrict pi, const gfc_array_char * const restrict source, + char * restrict dest) +{ + index_type dim; + const char *restrict src; + + index_type size; + index_type stride0; + index_type count[GFC_MAX_DIMENSIONS]; + + dim = GFC_DESCRIPTOR_RANK (source); + src = source->base_addr; + stride0 = pi->stride[0]; + size = GFC_DESCRIPTOR_SIZE (source); + + memset (count, 0, sizeof(count)); + while (src) + { + /* Copy the data. */ + memcpy(dest, src, size); + /* Advance to the next element. */ + dest += size; + src += stride0; + count[0]++; + /* Advance to the next source element. */ + index_type n = 0; + while (count[n] == pi->extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. 
*/ + src -= pi->stride[n] * pi->extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += pi->stride[n]; + } + } + } +} + +void +unpack_array_finish (pack_info * const restrict pi, + const gfc_array_char * restrict d, + const char *restrict src) +{ + index_type stride0; + char * restrict dest; + index_type size; + index_type count[GFC_MAX_DIMENSIONS]; + index_type dim; + + size = GFC_DESCRIPTOR_SIZE (d); + stride0 = pi->stride[0]; + dest = d->base_addr; + dim = GFC_DESCRIPTOR_RANK (d); + + while (dest) + { + memcpy (dest, src, size); + src += size; + dest += stride0; + count[0]++; + index_type n = 0; + while (count[n] == pi->extent[n]) + { + count[n] = 0; + dest -= pi->stride[n] * pi->extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n] ++; + dest += pi->stride[n]; + } + } + } +} diff --git a/libgfortran/nca/util.h b/libgfortran/nca/util.h new file mode 100644 index 0000000..9abd7ad --- /dev/null +++ b/libgfortran/nca/util.h @@ -0,0 +1,86 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + Contributed by Nicolas Koenig + +This file is part of the GNU Fortran Native Coarray Library (libnca). + +Libnca is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +Libnca is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef UTIL_HDR
+#define UTIL_HDR
+
+#include <stdint.h>
+#include <stddef.h>
+#include <pthread.h>
+
+/* Number of bits in a pointer.  NOTE(review): CHAR_BIT comes from
+   <limits.h>, which this header does not include itself — every
+   current user includes it first; confirm before adding new users.  */
+#define PTR_BITS (CHAR_BIT*sizeof(void *))
+
+/* Round up to a multiple of the second argument (util.c).  */
+size_t alignto (size_t, size_t);
+internal_proto (alignto);
+
+size_t round_to_pagesize (size_t);
+internal_proto (round_to_pagesize);
+
+size_t next_power_of_two (size_t);
+internal_proto (next_power_of_two);
+
+/* File descriptor of the anonymous POSIX shared memory object.  */
+int get_shmem_fd (void);
+internal_proto (get_shmem_fd);
+
+void initialize_shared_mutex (pthread_mutex_t *);
+internal_proto (initialize_shared_mutex);
+
+void initialize_shared_condition (pthread_cond_t *);
+internal_proto (initialize_shared_condition);
+
+/* System page size, set during library initialization.  */
+extern size_t pagesize;
+internal_proto (pagesize);
+
+/* Usage:
+   pack_info pi;
+   packed = pack_array_prepare (&pi, source);
+
+   // Awesome allocation of destptr using pi.num_elem
+   if (packed)
+     memcpy (...);
+   else
+     pack_array_finish (&pi, source, destptr);
+
+   This could also be used in in_pack_generic.c.  Additionally, since
+   pack_array_prepare is the same for all type sizes, we would only have to
+   specialize pack_array_finish, saving on code size.  */
+
+typedef struct
+{
+  index_type num_elem;	/* Total number of elements; 0 for an empty array.  */
+  index_type extent[GFC_MAX_DIMENSIONS];
+  index_type stride[GFC_MAX_DIMENSIONS]; /* Stride is byte-based.  */
+} pack_info;
+
+/* NOTE(review): bool and index_type are presumably provided by
+   libgfortran.h, which users include before this header — confirm.  */
+bool pack_array_prepare (pack_info *restrict, const gfc_array_char * restrict);
+internal_proto (pack_array_prepare);
+
+void pack_array_finish (pack_info * const restrict, const gfc_array_char * const restrict,
+			char * restrict);
+
+internal_proto (pack_array_finish);
+
+void unpack_array_finish (pack_info * const restrict, const gfc_array_char * const,
+			  const char * restrict);
+#endif
diff --git a/libgfortran/nca/wrapper.c b/libgfortran/nca/wrapper.c
new file mode 100644
index 0000000..eeb64d3
--- /dev/null
+++ b/libgfortran/nca/wrapper.c
+/* Copyright (C) 2019-2020 Free Software Foundation, Inc.
+   Contributed by Nicolas Koenig
+
+This file is part of the GNU Fortran Native Coarray Library (libnca).
+
+Libnca is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+Libnca is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.
*/ + +#include <string.h> +#include "libgfortran.h" +#include "libcoarraynative.h" +#include "sync.h" +#include "lock.h" +#include "util.h" +#include "collective_subroutine.h" + +static inline int +div_ru (int divident, int divisor) +{ + return (divident + divisor - 1)/divisor; +} + +enum gfc_coarray_allocation_type { + GFC_NCA_NORMAL_COARRAY = 3, + GFC_NCA_LOCK_COARRAY, + GFC_NCA_EVENT_COARRAY, +}; + +void nca_coarray_alloc (gfc_array_void *, int, int, int); +export_proto (nca_coarray_alloc); + +void +nca_coarray_free (gfc_array_void *, int); +export_proto (nca_coarray_free); + +int nca_coarray_this_image (int); +export_proto (nca_coarray_this_image); + +int nca_coarray_num_images (int); +export_proto (nca_coarray_num_images); + +void nca_coarray_sync_all (int *); +export_proto (nca_coarray_sync_all); + +void nca_sync_images (size_t, int *, int*, char *, size_t); +export_proto (nca_sync_images); + +void nca_lock (void *); +export_proto (nca_lock); + +void nca_unlock (void *); +export_proto (nca_unlock); + +void nca_collsub_reduce_array (gfc_array_char *, void (*) (void *, void *), + int *); +export_proto (nca_collsub_reduce_array); + +void nca_collsub_reduce_scalar (void *, index_type, void (*) (void *, void *), + int *); +export_proto (nca_collsub_reduce_scalar); + +void nca_collsub_broadcast_array (gfc_array_char * restrict, int/*, int *, char *, + size_t*/); +export_proto (nca_collsub_broadcast_array); + +void nca_collsub_broadcast_scalar (void * restrict, size_t, int/*, int *, char *, + size_t*/); +export_proto(nca_collsub_broadcast_scalar); + +void +nca_coarray_alloc (gfc_array_void *desc, int elem_size, int corank, + int alloc_type) +{ + int i, last_rank_index; + int num_coarray_elems, num_elems; /* Excludes the last dimension, because it + will have to be determined later. 
*/ + int extent_last_codimen; + size_t last_lbound; + size_t size_in_bytes; + + ensure_initialization(); /* This function might be the first one to be + called, if it is called in a constructor. */ + + if (alloc_type == GFC_NCA_LOCK_COARRAY) + elem_size = sizeof (pthread_mutex_t); + else if (alloc_type == GFC_NCA_EVENT_COARRAY) + elem_size = sizeof(char); /* replace with proper type. */ + + last_rank_index = GFC_DESCRIPTOR_RANK(desc) + corank -1; + + num_elems = 1; + num_coarray_elems = 1; + for (i = 0; i < GFC_DESCRIPTOR_RANK(desc); i++) + num_elems *= GFC_DESCRIPTOR_EXTENT(desc, i); + for (i = GFC_DESCRIPTOR_RANK(desc); i < last_rank_index; i++) + { + num_elems *= GFC_DESCRIPTOR_EXTENT(desc, i); + num_coarray_elems *= GFC_DESCRIPTOR_EXTENT(desc, i); + } + + extent_last_codimen = div_ru (local->num_images, num_coarray_elems); + + last_lbound = GFC_DIMENSION_LBOUND(desc->dim[last_rank_index]); + GFC_DIMENSION_SET(desc->dim[last_rank_index], last_lbound, + last_lbound + extent_last_codimen - 1, + num_elems); + + size_in_bytes = elem_size * num_elems * extent_last_codimen; + if (alloc_type == GFC_NCA_LOCK_COARRAY) + { + lock_array *addr; + int expected = 0; + /* Allocate enough space for the metadata infront of the lock + array. */ + addr = get_memory_by_id_zero (&local->ai, size_in_bytes + + sizeof (lock_array), + (intptr_t) desc); + + /* Use of a traditional spin lock to avoid race conditions with + the initization of the mutex. We could alternatively put a + global lock around allocate, but that would probably be + slower. 
*/ + while (!__atomic_compare_exchange_n (&addr->owner, &expected, + this_image.image_num + 1, + false, __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST)); + if (!addr->initialized++) + { + for (i = 0; i < local->num_images; i++) + initialize_shared_mutex (&addr->arr[i]); + } + __atomic_store_n (&addr->owner, 0, __ATOMIC_SEQ_CST); + desc->base_addr = &addr->arr; + } + else if (alloc_type == GFC_NCA_EVENT_COARRAY) + (void) 0; // TODO + else + desc->base_addr = get_memory_by_id (&local->ai, size_in_bytes, + (intptr_t) desc); + dprintf(2, "Base address of desc for image %d: %p\n", this_image.image_num + 1, desc->base_addr); +} + +void +nca_coarray_free (gfc_array_void *desc, int alloc_type) +{ + int i; + if (alloc_type == GFC_NCA_LOCK_COARRAY) + { + lock_array *la; + int expected = 0; + la = desc->base_addr - offsetof (lock_array, arr); + while (!__atomic_compare_exchange_n (&la->owner, &expected, + this_image.image_num+1, + false, __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST)); + if (!--la->initialized) + { + /* Coarray locks can be removed and just normal + pthread_mutex can be used. 
*/ + for (i = 0; i < local->num_images; i++) + pthread_mutex_destroy (&la->arr[i]); + } + __atomic_store_n (&la->owner, 0, __ATOMIC_SEQ_CST); + } + else if (alloc_type == GFC_NCA_EVENT_COARRAY) + (void) 0; //TODO + + free_memory_with_id (&local->ai, (intptr_t) desc); + desc->base_addr = NULL; +} + +int +nca_coarray_this_image (int distance __attribute__((unused))) +{ + return this_image.image_num + 1; +} + +int +nca_coarray_num_images (int distance __attribute__((unused))) +{ + return local->num_images; +} + +void +nca_coarray_sync_all (int *stat __attribute__((unused))) +{ + sync_all (&local->si); +} + +void +nca_sync_images (size_t s, int *images, + int *stat __attribute__((unused)), + char *error __attribute__((unused)), + size_t err_size __attribute__((unused))) +{ + sync_table (&local->si, images, s); +} + +void +nca_lock (void *lock) +{ + pthread_mutex_lock (lock); +} + +void +nca_unlock (void *lock) +{ + pthread_mutex_unlock (lock); +} + +void +nca_collsub_reduce_array (gfc_array_char *desc, void (*assign_function) (void *, void *), + int *result_image) +{ + collsub_reduce_array (&local->ci, desc, result_image, assign_function); +} + +void +nca_collsub_reduce_scalar (void *obj, index_type elem_size, + void (*assign_function) (void *, void *), + int *result_image) +{ + collsub_reduce_scalar (&local->ci, obj, elem_size, result_image, assign_function); +} + +void +nca_collsub_broadcast_array (gfc_array_char * restrict a, int source_image + /* , int *stat __attribute__ ((unused)), + char *errmsg __attribute__ ((unused)), + size_t errmsg_len __attribute__ ((unused))*/) +{ + collsub_broadcast_array (&local->ci, a, source_image - 1); +} + +void +nca_collsub_broadcast_scalar (void * restrict obj, size_t size, int source_image/*, + int *stat __attribute__((unused)), + char *errmsg __attribute__ ((unused)), + size_t errmsg_len __attribute__ ((unused))*/) +{ + collsub_broadcast_scalar (&local->ci, obj, size, source_image - 1); +} |