diff options
Diffstat (limited to 'libgomp/oacc-parallel.c')
-rw-r--r-- | libgomp/oacc-parallel.c | 253 |
1 files changed, 246 insertions, 7 deletions
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -36,7 +36,7 @@
 #include <string.h>
 #include <stdarg.h>
 #include <assert.h>
-
+#include <stdio.h>
 
 /* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we
    continue to support the following two legacy values.  */
@@ -46,6 +46,217 @@
 _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK)
 		== GOACC_FLAG_HOST_FALLBACK,
 		"legacy GOMP_DEVICE_HOST_FALLBACK broken");
+
+/* Return the number of data rows of the non-contiguous array described by
+   DESCR: the product of the element counts of every dimension but the
+   last.  Each count is the dimension's length divided by its elem_size,
+   exactly as goacc_noncontig_array_fill_rows_1 iterates, so the row
+   pointer arrays sized from this value can hold every row that gets
+   filled in.  */
+
+static size_t
+goacc_noncontig_array_count_rows (struct goacc_ncarray_descr_type *descr)
+{
+  size_t nrows = 1;
+  for (size_t d = 0; d < descr->ndims - 1; d++)
+    nrows *= descr->dims[d].length / descr->dims[d].elem_size;
+  return nrows;
+}
+
+/* Derive NCA's pointer-block size and data-row geometry from its
+   descriptor, storing them in NCA->ptrblock_size, NCA->data_row_num and
+   NCA->data_row_size.  */
+
+static void
+goacc_noncontig_array_compute_sizes (struct goacc_ncarray *nca)
+{
+  size_t d, n = 1;
+  struct goacc_ncarray_descr_type *descr = nca->descr;
+
+  nca->ptrblock_size = 0;
+  for (d = 0; d < descr->ndims - 1; d++)
+    {
+      size_t dim_count = descr->dims[d].length / descr->dims[d].elem_size;
+      /* A dimension followed by an array dimension adds nothing to the
+	 pointer block.  */
+      size_t dim_ptrblock_size = (descr->dims[d + 1].is_array
+				  ? 0 : descr->dims[d].length * n);
+      nca->ptrblock_size += dim_ptrblock_size;
+      n *= dim_count;
+    }
+  nca->data_row_num = n;
+  /* The loop leaves D at the index of the last dimension.  */
+  nca->data_row_size = descr->dims[d].length;
+}
+
+/* Recursive worker for goacc_noncontig_array_fill_rows.  NCA is the host
+   address reached for dimension D of DESCR.  For every dimension but the
+   last, step through its elements, loading the stored pointer unless the
+   next dimension is an array.  At the last dimension, store the row
+   address through *ROW_PTR, advance *ROW_PTR and increment *COUNT.  */
+
+static void
+goacc_noncontig_array_fill_rows_1 (struct goacc_ncarray_descr_type *descr,
+				   void *nca, size_t d, void ***row_ptr,
+				   size_t *count)
+{
+  if (d < descr->ndims - 1)
+    {
+      size_t elsize = descr->dims[d].elem_size;
+      size_t n = descr->dims[d].length / elsize;
+      void *p = nca + descr->dims[d].base;
+      for (size_t i = 0; i < n; i++)
+	{
+	  void *ptr = p + i * elsize;
+	  /* Deref if next dimension is not array.  */
+	  if (!descr->dims[d + 1].is_array)
+	    ptr = *((void **) ptr);
+	  goacc_noncontig_array_fill_rows_1 (descr, ptr, d + 1, row_ptr, count);
+	}
+    }
+  else
+    {
+      **row_ptr = nca + descr->dims[d].base;
+      *row_ptr += 1;
+      *count += 1;
+    }
+}
+
+/* Store the host address of every data row of NCA into NCA->data_rows and
+   return how many rows were stored.  */
+
+static size_t
+goacc_noncontig_array_fill_rows (struct goacc_ncarray *nca)
+{
+  size_t count = 0;
+  void **p = nca->data_rows;
+  goacc_noncontig_array_fill_rows_1 (nca->descr, nca->ptr, 0, &p, &count);
+  return count;
+}
+
+/* Gather the non-contiguous array mappings that lead the MAPNUM entries of
+   HOSTADDRS and KINDS.  One descriptor pointer is read from *AP for each
+   such entry; processing stops at the first entry of any other kind.
+   Return a single gomp_malloc'ed block holding the info header, the
+   per-array records and both row-pointer arrays.  */
+
+static struct goacc_ncarray_info *
+goacc_process_noncontiguous_arrays (size_t mapnum, void **hostaddrs,
+				    unsigned short *kinds, va_list *ap)
+{
+  size_t i, nr, num_data_rows = 0, num_ncarray = 0, curr_row_start = 0;
+  struct goacc_ncarray_descr_type *descr;
+
+  /* We need to go over *ap twice, so preserve *ap state here.  */
+  va_list itr;
+  va_copy (itr, *ap);
+  for (i = 0; i < mapnum; i++)
+    if (GOMP_MAP_NONCONTIG_ARRAY_P (kinds[i] & 0xff))
+      {
+	descr = va_arg (itr, struct goacc_ncarray_descr_type *);
+	num_data_rows += goacc_noncontig_array_count_rows (descr);
+	num_ncarray += 1;
+      }
+    else
+      break;
+  /* Every va_copy must be paired with a va_end in the same function.  */
+  va_end (itr);
+
+  /* Allocate the entire info struct, array entries, and row pointer
+     arrays in one large block.  */
+  struct goacc_ncarray_info *nca_info
+    = gomp_malloc (sizeof (struct goacc_ncarray_info)
+		   + sizeof (struct goacc_ncarray) * num_ncarray
+		   + sizeof (void *) * num_data_rows * 2);
+  nca_info->num_data_rows = num_data_rows;
+  nca_info->num_ncarray = num_ncarray;
+  nca_info->data_rows = (void **) (nca_info->ncarray + num_ncarray);
+  nca_info->tgt_data_rows = nca_info->data_rows + num_data_rows;
+
+  struct goacc_ncarray *curr_ncarray = nca_info->ncarray;
+  for (i = 0; i < mapnum; i++)
+    if (GOMP_MAP_NONCONTIG_ARRAY_P (kinds[i] & 0xff))
+      {
+	descr = va_arg (*ap, struct goacc_ncarray_descr_type *);
+	curr_ncarray->descr = descr;
+	curr_ncarray->ptr = hostaddrs[i];
+	curr_ncarray->map_index = i;
+
+	goacc_noncontig_array_compute_sizes (curr_ncarray);
+
+	curr_ncarray->data_rows = nca_info->data_rows + curr_row_start;
+	curr_ncarray->tgt_data_rows = nca_info->tgt_data_rows + curr_row_start;
+
+	nr = goacc_noncontig_array_fill_rows (curr_ncarray);
+	assert (nr == curr_ncarray->data_row_num);
+	curr_row_start += nr;
+	curr_ncarray += 1;
+      }
+    else
+      break;
+
+  return nca_info;
+}
+
+/* Fill PTRBLOCK, the host-side buffer for NCA's pointer block.  Entries
+   for every level but the last point at the next level's slice of the
+   block, rebased from PTRBLOCK onto TGT_PTRBLOCK_ADDR; entries of the
+   last level are taken from NCA->tgt_data_rows.  Each stored pointer has
+   the next dimension's base subtracted from it.
+
+   NOTE(review): unlike goacc_noncontig_array_compute_sizes, this walks
+   each level in units of sizeof (void *) and never looks at is_array;
+   confirm that it is only reached for pure pointer-chain arrays.  */
+
+void
+goacc_noncontig_array_create_ptrblock (struct goacc_ncarray *nca,
+				       void *ptrblock,
+				       void *tgt_ptrblock_addr)
+{
+  struct goacc_ncarray_descr_type *descr = nca->descr;
+  void **tgt_data_rows = nca->tgt_data_rows;
+  void **curr_dim_ptrblock = (void **) ptrblock;
+  size_t n = 1;
+
+  for (size_t d = 0; d < descr->ndims - 1; d++)
+    {
+      /* Keep lengths, counts and indices in size_t: the loops below are
+	 bounded by the size_t N, and the byte offsets must not be
+	 computed in (overflow-prone) int arithmetic.  */
+      size_t curr_dim_len = descr->dims[d].length;
+      size_t next_dim_len = descr->dims[d + 1].length;
+      size_t curr_dim_num = curr_dim_len / sizeof (void *);
+      size_t next_dim_bias = descr->dims[d + 1].base;
+
+      void *next_dim_ptrblock
+	= (void *)(curr_dim_ptrblock + n * curr_dim_num);
+
+      for (size_t b = 0; b < n; b++)
+	for (size_t i = 0; i < curr_dim_num; i++)
+	  {
+	    if (d < descr->ndims - 2)
+	      {
+		void *ptr = (next_dim_ptrblock
+			     + b * curr_dim_num * next_dim_len
+			     + i * next_dim_len);
+		void *tgt_ptr = (tgt_ptrblock_addr
+				 + (ptr - ptrblock) - next_dim_bias);
+		curr_dim_ptrblock[b * curr_dim_num + i] = tgt_ptr;
+	      }
+	    else
+	      {
+		curr_dim_ptrblock[b * curr_dim_num + i]
+		  = tgt_data_rows[b * curr_dim_num + i] - next_dim_bias;
+	      }
+	    void *addr = &curr_dim_ptrblock[b * curr_dim_num + i];
+	    assert (ptrblock <= addr && addr < ptrblock + nca->ptrblock_size);
+	  }
+
+      n *= curr_dim_num;
+      curr_dim_ptrblock = next_dim_ptrblock;
+    }
+  assert (n == nca->data_row_num);
+}
 
 /* Handle the mapping pair that are presented when a
    deviceptr clause is used with Fortran.  */
@@ -115,6 +326,7 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
   int async = GOMP_ASYNC_SYNC;
   unsigned dims[GOMP_DIM_MAX];
   unsigned tag;
+  struct goacc_ncarray_info *nca_info = NULL;
 
 #ifdef HAVE_INTTYPES_H
   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
@@ -201,6 +413,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
       fn (hostaddrs);
       goto out_prof;
     }
+  else if (profiling_p)
+    api_info.device_api = acc_device_api_cuda;
 
   /* Default: let the runtime choose.  */
   for (i = 0; i != GOMP_DIM_MAX; i++)
@@ -250,13 +464,22 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
 	    break;
 	  }
 
+	/*case GOMP_LAUNCH_NONCONTIG_ARRAYS:
+	  nca_info = goacc_process_noncontiguous_arrays (mapnum, hostaddrs,
+							 kinds, &ap);
+	  break;*/
+
 	default:
 	  gomp_fatal ("unrecognized offload code '%d',"
 		      " libgomp is too old", GOMP_LAUNCH_CODE (tag));
 	}
     }
+
+  if (mapnum > 0 && GOMP_MAP_NONCONTIG_ARRAY_P (kinds[0] & 0xff))
+    nca_info = goacc_process_noncontiguous_arrays (mapnum, hostaddrs, kinds, &ap);
+
   va_end (ap);
-  
+
   if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
     {
       k.host_start = (uintptr_t) fn;
@@ -292,8 +515,12 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
   goacc_aq aq = get_goacc_asyncqueue (async);
 
   struct target_mem_desc *tgt
-    = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, true,
-		      GOMP_MAP_VARS_TARGET);
+    = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
+		      nca_info, true, GOMP_MAP_VARS_TARGET);
+  if (aq == NULL)
+    free (nca_info);
+  else
+    acc_dev->openacc.async.queue_callback_func (aq, free, nca_info);
 
   if (profiling_p)
     {
@@ -362,7 +589,7 @@ GOACC_parallel (int flags_m, void (*fn) (void *),
 
 void
 GOACC_data_start (int flags_m, size_t mapnum,
-		  void **hostaddrs, size_t *sizes, unsigned short *kinds)
+		  void **hostaddrs, size_t *sizes, unsigned short *kinds, ...)
 {
   int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
 
@@ -447,6 +674,8 @@ GOACC_data_start (int flags_m, size_t mapnum,
   if (profiling_p)
     goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
 
+  handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
+
   /* Host fallback or 'do nothing'.  */
   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
       || (flags & GOACC_FLAG_HOST_FALLBACK)
@@ -454,16 +683,26 @@ GOACC_data_start (int flags_m, size_t mapnum,
     {
       prof_info.device_type = acc_device_host;
       api_info.device_type = prof_info.device_type;
-      tgt = goacc_map_vars (NULL, NULL, 0, NULL, NULL, NULL, NULL, true, 0);
+      tgt = goacc_map_vars (NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL, true, 0);
       tgt->prev = thr->mapped_data;
       thr->mapped_data = tgt;
 
       goto out_prof;
     }
 
+  struct goacc_ncarray_info *nca_info = NULL;
+  if (mapnum > 0 && GOMP_MAP_NONCONTIG_ARRAY_P (kinds[0] & 0xff))
+    {
+      va_list ap;
+      va_start (ap, kinds);
+      nca_info = goacc_process_noncontiguous_arrays (mapnum, hostaddrs, kinds, &ap);
+      va_end (ap);
+    }
+
   gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
   tgt = goacc_map_vars (acc_dev, NULL, mapnum, hostaddrs, NULL, sizes, kinds,
-			true, 0);
+			nca_info, true, 0);
+  free (nca_info);
   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
   tgt->prev = thr->mapped_data;
   thr->mapped_data = tgt;