diff options
Diffstat (limited to 'libgomp/plugin/plugin-nvptx.c')
-rw-r--r-- | libgomp/plugin/plugin-nvptx.c | 22 |
1 files changed, 22 insertions, 0 deletions
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index 60553bd..c80da64c 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -1273,6 +1273,10 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs, : dims[GOMP_DIM_VECTOR]); workers = blocks / actual_vectors; workers = MAX (workers, 1); + /* If we need a per-worker barrier ... . */ + if (actual_vectors > 32) + /* Don't use more barriers than available. */ + workers = MIN (workers, 15); } for (i = 0; i != GOMP_DIM_MAX; i++) @@ -1303,6 +1307,24 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs, suggest_workers, suggest_workers); } + /* Check if the accelerator has sufficient barrier resources to + launch the offloaded kernel. */ + if (dims[GOMP_DIM_WORKER] > 15 && dims[GOMP_DIM_VECTOR] > 32) + { + const char *msg + = ("The Nvidia accelerator has insufficient barrier resources to launch" + " '%s' with num_workers = %d and vector_length = %d" + "; " + "recompile the program with 'num_workers = x' on that offloaded" + " region or '-fopenacc-dim=:x:' where x <= 15" + "; " + "or, recompile the program with 'vector_length = 32' on that" + " offloaded region" + ".\n"); + GOMP_PLUGIN_fatal (msg, targ_fn->launch->fn, dims[GOMP_DIM_WORKER], + dims[GOMP_DIM_VECTOR]); + } + /* This reserves a chunk of a pre-allocated page of memory mapped on both the host and the device. HP is a host pointer to the new chunk, and DP is the corresponding device pointer. */ |