diff options
author | Tobias Burnus <tobias@codesourcery.com> | 2022-11-25 13:48:17 +0100 |
---|---|---|
committer | Tobias Burnus <tobias@codesourcery.com> | 2022-11-25 13:48:17 +0100 |
commit | 9f9d128f459e0c5ace8f7b85504d277b5a838daf (patch) | |
tree | c935998f0c1a3490cd5beec437d84a01ea14e0f3 | |
parent | c16e85d726a7793c05209af031dac0bebf035ab9 (diff) | |
download | gcc-9f9d128f459e0c5ace8f7b85504d277b5a838daf.zip gcc-9f9d128f459e0c5ace8f7b85504d277b5a838daf.tar.gz gcc-9f9d128f459e0c5ace8f7b85504d277b5a838daf.tar.bz2 |
libgomp: Add no-target-region rev offload test + fix plugin-nvptx
OpenMP permits that a 'target device(ancestor:1)' is called without being
enclosed in a target region - using the current device (i.e. the host) in
that case. This commit adds a testcase for this.
In case of nvptx, the missing on-device 'GOMP_target_ext' call causes that
it and also the associated on-device GOMP_REV_OFFLOAD_VAR variable are not
linked in from nvptx's libgomp.a. Thus, handle the failing cuModuleGetGlobal
gracefully by disabling reverse offload and assuming that the failure is fine.
libgomp/ChangeLog:
* plugin/plugin-nvptx.c (GOMP_OFFLOAD_load_image): Use unsigned int
for 'i' to match 'fn_entries'; regard absent GOMP_REV_OFFLOAD_VAR
as valid and the code having no reverse-offload code.
* testsuite/libgomp.c-c++-common/reverse-offload-2.c: New test.
-rw-r--r-- | libgomp/plugin/plugin-nvptx.c | 36 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.c-c++-common/reverse-offload-2.c | 49 |
2 files changed, 73 insertions, 12 deletions
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index 0768fca..e803f08 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -1390,7 +1390,8 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, else if (rev_fn_table) { CUdeviceptr var; - size_t bytes, i; + size_t bytes; + unsigned int i; r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &var, &bytes, module, "$offload_func_table"); if (r != CUDA_SUCCESS) @@ -1413,12 +1414,11 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, if (rev_fn_table && *rev_fn_table && dev->rev_data == NULL) { - /* cuMemHostAlloc memory is accessible on the device, if unified-shared - address is supported; this is assumed - see comment in - nvptx_open_device for CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING. */ - CUDA_CALL_ASSERT (cuMemHostAlloc, (void **) &dev->rev_data, - sizeof (*dev->rev_data), CU_MEMHOSTALLOC_DEVICEMAP); - CUdeviceptr dp = (CUdeviceptr) dev->rev_data; + /* Get the on-device GOMP_REV_OFFLOAD_VAR variable. It should be + available but it might be not. One reason could be: if the user code + has 'omp target device(ancestor:1)' in pure hostcode, GOMP_target_ext + is not called on the device and, hence, it and GOMP_REV_OFFLOAD_VAR + are not linked in. */ CUdeviceptr device_rev_offload_var; size_t device_rev_offload_size; CUresult r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, @@ -1426,11 +1426,23 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, &device_rev_offload_size, module, XSTRING (GOMP_REV_OFFLOAD_VAR)); if (r != CUDA_SUCCESS) - GOMP_PLUGIN_fatal ("cuModuleGetGlobal error - GOMP_REV_OFFLOAD_VAR: %s", cuda_error (r)); - r = CUDA_CALL_NOCHECK (cuMemcpyHtoD, device_rev_offload_var, &dp, - sizeof (dp)); - if (r != CUDA_SUCCESS) - GOMP_PLUGIN_fatal ("cuMemcpyHtoD error: %s", cuda_error (r)); + { + free (*rev_fn_table); + *rev_fn_table = NULL; + } + else + { + /* cuMemHostAlloc memory is accessible on the device, if + unified-shared address is supported; this is assumed - see comment + in nvptx_open_device for CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING. */ + CUDA_CALL_ASSERT (cuMemHostAlloc, (void **) &dev->rev_data, + sizeof (*dev->rev_data), CU_MEMHOSTALLOC_DEVICEMAP); + CUdeviceptr dp = (CUdeviceptr) dev->rev_data; + r = CUDA_CALL_NOCHECK (cuMemcpyHtoD, device_rev_offload_var, &dp, + sizeof (dp)); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuMemcpyHtoD error: %s", cuda_error (r)); + } } nvptx_set_clocktick (module, dev); diff --git a/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-2.c b/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-2.c new file mode 100644 index 0000000..33bd384 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-2.c @@ -0,0 +1,49 @@ +/* { dg-do run } */ +/* { dg-additional-options "-foffload-options=nvptx-none=-misa=sm_35" { target { offload_target_nvptx } } } */ + +#pragma omp requires reverse_offload + +int +main () +{ + int A[10]; + int y; + + for (int i = 0; i < 10; i++) + A[i] = 2*i; + + y = 42; + + /* Pointlessly copy to the default device. */ + #pragma omp target data map(to: A) + { + /* Not enclosed in a target region (= i.e. running on the host); the + following is valid - it runs on the current device (= host). */ + #pragma omp target device ( ancestor:1 ) firstprivate(y) map(to: A) + { + if (y != 42) + __builtin_abort (); + for (int i = 0; i < 10; i++) + if (A[i] != 2*i) + __builtin_abort (); + for (int i = 0; i < 10; i++) + if (A[i] != 2*i) + A[i] = 4*i; + y = 31; + } + + if (y != 42) + __builtin_abort (); + for (int i = 0; i < 10; i++) + if (A[i] != 2*i) + __builtin_abort (); + } + + if (y != 42) + __builtin_abort (); + for (int i = 0; i < 10; i++) + if (A[i] != 2*i) + __builtin_abort (); + + return 0; +} |