aboutsummaryrefslogtreecommitdiff
path: root/libgomp
diff options
context:
space:
mode:
authorTobias Burnus <tobias@codesourcery.com>2022-11-25 13:48:17 +0100
committerTobias Burnus <tobias@codesourcery.com>2022-11-25 13:48:17 +0100
commit9f9d128f459e0c5ace8f7b85504d277b5a838daf (patch)
treec935998f0c1a3490cd5beec437d84a01ea14e0f3 /libgomp
parentc16e85d726a7793c05209af031dac0bebf035ab9 (diff)
downloadgcc-9f9d128f459e0c5ace8f7b85504d277b5a838daf.zip
gcc-9f9d128f459e0c5ace8f7b85504d277b5a838daf.tar.gz
gcc-9f9d128f459e0c5ace8f7b85504d277b5a838daf.tar.bz2
libgomp: Add no-target-region rev offload test + fix plugin-nvptx
OpenMP permits that a 'target device(ancestor:1)' is called without being enclosed in a target region - using the current device (i.e. the host) in that case. This commit adds a testcase for this. In case of nvptx, the missing on-device 'GOMP_target_ext' call causes that it and also the associated on-device GOMP_REV_OFFLOAD_VAR variable are not linked in from nvptx's libgomp.a. Thus, handle the failing cuModuleGetGlobal gracefully by disabling reverse offload and assuming that the failure is fine. libgomp/ChangeLog: * plugin/plugin-nvptx.c (GOMP_OFFLOAD_load_image): Use unsigned int for 'i' to match 'fn_entries'; regard absent GOMP_REV_OFFLOAD_VAR as valid and the code having no reverse-offload code. * testsuite/libgomp.c-c++-common/reverse-offload-2.c: New test.
Diffstat (limited to 'libgomp')
-rw-r--r--libgomp/plugin/plugin-nvptx.c36
-rw-r--r--libgomp/testsuite/libgomp.c-c++-common/reverse-offload-2.c49
2 files changed, 73 insertions, 12 deletions
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 0768fca..e803f08 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1390,7 +1390,8 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
else if (rev_fn_table)
{
CUdeviceptr var;
- size_t bytes, i;
+ size_t bytes;
+ unsigned int i;
r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &var, &bytes, module,
"$offload_func_table");
if (r != CUDA_SUCCESS)
@@ -1413,12 +1414,11 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
if (rev_fn_table && *rev_fn_table && dev->rev_data == NULL)
{
- /* cuMemHostAlloc memory is accessible on the device, if unified-shared
- address is supported; this is assumed - see comment in
- nvptx_open_device for CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING. */
- CUDA_CALL_ASSERT (cuMemHostAlloc, (void **) &dev->rev_data,
- sizeof (*dev->rev_data), CU_MEMHOSTALLOC_DEVICEMAP);
- CUdeviceptr dp = (CUdeviceptr) dev->rev_data;
+ /* Get the on-device GOMP_REV_OFFLOAD_VAR variable. It should be
+ available but it might be not. One reason could be: if the user code
+ has 'omp target device(ancestor:1)' in pure hostcode, GOMP_target_ext
+ is not called on the device and, hence, it and GOMP_REV_OFFLOAD_VAR
+ are not linked in. */
CUdeviceptr device_rev_offload_var;
size_t device_rev_offload_size;
CUresult r = CUDA_CALL_NOCHECK (cuModuleGetGlobal,
@@ -1426,11 +1426,23 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
&device_rev_offload_size, module,
XSTRING (GOMP_REV_OFFLOAD_VAR));
if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuModuleGetGlobal error - GOMP_REV_OFFLOAD_VAR: %s", cuda_error (r));
- r = CUDA_CALL_NOCHECK (cuMemcpyHtoD, device_rev_offload_var, &dp,
- sizeof (dp));
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuMemcpyHtoD error: %s", cuda_error (r));
+ {
+ free (*rev_fn_table);
+ *rev_fn_table = NULL;
+ }
+ else
+ {
+ /* cuMemHostAlloc memory is accessible on the device, if
+ unified-shared address is supported; this is assumed - see comment
+ in nvptx_open_device for CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING. */
+ CUDA_CALL_ASSERT (cuMemHostAlloc, (void **) &dev->rev_data,
+ sizeof (*dev->rev_data), CU_MEMHOSTALLOC_DEVICEMAP);
+ CUdeviceptr dp = (CUdeviceptr) dev->rev_data;
+ r = CUDA_CALL_NOCHECK (cuMemcpyHtoD, device_rev_offload_var, &dp,
+ sizeof (dp));
+ if (r != CUDA_SUCCESS)
+ GOMP_PLUGIN_fatal ("cuMemcpyHtoD error: %s", cuda_error (r));
+ }
}
nvptx_set_clocktick (module, dev);
diff --git a/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-2.c b/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-2.c
new file mode 100644
index 0000000..33bd384
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/reverse-offload-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-additional-options "-foffload-options=nvptx-none=-misa=sm_35" { target { offload_target_nvptx } } } */
+
+#pragma omp requires reverse_offload
+
+int
+main ()
+{
+ int A[10];
+ int y;
+
+ for (int i = 0; i < 10; i++)
+ A[i] = 2*i;
+
+ y = 42;
+
+ /* Pointlessly copy to the default device. */
+ #pragma omp target data map(to: A)
+ {
+ /* Not enclosed in a target region (= i.e. running on the host); the
+ following is valid - it runs on the current device (= host). */
+ #pragma omp target device ( ancestor:1 ) firstprivate(y) map(to: A)
+ {
+ if (y != 42)
+ __builtin_abort ();
+ for (int i = 0; i < 10; i++)
+ if (A[i] != 2*i)
+ __builtin_abort ();
+ for (int i = 0; i < 10; i++)
+ if (A[i] != 2*i)
+ A[i] = 4*i;
+ y = 31;
+ }
+
+ if (y != 42)
+ __builtin_abort ();
+ for (int i = 0; i < 10; i++)
+ if (A[i] != 2*i)
+ __builtin_abort ();
+ }
+
+ if (y != 42)
+ __builtin_abort ();
+ for (int i = 0; i < 10; i++)
+ if (A[i] != 2*i)
+ __builtin_abort ();
+
+ return 0;
+}