libgomp/nvptx: Prepare for reverse-offload callback handling

This patch adds a stub 'gomp_target_rev' in the host's target.c, which will later handle the reverse offload. For nvptx, it adds support for forwarding the offload gomp_target_ext call to the host by setting values in a struct on the device and querying it on the host - invoking gomp_target_rev on the result. include/ChangeLog: * cuda/cuda.h (enum CUdevice_attribute): Add CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING. (CU_MEMHOSTALLOC_DEVICEMAP): Define. (cuMemHostAlloc): Add prototype. libgomp/ChangeLog: * config/nvptx/icv-device.c (GOMP_DEVICE_NUM_VAR): Remove 'static' for this variable. * config/nvptx/libgomp-nvptx.h: New file. * config/nvptx/target.c: Include it. (GOMP_ADDITIONAL_ICVS): Declare extern var. (GOMP_REV_OFFLOAD_VAR): Declare var. (GOMP_target_ext): Handle reverse offload. * libgomp-plugin.h (GOMP_PLUGIN_target_rev): New prototype. * libgomp-plugin.c (GOMP_PLUGIN_target_rev): New, call ... * target.c (gomp_target_rev): ... this new stub function. * libgomp.h (gomp_target_rev): Declare. * libgomp.map (GOMP_PLUGIN_1.4): New; add GOMP_PLUGIN_target_rev. * plugin/cuda-lib.def (cuMemHostAlloc): Add. * plugin/plugin-nvptx.c: Include libgomp-nvptx.h. (struct ptx_device): Add rev_data member. (nvptx_open_device): Remove async_engines query, last used in r10-304-g1f4c5b9b; add unified-address assert check. (GOMP_OFFLOAD_get_num_devices): Claim unified address support. (GOMP_OFFLOAD_load_image): Free rev_fn_table if no offload functions exist. Make offload var available on host and device. (rev_off_dev_to_host_cpy, rev_off_host_to_dev_cpy): New. (GOMP_OFFLOAD_run): Handle reverse offload.
author: Tobias Burnus <tobias@codesourcery.com> 2022-10-24 16:58:43 +0200
committer: Tobias Burnus <tobias@codesourcery.com> 2022-10-24 17:04:08 +0200
commit: 131d18e928a3ea1ab2d3bf61aa92d68a8a254609 (patch)
tree: 9c379ef9c639a56d0b1146aada7cef937328a89e /include
parent: a096036589d82175a0f729c2dab73c9a527d075d (diff)
download: gcc-131d18e928a3ea1ab2d3bf61aa92d68a8a254609.zip
gcc-131d18e928a3ea1ab2d3bf61aa92d68a8a254609.tar.gz
gcc-131d18e928a3ea1ab2d3bf61aa92d68a8a254609.tar.bz2
1 files changed, 3 insertions, 0 deletions
diff --git a/include/cuda/cuda.h b/include/cuda/cuda.h
index 3938d05..e081f04 100644
--- a/include/cuda/cuda.h
+++ b/include/cuda/cuda.h
@@ -77,6 +77,7 @@ typedef enum {
   CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
   CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
   CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
+  CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
   CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82
 } CUdevice_attribute;
 
@@ -113,6 +114,7 @@ enum {
 #define CU_LAUNCH_PARAM_END ((void *) 0)
 #define CU_LAUNCH_PARAM_BUFFER_POINTER ((void *) 1)
 #define CU_LAUNCH_PARAM_BUFFER_SIZE ((void *) 2)
+#define CU_MEMHOSTALLOC_DEVICEMAP 0x02U
 
 enum {
   CU_STREAM_DEFAULT = 0,
@@ -169,6 +171,7 @@ CUresult cuMemGetInfo (size_t *, size_t *);
 CUresult cuMemAlloc (CUdeviceptr *, size_t);
 #define cuMemAllocHost cuMemAllocHost_v2
 CUresult cuMemAllocHost (void **, size_t);
+CUresult cuMemHostAlloc (void **, size_t, unsigned int);
 CUresult cuMemcpy (CUdeviceptr, CUdeviceptr, size_t);
 #define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2
 CUresult cuMemcpyDtoDAsync (CUdeviceptr, CUdeviceptr, size_t, CUstream);
author	Tobias Burnus <tobias@codesourcery.com>	2022-10-24 16:58:43 +0200
committer	Tobias Burnus <tobias@codesourcery.com>	2022-10-24 17:04:08 +0200
commit	131d18e928a3ea1ab2d3bf61aa92d68a8a254609 (patch)
tree	9c379ef9c639a56d0b1146aada7cef937328a89e /include
parent	a096036589d82175a0f729c2dab73c9a527d075d (diff)
download	gcc-131d18e928a3ea1ab2d3bf61aa92d68a8a254609.zip gcc-131d18e928a3ea1ab2d3bf61aa92d68a8a254609.tar.gz gcc-131d18e928a3ea1ab2d3bf61aa92d68a8a254609.tar.bz2