From 4ccb3366ade6ec9493f8ca20ab73b0da4b9816db Mon Sep 17 00:00:00 2001
From: Tobias Burnus <tburnus@baylibre.com>
Date: Wed, 29 May 2024 15:14:38 +0200
Subject: libgomp: Enable USM for some nvptx devices

A few high-end nvptx devices support the attribute
CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS; for those, unified shared
memory is supported in hardware. This patch enables support for those -
if all installed nvptx devices have this feature (as the capabilities
are per device type).

This exposes a bug in gomp_copy_back_icvs as it did before use
omp_get_mapped_ptr to find mapped variables, but that returns
the unchanged pointer in cased of shared memory. But in this case,
we have a few actually mapped pointers - like the ICV variables.
Additionally, there was a mismatch with regards to '-1' for the
device number as gomp_copy_back_icvs and omp_get_mapped_ptr count
differently. Hence, do the lookup manually.

include/ChangeLog:

	* cuda/cuda.h (CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS): Add.

libgomp/ChangeLog:

	* libgomp.texi (nvptx): Update USM description.
	* plugin/plugin-nvptx.c (GOMP_OFFLOAD_get_num_devices):
	Claim support when requesting USM and all devices support
	CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS.
	* target.c (gomp_copy_back_icvs): Fix device ptr lookup.
	(gomp_target_init): Set GOMP_OFFLOAD_CAP_SHARED_MEM is the
	devices supports USM.
---
 libgomp/plugin/plugin-nvptx.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'libgomp/plugin')

diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 5aad344..4cedc53 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1201,8 +1201,23 @@ GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask)
   if (num_devices > 0
       && ((omp_requires_mask
 	   & ~(GOMP_REQUIRES_UNIFIED_ADDRESS
+	       | GOMP_REQUIRES_UNIFIED_SHARED_MEMORY
 	       | GOMP_REQUIRES_REVERSE_OFFLOAD)) != 0))
     return -1;
+  /* Check whether host page access (direct or via migration) is supported;
+     if so, enable USM.  Currently, capabilities is per device type, hence,
+     check all devices.  */
+  if (num_devices > 0
+      && (omp_requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY))
+    for (int dev = 0; dev < num_devices; dev++)
+      {
+	int pi;
+	CUresult r;
+	r = CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &pi,
+			       CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS, dev);
+	if (r != CUDA_SUCCESS || pi == 0)
+	  return -1;
+      }
   return num_devices;
 }
 
-- 
cgit v1.1