libgomp: Enable USM for AMD APUs and MI200 devices

If HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT is true, all GPUs on the system support unified shared memory. That's the case for APUs and MI200 devices when XNACK is enabled. On supported devices, XNACK can be enabled by setting the environment variable HSA_XNACK=1; if it is disabled, USM code will use host fallback. gcc/ChangeLog: * config/gcn/gcn-hsa.h (gcn_local_sym_hash): Fix typo. include/ChangeLog: * hsa.h (HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT): Add enum value. libgomp/ChangeLog: * libgomp.texi (gcn): Update USM handling. * plugin/plugin-gcn.c (GOMP_OFFLOAD_get_num_devices): Handle USM if HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT is true.
author: Tobias Burnus <tburnus@baylibre.com> 2024-05-29 15:29:06 +0200
committer: Tobias Burnus <tburnus@baylibre.com> 2024-05-29 15:29:06 +0200
commit: 18f477980c8597fe3dca2c2e8bd533c0c2b17aa6 (patch)
tree: 5d0dcb61f7e305df3994ac6250b906373f8937ff
parent: 4ccb3366ade6ec9493f8ca20ab73b0da4b9816db (diff)
download: gcc-18f477980c8597fe3dca2c2e8bd533c0c2b17aa6.zip
gcc-18f477980c8597fe3dca2c2e8bd533c0c2b17aa6.tar.gz
gcc-18f477980c8597fe3dca2c2e8bd533c0c2b17aa6.tar.bz2
4 files changed, 28 insertions, 4 deletions
diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h
index 4611bc5..0322055 100644
--- a/gcc/config/gcn/gcn-hsa.h
+++ b/gcc/config/gcn/gcn-hsa.h
@@ -80,7 +80,7 @@ extern unsigned int gcn_local_sym_hash (const char *name);
    writes a new AMD GPU object file and the ABI version needs to be the
    same. - LLVM <= 17 defaults to 4 while LLVM >= 18 defaults to 5.
    GCC supports LLVM >= 13.0.1 and only LLVM >= 14 supports version 5.
-   Note that Fiji is only suppored with LLVM <= 17 as version 3 is no longer
+   Note that Fiji is only supported with LLVM <= 17 as version 3 is no longer
    supported in LLVM >= 18.  */
 #define ABI_VERSION_SPEC "march=fiji:--amdhsa-code-object-version=3;" \
 			 "!march=*|march=*:--amdhsa-code-object-version=4"
diff --git a/include/hsa.h b/include/hsa.h
index f9b5d9d..3c7be95 100644
--- a/include/hsa.h
+++ b/include/hsa.h
@@ -466,7 +466,9 @@ typedef enum {
   /**
   * String containing the ROCr build identifier.
   */
-  HSA_AMD_SYSTEM_INFO_BUILD_VERSION = 0x200
+  HSA_AMD_SYSTEM_INFO_BUILD_VERSION = 0x200,
+
+  HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT = 0x202
 } hsa_system_info_t;
 
 /**
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 2286863..e79bd7a 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -6360,8 +6360,13 @@ The implementation remark:
       such that the next reverse offload region is only executed after the previous
       one returned.
 @item OpenMP code that has a @code{requires} directive with
-      @code{unified_shared_memory} will remove any GCN device from the list of
-      available devices (``host fallback'').
+      @code{unified_shared_memory} is only supported if all AMD GPUs have the
+      @code{HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT} property; for
+      discrete GPUs, this may require setting the @code{HSA_XNACK} environment
+      variable to @samp{1}; for systems with both an APU and a discrete GPU that
+      does not support XNACK, consider using @code{ROCR_VISIBLE_DEVICES} to
+      enable only the APU.  If not supported, all AMD GPU devices are removed
+      from the list of available devices (``host fallback'').
 @item The available stack size can be changed using the @code{GCN_STACK_SIZE}
       environment variable; the default is 32 kiB per thread.
 @item Low-latency memory (@code{omp_low_lat_mem_space}) is supported when the
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 3cdc7ba..3d882b5 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -3355,8 +3355,25 @@ GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask)
   if (hsa_context.agent_count > 0
       && ((omp_requires_mask
 	   & ~(GOMP_REQUIRES_UNIFIED_ADDRESS
+	       | GOMP_REQUIRES_UNIFIED_SHARED_MEMORY
 	       | GOMP_REQUIRES_REVERSE_OFFLOAD)) != 0))
     return -1;
+  /* Check whether host page access is supported; this is per system level
+     (all GPUs supported by HSA).  While intrinsically true for APUs, it
+     requires XNACK support for discrete GPUs.  */
+  if (hsa_context.agent_count > 0
+      && (omp_requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY))
+    {
+      bool b;
+      hsa_system_info_t type = HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT;
+      hsa_status_t status = hsa_fns.hsa_system_get_info_fn (type, &b);
+      if (status != HSA_STATUS_SUCCESS)
+	GOMP_PLUGIN_error ("HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT "
+			   "failed");
+      if (!b)
+	return -1;
+    }
+
   return hsa_context.agent_count;
 }
author	Tobias Burnus <tburnus@baylibre.com>	2024-05-29 15:29:06 +0200
committer	Tobias Burnus <tburnus@baylibre.com>	2024-05-29 15:29:06 +0200
commit	18f477980c8597fe3dca2c2e8bd533c0c2b17aa6 (patch)
tree	5d0dcb61f7e305df3994ac6250b906373f8937ff
parent	4ccb3366ade6ec9493f8ca20ab73b0da4b9816db (diff)
download	gcc-18f477980c8597fe3dca2c2e8bd533c0c2b17aa6.zip gcc-18f477980c8597fe3dca2c2e8bd533c0c2b17aa6.tar.gz gcc-18f477980c8597fe3dca2c2e8bd533c0c2b17aa6.tar.bz2