aboutsummaryrefslogtreecommitdiff
path: root/libgomp/plugin
diff options
context:
space:
mode:
authorMaciej W. Rozycki <macro@codesourcery.com>2019-12-22 19:54:09 +0000
committerFrederik Harwath <frederik@gcc.gnu.org>2019-12-22 19:54:09 +0000
commit6c84c8bf9b29ce89f5d263c1fa3630896fcb23f4 (patch)
tree0e15cf12a4c3396702ed004deabe3c86ea7ac18e /libgomp/plugin
parentedadb8adc3563bf782e4781bf3be6425af566368 (diff)
downloadgcc-6c84c8bf9b29ce89f5d263c1fa3630896fcb23f4.zip
gcc-6c84c8bf9b29ce89f5d263c1fa3630896fcb23f4.tar.gz
gcc-6c84c8bf9b29ce89f5d263c1fa3630896fcb23f4.tar.bz2
Add OpenACC 2.6 `acc_get_property' support
Add generic support for the OpenACC 2.6 `acc_get_property' and `acc_get_property_string' routines, as well as full handlers for the host and the NVPTX offload targets and minimal handlers for the HSA, Intel MIC, and AMD GCN offload targets. Included are C/C++ and Fortran tests that, in particular, print the property values for acc_property_vendor, acc_property_memory, acc_property_free_memory, acc_property_name, and acc_property_driver. The output looks as follows: Vendor: GNU Name: GOMP Total memory: 0 Free memory: 0 Driver: 1.0 with the host driver (where the memory related properties are not supported for the host device and yield 0, conforming to the standard) and output like: Vendor: Nvidia Total memory: 12651462656 Free memory: 12202737664 Name: TITAN V Driver: CUDA Driver 9.1 with the NVPTX driver. 2019-12-22 Maciej W. Rozycki <macro@codesourcery.com> Frederik Harwath <frederik@codesourcery.com> Thomas Schwinge <tschwinge@codesourcery.com> include/ * gomp-constants.h (gomp_device_property): New enum. libgomp/ * libgomp.h (gomp_device_descr): Add `get_property_func' member. * libgomp-plugin.h (gomp_device_property_value): New union. (gomp_device_property_value): New prototype. * openacc.h (acc_device_t): Add `acc_device_current' enumeration constant. (acc_device_property_t): New enum. (acc_get_property, acc_get_property_string): New prototypes. * oacc-init.c (acc_get_device_type): Also assert that result is not `acc_device_current'. (get_property_any, acc_get_property, acc_get_property_string): New functions. * openacc.f90 (openacc_kinds): Add `acc_device_current' and `acc_property_memory', `acc_property_free_memory', `acc_property_name', `acc_property_vendor' and `acc_property_driver' constants. Add `acc_device_property' data type. (openacc_internal): Add `acc_get_property' and `acc_get_property_string' interfaces. Add `acc_get_property_h', `acc_get_property_string_h', `acc_get_property_l' and `acc_get_property_string_l'. * oacc-host.c (host_get_property): New function. (host_dispatch): Wire it. * target.c (gomp_load_plugin_for_device): Handle `get_property'. * libgomp.map (OACC_2.6): Add `acc_get_property', `acc_get_property_h_', `acc_get_property_string' and `acc_get_property_string_h_' symbols. * libgomp.texi (OpenACC Runtime Library Routines): Add `acc_get_property'. (acc_get_property): New node. * plugin/plugin-gcn.c (GOMP_OFFLOAD_get_property): New function (stub). * plugin/plugin-hsa.c (GOMP_OFFLOAD_get_property): New function. * plugin/plugin-nvptx.c (CUDA_CALLS): Add `cuDeviceGetName', `cuDeviceTotalMem', `cuDriverGetVersion' and `cuMemGetInfo' calls. (GOMP_OFFLOAD_get_property): New function. (struct ptx_device): Add new field "name". (cuda_driver_version_s): Add new static variable ... (nvptx_init): ... and init from here. * testsuite/libgomp.oacc-c-c++-common/acc_get_property.c: New test. * testsuite/libgomp.oacc-c-c++-common/acc_get_property-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/acc_get_property-3.c: New test. * testsuite/libgomp.oacc-c-c++-common/acc_get_property-aux.c: New file with test helper functions. * testsuite/libgomp.oacc-fortran/acc_get_property.f90: New test. liboffloadmic/ * plugin/libgomp-plugin-intelmic.cpp (GOMP_OFFLOAD_get_property): New function. Reviewed-by: Thomas Schwinge <thomas@codesourcery.com> Co-Authored-By: Frederik Harwath <frederik@codesourcery.com> Co-Authored-By: Thomas Schwinge <tschwinge@codesourcery.com> From-SVN: r279710
Diffstat (limited to 'libgomp/plugin')
-rw-r--r--libgomp/plugin/cuda-lib.def4
-rw-r--r--libgomp/plugin/plugin-gcn.c11
-rw-r--r--libgomp/plugin/plugin-hsa.c26
-rw-r--r--libgomp/plugin/plugin-nvptx.c87
4 files changed, 127 insertions, 1 deletions
diff --git a/libgomp/plugin/cuda-lib.def b/libgomp/plugin/cuda-lib.def
index a16badc..cd91b39 100644
--- a/libgomp/plugin/cuda-lib.def
+++ b/libgomp/plugin/cuda-lib.def
@@ -8,6 +8,9 @@ CUDA_ONE_CALL (cuCtxSynchronize)
CUDA_ONE_CALL (cuDeviceGet)
CUDA_ONE_CALL (cuDeviceGetAttribute)
CUDA_ONE_CALL (cuDeviceGetCount)
+CUDA_ONE_CALL (cuDeviceGetName)
+CUDA_ONE_CALL (cuDeviceTotalMem)
+CUDA_ONE_CALL (cuDriverGetVersion)
CUDA_ONE_CALL (cuEventCreate)
CUDA_ONE_CALL (cuEventDestroy)
CUDA_ONE_CALL (cuEventElapsedTime)
@@ -35,6 +38,7 @@ CUDA_ONE_CALL (cuMemcpyHtoDAsync)
CUDA_ONE_CALL (cuMemFree)
CUDA_ONE_CALL (cuMemFreeHost)
CUDA_ONE_CALL (cuMemGetAddressRange)
+CUDA_ONE_CALL (cuMemGetInfo)
CUDA_ONE_CALL (cuMemHostGetDevicePointer)
CUDA_ONE_CALL (cuModuleGetFunction)
CUDA_ONE_CALL (cuModuleGetGlobal)
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 04fe472..32239c7 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -3236,6 +3236,17 @@ GOMP_OFFLOAD_get_num_devices (void)
return hsa_context.agent_count;
}
+union gomp_device_property_value
+GOMP_OFFLOAD_get_property (int device, int prop)
+{
+ /* Stub. Check device and return default value for unsupported properties. */
+ /* TODO: Implement this function. */
+ get_agent_info (device);
+
+ union gomp_device_property_value nullval = { .val = 0 };
+ return nullval;
+}
+
/* Initialize device (agent) number N so that it can be used for computation.
Return TRUE on success. */
diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c
index 409e138..259f704 100644
--- a/libgomp/plugin/plugin-hsa.c
+++ b/libgomp/plugin/plugin-hsa.c
@@ -699,6 +699,32 @@ GOMP_OFFLOAD_get_num_devices (void)
return hsa_context.agent_count;
}
+/* Part of the libgomp plugin interface. Return the value of property
+ PROP of agent number N. */
+
+union gomp_device_property_value
+GOMP_OFFLOAD_get_property (int n, int prop)
+{
+ union gomp_device_property_value nullval = { .val = 0 };
+
+ if (!init_hsa_context ())
+ return nullval;
+ if (n >= hsa_context.agent_count)
+ {
+ GOMP_PLUGIN_error
+ ("Request for a property of a non-existing HSA device %i", n);
+ return nullval;
+ }
+
+ switch (prop)
+ {
+ case GOMP_DEVICE_PROPERTY_VENDOR:
+ return (union gomp_device_property_value) { .ptr = "HSA" };
+ default:
+ return nullval;
+ }
+}
+
/* Part of the libgomp plugin interface. Initialize agent number N so that it
can be used for computation. Return TRUE on success. */
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 911d0f6..80e5475 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -189,6 +189,10 @@ cuda_error (CUresult r)
return fallback;
}
+/* Version of the CUDA Toolkit in the same MAJOR.MINOR format that is used by
+ Nvidia, such as in the 'deviceQuery' program (Nvidia's CUDA samples). */
+static char cuda_driver_version_s[30];
+
static unsigned int instantiated_devices = 0;
static pthread_mutex_t ptx_dev_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -284,7 +288,7 @@ struct ptx_device
bool map;
bool concur;
bool mkern;
- int mode;
+ int mode;
int clock_khz;
int num_sms;
int regs_per_block;
@@ -294,6 +298,9 @@ struct ptx_device
int max_threads_per_multiprocessor;
int default_dims[GOMP_DIM_MAX];
+ /* Length as used by the CUDA Runtime API ('struct cudaDeviceProp'). */
+ char name[256];
+
struct ptx_image_data *images; /* Images loaded on device. */
pthread_mutex_t image_lock; /* Lock for above list. */
@@ -327,9 +334,16 @@ nvptx_init (void)
CUDA_CALL (cuInit, 0);
+ int cuda_driver_version;
+ CUDA_CALL_ERET (NULL, cuDriverGetVersion, &cuda_driver_version);
+ snprintf (cuda_driver_version_s, sizeof cuda_driver_version_s,
+ "CUDA Driver %u.%u",
+ cuda_driver_version / 1000, cuda_driver_version % 1000 / 10);
+
CUDA_CALL (cuDeviceGetCount, &ndevs);
ptx_devices = GOMP_PLUGIN_malloc_cleared (sizeof (struct ptx_device *)
* ndevs);
+
return true;
}
@@ -491,6 +505,9 @@ nvptx_open_device (int n)
for (int i = 0; i != GOMP_DIM_MAX; i++)
ptx_dev->default_dims[i] = 0;
+ CUDA_CALL_ERET (NULL, cuDeviceGetName, ptx_dev->name, sizeof ptx_dev->name,
+ dev);
+
ptx_dev->images = NULL;
pthread_mutex_init (&ptx_dev->image_lock, NULL);
@@ -1104,6 +1121,74 @@ GOMP_OFFLOAD_get_num_devices (void)
return nvptx_get_num_devices ();
}
+union gomp_device_property_value
+GOMP_OFFLOAD_get_property (int n, int prop)
+{
+ union gomp_device_property_value propval = { .val = 0 };
+
+ pthread_mutex_lock (&ptx_dev_lock);
+
+ if (n >= nvptx_get_num_devices () || n < 0 || ptx_devices[n] == NULL)
+ {
+ pthread_mutex_unlock (&ptx_dev_lock);
+ return propval;
+ }
+
+ struct ptx_device *ptx_dev = ptx_devices[n];
+ switch (prop)
+ {
+ case GOMP_DEVICE_PROPERTY_MEMORY:
+ {
+ size_t total_mem;
+
+ CUDA_CALL_ERET (propval, cuDeviceTotalMem, &total_mem, ptx_dev->dev);
+ propval.val = total_mem;
+ }
+ break;
+ case GOMP_DEVICE_PROPERTY_FREE_MEMORY:
+ {
+ size_t total_mem;
+ size_t free_mem;
+ CUdevice ctxdev;
+
+ CUDA_CALL_ERET (propval, cuCtxGetDevice, &ctxdev);
+ if (ptx_dev->dev == ctxdev)
+ CUDA_CALL_ERET (propval, cuMemGetInfo, &free_mem, &total_mem);
+ else if (ptx_dev->ctx)
+ {
+ CUcontext old_ctx;
+
+ CUDA_CALL_ERET (propval, cuCtxPushCurrent, ptx_dev->ctx);
+ CUDA_CALL_ERET (propval, cuMemGetInfo, &free_mem, &total_mem);
+ CUDA_CALL_ASSERT (cuCtxPopCurrent, &old_ctx);
+ }
+ else
+ {
+ CUcontext new_ctx;
+
+ CUDA_CALL_ERET (propval, cuCtxCreate, &new_ctx, CU_CTX_SCHED_AUTO,
+ ptx_dev->dev);
+ CUDA_CALL_ERET (propval, cuMemGetInfo, &free_mem, &total_mem);
+ CUDA_CALL_ASSERT (cuCtxDestroy, new_ctx);
+ }
+ propval.val = free_mem;
+ }
+ break;
+ case GOMP_DEVICE_PROPERTY_NAME:
+ propval.ptr = ptx_dev->name;
+ break;
+ case GOMP_DEVICE_PROPERTY_VENDOR:
+ propval.ptr = "Nvidia";
+ break;
+ case GOMP_DEVICE_PROPERTY_DRIVER:
+ propval.ptr = cuda_driver_version_s;
+ break;
+ }
+
+ pthread_mutex_unlock (&ptx_dev_lock);
+ return propval;
+}
+
bool
GOMP_OFFLOAD_init_device (int n)
{