about | summary | refs | log | tree | commit | diff
path: root/libgomp
diff options
context:
space:
mode:
author: Andrew Stubbs <ams@codesourcery.com> 2019-07-25 11:26:45 +0100
committer: Thomas Schwinge <thomas@codesourcery.com> 2020-03-03 12:51:25 +0100
commit: 7f925450379fe7fcaecafd38d6fa67aaa499cd6d (patch)
tree: f893167cc2e5390339330b96e4e22fa688bc8cd6 /libgomp
parent: 993d229d9afc275a5f44023d74098bbf03200d97 (diff)
download: gcc-7f925450379fe7fcaecafd38d6fa67aaa499cd6d.zip
gcc-7f925450379fe7fcaecafd38d6fa67aaa499cd6d.tar.gz
gcc-7f925450379fe7fcaecafd38d6fa67aaa499cd6d.tar.bz2
Detect number of GPU compute units.
2019-09-10  Andrew Stubbs  <ams@codesourcery.com>

	libgomp/
	* plugin/plugin-gcn.c (HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT): Define.
	(dump_hsa_agent_info): Dump compute unit count.
	(get_cu_count): New function.
	(parse_target_attributes): Use get_cu_count for default gdims.
	(gcn_exec): Likewise.

(cherry picked from openacc-gcc-9-branch commit 4bc3e873bcaa3b671dc7c6afbfaa02a4459daae6)
Diffstat (limited to 'libgomp')
-rw-r--r-- libgomp/ChangeLog.omp 8
-rw-r--r-- libgomp/plugin/plugin-gcn.c 31
2 files changed, 36 insertions, 3 deletions
diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index d743975..9801857 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,5 +1,13 @@
2019-09-10 Andrew Stubbs <ams@codesourcery.com>
+ * plugin/plugin-gcn.c (HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT): Define.
+ (dump_hsa_agent_info): Dump compute unit count.
+ (get_cu_count): New function.
+ (parse_target_attributes): Use get_cu_count for default gdims.
+ (gcn_exec): Likewise.
+
+2019-09-10 Andrew Stubbs <ams@codesourcery.com>
+
* plugin/plugin-gcn.c (obstack_chunk_alloc): Delete.
(obstack_chunk_free): Delete.
(obstack.h): Remove include.
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 6c00c81..9d03e4f 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -44,6 +44,11 @@
#include "oacc-int.h"
#include <assert.h>
+/* Additional definitions not in HSA 1.1.
+ FIXME: this needs to be updated in hsa.h for upstream, but the only source
+ right now is the ROCr source which may cause license issues. */
+#define HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT 0xA002
+
/* These probably won't be in elf.h for a while. */
#define R_AMDGPU_NONE 0
#define R_AMDGPU_ABS32_LO 1 /* (S + A) & 0xFFFFFFFF */
@@ -845,6 +850,14 @@ dump_hsa_agent_info (hsa_agent_t agent, void *data __attribute__((unused)))
else
HSA_DEBUG ("HSA_AGENT_INFO_DEVICE: FAILED\n");
+ uint32_t cu_count;
+ status = hsa_fns.hsa_agent_get_info_fn
+ (agent, HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &cu_count);
+ if (status == HSA_STATUS_SUCCESS)
+ HSA_DEBUG ("HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT: %u\n", cu_count);
+ else
+ HSA_DEBUG ("HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT: FAILED\n");
+
uint32_t size;
status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_WAVEFRONT_SIZE,
&size);
@@ -2449,6 +2462,18 @@ init_kernel (struct kernel_info *kernel)
"mutex");
}
+static int /* Returns the agent's compute unit count, or 64 if the HSA query fails. */
+get_cu_count (struct agent_info *agent)
+{
+ uint32_t cu_count; /* Output slot for the agent-info query below. */
+ hsa_status_t status = hsa_fns.hsa_agent_get_info_fn
+ (agent->id, HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &cu_count);
+ if (status == HSA_STATUS_SUCCESS)
+ return cu_count; /* uint32_t value converted to the int return type. */
+ else
+ return 64; /* Query failed: fall back to the usual number for older devices. */
+}
+
/* Calculate the maximum grid size for OMP threads / OACC workers.
This depends on the kernel's resource usage levels. */
@@ -2527,8 +2552,8 @@ parse_target_attributes (void **input,
}
def->ndim = 3;
- /* Fiji has 64 CUs. */
- def->gdims[0] = (gcn_teams > 0) ? gcn_teams : 64;
+ /* Fiji has 64 CUs, but Vega20 has 60. */
+ def->gdims[0] = (gcn_teams > 0) ? gcn_teams : get_cu_count (agent);
/* Each thread is 64 work items wide. */
def->gdims[1] = 64;
/* A work group can have 16 wavefronts. */
@@ -3308,7 +3333,7 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum, void **hostaddrs,
problem size, so let's do a reasonable number of single-worker gangs.
64 gangs matches a typical Fiji device. */
- if (dims[0] == 0) dims[0] = 64; /* Gangs. */
+ if (dims[0] == 0) dims[0] = get_cu_count (kernel->agent); /* Gangs. */
if (dims[1] == 0) dims[1] = 16; /* Workers. */
/* The incoming dimensions are expressed in terms of gangs, workers, and