diff options
author | Andrew Stubbs <ams@codesourcery.com> | 2019-07-25 11:26:45 +0100 |
---|---|---|
committer | Thomas Schwinge <thomas@codesourcery.com> | 2020-03-03 12:51:25 +0100 |
commit | 7f925450379fe7fcaecafd38d6fa67aaa499cd6d (patch) | |
tree | f893167cc2e5390339330b96e4e22fa688bc8cd6 /libgomp | |
parent | 993d229d9afc275a5f44023d74098bbf03200d97 (diff) | |
download | gcc-7f925450379fe7fcaecafd38d6fa67aaa499cd6d.zip gcc-7f925450379fe7fcaecafd38d6fa67aaa499cd6d.tar.gz gcc-7f925450379fe7fcaecafd38d6fa67aaa499cd6d.tar.bz2 |
Detect number of GPU compute units.
2019-09-10 Andrew Stubbs <ams@codesourcery.com>
libgomp/
* plugin/plugin-gcn.c (HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT): Define.
(dump_hsa_agent_info): Dump compute unit count.
(get_cu_count): New function.
(parse_target_attributes): Use get_cu_count for default gdims.
(gcn_exec): Likewise.
(cherry picked from openacc-gcc-9-branch commit
4bc3e873bcaa3b671dc7c6afbfaa02a4459daae6)
Diffstat (limited to 'libgomp')
-rw-r--r-- | libgomp/ChangeLog.omp | 8 | ||||
-rw-r--r-- | libgomp/plugin/plugin-gcn.c | 31 |
2 files changed, 36 insertions, 3 deletions
diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index d743975..9801857 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,5 +1,13 @@
 2019-09-10 Andrew Stubbs <ams@codesourcery.com>
 
+        * plugin/plugin-gcn.c (HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT): Define.
+        (dump_hsa_agent_info): Dump compute unit count.
+        (get_cu_count): New function.
+        (parse_target_attributes): Use get_cu_count for default gdims.
+        (gcn_exec): Likewise.
+
+2019-09-10 Andrew Stubbs <ams@codesourcery.com>
+
         * plugin/plugin-gcn.c (obstack_chunk_alloc): Delete.
         (obstack_chunk_free): Delete.
         (obstack.h): Remove include.
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 6c00c81..9d03e4f 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -44,6 +44,11 @@
 #include "oacc-int.h"
 #include <assert.h>
 
+/* Additional definitions not in HSA 1.1.
+   FIXME: this needs to be updated in hsa.h for upstream, but the only source
+   right now is the ROCr source which may cause license issues. */
+#define HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT 0xA002
+
 /* These probably won't be in elf.h for a while. */
 #define R_AMDGPU_NONE 0
 #define R_AMDGPU_ABS32_LO 1 /* (S + A) & 0xFFFFFFFF */
@@ -845,6 +850,14 @@ dump_hsa_agent_info (hsa_agent_t agent, void *data __attribute__((unused)))
   else
     HSA_DEBUG ("HSA_AGENT_INFO_DEVICE: FAILED\n");
 
+  uint32_t cu_count;
+  status = hsa_fns.hsa_agent_get_info_fn
+    (agent, HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &cu_count);
+  if (status == HSA_STATUS_SUCCESS)
+    HSA_DEBUG ("HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT: %u\n", cu_count);
+  else
+    HSA_DEBUG ("HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT: FAILED\n");
+
   uint32_t size;
   status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_WAVEFRONT_SIZE,
                                           &size);
@@ -2449,6 +2462,18 @@ init_kernel (struct kernel_info *kernel)
                        "mutex");
 }
 
+static int
+get_cu_count (struct agent_info *agent)
+{
+  uint32_t cu_count;
+  hsa_status_t status = hsa_fns.hsa_agent_get_info_fn
+    (agent->id, HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &cu_count);
+  if (status == HSA_STATUS_SUCCESS)
+    return cu_count;
+  else
+    return 64; /* The usual number for older devices. */
+}
+
 /* Calculate the maximum grid size for OMP threads / OACC workers.
    This depends on the kernel's resource usage levels. */
 
@@ -2527,8 +2552,8 @@ parse_target_attributes (void **input,
     }
 
   def->ndim = 3;
-  /* Fiji has 64 CUs. */
-  def->gdims[0] = (gcn_teams > 0) ? gcn_teams : 64;
+  /* Fiji has 64 CUs, but Vega20 has 60. */
+  def->gdims[0] = (gcn_teams > 0) ? gcn_teams : get_cu_count (agent);
   /* Each thread is 64 work items wide. */
   def->gdims[1] = 64;
   /* A work group can have 16 wavefronts. */
@@ -3308,7 +3333,7 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum, void **hostaddrs,
      problem size, so let's do a reasonable number of single-worker gangs.
      64 gangs matches a typical Fiji device. */
 
-  if (dims[0] == 0) dims[0] = 64; /* Gangs. */
+  if (dims[0] == 0) dims[0] = get_cu_count (kernel->agent); /* Gangs. */
   if (dims[1] == 0) dims[1] = 16; /* Workers. */
 
   /* The incoming dimensions are expressed in terms of gangs, workers, and