aboutsummaryrefslogtreecommitdiff
path: root/libgomp
diff options
context:
space:
mode:
author Kwok Cheung Yeung <kcy@codesourcery.com> 2020-01-31 06:53:30 -0800
committer Kwok Cheung Yeung <kcy@codesourcery.com> 2020-01-31 07:13:05 -0800
commit 5a28e2727f718f14a2afccf16c4dda814af5b6f8 (patch)
tree 0d286ca3d9f96a1418b9792a72ce2a4f4a54421b /libgomp
parent 6a97d9eae4543a995f895e6739530f55f5d039a7 (diff)
download gcc-5a28e2727f718f14a2afccf16c4dda814af5b6f8.zip
gcc-5a28e2727f718f14a2afccf16c4dda814af5b6f8.tar.gz
gcc-5a28e2727f718f14a2afccf16c4dda814af5b6f8.tar.bz2
[amdgcn] Scale number of threads/workers with VGPR usage
2020-01-31  Kwok Cheung Yeung  <kcy@codesourcery.com>

gcc/
    * config/gcn/mkoffload.c (process_asm): Add sgpr_count and vgpr_count
    to definition of hsa_kernel_description.  Parse assembly to find SGPR
    and VGPR count of kernel and store in hsa_kernel_description.

libgomp/
    * plugin/plugin-gcn.c (struct hsa_kernel_description): Add sgpr_count
    and vgpr_count fields.
    (struct kernel_info): Add a field for a hsa_kernel_description.
    (run_kernel): Reduce the number of threads/workers if the requested
    number would require too many VGPRs.
    (init_basic_kernel_info): Initialize description field with the
    hsa_kernel_description entry for the kernel.
Diffstat (limited to 'libgomp')
-rw-r--r-- libgomp/ChangeLog 10
-rw-r--r-- libgomp/plugin/plugin-gcn.c 23
2 files changed, 33 insertions, 0 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 0970724..557bec34 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,13 @@
+2020-01-31 Kwok Cheung Yeung <kcy@codesourcery.com>
+
+ * plugin/plugin-gcn.c (struct hsa_kernel_description): Add sgpr_count
+ and vgpr_count fields.
+ (struct kernel_info): Add a field for a hsa_kernel_description.
+ (run_kernel): Reduce the number of threads/workers if the requested
+ number would require too many VGPRs.
+ (init_basic_kernel_info): Initialize description field with
+ the hsa_kernel_description entry for the kernel.
+
2020-01-29 Tobias Burnus <tobias@codesourcery.com>
PR bootstrap/93409
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 22676b4..25547ef 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -371,6 +371,8 @@ struct hsa_kernel_description
{
const char *name;
int oacc_dims[3]; /* Only present for GCN kernels. */
+ int sgpr_count;
+ int vpgr_count;
};
/* Mkoffload uses this structure to describe an offload variable. */
@@ -478,6 +480,8 @@ struct kernel_info
struct agent_info *agent;
/* The specific module where the kernel takes place. */
struct module_info *module;
+ /* Information provided by mkoffload associated with the kernel. */
+ struct hsa_kernel_description *description;
/* Mutex enforcing that at most one thread ever initializes a kernel for
use. A thread should have locked agent->module_rwlock for reading before
acquiring it. */
@@ -2102,6 +2106,24 @@ run_kernel (struct kernel_info *kernel, void *vars,
struct GOMP_kernel_launch_attributes *kla,
struct goacc_asyncqueue *aq, bool module_locked)
{
+ GCN_DEBUG ("SGPRs: %d, VGPRs: %d\n", kernel->description->sgpr_count,
+ kernel->description->vpgr_count);
+
+ /* Reduce the number of threads/workers if there are insufficient
+ VGPRs available to run the kernels together. */
+ if (kla->ndim == 3 && kernel->description->vpgr_count > 0)
+ {
+ int granulated_vgprs = (kernel->description->vpgr_count + 3) & ~3;
+ int max_threads = (256 / granulated_vgprs) * 4;
+ if (kla->gdims[2] > max_threads)
+ {
+ GCN_WARNING ("Too many VGPRs required to support %d threads/workers"
+ " per team/gang - reducing to %d threads/workers.\n",
+ kla->gdims[2], max_threads);
+ kla->gdims[2] = max_threads;
+ }
+ }
+
GCN_DEBUG ("GCN launch on queue: %d:%d\n", kernel->agent->device_id,
(aq ? aq->id : 0));
GCN_DEBUG ("GCN launch attribs: gdims:[");
@@ -2303,6 +2325,7 @@ init_basic_kernel_info (struct kernel_info *kernel,
kernel->agent = agent;
kernel->module = module;
kernel->name = d->name;
+ kernel->description = d;
if (pthread_mutex_init (&kernel->init_mutex, NULL))
{
GOMP_PLUGIN_error ("Failed to initialize a GCN kernel mutex");