aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Kwok Cheung Yeung <kcy@codesourcery.com> 2020-01-31 06:53:30 -0800
committer Kwok Cheung Yeung <kcy@codesourcery.com> 2020-01-31 07:13:05 -0800
commit 5a28e2727f718f14a2afccf16c4dda814af5b6f8 (patch)
tree 0d286ca3d9f96a1418b9792a72ce2a4f4a54421b /gcc
parent 6a97d9eae4543a995f895e6739530f55f5d039a7 (diff)
download gcc-5a28e2727f718f14a2afccf16c4dda814af5b6f8.zip
gcc-5a28e2727f718f14a2afccf16c4dda814af5b6f8.tar.gz
gcc-5a28e2727f718f14a2afccf16c4dda814af5b6f8.tar.bz2
[amdgcn] Scale number of threads/workers with VGPR usage
2020-01-31  Kwok Cheung Yeung  <kcy@codesourcery.com>

    gcc/
    * config/gcn/mkoffload.c (process_asm): Add sgpr_count and vgpr_count
    to definition of hsa_kernel_description.  Parse assembly to find SGPR
    and VGPR count of kernel and store in hsa_kernel_description.

    libgomp/
    * plugin/plugin-gcn.c (struct hsa_kernel_description): Add sgpr_count
    and vgpr_count fields.
    (struct kernel_info): Add a field for a hsa_kernel_description.
    (run_kernel): Reduce the number of threads/workers if the requested
    number would require too many VGPRs.
    (init_basic_kernel_info): Initialize description field with the
    hsa_kernel_description entry for the kernel.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 6
-rw-r--r-- gcc/config/gcn/mkoffload.c 67
2 files changed, 67 insertions, 6 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index f44a09d..78a8310 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2020-01-31 Kwok Cheung Yeung <kcy@codesourcery.com>
+
+ * config/gcn/mkoffload.c (process_asm): Add sgpr_count and vgpr_count
+ to definition of hsa_kernel_description. Parse assembly to find SGPR
+ and VGPR count of kernel and store in hsa_kernel_description.
+
2020-01-31 Tamar Christina <tamar.christina@arm.com>
PR rtl-optimization/91838
diff --git a/gcc/config/gcn/mkoffload.c b/gcc/config/gcn/mkoffload.c
index 0062f15..723da10 100644
--- a/gcc/config/gcn/mkoffload.c
+++ b/gcc/config/gcn/mkoffload.c
@@ -211,12 +211,13 @@ access_check (const char *name, int mode)
static void
process_asm (FILE *in, FILE *out, FILE *cfile)
{
- int fn_count = 0, var_count = 0, dims_count = 0;
- struct obstack fns_os, vars_os, varsizes_os, dims_os;
+ int fn_count = 0, var_count = 0, dims_count = 0, regcount_count = 0;
+ struct obstack fns_os, vars_os, varsizes_os, dims_os, regcounts_os;
obstack_init (&fns_os);
obstack_init (&vars_os);
obstack_init (&varsizes_os);
obstack_init (&dims_os);
+ obstack_init (&regcounts_os);
struct oaccdims
{
@@ -224,13 +225,20 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
char *name;
} dim;
+ struct regcount
+ {
+ int sgpr_count;
+ int vgpr_count;
+ char *kernel_name;
+ } regcount;
+
/* Always add _init_array and _fini_array as kernels. */
obstack_ptr_grow (&fns_os, xstrdup ("_init_array"));
obstack_ptr_grow (&fns_os, xstrdup ("_fini_array"));
fn_count += 2;
char buf[1000];
- enum { IN_CODE, IN_VARS, IN_FUNCS } state = IN_CODE;
+ enum { IN_CODE, IN_AMD_KERNEL_CODE_T, IN_VARS, IN_FUNCS } state = IN_CODE;
while (fgets (buf, sizeof (buf), in))
{
switch (state)
@@ -243,6 +251,22 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
obstack_grow (&dims_os, &dim, sizeof (dim));
dims_count++;
}
+ else if (sscanf (buf, " .amdgpu_hsa_kernel %ms\n",
+ &regcount.kernel_name) == 1)
+ break;
+
+ break;
+ }
+ case IN_AMD_KERNEL_CODE_T:
+ {
+ gcc_assert (regcount.kernel_name);
+ if (sscanf (buf, " wavefront_sgpr_count = %d\n",
+ &regcount.sgpr_count) == 1)
+ break;
+ else if (sscanf (buf, " workitem_vgpr_count = %d\n",
+ &regcount.vgpr_count) == 1)
+ break;
+
break;
}
case IN_VARS:
@@ -282,19 +306,36 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
state = IN_VARS;
else if (sscanf (buf, " .section .gnu.offload_funcs%c", &dummy) > 0)
state = IN_FUNCS;
+ else if (sscanf (buf, " .amd_kernel_code_%c", &dummy) > 0)
+ {
+ state = IN_AMD_KERNEL_CODE_T;
+ regcount.sgpr_count = regcount.vgpr_count = -1;
+ }
else if (sscanf (buf, " .section %c", &dummy) > 0
|| sscanf (buf, " .text%c", &dummy) > 0
|| sscanf (buf, " .bss%c", &dummy) > 0
|| sscanf (buf, " .data%c", &dummy) > 0
|| sscanf (buf, " .ident %c", &dummy) > 0)
state = IN_CODE;
+ else if (sscanf (buf, " .end_amd_kernel_code_%c", &dummy) > 0)
+ {
+ state = IN_CODE;
+ gcc_assert (regcount.kernel_name != NULL
+ && regcount.sgpr_count >= 0
+ && regcount.vgpr_count >= 0);
+ obstack_grow (&regcounts_os, &regcount, sizeof (regcount));
+ regcount_count++;
+ regcount.kernel_name = NULL;
+ regcount.sgpr_count = regcount.vgpr_count = -1;
+ }
- if (state == IN_CODE)
+ if (state == IN_CODE || state == IN_AMD_KERNEL_CODE_T)
fputs (buf, out);
}
char **fns = XOBFINISH (&fns_os, char **);
struct oaccdims *dims = XOBFINISH (&dims_os, struct oaccdims *);
+ struct regcount *regcounts = XOBFINISH (&regcounts_os, struct regcount *);
fprintf (cfile, "#include <stdlib.h>\n");
fprintf (cfile, "#include <stdbool.h>\n\n");
@@ -322,6 +363,8 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
fprintf (cfile, "static const struct hsa_kernel_description {\n"
" const char *name;\n"
" int oacc_dims[3];\n"
+ " int sgpr_count;\n"
+ " int vgpr_count;\n"
"} gcn_kernels[] = {\n ");
dim.d[0] = dim.d[1] = dim.d[2] = 0;
const char *comma;
@@ -329,15 +372,24 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
{
/* Find if we recorded dimensions for this function. */
int *d = dim.d; /* Previously zeroed. */
+ int sgpr_count = 0;
+ int vgpr_count = 0;
for (int j = 0; j < dims_count; j++)
if (strcmp (fns[i], dims[j].name) == 0)
{
d = dims[j].d;
break;
}
+ for (int j = 0; j < regcount_count; j++)
+ if (strcmp (fns[i], regcounts[j].kernel_name) == 0)
+ {
+ sgpr_count = regcounts[j].sgpr_count;
+ vgpr_count = regcounts[j].vgpr_count;
+ break;
+ }
- fprintf (cfile, "%s{\"%s\", {%d, %d, %d}}", comma,
- fns[i], d[0], d[1], d[2]);
+ fprintf (cfile, "%s{\"%s\", {%d, %d, %d}, %d, %d}", comma,
+ fns[i], d[0], d[1], d[2], sgpr_count, vgpr_count);
free (fns[i]);
}
@@ -346,7 +398,10 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
obstack_free (&fns_os, NULL);
for (i = 0; i < dims_count; i++)
free (dims[i].name);
+ for (i = 0; i < regcount_count; i++)
+ free (regcounts[i].kernel_name);
obstack_free (&dims_os, NULL);
+ obstack_free (&regcounts_os, NULL);
}
/* Embed an object file into a C source file. */