aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Julian Brown <julian@codesourcery.com> 2020-06-26 09:07:58 -0700
committer Julian Brown <julian@codesourcery.com> 2020-09-08 13:26:42 -0700
commit 3aee3aaf48be2d3d81e381690ae9dd305d8b505f (patch)
tree 64b508de14dca98f617aba45634484996b26b7cd
parent d6d9be7c6be1b88f844a49d51302d34efd919a23 (diff)
download gcc-3aee3aaf48be2d3d81e381690ae9dd305d8b505f.zip
gcc-3aee3aaf48be2d3d81e381690ae9dd305d8b505f.tar.gz
gcc-3aee3aaf48be2d3d81e381690ae9dd305d8b505f.tar.bz2
openacc: Fix mkoffload SGPR/VGPR count parsing for HSACO v3
If an offload kernel uses a large number of VGPRs, AMD GCN hardware may need to limit the number of threads/workers launched for that kernel. The number of SGPRs/VGPRs in use is detected by mkoffload and recorded in the processed output. The patterns emitted detailing SGPR/VGPR occupancy changed between HSACO v2 and v3 though, so this patch updates parsing to account for that.

2020-09-08  Julian Brown  <julian@codesourcery.com>

gcc/
	* config/gcn/mkoffload.c (process_asm): Initialise regcount. Update scanning for SGPR/VGPR usage for HSACO v3.
-rw-r--r-- gcc/config/gcn/mkoffload.c 40
1 file changed, 25 insertions, 15 deletions
diff --git a/gcc/config/gcn/mkoffload.c b/gcc/config/gcn/mkoffload.c
index 808ce53..0983b98 100644
--- a/gcc/config/gcn/mkoffload.c
+++ b/gcc/config/gcn/mkoffload.c
@@ -432,7 +432,7 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
int sgpr_count;
int vgpr_count;
char *kernel_name;
- } regcount;
+ } regcount = { -1, -1, NULL };
/* Always add _init_array and _fini_array as kernels. */
obstack_ptr_grow (&fns_os, xstrdup ("_init_array"));
@@ -440,7 +440,12 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
fn_count += 2;
char buf[1000];
- enum { IN_CODE, IN_AMD_KERNEL_CODE_T, IN_VARS, IN_FUNCS } state = IN_CODE;
+ enum
+ { IN_CODE,
+ IN_METADATA,
+ IN_VARS,
+ IN_FUNCS
+ } state = IN_CODE;
while (fgets (buf, sizeof (buf), in))
{
switch (state)
@@ -453,21 +458,25 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
obstack_grow (&dims_os, &dim, sizeof (dim));
dims_count++;
}
- else if (sscanf (buf, " .amdgpu_hsa_kernel %ms\n",
- &regcount.kernel_name) == 1)
- break;
break;
}
- case IN_AMD_KERNEL_CODE_T:
+ case IN_METADATA:
{
- gcc_assert (regcount.kernel_name);
- if (sscanf (buf, " wavefront_sgpr_count = %d\n",
- &regcount.sgpr_count) == 1)
+ if (sscanf (buf, " - .name: %ms\n", &regcount.kernel_name) == 1)
break;
- else if (sscanf (buf, " workitem_vgpr_count = %d\n",
+ else if (sscanf (buf, " .sgpr_count: %d\n",
+ &regcount.sgpr_count) == 1)
+ {
+ gcc_assert (regcount.kernel_name);
+ break;
+ }
+ else if (sscanf (buf, " .vgpr_count: %d\n",
&regcount.vgpr_count) == 1)
- break;
+ {
+ gcc_assert (regcount.kernel_name);
+ break;
+ }
break;
}
@@ -508,9 +517,10 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
state = IN_VARS;
else if (sscanf (buf, " .section .gnu.offload_funcs%c", &dummy) > 0)
state = IN_FUNCS;
- else if (sscanf (buf, " .amd_kernel_code_%c", &dummy) > 0)
+ else if (sscanf (buf, " .amdgpu_metadata%c", &dummy) > 0)
{
- state = IN_AMD_KERNEL_CODE_T;
+ state = IN_METADATA;
+ regcount.kernel_name = NULL;
regcount.sgpr_count = regcount.vgpr_count = -1;
}
else if (sscanf (buf, " .section %c", &dummy) > 0
@@ -519,7 +529,7 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
|| sscanf (buf, " .data%c", &dummy) > 0
|| sscanf (buf, " .ident %c", &dummy) > 0)
state = IN_CODE;
- else if (sscanf (buf, " .end_amd_kernel_code_%c", &dummy) > 0)
+ else if (sscanf (buf, " .end_amdgpu_metadata%c", &dummy) > 0)
{
state = IN_CODE;
gcc_assert (regcount.kernel_name != NULL
@@ -531,7 +541,7 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
regcount.sgpr_count = regcount.vgpr_count = -1;
}
- if (state == IN_CODE || state == IN_AMD_KERNEL_CODE_T)
+ if (state == IN_CODE || state == IN_METADATA)
fputs (buf, out);
}