aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorAndrew Stubbs <ams@codesourcery.com>2023-01-30 14:43:00 +0000
committerAndrew Stubbs <ams@codesourcery.com>2023-12-06 16:48:57 +0000
commite7d6c277fa28c0b9b621d23c471e0388d2912644 (patch)
tree3ef9390ef49f8deefa281fd7ad2a145ad85254a6 /gcc
parente9a19ead498fcc89186b724c6e76854f7751a89b (diff)
downloadgcc-e7d6c277fa28c0b9b621d23c471e0388d2912644.zip
gcc-e7d6c277fa28c0b9b621d23c471e0388d2912644.tar.gz
gcc-e7d6c277fa28c0b9b621d23c471e0388d2912644.tar.bz2
amdgcn, libgomp: low-latency allocator
This implements the OpenMP low-latency memory allocator for AMD GCN using the small per-team LDS memory (Local Data Store). Since addresses can now refer to LDS space, the "Global" address space is no longer compatible. This patch therefore switches the backend to use entirely "Flat" addressing (which supports both memories). A future patch will re-enable "global" instructions for cases where it is known to be safe to do so. gcc/ChangeLog: * config/gcn/gcn-builtins.def (DISPATCH_PTR): New built-in. * config/gcn/gcn.cc (gcn_init_machine_status): Disable global addressing. (gcn_expand_builtin_1): Implement GCN_BUILTIN_DISPATCH_PTR. libgomp/ChangeLog: * config/gcn/libgomp-gcn.h (TEAM_ARENA_START): Move to here. (TEAM_ARENA_FREE): Likewise. (TEAM_ARENA_END): Likewise. (GCN_LOWLAT_HEAP): New. * config/gcn/team.c (LITTLEENDIAN_CPU): New, and import hsa.h. (__gcn_lowlat_init): New prototype. (gomp_gcn_enter_kernel): Initialize the low-latency heap. * libgomp.h (TEAM_ARENA_START): Move to config/gcn/libgomp-gcn.h. (TEAM_ARENA_FREE): Likewise. (TEAM_ARENA_END): Likewise. * plugin/plugin-gcn.c (lowlat_size): New variable. (print_kernel_dispatch): Label the group_segment_size purpose. (init_environment_variables): Read GOMP_GCN_LOWLAT_POOL. (create_kernel_dispatch): Pass low-latency heap allocation to kernel. (run_kernel): Use shadow; don't assume values. * testsuite/libgomp.c/omp_alloc-traits.c: Enable for amdgcn. * config/gcn/allocator.c: New file. * libgomp.texi: Document low-latency implementation details.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/gcn/gcn-builtins.def2
-rw-r--r--gcc/config/gcn/gcn.cc16
2 files changed, 17 insertions, 1 deletions
diff --git a/gcc/config/gcn/gcn-builtins.def b/gcc/config/gcn/gcn-builtins.def
index 636a8e7..471457d 100644
--- a/gcc/config/gcn/gcn-builtins.def
+++ b/gcc/config/gcn/gcn-builtins.def
@@ -164,6 +164,8 @@ DEF_BUILTIN (FIRST_CALL_THIS_THREAD_P, -1, "first_call_this_thread_p", B_INSN,
_A1 (GCN_BTI_BOOL), gcn_expand_builtin_1)
DEF_BUILTIN (KERNARG_PTR, -1, "kernarg_ptr", B_INSN, _A1 (GCN_BTI_VOIDPTR),
gcn_expand_builtin_1)
+DEF_BUILTIN (DISPATCH_PTR, -1, "dispatch_ptr", B_INSN, _A1 (GCN_BTI_VOIDPTR),
+ gcn_expand_builtin_1)
DEF_BUILTIN (GET_STACK_LIMIT, -1, "get_stack_limit", B_INSN,
_A1 (GCN_BTI_VOIDPTR), gcn_expand_builtin_1)
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 0781c2a..031b405 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -110,7 +110,8 @@ gcn_init_machine_status (void)
f = ggc_cleared_alloc<machine_function> ();
- if (TARGET_GCN3)
+ // FIXME: re-enable global addressing with safety for LDS-flat addresses
+ //if (TARGET_GCN3)
f->use_flat_addressing = true;
return f;
@@ -4879,6 +4880,19 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
}
return ptr;
}
+ case GCN_BUILTIN_DISPATCH_PTR:
+ {
+ rtx ptr;
+ if (cfun->machine->args.reg[DISPATCH_PTR_ARG] >= 0)
+ ptr = gen_rtx_REG (DImode,
+ cfun->machine->args.reg[DISPATCH_PTR_ARG]);
+ else
+ {
+ ptr = gen_reg_rtx (DImode);
+ emit_move_insn (ptr, const0_rtx);
+ }
+ return ptr;
+ }
case GCN_BUILTIN_FIRST_CALL_THIS_THREAD_P:
{
/* Stash a marker in the unused upper 16 bits of s[0:1] to indicate