diff options
author | Andrew Stubbs <ams@codesourcery.com> | 2023-01-30 14:43:00 +0000 |
---|---|---|
committer | Andrew Stubbs <ams@codesourcery.com> | 2023-12-06 16:48:57 +0000 |
commit | e7d6c277fa28c0b9b621d23c471e0388d2912644 (patch) | |
tree | 3ef9390ef49f8deefa281fd7ad2a145ad85254a6 /gcc | |
parent | e9a19ead498fcc89186b724c6e76854f7751a89b (diff) | |
download | gcc-e7d6c277fa28c0b9b621d23c471e0388d2912644.zip gcc-e7d6c277fa28c0b9b621d23c471e0388d2912644.tar.gz gcc-e7d6c277fa28c0b9b621d23c471e0388d2912644.tar.bz2 |
amdgcn, libgomp: low-latency allocator
This implements the OpenMP low-latency memory allocator for AMD GCN using the
small per-team LDS memory (Local Data Store).
Since addresses can now refer to LDS space, the "Global" address space is
no longer compatible. This patch therefore switches the backend to use
entirely "Flat" addressing (which supports both memories). A future patch
will re-enable "global" instructions for cases where it is known to be safe
to do so.
gcc/ChangeLog:
* config/gcn/gcn-builtins.def (DISPATCH_PTR): New built-in.
* config/gcn/gcn.cc (gcn_init_machine_status): Disable global
addressing.
(gcn_expand_builtin_1): Implement GCN_BUILTIN_DISPATCH_PTR.
libgomp/ChangeLog:
* config/gcn/libgomp-gcn.h (TEAM_ARENA_START): Move to here.
(TEAM_ARENA_FREE): Likewise.
(TEAM_ARENA_END): Likewise.
(GCN_LOWLAT_HEAP): New.
* config/gcn/team.c (LITTLEENDIAN_CPU): New, and import hsa.h.
(__gcn_lowlat_init): New prototype.
(gomp_gcn_enter_kernel): Initialize the low-latency heap.
* libgomp.h (TEAM_ARENA_START): Move to config/gcn/libgomp-gcn.h.
(TEAM_ARENA_FREE): Likewise.
(TEAM_ARENA_END): Likewise.
* plugin/plugin-gcn.c (lowlat_size): New variable.
(print_kernel_dispatch): Label the group_segment_size purpose.
(init_environment_variables): Read GOMP_GCN_LOWLAT_POOL.
(create_kernel_dispatch): Pass low-latency heap allocation to kernel.
(run_kernel): Use shadow; don't assume values.
* testsuite/libgomp.c/omp_alloc-traits.c: Enable for amdgcn.
* config/gcn/allocator.c: New file.
* libgomp.texi: Document low-latency implementation details.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/gcn/gcn-builtins.def | 2 | ||||
-rw-r--r-- | gcc/config/gcn/gcn.cc | 16 |
2 files changed, 17 insertions, 1 deletions
diff --git a/gcc/config/gcn/gcn-builtins.def b/gcc/config/gcn/gcn-builtins.def index 636a8e7..471457d 100644 --- a/gcc/config/gcn/gcn-builtins.def +++ b/gcc/config/gcn/gcn-builtins.def @@ -164,6 +164,8 @@ DEF_BUILTIN (FIRST_CALL_THIS_THREAD_P, -1, "first_call_this_thread_p", B_INSN, _A1 (GCN_BTI_BOOL), gcn_expand_builtin_1) DEF_BUILTIN (KERNARG_PTR, -1, "kernarg_ptr", B_INSN, _A1 (GCN_BTI_VOIDPTR), gcn_expand_builtin_1) +DEF_BUILTIN (DISPATCH_PTR, -1, "dispatch_ptr", B_INSN, _A1 (GCN_BTI_VOIDPTR), + gcn_expand_builtin_1) DEF_BUILTIN (GET_STACK_LIMIT, -1, "get_stack_limit", B_INSN, _A1 (GCN_BTI_VOIDPTR), gcn_expand_builtin_1) diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 0781c2a..031b405 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -110,7 +110,8 @@ gcn_init_machine_status (void) f = ggc_cleared_alloc<machine_function> (); - if (TARGET_GCN3) + // FIXME: re-enable global addressing with safety for LDS-flat addresses + //if (TARGET_GCN3) f->use_flat_addressing = true; return f; @@ -4879,6 +4880,19 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ , } return ptr; } + case GCN_BUILTIN_DISPATCH_PTR: + { + rtx ptr; + if (cfun->machine->args.reg[DISPATCH_PTR_ARG] >= 0) + ptr = gen_rtx_REG (DImode, + cfun->machine->args.reg[DISPATCH_PTR_ARG]); + else + { + ptr = gen_reg_rtx (DImode); + emit_move_insn (ptr, const0_rtx); + } + return ptr; + } case GCN_BUILTIN_FIRST_CALL_THIS_THREAD_P: { /* Stash a marker in the unused upper 16 bits of s[0:1] to indicate |