diff options
author | Thomas Schwinge <tschwinge@baylibre.com> | 2024-12-07 00:17:49 +0100 |
---|---|---|
committer | Thomas Schwinge <tschwinge@baylibre.com> | 2025-01-09 14:17:11 +0100 |
commit | 3861d362ec7e3c50742fc43833fe9d8674f4070e (patch) | |
tree | a62824305de2b8c0281fb67b52c7ce5629a35ab5 /gcc/config/nvptx | |
parent | 1db025c67fc19612332d1668607ab800f0251520 (diff) | |
download | gcc-3861d362ec7e3c50742fc43833fe9d8674f4070e.zip gcc-3861d362ec7e3c50742fc43833fe9d8674f4070e.tar.gz gcc-3861d362ec7e3c50742fc43833fe9d8674f4070e.tar.bz2 |
nvptx: PTX 'alloca' for '-mptx=7.3'+, '-march=sm_52'+ [PR65181]
..., and use it for '-mno-soft-stack': PTX "native" stacks.
PR target/65181
gcc/
* config/nvptx/nvptx.cc (nvptx_get_drap_rtx): Handle
'!TARGET_SOFT_STACK'.
* config/nvptx/nvptx.md (define_c_enum "unspec"): Add
'UNSPEC_STACKSAVE', 'UNSPEC_STACKRESTORE'.
(define_expand "allocate_stack", define_expand "save_stack_block")
(define_expand "restore_stack_block"): Handle '!TARGET_SOFT_STACK',
PTX 'alloca'.
(define_insn "@nvptx_alloca_<mode>")
(define_insn "@nvptx_stacksave_<mode>")
(define_insn "@nvptx_stackrestore_<mode>"): New.
* doc/invoke.texi (Nvidia PTX Options): Update '-msoft-stack',
'-mno-soft-stack'.
* doc/sourcebuild.texi (nvptx-specific attributes): Document
'nvptx_runtime_alloca_ptx'.
(Add Options): Document 'nvptx_alloca_ptx'.
gcc/testsuite/
* gcc.target/nvptx/alloca-1.c: Evolve into...
* gcc.target/nvptx/alloca-1-O0.c: ... this, ...
* gcc.target/nvptx/alloca-1-O1.c: ... this, and...
* gcc.target/nvptx/alloca-1-sm_30.c: ... this.
* gcc.target/nvptx/vla-1.c: Evolve into...
* gcc.target/nvptx/vla-1-O0.c: ... this, ...
* gcc.target/nvptx/vla-1-O1.c: ... this, and...
* gcc.target/nvptx/vla-1-sm_30.c: ... this.
* gcc.c-torture/execute/pr36321.c: Adjust.
* gcc.target/nvptx/__builtin_alloca_0-1-O0.c: Likewise.
* gcc.target/nvptx/__builtin_alloca_0-1-O1.c: Likewise.
* gcc.target/nvptx/__builtin_stack_save___builtin_stack_restore-1.c:
Likewise.
* gcc.target/nvptx/softstack.c: Likewise.
* gcc.target/nvptx/__builtin_stack_save___builtin_stack_restore-1-sm_30.c:
New.
* gcc.target/nvptx/alloca-2-O0.c: Likewise.
* gcc.target/nvptx/alloca-3-O1.c: Likewise.
* gcc.target/nvptx/alloca-4-O3.c: Likewise.
* gcc.target/nvptx/alloca-5.c: Likewise.
* lib/target-supports.exp (check_effective_target_alloca): Adjust.
(check_nvptx_default_ptx_isa_target_architecture_at_least)
(check_nvptx_runtime_ptx_isa_target_architecture_at_least)
(check_effective_target_nvptx_runtime_alloca_ptx)
(add_options_for_nvptx_alloca_ptx): New.
libgomp/
* fortran.c (omp_get_device_from_uid_): Adjust.
* testsuite/libgomp.oacc-fortran/privatized-ref-2.f90: Likewise.
Diffstat (limited to 'gcc/config/nvptx')
-rw-r--r-- | gcc/config/nvptx/nvptx.cc | 4 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.md | 92 |
2 files changed, 78 insertions, 18 deletions
diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index 5860b3d..060f453 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -245,7 +245,7 @@ default_ptx_version_option (void) warp convergence. */ res = MAX (res, PTX_VERSION_6_0); - /* For sm_52+, pick at least 7.3. */ + /* For sm_52+, pick at least 7.3, to enable PTX 'alloca'. */ if (ptx_isa_option >= PTX_ISA_SM52) res = MAX (res, PTX_VERSION_7_3); @@ -1797,7 +1797,7 @@ nvptx_function_ok_for_sibcall (tree, tree) static rtx nvptx_get_drap_rtx (void) { - if (TARGET_SOFT_STACK && stack_realign_drap) + if (stack_realign_drap) return arg_pointer_rtx; return NULL_RTX; } diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index b300f2e..a22a088 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -35,8 +35,9 @@ UNSPEC_FPINT_NEARBYINT UNSPEC_ALLOCA - UNSPEC_SET_SOFTSTACK + UNSPEC_STACKSAVE + UNSPEC_STACKRESTORE UNSPEC_DIM_SIZE @@ -1663,22 +1664,47 @@ (match_operand 1 "nvptx_register_operand")] "" { - if (TARGET_SOFT_STACK) + if (!TARGET_SOFT_STACK + && TARGET_PTX_7_3 + && TARGET_SM52) + emit_insn (gen_nvptx_alloca (Pmode, operands[0], operands[1])); + else if (!TARGET_SOFT_STACK) + { + sorry ("target cannot support alloca"); + emit_insn (gen_nop ()); + } + else if (TARGET_SOFT_STACK) { emit_move_insn (stack_pointer_rtx, gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1])); emit_insn (gen_set_softstack (Pmode, stack_pointer_rtx)); emit_move_insn (operands[0], virtual_stack_dynamic_rtx); - DONE; } - /* The ptx documentation specifies an alloca intrinsic (for 32 bit - only) but notes it is not implemented. The assembler emits a - confused error message. Issue a blunt one now instead. 
*/ - sorry ("target cannot support alloca"); - emit_insn (gen_nop ()); + else + gcc_unreachable (); DONE; }) +(define_insn "@nvptx_alloca_<mode>" + [(set (match_operand:P 0 "nvptx_register_operand" "=R") + (unspec:P [(match_operand:P 1 "nvptx_nonmemory_operand" "Ri")] + UNSPEC_ALLOCA))] + "TARGET_PTX_7_3 + && TARGET_SM52" + { + /* Convert the address from '.local' state space to generic. That way, + we don't have to use 'st.local', 'ld.local', and can easily pass the + address to other "generic functions". + TODO 'gcc.target/nvptx/alloca-5.c' */ + output_asm_insn ("{", NULL); + output_asm_insn ("\\t.reg%t0\\t%0_local;", operands); + output_asm_insn ("\\talloca%u0\\t%0_local, %1;", operands); + output_asm_insn ("\\tcvta.local%u0\\t%0, %0_local;", operands); + output_asm_insn ("}", NULL); + return ""; + } + [(set_attr "predicable" "no")]) + (define_insn "@set_softstack_<mode>" [(unspec [(match_operand:P 0 "nvptx_register_operand" "R")] UNSPEC_SET_SOFTSTACK)] @@ -1692,30 +1718,64 @@ (match_operand 1 "register_operand" "")] "!TARGET_SOFT_STACK" { - /* The concept of a '%stack' pointer doesn't apply like this for - PTX "native" stacks. GCC however occasionally synthesizes - '__builtin_stack_save ()', '__builtin_stack_restore ()', and isn't able to - optimize them all away. Just submit a dummy -- user code shouldn't be - able to observe this. */ - emit_move_insn (operands[0], GEN_INT (0xdeadbeef)); + if (TARGET_PTX_7_3 + && TARGET_SM52) + { + gcc_checking_assert (REG_P (operands[0])); + emit_insn (gen_nvptx_stacksave (Pmode, operands[0], operands[1])); + } + else + { + /* The concept of a '%stack' pointer doesn't apply like this. + GCC however occasionally synthesizes '__builtin_stack_save ()', + '__builtin_stack_restore ()', and isn't able to optimize them all + away. Just submit a dummy -- user code shouldn't be able to observe + this. 
*/ + emit_move_insn (operands[0], GEN_INT (0xdeadbeef)); + } DONE; }) +(define_insn "@nvptx_stacksave_<mode>" + [(set (match_operand:P 0 "nvptx_register_operand" "=R") + (unspec:P [(match_operand:P 1 "register_operand" "R")] + UNSPEC_STACKSAVE))] + "TARGET_PTX_7_3 + && TARGET_SM52" + "%.\\tstacksave%u0\\t%0;") + (define_expand "restore_stack_block" [(match_operand 0 "register_operand" "") (match_operand 1 "register_operand" "")] "" { - if (!TARGET_SOFT_STACK) + if (!TARGET_SOFT_STACK + && TARGET_PTX_7_3 + && TARGET_SM52) + { + operands[1] = force_reg (Pmode, operands[1]); + emit_insn (gen_nvptx_stackrestore (Pmode, operands[0], operands[1])); + } + else if (!TARGET_SOFT_STACK) ; /* See 'save_stack_block'. */ - else + else if (TARGET_SOFT_STACK) { emit_move_insn (operands[0], operands[1]); emit_insn (gen_set_softstack (Pmode, operands[0])); } + else + gcc_unreachable (); DONE; }) +(define_insn "@nvptx_stackrestore_<mode>" + [(set (match_operand:P 0 "nvptx_register_operand" "=R") + (unspec:P [(match_operand:P 1 "nvptx_register_operand" "R")] + UNSPEC_STACKRESTORE))] + "TARGET_PTX_7_3 + && TARGET_SM52" + "%.\\tstackrestore%u1\\t%1;") + (define_expand "save_stack_function" [(match_operand 0 "register_operand" "") (match_operand 1 "register_operand" "")] |