aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/nvptx
diff options
context:
space:
mode:
author Thomas Schwinge <tschwinge@baylibre.com> 2024-12-07 00:17:49 +0100
committer Thomas Schwinge <tschwinge@baylibre.com> 2025-01-09 14:17:11 +0100
commit 3861d362ec7e3c50742fc43833fe9d8674f4070e (patch)
tree a62824305de2b8c0281fb67b52c7ce5629a35ab5 /gcc/config/nvptx
parent 1db025c67fc19612332d1668607ab800f0251520 (diff)
download gcc-3861d362ec7e3c50742fc43833fe9d8674f4070e.zip
gcc-3861d362ec7e3c50742fc43833fe9d8674f4070e.tar.gz
gcc-3861d362ec7e3c50742fc43833fe9d8674f4070e.tar.bz2
nvptx: PTX 'alloca' for '-mptx=7.3'+, '-march=sm_52'+ [PR65181]
..., and use it for '-mno-soft-stack': PTX "native" stacks. PR target/65181 gcc/ * config/nvptx/nvptx.cc (nvptx_get_drap_rtx): Handle '!TARGET_SOFT_STACK'. * config/nvptx/nvptx.md (define_c_enum "unspec"): Add 'UNSPEC_STACKSAVE', 'UNSPEC_STACKRESTORE'. (define_expand "allocate_stack", define_expand "save_stack_block") (define_expand "restore_stack_block"): Handle '!TARGET_SOFT_STACK', PTX 'alloca'. (define_insn "@nvptx_alloca_<mode>") (define_insn "@nvptx_stacksave_<mode>") (define_insn "@nvptx_stackrestore_<mode>"): New. * doc/invoke.texi (Nvidia PTX Options): Update '-msoft-stack', '-mno-soft-stack'. * doc/sourcebuild.texi (nvptx-specific attributes): Document 'nvptx_runtime_alloca_ptx'. (Add Options): Document 'nvptx_alloca_ptx'. gcc/testsuite/ * gcc.target/nvptx/alloca-1.c: Evolve into... * gcc.target/nvptx/alloca-1-O0.c: ... this, ... * gcc.target/nvptx/alloca-1-O1.c: ... this, and... * gcc.target/nvptx/alloca-1-sm_30.c: ... this. * gcc.target/nvptx/vla-1.c: Evolve into... * gcc.target/nvptx/vla-1-O0.c: ... this, ... * gcc.target/nvptx/vla-1-O1.c: ... this, and... * gcc.target/nvptx/vla-1-sm_30.c: ... this. * gcc.c-torture/execute/pr36321.c: Adjust. * gcc.target/nvptx/__builtin_alloca_0-1-O0.c: Likewise. * gcc.target/nvptx/__builtin_alloca_0-1-O1.c: Likewise. * gcc.target/nvptx/__builtin_stack_save___builtin_stack_restore-1.c: Likewise. * gcc.target/nvptx/softstack.c: Likewise. * gcc.target/nvptx/__builtin_stack_save___builtin_stack_restore-1-sm_30.c: New. * gcc.target/nvptx/alloca-2-O0.c: Likewise. * gcc.target/nvptx/alloca-3-O1.c: Likewise. * gcc.target/nvptx/alloca-4-O3.c: Likewise. * gcc.target/nvptx/alloca-5.c: Likewise. * lib/target-supports.exp (check_effective_target_alloca): Adjust. (check_nvptx_default_ptx_isa_target_architecture_at_least) (check_nvptx_runtime_ptx_isa_target_architecture_at_least) (check_effective_target_nvptx_runtime_alloca_ptx) (add_options_for_nvptx_alloca_ptx): New. libgomp/ * fortran.c (omp_get_device_from_uid_): Adjust. 
* testsuite/libgomp.oacc-fortran/privatized-ref-2.f90: Likewise.
Diffstat (limited to 'gcc/config/nvptx')
-rw-r--r-- gcc/config/nvptx/nvptx.cc 4
-rw-r--r-- gcc/config/nvptx/nvptx.md 92
2 files changed, 78 insertions, 18 deletions
diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
index 5860b3d..060f453 100644
--- a/gcc/config/nvptx/nvptx.cc
+++ b/gcc/config/nvptx/nvptx.cc
@@ -245,7 +245,7 @@ default_ptx_version_option (void)
warp convergence. */
res = MAX (res, PTX_VERSION_6_0);
- /* For sm_52+, pick at least 7.3. */
+ /* For sm_52+, pick at least 7.3, to enable PTX 'alloca'. */
if (ptx_isa_option >= PTX_ISA_SM52)
res = MAX (res, PTX_VERSION_7_3);
@@ -1797,7 +1797,7 @@ nvptx_function_ok_for_sibcall (tree, tree)
static rtx
nvptx_get_drap_rtx (void)
{
- if (TARGET_SOFT_STACK && stack_realign_drap)
+ if (stack_realign_drap)
return arg_pointer_rtx;
return NULL_RTX;
}
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index b300f2e..a22a088 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -35,8 +35,9 @@
UNSPEC_FPINT_NEARBYINT
UNSPEC_ALLOCA
-
UNSPEC_SET_SOFTSTACK
+ UNSPEC_STACKSAVE
+ UNSPEC_STACKRESTORE
UNSPEC_DIM_SIZE
@@ -1663,22 +1664,47 @@
(match_operand 1 "nvptx_register_operand")]
""
{
- if (TARGET_SOFT_STACK)
+ if (!TARGET_SOFT_STACK
+ && TARGET_PTX_7_3
+ && TARGET_SM52)
+ emit_insn (gen_nvptx_alloca (Pmode, operands[0], operands[1]));
+ else if (!TARGET_SOFT_STACK)
+ {
+ sorry ("target cannot support alloca");
+ emit_insn (gen_nop ());
+ }
+ else if (TARGET_SOFT_STACK)
{
emit_move_insn (stack_pointer_rtx,
gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]));
emit_insn (gen_set_softstack (Pmode, stack_pointer_rtx));
emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
- DONE;
}
- /* The ptx documentation specifies an alloca intrinsic (for 32 bit
- only) but notes it is not implemented. The assembler emits a
- confused error message. Issue a blunt one now instead. */
- sorry ("target cannot support alloca");
- emit_insn (gen_nop ());
+ else
+ gcc_unreachable ();
DONE;
})
+(define_insn "@nvptx_alloca_<mode>"
+ [(set (match_operand:P 0 "nvptx_register_operand" "=R")
+ (unspec:P [(match_operand:P 1 "nvptx_nonmemory_operand" "Ri")]
+ UNSPEC_ALLOCA))]
+ "TARGET_PTX_7_3
+ && TARGET_SM52"
+ {
+ /* Convert the address from '.local' state space to generic. That way,
+ we don't have to use 'st.local', 'ld.local', and can easily pass the
+ address to other "generic functions".
+ TODO 'gcc.target/nvptx/alloca-5.c' */
+ output_asm_insn ("{", NULL);
+ output_asm_insn ("\\t.reg%t0\\t%0_local;", operands);
+ output_asm_insn ("\\talloca%u0\\t%0_local, %1;", operands);
+ output_asm_insn ("\\tcvta.local%u0\\t%0, %0_local;", operands);
+ output_asm_insn ("}", NULL);
+ return "";
+ }
+ [(set_attr "predicable" "no")])
+
(define_insn "@set_softstack_<mode>"
[(unspec [(match_operand:P 0 "nvptx_register_operand" "R")]
UNSPEC_SET_SOFTSTACK)]
@@ -1692,30 +1718,64 @@
(match_operand 1 "register_operand" "")]
"!TARGET_SOFT_STACK"
{
- /* The concept of a '%stack' pointer doesn't apply like this for
- PTX "native" stacks. GCC however occasionally synthesizes
- '__builtin_stack_save ()', '__builtin_stack_restore ()', and isn't able to
- optimize them all away. Just submit a dummy -- user code shouldn't be
- able to observe this. */
- emit_move_insn (operands[0], GEN_INT (0xdeadbeef));
+ if (TARGET_PTX_7_3
+ && TARGET_SM52)
+ {
+ gcc_checking_assert (REG_P (operands[0]));
+ emit_insn (gen_nvptx_stacksave (Pmode, operands[0], operands[1]));
+ }
+ else
+ {
+ /* The concept of a '%stack' pointer doesn't apply like this.
+ GCC however occasionally synthesizes '__builtin_stack_save ()',
+ '__builtin_stack_restore ()', and isn't able to optimize them all
+ away. Just submit a dummy -- user code shouldn't be able to observe
+ this. */
+ emit_move_insn (operands[0], GEN_INT (0xdeadbeef));
+ }
DONE;
})
+(define_insn "@nvptx_stacksave_<mode>"
+ [(set (match_operand:P 0 "nvptx_register_operand" "=R")
+ (unspec:P [(match_operand:P 1 "register_operand" "R")]
+ UNSPEC_STACKSAVE))]
+ "TARGET_PTX_7_3
+ && TARGET_SM52"
+ "%.\\tstacksave%u0\\t%0;")
+
(define_expand "restore_stack_block"
[(match_operand 0 "register_operand" "")
(match_operand 1 "register_operand" "")]
""
{
- if (!TARGET_SOFT_STACK)
+ if (!TARGET_SOFT_STACK
+ && TARGET_PTX_7_3
+ && TARGET_SM52)
+ {
+ operands[1] = force_reg (Pmode, operands[1]);
+ emit_insn (gen_nvptx_stackrestore (Pmode, operands[0], operands[1]));
+ }
+ else if (!TARGET_SOFT_STACK)
; /* See 'save_stack_block'. */
- else
+ else if (TARGET_SOFT_STACK)
{
emit_move_insn (operands[0], operands[1]);
emit_insn (gen_set_softstack (Pmode, operands[0]));
}
+ else
+ gcc_unreachable ();
DONE;
})
+(define_insn "@nvptx_stackrestore_<mode>"
+ [(set (match_operand:P 0 "nvptx_register_operand" "=R")
+ (unspec:P [(match_operand:P 1 "nvptx_register_operand" "R")]
+ UNSPEC_STACKRESTORE))]
+ "TARGET_PTX_7_3
+ && TARGET_SM52"
+ "%.\\tstackrestore%u1\\t%1;")
+
(define_expand "save_stack_function"
[(match_operand 0 "register_operand" "")
(match_operand 1 "register_operand" "")]