diff options
author | Tom de Vries <tdevries@suse.de> | 2022-02-15 14:36:26 +0100 |
---|---|---|
committer | Tom de Vries <tdevries@suse.de> | 2022-02-19 20:05:56 +0100 |
commit | 69cb3f2abb911acebfc7ffede2ee7151a3e14a59 (patch) | |
tree | 64d86cc4c708f0aed54b4b2187aca75bc3ec572a /gcc | |
parent | 9ed52438b8ca99a0dffe74da96c2281cbc9cbb4b (diff) | |
download | gcc-69cb3f2abb911acebfc7ffede2ee7151a3e14a59.zip gcc-69cb3f2abb911acebfc7ffede2ee7151a3e14a59.tar.gz gcc-69cb3f2abb911acebfc7ffede2ee7151a3e14a59.tar.bz2 |
[nvptx] Use _ as destination operand of atom.exch
We currently generate this code for an atomic store:
...
.reg.u32 %r21;
atom.exch.b32 %r21,[%r22],%r23;
...
where %r21 is set but unused.
Use the PTX bit-bucket operand '_' as the destination instead, so that we generate:
...
atom.exch.b32 _,[%r22],%r23;
...
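[ Note that the same problem (a destination register that is set but unused) still occurs when the result of an explicit atomic exchange is discarded, as in this code: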
...
void atomic_store (int *ptr, int val) {
__atomic_exchange_n (ptr, val, MEMMODEL_RELAXED);
}
... ]
Tested on nvptx.
gcc/ChangeLog:
2022-02-19 Tom de Vries <tdevries@suse.de>
* config/nvptx/nvptx.cc (nvptx_reorg_uniform_simt): Handle SET insn.
* config/nvptx/nvptx.md
(define_insn "nvptx_atomic_store<mode>"): Rename to ...
(define_insn "nvptx_atomic_store_sm70<mode>"): This.
(define_insn "nvptx_atomic_store<mode>"): New define_insn.
(define_expand "atomic_store<mode>"): Handle rename. Use
nvptx_atomic_store instead of atomic_exchange.
gcc/testsuite/ChangeLog:
2022-02-19 Tom de Vries <tdevries@suse.de>
* gcc.target/nvptx/atomic-store-1.c: Update.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/nvptx/nvptx.cc | 18 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.md | 25 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/nvptx/atomic-store-1.c | 4 |
3 files changed, 35 insertions, 12 deletions
diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index 55fab3e..ed347ca 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -3291,12 +3291,22 @@ nvptx_reorg_uniform_simt () continue; rtx pat = PATTERN (insn); - gcc_assert (GET_CODE (pat) == PARALLEL); rtx master = nvptx_get_unisimt_master (); bool shuffle_p = false; - for (int i = 0; i < XVECLEN (pat, 0); i++) - shuffle_p - |= nvptx_unisimt_handle_set (XVECEXP (pat, 0, i), insn, master); + switch (GET_CODE (pat)) + { + case PARALLEL: + for (int i = 0; i < XVECLEN (pat, 0); i++) + shuffle_p + |= nvptx_unisimt_handle_set (XVECEXP (pat, 0, i), insn, master); + break; + case SET: + shuffle_p |= nvptx_unisimt_handle_set (pat, insn, master); + break; + default: + gcc_unreachable (); + } + if (shuffle_p && TARGET_PTX_6_0) { /* The shuffle is a sync, so uniformity is guaranteed. */ diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 132ef2f..f6dc817 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -2069,8 +2069,8 @@ if (TARGET_SM70) { - emit_insn (gen_nvptx_atomic_store<mode> (operands[0], operands[1], - operands[2])); + emit_insn (gen_nvptx_atomic_store_sm70<mode> (operands[0], operands[1], + operands[2])); DONE; } @@ -2079,13 +2079,12 @@ /* Fall back to expand_atomic_store. */ FAIL; - rtx tmpreg = gen_reg_rtx (<MODE>mode); - emit_insn (gen_atomic_exchange<mode> (tmpreg, operands[0], operands[1], - operands[2])); + emit_insn (gen_nvptx_atomic_store<mode> (operands[0], operands[1], + operands[2])); DONE; }) -(define_insn "nvptx_atomic_store<mode>" +(define_insn "nvptx_atomic_store_sm70<mode>" [(set (match_operand:SDIM 0 "memory_operand" "+m") ;; memory (unspec_volatile:SDIM [(match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input @@ -2099,6 +2098,20 @@ } [(set_attr "atomic" "false")]) ;; Note: st is not an atomic insn. 
+(define_insn "nvptx_atomic_store<mode>" + [(set (match_operand:SDIM 0 "memory_operand" "+m") ;; memory + (unspec_volatile:SDIM + [(match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input + (match_operand:SI 2 "const_int_operand")] ;; model + UNSPECV_ST))] + "!TARGET_SM70" + { + const char *t + = "%.\tatom%A0.exch.b%T0\t_, %0, %1;"; + return nvptx_output_atomic_insn (t, operands, 0, 2); + } + [(set_attr "atomic" "true")]) + (define_insn "atomic_fetch_add<mode>" [(set (match_operand:SDIM 1 "memory_operand" "+m") (unspec_volatile:SDIM diff --git a/gcc/testsuite/gcc.target/nvptx/atomic-store-1.c b/gcc/testsuite/gcc.target/nvptx/atomic-store-1.c index cee3815..d611f2d 100644 --- a/gcc/testsuite/gcc.target/nvptx/atomic-store-1.c +++ b/gcc/testsuite/gcc.target/nvptx/atomic-store-1.c @@ -21,6 +21,6 @@ main() return 0; } -/* { dg-final { scan-assembler-times "atom.shared.exch.b32" 1 } } */ -/* { dg-final { scan-assembler-times "atom.shared.exch.b64" 1 } } */ +/* { dg-final { scan-assembler-times "atom.shared.exch.b32\[\t \]+_," 1 } } */ +/* { dg-final { scan-assembler-times "atom.shared.exch.b64\[\t \]+_," 1 } } */ /* { dg-final { scan-assembler-times "membar.cta" 4 } } */ |