diff options
author | Tom de Vries <tdevries@suse.de> | 2022-01-13 13:13:44 +0100 |
---|---|---|
committer | Tom de Vries <tdevries@suse.de> | 2022-02-10 10:10:44 +0100 |
commit | 3e7d4e82dc9fecb051e9ac422c312b26206d5ecd (patch) | |
tree | c936837997675c954294872736c8ea5ba16b0daf | |
parent | 5b2d679bbbcc2b976c6e228ba63afdf67c33164e (diff) | |
download | gcc-3e7d4e82dc9fecb051e9ac422c312b26206d5ecd.zip gcc-3e7d4e82dc9fecb051e9ac422c312b26206d5ecd.tar.gz gcc-3e7d4e82dc9fecb051e9ac422c312b26206d5ecd.tar.bz2 |
[nvptx] Handle pre-sm_7x shared atomic store using atomic exchange
The PTX ISA specifies (for pre-sm_7x) that atomic operations on shared memory
locations do not guarantee atomicity with respect to normal store instructions
to the same address.
This can be fixed by:
- inserting barriers between normal stores and atomic operations to a common
address
- using atom.exch to store to locations accessed by other atomic operations.
It's not clearly spelled out which barriers are needed, and a barrier seems more
expensive than an atomic exchange.
Implement the pre-sm_7x shared atomic store using atomic exchange.
That includes stores using generic addressing, since those may also point to
shared memory.
Tested on x86-64 with nvptx accelerator.
gcc/ChangeLog:
2022-02-02 Tom de Vries <tdevries@suse.de>
* config/nvptx/nvptx-protos.h (nvptx_mem_maybe_shared_p): Declare.
* config/nvptx/nvptx.cc (nvptx_mem_data_area): New static function.
(nvptx_mem_maybe_shared_p): New function.
* config/nvptx/nvptx.md (define_expand "atomic_store<mode>"): New
define_expand.
gcc/testsuite/ChangeLog:
2022-02-02 Tom de Vries <tdevries@suse.de>
* gcc.target/nvptx/atomic-store-1.c: New test.
* gcc.target/nvptx/atomic-store-3.c: New test.
* gcc.target/nvptx/stack-atomics-run.c: Update.
-rw-r--r-- | gcc/config/nvptx/nvptx-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.cc | 22 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.md | 30 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/nvptx/atomic-store-1.c | 26 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/nvptx/atomic-store-3.c | 25 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c | 6 |
6 files changed, 109 insertions, 1 deletions
diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h index a846e34..0bf9af4 100644 --- a/gcc/config/nvptx/nvptx-protos.h +++ b/gcc/config/nvptx/nvptx-protos.h @@ -60,5 +60,6 @@ extern const char *nvptx_output_simt_exit (rtx); extern const char *nvptx_output_red_partition (rtx, rtx); extern const char *nvptx_output_atomic_insn (const char *, rtx *, int, int); extern bool nvptx_mem_local_p (rtx); +extern bool nvptx_mem_maybe_shared_p (const_rtx); #endif #endif diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index 1b0227a..5b26c0f 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -76,6 +76,7 @@ #include "intl.h" #include "opts.h" #include "tree-pretty-print.h" +#include "rtl-iter.h" /* This file should be included last. */ #include "target-def.h" @@ -2787,6 +2788,27 @@ nvptx_print_operand_address (FILE *file, machine_mode mode, rtx addr) nvptx_print_address_operand (file, addr, mode); } +static nvptx_data_area +nvptx_mem_data_area (const_rtx x) +{ + gcc_assert (GET_CODE (x) == MEM); + + const_rtx addr = XEXP (x, 0); + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, addr, ALL) + if (SYMBOL_REF_P (*iter)) + return SYMBOL_DATA_AREA (*iter); + + return DATA_AREA_GENERIC; +} + +bool +nvptx_mem_maybe_shared_p (const_rtx x) +{ + nvptx_data_area area = nvptx_mem_data_area (x); + return area == DATA_AREA_SHARED || area == DATA_AREA_GENERIC; +} + /* Print an operand, X, to FILE, with an optional modifier in CODE. 
Meaning of CODE: diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index cced68e..1a283b4 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -2051,6 +2051,36 @@ } [(set_attr "atomic" "true")]) +(define_expand "atomic_store<mode>" + [(match_operand:SDIM 0 "memory_operand" "=m") ;; memory + (match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input + (match_operand:SI 2 "const_int_operand")] ;; model + "" +{ + struct address_info info; + decompose_mem_address (&info, operands[0]); + if (info.base != NULL && REG_P (*info.base) + && REGNO_PTR_FRAME_P (REGNO (*info.base))) + { + emit_insn (gen_mov<mode> (operands[0], operands[1])); + DONE; + } + + if (TARGET_SM70) + /* Fall back to expand_atomic_store. */ + FAIL; + + bool maybe_shared_p = nvptx_mem_maybe_shared_p (operands[0]); + if (!maybe_shared_p) + /* Fall back to expand_atomic_store. */ + FAIL; + + rtx tmpreg = gen_reg_rtx (<MODE>mode); + emit_insn (gen_atomic_exchange<mode> (tmpreg, operands[0], operands[1], + operands[2])); + DONE; +}) + (define_insn "atomic_fetch_add<mode>" [(set (match_operand:SDIM 1 "memory_operand" "+m") (unspec_volatile:SDIM diff --git a/gcc/testsuite/gcc.target/nvptx/atomic-store-1.c b/gcc/testsuite/gcc.target/nvptx/atomic-store-1.c new file mode 100644 index 0000000..cee3815 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/atomic-store-1.c @@ -0,0 +1,26 @@ +/* Test the atomic store expansion for sm <= sm_6x targets, + shared state space. 
*/ + +/* { dg-do compile } */ +/* { dg-options "-misa=sm_53" } */ + +enum memmodel +{ + MEMMODEL_SEQ_CST = 5 +}; + +unsigned int u32 __attribute__((shared)); +unsigned long long int u64 __attribute__((shared)); + +int +main() +{ + __atomic_store_n (&u32, 0, MEMMODEL_SEQ_CST); + __atomic_store_n (&u64, 0, MEMMODEL_SEQ_CST); + + return 0; +} + +/* { dg-final { scan-assembler-times "atom.shared.exch.b32" 1 } } */ +/* { dg-final { scan-assembler-times "atom.shared.exch.b64" 1 } } */ +/* { dg-final { scan-assembler-times "membar.cta" 4 } } */ diff --git a/gcc/testsuite/gcc.target/nvptx/atomic-store-3.c b/gcc/testsuite/gcc.target/nvptx/atomic-store-3.c new file mode 100644 index 0000000..cc0264f --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/atomic-store-3.c @@ -0,0 +1,25 @@ +/* Test the atomic store expansion, global state space. */ + +/* { dg-do compile } */ +/* { dg-additional-options "-Wno-long-long" } */ + +enum memmodel +{ + MEMMODEL_SEQ_CST = 5 +}; + +unsigned int u32; +unsigned long long int u64; + +int +main() +{ + __atomic_store_n (&u32, 0, MEMMODEL_SEQ_CST); + __atomic_store_n (&u64, 0, MEMMODEL_SEQ_CST); + + return 0; +} + +/* { dg-final { scan-assembler-times "st.global.u32" 1 } } */ +/* { dg-final { scan-assembler-times "st.global.u64" 1 } } */ +/* { dg-final { scan-assembler-times "membar.sys" 4 } } */ diff --git a/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c b/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c index ad8e2f8..cd04596 100644 --- a/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c +++ b/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c @@ -39,6 +39,10 @@ main (void) if (b != 1) __builtin_abort (); - + a = 1; + __atomic_store_n (&a, 0, MEMMODEL_RELAXED); + if (a != 0) + __builtin_abort (); + return 0; } |