aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom de Vries <tdevries@suse.de>2022-01-13 13:13:44 +0100
committerTom de Vries <tdevries@suse.de>2022-02-10 10:10:44 +0100
commit3e7d4e82dc9fecb051e9ac422c312b26206d5ecd (patch)
treec936837997675c954294872736c8ea5ba16b0daf
parent5b2d679bbbcc2b976c6e228ba63afdf67c33164e (diff)
downloadgcc-3e7d4e82dc9fecb051e9ac422c312b26206d5ecd.zip
gcc-3e7d4e82dc9fecb051e9ac422c312b26206d5ecd.tar.gz
gcc-3e7d4e82dc9fecb051e9ac422c312b26206d5ecd.tar.bz2
[nvptx] Handle pre-sm_7x shared atomic store using atomic exchange
The ptx isa specifies (for pre-sm_7x) that atomic operations on shared memory locations do not guarantee atomicity with respect to normal store instructions to the same address. This can be fixed by: - inserting barriers between normal stores and atomic operations to a common address - using atom.exch to store to locations accessed by other atomic operations. It's not clearly spelled out which barriers are needed, and a barrier seem more expensive than atomic exchange. Implement the pre-sm_7x shared atomic store using atomic exchange. That includes stores using generic addressing, since those may also point to shared memory. Tested on x86-64 with nvptx accelerator. gcc/ChangeLog: 2022-02-02 Tom de Vries <tdevries@suse.de> * config/nvptx/nvptx-protos.h (nvptx_mem_maybe_shared_p): Declare. * config/nvptx/nvptx.cc (nvptx_mem_data_area): New static function. (nvptx_mem_maybe_shared_p): New function. * config/nvptx/nvptx.md (define_expand "atomic_store<mode>"): New define_expand. gcc/testsuite/ChangeLog: 2022-02-02 Tom de Vries <tdevries@suse.de> * gcc.target/nvptx/atomic-store-1.c: New test. * gcc.target/nvptx/atomic-store-3.c: New test. * gcc.target/nvptx/stack-atomics-run.c: Update.
-rw-r--r--gcc/config/nvptx/nvptx-protos.h1
-rw-r--r--gcc/config/nvptx/nvptx.cc22
-rw-r--r--gcc/config/nvptx/nvptx.md30
-rw-r--r--gcc/testsuite/gcc.target/nvptx/atomic-store-1.c26
-rw-r--r--gcc/testsuite/gcc.target/nvptx/atomic-store-3.c25
-rw-r--r--gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c6
6 files changed, 109 insertions, 1 deletions
diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h
index a846e34..0bf9af4 100644
--- a/gcc/config/nvptx/nvptx-protos.h
+++ b/gcc/config/nvptx/nvptx-protos.h
@@ -60,5 +60,6 @@ extern const char *nvptx_output_simt_exit (rtx);
extern const char *nvptx_output_red_partition (rtx, rtx);
extern const char *nvptx_output_atomic_insn (const char *, rtx *, int, int);
extern bool nvptx_mem_local_p (rtx);
+extern bool nvptx_mem_maybe_shared_p (const_rtx);
#endif
#endif
diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
index 1b0227a..5b26c0f 100644
--- a/gcc/config/nvptx/nvptx.cc
+++ b/gcc/config/nvptx/nvptx.cc
@@ -76,6 +76,7 @@
#include "intl.h"
#include "opts.h"
#include "tree-pretty-print.h"
+#include "rtl-iter.h"
/* This file should be included last. */
#include "target-def.h"
@@ -2787,6 +2788,27 @@ nvptx_print_operand_address (FILE *file, machine_mode mode, rtx addr)
nvptx_print_address_operand (file, addr, mode);
}
+static nvptx_data_area
+nvptx_mem_data_area (const_rtx x)
+{
+ gcc_assert (GET_CODE (x) == MEM);
+
+ const_rtx addr = XEXP (x, 0);
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, addr, ALL)
+ if (SYMBOL_REF_P (*iter))
+ return SYMBOL_DATA_AREA (*iter);
+
+ return DATA_AREA_GENERIC;
+}
+
+bool
+nvptx_mem_maybe_shared_p (const_rtx x)
+{
+ nvptx_data_area area = nvptx_mem_data_area (x);
+ return area == DATA_AREA_SHARED || area == DATA_AREA_GENERIC;
+}
+
/* Print an operand, X, to FILE, with an optional modifier in CODE.
Meaning of CODE:
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index cced68e..1a283b4 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -2051,6 +2051,36 @@
}
[(set_attr "atomic" "true")])
+(define_expand "atomic_store<mode>"
+ [(match_operand:SDIM 0 "memory_operand" "=m") ;; memory
+ (match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input
+ (match_operand:SI 2 "const_int_operand")] ;; model
+ ""
+{
+ struct address_info info;
+ decompose_mem_address (&info, operands[0]);
+ if (info.base != NULL && REG_P (*info.base)
+ && REGNO_PTR_FRAME_P (REGNO (*info.base)))
+ {
+ emit_insn (gen_mov<mode> (operands[0], operands[1]));
+ DONE;
+ }
+
+ if (TARGET_SM70)
+ /* Fall back to expand_atomic_store. */
+ FAIL;
+
+ bool maybe_shared_p = nvptx_mem_maybe_shared_p (operands[0]);
+ if (!maybe_shared_p)
+ /* Fall back to expand_atomic_store. */
+ FAIL;
+
+ rtx tmpreg = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_atomic_exchange<mode> (tmpreg, operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
(define_insn "atomic_fetch_add<mode>"
[(set (match_operand:SDIM 1 "memory_operand" "+m")
(unspec_volatile:SDIM
diff --git a/gcc/testsuite/gcc.target/nvptx/atomic-store-1.c b/gcc/testsuite/gcc.target/nvptx/atomic-store-1.c
new file mode 100644
index 0000000..cee3815
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/atomic-store-1.c
@@ -0,0 +1,26 @@
+/* Test the atomic store expansion for sm <= sm_6x targets,
+ shared state space. */
+
+/* { dg-do compile } */
+/* { dg-options "-misa=sm_53" } */
+
+enum memmodel
+{
+ MEMMODEL_SEQ_CST = 5
+};
+
+unsigned int u32 __attribute__((shared));
+unsigned long long int u64 __attribute__((shared));
+
+int
+main()
+{
+ __atomic_store_n (&u32, 0, MEMMODEL_SEQ_CST);
+ __atomic_store_n (&u64, 0, MEMMODEL_SEQ_CST);
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "atom.shared.exch.b32" 1 } } */
+/* { dg-final { scan-assembler-times "atom.shared.exch.b64" 1 } } */
+/* { dg-final { scan-assembler-times "membar.cta" 4 } } */
diff --git a/gcc/testsuite/gcc.target/nvptx/atomic-store-3.c b/gcc/testsuite/gcc.target/nvptx/atomic-store-3.c
new file mode 100644
index 0000000..cc0264f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/atomic-store-3.c
@@ -0,0 +1,25 @@
+/* Test the atomic store expansion, global state space. */
+
+/* { dg-do compile } */
+/* { dg-additional-options "-Wno-long-long" } */
+
+enum memmodel
+{
+ MEMMODEL_SEQ_CST = 5
+};
+
+unsigned int u32;
+unsigned long long int u64;
+
+int
+main()
+{
+ __atomic_store_n (&u32, 0, MEMMODEL_SEQ_CST);
+ __atomic_store_n (&u64, 0, MEMMODEL_SEQ_CST);
+
+ return 0;
+}
+
+/* { dg-final { scan-assembler-times "st.global.u32" 1 } } */
+/* { dg-final { scan-assembler-times "st.global.u64" 1 } } */
+/* { dg-final { scan-assembler-times "membar.sys" 4 } } */
diff --git a/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c b/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c
index ad8e2f8..cd04596 100644
--- a/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c
+++ b/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c
@@ -39,6 +39,10 @@ main (void)
if (b != 1)
__builtin_abort ();
-
+ a = 1;
+ __atomic_store_n (&a, 0, MEMMODEL_RELAXED);
+ if (a != 0)
+ __builtin_abort ();
+
return 0;
}