aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Tom de Vries <tdevries@suse.de> 2021-05-17 10:11:52 +0200
committer: Tom de Vries <tdevries@suse.de> 2021-05-17 20:20:57 +0200
commit: 58f7c7e098b79c96403c8341823ec3ba1e8b3945 (patch)
tree: 3a95b6457066badeac27a8dd7dcaf203681a1059 /gcc
parent: 45aa7a447652e8541cc381d7ab128544f81ed857 (diff)
download: gcc-58f7c7e098b79c96403c8341823ec3ba1e8b3945.zip
gcc-58f7c7e098b79c96403c8341823ec3ba1e8b3945.tar.gz
gcc-58f7c7e098b79c96403c8341823ec3ba1e8b3945.tar.bz2
[nvptx] Handle memmodel for atomic ops
The atomic ops in nvptx.md have memmodel arguments, which are currently ignored.
Handle these, fixing the failures of test cases libgomp.c-c++-common/reduction-{5,6}.c on Volta.
Tested libgomp on x86_64-linux with nvptx accelerator.
gcc/ChangeLog:
2021-05-17 Tom de Vries <tdevries@suse.de>
PR target/100497
* config/nvptx/nvptx-protos.h (nvptx_output_atomic_insn): Declare.
* config/nvptx/nvptx.c (nvptx_output_barrier)
(nvptx_output_atomic_insn): New function.
(nvptx_print_operand): Add support for 'B'.
* config/nvptx/nvptx.md: Use nvptx_output_atomic_insn for atomic insns.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/nvptx/nvptx-protos.h 1
-rw-r--r-- gcc/config/nvptx/nvptx.c 77
-rw-r--r-- gcc/config/nvptx/nvptx.md 31
3 files changed, 104 insertions, 5 deletions
diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h
index 1512209..b7e6ae2 100644
--- a/gcc/config/nvptx/nvptx-protos.h
+++ b/gcc/config/nvptx/nvptx-protos.h
@@ -57,5 +57,6 @@ extern const char *nvptx_output_set_softstack (unsigned);
extern const char *nvptx_output_simt_enter (rtx, rtx, rtx);
extern const char *nvptx_output_simt_exit (rtx);
extern const char *nvptx_output_red_partition (rtx, rtx);
+extern const char *nvptx_output_atomic_insn (const char *, rtx *, int, int);
#endif
#endif
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index ebbfa92..722b0fa 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -2444,6 +2444,53 @@ nvptx_output_mov_insn (rtx dst, rtx src)
return "%.\tcvt%t0%t1\t%0, %1;";
}
+/* Output a pre/post barrier for MEM_OPERAND according to MEMMODEL.
+   MEM_OPERAND points at the memory operand; it is handed to output_asm_insn
+   as the operand array, so the template below refers to it as operand 0.
+   PRE_P is true when the barrier to consider is the one before the
+   operation and false when it is the one after it.
+   Summary of the switch below:
+   - relaxed: never a barrier;
+   - consume/acquire: a barrier only in the post position;
+   - release: a barrier only in the pre position;
+   - acq_rel/seq_cst: a barrier in both positions.
+   Any other MEMMODEL value hits gcc_unreachable.  */
+
+static void
+nvptx_output_barrier (rtx *mem_operand, int memmodel, bool pre_p)
+{
+ bool post_p = !pre_p; /* Exactly one of PRE_P and POST_P is true.  */
+
+ switch (memmodel)
+ {
+ case MEMMODEL_RELAXED:
+ return; /* No ordering requested: emit nothing.  */
+ case MEMMODEL_CONSUME:
+ case MEMMODEL_ACQUIRE:
+ case MEMMODEL_SYNC_ACQUIRE:
+ if (post_p)
+ break; /* Acquire-like: barrier after the operation only.  */
+ return;
+ case MEMMODEL_RELEASE:
+ case MEMMODEL_SYNC_RELEASE:
+ if (pre_p)
+ break; /* Release-like: barrier before the operation only.  */
+ return;
+ case MEMMODEL_ACQ_REL:
+ case MEMMODEL_SYNC_SEQ_CST:
+ case MEMMODEL_SEQ_CST:
+ if (pre_p || post_p) /* Always true, since POST_P is !PRE_P.  */
+ break; /* Barrier both before and after the operation.  */
+ return; /* Not reached; kept for symmetry with the cases above.  */
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Reached only via a 'break' above, i.e. when a barrier is required.  The
+    'B' operand code supplies the suffix that follows "membar".  */
+ output_asm_insn ("%.\tmembar%B0;", mem_operand);
+}
+/* Output the atomic instruction described by ASM_TEMPLATE, using operand
+   array OPERANDS, with a call to nvptx_output_barrier before and after it.
+   MEM_POS is the index in OPERANDS of the memory operand, and MEMMODEL_POS
+   is the index of the memory-model operand, whose value is read with
+   INTVAL.  All output is done here through output_asm_insn, and the empty
+   string is returned.  */
+const char *
+nvptx_output_atomic_insn (const char *asm_template, rtx *operands, int mem_pos,
+ int memmodel_pos)
+{
+ nvptx_output_barrier (&operands[mem_pos], INTVAL (operands[memmodel_pos]),
+ true); /* Pre-barrier, if the memory model calls for one.  */
+ output_asm_insn (asm_template, operands); /* The atomic operation itself.  */
+ nvptx_output_barrier (&operands[mem_pos], INTVAL (operands[memmodel_pos]),
+ false); /* Post-barrier, if the memory model calls for one.  */
+ return ""; /* Nothing left to print: it was all emitted above.  */
+}
+
static void nvptx_print_operand (FILE *, rtx, int);
/* Output INSN, which is a call to CALLEE with result RESULT. For ptx, this
@@ -2660,6 +2707,36 @@ nvptx_print_operand (FILE *file, rtx x, int code)
switch (code)
{
+ case 'B':
+ if (SYMBOL_REF_P (XEXP (x, 0)))
+ switch (SYMBOL_DATA_AREA (XEXP (x, 0)))
+ {
+ case DATA_AREA_GENERIC:
+ /* Assume worst-case: global. */
+ gcc_fallthrough (); /* FALLTHROUGH. */
+ case DATA_AREA_GLOBAL:
+ break;
+ case DATA_AREA_SHARED:
+ fputs (".cta", file);
+ return;
+ case DATA_AREA_LOCAL:
+ case DATA_AREA_CONST:
+ case DATA_AREA_PARAM:
+ default:
+ gcc_unreachable ();
+ }
+
+ /* There are 2 cases where membar.sys differs from membar.gl:
+ - the host accesses global memory (e.g. system-wide atomics)
+ - 2 or more devices are set up in peer-to-peer mode, and one
+ peer can access global memory of the other peer.
+ Neither is currently supported by OpenMP/OpenACC on nvptx, but
+ that could change, so we default to membar.sys. We could support
+ this more optimally by adding DATA_AREA_SYS and then emitting
+ .gl for DATA_AREA_GLOBAL and .sys for DATA_AREA_SYS. */
+ fputs (".sys", file);
+ return;
+
case 'A':
x = XEXP (x, 0);
gcc_fallthrough (); /* FALLTHROUGH. */
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 00bb8fe..108de1c 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -1642,7 +1642,11 @@
(set (match_dup 1)
(unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
""
- "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"
+ {
+ const char *t
+ = "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;";
+ return nvptx_output_atomic_insn (t, operands, 1, 4);
+ }
[(set_attr "atomic" "true")])
(define_insn "atomic_exchange<mode>"
@@ -1654,7 +1658,11 @@
(set (match_dup 1)
(match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input
""
- "%.\\tatom%A1.exch.b%T0\\t%0, %1, %2;"
+ {
+ const char *t
+ = "%.\tatom%A1.exch.b%T0\t%0, %1, %2;";
+ return nvptx_output_atomic_insn (t, operands, 1, 3);
+ }
[(set_attr "atomic" "true")])
(define_insn "atomic_fetch_add<mode>"
@@ -1667,7 +1675,11 @@
(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
(match_dup 1))]
""
- "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
+ {
+ const char *t
+ = "%.\\tatom%A1.add%t0\\t%0, %1, %2;";
+ return nvptx_output_atomic_insn (t, operands, 1, 3);
+ }
[(set_attr "atomic" "true")])
(define_insn "atomic_fetch_addsf"
@@ -1680,7 +1692,11 @@
(set (match_operand:SF 0 "nvptx_register_operand" "=R")
(match_dup 1))]
""
- "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
+ {
+ const char *t
+ = "%.\\tatom%A1.add%t0\\t%0, %1, %2;";
+ return nvptx_output_atomic_insn (t, operands, 1, 3);
+ }
[(set_attr "atomic" "true")])
(define_code_iterator any_logic [and ior xor])
@@ -1696,7 +1712,12 @@
(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
(match_dup 1))]
"<MODE>mode == SImode || TARGET_SM35"
- "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;"
+ {
+ const char *t
+ = "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;";
+ return nvptx_output_atomic_insn (t, operands, 1, 3);
+ }
+
[(set_attr "atomic" "true")])
(define_expand "atomic_test_and_set"