aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom de Vries <tdevries@suse.de>2022-02-03 14:00:02 +0100
committerTom de Vries <tdevries@suse.de>2022-02-08 09:54:30 +0100
commit04b54cc486cc6fcc40380445e500eaf46d7901dc (patch)
treee9b9461242182dd61d5342588d3e6b1673fc92f0
parent0af7ef050aed9f678d70d79931ede38374fde863 (diff)
downloadgcc-04b54cc486cc6fcc40380445e500eaf46d7901dc.zip
gcc-04b54cc486cc6fcc40380445e500eaf46d7901dc.tar.gz
gcc-04b54cc486cc6fcc40380445e500eaf46d7901dc.tar.bz2
[nvptx] Fix .local atomic regressions
In PR target/104364, two problems were reported:
- in muniform-simt mode, an atom.cas insn is no longer executed in the
  "master lane" only.
- in msoft-stack mode, an __atomic_compare_exchange_n on stack memory is
  translated assuming it accesses local memory, while that's not the case.

Fix these by:
- ensuring that all insns with atomic attribute are also predicable, such
  that the validate_change in nvptx_reorg_uniform_simt will succeed, and
  asserting that it does, and
- guarding the local atomics implementation with a new function
  nvptx_mem_local_p that correctly handles msoft-stack.

Tested on x86_64 with nvptx accelerator.

gcc/ChangeLog:

2022-02-04 Tom de Vries <tdevries@suse.de>

    PR target/104364
    * config/nvptx/nvptx-protos.h (nvptx_mem_local_p): Declare.
    * config/nvptx/nvptx.cc (nvptx_reorg_uniform_simt): Assert that
    change is validated.
    (nvptx_mem_local_p): New function.
    * config/nvptx/nvptx.md: Use nvptx_mem_local_p.
    (define_c_enum "unspecv"): Add UNSPECV_CAS_LOCAL.
    (define_insn "atomic_compare_and_swap<mode>_1_local"): New
    non-atomic, non-predicable define_insn, factored out of ...
    (define_insn "atomic_compare_and_swap<mode>_1"): ... here. Make
    predicable again.
    (define_expand "atomic_compare_and_swap<mode>"): Use
    atomic_compare_and_swap<mode>_1_local.

gcc/testsuite/ChangeLog:

2022-02-04 Tom de Vries <tdevries@suse.de>

    PR target/104364
    * gcc.target/nvptx/softstack-2.c: New test.
    * gcc.target/nvptx/uniform-simt-1.c: New test.
-rw-r--r--gcc/config/nvptx/nvptx-protos.h1
-rw-r--r--gcc/config/nvptx/nvptx.cc25
-rw-r--r--gcc/config/nvptx/nvptx.md63
-rw-r--r--gcc/testsuite/gcc.target/nvptx/softstack-2.c11
-rw-r--r--gcc/testsuite/gcc.target/nvptx/uniform-simt-1.c18
5 files changed, 87 insertions, 31 deletions
diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h
index 3d6ad14..a846e34 100644
--- a/gcc/config/nvptx/nvptx-protos.h
+++ b/gcc/config/nvptx/nvptx-protos.h
@@ -59,5 +59,6 @@ extern const char *nvptx_output_simt_enter (rtx, rtx, rtx);
extern const char *nvptx_output_simt_exit (rtx);
extern const char *nvptx_output_red_partition (rtx, rtx);
extern const char *nvptx_output_atomic_insn (const char *, rtx *, int, int);
+extern bool nvptx_mem_local_p (rtx);
#endif
#endif
diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
index b3bb97c..2a69492 100644
--- a/gcc/config/nvptx/nvptx.cc
+++ b/gcc/config/nvptx/nvptx.cc
@@ -3150,7 +3150,8 @@ nvptx_reorg_uniform_simt ()
rtx pred = nvptx_get_unisimt_predicate ();
pred = gen_rtx_NE (BImode, pred, const0_rtx);
pat = gen_rtx_COND_EXEC (VOIDmode, pred, pat);
- validate_change (insn, &PATTERN (insn), pat, false);
+ bool changed_p = validate_change (insn, &PATTERN (insn), pat, false);
+ gcc_assert (changed_p);
}
}
@@ -6894,6 +6895,28 @@ nvptx_libc_has_function (enum function_class fn_class, tree type)
return default_libc_has_function (fn_class, type);
}
+/* Return true if MEM is known to be in .local memory: its address is based
+   on a frame-related pointer register and the soft stack is not in use.  */
+
+bool
+nvptx_mem_local_p (rtx mem)
+{
+  gcc_assert (GET_CODE (mem) == MEM);
+
+  struct address_info addr;
+  decompose_mem_address (&addr, mem);
+
+  /* No base register at all: nothing ties the access to the frame.  */
+  if (addr.base == NULL || !REG_P (*addr.base))
+    return false;
+
+  if (!REGNO_PTR_FRAME_P (REGNO (*addr.base)))
+    return false;
+
+  /* With -msoft-stack, frame-related doesn't mean local.  */
+  return !TARGET_SOFT_STACK;
+}
+
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE nvptx_option_override
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 92768dd..d64dbfd 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -54,6 +54,7 @@
(define_c_enum "unspecv" [
UNSPECV_LOCK
UNSPECV_CAS
+ UNSPECV_CAS_LOCAL
UNSPECV_XCHG
UNSPECV_BARSYNC
UNSPECV_WARPSYNC
@@ -1771,8 +1772,14 @@
(match_operand:SI 7 "const_int_operand")] ;; failure model
""
{
- emit_insn (gen_atomic_compare_and_swap<mode>_1
- (operands[1], operands[2], operands[3], operands[4], operands[6]));
+ if (nvptx_mem_local_p (operands[2]))
+ emit_insn (gen_atomic_compare_and_swap<mode>_1_local
+ (operands[1], operands[2], operands[3], operands[4],
+ operands[6]));
+ else
+ emit_insn (gen_atomic_compare_and_swap<mode>_1
+ (operands[1], operands[2], operands[3], operands[4],
+ operands[6]));
rtx cond = gen_reg_rtx (BImode);
emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3]));
@@ -1780,23 +1787,18 @@
DONE;
})
-(define_insn "atomic_compare_and_swap<mode>_1"
+(define_insn "atomic_compare_and_swap<mode>_1_local"
[(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
(unspec_volatile:SDIM
[(match_operand:SDIM 1 "memory_operand" "+m")
(match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
(match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
(match_operand:SI 4 "const_int_operand")]
- UNSPECV_CAS))
+ UNSPECV_CAS_LOCAL))
(set (match_dup 1)
- (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
+ (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS_LOCAL))]
""
{
- struct address_info info;
- decompose_mem_address (&info, operands[1]);
- if (info.base != NULL && REG_P (*info.base)
- && REGNO_PTR_FRAME_P (REGNO (*info.base)))
- {
output_asm_insn ("{", NULL);
output_asm_insn ("\\t" ".reg.pred" "\\t" "%%eq_p;", NULL);
output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
@@ -1807,13 +1809,26 @@
output_asm_insn ("\\t" "mov%t0" "\\t" "%0,%%val;", operands);
output_asm_insn ("}", NULL);
return "";
- }
+ }
+ [(set_attr "predicable" "false")])
+
+(define_insn "atomic_compare_and_swap<mode>_1"
+ [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
+ (unspec_volatile:SDIM
+ [(match_operand:SDIM 1 "memory_operand" "+m")
+ (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
+ (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
+ (match_operand:SI 4 "const_int_operand")]
+ UNSPECV_CAS))
+ (set (match_dup 1)
+ (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
+ ""
+ {
const char *t
- = "\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;";
+ = "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"; /* NOTE(review): the added "%." presumably prints the insn's predicate prefix (the "@%r..." that uniform-simt-1.c scans for); confirm in nvptx_print_operand.  */
return nvptx_output_atomic_insn (t, operands, 1, 4);
}
- [(set_attr "atomic" "true")
- (set_attr "predicable" "false")])
+ [(set_attr "atomic" "true")])
(define_insn "atomic_exchange<mode>"
[(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output
@@ -1825,10 +1840,7 @@
(match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input
""
{
- struct address_info info;
- decompose_mem_address (&info, operands[1]);
- if (info.base != NULL && REG_P (*info.base)
- && REGNO_PTR_FRAME_P (REGNO (*info.base)))
+ if (nvptx_mem_local_p (operands[1]))
{
output_asm_insn ("{", NULL);
output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
@@ -1855,10 +1867,7 @@
(match_dup 1))]
""
{
- struct address_info info;
- decompose_mem_address (&info, operands[1]);
- if (info.base != NULL && REG_P (*info.base)
- && REGNO_PTR_FRAME_P (REGNO (*info.base)))
+ if (nvptx_mem_local_p (operands[1]))
{
output_asm_insn ("{", NULL);
output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
@@ -1888,10 +1897,7 @@
(match_dup 1))]
""
{
- struct address_info info;
- decompose_mem_address (&info, operands[1]);
- if (info.base != NULL && REG_P (*info.base)
- && REGNO_PTR_FRAME_P (REGNO (*info.base)))
+ if (nvptx_mem_local_p (operands[1]))
{
output_asm_insn ("{", NULL);
output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
@@ -1924,10 +1930,7 @@
(match_dup 1))]
"<MODE>mode == SImode || TARGET_SM35"
{
- struct address_info info;
- decompose_mem_address (&info, operands[1]);
- if (info.base != NULL && REG_P (*info.base)
- && REGNO_PTR_FRAME_P (REGNO (*info.base)))
+ if (nvptx_mem_local_p (operands[1]))
{
output_asm_insn ("{", NULL);
output_asm_insn ("\\t" ".reg.b%T0" "\\t" "%%val;", operands);
diff --git a/gcc/testsuite/gcc.target/nvptx/softstack-2.c b/gcc/testsuite/gcc.target/nvptx/softstack-2.c
new file mode 100644
index 0000000..cccfda9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/softstack-2.c
@@ -0,0 +1,11 @@
+/* { dg-options "-O2 -msoft-stack" } */
+
+int
+f (void)
+{
+ int a = 0; /* Stack object: per PR target/104364 not .local with -msoft-stack.  */
+ return __sync_lock_test_and_set (&a, 1); /* Expect one atom.exch (see dg-final).  */
+}
+
+/* { dg-final { scan-assembler-times "atom.exch" 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/nvptx/uniform-simt-1.c b/gcc/testsuite/gcc.target/nvptx/uniform-simt-1.c
new file mode 100644
index 0000000..1bc0ada
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/uniform-simt-1.c
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -muniform-simt" } */
+
+enum memmodel
+{
+ MEMMODEL_RELAXED = 0, /* Passed as both memory-order arguments in f.  */
+};
+
+int a = 0;
+
+int
+f (void)
+{
+ int expected = 1;
+ return __atomic_compare_exchange_n (&a, &expected, 0, 0, MEMMODEL_RELAXED,
+ MEMMODEL_RELAXED); /* dg-final expects one "@%r..."-prefixed atom.global.cas.  */
+}
+
+/* { dg-final { scan-assembler-times "@%r\[0-9\]*\tatom.global.cas" 1 } } */