aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorTom de Vries <tdevries@suse.de>2022-01-21 21:46:05 +0100
committerTom de Vries <tdevries@suse.de>2022-02-01 19:28:24 +0100
commite0451f93d9faa13495132f4e246e9bef30b51417 (patch)
tree22d002b52a5921f93d4d4e48c82280c3094f1216 /gcc
parentca902055d056773bd0ca80f68bca4b20ad0e183f (diff)
downloadgcc-e0451f93d9faa13495132f4e246e9bef30b51417.zip
gcc-e0451f93d9faa13495132f4e246e9bef30b51417.tar.gz
gcc-e0451f93d9faa13495132f4e246e9bef30b51417.tar.bz2
[nvptx] Add some support for .local atomics
The ptx insn atom doesn't support local memory. When doing an atomic operation on local memory, we run into: ... operation not supported on global/shared address space ... This is the cuGetErrorString message for CUDA_ERROR_INVALID_ADDRESS_SPACE. The message is somewhat confusing given that the operation is actually not supported on the local address space. Fix this by falling back on a non-atomic version when detecting a frame-relative memory operand. This only solves some cases that are detected at compile-time. It does however fix the openacc private-atomic-* test-cases. Tested on x86_64 with nvptx accelerator. gcc/ChangeLog: 2022-01-27 Tom de Vries <tdevries@suse.de> * config/nvptx/nvptx.md (define_insn "atomic_compare_and_swap<mode>_1") (define_insn "atomic_exchange<mode>") (define_insn "atomic_fetch_add<mode>") (define_insn "atomic_fetch_addsf") (define_insn "atomic_fetch_<logic><mode>"): Output non-atomic version if memory operand is frame-relative. gcc/testsuite/ChangeLog: 2022-01-31 Tom de Vries <tdevries@suse.de> * gcc.target/nvptx/stack-atomics-run.c: New test. libgomp/ChangeLog: 2022-01-27 Tom de Vries <tdevries@suse.de> * testsuite/libgomp.oacc-c-c++-common/private-atomic-1.c: Remove PR83812 workaround. * testsuite/libgomp.oacc-fortran/private-atomic-1-vector.f90: Same. * testsuite/libgomp.oacc-fortran/private-atomic-1-worker.f90: Same.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/nvptx/nvptx.md82
-rw-r--r--gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c44
2 files changed, 124 insertions, 2 deletions
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 773ae8f..9cbbd95 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -1790,11 +1790,28 @@
(unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
""
{
+ struct address_info info;
+ decompose_mem_address (&info, operands[1]);
+ if (info.base != NULL && REG_P (*info.base)
+ && REGNO_PTR_FRAME_P (REGNO (*info.base)))
+ {
+ output_asm_insn ("{", NULL);
+ output_asm_insn ("\\t" ".reg.pred" "\\t" "%%eq_p;", NULL);
+ output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
+ output_asm_insn ("\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
+ output_asm_insn ("\\t" "setp.eq%t0" "\\t" "%%eq_p, %%val, %2;",
+ operands);
+ output_asm_insn ("@%%eq_p\\t" "st%A1%t0" "\\t" "%1,%3;", operands);
+ output_asm_insn ("\\t" "mov%t0" "\\t" "%0,%%val;", operands);
+ output_asm_insn ("}", NULL);
+ return "";
+ }
const char *t
- = "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;";
+ = "\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;";
return nvptx_output_atomic_insn (t, operands, 1, 4);
}
- [(set_attr "atomic" "true")])
+ [(set_attr "atomic" "true")
+ (set_attr "predicable" "false")])
(define_insn "atomic_exchange<mode>"
[(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output
@@ -1806,6 +1823,19 @@
(match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input
""
{
+ struct address_info info;
+ decompose_mem_address (&info, operands[1]);
+ if (info.base != NULL && REG_P (*info.base)
+ && REGNO_PTR_FRAME_P (REGNO (*info.base)))
+ {
+ output_asm_insn ("{", NULL);
+ output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
+ output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
+ output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%2;", operands);
+ output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands);
+ output_asm_insn ("}", NULL);
+ return "";
+ }
const char *t
= "%.\tatom%A1.exch.b%T0\t%0, %1, %2;";
return nvptx_output_atomic_insn (t, operands, 1, 3);
@@ -1823,6 +1853,22 @@
(match_dup 1))]
""
{
+ struct address_info info;
+ decompose_mem_address (&info, operands[1]);
+ if (info.base != NULL && REG_P (*info.base)
+ && REGNO_PTR_FRAME_P (REGNO (*info.base)))
+ {
+ output_asm_insn ("{", NULL);
+ output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
+ output_asm_insn ("\\t" ".reg%t0" "\\t" "%%update;", operands);
+ output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
+ output_asm_insn ("%.\\t" "add%t0" "\\t" "%%update,%%val,%2;",
+ operands);
+ output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands);
+ output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands);
+ output_asm_insn ("}", NULL);
+ return "";
+ }
const char *t
= "%.\\tatom%A1.add%t0\\t%0, %1, %2;";
return nvptx_output_atomic_insn (t, operands, 1, 3);
@@ -1840,6 +1886,22 @@
(match_dup 1))]
""
{
+ struct address_info info;
+ decompose_mem_address (&info, operands[1]);
+ if (info.base != NULL && REG_P (*info.base)
+ && REGNO_PTR_FRAME_P (REGNO (*info.base)))
+ {
+ output_asm_insn ("{", NULL);
+ output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands);
+ output_asm_insn ("\\t" ".reg%t0" "\\t" "%%update;", operands);
+ output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
+ output_asm_insn ("%.\\t" "add%t0" "\\t" "%%update,%%val,%2;",
+ operands);
+ output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands);
+ output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands);
+ output_asm_insn ("}", NULL);
+ return "";
+ }
const char *t
= "%.\\tatom%A1.add%t0\\t%0, %1, %2;";
return nvptx_output_atomic_insn (t, operands, 1, 3);
@@ -1860,6 +1922,22 @@
(match_dup 1))]
"<MODE>mode == SImode || TARGET_SM35"
{
+ struct address_info info;
+ decompose_mem_address (&info, operands[1]);
+ if (info.base != NULL && REG_P (*info.base)
+ && REGNO_PTR_FRAME_P (REGNO (*info.base)))
+ {
+ output_asm_insn ("{", NULL);
+ output_asm_insn ("\\t" ".reg.b%T0" "\\t" "%%val;", operands);
+ output_asm_insn ("\\t" ".reg.b%T0" "\\t" "%%update;", operands);
+ output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands);
+ output_asm_insn ("%.\\t" "<logic>.b%T0" "\\t" "%%update,%%val,%2;",
+ operands);
+ output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands);
+ output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands);
+ output_asm_insn ("}", NULL);
+ return "";
+ }
const char *t
= "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;";
return nvptx_output_atomic_insn (t, operands, 1, 3);
diff --git a/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c b/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c
new file mode 100644
index 0000000..ad8e2f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* Memory-order value passed to the __atomic builtins below.  */
+enum memmodel {
+ MEMMODEL_RELAXED = 0
+};
+/* Run __atomic builtins on a local variable; abort on any wrong result.  */
+int
+main (void)
+{
+ int a, b;
+ /* fetch_add: 1 + 1 must leave 2 in a.  */
+ a = 1;
+ __atomic_fetch_add (&a, 1, MEMMODEL_RELAXED);
+ if (a != 2)
+ __builtin_abort ();
+ /* fetch_or: 0 | 1 must leave 1 in a.  */
+ a = 0;
+ __atomic_fetch_or (&a, 1, MEMMODEL_RELAXED);
+ if (a != 1)
+ __builtin_abort ();
+ /* exchange: must store 0 in a and hand back the previous value 1 in b.  */
+ a = 1;
+ b = -1; /* Sentinel: differs from the expected result 1.  */
+ b = __atomic_exchange_n (&a, 0, MEMMODEL_RELAXED);
+ if (a != 0)
+ __builtin_abort ();
+ if (b != 1)
+ __builtin_abort ();
+ /* compare_exchange: a equals expected, so 0 must be stored and b be 1.  */
+ a = 1;
+ b = -1; /* Sentinel: differs from the expected result 1.  */
+ {
+ int expected = a;
+ b = __atomic_compare_exchange_n (&a, &expected, 0, 0, MEMMODEL_RELAXED,
+ MEMMODEL_RELAXED);
+ }
+ if (a != 0)
+ __builtin_abort ();
+ if (b != 1)
+ __builtin_abort ();
+
+
+ return 0;
+}