diff options
author | Tom de Vries <tdevries@suse.de> | 2022-01-21 21:46:05 +0100 |
---|---|---|
committer | Tom de Vries <tdevries@suse.de> | 2022-02-01 19:28:24 +0100 |
commit | e0451f93d9faa13495132f4e246e9bef30b51417 (patch) | |
tree | 22d002b52a5921f93d4d4e48c82280c3094f1216 /gcc | |
parent | ca902055d056773bd0ca80f68bca4b20ad0e183f (diff) | |
download | gcc-e0451f93d9faa13495132f4e246e9bef30b51417.zip gcc-e0451f93d9faa13495132f4e246e9bef30b51417.tar.gz gcc-e0451f93d9faa13495132f4e246e9bef30b51417.tar.bz2 |
[nvptx] Add some support for .local atomics
The ptx insn atom doesn't support local memory. When doing an atomic
operation on local memory, we run into:
...
operation not supported on global/shared address space
...
This is the cuGetErrorString message for CUDA_ERROR_INVALID_ADDRESS_SPACE.
The message is somewhat confusing, given that the operation is actually not
supported on the local address space.
Fix this by falling back on a non-atomic version when detecting
a frame-related memory operand.
This only solves the cases that can be detected at compile time. It does,
however, fix the openacc private-atomic-* test-cases.
Tested on x86_64 with nvptx accelerator.
gcc/ChangeLog:
2022-01-27 Tom de Vries <tdevries@suse.de>
* config/nvptx/nvptx.md (define_insn "atomic_compare_and_swap<mode>_1")
(define_insn "atomic_exchange<mode>")
(define_insn "atomic_fetch_add<mode>")
(define_insn "atomic_fetch_addsf")
(define_insn "atomic_fetch_<logic><mode>"): Output non-atomic version
if memory operand is frame-relative.
gcc/testsuite/ChangeLog:
2022-01-31 Tom de Vries <tdevries@suse.de>
* gcc.target/nvptx/stack-atomics-run.c: New test.
libgomp/ChangeLog:
2022-01-27 Tom de Vries <tdevries@suse.de>
* testsuite/libgomp.oacc-c-c++-common/private-atomic-1.c: Remove
PR83812 workaround.
* testsuite/libgomp.oacc-fortran/private-atomic-1-vector.f90: Same.
* testsuite/libgomp.oacc-fortran/private-atomic-1-worker.f90: Same.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/nvptx/nvptx.md | 82 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c | 44 |
2 files changed, 124 insertions, 2 deletions
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 773ae8f..9cbbd95 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -1790,11 +1790,28 @@ (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))] "" { + struct address_info info; + decompose_mem_address (&info, operands[1]); + if (info.base != NULL && REG_P (*info.base) + && REGNO_PTR_FRAME_P (REGNO (*info.base))) + { + output_asm_insn ("{", NULL); + output_asm_insn ("\\t" ".reg.pred" "\\t" "%%eq_p;", NULL); + output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); + output_asm_insn ("\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands); + output_asm_insn ("\\t" "setp.eq%t0" "\\t" "%%eq_p, %%val, %2;", + operands); + output_asm_insn ("@%%eq_p\\t" "st%A1%t0" "\\t" "%1,%3;", operands); + output_asm_insn ("\\t" "mov%t0" "\\t" "%0,%%val;", operands); + output_asm_insn ("}", NULL); + return ""; + } const char *t - = "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"; + = "\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"; return nvptx_output_atomic_insn (t, operands, 1, 4); } - [(set_attr "atomic" "true")]) + [(set_attr "atomic" "true") + (set_attr "predicable" "false")]) (define_insn "atomic_exchange<mode>" [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output @@ -1806,6 +1823,19 @@ (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input "" { + struct address_info info; + decompose_mem_address (&info, operands[1]); + if (info.base != NULL && REG_P (*info.base) + && REGNO_PTR_FRAME_P (REGNO (*info.base))) + { + output_asm_insn ("{", NULL); + output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); + output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands); + output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%2;", operands); + output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands); + output_asm_insn ("}", NULL); + return ""; + } const char *t = "%.\tatom%A1.exch.b%T0\t%0, %1, %2;"; return nvptx_output_atomic_insn (t, operands, 1, 3); @@ -1823,6 +1853,22 @@ 
(match_dup 1))] "" { + struct address_info info; + decompose_mem_address (&info, operands[1]); + if (info.base != NULL && REG_P (*info.base) + && REGNO_PTR_FRAME_P (REGNO (*info.base))) + { + output_asm_insn ("{", NULL); + output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); + output_asm_insn ("\\t" ".reg%t0" "\\t" "%%update;", operands); + output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands); + output_asm_insn ("%.\\t" "add%t0" "\\t" "%%update,%%val,%2;", + operands); + output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands); + output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands); + output_asm_insn ("}", NULL); + return ""; + } const char *t = "%.\\tatom%A1.add%t0\\t%0, %1, %2;"; return nvptx_output_atomic_insn (t, operands, 1, 3); @@ -1840,6 +1886,22 @@ (match_dup 1))] "" { + struct address_info info; + decompose_mem_address (&info, operands[1]); + if (info.base != NULL && REG_P (*info.base) + && REGNO_PTR_FRAME_P (REGNO (*info.base))) + { + output_asm_insn ("{", NULL); + output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); + output_asm_insn ("\\t" ".reg%t0" "\\t" "%%update;", operands); + output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands); + output_asm_insn ("%.\\t" "add%t0" "\\t" "%%update,%%val,%2;", + operands); + output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands); + output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands); + output_asm_insn ("}", NULL); + return ""; + } const char *t = "%.\\tatom%A1.add%t0\\t%0, %1, %2;"; return nvptx_output_atomic_insn (t, operands, 1, 3); @@ -1860,6 +1922,22 @@ (match_dup 1))] "<MODE>mode == SImode || TARGET_SM35" { + struct address_info info; + decompose_mem_address (&info, operands[1]); + if (info.base != NULL && REG_P (*info.base) + && REGNO_PTR_FRAME_P (REGNO (*info.base))) + { + output_asm_insn ("{", NULL); + output_asm_insn ("\\t" ".reg.b%T0" "\\t" "%%val;", operands); + output_asm_insn ("\\t" ".reg.b%T0" "\\t" "%%update;", 
operands); + output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands); + output_asm_insn ("%.\\t" "<logic>.b%T0" "\\t" "%%update,%%val,%2;", + operands); + output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands); + output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands); + output_asm_insn ("}", NULL); + return ""; + } const char *t = "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;"; return nvptx_output_atomic_insn (t, operands, 1, 3); diff --git a/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c b/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c new file mode 100644 index 0000000..ad8e2f8 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/stack-atomics-run.c @@ -0,0 +1,44 @@ +/* { dg-do run } */ + +enum memmodel { + MEMMODEL_RELAXED = 0 +}; + +int +main (void) +{ + int a, b; + + a = 1; + __atomic_fetch_add (&a, 1, MEMMODEL_RELAXED); + if (a != 2) + __builtin_abort (); + + a = 0; + __atomic_fetch_or (&a, 1, MEMMODEL_RELAXED); + if (a != 1) + __builtin_abort (); + + a = 1; + b = -1; + b = __atomic_exchange_n (&a, 0, MEMMODEL_RELAXED); + if (a != 0) + __builtin_abort (); + if (b != 1) + __builtin_abort (); + + a = 1; + b = -1; + { + int expected = a; + b = __atomic_compare_exchange_n (&a, &expected, 0, 0, MEMMODEL_RELAXED, + MEMMODEL_RELAXED); + } + if (a != 0) + __builtin_abort (); + if (b != 1) + __builtin_abort (); + + + return 0; +} |