diff options
author | Tom de Vries <tdevries@suse.de> | 2022-02-03 14:00:02 +0100 |
---|---|---|
committer | Tom de Vries <tdevries@suse.de> | 2022-02-08 09:54:30 +0100 |
commit | 04b54cc486cc6fcc40380445e500eaf46d7901dc (patch) | |
tree | e9b9461242182dd61d5342588d3e6b1673fc92f0 | |
parent | 0af7ef050aed9f678d70d79931ede38374fde863 (diff) | |
download | gcc-04b54cc486cc6fcc40380445e500eaf46d7901dc.zip gcc-04b54cc486cc6fcc40380445e500eaf46d7901dc.tar.gz gcc-04b54cc486cc6fcc40380445e500eaf46d7901dc.tar.bz2 |
[nvptx] Fix .local atomic regressions
In PR target/104364, two problems were reported:
- in muniform-simt mode, an atom.cas insn is no longer executed in the
  "master lane" only.
- in msoft-stack mode, an __atomic_compare_exchange_n on stack memory is
  translated as if it accessed .local memory, which is not the case.
Fix these by:
- ensuring that all insns with the atomic attribute are also predicable, so
  that the validate_change in nvptx_reorg_uniform_simt succeeds, and
  asserting that it does, and
- guarding the local atomics implementation with a new function
  nvptx_mem_local_p that correctly handles msoft-stack.
Tested on x86_64 with nvptx accelerator.
gcc/ChangeLog:
2022-02-04 Tom de Vries <tdevries@suse.de>
PR target/104364
* config/nvptx/nvptx-protos.h (nvptx_mem_local_p): Declare.
* config/nvptx/nvptx.cc (nvptx_reorg_uniform_simt): Assert that
change is validated.
(nvptx_mem_local_p): New function.
* config/nvptx/nvptx.md: Use nvptx_mem_local_p.
(define_c_enum "unspecv"): Add UNSPECV_CAS_LOCAL.
(define_insn "atomic_compare_and_swap<mode>_1_local"): New
non-atomic, non-predicable define_insn, factored out of ...
(define_insn "atomic_compare_and_swap<mode>_1"): ... here.
Make predicable again.
(define_expand "atomic_compare_and_swap<mode>"): Use
atomic_compare_and_swap<mode>_1_local.
gcc/testsuite/ChangeLog:
2022-02-04 Tom de Vries <tdevries@suse.de>
PR target/104364
* gcc.target/nvptx/softstack-2.c: New test.
* gcc.target/nvptx/uniform-simt-1.c: New test.
-rw-r--r-- | gcc/config/nvptx/nvptx-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.cc | 25 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.md | 63 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/nvptx/softstack-2.c | 11 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/nvptx/uniform-simt-1.c | 18 |
5 files changed, 87 insertions, 31 deletions
diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h index 3d6ad14..a846e34 100644 --- a/gcc/config/nvptx/nvptx-protos.h +++ b/gcc/config/nvptx/nvptx-protos.h @@ -59,5 +59,6 @@ extern const char *nvptx_output_simt_enter (rtx, rtx, rtx); extern const char *nvptx_output_simt_exit (rtx); extern const char *nvptx_output_red_partition (rtx, rtx); extern const char *nvptx_output_atomic_insn (const char *, rtx *, int, int); +extern bool nvptx_mem_local_p (rtx); #endif #endif diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index b3bb97c..2a69492 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -3150,7 +3150,8 @@ nvptx_reorg_uniform_simt () rtx pred = nvptx_get_unisimt_predicate (); pred = gen_rtx_NE (BImode, pred, const0_rtx); pat = gen_rtx_COND_EXEC (VOIDmode, pred, pat); - validate_change (insn, &PATTERN (insn), pat, false); + bool changed_p = validate_change (insn, &PATTERN (insn), pat, false); + gcc_assert (changed_p); } } @@ -6894,6 +6895,28 @@ nvptx_libc_has_function (enum function_class fn_class, tree type) return default_libc_has_function (fn_class, type); } +bool +nvptx_mem_local_p (rtx mem) +{ + gcc_assert (GET_CODE (mem) == MEM); + + struct address_info info; + decompose_mem_address (&info, mem); + + if (info.base != NULL && REG_P (*info.base) + && REGNO_PTR_FRAME_P (REGNO (*info.base))) + { + if (TARGET_SOFT_STACK) + { + /* Frame-related doesn't mean local. 
*/ + } + else + return true; + } + + return false; +} + #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE nvptx_option_override diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 92768dd..d64dbfd 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -54,6 +54,7 @@ (define_c_enum "unspecv" [ UNSPECV_LOCK UNSPECV_CAS + UNSPECV_CAS_LOCAL UNSPECV_XCHG UNSPECV_BARSYNC UNSPECV_WARPSYNC @@ -1771,8 +1772,14 @@ (match_operand:SI 7 "const_int_operand")] ;; failure model "" { - emit_insn (gen_atomic_compare_and_swap<mode>_1 - (operands[1], operands[2], operands[3], operands[4], operands[6])); + if (nvptx_mem_local_p (operands[2])) + emit_insn (gen_atomic_compare_and_swap<mode>_1_local + (operands[1], operands[2], operands[3], operands[4], + operands[6])); + else + emit_insn (gen_atomic_compare_and_swap<mode>_1 + (operands[1], operands[2], operands[3], operands[4], + operands[6])); rtx cond = gen_reg_rtx (BImode); emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3])); @@ -1780,23 +1787,18 @@ DONE; }) -(define_insn "atomic_compare_and_swap<mode>_1" +(define_insn "atomic_compare_and_swap<mode>_1_local" [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") (unspec_volatile:SDIM [(match_operand:SDIM 1 "memory_operand" "+m") (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri") (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri") (match_operand:SI 4 "const_int_operand")] - UNSPECV_CAS)) + UNSPECV_CAS_LOCAL)) (set (match_dup 1) - (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))] + (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS_LOCAL))] "" { - struct address_info info; - decompose_mem_address (&info, operands[1]); - if (info.base != NULL && REG_P (*info.base) - && REGNO_PTR_FRAME_P (REGNO (*info.base))) - { output_asm_insn ("{", NULL); output_asm_insn ("\\t" ".reg.pred" "\\t" "%%eq_p;", NULL); output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); @@ -1807,13 +1809,26 @@ output_asm_insn ("\\t" 
"mov%t0" "\\t" "%0,%%val;", operands); output_asm_insn ("}", NULL); return ""; - } + } + [(set_attr "predicable" "false")]) + +(define_insn "atomic_compare_and_swap<mode>_1" + [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") + (unspec_volatile:SDIM + [(match_operand:SDIM 1 "memory_operand" "+m") + (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri") + (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri") + (match_operand:SI 4 "const_int_operand")] + UNSPECV_CAS)) + (set (match_dup 1) + (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))] + "" + { const char *t - = "\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"; + = "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"; return nvptx_output_atomic_insn (t, operands, 1, 4); } - [(set_attr "atomic" "true") - (set_attr "predicable" "false")]) + [(set_attr "atomic" "true")]) (define_insn "atomic_exchange<mode>" [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output @@ -1825,10 +1840,7 @@ (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input "" { - struct address_info info; - decompose_mem_address (&info, operands[1]); - if (info.base != NULL && REG_P (*info.base) - && REGNO_PTR_FRAME_P (REGNO (*info.base))) + if (nvptx_mem_local_p (operands[1])) { output_asm_insn ("{", NULL); output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); @@ -1855,10 +1867,7 @@ (match_dup 1))] "" { - struct address_info info; - decompose_mem_address (&info, operands[1]); - if (info.base != NULL && REG_P (*info.base) - && REGNO_PTR_FRAME_P (REGNO (*info.base))) + if (nvptx_mem_local_p (operands[1])) { output_asm_insn ("{", NULL); output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); @@ -1888,10 +1897,7 @@ (match_dup 1))] "" { - struct address_info info; - decompose_mem_address (&info, operands[1]); - if (info.base != NULL && REG_P (*info.base) - && REGNO_PTR_FRAME_P (REGNO (*info.base))) + if (nvptx_mem_local_p (operands[1])) { output_asm_insn ("{", NULL); output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); @@ 
-1924,10 +1930,7 @@ (match_dup 1))] "<MODE>mode == SImode || TARGET_SM35" { - struct address_info info; - decompose_mem_address (&info, operands[1]); - if (info.base != NULL && REG_P (*info.base) - && REGNO_PTR_FRAME_P (REGNO (*info.base))) + if (nvptx_mem_local_p (operands[1])) { output_asm_insn ("{", NULL); output_asm_insn ("\\t" ".reg.b%T0" "\\t" "%%val;", operands); diff --git a/gcc/testsuite/gcc.target/nvptx/softstack-2.c b/gcc/testsuite/gcc.target/nvptx/softstack-2.c new file mode 100644 index 0000000..cccfda9 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/softstack-2.c @@ -0,0 +1,11 @@ +/* { dg-options "-O2 -msoft-stack" } */ + +int +f (void) +{ + int a = 0; + return __sync_lock_test_and_set (&a, 1); +} + +/* { dg-final { scan-assembler-times "atom.exch" 1 } } */ + diff --git a/gcc/testsuite/gcc.target/nvptx/uniform-simt-1.c b/gcc/testsuite/gcc.target/nvptx/uniform-simt-1.c new file mode 100644 index 0000000..1bc0ada --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/uniform-simt-1.c @@ -0,0 +1,18 @@ +/* { dg-options "-O2 -muniform-simt" } */ + +enum memmodel +{ + MEMMODEL_RELAXED = 0, +}; + +int a = 0; + +int +f (void) +{ + int expected = 1; + return __atomic_compare_exchange_n (&a, &expected, 0, 0, MEMMODEL_RELAXED, + MEMMODEL_RELAXED); +} + +/* { dg-final { scan-assembler-times "@%r\[0-9\]*\tatom.global.cas" 1 } } */ |