diff options
author | Chung-Lin Tang <cltang@baylibre.com> | 2025-05-02 00:33:07 +0000 |
---|---|---|
committer | Sandra Loosemore <sloosemore@baylibre.com> | 2025-05-15 20:25:51 +0000 |
commit | a6682e7af891700ae67e7d0f5d3fcc462eb44609 (patch) | |
tree | 61fd0c61939ebbd3a5396fbd9b865f2f529abf61 /gcc | |
parent | 5fd60a678116773e99d5fd2d64a118f837e5d6f0 (diff) | |
download | gcc-a6682e7af891700ae67e7d0f5d3fcc462eb44609.zip gcc-a6682e7af891700ae67e7d0f5d3fcc462eb44609.tar.gz gcc-a6682e7af891700ae67e7d0f5d3fcc462eb44609.tar.bz2 |
OpenACC: array reductions bug fixes
This is a merge of the v4 to v5 diff patch from:
https://gcc.gnu.org/pipermail/gcc-patches/2025-March/679682.html
This patch fixes issues found for NVPTX sm_70 testing, and another issue
related to copying to reduction buffer for worker/vector mode.
gcc/ChangeLog:
* config/gcn/gcn-tree.cc (gcn_goacc_reduction_setup): Fix array case
copy source into reduction buffer.
* config/nvptx/nvptx.cc (nvptx_expand_shared_addr): Move default size
init setting place.
(enum nvptx_builtins): Add NVPTX_BUILTIN_BAR_WARPSYNC.
(nvptx_init_builtins): Add DEF() of nvptx_builtin_bar_warpsync.
(nvptx_expand_builtin): Expand NVPTX_BUILTIN_BAR_WARPSYNC.
(nvptx_goacc_reduction_setup): Fix array case copy source into reduction
buffer.
(nvptx_goacc_reduction_fini): Add bar.warpsync for at end of vector-mode
reductions for sm_70 and above.
libgomp/ChangeLog:
* testsuite/libgomp.oacc-c-c++-common/reduction-arrays-2.c: Adjust test.
* testsuite/libgomp.oacc-c-c++-common/reduction-arrays-3.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/reduction-arrays-4.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/reduction-arrays-5.c: Likewise.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/gcn/gcn-tree.cc | 3 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.cc | 32 |
2 files changed, 26 insertions, 9 deletions
diff --git a/gcc/config/gcn/gcn-tree.cc b/gcc/config/gcn/gcn-tree.cc index 87c4267..c3349d6 100644 --- a/gcc/config/gcn/gcn-tree.cc +++ b/gcc/config/gcn/gcn-tree.cc @@ -750,13 +750,14 @@ gcn_goacc_reduction_setup (gcall *call) tree offset = gimple_call_arg (call, 5); if (array_p) { + tree copy_src = !integer_zerop (ref_to_res) ? ref_to_res : array_addr; tree decl = gcn_goacc_get_worker_array_reduction_buffer (array_type, array_max_idx, &seq); tree ptr = make_ssa_name (TREE_TYPE (array_addr)); gimplify_assign (ptr, build_fold_addr_expr (decl), &seq); /* Store incoming value to worker reduction buffer. */ - oacc_build_array_copy (ptr, array_addr, array_max_idx, &seq); + oacc_build_array_copy (ptr, copy_src, array_max_idx, &seq); } else { diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index ba40a84..115d34f 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -6516,16 +6516,16 @@ nvptx_expand_shared_addr (tree exp, rtx target, if (TREE_CONSTANT (size_expr)) size = TREE_INT_CST_LOW (size_expr); + /* Default size for unknown size expression. */ + if (size == 0) + size = 256; + if (vector) { offload_attrs oa; populate_offload_attrs (&oa); - /* Default size for unknown size expression. */ - if (size == 0) - size = 256; - unsigned int psize = ROUND_UP (size + offset, align); unsigned int pnum = nvptx_mach_max_workers (); vector_red_partition = MAX (vector_red_partition, psize); @@ -6621,6 +6621,7 @@ enum nvptx_builtins NVPTX_BUILTIN_BAR_RED_AND, NVPTX_BUILTIN_BAR_RED_OR, NVPTX_BUILTIN_BAR_RED_POPC, + NVPTX_BUILTIN_BAR_WARPSYNC, NVPTX_BUILTIN_BREV, NVPTX_BUILTIN_BREVLL, NVPTX_BUILTIN_COND_UNI, @@ -6753,6 +6754,8 @@ nvptx_init_builtins (void) DEF (BAR_RED_POPC, "bar_red_popc", (UINT, UINT, UINT, UINT, UINT, NULL_TREE)); + DEF (BAR_WARPSYNC, "bar_warpsync", (VOID, VOID, NULL_TREE)); + DEF (BREV, "brev", (UINT, UINT, NULL_TREE)); DEF (BREVLL, "brevll", (LLUINT, LLUINT, NULL_TREE)); @@ -6803,6 +6806,10 @@ nvptx_expand_builtin (tree exp, rtx target, rtx ARG_UNUSED (subtarget), case NVPTX_BUILTIN_BAR_RED_POPC: return nvptx_expand_bar_red (exp, target, mode, ignore); + case NVPTX_BUILTIN_BAR_WARPSYNC: + emit_insn (gen_nvptx_warpsync ()); + return NULL_RTX; + case NVPTX_BUILTIN_BREV: case NVPTX_BUILTIN_BREVLL: return nvptx_expand_brev (exp, target, mode, ignore); @@ -7774,11 +7781,11 @@ nvptx_goacc_reduction_setup (gcall *call, offload_attrs *oa) push_gimplify_context (true); + /* Copy the receiver object. */ + tree ref_to_res = gimple_call_arg (call, 1); + if (level != GOMP_DIM_GANG) { - /* Copy the receiver object. */ - tree ref_to_res = gimple_call_arg (call, 1); - if (!integer_zerop (ref_to_res) && !array_p) { ref_to_res = nvptx_adjust_reduction_type (ref_to_res, @@ -7798,13 +7805,14 @@ nvptx_goacc_reduction_setup (gcall *call, offload_attrs *oa) tree call, ptr; if (array_p) { + tree copy_src = !integer_zerop (ref_to_res) ? ref_to_res : array_addr; tree array_elem_type = TREE_TYPE (array_type); call = nvptx_get_shared_red_addr (array_elem_type, array_max_idx, offset, level == GOMP_DIM_VECTOR); ptr = make_ssa_name (TREE_TYPE (call)); gimplify_assign (ptr, call, &seq); oacc_build_array_copy (fold_convert (TREE_TYPE (array_addr), ptr), - array_addr, array_max_idx, &seq); + copy_src, array_max_idx, &seq); } else { @@ -8038,6 +8046,14 @@ nvptx_goacc_reduction_fini (gcall *call, offload_attrs *oa) else r = nvptx_reduction_update (gimple_location (call), &gsi, accum, var, op, level); + + if (TARGET_SM70 && level == GOMP_DIM_VECTOR) + { + /* After SM70, with Independent Thread Scheduling introduced, + place a warpsync after vector-mode update of accum buffer. */ + tree fn = nvptx_builtin_decl (NVPTX_BUILTIN_BAR_WARPSYNC, true); + gimple_seq_add_stmt (&seq, gimple_build_call (fn, 0)); + } } } |