about | summary | refs | log | tree | commit | diff
path: root/gcc
diff options
context:
space:
mode:
author: Chung-Lin Tang <cltang@baylibre.com> 2025-05-02 00:33:07 +0000
committer: Sandra Loosemore <sloosemore@baylibre.com> 2025-05-15 20:25:51 +0000
commit: a6682e7af891700ae67e7d0f5d3fcc462eb44609 (patch)
tree: 61fd0c61939ebbd3a5396fbd9b865f2f529abf61 /gcc
parent: 5fd60a678116773e99d5fd2d64a118f837e5d6f0 (diff)
download: gcc-a6682e7af891700ae67e7d0f5d3fcc462eb44609.zip
gcc-a6682e7af891700ae67e7d0f5d3fcc462eb44609.tar.gz
gcc-a6682e7af891700ae67e7d0f5d3fcc462eb44609.tar.bz2
OpenACC: array reductions bug fixes
This is a merge of the v4 to v5 diff patch from:
https://gcc.gnu.org/pipermail/gcc-patches/2025-March/679682.html

This patch fixes issues found for NVPTX sm_70 testing, and another issue
related to copying to reduction buffer for worker/vector mode.

gcc/ChangeLog:

* config/gcn/gcn-tree.cc (gcn_goacc_reduction_setup): Fix array case
copy source into reduction buffer.
* config/nvptx/nvptx.cc (nvptx_expand_shared_addr): Move default size
init setting place.
(enum nvptx_builtins): Add NVPTX_BUILTIN_BAR_WARPSYNC.
(nvptx_init_builtins): Add DEF() of nvptx_builtin_bar_warpsync.
(nvptx_expand_builtin): Expand NVPTX_BUILTIN_BAR_WARPSYNC.
(nvptx_goacc_reduction_setup): Fix array case copy source into
reduction buffer.
(nvptx_goacc_reduction_fini): Add bar.warpsync at end of vector-mode
reductions for sm_70 and above.

libgomp/ChangeLog:

* testsuite/libgomp.oacc-c-c++-common/reduction-arrays-2.c: Adjust test.
* testsuite/libgomp.oacc-c-c++-common/reduction-arrays-3.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/reduction-arrays-4.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/reduction-arrays-5.c: Likewise.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/gcn/gcn-tree.cc 3
-rw-r--r-- gcc/config/nvptx/nvptx.cc 32
2 files changed, 26 insertions, 9 deletions
diff --git a/gcc/config/gcn/gcn-tree.cc b/gcc/config/gcn/gcn-tree.cc
index 87c4267..c3349d6 100644
--- a/gcc/config/gcn/gcn-tree.cc
+++ b/gcc/config/gcn/gcn-tree.cc
@@ -750,13 +750,14 @@ gcn_goacc_reduction_setup (gcall *call)
tree offset = gimple_call_arg (call, 5);
if (array_p)
{
+ tree copy_src = !integer_zerop (ref_to_res) ? ref_to_res : array_addr;
tree decl = gcn_goacc_get_worker_array_reduction_buffer
(array_type, array_max_idx, &seq);
tree ptr = make_ssa_name (TREE_TYPE (array_addr));
gimplify_assign (ptr, build_fold_addr_expr (decl), &seq);
/* Store incoming value to worker reduction buffer. */
- oacc_build_array_copy (ptr, array_addr, array_max_idx, &seq);
+ oacc_build_array_copy (ptr, copy_src, array_max_idx, &seq);
}
else
{
diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
index ba40a84..115d34f 100644
--- a/gcc/config/nvptx/nvptx.cc
+++ b/gcc/config/nvptx/nvptx.cc
@@ -6516,16 +6516,16 @@ nvptx_expand_shared_addr (tree exp, rtx target,
if (TREE_CONSTANT (size_expr))
size = TREE_INT_CST_LOW (size_expr);
+ /* Default size for unknown size expression. */
+ if (size == 0)
+ size = 256;
+
if (vector)
{
offload_attrs oa;
populate_offload_attrs (&oa);
- /* Default size for unknown size expression. */
- if (size == 0)
- size = 256;
-
unsigned int psize = ROUND_UP (size + offset, align);
unsigned int pnum = nvptx_mach_max_workers ();
vector_red_partition = MAX (vector_red_partition, psize);
@@ -6621,6 +6621,7 @@ enum nvptx_builtins
NVPTX_BUILTIN_BAR_RED_AND,
NVPTX_BUILTIN_BAR_RED_OR,
NVPTX_BUILTIN_BAR_RED_POPC,
+ NVPTX_BUILTIN_BAR_WARPSYNC,
NVPTX_BUILTIN_BREV,
NVPTX_BUILTIN_BREVLL,
NVPTX_BUILTIN_COND_UNI,
@@ -6753,6 +6754,8 @@ nvptx_init_builtins (void)
DEF (BAR_RED_POPC, "bar_red_popc",
(UINT, UINT, UINT, UINT, UINT, NULL_TREE));
+ DEF (BAR_WARPSYNC, "bar_warpsync", (VOID, VOID, NULL_TREE));
+
DEF (BREV, "brev", (UINT, UINT, NULL_TREE));
DEF (BREVLL, "brevll", (LLUINT, LLUINT, NULL_TREE));
@@ -6803,6 +6806,10 @@ nvptx_expand_builtin (tree exp, rtx target, rtx ARG_UNUSED (subtarget),
case NVPTX_BUILTIN_BAR_RED_POPC:
return nvptx_expand_bar_red (exp, target, mode, ignore);
+ case NVPTX_BUILTIN_BAR_WARPSYNC:
+ emit_insn (gen_nvptx_warpsync ());
+ return NULL_RTX;
+
case NVPTX_BUILTIN_BREV:
case NVPTX_BUILTIN_BREVLL:
return nvptx_expand_brev (exp, target, mode, ignore);
@@ -7774,11 +7781,11 @@ nvptx_goacc_reduction_setup (gcall *call, offload_attrs *oa)
push_gimplify_context (true);
+ /* Copy the receiver object. */
+ tree ref_to_res = gimple_call_arg (call, 1);
+
if (level != GOMP_DIM_GANG)
{
- /* Copy the receiver object. */
- tree ref_to_res = gimple_call_arg (call, 1);
-
if (!integer_zerop (ref_to_res) && !array_p)
{
ref_to_res = nvptx_adjust_reduction_type (ref_to_res,
@@ -7798,13 +7805,14 @@ nvptx_goacc_reduction_setup (gcall *call, offload_attrs *oa)
tree call, ptr;
if (array_p)
{
+ tree copy_src = !integer_zerop (ref_to_res) ? ref_to_res : array_addr;
tree array_elem_type = TREE_TYPE (array_type);
call = nvptx_get_shared_red_addr (array_elem_type, array_max_idx,
offset, level == GOMP_DIM_VECTOR);
ptr = make_ssa_name (TREE_TYPE (call));
gimplify_assign (ptr, call, &seq);
oacc_build_array_copy (fold_convert (TREE_TYPE (array_addr), ptr),
- array_addr, array_max_idx, &seq);
+ copy_src, array_max_idx, &seq);
}
else
{
@@ -8038,6 +8046,14 @@ nvptx_goacc_reduction_fini (gcall *call, offload_attrs *oa)
else
r = nvptx_reduction_update (gimple_location (call), &gsi,
accum, var, op, level);
+
+ if (TARGET_SM70 && level == GOMP_DIM_VECTOR)
+ {
+ /* After SM70, with Independent Thread Scheduling introduced,
+ place a warpsync after vector-mode update of accum buffer. */
+ tree fn = nvptx_builtin_decl (NVPTX_BUILTIN_BAR_WARPSYNC, true);
+ gimple_seq_add_stmt (&seq, gimple_build_call (fn, 0));
+ }
}
}