aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Schwinge <tschwinge@baylibre.com>2024-07-15 11:19:28 +0200
committerThomas Schwinge <tschwinge@baylibre.com>2024-07-19 21:55:47 +0200
commitf9119948cedefa07a667e8beacbd5317a4d8ec1b (patch)
treed57aeda6346ac8f3ee769a42f04b573540f1342a
parent385004846dc6437bab9b3b7d8b071ce1c3418384 (diff)
downloadgcc-f9119948cedefa07a667e8beacbd5317a4d8ec1b.zip
gcc-f9119948cedefa07a667e8beacbd5317a4d8ec1b.tar.gz
gcc-f9119948cedefa07a667e8beacbd5317a4d8ec1b.tar.bz2
GCN: Honor OpenMP 5.1 'num_teams' lower bound
Corresponding to commit 9fa72756d90e0d9edadf6e6f5f56476029925788 "libgomp, nvptx: Honor OpenMP 5.1 num_teams lower bound", these are the GCN offloading changes to fix: PASS: libgomp.c/../libgomp.c-c++-common/teams-2.c (test for excess errors) [-FAIL:-]{+PASS:+} libgomp.c/../libgomp.c-c++-common/teams-2.c execution test PASS: libgomp.c++/../libgomp.c-c++-common/teams-2.c (test for excess errors) [-FAIL:-]{+PASS:+} libgomp.c++/../libgomp.c-c++-common/teams-2.c execution test ..., and omptests' 't-critical' test case. I've cross checked that those test cases are the ones that regress for nvptx offloading, if I locally revert the "libgomp, nvptx: Honor OpenMP 5.1 num_teams lower bound" changes. libgomp/ * config/gcn/libgomp-gcn.h (GOMP_TEAM_NUM): Inject. * config/gcn/target.c (GOMP_teams4): Handle. * config/gcn/team.c (gomp_gcn_enter_kernel): Initialize. * config/gcn/teams.c (omp_get_team_num): Adjust.
-rw-r--r--libgomp/config/gcn/libgomp-gcn.h9
-rw-r--r--libgomp/config/gcn/target.c29
-rw-r--r--libgomp/config/gcn/team.c3
-rw-r--r--libgomp/config/gcn/teams.c5
4 files changed, 31 insertions, 15 deletions
diff --git a/libgomp/config/gcn/libgomp-gcn.h b/libgomp/config/gcn/libgomp-gcn.h
index e94f0c7..48a3741 100644
--- a/libgomp/config/gcn/libgomp-gcn.h
+++ b/libgomp/config/gcn/libgomp-gcn.h
@@ -34,10 +34,11 @@
#define DEFAULT_TEAM_ARENA_SIZE (64*1024)
/* These define the LDS location of data needed by OpenMP. */
-#define TEAM_ARENA_START 16 /* LDS offset of free pointer. */
-#define TEAM_ARENA_FREE 24 /* LDS offset of free pointer. */
-#define TEAM_ARENA_END 32 /* LDS offset of end pointer. */
-#define GCN_LOWLAT_HEAP 40 /* LDS offset of the OpenMP low-latency heap. */
+#define GOMP_TEAM_NUM 16
+#define TEAM_ARENA_START 24 /* LDS offset of free pointer. */
+#define TEAM_ARENA_FREE 32 /* LDS offset of free pointer. */
+#define TEAM_ARENA_END 40 /* LDS offset of end pointer. */
+#define GCN_LOWLAT_HEAP 48 /* LDS offset of the OpenMP low-latency heap. */
struct heap
{
diff --git a/libgomp/config/gcn/target.c b/libgomp/config/gcn/target.c
index 1d4a23c..e57d2e5 100644
--- a/libgomp/config/gcn/target.c
+++ b/libgomp/config/gcn/target.c
@@ -33,26 +33,37 @@ bool
GOMP_teams4 (unsigned int num_teams_lower, unsigned int num_teams_upper,
unsigned int thread_limit, bool first)
{
+ int __lds *gomp_team_num = (int __lds *) GOMP_TEAM_NUM;
+ unsigned int num_workgroups = __builtin_gcn_dim_size (0);
if (!first)
- return false;
+ {
+ unsigned int team_num;
+ if (num_workgroups > gomp_num_teams_var)
+ return false;
+ team_num = *gomp_team_num;
+ if (team_num > gomp_num_teams_var - num_workgroups)
+ return false;
+ *gomp_team_num = team_num + num_workgroups;
+ return true;
+ }
if (thread_limit)
{
struct gomp_task_icv *icv = gomp_icv (true);
icv->thread_limit_var
= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
}
- unsigned int num_workgroups, workgroup_id;
- num_workgroups = __builtin_gcn_dim_size (0);
- workgroup_id = __builtin_gcn_dim_pos (0);
- /* FIXME: If num_teams_lower > num_workgroups, we want to loop
- multiple times at least for some workgroups. */
- (void) num_teams_lower;
- if (!num_teams_upper || num_teams_upper >= num_workgroups)
+ if (!num_teams_upper)
num_teams_upper = ((GOMP_ADDITIONAL_ICVS.nteams > 0
&& num_workgroups > GOMP_ADDITIONAL_ICVS.nteams)
? GOMP_ADDITIONAL_ICVS.nteams : num_workgroups);
- else if (workgroup_id >= num_teams_upper)
+ else if (num_workgroups < num_teams_lower)
+ num_teams_upper = num_teams_lower;
+ else if (num_workgroups < num_teams_upper)
+ num_teams_upper = num_workgroups;
+ unsigned int workgroup_id = __builtin_gcn_dim_pos (0);
+ if (workgroup_id >= num_teams_upper)
return false;
+ *gomp_team_num = workgroup_id;
gomp_num_teams_var = num_teams_upper - 1;
return true;
}
diff --git a/libgomp/config/gcn/team.c b/libgomp/config/gcn/team.c
index bd3df44..aa68b3a 100644
--- a/libgomp/config/gcn/team.c
+++ b/libgomp/config/gcn/team.c
@@ -68,6 +68,9 @@ gomp_gcn_enter_kernel (void)
/* Starting additional threads is not supported. */
gomp_global_icv.dyn_var = true;
+ int __lds *gomp_team_num = (int __lds *) GOMP_TEAM_NUM;
+ *gomp_team_num = 0;
+
/* Initialize the team arena for optimized memory allocation.
The arena has been allocated on the host side, and the address
passed in via the kernargs. Each team takes a small slice of it. */
diff --git a/libgomp/config/gcn/teams.c b/libgomp/config/gcn/teams.c
index 8a91ba8..5740418 100644
--- a/libgomp/config/gcn/teams.c
+++ b/libgomp/config/gcn/teams.c
@@ -44,10 +44,11 @@ omp_get_num_teams (void)
return gomp_num_teams_var + 1;
}
-int __attribute__ ((__optimize__ ("O2")))
+int
omp_get_team_num (void)
{
- return __builtin_gcn_dim_pos (0);
+ int __lds *gomp_team_num = (int __lds *) GOMP_TEAM_NUM;
+ return *gomp_team_num;
}
ialias (omp_get_num_teams)