aboutsummaryrefslogtreecommitdiff
path: root/libgomp/target.c
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2021-11-12 12:41:22 +0100
committerJakub Jelinek <jakub@redhat.com>2021-11-12 12:41:22 +0100
commit7d6da11fce054b25b50d0dec7f8d49cf22852680 (patch)
treea52c163e2c2d9eb14ec2cc43f59029673863cc12 /libgomp/target.c
parent5f516a6a5d7ecce48a86d01fed1aeb4fc4ccc534 (diff)
downloadgcc-7d6da11fce054b25b50d0dec7f8d49cf22852680.zip
gcc-7d6da11fce054b25b50d0dec7f8d49cf22852680.tar.gz
gcc-7d6da11fce054b25b50d0dec7f8d49cf22852680.tar.bz2
openmp: Honor OpenMP 5.1 num_teams lower bound
The following patch implements what I've been talking about earlier: for an explicit num_teams clause, honor the requirement that we create at least lower-bound (or, if no lower bound is specified, upper-bound) teams in the league. For host fallback, this still means we have only one thread running all the teams sequentially, one after another. For PTX and GCN, I think the new teams-2.c test, and maybe teams-4.c too, will or might fail. For these offload targets, I think it is OK to remove symbols that are no longer used from libgomp.a. If num_teams_lower is bigger than the provided num_blocks or num_workgroups, we should arrange for gomp_num_teams_var to be num_teams_lower - 1, stop using %ctaid.x or __builtin_gcn_dim_pos (0) for omp_get_team_num (), and instead use for it some .shared variable that GOMP_teams4 initializes to %ctaid.x or __builtin_gcn_dim_pos (0) when first is true and, for !first, increments by num_blocks or num_workgroups each time, returning false only when we are above num_teams_lower. Any help with actually implementing this for the two architectures would be highly appreciated. 2021-11-12 Jakub Jelinek <jakub@redhat.com> gcc/ * omp-builtins.def (BUILT_IN_GOMP_TEAMS): Remove. (BUILT_IN_GOMP_TEAMS4): New. * builtin-types.def (BT_FN_VOID_UINT_UINT): Remove. (BT_FN_BOOL_UINT_UINT_UINT_BOOL): New. * omp-low.c (lower_omp_teams): Use GOMP_teams4 instead of GOMP_teams, pass to it also num_teams lower-bound expression or a dup of upper-bound if it is missing and a flag whether it is the first call or not. gcc/fortran/ * types.def (BT_FN_VOID_UINT_UINT): Remove. (BT_FN_BOOL_UINT_UINT_UINT_BOOL): New. libgomp/ * libgomp_g.h (GOMP_teams4): Declare. * libgomp.map (GOMP_5.1): Export GOMP_teams4. * target.c (GOMP_teams4): New function. * config/nvptx/target.c (GOMP_teams): Remove. (GOMP_teams4): New function. * config/gcn/target.c (GOMP_teams): Remove. (GOMP_teams4): New function. * testsuite/libgomp.c/teams-4.c (main): Expect exactly 2 teams instead of <= 2. * testsuite/libgomp.c-c++-common/teams-2.c: New test.
Diffstat (limited to 'libgomp/target.c')
-rw-r--r--libgomp/target.c26
1 files changed, 26 insertions, 0 deletions
diff --git a/libgomp/target.c b/libgomp/target.c
index 196dba4..ecf09f9 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -3088,6 +3088,32 @@ GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
(void) num_teams;
}
+/* Step through the teams of a league on the calling thread.
+
+   When FIRST is true, set up the iteration: a nonzero THREAD_LIMIT is
+   stored into the thread_limit_var ICV (values above INT_MAX are stored
+   as UINT_MAX), the thread's num_teams is set to NUM_TEAMS_LOW - 1
+   (a NUM_TEAMS_LOW of 0 is treated as 1), team_num is reset to 0 and
+   true is returned.
+
+   When FIRST is false, return false if team_num has already reached
+   num_teams; otherwise advance team_num by one and return true.
+
+   NUM_TEAMS_HIGH is accepted but not used here.  */
+bool
+GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
+             unsigned int thread_limit, bool first)
+{
+  struct gomp_thread *self = gomp_thread ();
+
+  (void) num_teams_high;
+
+  if (!first)
+    {
+      /* Subsequent call: stop once the last team index has been run.  */
+      if (self->team_num == self->num_teams)
+        return false;
+      self->team_num++;
+      return true;
+    }
+
+  if (thread_limit != 0)
+    {
+      struct gomp_task_icv *task_icv = gomp_icv (true);
+      if (thread_limit > INT_MAX)
+        task_icv->thread_limit_var = UINT_MAX;
+      else
+        task_icv->thread_limit_var = thread_limit;
+    }
+
+  /* num_teams holds the highest team index, hence the - 1.  */
+  unsigned int lower_bound = num_teams_low != 0 ? num_teams_low : 1;
+  self->num_teams = lower_bound - 1;
+  self->team_num = 0;
+  return true;
+}
+
void *
omp_target_alloc (size_t size, int device_num)
{