From c408512e1f7ca07e07794dc13fd6dfd9d2d7e998 Mon Sep 17 00:00:00 2001 From: Julian Brown Date: Tue, 2 Mar 2021 04:20:13 -0800 Subject: amdgcn: Enable OpenACC worker partitioning for AMD GCN gcc/ * config/gcn/gcn.c (gcn_init_builtins): Override decls for BUILT_IN_GOACC_SINGLE_START, BUILT_IN_GOACC_SINGLE_COPY_START, BUILT_IN_GOACC_SINGLE_COPY_END and BUILT_IN_GOACC_BARRIER. (gcn_goacc_validate_dims): Turn on worker partitioning unconditionally. (gcn_fork_join): Update comment. * config/gcn/gcn.opt (flag_worker_partitioning): Remove. (macc_experimental_workers): Remove unused option. libgomp/ * plugin/plugin-gcn.c (gcn_exec): Change default number of workers to 16. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c [acc_device_radeon]: Update. * testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c [ACC_DEVICE_TYPE_radeon]: Likewise. * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c [acc_device_radeon]: Likewise. * testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c [ACC_DEVICE_TYPE_radeon]: Likewise. * testsuite/libgomp.oacc-fortran/optional-reduction.f90: XFAIL for 'openacc_radeon_accel_selected' and '-O0'. * testsuite/libgomp.oacc-fortran/reduction-7.f90: Likewise. Co-Authored-By: Kwok Cheung Yeung Co-Authored-By: Thomas Schwinge --- gcc/config/gcn/gcn.c | 15 +++------------ gcc/config/gcn/gcn.opt | 5 ----- 2 files changed, 3 insertions(+), 17 deletions(-) (limited to 'gcc') diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c index 87af5d1..9df2827 100644 --- a/gcc/config/gcn/gcn.c +++ b/gcc/config/gcn/gcn.c @@ -3712,8 +3712,6 @@ gcn_init_builtins (void) TREE_NOTHROW (gcn_builtin_decls[i]) = 1; } -/* FIXME: remove the ifdef once OpenACC support is merged upstream. */ -#ifdef BUILT_IN_GOACC_SINGLE_START /* These builtins need to take/return an LDS pointer: override the generic versions here. */ @@ -3730,7 +3728,6 @@ gcn_init_builtins (void) set_builtin_decl (BUILT_IN_GOACC_BARRIER, gcn_builtin_decls[GCN_BUILTIN_ACC_BARRIER], false); -#endif } /* Implement TARGET_INIT_LIBFUNCS. */ @@ -5019,11 +5016,7 @@ gcn_goacc_validate_dims (tree decl, int dims[], int fn_level, unsigned /*used*/) { bool changed = false; - - /* FIXME: remove -facc-experimental-workers when they're ready. */ - int max_workers = flag_worker_partitioning ? 16 : 1; - - gcc_assert (!flag_worker_partitioning); + const int max_workers = 16; /* The vector size must appear to be 64, to the user, unless this is a SEQ routine. The real, internal value is always 1, which means use @@ -5060,8 +5053,7 @@ gcn_goacc_validate_dims (tree decl, int dims[], int fn_level, { dims[GOMP_DIM_VECTOR] = GCN_DEFAULT_VECTORS; if (dims[GOMP_DIM_WORKER] < 0) - dims[GOMP_DIM_WORKER] = (flag_worker_partitioning - ? GCN_DEFAULT_WORKERS : 1); + dims[GOMP_DIM_WORKER] = GCN_DEFAULT_WORKERS; if (dims[GOMP_DIM_GANG] < 0) dims[GOMP_DIM_GANG] = GCN_DEFAULT_GANGS; changed = true; @@ -5126,8 +5118,7 @@ static bool gcn_fork_join (gcall *ARG_UNUSED (call), const int *ARG_UNUSED (dims), bool ARG_UNUSED (is_fork)) { - /* GCN does not use the fork/join concept invented for NVPTX. - Instead we use standard autovectorization. */ + /* GCN does not need to expand fork/join markers at the RTL level. */ return false; } diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt index b2b10b0..6faacca 100644 --- a/gcc/config/gcn/gcn.opt +++ b/gcc/config/gcn/gcn.opt @@ -62,11 +62,6 @@ bool flag_bypass_init_error = false mbypass-init-error Target RejectNegative Var(flag_bypass_init_error) -bool flag_worker_partitioning = false - -macc-experimental-workers -Target Var(flag_worker_partitioning) Init(0) - int stack_size_opt = -1 mstack-size= -- cgit v1.1