diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 8 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.c | 13 |
2 files changed, 21 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5495b9a..5d9c536 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,13 @@ 2019-01-11 Tom de Vries <tdevries@suse.de> + * config/nvptx/nvptx.c (PTX_CTA_NUM_BARRIERS, PTX_PER_CTA_BARRIER) + (PTX_NUM_PER_CTA_BARRIERS, PTX_FIRST_PER_WORKER_BARRIER) + (PTX_NUM_PER_WORKER_BARRIERS): Define. + (nvptx_apply_dim_limits): Prevent vector_length 64 and + num_workers 16. + +2019-01-11 Tom de Vries <tdevries@suse.de> + * config/nvptx/nvptx.c (PTX_CTA_SIZE): Move up. 2019-01-11 Jan Beulich <jbeulich@suse.com> diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 643f5e8..b37010f 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -87,8 +87,14 @@ 2.x. */ #define PTX_CTA_SIZE 1024 +#define PTX_CTA_NUM_BARRIERS 16 #define PTX_WARP_SIZE 32 +#define PTX_PER_CTA_BARRIER 0 +#define PTX_NUM_PER_CTA_BARRIERS 1 +#define PTX_FIRST_PER_WORKER_BARRIER (PTX_NUM_PER_CTA_BARRIERS) +#define PTX_NUM_PER_WORKER_BARRIERS (PTX_CTA_NUM_BARRIERS - PTX_NUM_PER_CTA_BARRIERS) + #define PTX_DEFAULT_VECTOR_LENGTH PTX_WARP_SIZE #define PTX_MAX_VECTOR_LENGTH PTX_WARP_SIZE #define PTX_WORKER_LENGTH 32 @@ -5496,6 +5502,13 @@ nvptx_apply_dim_limits (int dims[]) if (dims[GOMP_DIM_WORKER] > 0 && dims[GOMP_DIM_VECTOR] > 0 && dims[GOMP_DIM_WORKER] * dims[GOMP_DIM_VECTOR] > PTX_CTA_SIZE) dims[GOMP_DIM_VECTOR] = PTX_WARP_SIZE; + + /* If we need a per-worker barrier ... . */ + if (dims[GOMP_DIM_WORKER] > 0 && dims[GOMP_DIM_VECTOR] > 0 + && dims[GOMP_DIM_VECTOR] > PTX_WARP_SIZE) + /* Don't use more barriers than available. */ + dims[GOMP_DIM_WORKER] = MIN (dims[GOMP_DIM_WORKER], + PTX_NUM_PER_WORKER_BARRIERS); } /* Return true if FNDECL contains calls to vector-partitionable routines. */ |