aboutsummaryrefslogtreecommitdiff
path: root/libhsail-rt
diff options
context:
space:
mode:
authorPekka Jääskeläinen <visit0r@gcc.gnu.org>2018-05-04 19:43:57 +0000
committerPekka Jääskeläinen <visit0r@gcc.gnu.org>2018-05-04 19:43:57 +0000
commit080dc24383a602a5a4095eb05b04100f15ba1ad4 (patch)
treea0d2a78948f718293527ac76c53bdee3303d989f /libhsail-rt
parent1e25c5a9bb7042d7bca5a4fa840666dcb16f4918 (diff)
downloadgcc-080dc24383a602a5a4095eb05b04100f15ba1ad4.zip
gcc-080dc24383a602a5a4095eb05b04100f15ba1ad4.tar.gz
gcc-080dc24383a602a5a4095eb05b04100f15ba1ad4.tar.bz2
[BRIGFE] phsa-specific optimizations
Add flag -fassume-phsa that is on by default. If -fno-assume-phsa is given, these optimizations are disabled. With this flag, gccbrig can generate GENERIC that assumes we are targeting a phsa-runtime based implementation, which allows us to expose the work-item context accesses to retrieve WI IDs etc. which helps optimizers. First optimization that takes advantage of this is to get rid of the setworkitemid calls whenever we have non-inlined calls that use IDs internally. Other optimizations added in this commit: - expand absoluteid to similar level of simplicity as workitemid. At the moment absoluteid is the best indexing ID to end up with WG vectorization. - propagate ID variables closer to their uses. This is mainly to avoid known useless casts, which confuse at least scalar evolution analysis. - use signed long long for storing IDs. Unsigned integers have defined wraparound semantics, which confuse at least scalar evolution analysis, leading to unvectorizable WI loops. - also refactor some BRIG function generation helpers to brig_function. - no point in having the wi-loop as a for-loop. It's really a do...while and SCEV can analyze it just fine still. - add consts to ptrs etc. in BRIG builtin defs. Improves optimization opportunities. - add qualifiers to generated function parameters. Const and restrict on the hidden local/private pointers, the arg buffer and the context pointer help some optimizations. From-SVN: r259957
Diffstat (limited to 'libhsail-rt')
-rw-r--r--libhsail-rt/ChangeLog7
-rw-r--r--libhsail-rt/include/internal/phsa-rt.h1
-rw-r--r--libhsail-rt/include/internal/workitems.h50
-rw-r--r--libhsail-rt/rt/workitems.c84
4 files changed, 108 insertions, 34 deletions
diff --git a/libhsail-rt/ChangeLog b/libhsail-rt/ChangeLog
index 5ab9e85..17aeb6e4 100644
--- a/libhsail-rt/ChangeLog
+++ b/libhsail-rt/ChangeLog
@@ -1,5 +1,12 @@
2018-05-04 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
+ * include/internal/phsa-rt.h: Whitespace cleanup.
+ * include/internal/workitems.h: Store work item ID data to easily
+ accessible locations.
+ * rt/workitems.c: Same.
+
+2018-05-04 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
+
* rt/workitems.c: Fix an alloca stack underflow.
2018-04-18 David Malcolm <dmalcolm@redhat.com>
diff --git a/libhsail-rt/include/internal/phsa-rt.h b/libhsail-rt/include/internal/phsa-rt.h
index d9db56c..c09f18d 100644
--- a/libhsail-rt/include/internal/phsa-rt.h
+++ b/libhsail-rt/include/internal/phsa-rt.h
@@ -54,7 +54,6 @@ typedef void (*gccbrigKernelFunc) (unsigned char *, void *, void *, uint32_t,
*/
typedef struct
{
-
/* Data set by the HSA Runtime's kernel launcher. */
hsa_kernel_dispatch_packet_t *dp;
diff --git a/libhsail-rt/include/internal/workitems.h b/libhsail-rt/include/internal/workitems.h
index 73add28..0839853f 100644
--- a/libhsail-rt/include/internal/workitems.h
+++ b/libhsail-rt/include/internal/workitems.h
@@ -45,11 +45,6 @@
typedef struct
{
- /* The group id of the currently executed WG. */
- size_t x;
- size_t y;
- size_t z;
-
/* This is 1 in case there are more work groups to execute.
If 0, the work-item threads should finish themselves. */
int more_wgs;
@@ -89,6 +84,16 @@ typedef struct
stack frame. Initialized to point outside the private segment. */
uint32_t alloca_frame_p;
+ /* The group id of the currently executed WG. This is for fiber based
+ execution. The group ids are duplicated also to the per WI context
+ struct for simplified single pointer access in the GCCBRIG produced
+ code.
+ */
+
+ uint32_t x;
+ uint32_t y;
+ uint32_t z;
+
} PHSAWorkGroup;
/* Data identifying a single work-item, passed to the work-item thread in case
@@ -96,17 +101,42 @@ typedef struct
typedef struct
{
+ /* NOTE: These members STARTing here should not be moved as they are
+ accessed directly by code emitted by BRIG FE. */
+
+ /* The local id of the current WI. */
+
+ uint32_t x;
+ uint32_t y;
+ uint32_t z;
+
+ /* The group id of the currently executed WG. */
+
+ uint32_t group_x;
+ uint32_t group_y;
+ uint32_t group_z;
+
+ /* The local size of a complete WG. */
+
+ uint32_t wg_size_x;
+ uint32_t wg_size_y;
+ uint32_t wg_size_z;
+
+ /* The local size of the current WG. */
+
+ uint32_t cur_wg_size_x;
+ uint32_t cur_wg_size_y;
+ uint32_t cur_wg_size_z;
+
+ /* NOTE: Fixed members END here. */
+
PHSAKernelLaunchData *launch_data;
/* Identifies and keeps book of the currently executed WG of the WI swarm. */
volatile PHSAWorkGroup *wg;
- /* The local id of the current WI. */
- size_t x;
- size_t y;
- size_t z;
#ifdef HAVE_FIBERS
fiber_t fiber;
#endif
-} PHSAWorkItem;
+} __attribute__((packed)) PHSAWorkItem;
#endif
diff --git a/libhsail-rt/rt/workitems.c b/libhsail-rt/rt/workitems.c
index 36c9169..c846350 100644
--- a/libhsail-rt/rt/workitems.c
+++ b/libhsail-rt/rt/workitems.c
@@ -107,11 +107,20 @@ phsa_work_item_thread (int arg0, int arg1)
the current_work_group_* is set to point to the WG executed next. */
if (!wi->wg->more_wgs)
break;
+
+ wi->group_x = wg->x;
+ wi->group_y = wg->y;
+ wi->group_z = wg->z;
+
+ wi->cur_wg_size_x = __hsail_currentworkgroupsize (0, wi);
+ wi->cur_wg_size_y = __hsail_currentworkgroupsize (1, wi);
+ wi->cur_wg_size_z = __hsail_currentworkgroupsize (2, wi);
+
#ifdef DEBUG_PHSA_RT
printf (
"Running work-item %lu/%lu/%lu for wg %lu/%lu/%lu / %lu/%lu/%lu...\n",
- wi->x, wi->y, wi->z, wg->x, wg->y, wg->z, l_data->wg_max_x,
- l_data->wg_max_y, l_data->wg_max_z);
+ wi->x, wi->y, wi->z, wi->group_x, wi->group_y, wi->group_z,
+ l_data->wg_max_x, l_data->wg_max_y, l_data->wg_max_z);
#endif
if (wi->x < __hsail_currentworkgroupsize (0, wi)
@@ -180,6 +189,13 @@ phsa_work_item_thread (int arg0, int arg1)
else
wg->x++;
#endif
+ wi->group_x = wg->x;
+ wi->group_y = wg->y;
+ wi->group_z = wg->z;
+
+ wi->cur_wg_size_x = __hsail_currentworkgroupsize (0, wi);
+ wi->cur_wg_size_y = __hsail_currentworkgroupsize (1, wi);
+ wi->cur_wg_size_z = __hsail_currentworkgroupsize (2, wi);
/* Reinitialize the work-group barrier according to the new WG's
size, which might not be the same as the previous ones, due
@@ -233,6 +249,7 @@ phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr,
PHSAWorkItem *wi_threads = NULL;
PHSAWorkGroup wg;
size_t flat_wi_id = 0, x, y, z, max_x, max_y, max_z;
+ uint32_t group_x, group_y, group_z;
fiber_barrier_t wg_start_barrier;
fiber_barrier_t wg_completion_barrier;
fiber_barrier_t wg_sync_barrier;
@@ -257,13 +274,13 @@ phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr,
wg.initial_group_offset = group_local_offset;
#ifdef EXECUTE_WGS_BACKWARDS
- wg.x = context->wg_max_x - 1;
- wg.y = context->wg_max_y - 1;
- wg.z = context->wg_max_z - 1;
+ group_x = context->wg_max_x - 1;
+ group_y = context->wg_max_y - 1;
+ group_z = context->wg_max_z - 1;
#else
- wg.x = context->wg_min_x;
- wg.y = context->wg_min_y;
- wg.z = context->wg_min_z;
+ group_x = context->wg_min_x;
+ group_y = context->wg_min_y;
+ group_z = context->wg_min_z;
#endif
fiber_barrier_init (&wg_sync_barrier, wg_size);
@@ -290,6 +307,19 @@ phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr,
PHSAWorkItem *wi = &wi_threads[flat_wi_id];
wi->launch_data = context;
wi->wg = &wg;
+
+ wg.x = wi->group_x = group_x;
+ wg.y = wi->group_y = group_y;
+ wg.z = wi->group_z = group_z;
+
+ wi->wg_size_x = context->dp->workgroup_size_x;
+ wi->wg_size_y = context->dp->workgroup_size_y;
+ wi->wg_size_z = context->dp->workgroup_size_z;
+
+ wi->cur_wg_size_x = __hsail_currentworkgroupsize (0, wi);
+ wi->cur_wg_size_y = __hsail_currentworkgroupsize (1, wi);
+ wi->cur_wg_size_z = __hsail_currentworkgroupsize (2, wi);
+
wi->x = x;
wi->y = y;
wi->z = z;
@@ -467,9 +497,17 @@ phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr,
for (wg_y = context->wg_min_y; wg_y < context->wg_max_y; ++wg_y)
for (wg_x = context->wg_min_x; wg_x < context->wg_max_x; ++wg_x)
{
- wi.wg->x = wg_x;
- wi.wg->y = wg_y;
- wi.wg->z = wg_z;
+ wi.group_x = wg_x;
+ wi.group_y = wg_y;
+ wi.group_z = wg_z;
+
+ wi.wg_size_x = context->dp->workgroup_size_x;
+ wi.wg_size_y = context->dp->workgroup_size_y;
+ wi.wg_size_z = context->dp->workgroup_size_z;
+
+ wi.cur_wg_size_x = __hsail_currentworkgroupsize (0, &wi);
+ wi.cur_wg_size_y = __hsail_currentworkgroupsize (1, &wi);
+ wi.cur_wg_size_z = __hsail_currentworkgroupsize (2, &wi);
context->kernel (context->kernarg_addr, &wi, group_base_ptr,
group_local_offset, private_base_ptr);
@@ -564,15 +602,15 @@ __hsail_workitemabsid (uint32_t dim, PHSAWorkItem *context)
default:
case 0:
/* Overflow semantics in the case of WG dim > grid dim. */
- id = ((uint64_t) context->wg->x * dp->workgroup_size_x + context->x)
+ id = ((uint64_t) context->group_x * dp->workgroup_size_x + context->x)
% dp->grid_size_x;
break;
case 1:
- id = ((uint64_t) context->wg->y * dp->workgroup_size_y + context->y)
+ id = ((uint64_t) context->group_y * dp->workgroup_size_y + context->y)
% dp->grid_size_y;
break;
case 2:
- id = ((uint64_t) context->wg->z * dp->workgroup_size_z + context->z)
+ id = ((uint64_t) context->group_z * dp->workgroup_size_z + context->z)
% dp->grid_size_z;
break;
}
@@ -590,15 +628,15 @@ __hsail_workitemabsid_u64 (uint32_t dim, PHSAWorkItem *context)
default:
case 0:
/* Overflow semantics in the case of WG dim > grid dim. */
- id = ((uint64_t) context->wg->x * dp->workgroup_size_x + context->x)
+ id = ((uint64_t) context->group_x * dp->workgroup_size_x + context->x)
% dp->grid_size_x;
break;
case 1:
- id = ((uint64_t) context->wg->y * dp->workgroup_size_y + context->y)
+ id = ((uint64_t) context->group_y * dp->workgroup_size_y + context->y)
% dp->grid_size_y;
break;
case 2:
- id = ((uint64_t) context->wg->z * dp->workgroup_size_z + context->z)
+ id = ((uint64_t) context->group_z * dp->workgroup_size_z + context->z)
% dp->grid_size_z;
break;
}
@@ -738,19 +776,19 @@ __hsail_currentworkgroupsize (uint32_t dim, PHSAWorkItem *wi)
{
default:
case 0:
- if ((uint64_t) wi->wg->x < dp->grid_size_x / dp->workgroup_size_x)
+ if ((uint64_t) wi->group_x < dp->grid_size_x / dp->workgroup_size_x)
wg_size = dp->workgroup_size_x; /* Full WG. */
else
wg_size = dp->grid_size_x % dp->workgroup_size_x; /* Partial WG. */
break;
case 1:
- if ((uint64_t) wi->wg->y < dp->grid_size_y / dp->workgroup_size_y)
+ if ((uint64_t) wi->group_y < dp->grid_size_y / dp->workgroup_size_y)
wg_size = dp->workgroup_size_y; /* Full WG. */
else
wg_size = dp->grid_size_y % dp->workgroup_size_y; /* Partial WG. */
break;
case 2:
- if ((uint64_t) wi->wg->z < dp->grid_size_z / dp->workgroup_size_z)
+ if ((uint64_t) wi->group_z < dp->grid_size_z / dp->workgroup_size_z)
wg_size = dp->workgroup_size_z; /* Full WG. */
else
wg_size = dp->grid_size_z % dp->workgroup_size_z; /* Partial WG. */
@@ -798,11 +836,11 @@ __hsail_workgroupid (uint32_t dim, PHSAWorkItem *wi)
{
default:
case 0:
- return wi->wg->x;
+ return wi->group_x;
case 1:
- return wi->wg->y;
+ return wi->group_y;
case 2:
- return wi->wg->z;
+ return wi->group_z;
}
}