about | summary | refs | log | tree | commit | diff
path: root/libhsail-rt
diff options
context:
space:
mode:
Diffstat (limited to 'libhsail-rt')
-rw-r--r-- libhsail-rt/ChangeLog 8
-rw-r--r-- libhsail-rt/include/internal/phsa-rt.h 3
-rw-r--r-- libhsail-rt/include/internal/workitems.h 5
-rw-r--r-- libhsail-rt/rt/workitems.c 27
4 files changed, 31 insertions, 12 deletions
diff --git a/libhsail-rt/ChangeLog b/libhsail-rt/ChangeLog
index bf86278..53d3634 100644
--- a/libhsail-rt/ChangeLog
+++ b/libhsail-rt/ChangeLog
@@ -1,7 +1,15 @@
+2017-09-27 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
+
+ * include/internal/phsa-rt.h: Support for improved group segment
+ handling with a stack-like allocation scheme.
+ * include/internal/workitems.h: Likewise.
+ * rt/workitems.c: Likewise.
+
2017-09-25 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
* rt/workitems.c: Assume the host runtime allocates the work group
memory.
+
2017-05-03 Pekka Jääskeläinen <pekka.jaaskelainen@parmance.com>
* rt/workitems.c: Removed a leftover comment.
diff --git a/libhsail-rt/include/internal/phsa-rt.h b/libhsail-rt/include/internal/phsa-rt.h
index d47cbfcd..13349e7 100644
--- a/libhsail-rt/include/internal/phsa-rt.h
+++ b/libhsail-rt/include/internal/phsa-rt.h
@@ -42,7 +42,8 @@ typedef void (*gccbrigKernelLauncherFunc) (void *context, void *);
/* Pointer type for kernel functions produced by gccbrig from the HSAIL.
This is private from outside the device binary and only called by
the launcher. */
-typedef void (*gccbrigKernelFunc) (unsigned char *, void *, void *, void *);
+typedef void (*gccbrigKernelFunc) (unsigned char *, void *, void *, uint32_t,
+ void *);
/* Context data that is passed to the kernel function, initialized
by the runtime to the current launch information. The data is
diff --git a/libhsail-rt/include/internal/workitems.h b/libhsail-rt/include/internal/workitems.h
index e7d386d..2abfc61 100644
--- a/libhsail-rt/include/internal/workitems.h
+++ b/libhsail-rt/include/internal/workitems.h
@@ -63,6 +63,11 @@ typedef struct
to the work-group. */
void *group_base_ptr;
+ /* The offset in the group memory for the kernel local group variables.
+ To support module scope group variables, there might be a need to preserve
+ room for them in the beginning of the group segment. */
+ uint32_t initial_group_offset;
+
/* Similarly to the private segment that gets space allocated for all
WIs in the work-group. */
void *private_base_ptr;
diff --git a/libhsail-rt/rt/workitems.c b/libhsail-rt/rt/workitems.c
index ed1185a..b24fc10 100644
--- a/libhsail-rt/rt/workitems.c
+++ b/libhsail-rt/rt/workitems.c
@@ -113,7 +113,7 @@ phsa_work_item_thread (int arg0, int arg1)
&& wi->z < __hsail_currentworkgroupsize (2, wi))
{
l_data->kernel (l_data->kernarg_addr, wi, wg->group_base_ptr,
- wg->private_base_ptr);
+ wg->initial_group_offset, wg->private_base_ptr);
#ifdef DEBUG_PHSA_RT
printf ("done.\n");
#endif
@@ -221,7 +221,8 @@ phsa_work_item_thread (int arg0, int arg1)
static void
phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr,
- size_t wg_size_x, size_t wg_size_y, size_t wg_size_z)
+ uint32_t group_local_offset, size_t wg_size_x,
+ size_t wg_size_y, size_t wg_size_z)
{
PHSAWorkItem *wi_threads = NULL;
PHSAWorkGroup wg;
@@ -247,6 +248,7 @@ phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr,
wg.alloca_stack_p = wg.private_segment_total_size;
wg.alloca_frame_p = wg.alloca_stack_p;
+ wg.initial_group_offset = group_local_offset;
#ifdef EXECUTE_WGS_BACKWARDS
wg.x = context->wg_max_x - 1;
@@ -313,7 +315,8 @@ phsa_execute_wi_gang (PHSAKernelLaunchData *context, void *group_base_ptr,
them execute all the WGs, including a potential partial WG. */
static void
-phsa_spawn_work_items (PHSAKernelLaunchData *context, void *group_base_ptr)
+phsa_spawn_work_items (PHSAKernelLaunchData *context, void *group_base_ptr,
+ uint32_t group_local_offset)
{
hsa_kernel_dispatch_packet_t *dp = context->dp;
size_t x, y, z;
@@ -361,8 +364,8 @@ phsa_spawn_work_items (PHSAKernelLaunchData *context, void *group_base_ptr)
dp->grid_size_y, dp->grid_size_z);
#endif
- phsa_execute_wi_gang (context, group_base_ptr, sat_wg_size_x, sat_wg_size_y,
- sat_wg_size_z);
+ phsa_execute_wi_gang (context, group_base_ptr, group_local_offset,
+ sat_wg_size_x, sat_wg_size_y, sat_wg_size_z);
}
#endif
@@ -374,7 +377,8 @@ phsa_spawn_work_items (PHSAKernelLaunchData *context, void *group_base_ptr)
execute massive numbers of work-items in a non-SPMD machine than fibers
(easily 100x faster). */
static void
-phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr)
+phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr,
+ uint32_t group_local_offset)
{
hsa_kernel_dispatch_packet_t *dp = context->dp;
size_t x, y, z, wg_x, wg_y, wg_z;
@@ -462,7 +466,7 @@ phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr)
wi.wg->z = wg_z;
context->kernel (context->kernarg_addr, &wi, group_base_ptr,
- private_base_ptr);
+ group_local_offset, private_base_ptr);
#if defined (BENCHMARK_PHSA_RT)
wg_count++;
@@ -527,19 +531,20 @@ phsa_execute_work_groups (PHSAKernelLaunchData *context, void *group_base_ptr)
void
__hsail_launch_kernel (gccbrigKernelFunc kernel, PHSAKernelLaunchData *context,
- void *group_base_ptr)
+ void *group_base_ptr, uint32_t group_local_offset)
{
context->kernel = kernel;
- phsa_spawn_work_items (context, group_base_ptr);
+ phsa_spawn_work_items (context, group_base_ptr, group_local_offset);
}
#endif
void
__hsail_launch_wg_function (gccbrigKernelFunc kernel,
- PHSAKernelLaunchData *context, void *group_base_ptr)
+ PHSAKernelLaunchData *context, void *group_base_ptr,
+ uint32_t group_local_offset)
{
context->kernel = kernel;
- phsa_execute_work_groups (context, group_base_ptr);
+ phsa_execute_work_groups (context, group_base_ptr, group_local_offset);
}
uint32_t