aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJulian Brown <julian@codesourcery.com>2023-09-13 13:31:48 +0000
committerSandra Loosemore <sloosemore@baylibre.com>2025-05-15 20:25:50 +0000
commit5f7678f015bb31a5aef0ea4f801a3fcff2e85050 (patch)
tree3f6b1a3d4ea2105ef4b4f8122b58a1e96e67c86c
parentfc4bf7b190ee2db44839c09518ca074c5cef8a3b (diff)
downloadgcc-5f7678f015bb31a5aef0ea4f801a3fcff2e85050.zip
gcc-5f7678f015bb31a5aef0ea4f801a3fcff2e85050.tar.gz
gcc-5f7678f015bb31a5aef0ea4f801a3fcff2e85050.tar.bz2
OpenMP: Support accelerated 2D/3D memory copies for AMD GCN [OG14-only part]
This patch only adds the bits missing from mainline:

Support is also added for 1-dimensional strided accesses: these are treated as a special case of 2-dimensional transfers, where the innermost dimension is formed from the stride length (in bytes).

2023-09-19  Julian Brown  <julian@codesourcery.com>

libgomp/
	* target.c (omp_target_memcpy_rect_worker): Add 1D strided transfer support.
-rw-r--r--libgomp/target.c31
1 files changed, 31 insertions, 0 deletions
diff --git a/libgomp/target.c b/libgomp/target.c
index 8e811bf..66bc5c1 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -5215,6 +5215,37 @@ omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
if (__builtin_mul_overflow (span, strides[0], &stride))
return EINVAL;
+ if (((src_devicep && src_devicep->memcpy2d_func)
+ || (dst_devicep && dst_devicep->memcpy2d_func))
+ && (stride % element_size) == 0)
+ {
+ /* Try using memcpy2d for a 1-dimensional strided access. Here we
+ treat the transfer as a 2-dimensional array, where the inner
+ dimension is calculated to be (stride in bytes) / element_size.
+ Indices/offsets are adjusted so the source/destination pointers
+ point to the first element to be transferred, to make the sums
+ easier. (There are some configurations of 2D strided accesses
+ that memcpy3d could handle similarly, but those are probably rare
+ and are unimplemented for now.) */
+
+ /* If stride is element size, this is a contiguous transfer and
+ should have been handled above. */
+ assert (stride > element_size);
+
+ int dst_id = dst_devicep ? dst_devicep->target_id : -1;
+ int src_id = src_devicep ? src_devicep->target_id : -1;
+ void *subarray_src = (char *) src + src_off;
+ void *subarray_dst = (char *) dst + dst_off;
+
+ struct gomp_device_descr *devp = dst_devicep ? dst_devicep
+ : src_devicep;
+ ret = devp->memcpy2d_func (dst_id, src_id, element_size, volume[0],
+ subarray_dst, 0, 0, stride, subarray_src,
+ 0, 0, stride);
+ if (ret != -1)
+ return ret ? 0 : EINVAL;
+ }
+
for (i = 0, ret = 1; i < volume[0] && ret; i++)
{
if (src_devicep == NULL)