aboutsummaryrefslogtreecommitdiff
path: root/libgomp/plugin
diff options
context:
space:
mode:
author: Julian Brown <julian@codesourcery.com> 2023-08-23 23:46:29 +0000
committer: Julian Brown <julian@codesourcery.com> 2023-12-20 21:35:36 +0000
commit: d7e9ae4fa94afd5517536b4dfc7d6be0b3e8c2c3 (patch)
tree: 28d8b922c8fee2956851f08b1a7482912d044e9c /libgomp/plugin
parent: ba615557a4c698d27042a5fe058ea6e721a03b12 (diff)
downloadgcc-d7e9ae4fa94afd5517536b4dfc7d6be0b3e8c2c3.zip
gcc-d7e9ae4fa94afd5517536b4dfc7d6be0b3e8c2c3.tar.gz
gcc-d7e9ae4fa94afd5517536b4dfc7d6be0b3e8c2c3.tar.bz2
OpenMP, NVPTX: memcpy[23]D bias correction
This patch works around the behaviour of the 2D and 3D memcpy operations in
the CUDA driver runtime.  Particularly in Fortran, the "base pointer" of an
array (used for either the source or the destination of a host/device copy)
may lie outside the data that is actually stored on the device.  The fix is
to use the address of the first element of data to be transferred instead,
and to adjust the copy parameters accordingly.

2023-10-02  Julian Brown  <julian@codesourcery.com>

libgomp/
	* plugin/plugin-nvptx.c (GOMP_OFFLOAD_memcpy2d): Adjust parameters to
	avoid out-of-bounds array checks in CUDA runtime.
	(GOMP_OFFLOAD_memcpy3d): Likewise.
	* testsuite/libgomp.c-c++-common/memcpyxd-bias-1.c: New test.
Diffstat (limited to 'libgomp/plugin')
-rw-r--r--  libgomp/plugin/plugin-nvptx.c  67
1 files changed, 67 insertions, 0 deletions
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index d4a254e..a31f259 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1907,6 +1907,35 @@ GOMP_OFFLOAD_memcpy2d (int dst_ord, int src_ord, size_t dim1_size,
data.srcXInBytes = src_offset1_size;
data.srcY = src_offset0_len;
+ if (data.srcXInBytes != 0 || data.srcY != 0)
+ {
+ /* Adjust origin to the actual array data, else the CUDA 2D memory
+ copy API calls below may fail to validate source/dest pointers
+ correctly (especially for Fortran where the "virtual origin" of an
+ array is often outside the stored data). */
+ if (src_ord == -1)
+ data.srcHost = (const void *) ((const char *) data.srcHost
+ + data.srcY * data.srcPitch
+ + data.srcXInBytes);
+ else
+ data.srcDevice += data.srcY * data.srcPitch + data.srcXInBytes;
+ data.srcXInBytes = 0;
+ data.srcY = 0;
+ }
+
+ if (data.dstXInBytes != 0 || data.dstY != 0)
+ {
+ /* As above. */
+ if (dst_ord == -1)
+ data.dstHost = (void *) ((char *) data.dstHost
+ + data.dstY * data.dstPitch
+ + data.dstXInBytes);
+ else
+ data.dstDevice += data.dstY * data.dstPitch + data.dstXInBytes;
+ data.dstXInBytes = 0;
+ data.dstY = 0;
+ }
+
CUresult res = CUDA_CALL_NOCHECK (cuMemcpy2D, &data);
if (res == CUDA_ERROR_INVALID_VALUE)
/* If pitch > CU_DEVICE_ATTRIBUTE_MAX_PITCH or for device-to-device
@@ -1975,6 +2004,44 @@ GOMP_OFFLOAD_memcpy3d (int dst_ord, int src_ord, size_t dim2_size,
data.srcY = src_offset1_len;
data.srcZ = src_offset0_len;
+ if (data.srcXInBytes != 0 || data.srcY != 0 || data.srcZ != 0)
+ {
+ /* Adjust origin to the actual array data, else the CUDA 3D memory
+ copy API call below may fail to validate source/dest pointers
+ correctly (especially for Fortran where the "virtual origin" of an
+ array is often outside the stored data). */
+ if (src_ord == -1)
+ data.srcHost
+ = (const void *) ((const char *) data.srcHost
+ + (data.srcZ * data.srcHeight + data.srcY)
+ * data.srcPitch
+ + data.srcXInBytes);
+ else
+ data.srcDevice
+ += (data.srcZ * data.srcHeight + data.srcY) * data.srcPitch
+ + data.srcXInBytes;
+ data.srcXInBytes = 0;
+ data.srcY = 0;
+ data.srcZ = 0;
+ }
+
+ if (data.dstXInBytes != 0 || data.dstY != 0 || data.dstZ != 0)
+ {
+ /* As above. */
+ if (dst_ord == -1)
+ data.dstHost = (void *) ((char *) data.dstHost
+ + (data.dstZ * data.dstHeight + data.dstY)
+ * data.dstPitch
+ + data.dstXInBytes);
+ else
+ data.dstDevice
+ += (data.dstZ * data.dstHeight + data.dstY) * data.dstPitch
+ + data.dstXInBytes;
+ data.dstXInBytes = 0;
+ data.dstY = 0;
+ data.dstZ = 0;
+ }
+
CUDA_CALL (cuMemcpy3D, &data);
return true;
}