OpenMP, NVPTX: memcpy[23]D bias correction

This patch works around behaviour of the 2D and 3D memcpy operations in the CUDA driver runtime. Particularly in Fortran, the "base pointer" of an array (used for either source or destination of a host/device copy) may lie outside of data that is actually stored on the device. The fix is to make sure that we use the first element of data to be transferred instead, and adjust parameters accordingly. 2023-10-02 Julian Brown <julian@codesourcery.com> libgomp/ * plugin/plugin-nvptx.c (GOMP_OFFLOAD_memcpy2d): Adjust parameters to avoid out-of-bounds array checks in CUDA runtime. (GOMP_OFFLOAD_memcpy3d): Likewise. * testsuite/libgomp.c-c++-common/memcpyxd-bias-1.c: New test.
author: Julian Brown <julian@codesourcery.com> 2023-08-23 23:46:29 +0000
committer: Julian Brown <julian@codesourcery.com> 2023-12-20 21:35:36 +0000
commit: d7e9ae4fa94afd5517536b4dfc7d6be0b3e8c2c3 (patch)
tree: 28d8b922c8fee2956851f08b1a7482912d044e9c /libgomp/plugin
parent: ba615557a4c698d27042a5fe058ea6e721a03b12 (diff)
download: gcc-d7e9ae4fa94afd5517536b4dfc7d6be0b3e8c2c3.zip
gcc-d7e9ae4fa94afd5517536b4dfc7d6be0b3e8c2c3.tar.gz
gcc-d7e9ae4fa94afd5517536b4dfc7d6be0b3e8c2c3.tar.bz2
1 files changed, 67 insertions, 0 deletions
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index d4a254e..a31f259 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1907,6 +1907,35 @@ GOMP_OFFLOAD_memcpy2d (int dst_ord, int src_ord, size_t dim1_size,
   data.srcXInBytes = src_offset1_size;
   data.srcY = src_offset0_len;
 
+  if (data.srcXInBytes != 0 || data.srcY != 0)
+    {
+      /* Adjust origin to the actual array data, else the CUDA 2D memory
+	 copy API calls below may fail to validate source/dest pointers
+	 correctly (especially for Fortran where the "virtual origin" of an
+	 array is often outside the stored data).  */
+      if (src_ord == -1)
+	data.srcHost = (const void *) ((const char *) data.srcHost
+				      + data.srcY * data.srcPitch
+				      + data.srcXInBytes);
+      else
+	data.srcDevice += data.srcY * data.srcPitch + data.srcXInBytes;
+      data.srcXInBytes = 0;
+      data.srcY = 0;
+    }
+
+  if (data.dstXInBytes != 0 || data.dstY != 0)
+    {
+      /* As above.  */
+      if (dst_ord == -1)
+	data.dstHost = (void *) ((char *) data.dstHost
+				 + data.dstY * data.dstPitch
+				 + data.dstXInBytes);
+      else
+	data.dstDevice += data.dstY * data.dstPitch + data.dstXInBytes;
+      data.dstXInBytes = 0;
+      data.dstY = 0;
+    }
+
   CUresult res = CUDA_CALL_NOCHECK (cuMemcpy2D, &data);
   if (res == CUDA_ERROR_INVALID_VALUE)
     /* If pitch > CU_DEVICE_ATTRIBUTE_MAX_PITCH or for device-to-device
@@ -1975,6 +2004,44 @@ GOMP_OFFLOAD_memcpy3d (int dst_ord, int src_ord, size_t dim2_size,
   data.srcY = src_offset1_len;
   data.srcZ = src_offset0_len;
 
+  if (data.srcXInBytes != 0 || data.srcY != 0 || data.srcZ != 0)
+    {
+      /* Adjust origin to the actual array data, else the CUDA 3D memory
+	 copy API call below may fail to validate source/dest pointers
+	 correctly (especially for Fortran where the "virtual origin" of an
+	 array is often outside the stored data).  */
+      if (src_ord == -1)
+	data.srcHost
+	  = (const void *) ((const char *) data.srcHost
+			    + (data.srcZ * data.srcHeight + data.srcY)
+			      * data.srcPitch
+			    + data.srcXInBytes);
+      else
+	data.srcDevice
+	  += (data.srcZ * data.srcHeight + data.srcY) * data.srcPitch
+	     + data.srcXInBytes;
+      data.srcXInBytes = 0;
+      data.srcY = 0;
+      data.srcZ = 0;
+    }
+
+  if (data.dstXInBytes != 0 || data.dstY != 0 || data.dstZ != 0)
+    {
+      /* As above.  */
+      if (dst_ord == -1)
+	data.dstHost = (void *) ((char *) data.dstHost
+				 + (data.dstZ * data.dstHeight + data.dstY)
+				   * data.dstPitch
+				 + data.dstXInBytes);
+      else
+	data.dstDevice
+	  += (data.dstZ * data.dstHeight + data.dstY) * data.dstPitch
+	     + data.dstXInBytes;
+      data.dstXInBytes = 0;
+      data.dstY = 0;
+      data.dstZ = 0;
+    }
+
   CUDA_CALL (cuMemcpy3D, &data);
   return true;
 }
author	Julian Brown <julian@codesourcery.com>	2023-08-23 23:46:29 +0000
committer	Julian Brown <julian@codesourcery.com>	2023-12-20 21:35:36 +0000
commit	d7e9ae4fa94afd5517536b4dfc7d6be0b3e8c2c3 (patch)
tree	28d8b922c8fee2956851f08b1a7482912d044e9c /libgomp/plugin
parent	ba615557a4c698d27042a5fe058ea6e721a03b12 (diff)
download	gcc-d7e9ae4fa94afd5517536b4dfc7d6be0b3e8c2c3.zip gcc-d7e9ae4fa94afd5517536b4dfc7d6be0b3e8c2c3.tar.gz gcc-d7e9ae4fa94afd5517536b4dfc7d6be0b3e8c2c3.tar.bz2