[gcc/devel/omp/gcc-14] OpenMP: Support accelerated 2D/3D memory copies for AMD GCN [OG14-only part]

Paul-Antoine Arras parras@gcc.gnu.org
Fri Jun 28 09:55:18 GMT 2024


https://gcc.gnu.org/g:2d14b0787c3f5acbbcd642ebf0352cb120e3012d

commit 2d14b0787c3f5acbbcd642ebf0352cb120e3012d
Author: Julian Brown <julian@codesourcery.com>
Date:   Wed Sep 13 13:31:48 2023 +0000

    OpenMP: Support accelerated 2D/3D memory copies for AMD GCN [OG14-only part]
    
    This patch only adds the bits missing from mainline: support for
    1-dimensional strided accesses.  These are treated as a special case
    of 2-dimensional transfers, where the innermost dimension is formed
    from the stride length (in bytes).
    
    2023-09-19  Julian Brown  <julian@codesourcery.com>
    
    libgomp/
            * target.c (omp_target_memcpy_rect_worker): Add 1D strided transfer
            support.

Diff:
---
 libgomp/ChangeLog.omp |  5 +++++
 libgomp/target.c      | 31 +++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index cdb3b42be14..a053803a9ef 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,3 +1,8 @@
+2023-09-19  Julian Brown  <julian@codesourcery.com>
+
+	* target.c (omp_target_memcpy_rect_worker): Add 1D strided transfer
+	support.
+
 2023-08-10  Julian Brown  <julian@codesourcery.com>
 
 	* testsuite/libgomp.c-c++-common/declare-mapper-18.c: New test.
diff --git a/libgomp/target.c b/libgomp/target.c
index c28c3e1e5bb..23dc72476ec 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -5180,6 +5180,37 @@ omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
       if (__builtin_mul_overflow (span, strides[0], &stride))
 	return EINVAL;
 
+      if (((src_devicep && src_devicep->memcpy2d_func)
+	   || (dst_devicep && dst_devicep->memcpy2d_func))
+	  && (stride % element_size) == 0)
+	{
+	  /* Try using memcpy2d for a 1-dimensional strided access.  Here we
+	     treat the transfer as a 2-dimensional array, where the inner
+	     dimension is calculated to be (stride in bytes) / element_size.
+	     Indices/offsets are adjusted so the source/destination pointers
+	     point to the first element to be transferred, to make the sums
+	     easier.  (There are some configurations of 2D strided accesses
+	     that memcpy3d could handle similarly, but those are probably rare
+	     and are unimplemented for now.)   */
+
+	  /* If stride is element size, this is a contiguous transfer and
+	     should have been handled above.  */
+	  assert (stride > element_size);
+
+	  int dst_id = dst_devicep ? dst_devicep->target_id : -1;
+	  int src_id = src_devicep ? src_devicep->target_id : -1;
+	  void *subarray_src = (char *) src + src_off;
+	  void *subarray_dst = (char *) dst + dst_off;
+
+	  struct gomp_device_descr *devp = dst_devicep ? dst_devicep
+						       : src_devicep;
+	  ret = devp->memcpy2d_func (dst_id, src_id, element_size, volume[0],
+				     subarray_dst, 0, 0, stride, subarray_src,
+				     0, 0, stride);
+	  if (ret != -1)
+	    return ret ? 0 : EINVAL;
+	}
+
       for (i = 0, ret = 1; i < volume[0] && ret; i++)
 	{
 	  if (src_devicep == NULL)


More information about the Gcc-cvs mailing list