This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [gomp4.1] Add new versions of GOMP_target{,_data,_update} and GOMP_target_enter_exit_data


On Sat, Jun 20, 2015 at 00:35:14 +0300, Ilya Verbin wrote:
> Given that a mapped variable in 4.1 can have different kinds across nested data
> regions, we need to store map-type not only for each var, but also for each
> structured mapping.  Here is my WIP patch, is it sane? :)
> Attached testcase works OK on the device with non-shared memory.

A bit updated version with a fix for GOMP_MAP_TO_PSET.
make check-target-libgomp passed.


include/gcc/
	* gomp-constants.h (GOMP_MAP_ALWAYS_TO_P,
	GOMP_MAP_ALWAYS_FROM_P): Define.
libgomp/
	* libgomp.h (struct target_var_desc): New.
	(struct target_mem_desc): Replace array of splay_tree_key with array of
	target_var_desc.
	(struct splay_tree_key_s): Move copy_from to target_var_desc.
	* oacc-mem.c (gomp_acc_remove_pointer): Use copy_from from
	target_var_desc.
	* oacc-parallel.c (GOACC_parallel): Use copy_from from target_var_desc.
	* target.c (gomp_map_vars_existing): Copy data to device if map-type is
	'always to' or 'always tofrom'.
	(gomp_map_vars): Use key from target_var_desc.  Set copy_from and
	always_copy_from.
	(gomp_copy_from_async): Use key and copy_from from target_var_desc.
	(gomp_unmap_vars): Copy data from device if always_copy_from is set.
	(gomp_offload_image_to_device): Do not use copy_from.
	* testsuite/libgomp.c/target-11.c: New test.


diff --git a/include/gomp-constants.h b/include/gomp-constants.h
index 1849478..42bec04 100644
--- a/include/gomp-constants.h
+++ b/include/gomp-constants.h
@@ -107,6 +107,12 @@ enum gomp_map_kind
 #define GOMP_MAP_POINTER_P(X) \
   ((X) == GOMP_MAP_POINTER)
 
+#define GOMP_MAP_ALWAYS_TO_P(X) \
+  (((X) == GOMP_MAP_ALWAYS_TO) || ((X) == GOMP_MAP_ALWAYS_TOFROM))
+
+#define GOMP_MAP_ALWAYS_FROM_P(X) \
+  (((X) == GOMP_MAP_ALWAYS_FROM) || ((X) == GOMP_MAP_ALWAYS_TOFROM))
+
 
 /* Asynchronous behavior.  Keep in sync with
    libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_async_t.  */
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 87d6c40..8e6d4ac 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -636,6 +636,15 @@ typedef struct splay_tree_node_s *splay_tree_node;
 typedef struct splay_tree_s *splay_tree;
 typedef struct splay_tree_key_s *splay_tree_key;
 
+struct target_var_desc {
+  /* Splay key.  */
+  splay_tree_key key;
+  /* True if data should be copied from device to host at the end.  */
+  bool copy_from;
+  /* True if data always should be copied from device to host at the end.  */
+  bool always_copy_from;
+};
+
 struct target_mem_desc {
   /* Reference count.  */
   uintptr_t refcount;
@@ -655,9 +664,9 @@ struct target_mem_desc {
   /* Corresponding target device descriptor.  */
   struct gomp_device_descr *device_descr;
 
-  /* List of splay keys to remove (or decrease refcount)
+  /* List of target items to remove (or decrease refcount)
      at the end of region.  */
-  splay_tree_key list[];
+  struct target_var_desc list[];
 };
 
 struct splay_tree_key_s {
@@ -673,8 +682,6 @@ struct splay_tree_key_s {
   uintptr_t refcount;
   /* Asynchronous reference count.  */
   uintptr_t async_refcount;
-  /* True if data should be copied from device to host at the end.  */
-  bool copy_from;
 };
 
 #include "splay-tree.h"
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 90d43eb..c0fcb07 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -651,7 +651,7 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
     }
 
   if (force_copyfrom)
-    t->list[0]->copy_from = 1;
+    t->list[0].copy_from = 1;
 
   gomp_mutex_unlock (&acc_dev->lock);
 
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index d899946..8ea3dd1 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -135,8 +135,8 @@ GOACC_parallel (int device, void (*fn) (void *),
 
   devaddrs = gomp_alloca (sizeof (void *) * mapnum);
   for (i = 0; i < mapnum; i++)
-    devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
-			    + tgt->list[i]->tgt_offset);
+    devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
+			    + tgt->list[i].key->tgt_offset);
 
   acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds,
 			      num_gangs, num_workers, vector_length, async,
diff --git a/libgomp/target.c b/libgomp/target.c
index fb8487a..b1640c1 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -161,6 +161,12 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
 		  (void *) newn->host_start, (void *) newn->host_end,
 		  (void *) oldn->host_start, (void *) oldn->host_end);
     }
+
+  if (GOMP_MAP_ALWAYS_TO_P (kind))
+    devicep->host2dev_func (devicep->target_id,
+			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset),
+			    (void *) newn->host_start,
+			    newn->host_end - newn->host_start);
   oldn->refcount++;
 }
 
@@ -260,7 +266,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
       int kind = get_kind (short_mapkind, kinds, i);
       if (hostaddrs[i] == NULL)
 	{
-	  tgt->list[i] = NULL;
+	  tgt->list[i].key = NULL;
 	  continue;
 	}
       cur_node.host_start = (uintptr_t) hostaddrs[i];
@@ -271,12 +277,15 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
       splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
       if (n)
 	{
-	  tgt->list[i] = n;
+	  tgt->list[i].key = n;
+	  tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
+	  tgt->list[i].always_copy_from
+	    = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
 	  gomp_map_vars_existing (devicep, n, &cur_node, kind & typemask);
 	}
       else
 	{
-	  tgt->list[i] = NULL;
+	  tgt->list[i].key = NULL;
 
 	  size_t align = (size_t) 1 << (kind >> rshift);
 	  not_found_cnt++;
@@ -297,7 +306,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 		  break;
 		else
 		  {
-		    tgt->list[j] = NULL;
+		    tgt->list[j].key = NULL;
 		    i++;
 		  }
 	    }
@@ -345,7 +354,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
       size_t j;
 
       for (i = 0; i < mapnum; i++)
-	if (tgt->list[i] == NULL)
+	if (tgt->list[i].key == NULL)
 	  {
 	    int kind = get_kind (short_mapkind, kinds, i);
 	    if (hostaddrs[i] == NULL)
@@ -359,18 +368,23 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 	    splay_tree_key n = splay_tree_lookup (mem_map, k);
 	    if (n)
 	      {
-		tgt->list[i] = n;
+		tgt->list[i].key = n;
+		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
+		tgt->list[i].always_copy_from
+		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
 		gomp_map_vars_existing (devicep, n, k, kind & typemask);
 	      }
 	    else
 	      {
 		size_t align = (size_t) 1 << (kind >> rshift);
-		tgt->list[i] = k;
+		tgt->list[i].key = k;
 		tgt_size = (tgt_size + align - 1) & ~(align - 1);
 		k->tgt = tgt;
 		k->tgt_offset = tgt_size;
 		tgt_size += k->host_end - k->host_start;
-		k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
+		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
+		tgt->list[i].always_copy_from
+		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
 		k->refcount = 1;
 		k->async_refcount = 0;
 		tgt->refcount++;
@@ -388,6 +402,8 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 		  case GOMP_MAP_TOFROM:
 		  case GOMP_MAP_FORCE_TO:
 		  case GOMP_MAP_FORCE_TOFROM:
+		  case GOMP_MAP_ALWAYS_TO:
+		  case GOMP_MAP_ALWAYS_TOFROM:
 		    /* FIXME: Perhaps add some smarts, like if copying
 		       several adjacent fields from host to target, use some
 		       host buffer to avoid sending each var individually.  */
@@ -420,7 +436,9 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 			break;
 		      else
 			{
-			  tgt->list[j] = k;
+			  tgt->list[j].key = k;
+			  tgt->list[j].copy_from = false;
+			  tgt->list[j].always_copy_from = false;
 			  k->refcount++;
 			  gomp_map_pointer (tgt,
 					    (uintptr_t) *(void **) hostaddrs[j],
@@ -472,11 +490,11 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
     {
       for (i = 0; i < mapnum; i++)
 	{
-	  if (tgt->list[i] == NULL)
+	  if (tgt->list[i].key == NULL)
 	    cur_node.tgt_offset = (uintptr_t) NULL;
 	  else
-	    cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
-				  + tgt->list[i]->tgt_offset;
+	    cur_node.tgt_offset = tgt->list[i].key->tgt->tgt_start
+				  + tgt->list[i].key->tgt_offset;
 	  /* FIXME: see above FIXME comment.  */
 	  devicep->host2dev_func (devicep->target_id,
 				  (void *) (tgt->tgt_start
@@ -516,17 +534,17 @@ gomp_copy_from_async (struct target_mem_desc *tgt)
   gomp_mutex_lock (&devicep->lock);
 
   for (i = 0; i < tgt->list_count; i++)
-    if (tgt->list[i] == NULL)
+    if (tgt->list[i].key == NULL)
       ;
-    else if (tgt->list[i]->refcount > 1)
+    else if (tgt->list[i].key->refcount > 1)
       {
-	tgt->list[i]->refcount--;
-	tgt->list[i]->async_refcount++;
+	tgt->list[i].key->refcount--;
+	tgt->list[i].key->async_refcount++;
       }
     else
       {
-	splay_tree_key k = tgt->list[i];
-	if (k->copy_from)
+	splay_tree_key k = tgt->list[i].key;
+	if (tgt->list[i].copy_from)
 	  devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
 				  (void *) (k->tgt->tgt_start + k->tgt_offset),
 				  k->host_end - k->host_start);
@@ -554,25 +572,33 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
 
   size_t i;
   for (i = 0; i < tgt->list_count; i++)
-    if (tgt->list[i] == NULL)
-      ;
-    else if (tgt->list[i]->refcount > 1)
-      tgt->list[i]->refcount--;
-    else if (tgt->list[i]->async_refcount > 0)
-      tgt->list[i]->async_refcount--;
-    else
-      {
-	splay_tree_key k = tgt->list[i];
-	if (k->copy_from && do_copyfrom)
-	  devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
-				  (void *) (k->tgt->tgt_start + k->tgt_offset),
-				  k->host_end - k->host_start);
-	splay_tree_remove (&devicep->mem_map, k);
-	if (k->tgt->refcount > 1)
-	  k->tgt->refcount--;
-	else
-	  gomp_unmap_tgt (k->tgt);
-      }
+    {
+      splay_tree_key k = tgt->list[i].key;
+      if (k == NULL)
+	continue;
+
+      bool do_unmap = false;
+      if (k->refcount > 1)
+	k->refcount--;
+      else if (k->async_refcount > 0)
+	k->async_refcount--;
+      else
+	do_unmap = true;
+
+      if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
+	  || tgt->list[i].always_copy_from)
+	devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
+				(void *) (k->tgt->tgt_start + k->tgt_offset),
+				k->host_end - k->host_start);
+      if (do_unmap)
+	{
+	  splay_tree_remove (&devicep->mem_map, k);
+	  if (k->tgt->refcount > 1)
+	    k->tgt->refcount--;
+	  else
+	    gomp_unmap_tgt (k->tgt);
+	}
+    }
 
   if (tgt->refcount > 1)
     tgt->refcount--;
@@ -699,7 +725,6 @@ gomp_offload_image_to_device (struct gomp_device_descr *devicep,
       k->tgt_offset = target_table[i].start;
       k->refcount = 1;
       k->async_refcount = 0;
-      k->copy_from = false;
       array->left = NULL;
       array->right = NULL;
       splay_tree_insert (&devicep->mem_map, array);
@@ -725,7 +750,6 @@ gomp_offload_image_to_device (struct gomp_device_descr *devicep,
       k->tgt_offset = target_var->start;
       k->refcount = 1;
       k->async_refcount = 0;
-      k->copy_from = false;
       array->left = NULL;
       array->right = NULL;
       splay_tree_insert (&devicep->mem_map, array);
diff --git a/libgomp/testsuite/libgomp.c/target-11.c b/libgomp/testsuite/libgomp.c/target-11.c
new file mode 100644
index 0000000..4562d88
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-11.c
@@ -0,0 +1,51 @@
+/* { dg-require-effective-target offload_device } */
+
+#include <assert.h>
+
+int main ()
+{
+  int aa = 0, bb = 0, cc = 0, dd = 0;
+
+  #pragma omp target data map(tofrom: aa) map(to: bb) map(from: cc, dd)
+    {
+      int ok;
+      aa = bb = cc = 1;
+
+      /* Set dd on target to 0 for the further check.  */
+      #pragma omp target map(always to: dd)
+	{ dd; }
+
+      dd = 1;
+      #pragma omp target map(tofrom: aa) map(always to: bb) \
+	map(always from: cc) map(to: dd) map(from: ok)
+	{
+	  /* bb is always to, aa and dd are not.  */
+	  ok = (aa == 0) && (bb == 1) && (dd == 0);
+	  aa = bb = cc = dd = 2;
+	}
+
+      assert (ok);
+      assert (aa == 1);
+      assert (bb == 1);
+      assert (cc == 2); /* cc is always from.  */
+      assert (dd == 1);
+
+      dd = 3;
+      #pragma omp target map(from: cc) map(always to: dd) map(from: ok)
+	{
+	  ok = (dd == 3); /* dd is always to.  */
+	  cc = dd = 4;
+	}
+
+      assert (ok);
+      assert (cc == 2);
+      assert (dd == 3);
+    }
+
+  assert (aa == 2);
+  assert (bb == 1);
+  assert (cc == 4);
+  assert (dd == 4);
+
+  return 0;
+}


  -- Ilya


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]