This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [OpenACC] Update OpenACC data clause semantics to the 2.5 behavior - runtime


This patch implements the OpenACC 2.5 data clause semantics in libgomp.

Is it OK for trunk?

Cesar
2018-06-19  Chung-Lin Tang <cltang@codesourcery.com>
	    Thomas Schwinge <thomas@codesourcery.com>
	    Cesar Philippidis  <cesar@codesourcery.com>

	libgomp/
	* libgomp.h (struct splay_tree_key_s): Add dynamic_refcount member.
	(gomp_acc_remove_pointer): Update declaration.
	(gomp_acc_declare_allocate): Declare.
	(gomp_remove_var): Declare.
	* libgomp.map (OACC_2.5): Define.
	* oacc-mem.c (acc_map_data): Update refcount.
	(acc_unmap_data): Likewise.
	(present_create_copy): Likewise.
	(acc_create): Add FLAG_PRESENT when calling present_create_copy.
	(acc_copyin): Likewise.
	(FLAG_FINALIZE): Define.
	(delete_copyout): Update dynamic refcounts, add support for FINALIZE.
	(acc_delete_finalize): New function.
	(acc_delete_finalize_async): New function.
	(acc_copyout_finalize): New function.
	(acc_copyout_finalize_async): New function.
	(gomp_acc_insert_pointer): Update refcounts.
	(gomp_acc_remove_pointer): Return if data is not present on the
	accelerator.
	* oacc-parallel.c (find_pset): Rename to find_pointer.
	(find_pointer): Add support for GOMP_MAP_POINTER.
	(handle_ftn_pointers): New function.
	(GOACC_parallel_keyed): Update refcounts of variables.
	(GOACC_enter_exit_data): Add support for finalized data mappings.
	Add support for GOMP_MAP_{TO,ALLOC,RELESE,FROM}. Update handling
	of fortran arrays.
	(GOACC_update): Add support for GOMP_MAP_{ALWAYS_POINTER,TO,FROM}.
	(GOACC_declare): Add support for GOMP_MAP_RELEASE, remove support
	for GOMP_MAP_FORCE_FROM.
	* openacc.f90 (module openacc_internal): Add
	acc_copyout_finalize_{32_h,64_h,array_h,_l}, and
	acc_delete_finalize_{32_h,64_h,array_h,_l}. Add interfaces for
	acc_copyout_finalize and acc_delete_finalize.
	(acc_copyout_finalize_32_h): New subroutine.
	(acc_copyout_finalize_64_h): New subroutine.
	(acc_copyout_finalize_array_h): New subroutine.
	(acc_delete_finalize_32_h): New subroutine.
	(acc_delete_finalize_64_h): New subroutine.
	(acc_delete_finalize_array_h): New subroutine.
	* openacc.h (acc_copyout_finalize): Declare.
	(acc_copyout_finalize_async): Declare.
	(acc_delete_finalize): Declare.
	(acc_delete_finalize_async): Declare.
	* openacc_lib.h (acc_copyout_finalize): New interface.
	(acc_delete_finalize): New interface.
	* target.c (gomp_map_vars): Update dynamic_refcount.
	(gomp_remove_var): New function.
	(gomp_unmap_vars): Use it.
	(gomp_unload_image_from_device): Likewise.


>From 53ee03231c5e6e4747b4ef01335079a2d4a98480 Mon Sep 17 00:00:00 2001
From: Cesar Philippidis <cesar@codesourcery.com>
Date: Tue, 19 Jun 2018 09:33:04 -0700
Subject: [PATCH 7/7] runtime changes

---
 libgomp/libgomp.h       |   7 +-
 libgomp/libgomp.map     |  12 +++
 libgomp/oacc-mem.c      | 196 ++++++++++++++++++++++++++++++++-------
 libgomp/oacc-parallel.c | 198 ++++++++++++++++++++++++++++++++++------
 libgomp/openacc.f90     | 112 +++++++++++++++++++++++
 libgomp/openacc.h       |   6 ++
 libgomp/openacc_lib.h   |  40 ++++++++
 libgomp/target.c        |  41 ++++-----
 8 files changed, 528 insertions(+), 84 deletions(-)

diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 10ea8940c96..3a8cc2bd7d6 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -853,6 +853,8 @@ struct splay_tree_key_s {
   uintptr_t tgt_offset;
   /* Reference count.  */
   uintptr_t refcount;
+  /* Dynamic reference count.  */
+  uintptr_t dynamic_refcount;
   /* Pointer to the original mapping of "omp declare target link" object.  */
   splay_tree_key link_key;
 };
@@ -991,7 +993,9 @@ enum gomp_map_vars_kind
 };
 
 extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *);
-extern void gomp_acc_remove_pointer (void *, bool, int, int);
+extern void gomp_acc_remove_pointer (void *, size_t, bool, int, int, int);
+extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *,
+				       unsigned short *);
 
 extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
 					      size_t, void **, void **,
@@ -1001,6 +1005,7 @@ extern void gomp_unmap_vars (struct target_mem_desc *, bool);
 extern void gomp_init_device (struct gomp_device_descr *);
 extern void gomp_free_memmap (struct splay_tree_s *);
 extern void gomp_unload_device (struct gomp_device_descr *);
+extern bool gomp_remove_var (struct gomp_device_descr *, splay_tree_key);
 
 /* work.c */
 
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index 8752348fbf2..2cd3bf524bc 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -386,6 +386,18 @@ OACC_2.0.1 {
 	acc_pcreate;
 } OACC_2.0;
 
+OACC_2.5 {
+  global:
+	acc_copyout_finalize;
+	acc_copyout_finalize_32_h_;
+	acc_copyout_finalize_64_h_;
+	acc_copyout_finalize_array_h_;
+	acc_delete_finalize;
+	acc_delete_finalize_32_h_;
+	acc_delete_finalize_64_h_;
+	acc_delete_finalize_array_h_;
+} OACC_2.0.1;
+
 GOACC_2.0 {
   global:
 	GOACC_data_end;
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 158f0862018..3787ce49e38 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -347,6 +347,7 @@ acc_map_data (void *h, void *d, size_t s)
 
       tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
 			   &kinds, true, GOMP_MAP_VARS_OPENACC);
+      tgt->list[0].key->refcount = REFCOUNT_INFINITY;
     }
 
   gomp_mutex_lock (&acc_dev->lock);
@@ -389,6 +390,9 @@ acc_unmap_data (void *h)
 		  (void *) n->host_start, (int) host_size, (void *) h);
     }
 
+  /* Mark for removal.  */
+  n->refcount = 1;
+
   t = n->tgt;
 
   if (t->refcount == 2)
@@ -460,6 +464,11 @@ present_create_copy (unsigned f, void *h, size_t s)
 	  gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
 	}
 
+      if (n->refcount != REFCOUNT_INFINITY)
+	{
+	  n->refcount++;
+	  n->dynamic_refcount++;
+	}
       gomp_mutex_unlock (&acc_dev->lock);
     }
   else if (!(f & FLAG_CREATE))
@@ -483,6 +492,8 @@ present_create_copy (unsigned f, void *h, size_t s)
 
       tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
 			   GOMP_MAP_VARS_OPENACC);
+      /* Initialize dynamic refcount.  */
+      tgt->list[0].key->dynamic_refcount = 1;
 
       gomp_mutex_lock (&acc_dev->lock);
 
@@ -499,13 +510,13 @@ present_create_copy (unsigned f, void *h, size_t s)
 void *
 acc_create (void *h, size_t s)
 {
-  return present_create_copy (FLAG_CREATE, h, s);
+  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
 }
 
 void *
 acc_copyin (void *h, size_t s)
 {
-  return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
+  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
 }
 
 void *
@@ -542,7 +553,8 @@ acc_pcopyin (void *h, size_t s)
 }
 #endif
 
-#define FLAG_COPYOUT (1 << 0)
+#define FLAG_COPYOUT  (1 << 0)
+#define FLAG_FINALIZE (1 << 1)
 
 static void
 delete_copyout (unsigned f, void *h, size_t s, const char *libfnname)
@@ -581,15 +593,52 @@ delete_copyout (unsigned f, void *h, size_t s, const char *libfnname)
 		  (void *) n->host_start, (int) host_size, (void *) h, (int) s);
     }
 
-  gomp_mutex_unlock (&acc_dev->lock);
+  if (n->refcount == REFCOUNT_INFINITY)
+    {
+      n->refcount = 0;
+      n->dynamic_refcount = 0;
+    }
+  if (n->refcount < n->dynamic_refcount)
+    {
+      gomp_mutex_unlock (&acc_dev->lock);
+      gomp_fatal ("Dynamic reference counting assert fail\n");
+    }
 
-  if (f & FLAG_COPYOUT)
-    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
+  if (f & FLAG_FINALIZE)
+    {
+      n->refcount -= n->dynamic_refcount;
+      n->dynamic_refcount = 0;
+    }
+  else if (n->dynamic_refcount)
+    {
+      n->dynamic_refcount--;
+      n->refcount--;
+    }
 
-  acc_unmap_data (h);
+  if (n->refcount == 0)
+    {
+      if (n->tgt->refcount == 2)
+	{
+	  struct target_mem_desc *tp, *t;
+	  for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
+	       tp = t, t = t->prev)
+	    if (n->tgt == t)
+	      {
+		if (tp)
+		  tp->prev = t->prev;
+		else
+		  acc_dev->openacc.data_environ = t->prev;
+		break;
+	      }
+	}
 
-  if (!acc_dev->free_func (acc_dev->target_id, d))
-    gomp_fatal ("error in freeing device memory in %s", libfnname);
+      if (f & FLAG_COPYOUT)
+	acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
+
+      gomp_remove_var (acc_dev, n);
+    }
+
+  gomp_mutex_unlock (&acc_dev->lock);
 }
 
 void
@@ -598,12 +647,36 @@ acc_delete (void *h , size_t s)
   delete_copyout (0, h, s, __FUNCTION__);
 }
 
+void
+acc_delete_finalize (void *h , size_t s)
+{
+  delete_copyout (FLAG_FINALIZE, h, s, __FUNCTION__);
+}
+
+void
+acc_delete_finalize_async (void *h , size_t s, int async)
+{
+  delete_copyout (FLAG_FINALIZE, h, s, __FUNCTION__);
+}
+
 void
 acc_copyout (void *h, size_t s)
 {
   delete_copyout (FLAG_COPYOUT, h, s, __FUNCTION__);
 }
 
+void
+acc_copyout_finalize (void *h, size_t s)
+{
+  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, __FUNCTION__);
+}
+
+void
+acc_copyout_finalize_async (void *h, size_t s, int async)
+{
+  delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, __FUNCTION__);
+}
+
 static void
 update_dev_host (int is_dev, void *h, size_t s)
 {
@@ -659,11 +732,37 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
+  if (acc_is_present (*hostaddrs, *sizes))
+    {
+      splay_tree_key n;
+      gomp_mutex_lock (&acc_dev->lock);
+      n = lookup_host (acc_dev, *hostaddrs, *sizes);
+      gomp_mutex_unlock (&acc_dev->lock);
+
+      tgt = n->tgt;
+      for (size_t i = 0; i < tgt->list_count; i++)
+	if (tgt->list[i].key == n)
+	  {
+	    for (size_t j = 0; j < mapnum; j++)
+	      if (i + j < tgt->list_count && tgt->list[i + j].key)
+		{
+		  tgt->list[i + j].key->refcount++;
+		  tgt->list[i + j].key->dynamic_refcount++;
+		}
+	    return;
+	  }
+      /* Should not reach here.  */
+      gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset");
+    }
+
   gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
 		       NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
 
+  /* Initialize dynamic refcount.  */
+  tgt->list[0].key->dynamic_refcount = 1;
+
   gomp_mutex_lock (&acc_dev->lock);
   tgt->prev = acc_dev->openacc.data_environ;
   acc_dev->openacc.data_environ = tgt;
@@ -671,7 +770,8 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
 }
 
 void
-gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
+gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
+			 int finalize, int mapnum)
 {
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
@@ -679,6 +779,9 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
   struct target_mem_desc *t;
   int minrefs = (mapnum == 1) ? 2 : 3;
 
+  if (!acc_is_present (h, s))
+    return;
+
   gomp_mutex_lock (&acc_dev->lock);
 
   n = lookup_host (acc_dev, h, 1);
@@ -693,40 +796,65 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
 
   t = n->tgt;
 
-  struct target_mem_desc *tp;
+  if (n->refcount < n->dynamic_refcount)
+    {
+      gomp_mutex_unlock (&acc_dev->lock);
+      gomp_fatal ("Dynamic reference counting assert fail\n");
+    }
 
-  if (t->refcount == minrefs)
+  if (finalize)
     {
-      /* This is the last reference, so pull the descriptor off the
-	 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
-	 freeing the device memory. */
-      t->tgt_end = 0;
-      t->to_free = 0;
+      n->refcount -= n->dynamic_refcount;
+      n->dynamic_refcount = 0;
+    }
+  else if (n->dynamic_refcount)
+    {
+      n->dynamic_refcount--;
+      n->refcount--;
+    }
 
-      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
-	   tp = t, t = t->prev)
+  gomp_mutex_unlock (&acc_dev->lock);
+
+  if (n->refcount == 0)
+    {
+      if (t->refcount == minrefs)
 	{
-	  if (n->tgt == t)
+	  /* This is the last reference, so pull the descriptor off the
+	     chain. This prevents gomp_unmap_vars via gomp_unmap_tgt from
+	     freeing the device memory. */
+	  struct target_mem_desc *tp;
+	  for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
+	       tp = t, t = t->prev)
 	    {
-	      if (tp)
-		tp->prev = t->prev;
-	      else
-		acc_dev->openacc.data_environ = t->prev;
-	      break;
+	      if (n->tgt == t)
+		{
+		  if (tp)
+		    tp->prev = t->prev;
+		  else
+		    acc_dev->openacc.data_environ = t->prev;
+		  break;
+		}
 	    }
 	}
-    }
 
-  if (force_copyfrom)
-    t->list[0].copy_from = 1;
+      /* Set refcount to 1 to allow gomp_unmap_vars to unmap it.  */
+      n->refcount = 1;
+      t->refcount = minrefs;
+      for (size_t i = 0; i < t->list_count; i++)
+	if (t->list[i].key == n)
+	  {
+	    t->list[i].copy_from = force_copyfrom ? 1 : 0;
+	    break;
+	  }
 
-  gomp_mutex_unlock (&acc_dev->lock);
+      /* If running synchronously, unmap immediately.  */
+      if (async < acc_async_noval)
+	gomp_unmap_vars (t, true);
+      else
+	t->device_descr->openacc.register_async_cleanup_func (t, async);
+    }
 
-  /* If running synchronously, unmap immediately.  */
-  if (async_synchronous_p (async))
-    gomp_unmap_vars (t, true);
-  else
-    t->device_descr->openacc.register_async_cleanup_func (t, async);
+  gomp_mutex_unlock (&acc_dev->lock);
 
   gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
 }
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index 9eae43131f8..b80ace58590 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -38,15 +38,68 @@
 #include <stdarg.h>
 #include <assert.h>
 
+/* Returns the number of mappings associated with the pointer or pset. PSET
+   have three mappings, whereas pointer have two.  */
+
 static int
-find_pset (int pos, size_t mapnum, unsigned short *kinds)
+find_pointer (int pos, size_t mapnum, unsigned short *kinds)
 {
   if (pos + 1 >= mapnum)
     return 0;
 
   unsigned char kind = kinds[pos+1] & 0xff;
 
-  return kind == GOMP_MAP_TO_PSET;
+  if (kind == GOMP_MAP_TO_PSET)
+    return 3;
+  else if (kind == GOMP_MAP_POINTER)
+    return 2;
+
+  return 0;
+}
+
+/* Handle the mapping pair that are presented when a
+   deviceptr clause is used with Fortran.  */
+
+static void
+handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
+		     unsigned short *kinds)
+{
+  int i;
+
+  for (i = 0; i < mapnum; i++)
+    {
+      unsigned short kind1 = kinds[i] & 0xff;
+
+      /* Handle Fortran deviceptr clause.  */
+      if (kind1 == GOMP_MAP_FORCE_DEVICEPTR)
+	{
+	  unsigned short kind2;
+
+	  if (i < (signed)mapnum - 1)
+	    kind2 = kinds[i + 1] & 0xff;
+	  else
+	    kind2 = 0xffff;
+
+	  if (sizes[i] == sizeof (void *))
+	    continue;
+
+	  /* At this point, we're dealing with a Fortran deviceptr.
+	     If the next element is not what we're expecting, then
+	     this is an instance of where the deviceptr variable was
+	     not used within the region and the pointer was removed
+	     by the gimplifier.  */
+	  if (kind2 == GOMP_MAP_POINTER
+	      && sizes[i + 1] == 0
+	      && hostaddrs[i] == *(void **)hostaddrs[i + 1])
+	    {
+	      kinds[i+1] = kinds[i];
+	      sizes[i+1] = sizeof (void *);
+	    }
+
+	  /* Invalidate the entry.  */
+	  hostaddrs[i] = NULL;
+	}
+    }
 }
 
 static void goacc_wait (int async, int num_waits, va_list *ap);
@@ -88,6 +141,8 @@ GOACC_parallel_keyed (int device, void (*fn) (void *),
   thr = goacc_thread ();
   acc_dev = thr->dev;
 
+  handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
+
   /* Host fallback if "if" clause is false or if the current device is set to
      the host.  */
   if (host_fallback)
@@ -183,10 +238,29 @@ GOACC_parallel_keyed (int device, void (*fn) (void *),
 			      async, dims, tgt);
 
   /* If running synchronously, unmap immediately.  */
+  bool copyfrom = true;
   if (async_synchronous_p (async))
     gomp_unmap_vars (tgt, true);
   else
-    tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
+    {
+      bool async_unmap = false;
+      for (size_t i = 0; i < tgt->list_count; i++)
+	{
+	  splay_tree_key k = tgt->list[i].key;
+	  if (k && k->refcount == 1)
+	    {
+	      async_unmap = true;
+	      break;
+	    }
+	}
+      if (async_unmap)
+	tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
+      else
+	{
+	  copyfrom = false;
+	  gomp_unmap_vars (tgt, copyfrom);
+	}
+    }
 
   acc_dev->openacc.async_set_async_func (acc_async_sync);
 }
@@ -286,6 +360,17 @@ GOACC_enter_exit_data (int device, size_t mapnum,
       va_end (ap);
     }
 
+  /* Determine whether "finalize" semantics apply to all mappings of this
+     OpenACC directive.  */
+  bool finalize = false;
+  if (mapnum > 0)
+    {
+      unsigned char kind = kinds[0] & 0xff;
+      if (kind == GOMP_MAP_DELETE
+	  || kind == GOMP_MAP_FORCE_FROM)
+	finalize = true;
+    }
+
   acc_dev->openacc.async_set_async_func (async);
 
   /* Determine if this is an "acc enter data".  */
@@ -298,13 +383,17 @@ GOACC_enter_exit_data (int device, size_t mapnum,
 
       if (kind == GOMP_MAP_FORCE_ALLOC
 	  || kind == GOMP_MAP_FORCE_PRESENT
-	  || kind == GOMP_MAP_FORCE_TO)
+	  || kind == GOMP_MAP_FORCE_TO
+	  || kind == GOMP_MAP_TO
+	  || kind == GOMP_MAP_ALLOC)
 	{
 	  data_enter = true;
 	  break;
 	}
 
-      if (kind == GOMP_MAP_DELETE
+      if (kind == GOMP_MAP_RELEASE
+	  || kind == GOMP_MAP_DELETE
+	  || kind == GOMP_MAP_FROM
 	  || kind == GOMP_MAP_FORCE_FROM)
 	break;
 
@@ -312,31 +401,39 @@ GOACC_enter_exit_data (int device, size_t mapnum,
 		      kind);
     }
 
+  /* In c, non-pointers and arrays are represented by a single data clause.
+     Dynamically allocated arrays and subarrays are represented by a data
+     clause followed by an internal GOMP_MAP_POINTER.
+
+     In fortran, scalars and not allocated arrays are represented by a
+     single data clause. Allocated arrays and subarrays have three mappings:
+     1) the original data clause, 2) a PSET 3) a pointer to the array data.
+  */
+
   if (data_enter)
     {
       for (i = 0; i < mapnum; i++)
 	{
 	  unsigned char kind = kinds[i] & 0xff;
 
-	  /* Scan for PSETs.  */
-	  int psets = find_pset (i, mapnum, kinds);
+	  /* Scan for pointers and PSETs.  */
+	  int pointer = find_pointer (i, mapnum, kinds);
 
-	  if (!psets)
+	  if (!pointer)
 	    {
 	      switch (kind)
 		{
-		case GOMP_MAP_POINTER:
-		  gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
-					&kinds[i]);
+		case GOMP_MAP_ALLOC:
+		  acc_present_or_create (hostaddrs[i], sizes[i]);
 		  break;
 		case GOMP_MAP_FORCE_ALLOC:
 		  acc_create (hostaddrs[i], sizes[i]);
 		  break;
-		case GOMP_MAP_FORCE_PRESENT:
+		case GOMP_MAP_TO:
 		  acc_present_or_copyin (hostaddrs[i], sizes[i]);
 		  break;
 		case GOMP_MAP_FORCE_TO:
-		  acc_present_or_copyin (hostaddrs[i], sizes[i]);
+		  acc_copyin (hostaddrs[i], sizes[i]);
 		  break;
 		default:
 		  gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
@@ -346,12 +443,13 @@ GOACC_enter_exit_data (int device, size_t mapnum,
 	    }
 	  else
 	    {
-	      gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
+	      gomp_acc_insert_pointer (pointer, &hostaddrs[i],
+				       &sizes[i], &kinds[i]);
 	      /* Increment 'i' by two because OpenACC requires fortran
 		 arrays to be contiguous, so each PSET is associated with
 		 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
 		 one MAP_POINTER.  */
-	      i += 2;
+	      i += pointer - 1;
 	    }
 	}
     }
@@ -360,22 +458,28 @@ GOACC_enter_exit_data (int device, size_t mapnum,
       {
 	unsigned char kind = kinds[i] & 0xff;
 
-	int psets = find_pset (i, mapnum, kinds);
+	int pointer = find_pointer (i, mapnum, kinds);
 
-	if (!psets)
+	if (!pointer)
 	  {
 	    switch (kind)
 	      {
-	      case GOMP_MAP_POINTER:
-		gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
-					 == GOMP_MAP_FORCE_FROM,
-					 async, 1);
-		break;
+	      case GOMP_MAP_RELEASE:
 	      case GOMP_MAP_DELETE:
-		acc_delete (hostaddrs[i], sizes[i]);
+		if (acc_is_present (hostaddrs[i], sizes[i]))
+		  {
+		    if (finalize)
+		      acc_delete_finalize (hostaddrs[i], sizes[i]);
+		    else
+		      acc_delete (hostaddrs[i], sizes[i]);
+		  }
 		break;
+	      case GOMP_MAP_FROM:
 	      case GOMP_MAP_FORCE_FROM:
-		acc_copyout (hostaddrs[i], sizes[i]);
+		if (finalize)
+		  acc_copyout_finalize (hostaddrs[i], sizes[i]);
+		else
+		  acc_copyout (hostaddrs[i], sizes[i]);
 		break;
 	      default:
 		gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
@@ -385,10 +489,12 @@ GOACC_enter_exit_data (int device, size_t mapnum,
 	  }
 	else
 	  {
-	    gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
-				     == GOMP_MAP_FORCE_FROM, async, 3);
+	    bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
+			     || kind == GOMP_MAP_FROM);
+	    gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async,
+				     finalize, pointer);
 	    /* See the above comment.  */
-	    i += 2;
+	    i += pointer - 1;
 	  }
       }
 
@@ -447,6 +553,7 @@ GOACC_update (int device, size_t mapnum,
 
   acc_dev->openacc.async_set_async_func (async);
 
+  bool update_device = false;
   for (i = 0; i < mapnum; ++i)
     {
       unsigned char kind = kinds[i] & 0xff;
@@ -457,11 +564,46 @@ GOACC_update (int device, size_t mapnum,
 	case GOMP_MAP_TO_PSET:
 	  break;
 
+	case GOMP_MAP_ALWAYS_POINTER:
+	  if (update_device)
+	    {
+	      /* Save the contents of the host pointer.  */
+	      void *dptr = acc_deviceptr (hostaddrs[i-1]);
+	      uintptr_t t = *(uintptr_t *) hostaddrs[i];
+
+	      /* Update the contents of the host pointer to reflect
+		 the value of the allocated device memory in the
+		 previous pointer.  */
+	      *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
+	      acc_update_device (hostaddrs[i], sizeof (uintptr_t));
+
+	      /* Restore the host pointer.  */
+	      *(uintptr_t *) hostaddrs[i] = t;
+	      update_device = false;
+	    }
+	  break;
+
+	case GOMP_MAP_TO:
+	  if (!acc_is_present (hostaddrs[i], sizes[i]))
+	    {
+	      update_device = false;
+	      break;
+	    }
+	  /* Fallthru  */
 	case GOMP_MAP_FORCE_TO:
+	  update_device = true;
 	  acc_update_device (hostaddrs[i], sizes[i]);
 	  break;
 
+	case GOMP_MAP_FROM:
+	  if (!acc_is_present (hostaddrs[i], sizes[i]))
+	    {
+	      update_device = false;
+	      break;
+	    }
+	  /* Fallthru  */
 	case GOMP_MAP_FORCE_FROM:
+	  update_device = false;
 	  acc_update_self (hostaddrs[i], sizes[i]);
 	  break;
 
@@ -522,6 +664,7 @@ GOACC_declare (int device, size_t mapnum,
 	  case GOMP_MAP_FORCE_FROM:
 	  case GOMP_MAP_FORCE_TO:
 	  case GOMP_MAP_POINTER:
+	  case GOMP_MAP_RELEASE:
 	  case GOMP_MAP_DELETE:
 	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
 				   &kinds[i], GOMP_ASYNC_SYNC, 0);
@@ -543,7 +686,6 @@ GOACC_declare (int device, size_t mapnum,
 	    break;
 
 	  case GOMP_MAP_FROM:
-	    kinds[i] = GOMP_MAP_FORCE_FROM;
 	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
 				   &kinds[i], GOMP_ASYNC_SYNC, 0);
 	    break;
diff --git a/libgomp/openacc.f90 b/libgomp/openacc.f90
index d201d1dde6f..84a8700f072 100644
--- a/libgomp/openacc.f90
+++ b/libgomp/openacc.f90
@@ -222,6 +222,24 @@ module openacc_internal
       type (*), dimension (..), contiguous :: a
     end subroutine
 
+    subroutine acc_copyout_finalize_32_h (a, len)
+      use iso_c_binding, only: c_int32_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int32_t) len
+    end subroutine
+
+    subroutine acc_copyout_finalize_64_h (a, len)
+      use iso_c_binding, only: c_int64_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int64_t) len
+    end subroutine
+
+    subroutine acc_copyout_finalize_array_h (a)
+      type (*), dimension (..), contiguous :: a
+    end subroutine
+
     subroutine acc_delete_32_h (a, len)
       use iso_c_binding, only: c_int32_t
       !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
@@ -240,6 +258,24 @@ module openacc_internal
       type (*), dimension (..), contiguous :: a
     end subroutine
 
+    subroutine acc_delete_finalize_32_h (a, len)
+      use iso_c_binding, only: c_int32_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int32_t) len
+    end subroutine
+
+    subroutine acc_delete_finalize_64_h (a, len)
+      use iso_c_binding, only: c_int64_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_int64_t) len
+    end subroutine
+
+    subroutine acc_delete_finalize_array_h (a)
+      type (*), dimension (..), contiguous :: a
+    end subroutine
+
     subroutine acc_update_device_32_h (a, len)
       use iso_c_binding, only: c_int32_t
       !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
@@ -426,6 +462,14 @@ module openacc_internal
       integer (c_size_t), value :: len
     end subroutine
 
+    subroutine acc_copyout_finalize_l (a, len) &
+        bind (C, name = "acc_copyout_finalize")
+      use iso_c_binding, only: c_size_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_size_t), value :: len
+    end subroutine
+
     subroutine acc_delete_l (a, len) &
         bind (C, name = "acc_delete")
       use iso_c_binding, only: c_size_t
@@ -434,6 +478,14 @@ module openacc_internal
       integer (c_size_t), value :: len
     end subroutine
 
+    subroutine acc_delete_finalize_l (a, len) &
+        bind (C, name = "acc_delete_finalize")
+      use iso_c_binding, only: c_size_t
+      !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+      type (*), dimension (*) :: a
+      integer (c_size_t), value :: len
+    end subroutine
+
     subroutine acc_update_device_l (a, len) &
         bind (C, name = "acc_update_device")
       use iso_c_binding, only: c_size_t
@@ -598,12 +650,24 @@ module openacc
     procedure :: acc_copyout_array_h
   end interface
 
+  interface acc_copyout_finalize
+    procedure :: acc_copyout_finalize_32_h
+    procedure :: acc_copyout_finalize_64_h
+    procedure :: acc_copyout_finalize_array_h
+  end interface
+
   interface acc_delete
     procedure :: acc_delete_32_h
     procedure :: acc_delete_64_h
     procedure :: acc_delete_array_h
   end interface
 
+  interface acc_delete_finalize
+    procedure :: acc_delete_finalize_32_h
+    procedure :: acc_delete_finalize_64_h
+    procedure :: acc_delete_finalize_array_h
+  end interface
+
   interface acc_update_device
     procedure :: acc_update_device_32_h
     procedure :: acc_update_device_64_h
@@ -860,6 +924,30 @@ subroutine acc_copyout_array_h (a)
   call acc_copyout_l (a, sizeof (a))
 end subroutine
 
+subroutine acc_copyout_finalize_32_h (a, len)
+  use iso_c_binding, only: c_int32_t, c_size_t
+  use openacc_internal, only: acc_copyout_finalize_l
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int32_t) len
+  call acc_copyout_finalize_l (a, int (len, kind = c_size_t))
+end subroutine
+
+subroutine acc_copyout_finalize_64_h (a, len)
+  use iso_c_binding, only: c_int64_t, c_size_t
+  use openacc_internal, only: acc_copyout_finalize_l
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int64_t) len
+  call acc_copyout_finalize_l (a, int (len, kind = c_size_t))
+end subroutine
+
+subroutine acc_copyout_finalize_array_h (a)
+  use openacc_internal, only: acc_copyout_finalize_l
+  type (*), dimension (..), contiguous :: a
+  call acc_copyout_finalize_l (a, sizeof (a))
+end subroutine
+
 subroutine acc_delete_32_h (a, len)
   use iso_c_binding, only: c_int32_t, c_size_t
   use openacc_internal, only: acc_delete_l
@@ -884,6 +972,30 @@ subroutine acc_delete_array_h (a)
   call acc_delete_l (a, sizeof (a))
 end subroutine
 
+subroutine acc_delete_finalize_32_h (a, len)
+  use iso_c_binding, only: c_int32_t, c_size_t
+  use openacc_internal, only: acc_delete_finalize_l
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int32_t) len
+  call acc_delete_finalize_l (a, int (len, kind = c_size_t))
+end subroutine
+
+subroutine acc_delete_finalize_64_h (a, len)
+  use iso_c_binding, only: c_int64_t, c_size_t
+  use openacc_internal, only: acc_delete_finalize_l
+  !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+  type (*), dimension (*) :: a
+  integer (c_int64_t) len
+  call acc_delete_finalize_l (a, int (len, kind = c_size_t))
+end subroutine
+
+subroutine acc_delete_finalize_array_h (a)
+  use openacc_internal, only: acc_delete_finalize_l
+  type (*), dimension (..), contiguous :: a
+  call acc_delete_finalize_l (a, sizeof (a))
+end subroutine
+
 subroutine acc_update_device_32_h (a, len)
   use iso_c_binding, only: c_int32_t, c_size_t
   use openacc_internal, only: acc_update_device_l
diff --git a/libgomp/openacc.h b/libgomp/openacc.h
index b8572574f13..02a85a09ddb 100644
--- a/libgomp/openacc.h
+++ b/libgomp/openacc.h
@@ -109,6 +109,12 @@ int acc_is_present (void *, size_t) __GOACC_NOTHROW;
 void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW;
 void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW;
 
+/* Finalize versions of copyout/delete functions, specified in OpenACC 2.5.  */
+void acc_copyout_finalize (void *, size_t) __GOACC_NOTHROW;
+void acc_copyout_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
+void acc_delete_finalize (void *, size_t) __GOACC_NOTHROW;
+void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
+
 /* CUDA-specific routines.  */
 void *acc_get_current_cuda_device (void) __GOACC_NOTHROW;
 void *acc_get_current_cuda_context (void) __GOACC_NOTHROW;
diff --git a/libgomp/openacc_lib.h b/libgomp/openacc_lib.h
index 5cf743c2491..737c582041d 100644
--- a/libgomp/openacc_lib.h
+++ b/libgomp/openacc_lib.h
@@ -273,6 +273,26 @@
         end subroutine
       end interface
 
+      interface acc_copyout_finalize
+        subroutine acc_copyout_finalize_32_h (a, len)
+          use iso_c_binding, only: c_int32_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int32_t) len
+        end subroutine
+
+        subroutine acc_copyout_finalize_64_h (a, len)
+          use iso_c_binding, only: c_int64_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int64_t) len
+        end subroutine
+
+        subroutine acc_copyout_finalize_array_h (a)
+          type (*), dimension (..), contiguous :: a
+        end subroutine
+      end interface
+
       interface acc_delete
         subroutine acc_delete_32_h (a, len)
           use iso_c_binding, only: c_int32_t
@@ -293,6 +313,26 @@
         end subroutine
       end interface
 
+      interface acc_delete_finalize
+        subroutine acc_delete_finalize_32_h (a, len)
+          use iso_c_binding, only: c_int32_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int32_t) len
+        end subroutine
+
+        subroutine acc_delete_finalize_64_h (a, len)
+          use iso_c_binding, only: c_int64_t
+          !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+          type (*), dimension (*) :: a
+          integer (c_int64_t) len
+        end subroutine
+
+        subroutine acc_delete_finalize_array_h (a)
+          type (*), dimension (..), contiguous :: a
+        end subroutine
+      end interface
+
       interface acc_update_device
         subroutine acc_update_device_32_h (a, len)
           use iso_c_binding, only: c_int32_t
diff --git a/libgomp/target.c b/libgomp/target.c
index 509776d17a8..dda041cdbef 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -859,6 +859,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 		tgt->list[i].offset = 0;
 		tgt->list[i].length = k->host_end - k->host_start;
 		k->refcount = 1;
+		k->dynamic_refcount = 0;
 		tgt->refcount++;
 		array->left = NULL;
 		array->right = NULL;
@@ -1011,6 +1012,23 @@ gomp_unmap_tgt (struct target_mem_desc *tgt)
   free (tgt);
 }
 
+attribute_hidden bool
+gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
+{
+  bool is_tgt_unmapped = false;
+  splay_tree_remove (&devicep->mem_map, k);
+  if (k->link_key)
+    splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
+  if (k->tgt->refcount > 1)
+    k->tgt->refcount--;
+  else
+    {
+      is_tgt_unmapped = true;
+      gomp_unmap_tgt (k->tgt);
+    }
+  return is_tgt_unmapped;
+}
+
 /* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
    variables back from device to host: if it is false, it is assumed that this
    has been done already.  */
@@ -1059,16 +1077,7 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
 				      + tgt->list[i].offset),
 			    tgt->list[i].length);
       if (do_unmap)
-	{
-	  splay_tree_remove (&devicep->mem_map, k);
-	  if (k->link_key)
-	    splay_tree_insert (&devicep->mem_map,
-			       (splay_tree_node) k->link_key);
-	  if (k->tgt->refcount > 1)
-	    k->tgt->refcount--;
-	  else
-	    gomp_unmap_tgt (k->tgt);
-	}
+	gomp_remove_var (devicep, k);
     }
 
   if (tgt->refcount > 1)
@@ -1298,17 +1307,7 @@ gomp_unload_image_from_device (struct gomp_device_descr *devicep,
       else
 	{
 	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
-	  splay_tree_remove (&devicep->mem_map, n);
-	  if (n->link_key)
-	    {
-	      if (n->tgt->refcount > 1)
-		n->tgt->refcount--;
-	      else
-		{
-		  is_tgt_unmapped = true;
-		  gomp_unmap_tgt (n->tgt);
-		}
-	    }
+	  is_tgt_unmapped = gomp_remove_var (devicep, n);
 	}
     }
 
-- 
2.17.1


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]