This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [OpenACC] Update OpenACC data clause semantics to the 2.5 behavior - runtime
- From: Cesar Philippidis <cesar at codesourcery dot com>
- To: "gcc-patches at gcc dot gnu dot org" <gcc-patches at gcc dot gnu dot org>, Jakub Jelinek <jakub at redhat dot com>
- Date: Tue, 19 Jun 2018 10:01:20 -0700
- Subject: Re: [OpenACC] Update OpenACC data clause semantics to the 2.5 behavior - runtime
- References: <7fa7637f-e7f5-d43d-13f1-706c77e8e957@codesourcery.com> <836f376f-513d-dd29-9133-6526bfb59866@codesourcery.com>
This patch implements the OpenACC 2.5 data clause semantics in libgomp.
Is it OK for trunk?
Cesar
2018-06-19 Chung-Lin Tang <cltang@codesourcery.com>
Thomas Schwinge <thomas@codesourcery.com>
Cesar Philippidis <cesar@codesourcery.com>
libgomp/
* libgomp.h (struct splay_tree_key_s): Add dynamic_refcount member.
(gomp_acc_remove_pointer): Update declaration.
(gomp_acc_declare_allocate): Declare.
(gomp_remove_var): Declare.
* libgomp.map (OACC_2.5): Define.
* oacc-mem.c (acc_map_data): Update refcount.
(acc_unmap_data): Likewise.
(present_create_copy): Likewise.
(acc_create): Add FLAG_PRESENT when calling present_create_copy.
(acc_copyin): Likewise.
(FLAG_FINALIZE): Define.
(delete_copyout): Update dynamic refcounts, add support for FINALIZE.
(acc_delete_finalize): New function.
(acc_delete_finalize_async): New function.
(acc_copyout_finalize): New function.
(acc_copyout_finalize_async): New function.
(gomp_acc_insert_pointer): Update refcounts.
(gomp_acc_remove_pointer): Return if data is not present on the
accelerator.
* oacc-parallel.c (find_pset): Rename to find_pointer.
(find_pointer): Add support for GOMP_MAP_POINTER.
(handle_ftn_pointers): New function.
(GOACC_parallel_keyed): Update refcounts of variables.
(GOACC_enter_exit_data): Add support for finalized data mappings.
Add support for GOMP_MAP_{TO,ALLOC,RELESE,FROM}. Update handling
of fortran arrays.
(GOACC_update): Add support for GOMP_MAP_{ALWAYS_POINTER,TO,FROM}.
(GOACC_declare): Add support for GOMP_MAP_RELEASE, remove support
for GOMP_MAP_FORCE_FROM.
* openacc.f90 (module openacc_internal): Add
acc_copyout_finalize_{32_h,64_h,array_h,_l}, and
acc_delete_finalize_{32_h,64_h,array_h,_l}. Add interfaces for
acc_copyout_finalize and acc_delete_finalize.
(acc_copyout_finalize_32_h): New subroutine.
(acc_copyout_finalize_64_h): New subroutine.
(acc_copyout_finalize_array_h): New subroutine.
(acc_delete_finalize_32_h): New subroutine.
(acc_delete_finalize_64_h): New subroutine.
(acc_delete_finalize_array_h): New subroutine.
* openacc.h (acc_copyout_finalize): Declare.
(acc_copyout_finalize_async): Declare.
(acc_delete_finalize): Declare.
(acc_delete_finalize_async): Declare.
* openacc_lib.h (acc_copyout_finalize): New interface.
(acc_delete_finalize): New interface.
* target.c (gomp_map_vars): Update dynamic_refcount.
(gomp_remove_var): New function.
(gomp_unmap_vars): Use it.
(gomp_unload_image_from_device): Likewise.
>From 53ee03231c5e6e4747b4ef01335079a2d4a98480 Mon Sep 17 00:00:00 2001
From: Cesar Philippidis <cesar@codesourcery.com>
Date: Tue, 19 Jun 2018 09:33:04 -0700
Subject: [PATCH 7/7] runtime changes
---
libgomp/libgomp.h | 7 +-
libgomp/libgomp.map | 12 +++
libgomp/oacc-mem.c | 196 ++++++++++++++++++++++++++++++++-------
libgomp/oacc-parallel.c | 198 ++++++++++++++++++++++++++++++++++------
libgomp/openacc.f90 | 112 +++++++++++++++++++++++
libgomp/openacc.h | 6 ++
libgomp/openacc_lib.h | 40 ++++++++
libgomp/target.c | 41 ++++-----
8 files changed, 528 insertions(+), 84 deletions(-)
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 10ea8940c96..3a8cc2bd7d6 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -853,6 +853,8 @@ struct splay_tree_key_s {
uintptr_t tgt_offset;
/* Reference count. */
uintptr_t refcount;
+ /* Dynamic reference count. */
+ uintptr_t dynamic_refcount;
/* Pointer to the original mapping of "omp declare target link" object. */
splay_tree_key link_key;
};
@@ -991,7 +993,9 @@ enum gomp_map_vars_kind
};
extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *);
-extern void gomp_acc_remove_pointer (void *, bool, int, int);
+extern void gomp_acc_remove_pointer (void *, size_t, bool, int, int, int);
+extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *,
+ unsigned short *);
extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
size_t, void **, void **,
@@ -1001,6 +1005,7 @@ extern void gomp_unmap_vars (struct target_mem_desc *, bool);
extern void gomp_init_device (struct gomp_device_descr *);
extern void gomp_free_memmap (struct splay_tree_s *);
extern void gomp_unload_device (struct gomp_device_descr *);
+extern bool gomp_remove_var (struct gomp_device_descr *, splay_tree_key);
/* work.c */
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index 8752348fbf2..2cd3bf524bc 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -386,6 +386,18 @@ OACC_2.0.1 {
acc_pcreate;
} OACC_2.0;
+OACC_2.5 {
+ global:
+ acc_copyout_finalize;
+ acc_copyout_finalize_32_h_;
+ acc_copyout_finalize_64_h_;
+ acc_copyout_finalize_array_h_;
+ acc_delete_finalize;
+ acc_delete_finalize_32_h_;
+ acc_delete_finalize_64_h_;
+ acc_delete_finalize_array_h_;
+} OACC_2.0.1;
+
GOACC_2.0 {
global:
GOACC_data_end;
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 158f0862018..3787ce49e38 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -347,6 +347,7 @@ acc_map_data (void *h, void *d, size_t s)
tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
&kinds, true, GOMP_MAP_VARS_OPENACC);
+ tgt->list[0].key->refcount = REFCOUNT_INFINITY;
}
gomp_mutex_lock (&acc_dev->lock);
@@ -389,6 +390,9 @@ acc_unmap_data (void *h)
(void *) n->host_start, (int) host_size, (void *) h);
}
+ /* Mark for removal. */
+ n->refcount = 1;
+
t = n->tgt;
if (t->refcount == 2)
@@ -460,6 +464,11 @@ present_create_copy (unsigned f, void *h, size_t s)
gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
}
+ if (n->refcount != REFCOUNT_INFINITY)
+ {
+ n->refcount++;
+ n->dynamic_refcount++;
+ }
gomp_mutex_unlock (&acc_dev->lock);
}
else if (!(f & FLAG_CREATE))
@@ -483,6 +492,8 @@ present_create_copy (unsigned f, void *h, size_t s)
tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
GOMP_MAP_VARS_OPENACC);
+ /* Initialize dynamic refcount. */
+ tgt->list[0].key->dynamic_refcount = 1;
gomp_mutex_lock (&acc_dev->lock);
@@ -499,13 +510,13 @@ present_create_copy (unsigned f, void *h, size_t s)
void *
acc_create (void *h, size_t s)
{
- return present_create_copy (FLAG_CREATE, h, s);
+ return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
}
void *
acc_copyin (void *h, size_t s)
{
- return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
+ return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
}
void *
@@ -542,7 +553,8 @@ acc_pcopyin (void *h, size_t s)
}
#endif
-#define FLAG_COPYOUT (1 << 0)
+#define FLAG_COPYOUT (1 << 0)
+#define FLAG_FINALIZE (1 << 1)
static void
delete_copyout (unsigned f, void *h, size_t s, const char *libfnname)
@@ -581,15 +593,52 @@ delete_copyout (unsigned f, void *h, size_t s, const char *libfnname)
(void *) n->host_start, (int) host_size, (void *) h, (int) s);
}
- gomp_mutex_unlock (&acc_dev->lock);
+ if (n->refcount == REFCOUNT_INFINITY)
+ {
+ n->refcount = 0;
+ n->dynamic_refcount = 0;
+ }
+ if (n->refcount < n->dynamic_refcount)
+ {
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("Dynamic reference counting assert fail\n");
+ }
- if (f & FLAG_COPYOUT)
- acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
+ if (f & FLAG_FINALIZE)
+ {
+ n->refcount -= n->dynamic_refcount;
+ n->dynamic_refcount = 0;
+ }
+ else if (n->dynamic_refcount)
+ {
+ n->dynamic_refcount--;
+ n->refcount--;
+ }
- acc_unmap_data (h);
+ if (n->refcount == 0)
+ {
+ if (n->tgt->refcount == 2)
+ {
+ struct target_mem_desc *tp, *t;
+ for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
+ tp = t, t = t->prev)
+ if (n->tgt == t)
+ {
+ if (tp)
+ tp->prev = t->prev;
+ else
+ acc_dev->openacc.data_environ = t->prev;
+ break;
+ }
+ }
- if (!acc_dev->free_func (acc_dev->target_id, d))
- gomp_fatal ("error in freeing device memory in %s", libfnname);
+ if (f & FLAG_COPYOUT)
+ acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
+
+ gomp_remove_var (acc_dev, n);
+ }
+
+ gomp_mutex_unlock (&acc_dev->lock);
}
void
@@ -598,12 +647,36 @@ acc_delete (void *h , size_t s)
delete_copyout (0, h, s, __FUNCTION__);
}
+void
+acc_delete_finalize (void *h , size_t s)
+{
+ delete_copyout (FLAG_FINALIZE, h, s, __FUNCTION__);
+}
+
+void
+acc_delete_finalize_async (void *h , size_t s, int async)
+{
+ delete_copyout (FLAG_FINALIZE, h, s, __FUNCTION__);
+}
+
void
acc_copyout (void *h, size_t s)
{
delete_copyout (FLAG_COPYOUT, h, s, __FUNCTION__);
}
+void
+acc_copyout_finalize (void *h, size_t s)
+{
+ delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, __FUNCTION__);
+}
+
+void
+acc_copyout_finalize_async (void *h, size_t s, int async)
+{
+ delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, __FUNCTION__);
+}
+
static void
update_dev_host (int is_dev, void *h, size_t s)
{
@@ -659,11 +732,37 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev;
+ if (acc_is_present (*hostaddrs, *sizes))
+ {
+ splay_tree_key n;
+ gomp_mutex_lock (&acc_dev->lock);
+ n = lookup_host (acc_dev, *hostaddrs, *sizes);
+ gomp_mutex_unlock (&acc_dev->lock);
+
+ tgt = n->tgt;
+ for (size_t i = 0; i < tgt->list_count; i++)
+ if (tgt->list[i].key == n)
+ {
+ for (size_t j = 0; j < mapnum; j++)
+ if (i + j < tgt->list_count && tgt->list[i + j].key)
+ {
+ tgt->list[i + j].key->refcount++;
+ tgt->list[i + j].key->dynamic_refcount++;
+ }
+ return;
+ }
+ /* Should not reach here. */
+ gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset");
+ }
+
gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
+ /* Initialize dynamic refcount. */
+ tgt->list[0].key->dynamic_refcount = 1;
+
gomp_mutex_lock (&acc_dev->lock);
tgt->prev = acc_dev->openacc.data_environ;
acc_dev->openacc.data_environ = tgt;
@@ -671,7 +770,8 @@ gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
}
void
-gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
+gomp_acc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async,
+ int finalize, int mapnum)
{
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev;
@@ -679,6 +779,9 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
struct target_mem_desc *t;
int minrefs = (mapnum == 1) ? 2 : 3;
+ if (!acc_is_present (h, s))
+ return;
+
gomp_mutex_lock (&acc_dev->lock);
n = lookup_host (acc_dev, h, 1);
@@ -693,40 +796,65 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
t = n->tgt;
- struct target_mem_desc *tp;
+ if (n->refcount < n->dynamic_refcount)
+ {
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("Dynamic reference counting assert fail\n");
+ }
- if (t->refcount == minrefs)
+ if (finalize)
{
- /* This is the last reference, so pull the descriptor off the
- chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
- freeing the device memory. */
- t->tgt_end = 0;
- t->to_free = 0;
+ n->refcount -= n->dynamic_refcount;
+ n->dynamic_refcount = 0;
+ }
+ else if (n->dynamic_refcount)
+ {
+ n->dynamic_refcount--;
+ n->refcount--;
+ }
- for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
- tp = t, t = t->prev)
+ gomp_mutex_unlock (&acc_dev->lock);
+
+ if (n->refcount == 0)
+ {
+ if (t->refcount == minrefs)
{
- if (n->tgt == t)
+ /* This is the last reference, so pull the descriptor off the
+ chain. This prevents gomp_unmap_vars via gomp_unmap_tgt from
+ freeing the device memory. */
+ struct target_mem_desc *tp;
+ for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
+ tp = t, t = t->prev)
{
- if (tp)
- tp->prev = t->prev;
- else
- acc_dev->openacc.data_environ = t->prev;
- break;
+ if (n->tgt == t)
+ {
+ if (tp)
+ tp->prev = t->prev;
+ else
+ acc_dev->openacc.data_environ = t->prev;
+ break;
+ }
}
}
- }
- if (force_copyfrom)
- t->list[0].copy_from = 1;
+ /* Set refcount to 1 to allow gomp_unmap_vars to unmap it. */
+ n->refcount = 1;
+ t->refcount = minrefs;
+ for (size_t i = 0; i < t->list_count; i++)
+ if (t->list[i].key == n)
+ {
+ t->list[i].copy_from = force_copyfrom ? 1 : 0;
+ break;
+ }
- gomp_mutex_unlock (&acc_dev->lock);
+ /* If running synchronously, unmap immediately. */
+ if (async < acc_async_noval)
+ gomp_unmap_vars (t, true);
+ else
+ t->device_descr->openacc.register_async_cleanup_func (t, async);
+ }
- /* If running synchronously, unmap immediately. */
- if (async_synchronous_p (async))
- gomp_unmap_vars (t, true);
- else
- t->device_descr->openacc.register_async_cleanup_func (t, async);
+ gomp_mutex_unlock (&acc_dev->lock);
gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
}
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index 9eae43131f8..b80ace58590 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -38,15 +38,68 @@
#include <stdarg.h>
#include <assert.h>
+/* Returns the number of mappings associated with the pointer or pset. PSET
+ have three mappings, whereas pointer have two. */
+
static int
-find_pset (int pos, size_t mapnum, unsigned short *kinds)
+find_pointer (int pos, size_t mapnum, unsigned short *kinds)
{
if (pos + 1 >= mapnum)
return 0;
unsigned char kind = kinds[pos+1] & 0xff;
- return kind == GOMP_MAP_TO_PSET;
+ if (kind == GOMP_MAP_TO_PSET)
+ return 3;
+ else if (kind == GOMP_MAP_POINTER)
+ return 2;
+
+ return 0;
+}
+
+/* Handle the mapping pair that are presented when a
+ deviceptr clause is used with Fortran. */
+
+static void
+handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
+ unsigned short *kinds)
+{
+ int i;
+
+ for (i = 0; i < mapnum; i++)
+ {
+ unsigned short kind1 = kinds[i] & 0xff;
+
+ /* Handle Fortran deviceptr clause. */
+ if (kind1 == GOMP_MAP_FORCE_DEVICEPTR)
+ {
+ unsigned short kind2;
+
+ if (i < (signed)mapnum - 1)
+ kind2 = kinds[i + 1] & 0xff;
+ else
+ kind2 = 0xffff;
+
+ if (sizes[i] == sizeof (void *))
+ continue;
+
+ /* At this point, we're dealing with a Fortran deviceptr.
+ If the next element is not what we're expecting, then
+ this is an instance of where the deviceptr variable was
+ not used within the region and the pointer was removed
+ by the gimplifier. */
+ if (kind2 == GOMP_MAP_POINTER
+ && sizes[i + 1] == 0
+ && hostaddrs[i] == *(void **)hostaddrs[i + 1])
+ {
+ kinds[i+1] = kinds[i];
+ sizes[i+1] = sizeof (void *);
+ }
+
+ /* Invalidate the entry. */
+ hostaddrs[i] = NULL;
+ }
+ }
}
static void goacc_wait (int async, int num_waits, va_list *ap);
@@ -88,6 +141,8 @@ GOACC_parallel_keyed (int device, void (*fn) (void *),
thr = goacc_thread ();
acc_dev = thr->dev;
+ handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
+
/* Host fallback if "if" clause is false or if the current device is set to
the host. */
if (host_fallback)
@@ -183,10 +238,29 @@ GOACC_parallel_keyed (int device, void (*fn) (void *),
async, dims, tgt);
/* If running synchronously, unmap immediately. */
+ bool copyfrom = true;
if (async_synchronous_p (async))
gomp_unmap_vars (tgt, true);
else
- tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
+ {
+ bool async_unmap = false;
+ for (size_t i = 0; i < tgt->list_count; i++)
+ {
+ splay_tree_key k = tgt->list[i].key;
+ if (k && k->refcount == 1)
+ {
+ async_unmap = true;
+ break;
+ }
+ }
+ if (async_unmap)
+ tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
+ else
+ {
+ copyfrom = false;
+ gomp_unmap_vars (tgt, copyfrom);
+ }
+ }
acc_dev->openacc.async_set_async_func (acc_async_sync);
}
@@ -286,6 +360,17 @@ GOACC_enter_exit_data (int device, size_t mapnum,
va_end (ap);
}
+ /* Determine whether "finalize" semantics apply to all mappings of this
+ OpenACC directive. */
+ bool finalize = false;
+ if (mapnum > 0)
+ {
+ unsigned char kind = kinds[0] & 0xff;
+ if (kind == GOMP_MAP_DELETE
+ || kind == GOMP_MAP_FORCE_FROM)
+ finalize = true;
+ }
+
acc_dev->openacc.async_set_async_func (async);
/* Determine if this is an "acc enter data". */
@@ -298,13 +383,17 @@ GOACC_enter_exit_data (int device, size_t mapnum,
if (kind == GOMP_MAP_FORCE_ALLOC
|| kind == GOMP_MAP_FORCE_PRESENT
- || kind == GOMP_MAP_FORCE_TO)
+ || kind == GOMP_MAP_FORCE_TO
+ || kind == GOMP_MAP_TO
+ || kind == GOMP_MAP_ALLOC)
{
data_enter = true;
break;
}
- if (kind == GOMP_MAP_DELETE
+ if (kind == GOMP_MAP_RELEASE
+ || kind == GOMP_MAP_DELETE
+ || kind == GOMP_MAP_FROM
|| kind == GOMP_MAP_FORCE_FROM)
break;
@@ -312,31 +401,39 @@ GOACC_enter_exit_data (int device, size_t mapnum,
kind);
}
+ /* In c, non-pointers and arrays are represented by a single data clause.
+ Dynamically allocated arrays and subarrays are represented by a data
+ clause followed by an internal GOMP_MAP_POINTER.
+
+ In fortran, scalars and not allocated arrays are represented by a
+ single data clause. Allocated arrays and subarrays have three mappings:
+ 1) the original data clause, 2) a PSET 3) a pointer to the array data.
+ */
+
if (data_enter)
{
for (i = 0; i < mapnum; i++)
{
unsigned char kind = kinds[i] & 0xff;
- /* Scan for PSETs. */
- int psets = find_pset (i, mapnum, kinds);
+ /* Scan for pointers and PSETs. */
+ int pointer = find_pointer (i, mapnum, kinds);
- if (!psets)
+ if (!pointer)
{
switch (kind)
{
- case GOMP_MAP_POINTER:
- gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
- &kinds[i]);
+ case GOMP_MAP_ALLOC:
+ acc_present_or_create (hostaddrs[i], sizes[i]);
break;
case GOMP_MAP_FORCE_ALLOC:
acc_create (hostaddrs[i], sizes[i]);
break;
- case GOMP_MAP_FORCE_PRESENT:
+ case GOMP_MAP_TO:
acc_present_or_copyin (hostaddrs[i], sizes[i]);
break;
case GOMP_MAP_FORCE_TO:
- acc_present_or_copyin (hostaddrs[i], sizes[i]);
+ acc_copyin (hostaddrs[i], sizes[i]);
break;
default:
gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
@@ -346,12 +443,13 @@ GOACC_enter_exit_data (int device, size_t mapnum,
}
else
{
- gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
+ gomp_acc_insert_pointer (pointer, &hostaddrs[i],
+ &sizes[i], &kinds[i]);
/* Increment 'i' by two because OpenACC requires fortran
arrays to be contiguous, so each PSET is associated with
one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
one MAP_POINTER. */
- i += 2;
+ i += pointer - 1;
}
}
}
@@ -360,22 +458,28 @@ GOACC_enter_exit_data (int device, size_t mapnum,
{
unsigned char kind = kinds[i] & 0xff;
- int psets = find_pset (i, mapnum, kinds);
+ int pointer = find_pointer (i, mapnum, kinds);
- if (!psets)
+ if (!pointer)
{
switch (kind)
{
- case GOMP_MAP_POINTER:
- gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
- == GOMP_MAP_FORCE_FROM,
- async, 1);
- break;
+ case GOMP_MAP_RELEASE:
case GOMP_MAP_DELETE:
- acc_delete (hostaddrs[i], sizes[i]);
+ if (acc_is_present (hostaddrs[i], sizes[i]))
+ {
+ if (finalize)
+ acc_delete_finalize (hostaddrs[i], sizes[i]);
+ else
+ acc_delete (hostaddrs[i], sizes[i]);
+ }
break;
+ case GOMP_MAP_FROM:
case GOMP_MAP_FORCE_FROM:
- acc_copyout (hostaddrs[i], sizes[i]);
+ if (finalize)
+ acc_copyout_finalize (hostaddrs[i], sizes[i]);
+ else
+ acc_copyout (hostaddrs[i], sizes[i]);
break;
default:
gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
@@ -385,10 +489,12 @@ GOACC_enter_exit_data (int device, size_t mapnum,
}
else
{
- gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
- == GOMP_MAP_FORCE_FROM, async, 3);
+ bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
+ || kind == GOMP_MAP_FROM);
+ gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async,
+ finalize, pointer);
/* See the above comment. */
- i += 2;
+ i += pointer - 1;
}
}
@@ -447,6 +553,7 @@ GOACC_update (int device, size_t mapnum,
acc_dev->openacc.async_set_async_func (async);
+ bool update_device = false;
for (i = 0; i < mapnum; ++i)
{
unsigned char kind = kinds[i] & 0xff;
@@ -457,11 +564,46 @@ GOACC_update (int device, size_t mapnum,
case GOMP_MAP_TO_PSET:
break;
+ case GOMP_MAP_ALWAYS_POINTER:
+ if (update_device)
+ {
+ /* Save the contents of the host pointer. */
+ void *dptr = acc_deviceptr (hostaddrs[i-1]);
+ uintptr_t t = *(uintptr_t *) hostaddrs[i];
+
+ /* Update the contents of the host pointer to reflect
+ the value of the allocated device memory in the
+ previous pointer. */
+ *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
+ acc_update_device (hostaddrs[i], sizeof (uintptr_t));
+
+ /* Restore the host pointer. */
+ *(uintptr_t *) hostaddrs[i] = t;
+ update_device = false;
+ }
+ break;
+
+ case GOMP_MAP_TO:
+ if (!acc_is_present (hostaddrs[i], sizes[i]))
+ {
+ update_device = false;
+ break;
+ }
+ /* Fallthru */
case GOMP_MAP_FORCE_TO:
+ update_device = true;
acc_update_device (hostaddrs[i], sizes[i]);
break;
+ case GOMP_MAP_FROM:
+ if (!acc_is_present (hostaddrs[i], sizes[i]))
+ {
+ update_device = false;
+ break;
+ }
+ /* Fallthru */
case GOMP_MAP_FORCE_FROM:
+ update_device = false;
acc_update_self (hostaddrs[i], sizes[i]);
break;
@@ -522,6 +664,7 @@ GOACC_declare (int device, size_t mapnum,
case GOMP_MAP_FORCE_FROM:
case GOMP_MAP_FORCE_TO:
case GOMP_MAP_POINTER:
+ case GOMP_MAP_RELEASE:
case GOMP_MAP_DELETE:
GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
&kinds[i], GOMP_ASYNC_SYNC, 0);
@@ -543,7 +686,6 @@ GOACC_declare (int device, size_t mapnum,
break;
case GOMP_MAP_FROM:
- kinds[i] = GOMP_MAP_FORCE_FROM;
GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
&kinds[i], GOMP_ASYNC_SYNC, 0);
break;
diff --git a/libgomp/openacc.f90 b/libgomp/openacc.f90
index d201d1dde6f..84a8700f072 100644
--- a/libgomp/openacc.f90
+++ b/libgomp/openacc.f90
@@ -222,6 +222,24 @@ module openacc_internal
type (*), dimension (..), contiguous :: a
end subroutine
+ subroutine acc_copyout_finalize_32_h (a, len)
+ use iso_c_binding, only: c_int32_t
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+ type (*), dimension (*) :: a
+ integer (c_int32_t) len
+ end subroutine
+
+ subroutine acc_copyout_finalize_64_h (a, len)
+ use iso_c_binding, only: c_int64_t
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+ type (*), dimension (*) :: a
+ integer (c_int64_t) len
+ end subroutine
+
+ subroutine acc_copyout_finalize_array_h (a)
+ type (*), dimension (..), contiguous :: a
+ end subroutine
+
subroutine acc_delete_32_h (a, len)
use iso_c_binding, only: c_int32_t
!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
@@ -240,6 +258,24 @@ module openacc_internal
type (*), dimension (..), contiguous :: a
end subroutine
+ subroutine acc_delete_finalize_32_h (a, len)
+ use iso_c_binding, only: c_int32_t
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+ type (*), dimension (*) :: a
+ integer (c_int32_t) len
+ end subroutine
+
+ subroutine acc_delete_finalize_64_h (a, len)
+ use iso_c_binding, only: c_int64_t
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+ type (*), dimension (*) :: a
+ integer (c_int64_t) len
+ end subroutine
+
+ subroutine acc_delete_finalize_array_h (a)
+ type (*), dimension (..), contiguous :: a
+ end subroutine
+
subroutine acc_update_device_32_h (a, len)
use iso_c_binding, only: c_int32_t
!GCC$ ATTRIBUTES NO_ARG_CHECK :: a
@@ -426,6 +462,14 @@ module openacc_internal
integer (c_size_t), value :: len
end subroutine
+ subroutine acc_copyout_finalize_l (a, len) &
+ bind (C, name = "acc_copyout_finalize")
+ use iso_c_binding, only: c_size_t
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+ type (*), dimension (*) :: a
+ integer (c_size_t), value :: len
+ end subroutine
+
subroutine acc_delete_l (a, len) &
bind (C, name = "acc_delete")
use iso_c_binding, only: c_size_t
@@ -434,6 +478,14 @@ module openacc_internal
integer (c_size_t), value :: len
end subroutine
+ subroutine acc_delete_finalize_l (a, len) &
+ bind (C, name = "acc_delete_finalize")
+ use iso_c_binding, only: c_size_t
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+ type (*), dimension (*) :: a
+ integer (c_size_t), value :: len
+ end subroutine
+
subroutine acc_update_device_l (a, len) &
bind (C, name = "acc_update_device")
use iso_c_binding, only: c_size_t
@@ -598,12 +650,24 @@ module openacc
procedure :: acc_copyout_array_h
end interface
+ interface acc_copyout_finalize
+ procedure :: acc_copyout_finalize_32_h
+ procedure :: acc_copyout_finalize_64_h
+ procedure :: acc_copyout_finalize_array_h
+ end interface
+
interface acc_delete
procedure :: acc_delete_32_h
procedure :: acc_delete_64_h
procedure :: acc_delete_array_h
end interface
+ interface acc_delete_finalize
+ procedure :: acc_delete_finalize_32_h
+ procedure :: acc_delete_finalize_64_h
+ procedure :: acc_delete_finalize_array_h
+ end interface
+
interface acc_update_device
procedure :: acc_update_device_32_h
procedure :: acc_update_device_64_h
@@ -860,6 +924,30 @@ subroutine acc_copyout_array_h (a)
call acc_copyout_l (a, sizeof (a))
end subroutine
+subroutine acc_copyout_finalize_32_h (a, len)
+ use iso_c_binding, only: c_int32_t, c_size_t
+ use openacc_internal, only: acc_copyout_finalize_l
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+ type (*), dimension (*) :: a
+ integer (c_int32_t) len
+ call acc_copyout_finalize_l (a, int (len, kind = c_size_t))
+end subroutine
+
+subroutine acc_copyout_finalize_64_h (a, len)
+ use iso_c_binding, only: c_int64_t, c_size_t
+ use openacc_internal, only: acc_copyout_finalize_l
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+ type (*), dimension (*) :: a
+ integer (c_int64_t) len
+ call acc_copyout_finalize_l (a, int (len, kind = c_size_t))
+end subroutine
+
+subroutine acc_copyout_finalize_array_h (a)
+ use openacc_internal, only: acc_copyout_finalize_l
+ type (*), dimension (..), contiguous :: a
+ call acc_copyout_finalize_l (a, sizeof (a))
+end subroutine
+
subroutine acc_delete_32_h (a, len)
use iso_c_binding, only: c_int32_t, c_size_t
use openacc_internal, only: acc_delete_l
@@ -884,6 +972,30 @@ subroutine acc_delete_array_h (a)
call acc_delete_l (a, sizeof (a))
end subroutine
+subroutine acc_delete_finalize_32_h (a, len)
+ use iso_c_binding, only: c_int32_t, c_size_t
+ use openacc_internal, only: acc_delete_finalize_l
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+ type (*), dimension (*) :: a
+ integer (c_int32_t) len
+ call acc_delete_finalize_l (a, int (len, kind = c_size_t))
+end subroutine
+
+subroutine acc_delete_finalize_64_h (a, len)
+ use iso_c_binding, only: c_int64_t, c_size_t
+ use openacc_internal, only: acc_delete_finalize_l
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+ type (*), dimension (*) :: a
+ integer (c_int64_t) len
+ call acc_delete_finalize_l (a, int (len, kind = c_size_t))
+end subroutine
+
+subroutine acc_delete_finalize_array_h (a)
+ use openacc_internal, only: acc_delete_finalize_l
+ type (*), dimension (..), contiguous :: a
+ call acc_delete_finalize_l (a, sizeof (a))
+end subroutine
+
subroutine acc_update_device_32_h (a, len)
use iso_c_binding, only: c_int32_t, c_size_t
use openacc_internal, only: acc_update_device_l
diff --git a/libgomp/openacc.h b/libgomp/openacc.h
index b8572574f13..02a85a09ddb 100644
--- a/libgomp/openacc.h
+++ b/libgomp/openacc.h
@@ -109,6 +109,12 @@ int acc_is_present (void *, size_t) __GOACC_NOTHROW;
void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW;
void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW;
+/* Finalize versions of copyout/delete functions, specified in OpenACC 2.5. */
+void acc_copyout_finalize (void *, size_t) __GOACC_NOTHROW;
+void acc_copyout_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
+void acc_delete_finalize (void *, size_t) __GOACC_NOTHROW;
+void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
+
/* CUDA-specific routines. */
void *acc_get_current_cuda_device (void) __GOACC_NOTHROW;
void *acc_get_current_cuda_context (void) __GOACC_NOTHROW;
diff --git a/libgomp/openacc_lib.h b/libgomp/openacc_lib.h
index 5cf743c2491..737c582041d 100644
--- a/libgomp/openacc_lib.h
+++ b/libgomp/openacc_lib.h
@@ -273,6 +273,26 @@
end subroutine
end interface
+ interface acc_copyout_finalize
+ subroutine acc_copyout_finalize_32_h (a, len)
+ use iso_c_binding, only: c_int32_t
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+ type (*), dimension (*) :: a
+ integer (c_int32_t) len
+ end subroutine
+
+ subroutine acc_copyout_finalize_64_h (a, len)
+ use iso_c_binding, only: c_int64_t
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+ type (*), dimension (*) :: a
+ integer (c_int64_t) len
+ end subroutine
+
+ subroutine acc_copyout_finalize_array_h (a)
+ type (*), dimension (..), contiguous :: a
+ end subroutine
+ end interface
+
interface acc_delete
subroutine acc_delete_32_h (a, len)
use iso_c_binding, only: c_int32_t
@@ -293,6 +313,26 @@
end subroutine
end interface
+ interface acc_delete_finalize
+ subroutine acc_delete_finalize_32_h (a, len)
+ use iso_c_binding, only: c_int32_t
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+ type (*), dimension (*) :: a
+ integer (c_int32_t) len
+ end subroutine
+
+ subroutine acc_delete_finalize_64_h (a, len)
+ use iso_c_binding, only: c_int64_t
+ !GCC$ ATTRIBUTES NO_ARG_CHECK :: a
+ type (*), dimension (*) :: a
+ integer (c_int64_t) len
+ end subroutine
+
+ subroutine acc_delete_finalize_array_h (a)
+ type (*), dimension (..), contiguous :: a
+ end subroutine
+ end interface
+
interface acc_update_device
subroutine acc_update_device_32_h (a, len)
use iso_c_binding, only: c_int32_t
diff --git a/libgomp/target.c b/libgomp/target.c
index 509776d17a8..dda041cdbef 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -859,6 +859,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
tgt->list[i].offset = 0;
tgt->list[i].length = k->host_end - k->host_start;
k->refcount = 1;
+ k->dynamic_refcount = 0;
tgt->refcount++;
array->left = NULL;
array->right = NULL;
@@ -1011,6 +1012,23 @@ gomp_unmap_tgt (struct target_mem_desc *tgt)
free (tgt);
}
+attribute_hidden bool
+gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
+{
+ bool is_tgt_unmapped = false;
+ splay_tree_remove (&devicep->mem_map, k);
+ if (k->link_key)
+ splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key);
+ if (k->tgt->refcount > 1)
+ k->tgt->refcount--;
+ else
+ {
+ is_tgt_unmapped = true;
+ gomp_unmap_tgt (k->tgt);
+ }
+ return is_tgt_unmapped;
+}
+
/* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
variables back from device to host: if it is false, it is assumed that this
has been done already. */
@@ -1059,16 +1077,7 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
+ tgt->list[i].offset),
tgt->list[i].length);
if (do_unmap)
- {
- splay_tree_remove (&devicep->mem_map, k);
- if (k->link_key)
- splay_tree_insert (&devicep->mem_map,
- (splay_tree_node) k->link_key);
- if (k->tgt->refcount > 1)
- k->tgt->refcount--;
- else
- gomp_unmap_tgt (k->tgt);
- }
+ gomp_remove_var (devicep, k);
}
if (tgt->refcount > 1)
@@ -1298,17 +1307,7 @@ gomp_unload_image_from_device (struct gomp_device_descr *devicep,
else
{
splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
- splay_tree_remove (&devicep->mem_map, n);
- if (n->link_key)
- {
- if (n->tgt->refcount > 1)
- n->tgt->refcount--;
- else
- {
- is_tgt_unmapped = true;
- gomp_unmap_tgt (n->tgt);
- }
- }
+ is_tgt_unmapped = gomp_remove_var (devicep, n);
}
}
--
2.17.1