This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [gomp4.1] Add new versions of GOMP_target{,_data,_update} and GOMP_target_enter_exit_data
- From: Ilya Verbin <iverbin at gmail dot com>
- To: Jakub Jelinek <jakub at redhat dot com>
- Cc: Thomas Schwinge <thomas at codesourcery dot com>, gcc-patches at gcc dot gnu dot org, Kirill Yukhin <kirill dot yukhin at gmail dot com>
- Date: Tue, 23 Jun 2015 14:40:43 +0300
- Subject: Re: [gomp4.1] Add new versions of GOMP_target{,_data,_update} and GOMP_target_enter_exit_data
- Authentication-results: sourceware.org; auth=none
- References: <20150615122037 dot GA45068 at msticlxl57 dot ims dot intel dot com> <20150615130609 dot GR10247 at tucnak dot redhat dot com> <20150615161827 dot GB45068 at msticlxl57 dot ims dot intel dot com> <20150615162528 dot GU10247 at tucnak dot redhat dot com> <20150615194850 dot GC45068 at msticlxl57 dot ims dot intel dot com> <20150615195840 dot GZ10247 at tucnak dot redhat dot com> <20150619213514 dot GA23723 at msticlxl57 dot ims dot intel dot com>
On Sat, Jun 20, 2015 at 00:35:14 +0300, Ilya Verbin wrote:
> Given that a mapped variable in 4.1 can have different kinds across nested data
> regions, we need to store map-type not only for each var, but also for each
> structured mapping. Here is my WIP patch, is it sane? :)
> Attached testcase works OK on the device with non-shared memory.
A bit updated version with a fix for GOMP_MAP_TO_PSET.
make check-target-libgomp passed.
include/gcc/
* gomp-constants.h (GOMP_MAP_ALWAYS_TO_P,
GOMP_MAP_ALWAYS_FROM_P): Define.
libgomp/
* libgomp.h (struct target_var_desc): New.
(struct target_mem_desc): Replace array of splay_tree_key with array of
target_var_desc.
(struct splay_tree_key_s): Move copy_from to target_var_desc.
* oacc-mem.c (gomp_acc_remove_pointer): Use copy_from from
target_var_desc.
* oacc-parallel.c (GOACC_parallel): Use copy_from from target_var_desc.
* target.c (gomp_map_vars_existing): Copy data to device if map-type is
'always to' or 'always tofrom'.
(gomp_map_vars): Use key from target_var_desc. Set copy_from and
always_copy_from.
(gomp_copy_from_async): Use key and copy_from from target_var_desc.
(gomp_unmap_vars): Copy data from device if always_copy_from is set.
(gomp_offload_image_to_device): Do not use copy_from.
* testsuite/libgomp.c/target-11.c: New test.
diff --git a/include/gomp-constants.h b/include/gomp-constants.h
index 1849478..42bec04 100644
--- a/include/gomp-constants.h
+++ b/include/gomp-constants.h
@@ -107,6 +107,12 @@ enum gomp_map_kind
#define GOMP_MAP_POINTER_P(X) \
((X) == GOMP_MAP_POINTER)
+#define GOMP_MAP_ALWAYS_TO_P(X) \
+ (((X) == GOMP_MAP_ALWAYS_TO) || ((X) == GOMP_MAP_ALWAYS_TOFROM))
+
+#define GOMP_MAP_ALWAYS_FROM_P(X) \
+ (((X) == GOMP_MAP_ALWAYS_FROM) || ((X) == GOMP_MAP_ALWAYS_TOFROM))
+
/* Asynchronous behavior. Keep in sync with
libgomp/{openacc.h,openacc.f90,openacc_lib.h}:acc_async_t. */
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 87d6c40..8e6d4ac 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -636,6 +636,15 @@ typedef struct splay_tree_node_s *splay_tree_node;
typedef struct splay_tree_s *splay_tree;
typedef struct splay_tree_key_s *splay_tree_key;
+struct target_var_desc {
+ /* Splay key. */
+ splay_tree_key key;
+ /* True if data should be copied from device to host at the end. */
+ bool copy_from;
+ /* True if data always should be copied from device to host at the end. */
+ bool always_copy_from;
+};
+
struct target_mem_desc {
/* Reference count. */
uintptr_t refcount;
@@ -655,9 +664,9 @@ struct target_mem_desc {
/* Corresponding target device descriptor. */
struct gomp_device_descr *device_descr;
- /* List of splay keys to remove (or decrease refcount)
+ /* List of target items to remove (or decrease refcount)
at the end of region. */
- splay_tree_key list[];
+ struct target_var_desc list[];
};
struct splay_tree_key_s {
@@ -673,8 +682,6 @@ struct splay_tree_key_s {
uintptr_t refcount;
/* Asynchronous reference count. */
uintptr_t async_refcount;
- /* True if data should be copied from device to host at the end. */
- bool copy_from;
};
#include "splay-tree.h"
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 90d43eb..c0fcb07 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -651,7 +651,7 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
}
if (force_copyfrom)
- t->list[0]->copy_from = 1;
+ t->list[0].copy_from = 1;
gomp_mutex_unlock (&acc_dev->lock);
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index d899946..8ea3dd1 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -135,8 +135,8 @@ GOACC_parallel (int device, void (*fn) (void *),
devaddrs = gomp_alloca (sizeof (void *) * mapnum);
for (i = 0; i < mapnum; i++)
- devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
- + tgt->list[i]->tgt_offset);
+ devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
+ + tgt->list[i].key->tgt_offset);
acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds,
num_gangs, num_workers, vector_length, async,
diff --git a/libgomp/target.c b/libgomp/target.c
index fb8487a..b1640c1 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -161,6 +161,12 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
(void *) newn->host_start, (void *) newn->host_end,
(void *) oldn->host_start, (void *) oldn->host_end);
}
+
+ if (GOMP_MAP_ALWAYS_TO_P (kind))
+ devicep->host2dev_func (devicep->target_id,
+ (void *) (oldn->tgt->tgt_start + oldn->tgt_offset),
+ (void *) newn->host_start,
+ newn->host_end - newn->host_start);
oldn->refcount++;
}
@@ -260,7 +266,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
int kind = get_kind (short_mapkind, kinds, i);
if (hostaddrs[i] == NULL)
{
- tgt->list[i] = NULL;
+ tgt->list[i].key = NULL;
continue;
}
cur_node.host_start = (uintptr_t) hostaddrs[i];
@@ -271,12 +277,15 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
if (n)
{
- tgt->list[i] = n;
+ tgt->list[i].key = n;
+ tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
+ tgt->list[i].always_copy_from
+ = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
gomp_map_vars_existing (devicep, n, &cur_node, kind & typemask);
}
else
{
- tgt->list[i] = NULL;
+ tgt->list[i].key = NULL;
size_t align = (size_t) 1 << (kind >> rshift);
not_found_cnt++;
@@ -297,7 +306,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
break;
else
{
- tgt->list[j] = NULL;
+ tgt->list[j].key = NULL;
i++;
}
}
@@ -345,7 +354,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
size_t j;
for (i = 0; i < mapnum; i++)
- if (tgt->list[i] == NULL)
+ if (tgt->list[i].key == NULL)
{
int kind = get_kind (short_mapkind, kinds, i);
if (hostaddrs[i] == NULL)
@@ -359,18 +368,23 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
splay_tree_key n = splay_tree_lookup (mem_map, k);
if (n)
{
- tgt->list[i] = n;
+ tgt->list[i].key = n;
+ tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
+ tgt->list[i].always_copy_from
+ = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
gomp_map_vars_existing (devicep, n, k, kind & typemask);
}
else
{
size_t align = (size_t) 1 << (kind >> rshift);
- tgt->list[i] = k;
+ tgt->list[i].key = k;
tgt_size = (tgt_size + align - 1) & ~(align - 1);
k->tgt = tgt;
k->tgt_offset = tgt_size;
tgt_size += k->host_end - k->host_start;
- k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
+ tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
+ tgt->list[i].always_copy_from
+ = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
k->refcount = 1;
k->async_refcount = 0;
tgt->refcount++;
@@ -388,6 +402,8 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
case GOMP_MAP_TOFROM:
case GOMP_MAP_FORCE_TO:
case GOMP_MAP_FORCE_TOFROM:
+ case GOMP_MAP_ALWAYS_TO:
+ case GOMP_MAP_ALWAYS_TOFROM:
/* FIXME: Perhaps add some smarts, like if copying
several adjacent fields from host to target, use some
host buffer to avoid sending each var individually. */
@@ -420,7 +436,9 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
break;
else
{
- tgt->list[j] = k;
+ tgt->list[j].key = k;
+ tgt->list[j].copy_from = false;
+ tgt->list[j].always_copy_from = false;
k->refcount++;
gomp_map_pointer (tgt,
(uintptr_t) *(void **) hostaddrs[j],
@@ -472,11 +490,11 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
{
for (i = 0; i < mapnum; i++)
{
- if (tgt->list[i] == NULL)
+ if (tgt->list[i].key == NULL)
cur_node.tgt_offset = (uintptr_t) NULL;
else
- cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
- + tgt->list[i]->tgt_offset;
+ cur_node.tgt_offset = tgt->list[i].key->tgt->tgt_start
+ + tgt->list[i].key->tgt_offset;
/* FIXME: see above FIXME comment. */
devicep->host2dev_func (devicep->target_id,
(void *) (tgt->tgt_start
@@ -516,17 +534,17 @@ gomp_copy_from_async (struct target_mem_desc *tgt)
gomp_mutex_lock (&devicep->lock);
for (i = 0; i < tgt->list_count; i++)
- if (tgt->list[i] == NULL)
+ if (tgt->list[i].key == NULL)
;
- else if (tgt->list[i]->refcount > 1)
+ else if (tgt->list[i].key->refcount > 1)
{
- tgt->list[i]->refcount--;
- tgt->list[i]->async_refcount++;
+ tgt->list[i].key->refcount--;
+ tgt->list[i].key->async_refcount++;
}
else
{
- splay_tree_key k = tgt->list[i];
- if (k->copy_from)
+ splay_tree_key k = tgt->list[i].key;
+ if (tgt->list[i].copy_from)
devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
(void *) (k->tgt->tgt_start + k->tgt_offset),
k->host_end - k->host_start);
@@ -554,25 +572,33 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
size_t i;
for (i = 0; i < tgt->list_count; i++)
- if (tgt->list[i] == NULL)
- ;
- else if (tgt->list[i]->refcount > 1)
- tgt->list[i]->refcount--;
- else if (tgt->list[i]->async_refcount > 0)
- tgt->list[i]->async_refcount--;
- else
- {
- splay_tree_key k = tgt->list[i];
- if (k->copy_from && do_copyfrom)
- devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
- (void *) (k->tgt->tgt_start + k->tgt_offset),
- k->host_end - k->host_start);
- splay_tree_remove (&devicep->mem_map, k);
- if (k->tgt->refcount > 1)
- k->tgt->refcount--;
- else
- gomp_unmap_tgt (k->tgt);
- }
+ {
+ splay_tree_key k = tgt->list[i].key;
+ if (k == NULL)
+ continue;
+
+ bool do_unmap = false;
+ if (k->refcount > 1)
+ k->refcount--;
+ else if (k->async_refcount > 0)
+ k->async_refcount--;
+ else
+ do_unmap = true;
+
+ if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
+ || tgt->list[i].always_copy_from)
+ devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
+ (void *) (k->tgt->tgt_start + k->tgt_offset),
+ k->host_end - k->host_start);
+ if (do_unmap)
+ {
+ splay_tree_remove (&devicep->mem_map, k);
+ if (k->tgt->refcount > 1)
+ k->tgt->refcount--;
+ else
+ gomp_unmap_tgt (k->tgt);
+ }
+ }
if (tgt->refcount > 1)
tgt->refcount--;
@@ -699,7 +725,6 @@ gomp_offload_image_to_device (struct gomp_device_descr *devicep,
k->tgt_offset = target_table[i].start;
k->refcount = 1;
k->async_refcount = 0;
- k->copy_from = false;
array->left = NULL;
array->right = NULL;
splay_tree_insert (&devicep->mem_map, array);
@@ -725,7 +750,6 @@ gomp_offload_image_to_device (struct gomp_device_descr *devicep,
k->tgt_offset = target_var->start;
k->refcount = 1;
k->async_refcount = 0;
- k->copy_from = false;
array->left = NULL;
array->right = NULL;
splay_tree_insert (&devicep->mem_map, array);
diff --git a/libgomp/testsuite/libgomp.c/target-11.c b/libgomp/testsuite/libgomp.c/target-11.c
new file mode 100644
index 0000000..4562d88
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-11.c
@@ -0,0 +1,51 @@
+/* { dg-require-effective-target offload_device } */
+
+#include <assert.h>
+
+int main ()
+{
+ int aa = 0, bb = 0, cc = 0, dd = 0;
+
+ #pragma omp target data map(tofrom: aa) map(to: bb) map(from: cc, dd)
+ {
+ int ok;
+ aa = bb = cc = 1;
+
+ /* Set dd on target to 0 for the further check. */
+ #pragma omp target map(always to: dd)
+ { dd; }
+
+ dd = 1;
+ #pragma omp target map(tofrom: aa) map(always to: bb) \
+ map(always from: cc) map(to: dd) map(from: ok)
+ {
+ /* bb is always to, aa and dd are not. */
+ ok = (aa == 0) && (bb == 1) && (dd == 0);
+ aa = bb = cc = dd = 2;
+ }
+
+ assert (ok);
+ assert (aa == 1);
+ assert (bb == 1);
+ assert (cc == 2); /* cc is always from. */
+ assert (dd == 1);
+
+ dd = 3;
+ #pragma omp target map(from: cc) map(always to: dd) map(from: ok)
+ {
+ ok = (dd == 3); /* dd is always to. */
+ cc = dd = 4;
+ }
+
+ assert (ok);
+ assert (cc == 2);
+ assert (dd == 3);
+ }
+
+ assert (aa == 2);
+ assert (bb == 1);
+ assert (cc == 4);
+ assert (dd == 4);
+
+ return 0;
+}
-- Ilya
- References:
- [gomp4.1] Add new versions of GOMP_target{,_data,_update} and GOMP_target_enter_exit_data
- Re: [gomp4.1] Add new versions of GOMP_target{,_data,_update} and GOMP_target_enter_exit_data
- Re: [gomp4.1] Add new versions of GOMP_target{,_data,_update} and GOMP_target_enter_exit_data
- Re: [gomp4.1] Add new versions of GOMP_target{,_data,_update} and GOMP_target_enter_exit_data
- Re: [gomp4.1] Add new versions of GOMP_target{,_data,_update} and GOMP_target_enter_exit_data
- Re: [gomp4.1] Add new versions of GOMP_target{,_data,_update} and GOMP_target_enter_exit_data
- Re: [gomp4.1] Add new versions of GOMP_target{,_data,_update} and GOMP_target_enter_exit_data