This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[WIP] OpenACC 'acc_attach*', 'acc_detach*' runtime library routines (was: [PATCH] OpenACC 2.6 manual deep copy support (attach/detach))
- From: Thomas Schwinge <thomas at codesourcery dot com>
- To: Julian Brown <julian at codesourcery dot com>, <gcc-patches at gcc dot gnu dot org>
- Cc: <Catherine_Moore at mentor dot com>
- Date: Tue, 17 Dec 2019 18:27:27 +0100
- Subject: [WIP] OpenACC 'acc_attach*', 'acc_detach*' runtime library routines (was: [PATCH] OpenACC 2.6 manual deep copy support (attach/detach))
- Ironport-sdr: ZO+Ypa5M44WRJsrrEwdo/6h+ueMKm0WhVsfDXI2jb7XwADCuiZFH/U7grAFvn2I/MFNbcovX+S Q+CMaKpufJbkk2P+TWNeq2gMdIoNKZ+qMaP+f8XxTAfSbEVtzq5H5mAvBQ5BH3v1QT7lSrIAyQ nMgjSmSCr+akOwGEYlAIDwNXKzO/7OY3kqVUU+noo0QlLD7Npuwg+bbtDVD4iD+hzEArK+/E/d nshXvT5Xg2NRvinPjKqIXjehGxYLBWYknFMQgcQUGZcS4hqEGank2VCOt9HK6c7ymv4WDV3iVr 6Fs=
- Ironport-sdr: ota1saMRgJ6daJ8nP1ggc4tsZquO7b4dGxBG5anzyNuRJulFtgZh/Q8cQSwAecYjZwJobXZf32 PQkwhndDTlXL87bn7OJeDvQhd8O0ywU4SuX7ULst5W8Nk1EE/kWlH+/OSchwZQAw1/RqsD+kiA 0+I0tJ5C/OdQp8+gw0FeuhXHYIpabZbGYYmVyWj9ZxFqgQte4su+yoQN3ZEVjGrtHmTd4DI2RH E9s7/CQBc2PTe1x5b5qrFWwBV0Jpm2f8f7ZgcAxhKNCbsx/1VVU7UkHagUYkmnXqnrod3LZUXm Q34=
- References: <aaa47f4b99ed1cf7c54400da9e255df427da6761.1541863637.git.julian@codesourcery.com> <1543578069-386-1-git-send-email-julian@codesourcery.com> <20181207135019.GI12380@tucnak> <20181210194137.27720f3e@squid.athome> <87pniuuhkj.fsf@euler.schwinge.homeip.net> <20191106184339.3f5e6430@squid.athome> <20191122234258.50986156@squid.athome> <e6b40069-4916-5f12-4ab1-529fca25d717@codesourcery.com> <20191126024502.10808ed5@squid.athome> <87r213xkbj.fsf@euler.schwinge.homeip.net>
Hi!
On 2019-12-17T12:28:32+0100, Thomas Schwinge <thomas@codesourcery.com> wrote:
> As a first step, can you please split out just the code required to make
> the OpenACC 'acc_attach*', 'acc_detach*' runtime library routines work?
I've now simply done this myself (that is, code extraction from Julian's
patch, not any development, mind you), see the attached "[WIP] OpenACC
'acc_attach*', 'acc_detach*' runtime library routines". 15 minutes of
work, for anyone curious.
> Assuming there were no other defects in libgomp, whould this already make
> the 'libgomp.oacc-c-c++-common/deep-copy-3.c',
> 'libgomp.oacc-c-c++-common/deep-copy-5.c' test cases work?
That's indeed the case. :-)
Now, to apply some review/polish.
Grüße
Thomas
>From 19321c3dc7b96a305a51941c0a485f814af84130 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <thomas@codesourcery.com>
Date: Tue, 17 Dec 2019 17:57:36 +0100
Subject: [PATCH] [WIP] OpenACC 'acc_attach*', 'acc_detach*' runtime library
routines
---
libgomp/libgomp.h | 10 ++
libgomp/libgomp.map | 10 ++
libgomp/oacc-mem.c | 85 ++++++++++++
libgomp/openacc.h | 6 +
libgomp/target.c | 130 ++++++++++++++++++
.../libgomp.oacc-c-c++-common/deep-copy-3.c | 34 +++++
.../libgomp.oacc-c-c++-common/deep-copy-5.c | 81 +++++++++++
7 files changed, 356 insertions(+)
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c
create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index d65a1fa250b..56225c1482b 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -994,6 +994,9 @@ struct target_mem_desc {
struct splay_tree_aux {
/* Pointer to the original mapping of "omp declare target link" object. */
splay_tree_key link_key;
+ /* For a block with attached pointers, the attachment counters for each.
+ Only used for OpenACC. */
+ uintptr_t *attach_count;
};
struct splay_tree_key_s {
@@ -1155,6 +1158,13 @@ extern void gomp_copy_dev2host (struct gomp_device_descr *,
struct goacc_asyncqueue *, void *, const void *,
size_t);
extern uintptr_t gomp_map_val (struct target_mem_desc *, void **, size_t);
+extern void gomp_attach_pointer (struct gomp_device_descr *,
+ struct goacc_asyncqueue *, splay_tree,
+ splay_tree_key, uintptr_t, size_t,
+ struct gomp_coalesce_buf *);
+extern void gomp_detach_pointer (struct gomp_device_descr *,
+ struct goacc_asyncqueue *, splay_tree_key,
+ uintptr_t, bool, struct gomp_coalesce_buf *);
extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
size_t, void **, void **,
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index e9a0e059a30..1b7022b38c7 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -484,6 +484,16 @@ OACC_2.5.1 {
acc_register_library;
} OACC_2.5;
+OACC_2.6 {
+ global:
+ acc_attach;
+ acc_attach_async;
+ acc_detach;
+ acc_detach_async;
+ acc_detach_finalize;
+ acc_detach_finalize_async;
+} OACC_2.5.1;
+
GOACC_2.0 {
global:
GOACC_data_end;
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 297a4e5806c..b76dfc44ca1 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -918,6 +918,91 @@ acc_update_self_async (void *h, size_t s, int async)
}
+void
+acc_attach_async (void **hostaddr, int async)
+{
+ struct goacc_thread *thr = goacc_thread ();
+ struct gomp_device_descr *acc_dev = thr->dev;
+ goacc_aq aq = get_goacc_asyncqueue (async);
+
+ struct splay_tree_key_s cur_node;
+ splay_tree_key n;
+
+ if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ return;
+
+ gomp_mutex_lock (&acc_dev->lock);
+
+ cur_node.host_start = (uintptr_t) hostaddr;
+ cur_node.host_end = cur_node.host_start + sizeof (void *);
+ n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
+
+ if (n == NULL)
+ gomp_fatal ("struct not mapped for acc_attach");
+
+ gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr,
+ 0, NULL);
+
+ gomp_mutex_unlock (&acc_dev->lock);
+}
+
+void
+acc_attach (void **hostaddr)
+{
+ acc_attach_async (hostaddr, acc_async_sync);
+}
+
+static void
+goacc_detach_internal (void **hostaddr, int async, bool finalize)
+{
+ struct goacc_thread *thr = goacc_thread ();
+ struct gomp_device_descr *acc_dev = thr->dev;
+ struct splay_tree_key_s cur_node;
+ splay_tree_key n;
+ struct goacc_asyncqueue *aq = get_goacc_asyncqueue (async);
+
+ if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ return;
+
+ gomp_mutex_lock (&acc_dev->lock);
+
+ cur_node.host_start = (uintptr_t) hostaddr;
+ cur_node.host_end = cur_node.host_start + sizeof (void *);
+ n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
+
+ if (n == NULL)
+ gomp_fatal ("struct not mapped for acc_detach");
+
+ gomp_detach_pointer (acc_dev, aq, n, (uintptr_t) hostaddr, finalize, NULL);
+
+ gomp_mutex_unlock (&acc_dev->lock);
+}
+
+void
+acc_detach (void **hostaddr)
+{
+ goacc_detach_internal (hostaddr, acc_async_sync, false);
+}
+
+void
+acc_detach_async (void **hostaddr, int async)
+{
+ goacc_detach_internal (hostaddr, async, false);
+}
+
+void
+acc_detach_finalize (void **hostaddr)
+{
+ goacc_detach_internal (hostaddr, acc_async_sync, true);
+}
+
+void
+acc_detach_finalize_async (void **hostaddr, int async)
+{
+ goacc_detach_internal (hostaddr, async, true);
+}
+
+
/* OpenACC 'enter data', 'exit data': 'GOACC_enter_exit_data' and its helper
functions. */
diff --git a/libgomp/openacc.h b/libgomp/openacc.h
index 49340b7fb6d..c255cc56ac6 100644
--- a/libgomp/openacc.h
+++ b/libgomp/openacc.h
@@ -124,12 +124,18 @@ void *acc_hostptr (void *) __GOACC_NOTHROW;
int acc_is_present (void *, size_t) __GOACC_NOTHROW;
void acc_memcpy_to_device (void *, void *, size_t) __GOACC_NOTHROW;
void acc_memcpy_from_device (void *, void *, size_t) __GOACC_NOTHROW;
+void acc_attach (void **) __GOACC_NOTHROW;
+void acc_attach_async (void **, int) __GOACC_NOTHROW;
+void acc_detach (void **) __GOACC_NOTHROW;
+void acc_detach_async (void **, int) __GOACC_NOTHROW;
/* Finalize versions of copyout/delete functions, specified in OpenACC 2.5. */
void acc_copyout_finalize (void *, size_t) __GOACC_NOTHROW;
void acc_copyout_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
void acc_delete_finalize (void *, size_t) __GOACC_NOTHROW;
void acc_delete_finalize_async (void *, size_t, int) __GOACC_NOTHROW;
+void acc_detach_finalize (void **) __GOACC_NOTHROW;
+void acc_detach_finalize_async (void **, int) __GOACC_NOTHROW;
/* Async functions, specified in OpenACC 2.5. */
void acc_copyin_async (void *, size_t, int) __GOACC_NOTHROW;
diff --git a/libgomp/target.c b/libgomp/target.c
index d00334ce9e6..73699f35c71 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -498,6 +498,134 @@ gomp_map_fields_existing (struct target_mem_desc *tgt,
(void *) cur_node.host_end);
}
+attribute_hidden void
+gomp_attach_pointer (struct gomp_device_descr *devicep,
+ struct goacc_asyncqueue *aq, splay_tree mem_map,
+ splay_tree_key n, uintptr_t attach_to, size_t bias,
+ struct gomp_coalesce_buf *cbufp)
+{
+ struct splay_tree_key_s s;
+ size_t size, idx;
+
+ if (n == NULL)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("enclosing struct not mapped for attach");
+ }
+
+ size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
+ /* We might have a pointer in a packed struct: however we cannot have more
+ than one such pointer in each pointer-sized portion of the struct, so
+ this is safe. */
+ idx = (attach_to - n->host_start) / sizeof (void *);
+
+ if (!n->aux)
+ n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
+
+ if (!n->aux->attach_count)
+ n->aux->attach_count
+ = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
+
+ if (n->aux->attach_count[idx] < UINTPTR_MAX)
+ n->aux->attach_count[idx]++;
+ else
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("attach count overflow");
+ }
+
+ if (n->aux->attach_count[idx] == 1)
+ {
+ uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
+ - n->host_start;
+ uintptr_t target = (uintptr_t) *(void **) attach_to;
+ splay_tree_key tn;
+ uintptr_t data;
+
+ if ((void *) target == NULL)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("attempt to attach null pointer");
+ }
+
+ s.host_start = target + bias;
+ s.host_end = s.host_start + 1;
+ tn = splay_tree_lookup (mem_map, &s);
+
+ if (!tn)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("pointer target not mapped for attach");
+ }
+
+ data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
+
+ gomp_debug (1,
+ "%s: attaching host %p, target %p (struct base %p) to %p\n",
+ __FUNCTION__, (void *) attach_to, (void *) devptr,
+ (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
+
+ gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
+ sizeof (void *), cbufp);
+ }
+ else
+ gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
+ (void *) attach_to, (int) n->aux->attach_count[idx]);
+}
+
+attribute_hidden void
+gomp_detach_pointer (struct gomp_device_descr *devicep,
+ struct goacc_asyncqueue *aq, splay_tree_key n,
+ uintptr_t detach_from, bool finalize,
+ struct gomp_coalesce_buf *cbufp)
+{
+ size_t idx;
+
+ if (n == NULL)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("enclosing struct not mapped for detach");
+ }
+
+ idx = (detach_from - n->host_start) / sizeof (void *);
+
+ if (!n->aux || !n->aux->attach_count)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("no attachment counters for struct");
+ }
+
+ if (finalize)
+ n->aux->attach_count[idx] = 1;
+
+ if (n->aux->attach_count[idx] == 0)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("attach count underflow");
+ }
+ else
+ n->aux->attach_count[idx]--;
+
+ if (n->aux->attach_count[idx] == 0)
+ {
+ uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
+ - n->host_start;
+ uintptr_t target = (uintptr_t) *(void **) detach_from;
+
+ gomp_debug (1,
+ "%s: detaching host %p, target %p (struct base %p) to %p\n",
+ __FUNCTION__, (void *) detach_from, (void *) devptr,
+ (void *) (n->tgt->tgt_start + n->tgt_offset),
+ (void *) target);
+
+ gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
+ sizeof (void *), cbufp);
+ }
+ else
+ gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
+ (void *) detach_from, (int) n->aux->attach_count[idx]);
+}
+
attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
@@ -1218,6 +1346,8 @@ gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
if (k->aux->link_key)
splay_tree_insert (&devicep->mem_map,
(splay_tree_node) k->aux->link_key);
+ if (k->aux->attach_count)
+ free (k->aux->attach_count);
free (k->aux);
k->aux = NULL;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c
new file mode 100644
index 00000000000..cec764bd3e7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-3.c
@@ -0,0 +1,34 @@
+#include <assert.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+int
+main ()
+{
+ int n = 100, i;
+ int *a = (int *) malloc (sizeof (int) * n);
+ int *b;
+
+ for (i = 0; i < n; i++)
+ a[i] = i+1;
+
+#pragma acc enter data copyin(a[:n]) create(b)
+
+ b = a;
+ acc_attach ((void **)&b);
+
+#pragma acc parallel loop present (b[:n])
+ for (i = 0; i < n; i++)
+ b[i] = i+1;
+
+ acc_detach ((void **)&b);
+
+#pragma acc exit data copyout(a[:n], b)
+
+ for (i = 0; i < 10; i++)
+ assert (a[i] == b[i]);
+
+ free (a);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c
new file mode 100644
index 00000000000..89cafbb62ab
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-5.c
@@ -0,0 +1,81 @@
+#include <assert.h>
+#include <stdlib.h>
+#include <openacc.h>
+
+struct node
+{
+ struct node *next;
+ int val;
+};
+
+int
+sum_nodes (struct node *head)
+{
+ int i = 0, sum = 0;
+
+#pragma acc parallel reduction(+:sum) present(head[:1])
+ {
+ for (; head != NULL; head = head->next)
+ sum += head->val;
+ }
+
+ return sum;
+}
+
+void
+insert (struct node *head, int val)
+{
+ struct node *n = (struct node *) malloc (sizeof (struct node));
+
+ if (head->next)
+ acc_detach ((void **) &head->next);
+
+ n->val = val;
+ n->next = head->next;
+ head->next = n;
+
+ acc_copyin (n, sizeof (struct node));
+ acc_attach((void **) &head->next);
+
+ if (n->next)
+ acc_attach ((void **) &n->next);
+}
+
+void
+destroy (struct node *head)
+{
+ while (head->next != NULL)
+ {
+ acc_detach ((void **) &head->next);
+ struct node * n = head->next;
+ head->next = n->next;
+ if (n->next)
+ acc_detach ((void **) &n->next);
+
+ acc_delete (n, sizeof (struct node));
+ if (head->next)
+ acc_attach((void **) &head->next);
+
+ free (n);
+ }
+}
+
+int
+main ()
+{
+ struct node list = { .next = NULL, .val = 0 };
+ int i;
+
+ acc_copyin (&list, sizeof (struct node));
+
+ for (i = 0; i < 10; i++)
+ insert (&list, 2);
+
+ assert (sum_nodes (&list) == 10 * 2);
+
+ destroy (&list);
+
+ acc_delete (&list, sizeof (struct node));
+
+ return 0;
+}
--
2.17.1