[PATCH 7/10] OpenACC 2.0 support for libgomp - OpenACC runtime, NVidia PTX/CUDA plugin
Thomas Schwinge
thomas@codesourcery.com
Mon Jan 12 14:49:00 GMT 2015
Hi!
On Tue, 23 Sep 2014 19:19:31 +0100, Julian Brown <julian@codesourcery.com> wrote:
> This patch contains the bulk of the OpenACC 2.0 runtime support, [...]
> --- /dev/null
> +++ b/libgomp/libgomp-plugin.c
> @@ -0,0 +1,106 @@
> +/* Exported (non-hidden) functions exposing libgomp interface for plugins. */
> +void
> +gomp_plugin_mutex_init (gomp_mutex_t *mutex)
> +{
> + gomp_mutex_init (mutex);
> +}
> +
> +void
> +gomp_plugin_mutex_destroy (gomp_mutex_t *mutex)
> +{
> + gomp_mutex_destroy (mutex);
> +}
> +
> +void
> +gomp_plugin_mutex_lock (gomp_mutex_t *mutex)
> +{
> + gomp_mutex_lock (mutex);
> +}
> +
> +void
> +gomp_plugin_mutex_unlock (gomp_mutex_t *mutex)
> +{
> + gomp_mutex_unlock (mutex);
> +}
> --- a/libgomp/libgomp.map
> +++ b/libgomp/libgomp.map
> +PLUGIN_1.0 {
> + global:
> + gomp_plugin_mutex_init;
> + gomp_plugin_mutex_destroy;
> + gomp_plugin_mutex_lock;
> + gomp_plugin_mutex_unlock;
> +};
> --- /dev/null
> +++ b/libgomp/plugin-nvptx.c
> @@ -0,0 +1,1854 @@
> +/* Plugin for NVPTX execution.
> +#include "libgomp.h"
Plugins in libgomp are not to depend on libgomp internals (libgomp.h),
and given that...
> +struct PTX_device
> +{
> + /* A lock for use when manipulating the above stream list and array. */
> + gomp_mutex_t stream_lock;
> +};
> +static gomp_mutex_t PTX_event_lock;
> +static void
> +init_streams_for_device (struct PTX_device *ptx_dev, int concurrency)
> +{
> + gomp_plugin_mutex_init (&ptx_dev->stream_lock);
> +}
> +[...]
... it makes much more sense to just use pthread mutexes here. Committed
to gomp-4_0-branch in r219467:
commit 4de7ea8222739fa60d6eb81284dac61dc2bae7b2
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Mon Jan 12 14:35:51 2015 +0000
libgomp: Use pthread mutexes in the nvptx plugin.
... instead of libgomp's internal mutex implementation. Plugins aren't to
depend on internal libgomp interfaces, and how would you instantiate a
gomp_mutex_t in a plugin without knowing what it is exactly?
libgomp/
* plugin/plugin-nvptx.c (struct ptx_device): Turn stream_lock
member into a pthread_mutex_t. Adjust all users.
(ptx_event_lock): Likewise.
* libgomp-plugin.c (GOMP_PLUGIN_mutex_init)
(GOMP_PLUGIN_mutex_destroy, GOMP_PLUGIN_mutex_lock)
(GOMP_PLUGIN_mutex_unlock): Remove.
* libgomp-plugin.h (GOMP_PLUGIN_mutex_init)
(GOMP_PLUGIN_mutex_destroy, GOMP_PLUGIN_mutex_lock)
(GOMP_PLUGIN_mutex_unlock): Likewise.
* libgomp.map (GOMP_PLUGIN_1.0): Remove GOMP_PLUGIN_mutex_init,
GOMP_PLUGIN_mutex_destroy, GOMP_PLUGIN_mutex_lock,
GOMP_PLUGIN_mutex_unlock.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@219467 138bc75d-0d04-0410-961f-82ee72b054a4
---
libgomp/ChangeLog.gomp | 15 +++++++++++++++
libgomp/libgomp-plugin.c | 24 ------------------------
libgomp/libgomp-plugin.h | 7 -------
libgomp/libgomp.map | 4 ----
libgomp/plugin/plugin-nvptx.c | 39 ++++++++++++++++++++-------------------
5 files changed, 35 insertions(+), 54 deletions(-)
diff --git libgomp/ChangeLog.gomp libgomp/ChangeLog.gomp
index 745b836..d955a85 100644
--- libgomp/ChangeLog.gomp
+++ libgomp/ChangeLog.gomp
@@ -1,3 +1,18 @@
+2015-01-12 Thomas Schwinge <thomas@codesourcery.com>
+
+ * plugin/plugin-nvptx.c (struct ptx_device): Turn stream_lock
+ member into a pthread_mutex_t. Adjust all users.
+ (ptx_event_lock): Likewise.
+ * libgomp-plugin.c (GOMP_PLUGIN_mutex_init)
+ (GOMP_PLUGIN_mutex_destroy, GOMP_PLUGIN_mutex_lock)
+ (GOMP_PLUGIN_mutex_unlock): Remove.
+ * libgomp-plugin.h (GOMP_PLUGIN_mutex_init)
+ (GOMP_PLUGIN_mutex_destroy, GOMP_PLUGIN_mutex_lock)
+ (GOMP_PLUGIN_mutex_unlock): Likewise.
+ * libgomp.map (GOMP_PLUGIN_1.0): Remove GOMP_PLUGIN_mutex_init,
+ GOMP_PLUGIN_mutex_destroy, GOMP_PLUGIN_mutex_lock,
+ GOMP_PLUGIN_mutex_unlock.
+
2014-12-22 Thomas Schwinge <thomas@codesourcery.com>
* libgomp.c (struct gomp_device_descr): Add lock member.
diff --git libgomp/libgomp-plugin.c libgomp/libgomp-plugin.c
index 0026270..77e250e 100644
--- libgomp/libgomp-plugin.c
+++ libgomp/libgomp-plugin.c
@@ -82,27 +82,3 @@ GOMP_PLUGIN_fatal (const char *msg, ...)
/* Unreachable. */
abort ();
}
-
-void
-GOMP_PLUGIN_mutex_init (gomp_mutex_t *mutex)
-{
- gomp_mutex_init (mutex);
-}
-
-void
-GOMP_PLUGIN_mutex_destroy (gomp_mutex_t *mutex)
-{
- gomp_mutex_destroy (mutex);
-}
-
-void
-GOMP_PLUGIN_mutex_lock (gomp_mutex_t *mutex)
-{
- gomp_mutex_lock (mutex);
-}
-
-void
-GOMP_PLUGIN_mutex_unlock (gomp_mutex_t *mutex)
-{
- gomp_mutex_unlock (mutex);
-}
diff --git libgomp/libgomp-plugin.h libgomp/libgomp-plugin.h
index 051d4e2..2e2be1f 100644
--- libgomp/libgomp-plugin.h
+++ libgomp/libgomp-plugin.h
@@ -29,8 +29,6 @@
#ifndef LIBGOMP_PLUGIN_H
#define LIBGOMP_PLUGIN_H 1
-#include "mutex.h"
-
extern void *GOMP_PLUGIN_malloc (size_t) __attribute__((malloc));
extern void *GOMP_PLUGIN_malloc_cleared (size_t) __attribute__((malloc));
extern void *GOMP_PLUGIN_realloc (void *, size_t);
@@ -42,9 +40,4 @@ extern void GOMP_PLUGIN_error (const char *, ...)
extern void GOMP_PLUGIN_fatal (const char *, ...)
__attribute__((noreturn, format (printf, 1, 2)));
-extern void GOMP_PLUGIN_mutex_init (gomp_mutex_t *);
-extern void GOMP_PLUGIN_mutex_destroy (gomp_mutex_t *);
-extern void GOMP_PLUGIN_mutex_lock (gomp_mutex_t *);
-extern void GOMP_PLUGIN_mutex_unlock (gomp_mutex_t *);
-
#endif
diff --git libgomp/libgomp.map libgomp/libgomp.map
index aa1fdb8..bfdb78c 100644
--- libgomp/libgomp.map
+++ libgomp/libgomp.map
@@ -334,10 +334,6 @@ GOMP_PLUGIN_1.0 {
GOMP_PLUGIN_error;
GOMP_PLUGIN_debug;
GOMP_PLUGIN_fatal;
- GOMP_PLUGIN_mutex_init;
- GOMP_PLUGIN_mutex_destroy;
- GOMP_PLUGIN_mutex_lock;
- GOMP_PLUGIN_mutex_unlock;
GOMP_PLUGIN_async_unmap_vars;
GOMP_PLUGIN_acc_thread;
};
diff --git libgomp/plugin/plugin-nvptx.c libgomp/plugin/plugin-nvptx.c
index 593b1a9..f92ff40 100644
--- libgomp/plugin/plugin-nvptx.c
+++ libgomp/plugin/plugin-nvptx.c
@@ -39,6 +39,7 @@
#include "oacc-ptx.h"
#include "oacc-plugin.h"
+#include <pthread.h>
#include <cuda.h>
#include <stdint.h>
#include <string.h>
@@ -302,7 +303,7 @@ struct ptx_device
int size;
} async_streams;
/* A lock for use when manipulating the above stream list and array. */
- gomp_mutex_t stream_lock;
+ pthread_mutex_t stream_lock;
int ord;
bool overlap;
bool map;
@@ -331,7 +332,7 @@ struct ptx_event
struct ptx_event *next;
};
-static gomp_mutex_t ptx_event_lock;
+static pthread_mutex_t ptx_event_lock;
static struct ptx_event *ptx_events;
#define _XSTR(s) _STR(s)
@@ -424,7 +425,7 @@ init_streams_for_device (struct ptx_device *ptx_dev, int concurrency)
ptx_dev->null_stream = null_stream;
ptx_dev->active_streams = NULL;
- GOMP_PLUGIN_mutex_init (&ptx_dev->stream_lock);
+ pthread_mutex_init (&ptx_dev->stream_lock, NULL);
if (concurrency < 1)
concurrency = 1;
@@ -484,7 +485,7 @@ select_stream_for_async (int async, pthread_t thread, bool create,
async++;
if (create)
- GOMP_PLUGIN_mutex_lock (&ptx_dev->stream_lock);
+ pthread_mutex_lock (&ptx_dev->stream_lock);
/* NOTE: AFAICT there's no particular need for acc_async_sync to map to the
null stream, and in fact better performance may be obtainable if it doesn't
@@ -566,7 +567,7 @@ select_stream_for_async (int async, pthread_t thread, bool create,
if (thread != stream->host_thread)
stream->multithreaded = true;
- GOMP_PLUGIN_mutex_unlock (&ptx_dev->stream_lock);
+ pthread_mutex_unlock (&ptx_dev->stream_lock);
}
else if (stream && !stream->multithreaded
&& !pthread_equal (stream->host_thread, thread))
@@ -597,7 +598,7 @@ nvptx_init (void)
ptx_events = NULL;
- GOMP_PLUGIN_mutex_init (&ptx_event_lock);
+ pthread_mutex_init (&ptx_event_lock, NULL);
ptx_inited = true;
@@ -822,7 +823,7 @@ event_gc (bool memmap_lockable)
struct ptx_event *ptx_event = ptx_events;
struct nvptx_thread *nvthd = nvptx_thread ();
- GOMP_PLUGIN_mutex_lock (&ptx_event_lock);
+ pthread_mutex_lock (&ptx_event_lock);
while (ptx_event != NULL)
{
@@ -883,7 +884,7 @@ event_gc (bool memmap_lockable)
}
}
- GOMP_PLUGIN_mutex_unlock (&ptx_event_lock);
+ pthread_mutex_unlock (&ptx_event_lock);
}
static void
@@ -901,12 +902,12 @@ event_add (enum ptx_event_type type, CUevent *e, void *h)
ptx_event->addr = h;
ptx_event->ord = nvthd->ptx_dev->ord;
- GOMP_PLUGIN_mutex_lock (&ptx_event_lock);
+ pthread_mutex_lock (&ptx_event_lock);
ptx_event->next = ptx_events;
ptx_events = ptx_event;
- GOMP_PLUGIN_mutex_unlock (&ptx_event_lock);
+ pthread_mutex_unlock (&ptx_event_lock);
}
void
@@ -1239,19 +1240,19 @@ nvptx_async_test_all (void)
pthread_t self = pthread_self ();
struct nvptx_thread *nvthd = nvptx_thread ();
- GOMP_PLUGIN_mutex_lock (&nvthd->ptx_dev->stream_lock);
+ pthread_mutex_lock (&nvthd->ptx_dev->stream_lock);
for (s = nvthd->ptx_dev->active_streams; s != NULL; s = s->next)
{
if ((s->multithreaded || pthread_equal (s->host_thread, self))
&& cuStreamQuery (s->stream) == CUDA_ERROR_NOT_READY)
{
- GOMP_PLUGIN_mutex_unlock (&nvthd->ptx_dev->stream_lock);
+ pthread_mutex_unlock (&nvthd->ptx_dev->stream_lock);
return 0;
}
}
- GOMP_PLUGIN_mutex_unlock (&nvthd->ptx_dev->stream_lock);
+ pthread_mutex_unlock (&nvthd->ptx_dev->stream_lock);
event_gc (true);
@@ -1322,7 +1323,7 @@ nvptx_wait_all (void)
pthread_t self = pthread_self ();
struct nvptx_thread *nvthd = nvptx_thread ();
- GOMP_PLUGIN_mutex_lock (&nvthd->ptx_dev->stream_lock);
+ pthread_mutex_lock (&nvthd->ptx_dev->stream_lock);
/* Wait for active streams initiated by this thread (or by multiple threads)
to complete. */
@@ -1342,7 +1343,7 @@ nvptx_wait_all (void)
}
}
- GOMP_PLUGIN_mutex_unlock (&nvthd->ptx_dev->stream_lock);
+ pthread_mutex_unlock (&nvthd->ptx_dev->stream_lock);
event_gc (true);
}
@@ -1368,7 +1369,7 @@ nvptx_wait_all_async (int async)
event_gc (true);
- GOMP_PLUGIN_mutex_lock (&nvthd->ptx_dev->stream_lock);
+ pthread_mutex_lock (&nvthd->ptx_dev->stream_lock);
for (other_stream = nvthd->ptx_dev->active_streams;
other_stream != NULL;
@@ -1396,7 +1397,7 @@ nvptx_wait_all_async (int async)
GOMP_PLUGIN_fatal ("cuStreamWaitEvent error: %s", cuda_error (r));
}
- GOMP_PLUGIN_mutex_unlock (&nvthd->ptx_dev->stream_lock);
+ pthread_mutex_unlock (&nvthd->ptx_dev->stream_lock);
}
static void *
@@ -1442,7 +1443,7 @@ nvptx_set_cuda_stream (int async, void *stream)
pthread_t self = pthread_self ();
struct nvptx_thread *nvthd = nvptx_thread ();
- GOMP_PLUGIN_mutex_lock (&nvthd->ptx_dev->stream_lock);
+ pthread_mutex_lock (&nvthd->ptx_dev->stream_lock);
if (async < 0)
GOMP_PLUGIN_fatal ("bad async %d", async);
@@ -1474,7 +1475,7 @@ nvptx_set_cuda_stream (int async, void *stream)
free (oldstream);
}
- GOMP_PLUGIN_mutex_unlock (&nvthd->ptx_dev->stream_lock);
+ pthread_mutex_unlock (&nvthd->ptx_dev->stream_lock);
(void) select_stream_for_async (async, self, true, (CUstream) stream);
Grüße,
Thomas
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 472 bytes
Desc: not available
URL: <http://gcc.gnu.org/pipermail/gcc-patches/attachments/20150112/4537ba8d/attachment.sig>
More information about the Gcc-patches
mailing list