This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [gomp4.5] Handle #pragma omp declare target link
- From: Ilya Verbin <iverbin at gmail dot com>
- To: Jakub Jelinek <jakub at redhat dot com>
- Cc: gcc-patches at gcc dot gnu dot org, Kirill Yukhin <kirill dot yukhin at gmail dot com>, Thomas Schwinge <thomas at codesourcery dot com>
- Date: Tue, 8 Dec 2015 17:45:59 +0300
- Subject: Re: [gomp4.5] Handle #pragma omp declare target link
- Authentication-results: sourceware.org; auth=none
- References: <20151127165009 dot GA24771 at msticlxl57 dot ims dot intel dot com> <20151130120459 dot GB5675 at tucnak dot redhat dot com> <20151130202934 dot GA22962 at msticlxl57 dot ims dot intel dot com> <20151130204902 dot GJ5675 at tucnak dot redhat dot com> <20151130205520 dot GB22962 at msticlxl57 dot ims dot intel dot com> <20151201081800 dot GN5675 at tucnak dot redhat dot com> <C5AE729F-0BA3-4933-91E1-E4729107798B at gmail dot com> <20151201131559 dot GU5675 at tucnak dot redhat dot com> <20151201172927 dot GA7692 at msticlxl57 dot ims dot intel dot com> <20151201190504 dot GY5675 at tucnak dot redhat dot com>
On Tue, Dec 01, 2015 at 20:05:04 +0100, Jakub Jelinek wrote:
> This is racy, tsan would tell you so.
> Instead of a global var, I'd just change the devicep->is_initialized
> field from bool into a 3 state field (perhaps enum), with states
> uninitialized, initialized, finalized, and then say in resolve_device,
>
> gomp_mutex_lock (&devices[device_id].lock);
> if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
> gomp_init_device (&devices[device_id]);
> else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
> {
> gomp_mutex_unlock (&devices[device_id].lock);
> return NULL;
> }
> gomp_mutex_unlock (&devices[device_id].lock);
>
> Though, of course, that is incomplete, because resolve_device takes one
> lock, gomp_get_target_fn_addr another one, gomp_map_vars yet another one.
> So I think either we want to rewrite the locking, such that say
> resolve_device returns a locked device and then you perform stuff on the
> locked device (disadvantage is that gomp_map_vars will call gomp_malloc
> with the lock held, which can take some time to allocate the memory),
> or there needs to be the possibility that gomp_map_vars rechecks if the
> device has not been finalized after taking the lock and returns to the
> caller if the device has been finalized in between resolve_device and
> gomp_map_vars.
This patch implements the second approach. Is it OK?
Bootstrap and make check-target-libgomp passed.
libgomp/
* libgomp.h (gomp_device_state): New enum.
(struct gomp_device_descr): Replace is_initialized with state.
(gomp_fini_device): Remove declaration.
* oacc-host.c (host_dispatch): Use state instead of is_initialized.
* oacc-init.c (acc_init_1): Use state instead of is_initialized.
(acc_shutdown_1): Likewise. Inline gomp_fini_device.
(acc_set_device_type): Use state instead of is_initialized.
(acc_set_device_num): Likewise.
* target.c (resolve_device): Use state instead of is_initialized.
Do not initialize finalized device.
(gomp_map_vars): Do nothing if device is finalized.
(gomp_unmap_vars): Likewise.
(gomp_update): Likewise.
(GOMP_offload_register_ver): Use state instead of is_initialized.
(GOMP_offload_unregister_ver): Likewise.
(gomp_init_device): Likewise.
(gomp_unload_device): Likewise.
(gomp_fini_device): Remove.
(gomp_get_target_fn_addr): Do nothing if device is finalized.
(GOMP_target): Go to host fallback if device is finalized.
(GOMP_target_ext): Likewise.
(gomp_exit_data): Do nothing if device is finalized.
(gomp_target_task_fn): Go to host fallback if device is finalized.
(gomp_target_fini): New static function.
(gomp_target_init): Use state instead of is_initialized.
Call gomp_target_fini at exit.
liboffloadmic/
* plugin/libgomp-plugin-intelmic.cpp (unregister_main_image): Remove.
(register_main_image): Do not call unregister_main_image at exit.
(GOMP_OFFLOAD_fini_device): Allow for OpenMP. Unregister main image.
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index c467f97..9d9949f 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -888,6 +888,14 @@ typedef struct acc_dispatch_t
} cuda;
} acc_dispatch_t;
+/* Various state of the accelerator device. */
+enum gomp_device_state
+{
+ GOMP_DEVICE_UNINITIALIZED,
+ GOMP_DEVICE_INITIALIZED,
+ GOMP_DEVICE_FINALIZED
+};
+
/* This structure describes accelerator device.
It contains name of the corresponding libgomp plugin, function handlers for
interaction with the device, ID-number of the device, and information about
@@ -933,8 +941,10 @@ struct gomp_device_descr
/* Mutex for the mutable data. */
gomp_mutex_t lock;
- /* Set to true when device is initialized. */
- bool is_initialized;
+ /* Current state of the device. OpenACC allows to move from INITIALIZED state
+ back to UNINITIALIZED state. OpenMP allows only to move from INITIALIZED
+ to FINALIZED state (at program shutdown). */
+ enum gomp_device_state state;
/* OpenACC-specific data and functions. */
/* This is mutable because of its mutable data_environ and target_data
@@ -962,7 +972,6 @@ extern void gomp_copy_from_async (struct target_mem_desc *);
extern void gomp_unmap_vars (struct target_mem_desc *, bool);
extern void gomp_init_device (struct gomp_device_descr *);
extern void gomp_free_memmap (struct splay_tree_s *);
-extern void gomp_fini_device (struct gomp_device_descr *);
extern void gomp_unload_device (struct gomp_device_descr *);
/* work.c */
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
index 9874804..d289b38 100644
--- a/libgomp/oacc-host.c
+++ b/libgomp/oacc-host.c
@@ -222,7 +222,7 @@ static struct gomp_device_descr host_dispatch =
.mem_map = { NULL },
/* .lock initilized in goacc_host_init. */
- .is_initialized = false,
+ .state = GOMP_DEVICE_UNINITIALIZED,
.openacc = {
.data_environ = NULL,
diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c
index 9a9a0b0..c4f7b67 100644
--- a/libgomp/oacc-init.c
+++ b/libgomp/oacc-init.c
@@ -225,7 +225,7 @@ acc_init_1 (acc_device_t d)
acc_dev = &base_dev[goacc_device_num];
gomp_mutex_lock (&acc_dev->lock);
- if (acc_dev->is_initialized)
+ if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
{
gomp_mutex_unlock (&acc_dev->lock);
gomp_fatal ("device already active");
@@ -306,10 +306,11 @@ acc_shutdown_1 (acc_device_t d)
{
struct gomp_device_descr *acc_dev = &base_dev[i];
gomp_mutex_lock (&acc_dev->lock);
- if (acc_dev->is_initialized)
+ if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
{
devices_active = true;
- gomp_fini_device (acc_dev);
+ acc_dev->fini_device_func (acc_dev->target_id);
+ acc_dev->state = GOMP_DEVICE_UNINITIALIZED;
}
gomp_mutex_unlock (&acc_dev->lock);
}
@@ -506,7 +507,7 @@ acc_set_device_type (acc_device_t d)
acc_dev = &base_dev[goacc_device_num];
gomp_mutex_lock (&acc_dev->lock);
- if (!acc_dev->is_initialized)
+ if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED)
gomp_init_device (acc_dev);
gomp_mutex_unlock (&acc_dev->lock);
@@ -608,7 +609,7 @@ acc_set_device_num (int ord, acc_device_t d)
acc_dev = &base_dev[ord];
gomp_mutex_lock (&acc_dev->lock);
- if (!acc_dev->is_initialized)
+ if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED)
gomp_init_device (acc_dev);
gomp_mutex_unlock (&acc_dev->lock);
diff --git a/libgomp/target.c b/libgomp/target.c
index cf9d0e6..be96a9e 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -118,8 +118,13 @@ resolve_device (int device_id)
return NULL;
gomp_mutex_lock (&devices[device_id].lock);
- if (!devices[device_id].is_initialized)
+ if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
gomp_init_device (&devices[device_id]);
+ else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
+ {
+ gomp_mutex_unlock (&devices[device_id].lock);
+ return NULL;
+ }
gomp_mutex_unlock (&devices[device_id].lock);
return &devices[device_id];
@@ -356,6 +361,11 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
}
gomp_mutex_lock (&devicep->lock);
+ if (devicep->state == GOMP_DEVICE_FINALIZED)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ return NULL;
+ }
for (i = 0; i < mapnum; i++)
{
@@ -834,6 +844,11 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
}
gomp_mutex_lock (&devicep->lock);
+ if (devicep->state == GOMP_DEVICE_FINALIZED)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ return;
+ }
size_t i;
for (i = 0; i < tgt->list_count; i++)
@@ -896,6 +911,12 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
return;
gomp_mutex_lock (&devicep->lock);
+ if (devicep->state == GOMP_DEVICE_FINALIZED)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ return;
+ }
+
for (i = 0; i < mapnum; i++)
if (sizes[i])
{
@@ -1106,7 +1127,8 @@ GOMP_offload_register_ver (unsigned version, const void *host_table,
{
struct gomp_device_descr *devicep = &devices[i];
gomp_mutex_lock (&devicep->lock);
- if (devicep->type == target_type && devicep->is_initialized)
+ if (devicep->type == target_type
+ && devicep->state == GOMP_DEVICE_INITIALIZED)
gomp_load_image_to_device (devicep, version,
host_table, target_data, true);
gomp_mutex_unlock (&devicep->lock);
@@ -1150,7 +1172,8 @@ GOMP_offload_unregister_ver (unsigned version, const void *host_table,
{
struct gomp_device_descr *devicep = &devices[i];
gomp_mutex_lock (&devicep->lock);
- if (devicep->type == target_type && devicep->is_initialized)
+ if (devicep->type == target_type
+ && devicep->state == GOMP_DEVICE_INITIALIZED)
gomp_unload_image_from_device (devicep, version,
host_table, target_data);
gomp_mutex_unlock (&devicep->lock);
@@ -1193,13 +1216,13 @@ gomp_init_device (struct gomp_device_descr *devicep)
false);
}
- devicep->is_initialized = true;
+ devicep->state = GOMP_DEVICE_INITIALIZED;
}
attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
- if (devicep->is_initialized)
+ if (devicep->state == GOMP_DEVICE_INITIALIZED)
{
unsigned i;
@@ -1231,18 +1254,6 @@ gomp_free_memmap (struct splay_tree_s *mem_map)
}
}
-/* This function de-initializes the target device, specified by DEVICEP.
- DEVICEP must be locked on entry, and remains locked on return. */
-
-attribute_hidden void
-gomp_fini_device (struct gomp_device_descr *devicep)
-{
- if (devicep->is_initialized)
- devicep->fini_device_func (devicep->target_id);
-
- devicep->is_initialized = false;
-}
-
/* Host fallback for GOMP_target{,_ext} routines. */
static void
@@ -1310,6 +1321,12 @@ gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
else
{
gomp_mutex_lock (&devicep->lock);
+ if (devicep->state == GOMP_DEVICE_FINALIZED)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ return NULL;
+ }
+
struct splay_tree_key_s k;
k.host_start = (uintptr_t) host_fn;
k.host_end = k.host_start + 1;
@@ -1339,12 +1356,12 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
{
struct gomp_device_descr *devicep = resolve_device (device);
+ void *fn_addr;
if (devicep == NULL
- || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
return gomp_target_fallback (fn, hostaddrs);
- void *fn_addr = gomp_get_target_fn_addr (devicep, fn);
-
struct target_mem_desc *tgt_vars
= gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
GOMP_MAP_VARS_TARGET);
@@ -1430,15 +1447,15 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
gomp_task_maybe_wait_for_dependencies (depend);
}
+ void *fn_addr;
if (devicep == NULL
- || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
{
gomp_target_fallback_firstprivate (fn, mapnum, hostaddrs, sizes, kinds);
return;
}
- void *fn_addr = gomp_get_target_fn_addr (devicep, fn);
-
struct target_mem_desc *tgt_vars
= gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
GOMP_MAP_VARS_TARGET);
@@ -1593,6 +1610,12 @@ gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
const int typemask = 0xff;
size_t i;
gomp_mutex_lock (&devicep->lock);
+ if (devicep->state == GOMP_DEVICE_FINALIZED)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ return;
+ }
+
for (i = 0; i < mapnum; i++)
{
struct splay_tree_key_s cur_node;
@@ -1729,8 +1752,10 @@ gomp_target_task_fn (void *data)
if (ttask->fn != NULL)
{
+ void *fn_addr;
if (devicep == NULL
- || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn)))
{
ttask->state = GOMP_TARGET_TASK_FALLBACK;
gomp_target_fallback_firstprivate (ttask->fn, ttask->mapnum,
@@ -1745,7 +1770,6 @@ gomp_target_task_fn (void *data)
return false;
}
- void *fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn);
ttask->tgt
= gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, NULL,
ttask->sizes, ttask->kinds, true,
@@ -2282,6 +2306,25 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
return 0;
}
+/* This function finalizes all initialized devices. */
+
+static void
+gomp_target_fini (void)
+{
+ int i;
+ for (i = 0; i < num_devices; i++)
+ {
+ struct gomp_device_descr *devicep = &devices[i];
+ gomp_mutex_lock (&devicep->lock);
+ if (devicep->state == GOMP_DEVICE_INITIALIZED)
+ {
+ devicep->fini_device_func (devicep->target_id);
+ devicep->state = GOMP_DEVICE_FINALIZED;
+ }
+ gomp_mutex_unlock (&devicep->lock);
+ }
+}
+
/* This function initializes the runtime needed for offloading.
It parses the list of offload targets and tries to load the plugins for
these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
@@ -2341,7 +2384,7 @@ gomp_target_init (void)
/* current_device.capabilities has already been set. */
current_device.type = current_device.get_type_func ();
current_device.mem_map.root = NULL;
- current_device.is_initialized = false;
+ current_device.state = GOMP_DEVICE_UNINITIALIZED;
current_device.openacc.data_environ = NULL;
for (i = 0; i < new_num_devices; i++)
{
@@ -2387,6 +2430,9 @@ gomp_target_init (void)
if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
goacc_register (&devices[i]);
}
+
+ if (atexit (gomp_target_fini) != 0)
+ gomp_fatal ("atexit failed");
}
#else /* PLUGIN_SUPPORT */
diff --git a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
index f8c1725..68f7b2c 100644
--- a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
+++ b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
@@ -231,12 +231,6 @@ offload (const char *file, uint64_t line, int device, const char *name,
}
static void
-unregister_main_image ()
-{
- __offload_unregister_image (&main_target_image);
-}
-
-static void
register_main_image ()
{
/* Do not check the return value, because old versions of liboffloadmic did
@@ -246,12 +240,6 @@ register_main_image ()
/* liboffloadmic will call GOMP_PLUGIN_target_task_completion when
asynchronous task on target is completed. */
__offload_register_task_callback (GOMP_PLUGIN_target_task_completion);
-
- if (atexit (unregister_main_image) != 0)
- {
- fprintf (stderr, "%s: atexit failed\n", __FILE__);
- exit (1);
- }
}
/* liboffloadmic loads and runs offload_target_main on all available devices
@@ -269,8 +257,9 @@ extern "C" void
GOMP_OFFLOAD_fini_device (int device)
{
TRACE ("(device = %d)", device);
- /* Unreachable for GOMP_OFFLOAD_CAP_OPENMP_400. */
- abort ();
+
+ /* liboffloadmic will finalize target processes on all available devices. */
+ __offload_unregister_image (&main_target_image);
}
static void
-- Ilya