[gomp4.5] Handle #pragma omp declare target link

Ilya Verbin iverbin@gmail.com
Mon Dec 14 16:48:00 GMT 2015


On Fri, Dec 11, 2015 at 18:27:13 +0100, Jakub Jelinek wrote:
> On Tue, Dec 08, 2015 at 05:45:59PM +0300, Ilya Verbin wrote:
> > @@ -356,6 +361,11 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
> >      }
> >  
> >    gomp_mutex_lock (&devicep->lock);
> > +  if (devicep->state == GOMP_DEVICE_FINALIZED)
> > +    {
> > +      gomp_mutex_unlock (&devicep->lock);
> 
> You need to free (tgt); here I think to avoid leaking memory.

Done.

> > +      return NULL;
> > +    }
> >  
> >    for (i = 0; i < mapnum; i++)
> >      {
> > @@ -834,6 +844,11 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
> >      }
> >  
> >    gomp_mutex_lock (&devicep->lock);
> > +  if (devicep->state == GOMP_DEVICE_FINALIZED)
> > +    {
> > +      gomp_mutex_unlock (&devicep->lock);
> > +      return;
> 
> Supposedly you want at least free (tgt->array); free (tgt); here.

Done.

> Plus the question is if the mappings shouldn't be removed from the splay tree
> before that.

This code can be executed only at program shutdown, so I think that removing
from the splay tree isn't necessary here, it will only consume time.
Besides, we do not remove at shutdown those vars, which have non-zero refcount.

> > +/* This function finalizes all initialized devices.  */
> > +
> > +static void
> > +gomp_target_fini (void)
> > +{
> > +  int i;
> > +  for (i = 0; i < num_devices; i++)
> > +    {
> > +      struct gomp_device_descr *devicep = &devices[i];
> > +      gomp_mutex_lock (&devicep->lock);
> > +      if (devicep->state == GOMP_DEVICE_INITIALIZED)
> > +	{
> > +	  devicep->fini_device_func (devicep->target_id);
> > +	  devicep->state = GOMP_DEVICE_FINALIZED;
> > +	}
> > +      gomp_mutex_unlock (&devicep->lock);
> > +    }
> > +}
> 
> The question is what will this do if there are async target tasks still
> running on some of the devices at this point (forgotten #pragma omp taskwait
> or similar if target nowait regions are started outside of parallel region,
> or exit inside of parallel, etc.  But perhaps it can be handled incrementally.
> Also there is the question that the 
> So I think the patch is ok with the above mentioned changes.

Here is what I've committed to trunk.


libgomp/
	* libgomp.h (gomp_device_state): New enum.
	(struct gomp_device_descr): Replace is_initialized with state.
	(gomp_fini_device): Remove declaration.
	* oacc-host.c (host_dispatch): Use state instead of is_initialized.
	* oacc-init.c (acc_init_1): Use state instead of is_initialized.
	(acc_shutdown_1): Likewise.  Inline gomp_fini_device.
	(acc_set_device_type): Use state instead of is_initialized.
	(acc_set_device_num): Likewise.
	* target.c (resolve_device): Use state instead of is_initialized.
	Do not initialize finalized device.
	(gomp_map_vars): Do nothing if device is finalized.
	(gomp_unmap_vars): Likewise.
	(gomp_update): Likewise.
	(GOMP_offload_register_ver): Use state instead of is_initialized.
	(GOMP_offload_unregister_ver): Likewise.
	(gomp_init_device): Likewise.
	(gomp_unload_device): Likewise.
	(gomp_fini_device): Remove.
	(gomp_get_target_fn_addr): Do nothing if device is finalized.
	(GOMP_target): Go to host fallback if device is finalized.
	(GOMP_target_ext): Likewise.
	(gomp_exit_data): Do nothing if device is finalized.
	(gomp_target_task_fn): Go to host fallback if device is finalized.
	(gomp_target_fini): New static function.
	(gomp_target_init): Use state instead of is_initialized.
	Call gomp_target_fini at exit.
liboffloadmic/
	* plugin/libgomp-plugin-intelmic.cpp (unregister_main_image): Remove.
	(register_main_image): Do not call unregister_main_image at exit.
	(GOMP_OFFLOAD_fini_device): Allow for OpenMP.  Unregister main image.


diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index c467f97..9d9949f 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -888,6 +888,14 @@ typedef struct acc_dispatch_t
   } cuda;
 } acc_dispatch_t;
 
+/* Various state of the accelerator device.  */
+enum gomp_device_state
+{
+  GOMP_DEVICE_UNINITIALIZED,
+  GOMP_DEVICE_INITIALIZED,
+  GOMP_DEVICE_FINALIZED
+};
+
 /* This structure describes accelerator device.
    It contains name of the corresponding libgomp plugin, function handlers for
    interaction with the device, ID-number of the device, and information about
@@ -933,8 +941,10 @@ struct gomp_device_descr
   /* Mutex for the mutable data.  */
   gomp_mutex_t lock;
 
-  /* Set to true when device is initialized.  */
-  bool is_initialized;
+  /* Current state of the device.  OpenACC allows to move from INITIALIZED state
+     back to UNINITIALIZED state.  OpenMP allows only to move from INITIALIZED
+     to FINALIZED state (at program shutdown).  */
+  enum gomp_device_state state;
 
   /* OpenACC-specific data and functions.  */
   /* This is mutable because of its mutable data_environ and target_data
@@ -962,7 +972,6 @@ extern void gomp_copy_from_async (struct target_mem_desc *);
 extern void gomp_unmap_vars (struct target_mem_desc *, bool);
 extern void gomp_init_device (struct gomp_device_descr *);
 extern void gomp_free_memmap (struct splay_tree_s *);
-extern void gomp_fini_device (struct gomp_device_descr *);
 extern void gomp_unload_device (struct gomp_device_descr *);
 
 /* work.c */
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
index 9874804..d289b38 100644
--- a/libgomp/oacc-host.c
+++ b/libgomp/oacc-host.c
@@ -222,7 +222,7 @@ static struct gomp_device_descr host_dispatch =
 
     .mem_map = { NULL },
     /* .lock initilized in goacc_host_init.  */
-    .is_initialized = false,
+    .state = GOMP_DEVICE_UNINITIALIZED,
 
     .openacc = {
       .data_environ = NULL,
diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c
index 9a9a0b0..c4f7b67 100644
--- a/libgomp/oacc-init.c
+++ b/libgomp/oacc-init.c
@@ -225,7 +225,7 @@ acc_init_1 (acc_device_t d)
   acc_dev = &base_dev[goacc_device_num];
 
   gomp_mutex_lock (&acc_dev->lock);
-  if (acc_dev->is_initialized)
+  if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
     {
       gomp_mutex_unlock (&acc_dev->lock);
       gomp_fatal ("device already active");
@@ -306,10 +306,11 @@ acc_shutdown_1 (acc_device_t d)
     {
       struct gomp_device_descr *acc_dev = &base_dev[i];
       gomp_mutex_lock (&acc_dev->lock);
-      if (acc_dev->is_initialized)
+      if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
         {
 	  devices_active = true;
-	  gomp_fini_device (acc_dev);
+	  acc_dev->fini_device_func (acc_dev->target_id);
+	  acc_dev->state = GOMP_DEVICE_UNINITIALIZED;
 	}
       gomp_mutex_unlock (&acc_dev->lock);
     }
@@ -506,7 +507,7 @@ acc_set_device_type (acc_device_t d)
   acc_dev = &base_dev[goacc_device_num];
 
   gomp_mutex_lock (&acc_dev->lock);
-  if (!acc_dev->is_initialized)
+  if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED)
     gomp_init_device (acc_dev);
   gomp_mutex_unlock (&acc_dev->lock);
 
@@ -608,7 +609,7 @@ acc_set_device_num (int ord, acc_device_t d)
       acc_dev = &base_dev[ord];
 
       gomp_mutex_lock (&acc_dev->lock);
-      if (!acc_dev->is_initialized)
+      if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED)
         gomp_init_device (acc_dev);
       gomp_mutex_unlock (&acc_dev->lock);
 
diff --git a/libgomp/target.c b/libgomp/target.c
index cf9d0e6..932b176 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -118,8 +118,13 @@ resolve_device (int device_id)
     return NULL;
 
   gomp_mutex_lock (&devices[device_id].lock);
-  if (!devices[device_id].is_initialized)
+  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
     gomp_init_device (&devices[device_id]);
+  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
+    {
+      gomp_mutex_unlock (&devices[device_id].lock);
+      return NULL;
+    }
   gomp_mutex_unlock (&devices[device_id].lock);
 
   return &devices[device_id];
@@ -356,6 +361,12 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
     }
 
   gomp_mutex_lock (&devicep->lock);
+  if (devicep->state == GOMP_DEVICE_FINALIZED)
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      free (tgt);
+      return NULL;
+    }
 
   for (i = 0; i < mapnum; i++)
     {
@@ -834,6 +845,13 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
     }
 
   gomp_mutex_lock (&devicep->lock);
+  if (devicep->state == GOMP_DEVICE_FINALIZED)
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      free (tgt->array);
+      free (tgt);
+      return;
+    }
 
   size_t i;
   for (i = 0; i < tgt->list_count; i++)
@@ -896,6 +914,12 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
     return;
 
   gomp_mutex_lock (&devicep->lock);
+  if (devicep->state == GOMP_DEVICE_FINALIZED)
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      return;
+    }
+
   for (i = 0; i < mapnum; i++)
     if (sizes[i])
       {
@@ -1106,7 +1130,8 @@ GOMP_offload_register_ver (unsigned version, const void *host_table,
     {
       struct gomp_device_descr *devicep = &devices[i];
       gomp_mutex_lock (&devicep->lock);
-      if (devicep->type == target_type && devicep->is_initialized)
+      if (devicep->type == target_type
+	  && devicep->state == GOMP_DEVICE_INITIALIZED)
 	gomp_load_image_to_device (devicep, version,
 				   host_table, target_data, true);
       gomp_mutex_unlock (&devicep->lock);
@@ -1150,7 +1175,8 @@ GOMP_offload_unregister_ver (unsigned version, const void *host_table,
     {
       struct gomp_device_descr *devicep = &devices[i];
       gomp_mutex_lock (&devicep->lock);
-      if (devicep->type == target_type && devicep->is_initialized)
+      if (devicep->type == target_type
+	  && devicep->state == GOMP_DEVICE_INITIALIZED)
 	gomp_unload_image_from_device (devicep, version,
 				       host_table, target_data);
       gomp_mutex_unlock (&devicep->lock);
@@ -1193,13 +1219,13 @@ gomp_init_device (struct gomp_device_descr *devicep)
 				   false);
     }
 
-  devicep->is_initialized = true;
+  devicep->state = GOMP_DEVICE_INITIALIZED;
 }
 
 attribute_hidden void
 gomp_unload_device (struct gomp_device_descr *devicep)
 {
-  if (devicep->is_initialized)
+  if (devicep->state == GOMP_DEVICE_INITIALIZED)
     {
       unsigned i;
       
@@ -1231,18 +1257,6 @@ gomp_free_memmap (struct splay_tree_s *mem_map)
     }
 }
 
-/* This function de-initializes the target device, specified by DEVICEP.
-   DEVICEP must be locked on entry, and remains locked on return.  */
-
-attribute_hidden void
-gomp_fini_device (struct gomp_device_descr *devicep)
-{
-  if (devicep->is_initialized)
-    devicep->fini_device_func (devicep->target_id);
-
-  devicep->is_initialized = false;
-}
-
 /* Host fallback for GOMP_target{,_ext} routines.  */
 
 static void
@@ -1310,6 +1324,12 @@ gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
   else
     {
       gomp_mutex_lock (&devicep->lock);
+      if (devicep->state == GOMP_DEVICE_FINALIZED)
+	{
+	  gomp_mutex_unlock (&devicep->lock);
+	  return NULL;
+	}
+
       struct splay_tree_key_s k;
       k.host_start = (uintptr_t) host_fn;
       k.host_end = k.host_start + 1;
@@ -1339,12 +1359,12 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
 {
   struct gomp_device_descr *devicep = resolve_device (device);
 
+  void *fn_addr;
   if (devicep == NULL
-      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
     return gomp_target_fallback (fn, hostaddrs);
 
-  void *fn_addr = gomp_get_target_fn_addr (devicep, fn);
-
   struct target_mem_desc *tgt_vars
     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
 		     GOMP_MAP_VARS_TARGET);
@@ -1430,15 +1450,15 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
 	gomp_task_maybe_wait_for_dependencies (depend);
     }
 
+  void *fn_addr;
   if (devicep == NULL
-      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
     {
       gomp_target_fallback_firstprivate (fn, mapnum, hostaddrs, sizes, kinds);
       return;
     }
 
-  void *fn_addr = gomp_get_target_fn_addr (devicep, fn);
-
   struct target_mem_desc *tgt_vars
     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
 		     GOMP_MAP_VARS_TARGET);
@@ -1593,6 +1613,12 @@ gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
   const int typemask = 0xff;
   size_t i;
   gomp_mutex_lock (&devicep->lock);
+  if (devicep->state == GOMP_DEVICE_FINALIZED)
+    {
+      gomp_mutex_unlock (&devicep->lock);
+      return;
+    }
+
   for (i = 0; i < mapnum; i++)
     {
       struct splay_tree_key_s cur_node;
@@ -1729,8 +1755,10 @@ gomp_target_task_fn (void *data)
 
   if (ttask->fn != NULL)
     {
+      void *fn_addr;
       if (devicep == NULL
-	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn)))
 	{
 	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
 	  gomp_target_fallback_firstprivate (ttask->fn, ttask->mapnum,
@@ -1745,7 +1773,6 @@ gomp_target_task_fn (void *data)
 	  return false;
 	}
 
-      void *fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn);
       ttask->tgt
 	= gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, NULL,
 			 ttask->sizes, ttask->kinds, true,
@@ -2282,6 +2309,25 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
   return 0;
 }
 
+/* This function finalizes all initialized devices.  */
+
+static void
+gomp_target_fini (void)
+{
+  int i;
+  for (i = 0; i < num_devices; i++)
+    {
+      struct gomp_device_descr *devicep = &devices[i];
+      gomp_mutex_lock (&devicep->lock);
+      if (devicep->state == GOMP_DEVICE_INITIALIZED)
+	{
+	  devicep->fini_device_func (devicep->target_id);
+	  devicep->state = GOMP_DEVICE_FINALIZED;
+	}
+      gomp_mutex_unlock (&devicep->lock);
+    }
+}
+
 /* This function initializes the runtime needed for offloading.
    It parses the list of offload targets and tries to load the plugins for
    these targets.  On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
@@ -2341,7 +2387,7 @@ gomp_target_init (void)
 		/* current_device.capabilities has already been set.  */
 		current_device.type = current_device.get_type_func ();
 		current_device.mem_map.root = NULL;
-		current_device.is_initialized = false;
+		current_device.state = GOMP_DEVICE_UNINITIALIZED;
 		current_device.openacc.data_environ = NULL;
 		for (i = 0; i < new_num_devices; i++)
 		  {
@@ -2387,6 +2433,9 @@ gomp_target_init (void)
       if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
 	goacc_register (&devices[i]);
     }
+
+  if (atexit (gomp_target_fini) != 0)
+    gomp_fatal ("atexit failed");
 }
 
 #else /* PLUGIN_SUPPORT */
diff --git a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
index f8c1725..68f7b2c 100644
--- a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
+++ b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
@@ -231,12 +231,6 @@ offload (const char *file, uint64_t line, int device, const char *name,
 }
 
 static void
-unregister_main_image ()
-{
-  __offload_unregister_image (&main_target_image);
-}
-
-static void
 register_main_image ()
 {
   /* Do not check the return value, because old versions of liboffloadmic did
@@ -246,12 +240,6 @@ register_main_image ()
   /* liboffloadmic will call GOMP_PLUGIN_target_task_completion when
      asynchronous task on target is completed.  */
   __offload_register_task_callback (GOMP_PLUGIN_target_task_completion);
-
-  if (atexit (unregister_main_image) != 0)
-    {
-      fprintf (stderr, "%s: atexit failed\n", __FILE__);
-      exit (1);
-    }
 }
 
 /* liboffloadmic loads and runs offload_target_main on all available devices
@@ -269,8 +257,9 @@ extern "C" void
 GOMP_OFFLOAD_fini_device (int device)
 {
   TRACE ("(device = %d)", device);
-  /* Unreachable for GOMP_OFFLOAD_CAP_OPENMP_400.  */
-  abort ();
+
+  /* liboffloadmic will finalize target processes on all available devices.  */
+  __offload_unregister_image (&main_target_image);
 }
 
 static void


  -- Ilya



More information about the Gcc-patches mailing list