This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: libgomp nvptx plugin: rework initialisation and support the proposed load/unload hooks (was: Merge current set of OpenACC changes from gomp-4_0-branch)


On Mon, Mar 30, 2015 at 22:42:51 +0100, Julian Brown wrote:
> On Mon, 30 Mar 2015 18:42:02 +0200
> Jakub Jelinek <jakub@redhat.com> wrote:
> > But the one Julian posted doesn't apply on top of your patch.
> > If there is any interdiff needed on top of your patch, can it be
> > posted against trunk + your patch?
> 
> Here's a version of my patch against trunk and Ilya's latest patch
> (hopefully!). Tests look OK (libgomp + PTX).

Thanks for rebasing!

On Mon, Mar 30, 2015 at 18:42:02 +0200, Jakub Jelinek wrote:
> > +/* Insert mapping of host -> target address pairs to splay tree.  */
> > +
> > +static void
> > +gomp_splay_tree_insert_mapping (struct gomp_device_descr *devicep,
> > +				struct addr_pair *host_addr,
> > +				struct addr_pair *tgt_addr)
> > +{
> > +  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
> > +  tgt->refcount = 1;
> > +  tgt->array = gomp_malloc (sizeof (*tgt->array));
> > +  tgt->tgt_start = tgt_addr->start;
> > +  tgt->tgt_end = tgt_addr->end;
> > +  tgt->to_free = NULL;
> > +  tgt->list_count = 0;
> > +  tgt->device_descr = devicep;
> > +  splay_tree_node node = tgt->array;
> > +  splay_tree_key k = &node->key;
> > +  k->host_start = host_addr->start;
> > +  k->host_end = host_addr->end;
> > +  k->tgt_offset = 0;
> > +  k->refcount = 1;
> > +  k->copy_from = false;
> > +  k->tgt = tgt;
> > +  node->left = NULL;
> > +  node->right = NULL;
> > +  splay_tree_insert (&devicep->mem_map, node);
> > +}
> 
> What is the reason to register and allocate these one at a time, rather than
> using one struct target_mem_desc with one tgt->array for all splay tree
> nodes registered from one image?
> Perhaps you would just use tgt_start of 0 and tgt_end of 0 too (to make it
> clear it is special) and just use tgt_offset relative to that (i.e.
> absolute), but having to malloc each node individually and having to malloc
> a target_mem_desc for each one sounds expensive.
> Everything is freed just once anyway, isn't it?

Here is a WIP patch; does this look like what you suggested?  It works fine with
functions; however, I'm not sure what to do with variables.  Will gomp_map_vars
work when tgt_start and tgt_end are equal to 0?

> > @@ -654,6 +727,18 @@ void
> >  GOMP_offload_register (void *host_table, enum offload_target_type target_type,
> >  		       void *target_data)
> >  {
> > +  int i;
> > +  gomp_mutex_lock (&register_lock);
> > +
> > +  /* Load image to all initialized devices.  */
> > +  for (i = 0; i < num_devices; i++)
> > +    {
> > +      struct gomp_device_descr *devicep = &devices[i];
> > +      if (devicep->type == target_type && devicep->is_initialized)
> > +	gomp_offload_image_to_device (devicep, host_table, target_data);
> 
> Shouldn't either this function, or gomp_offload_image_to_device lock
> also devicep->lock mutex and unlock at the end?
> Where exactly I guess depends on if the devicep->* hook calls should be
> guarded with the mutex or not.  If yes, it should be this function and
> gomp_init_device.
> 
> > +      if (devicep->type != target_type || !devicep->is_initialized)
> > +	continue;
> > +
> 
> Similarly.

I've added lock/unlock to GOMP_offload_register and GOMP_offload_unregister.
All calls to gomp_init_device were already guarded.

> > +      devicep->unload_image_func (devicep->target_id, target_data);
> > +
> > +      /* Remove mapping from splay tree.  */
> > +      for (j = 0; j < num_funcs; j++)
> > +	{
> > +	  struct splay_tree_key_s k;
> > +	  k.host_start = (uintptr_t) host_func_table[j];
> > +	  k.host_end = k.host_start + 1;
> > +	  splay_tree_remove (&devicep->mem_map, &k);
> > +	}
> > +
> > +      for (j = 0; j < num_vars; j++)
> > +	{
> > +	  struct splay_tree_key_s k;
> > +	  k.host_start = (uintptr_t) host_var_table[j*2];
> > +	  k.host_end = k.host_start + (uintptr_t) host_var_table[j*2+1];
> > +	  splay_tree_remove (&devicep->mem_map, &k);
> > +	}
> > +    }
> 
> Aren't you leaking all the tgt->array and tgt allocations here?
> Though, if you change it to just two allocations (one tgt, one array),
> you'd need to free just once.

You're right.  I've fixed this for functions; variables are still WIP.


diff --git a/gcc/config/i386/intelmic-mkoffload.c b/gcc/config/i386/intelmic-mkoffload.c
index f93007c..e101f93 100644
--- a/gcc/config/i386/intelmic-mkoffload.c
+++ b/gcc/config/i386/intelmic-mkoffload.c
@@ -350,14 +350,24 @@ generate_host_descr_file (const char *host_compiler)
 	   "#ifdef __cplusplus\n"
 	   "extern \"C\"\n"
 	   "#endif\n"
-	   "void GOMP_offload_register (void *, int, void *);\n\n"
+	   "void GOMP_offload_register (void *, int, void *);\n"
+	   "void GOMP_offload_unregister (void *, int, void *);\n\n"
 
 	   "__attribute__((constructor))\n"
 	   "static void\n"
 	   "init (void)\n"
 	   "{\n"
 	   "  GOMP_offload_register (&__OFFLOAD_TABLE__, %d, __offload_target_data);\n"
+	   "}\n\n", GOMP_DEVICE_INTEL_MIC);
+
+  fprintf (src_file,
+	   "__attribute__((destructor))\n"
+	   "static void\n"
+	   "fini (void)\n"
+	   "{\n"
+	   "  GOMP_offload_unregister (&__OFFLOAD_TABLE__, %d, __offload_target_data);\n"
 	   "}\n", GOMP_DEVICE_INTEL_MIC);
+
   fclose (src_file);
 
   unsigned new_argc = 0;
diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h
index d9cbff5..1072ae4 100644
--- a/libgomp/libgomp-plugin.h
+++ b/libgomp/libgomp-plugin.h
@@ -51,14 +51,12 @@ enum offload_target_type
   OFFLOAD_TARGET_TYPE_INTEL_MIC = 6
 };
 
-/* Auxiliary struct, used for transferring a host-target address range mapping
-   from plugin to libgomp.  */
-struct mapping_table
+/* Auxiliary struct, used for transferring pairs of addresses from plugin
+   to libgomp.  */
+struct addr_pair
 {
-  uintptr_t host_start;
-  uintptr_t host_end;
-  uintptr_t tgt_start;
-  uintptr_t tgt_end;
+  uintptr_t start;
+  uintptr_t end;
 };
 
 /* Miscellaneous functions.  */
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 3089401..a1d42c5 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -224,7 +224,6 @@ struct gomp_team_state
 };
 
 struct target_mem_desc;
-struct gomp_memory_mapping;
 
 /* These are the OpenMP 4.0 Internal Control Variables described in
    section 2.3.1.  Those described as having one copy per task are
@@ -657,7 +656,7 @@ struct target_mem_desc {
   struct gomp_device_descr *device_descr;
 
   /* Memory mapping info for the thread that created this descriptor.  */
-  struct gomp_memory_mapping *mem_map;
+  struct splay_tree_s *mem_map;
 
   /* List of splay keys to remove (or decrease refcount)
      at the end of region.  */
@@ -683,20 +682,6 @@ struct splay_tree_key_s {
 
 #include "splay-tree.h"
 
-/* Information about mapped memory regions (per device/context).  */
-
-struct gomp_memory_mapping
-{
-  /* Mutex for operating with the splay tree and other shared structures.  */
-  gomp_mutex_t lock;
-
-  /* True when tables have been added to this memory map.  */
-  bool is_initialized;
-
-  /* Splay tree containing information about mapped memory regions.  */
-  struct splay_tree_s splay_tree;
-};
-
 typedef struct acc_dispatch_t
 {
   /* This is a linked list of data mapped using the
@@ -773,19 +758,18 @@ struct gomp_device_descr
   unsigned int (*get_caps_func) (void);
   int (*get_type_func) (void);
   int (*get_num_devices_func) (void);
-  void (*register_image_func) (void *, void *);
   void (*init_device_func) (int);
   void (*fini_device_func) (int);
-  int (*get_table_func) (int, struct mapping_table **);
+  int (*load_image_func) (int, void *, struct addr_pair **);
+  void (*unload_image_func) (int, void *);
   void *(*alloc_func) (int, size_t);
   void (*free_func) (int, void *);
   void *(*dev2host_func) (int, void *, const void *, size_t);
   void *(*host2dev_func) (int, void *, const void *, size_t);
   void (*run_func) (int, void *, void *);
 
-  /* Memory-mapping info for this device instance.  */
-  /* Uses a separate lock.  */
-  struct gomp_memory_mapping mem_map;
+  /* Splay tree containing information about mapped memory regions.  */
+  struct splay_tree_s mem_map;
 
   /* Mutex for the mutable data.  */
   gomp_mutex_t lock;
@@ -793,9 +777,6 @@ struct gomp_device_descr
   /* Set to true when device is initialized.  */
   bool is_initialized;
 
-  /* True when offload regions have been registered with this device.  */
-  bool offload_regions_registered;
-
   /* OpenACC-specific data and functions.  */
   /* This is mutable because of its mutable data_environ and target_data
      members.  */
@@ -811,9 +792,7 @@ extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
 extern void gomp_copy_from_async (struct target_mem_desc *);
 extern void gomp_unmap_vars (struct target_mem_desc *, bool);
 extern void gomp_init_device (struct gomp_device_descr *);
-extern void gomp_init_tables (struct gomp_device_descr *,
-			      struct gomp_memory_mapping *);
-extern void gomp_free_memmap (struct gomp_memory_mapping *);
+extern void gomp_free_memmap (struct splay_tree_s *);
 extern void gomp_fini_device (struct gomp_device_descr *);
 
 /* work.c */
diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index f44174e..2b2b953 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -231,6 +231,7 @@ GOMP_4.0 {
 GOMP_4.0.1 {
   global:
 	GOMP_offload_register;
+	GOMP_offload_unregister;
 } GOMP_4.0;
 
 OACC_2.0 {
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
index 6aeb1e7..e4756b6 100644
--- a/libgomp/oacc-host.c
+++ b/libgomp/oacc-host.c
@@ -43,20 +43,18 @@ static struct gomp_device_descr host_dispatch =
     .get_caps_func = GOMP_OFFLOAD_get_caps,
     .get_type_func = GOMP_OFFLOAD_get_type,
     .get_num_devices_func = GOMP_OFFLOAD_get_num_devices,
-    .register_image_func = GOMP_OFFLOAD_register_image,
     .init_device_func = GOMP_OFFLOAD_init_device,
     .fini_device_func = GOMP_OFFLOAD_fini_device,
-    .get_table_func = GOMP_OFFLOAD_get_table,
+    .load_image_func = GOMP_OFFLOAD_load_image,
+    .unload_image_func = GOMP_OFFLOAD_unload_image,
     .alloc_func = GOMP_OFFLOAD_alloc,
     .free_func = GOMP_OFFLOAD_free,
     .dev2host_func = GOMP_OFFLOAD_dev2host,
     .host2dev_func = GOMP_OFFLOAD_host2dev,
     .run_func = GOMP_OFFLOAD_run,
 
-    .mem_map.is_initialized = false,
-    .mem_map.splay_tree.root = NULL,
+    .mem_map.root = NULL,
     .is_initialized = false,
-    .offload_regions_registered = false,
 
     .openacc = {
       .open_device_func = GOMP_OFFLOAD_openacc_open_device,
@@ -94,7 +92,6 @@ static struct gomp_device_descr host_dispatch =
 static __attribute__ ((constructor))
 void goacc_host_init (void)
 {
-  gomp_mutex_init (&host_dispatch.mem_map.lock);
   gomp_mutex_init (&host_dispatch.lock);
   goacc_register (&host_dispatch);
 }
diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c
index 166eb55..1e0243e 100644
--- a/libgomp/oacc-init.c
+++ b/libgomp/oacc-init.c
@@ -284,12 +284,6 @@ lazy_open (int ord)
     = acc_dev->openacc.create_thread_data_func (acc_dev->openacc.target_data);
 
   acc_dev->openacc.async_set_async_func (acc_async_sync);
-
-  struct gomp_memory_mapping *mem_map = &acc_dev->mem_map;
-  gomp_mutex_lock (&mem_map->lock);
-  if (!mem_map->is_initialized)
-    gomp_init_tables (acc_dev, mem_map);
-  gomp_mutex_unlock (&mem_map->lock);
 }
 
 /* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
@@ -351,10 +345,9 @@ acc_shutdown_1 (acc_device_t d)
 
 	  walk->dev->openacc.target_data = target_data = NULL;
 
-	  struct gomp_memory_mapping *mem_map = &walk->dev->mem_map;
-	  gomp_mutex_lock (&mem_map->lock);
-	  gomp_free_memmap (mem_map);
-	  gomp_mutex_unlock (&mem_map->lock);
+	  gomp_mutex_lock (&walk->dev->lock);
+	  gomp_free_memmap (&walk->dev->mem_map);
+	  gomp_mutex_unlock (&walk->dev->lock);
 
 	  walk->dev = NULL;
 	}
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
index 0096d51..fdc82e6 100644
--- a/libgomp/oacc-mem.c
+++ b/libgomp/oacc-mem.c
@@ -38,7 +38,7 @@
 /* Return block containing [H->S), or NULL if not contained.  */
 
 static splay_tree_key
-lookup_host (struct gomp_memory_mapping *mem_map, void *h, size_t s)
+lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
 {
   struct splay_tree_key_s node;
   splay_tree_key key;
@@ -46,11 +46,9 @@ lookup_host (struct gomp_memory_mapping *mem_map, void *h, size_t s)
   node.host_start = (uintptr_t) h;
   node.host_end = (uintptr_t) h + s;
 
-  gomp_mutex_lock (&mem_map->lock);
-
-  key = splay_tree_lookup (&mem_map->splay_tree, &node);
-
-  gomp_mutex_unlock (&mem_map->lock);
+  gomp_mutex_lock (&dev->lock);
+  key = splay_tree_lookup (&dev->mem_map, &node);
+  gomp_mutex_unlock (&dev->lock);
 
   return key;
 }
@@ -65,14 +63,11 @@ lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
 {
   int i;
   struct target_mem_desc *t;
-  struct gomp_memory_mapping *mem_map;
 
   if (!tgt)
     return NULL;
 
-  mem_map = tgt->mem_map;
-
-  gomp_mutex_lock (&mem_map->lock);
+  gomp_mutex_lock (&tgt->device_descr->lock);
 
   for (t = tgt; t != NULL; t = t->prev)
     {
@@ -80,7 +75,7 @@ lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
         break;
     }
 
-  gomp_mutex_unlock (&mem_map->lock);
+  gomp_mutex_unlock (&tgt->device_descr->lock);
 
   if (!t)
     return NULL;
@@ -176,7 +171,7 @@ acc_deviceptr (void *h)
 
   struct goacc_thread *thr = goacc_thread ();
 
-  n = lookup_host (&thr->dev->mem_map, h, 1);
+  n = lookup_host (thr->dev, h, 1);
 
   if (!n)
     return NULL;
@@ -229,7 +224,7 @@ acc_is_present (void *h, size_t s)
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
-  n = lookup_host (&acc_dev->mem_map, h, s);
+  n = lookup_host (acc_dev, h, s);
 
   if (n && ((uintptr_t)h < n->host_start
 	    || (uintptr_t)h + s > n->host_end
@@ -271,7 +266,7 @@ acc_map_data (void *h, void *d, size_t s)
 	gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
                     (void *)h, (int)s, (void *)d, (int)s);
 
-      if (lookup_host (&acc_dev->mem_map, h, s))
+      if (lookup_host (acc_dev, h, s))
 	gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
 		    (int)s);
 
@@ -296,7 +291,7 @@ acc_unmap_data (void *h)
   /* No need to call lazy open, as the address must have been mapped.  */
 
   size_t host_size;
-  splay_tree_key n = lookup_host (&acc_dev->mem_map, h, 1);
+  splay_tree_key n = lookup_host (acc_dev, h, 1);
   struct target_mem_desc *t;
 
   if (!n)
@@ -320,7 +315,7 @@ acc_unmap_data (void *h)
       t->tgt_end = 0;
       t->to_free = 0;
 
-      gomp_mutex_lock (&acc_dev->mem_map.lock);
+      gomp_mutex_lock (&acc_dev->lock);
 
       for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
 	   tp = t, t = t->prev)
@@ -334,7 +329,7 @@ acc_unmap_data (void *h)
 	    break;
 	  }
 
-      gomp_mutex_unlock (&acc_dev->mem_map.lock);
+      gomp_mutex_unlock (&acc_dev->lock);
     }
 
   gomp_unmap_vars (t, true);
@@ -358,7 +353,7 @@ present_create_copy (unsigned f, void *h, size_t s)
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
-  n = lookup_host (&acc_dev->mem_map, h, s);
+  n = lookup_host (acc_dev, h, s);
   if (n)
     {
       /* Present. */
@@ -389,13 +384,13 @@ present_create_copy (unsigned f, void *h, size_t s)
       tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
 			   false);
 
-      gomp_mutex_lock (&acc_dev->mem_map.lock);
+      gomp_mutex_lock (&acc_dev->lock);
 
       d = tgt->to_free;
       tgt->prev = acc_dev->openacc.data_environ;
       acc_dev->openacc.data_environ = tgt;
 
-      gomp_mutex_unlock (&acc_dev->mem_map.lock);
+      gomp_mutex_unlock (&acc_dev->lock);
     }
 
   return d;
@@ -436,7 +431,7 @@ delete_copyout (unsigned f, void *h, size_t s)
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
-  n = lookup_host (&acc_dev->mem_map, h, s);
+  n = lookup_host (acc_dev, h, s);
 
   /* No need to call lazy open, as the data must already have been
      mapped.  */
@@ -479,7 +474,7 @@ update_dev_host (int is_dev, void *h, size_t s)
   struct goacc_thread *thr = goacc_thread ();
   struct gomp_device_descr *acc_dev = thr->dev;
 
-  n = lookup_host (&acc_dev->mem_map, h, s);
+  n = lookup_host (acc_dev, h, s);
 
   /* No need to call lazy open, as the data must already have been
      mapped.  */
@@ -532,7 +527,7 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
   struct target_mem_desc *t;
   int minrefs = (mapnum == 1) ? 2 : 3;
 
-  n = lookup_host (&acc_dev->mem_map, h, 1);
+  n = lookup_host (acc_dev, h, 1);
 
   if (!n)
     gomp_fatal ("%p is not a mapped block", (void *)h);
@@ -543,7 +538,7 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
 
   struct target_mem_desc *tp;
 
-  gomp_mutex_lock (&acc_dev->mem_map.lock);
+  gomp_mutex_lock (&acc_dev->lock);
 
   if (t->refcount == minrefs)
     {
@@ -570,7 +565,7 @@ gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
   if (force_copyfrom)
     t->list[0]->copy_from = 1;
 
-  gomp_mutex_unlock (&acc_dev->mem_map.lock);
+  gomp_mutex_unlock (&acc_dev->lock);
 
   /* If running synchronously, unmap immediately.  */
   if (async < acc_async_noval)
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index 0c74f54..563f9bb 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -144,9 +144,9 @@ GOACC_parallel (int device, void (*fn) (void *),
     {
       k.host_start = (uintptr_t) fn;
       k.host_end = k.host_start + 1;
-      gomp_mutex_lock (&acc_dev->mem_map.lock);
-      tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map.splay_tree, &k);
-      gomp_mutex_unlock (&acc_dev->mem_map.lock);
+      gomp_mutex_lock (&acc_dev->lock);
+      tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
+      gomp_mutex_unlock (&acc_dev->lock);
 
       if (tgt_fn_key == NULL)
 	gomp_fatal ("target function wasn't mapped");
diff --git a/libgomp/plugin/plugin-host.c b/libgomp/plugin/plugin-host.c
index ebf7f11..bc60f72 100644
--- a/libgomp/plugin/plugin-host.c
+++ b/libgomp/plugin/plugin-host.c
@@ -95,12 +95,6 @@ GOMP_OFFLOAD_get_num_devices (void)
 }
 
 STATIC void
-GOMP_OFFLOAD_register_image (void *host_table __attribute__ ((unused)),
-			     void *target_data __attribute__ ((unused)))
-{
-}
-
-STATIC void
 GOMP_OFFLOAD_init_device (int n __attribute__ ((unused)))
 {
 }
@@ -111,12 +105,19 @@ GOMP_OFFLOAD_fini_device (int n __attribute__ ((unused)))
 }
 
 STATIC int
-GOMP_OFFLOAD_get_table (int n __attribute__ ((unused)),
-			struct mapping_table **table __attribute__ ((unused)))
+GOMP_OFFLOAD_load_image (int n __attribute__ ((unused)),
+			 void *i __attribute__ ((unused)),
+			 struct addr_pair **r __attribute__ ((unused)))
 {
   return 0;
 }
 
+STATIC void
+GOMP_OFFLOAD_unload_image (int n __attribute__ ((unused)),
+			   void *i __attribute__ ((unused)))
+{
+}
+
 STATIC void *
 GOMP_OFFLOAD_openacc_open_device (int n)
 {
diff --git a/libgomp/target.c b/libgomp/target.c
index c5dda3f..418a2f5 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -49,6 +49,9 @@ static void gomp_target_init (void);
 /* The whole initialization code for offloading plugins is only run one.  */
 static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
 
+/* Mutex for offload image registration.  */
+static gomp_mutex_t register_lock;
+
 /* This structure describes an offload image.
    It contains type of the target device, pointer to host table descriptor, and
    pointer to target data.  */
@@ -153,14 +156,14 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
   size_t i, tgt_align, tgt_size, not_found_cnt = 0;
   const int rshift = is_openacc ? 8 : 3;
   const int typemask = is_openacc ? 0xff : 0x7;
-  struct gomp_memory_mapping *mm = &devicep->mem_map;
+  struct splay_tree_s *mem_map = &devicep->mem_map;
   struct splay_tree_key_s cur_node;
   struct target_mem_desc *tgt
     = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
   tgt->list_count = mapnum;
   tgt->refcount = 1;
   tgt->device_descr = devicep;
-  tgt->mem_map = mm;
+  tgt->mem_map = mem_map;
 
   if (mapnum == 0)
     return tgt;
@@ -174,7 +177,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
       tgt_size = mapnum * sizeof (void *);
     }
 
-  gomp_mutex_lock (&mm->lock);
+  gomp_mutex_lock (&devicep->lock);
 
   for (i = 0; i < mapnum; i++)
     {
@@ -189,7 +192,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 	cur_node.host_end = cur_node.host_start + sizes[i];
       else
 	cur_node.host_end = cur_node.host_start + sizeof (void *);
-      splay_tree_key n = splay_tree_lookup (&mm->splay_tree, &cur_node);
+      splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
       if (n)
 	{
 	  tgt->list[i] = n;
@@ -274,7 +277,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 	      k->host_end = k->host_start + sizes[i];
 	    else
 	      k->host_end = k->host_start + sizeof (void *);
-	    splay_tree_key n = splay_tree_lookup (&mm->splay_tree, k);
+	    splay_tree_key n = splay_tree_lookup (mem_map, k);
 	    if (n)
 	      {
 		tgt->list[i] = n;
@@ -294,7 +297,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 		tgt->refcount++;
 		array->left = NULL;
 		array->right = NULL;
-		splay_tree_insert (&mm->splay_tree, array);
+		splay_tree_insert (mem_map, array);
 		switch (kind & typemask)
 		  {
 		  case GOMP_MAP_ALLOC:
@@ -332,16 +335,16 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 		    /* Add bias to the pointer value.  */
 		    cur_node.host_start += sizes[i];
 		    cur_node.host_end = cur_node.host_start + 1;
-		    n = splay_tree_lookup (&mm->splay_tree, &cur_node);
+		    n = splay_tree_lookup (mem_map, &cur_node);
 		    if (n == NULL)
 		      {
 			/* Could be possibly zero size array section.  */
 			cur_node.host_end--;
-			n = splay_tree_lookup (&mm->splay_tree, &cur_node);
+			n = splay_tree_lookup (mem_map, &cur_node);
 			if (n == NULL)
 			  {
 			    cur_node.host_start--;
-			    n = splay_tree_lookup (&mm->splay_tree, &cur_node);
+			    n = splay_tree_lookup (mem_map, &cur_node);
 			    cur_node.host_start++;
 			  }
 		      }
@@ -400,18 +403,16 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 			  /* Add bias to the pointer value.  */
 			  cur_node.host_start += sizes[j];
 			  cur_node.host_end = cur_node.host_start + 1;
-			  n = splay_tree_lookup (&mm->splay_tree, &cur_node);
+			  n = splay_tree_lookup (mem_map, &cur_node);
 			  if (n == NULL)
 			    {
 			      /* Could be possibly zero size array section.  */
 			      cur_node.host_end--;
-			      n = splay_tree_lookup (&mm->splay_tree,
-						     &cur_node);
+			      n = splay_tree_lookup (mem_map, &cur_node);
 			      if (n == NULL)
 				{
 				  cur_node.host_start--;
-				  n = splay_tree_lookup (&mm->splay_tree,
-							 &cur_node);
+				  n = splay_tree_lookup (mem_map, &cur_node);
 				  cur_node.host_start++;
 				}
 			    }
@@ -489,7 +490,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 	}
     }
 
-  gomp_mutex_unlock (&mm->lock);
+  gomp_mutex_unlock (&devicep->lock);
   return tgt;
 }
 
@@ -514,10 +515,9 @@ attribute_hidden void
 gomp_copy_from_async (struct target_mem_desc *tgt)
 {
   struct gomp_device_descr *devicep = tgt->device_descr;
-  struct gomp_memory_mapping *mm = tgt->mem_map;
   size_t i;
 
-  gomp_mutex_lock (&mm->lock);
+  gomp_mutex_lock (&devicep->lock);
 
   for (i = 0; i < tgt->list_count; i++)
     if (tgt->list[i] == NULL)
@@ -536,7 +536,7 @@ gomp_copy_from_async (struct target_mem_desc *tgt)
 				  k->host_end - k->host_start);
       }
 
-  gomp_mutex_unlock (&mm->lock);
+  gomp_mutex_unlock (&devicep->lock);
 }
 
 /* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
@@ -547,7 +547,6 @@ attribute_hidden void
 gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
 {
   struct gomp_device_descr *devicep = tgt->device_descr;
-  struct gomp_memory_mapping *mm = tgt->mem_map;
 
   if (tgt->list_count == 0)
     {
@@ -555,7 +554,7 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
       return;
     }
 
-  gomp_mutex_lock (&mm->lock);
+  gomp_mutex_lock (&devicep->lock);
 
   size_t i;
   for (i = 0; i < tgt->list_count; i++)
@@ -572,7 +571,7 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
 	  devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
 				  (void *) (k->tgt->tgt_start + k->tgt_offset),
 				  k->host_end - k->host_start);
-	splay_tree_remove (&mm->splay_tree, k);
+	splay_tree_remove (tgt->mem_map, k);
 	if (k->tgt->refcount > 1)
 	  k->tgt->refcount--;
 	else
@@ -584,13 +583,12 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
   else
     gomp_unmap_tgt (tgt);
 
-  gomp_mutex_unlock (&mm->lock);
+  gomp_mutex_unlock (&devicep->lock);
 }
 
 static void
-gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm,
-	     size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds,
-	     bool is_openacc)
+gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
+	     size_t *sizes, void *kinds, bool is_openacc)
 {
   size_t i;
   struct splay_tree_key_s cur_node;
@@ -602,14 +600,13 @@ gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm,
   if (mapnum == 0)
     return;
 
-  gomp_mutex_lock (&mm->lock);
+  gomp_mutex_lock (&devicep->lock);
   for (i = 0; i < mapnum; i++)
     if (sizes[i])
       {
 	cur_node.host_start = (uintptr_t) hostaddrs[i];
 	cur_node.host_end = cur_node.host_start + sizes[i];
-	splay_tree_key n = splay_tree_lookup (&mm->splay_tree,
-					      &cur_node);
+	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
 	if (n)
 	  {
 	    int kind = get_kind (is_openacc, kinds, i);
@@ -643,10 +640,105 @@ gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm,
 		      (void *) cur_node.host_start,
 		      (void *) cur_node.host_end);
       }
-  gomp_mutex_unlock (&mm->lock);
+  gomp_mutex_unlock (&devicep->lock);
+}
+
+
+/* Insert mapping of host -> target address pairs to splay tree.  */
+
+static void
+gomp_splay_tree_insert_mapping (struct gomp_device_descr *devicep,
+				struct addr_pair *host_addr,
+				struct addr_pair *tgt_addr)
+{
+  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
+  tgt->refcount = 1;
+  tgt->array = gomp_malloc (sizeof (*tgt->array));
+  tgt->tgt_start = tgt_addr->start;
+  tgt->tgt_end = tgt_addr->end;
+  tgt->to_free = NULL;
+  tgt->list_count = 0;
+  tgt->device_descr = devicep;
+  splay_tree_node node = tgt->array;
+  splay_tree_key k = &node->key;
+  k->host_start = host_addr->start;
+  k->host_end = host_addr->end;
+  k->tgt_offset = 0;
+  k->refcount = 1;
+  k->copy_from = false;
+  k->tgt = tgt;
+  node->left = NULL;
+  node->right = NULL;
+  splay_tree_insert (&devicep->mem_map, node);
+}
+
+/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
+   And insert to splay tree the mapping between addresses from HOST_TABLE and
+   from loaded target image.  */
+
+static void
+gomp_offload_image_to_device (struct gomp_device_descr *devicep,
+			      void *host_table, void *target_data)
+{
+  void **host_func_table = ((void ***) host_table)[0];
+  void **host_funcs_end  = ((void ***) host_table)[1];
+  void **host_var_table  = ((void ***) host_table)[2];
+  void **host_vars_end   = ((void ***) host_table)[3];
+
+  /* The func table contains only addresses, the var table contains addresses
+     and corresponding sizes.  */
+  int num_funcs = host_funcs_end - host_func_table;
+  int num_vars  = (host_vars_end - host_var_table) / 2;
+
+  /* Load image to device and get target addresses for the image.  */
+  struct addr_pair *target_table = NULL;
+  int i, num_target_entries
+    = devicep->load_image_func (devicep->target_id, target_data, &target_table);
+
+  if (num_target_entries != num_funcs + num_vars)
+    gomp_fatal ("Can't map target functions or variables");
+
+  /* Insert host-target address mapping into splay tree.  */
+  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
+  tgt->array = gomp_malloc (num_funcs * sizeof (*tgt->array));
+  tgt->refcount = 1;
+  tgt->tgt_start = 0;
+  tgt->tgt_end = 0;
+  tgt->to_free = NULL;
+  tgt->prev = NULL;
+  tgt->list_count = 0;
+  tgt->device_descr = devicep;
+
+  splay_tree_node array = tgt->array;
+  for (i = 0; i < num_funcs; i++)
+    {
+      splay_tree_key k = &array->key;
+      k->host_start = (uintptr_t) host_func_table[i];
+      k->host_end = k->host_start + 1;
+      k->tgt = tgt;
+      k->tgt_offset = target_table[i].start;
+      k->refcount = 1;
+      k->async_refcount = 0;
+      k->copy_from = false;
+      array->left = NULL;
+      array->right = NULL;
+      splay_tree_insert (&devicep->mem_map, array);
+      array++;
+    }
+
+  for (i = 0; i < num_vars; i++)
+    {
+      struct addr_pair host_addr;
+      host_addr.start = (uintptr_t) host_var_table[i*2];
+      host_addr.end = host_addr.start + (uintptr_t) host_var_table[i*2+1];
+      gomp_splay_tree_insert_mapping (devicep, &host_addr,
+				      &target_table[num_funcs+i]);
+    }
+
+  free (target_table);
 }
 
-/* This function should be called from every offload image.
+/* This function should be called from every offload image while loading.
    It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
    the target, and TARGET_DATA needed by target plugin.  */
 
@@ -654,6 +746,20 @@ void
 GOMP_offload_register (void *host_table, enum offload_target_type target_type,
 		       void *target_data)
 {
+  int i;
+  gomp_mutex_lock (&register_lock);
+
+  /* Load image to all initialized devices.  */
+  for (i = 0; i < num_devices; i++)
+    {
+      struct gomp_device_descr *devicep = &devices[i];
+      gomp_mutex_lock (&devicep->lock);
+      if (devicep->type == target_type && devicep->is_initialized)
+	gomp_offload_image_to_device (devicep, host_table, target_data);
+      gomp_mutex_unlock (&devicep->lock);
+    }
+
+  /* Insert image to array of pending images.  */
   offload_images = gomp_realloc (offload_images,
 				 (num_offload_images + 1)
 				 * sizeof (struct offload_image_descr));
@@ -663,74 +769,122 @@ GOMP_offload_register (void *host_table, enum offload_target_type target_type,
   offload_images[num_offload_images].target_data = target_data;
 
   num_offload_images++;
+  gomp_mutex_unlock (&register_lock);
 }
 
-/* This function initializes the target device, specified by DEVICEP.  DEVICEP
-   must be locked on entry, and remains locked on return.  */
+/* This function should be called from every offload image while unloading.
+   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
+   the target, and TARGET_DATA needed by target plugin.  */
 
-attribute_hidden void
-gomp_init_device (struct gomp_device_descr *devicep)
+void
+GOMP_offload_unregister (void *host_table, enum offload_target_type target_type,
+			 void *target_data)
 {
-  devicep->init_device_func (devicep->target_id);
-  devicep->is_initialized = true;
+  void **host_func_table = ((void ***) host_table)[0];
+  void **host_funcs_end  = ((void ***) host_table)[1];
+  void **host_var_table  = ((void ***) host_table)[2];
+  void **host_vars_end   = ((void ***) host_table)[3];
+  int i;
+
+  /* The func table contains only addresses, the var table contains addresses
+     and corresponding sizes.  */
+  int num_funcs = host_funcs_end - host_func_table;
+  int num_vars  = (host_vars_end - host_var_table) / 2;
+
+  gomp_mutex_lock (&register_lock);
+
+  /* Unload image from all initialized devices.  */
+  for (i = 0; i < num_devices; i++)
+    {
+      int j;
+      struct gomp_device_descr *devicep = &devices[i];
+      gomp_mutex_lock (&devicep->lock);
+      if (devicep->type != target_type || !devicep->is_initialized)
+	{
+	  gomp_mutex_unlock (&devicep->lock);
+	  continue;
+	}
+
+      devicep->unload_image_func (devicep->target_id, target_data);
+
+      /* Remove mapping from splay tree.  */
+      struct splay_tree_key_s k;
+      splay_tree_key node = NULL;
+      if (num_funcs > 0)
+	{
+	  k.host_start = (uintptr_t) host_func_table[0];
+	  k.host_end = k.host_start + 1;
+	  node = splay_tree_lookup (&devicep->mem_map, &k);
+	}
+      for (j = 0; j < num_funcs; j++)
+	{
+	  k.host_start = (uintptr_t) host_func_table[j];
+	  k.host_end = k.host_start + 1;
+	  splay_tree_remove (&devicep->mem_map, &k);
+	}
+      if (node)
+	{
+	  free (node->tgt);
+	  free (node);
+	}
+
+      for (j = 0; j < num_vars; j++)
+	{
+	  k.host_start = (uintptr_t) host_var_table[j*2];
+	  k.host_end = k.host_start + (uintptr_t) host_var_table[j*2+1];
+	  splay_tree_remove (&devicep->mem_map, &k);
+	  /* FIXME: free tgt->array and tgt.  */
+	}
+
+      gomp_mutex_unlock (&devicep->lock);
+    }
+
+  /* Remove image from array of pending images.  */
+  for (i = 0; i < num_offload_images; i++)
+    if (offload_images[i].target_data == target_data)
+      {
+	offload_images[i] = offload_images[--num_offload_images];
+	break;
+      }
+
+  gomp_mutex_unlock (&register_lock);
 }
 
-/* Initialize address mapping tables.  MM must be locked on entry, and remains
-   locked on return.  */
+/* This function initializes the target device, specified by DEVICEP.  DEVICEP
+   must be locked on entry, and remains locked on return.  */
 
 attribute_hidden void
-gomp_init_tables (struct gomp_device_descr *devicep,
-		  struct gomp_memory_mapping *mm)
+gomp_init_device (struct gomp_device_descr *devicep)
 {
-  /* Get address mapping table for device.  */
-  struct mapping_table *table = NULL;
-  int num_entries = devicep->get_table_func (devicep->target_id, &table);
-
-  /* Insert host-target address mapping into dev_splay_tree.  */
   int i;
-  for (i = 0; i < num_entries; i++)
+  devicep->init_device_func (devicep->target_id);
+
+  /* Load all images registered so far to the device.  */
+  for (i = 0; i < num_offload_images; i++)
     {
-      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
-      tgt->refcount = 1;
-      tgt->array = gomp_malloc (sizeof (*tgt->array));
-      tgt->tgt_start = table[i].tgt_start;
-      tgt->tgt_end = table[i].tgt_end;
-      tgt->to_free = NULL;
-      tgt->list_count = 0;
-      tgt->device_descr = devicep;
-      splay_tree_node node = tgt->array;
-      splay_tree_key k = &node->key;
-      k->host_start = table[i].host_start;
-      k->host_end = table[i].host_end;
-      k->tgt_offset = 0;
-      k->refcount = 1;
-      k->copy_from = false;
-      k->tgt = tgt;
-      node->left = NULL;
-      node->right = NULL;
-      splay_tree_insert (&mm->splay_tree, node);
+      struct offload_image_descr *image = &offload_images[i];
+      if (image->type == devicep->type)
+	gomp_offload_image_to_device (devicep, image->host_table,
+				      image->target_data);
     }
 
-  free (table);
-  mm->is_initialized = true;
+  devicep->is_initialized = true;
 }
 
 /* Free address mapping tables.  MM must be locked on entry, and remains locked
    on return.  */
 
 attribute_hidden void
-gomp_free_memmap (struct gomp_memory_mapping *mm)
+gomp_free_memmap (struct splay_tree_s *mem_map)
 {
-  while (mm->splay_tree.root)
+  while (mem_map->root)
     {
-      struct target_mem_desc *tgt = mm->splay_tree.root->key.tgt;
+      struct target_mem_desc *tgt = mem_map->root->key.tgt;
 
-      splay_tree_remove (&mm->splay_tree, &mm->splay_tree.root->key);
+      splay_tree_remove (mem_map, &mem_map->root->key);
       free (tgt->array);
       free (tgt);
     }
-
-  mm->is_initialized = false;
 }
 
 /* This function de-initializes the target device, specified by DEVICEP.
@@ -791,22 +945,17 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
     fn_addr = (void *) fn;
   else
     {
-      struct gomp_memory_mapping *mm = &devicep->mem_map;
-      gomp_mutex_lock (&mm->lock);
-
-      if (!mm->is_initialized)
-	gomp_init_tables (devicep, mm);
-
+      gomp_mutex_lock (&devicep->lock);
       struct splay_tree_key_s k;
       k.host_start = (uintptr_t) fn;
       k.host_end = k.host_start + 1;
-      splay_tree_key tgt_fn = splay_tree_lookup (&mm->splay_tree, &k);
+      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
       if (tgt_fn == NULL)
 	gomp_fatal ("Target function wasn't mapped");
 
-      gomp_mutex_unlock (&mm->lock);
+      gomp_mutex_unlock (&devicep->lock);
 
-      fn_addr = (void *) tgt_fn->tgt->tgt_start;
+      fn_addr = (void *) tgt_fn->tgt_offset;
     }
 
   struct target_mem_desc *tgt_vars
@@ -856,12 +1005,6 @@ GOMP_target_data (int device, const void *unused, size_t mapnum,
     gomp_init_device (devicep);
   gomp_mutex_unlock (&devicep->lock);
 
-  struct gomp_memory_mapping *mm = &devicep->mem_map;
-  gomp_mutex_lock (&mm->lock);
-  if (!mm->is_initialized)
-    gomp_init_tables (devicep, mm);
-  gomp_mutex_unlock (&mm->lock);
-
   struct target_mem_desc *tgt
     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
 		     false);
@@ -897,13 +1040,7 @@ GOMP_target_update (int device, const void *unused, size_t mapnum,
     gomp_init_device (devicep);
   gomp_mutex_unlock (&devicep->lock);
 
-  struct gomp_memory_mapping *mm = &devicep->mem_map;
-  gomp_mutex_lock (&mm->lock);
-  if (!mm->is_initialized)
-    gomp_init_tables (devicep, mm);
-  gomp_mutex_unlock (&mm->lock);
-
-  gomp_update (devicep, mm, mapnum, hostaddrs, sizes, kinds, false);
+  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
 }
 
 void
@@ -972,10 +1109,10 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
   DLSYM (get_caps);
   DLSYM (get_type);
   DLSYM (get_num_devices);
-  DLSYM (register_image);
   DLSYM (init_device);
   DLSYM (fini_device);
-  DLSYM (get_table);
+  DLSYM (load_image);
+  DLSYM (unload_image);
   DLSYM (alloc);
   DLSYM (free);
   DLSYM (dev2host);
@@ -1038,22 +1175,6 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
   return err == NULL;
 }
 
-/* This function adds a compatible offload image IMAGE to an accelerator device
-   DEVICE.  DEVICE must be locked on entry, and remains locked on return.  */
-
-static void
-gomp_register_image_for_device (struct gomp_device_descr *device,
-				struct offload_image_descr *image)
-{
-  if (!device->offload_regions_registered
-      && (device->type == image->type
-	  || device->type == OFFLOAD_TARGET_TYPE_HOST))
-    {
-      device->register_image_func (image->host_table, image->target_data);
-      device->offload_regions_registered = true;
-    }
-}
-
 /* This function initializes the runtime needed for offloading.
    It parses the list of offload targets and tries to load the plugins for
    these targets.  On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
@@ -1112,17 +1233,14 @@ gomp_target_init (void)
 		current_device.name = current_device.get_name_func ();
 		/* current_device.capabilities has already been set.  */
 		current_device.type = current_device.get_type_func ();
-		current_device.mem_map.is_initialized = false;
-		current_device.mem_map.splay_tree.root = NULL;
+		current_device.mem_map.root = NULL;
 		current_device.is_initialized = false;
-		current_device.offload_regions_registered = false;
 		current_device.openacc.data_environ = NULL;
 		current_device.openacc.target_data = NULL;
 		for (i = 0; i < new_num_devices; i++)
 		  {
 		    current_device.target_id = i;
 		    devices[num_devices] = current_device;
-		    gomp_mutex_init (&devices[num_devices].mem_map.lock);
 		    gomp_mutex_init (&devices[num_devices].lock);
 		    num_devices++;
 		  }
@@ -1157,21 +1275,12 @@ gomp_target_init (void)
 
   for (i = 0; i < num_devices; i++)
     {
-      int j;
-
-      for (j = 0; j < num_offload_images; j++)
-	gomp_register_image_for_device (&devices[i], &offload_images[j]);
-
       /* The 'devices' array can be moved (by the realloc call) until we have
 	 found all the plugins, so registering with the OpenACC runtime (which
 	 takes a copy of the pointer argument) must be delayed until now.  */
       if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
 	goacc_register (&devices[i]);
     }
-
-  free (offload_images);
-  offload_images = NULL;
-  num_offload_images = 0;
 }
 
 #else /* PLUGIN_SUPPORT */
diff --git a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
index 3e7a958..a2d61b1 100644
--- a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
+++ b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
@@ -34,6 +34,7 @@
 #include <string.h>
 #include <utility>
 #include <vector>
+#include <map>
 #include "libgomp-plugin.h"
 #include "compiler_if_host.h"
 #include "main_target_image.h"
@@ -53,6 +54,29 @@ fprintf (stderr, "\n");					    \
 #endif
 
 
+/* Start/end addresses of functions and global variables on a device.  */
+typedef std::vector<addr_pair> AddrVect;
+
+/* Addresses for one image and all devices.  */
+typedef std::vector<AddrVect> DevAddrVect;
+
+/* Addresses for all images and all devices.  */
+typedef std::map<void *, DevAddrVect> ImgDevAddrMap;
+
+
+/* Total number of available devices.  */
+static int num_devices;
+
+/* Total number of shared libraries with offloading to Intel MIC.  */
+static int num_images;
+
+/* Two-dimensional table: the first key is a pointer to the image, the
+   second is the device number.  Each entry is a vector of address pairs.  */
+static ImgDevAddrMap *address_table;
+
+/* Thread-safe registration of the main image.  */
+static pthread_once_t main_image_is_registered = PTHREAD_ONCE_INIT;
+
 static VarDesc vd_host2tgt = {
   { 1, 1 },		      /* dst, src			      */
   { 1, 0 },		      /* in, out			      */
@@ -90,28 +114,17 @@ static VarDesc vd_tgt2host = {
 };
 
 
-/* Total number of shared libraries with offloading to Intel MIC.  */
-static int num_libraries;
-
-/* Pointers to the descriptors, containing pointers to host-side tables and to
-   target images.  */
-static std::vector< std::pair<void *, void *> > lib_descrs;
-
-/* Thread-safe registration of the main image.  */
-static pthread_once_t main_image_is_registered = PTHREAD_ONCE_INIT;
-
-
 /* Add path specified in LD_LIBRARY_PATH to MIC_LD_LIBRARY_PATH, which is
    required by liboffloadmic.  */
 __attribute__((constructor))
 static void
-set_mic_lib_path (void)
+init (void)
 {
   const char *ld_lib_path = getenv (LD_LIBRARY_PATH_ENV);
   const char *mic_lib_path = getenv (MIC_LD_LIBRARY_PATH_ENV);
 
   if (!ld_lib_path)
-    return;
+    goto out;
 
   if (!mic_lib_path)
     setenv (MIC_LD_LIBRARY_PATH_ENV, ld_lib_path, 1);
@@ -133,6 +146,10 @@ set_mic_lib_path (void)
       if (!use_alloca)
 	free (mic_lib_path_new);
     }
+
+out:
+  address_table = new ImgDevAddrMap;
+  num_devices = _Offload_number_of_devices ();
 }
 
 extern "C" const char *
@@ -162,18 +179,8 @@ GOMP_OFFLOAD_get_type (void)
 extern "C" int
 GOMP_OFFLOAD_get_num_devices (void)
 {
-  int res = _Offload_number_of_devices ();
-  TRACE ("(): return %d", res);
-  return res;
-}
-
-/* This should be called from every shared library with offloading.  */
-extern "C" void
-GOMP_OFFLOAD_register_image (void *host_table, void *target_image)
-{
-  TRACE ("(host_table = %p, target_image = %p)", host_table, target_image);
-  lib_descrs.push_back (std::make_pair (host_table, target_image));
-  num_libraries++;
+  TRACE ("(): return %d", num_devices);
+  return num_devices;
 }
 
 static void
@@ -196,7 +203,8 @@ register_main_image ()
   __offload_register_image (&main_target_image);
 }
 
-/* Load offload_target_main on target.  */
+/* liboffloadmic loads and runs offload_target_main on all available devices
+   during the first call to offload ().  */
 extern "C" void
 GOMP_OFFLOAD_init_device (int device)
 {
@@ -243,9 +251,11 @@ get_target_table (int device, int &num_funcs, int &num_vars, void **&table)
     }
 }
 
+/* Offload TARGET_IMAGE to all available devices and fill address_table with
+   corresponding target addresses.  */
+
 static void
-load_lib_and_get_table (int device, int lib_num, mapping_table *&table,
-			int &table_size)
+offload_image (void *target_image)
 {
   struct TargetImage {
     int64_t size;
@@ -254,19 +264,11 @@ load_lib_and_get_table (int device, int lib_num, mapping_table *&table,
     char data[];
   } __attribute__ ((packed));
 
-  void ***host_table_descr = (void ***) lib_descrs[lib_num].first;
-  void **host_func_start = host_table_descr[0];
-  void **host_func_end   = host_table_descr[1];
-  void **host_var_start  = host_table_descr[2];
-  void **host_var_end    = host_table_descr[3];
+  void *image_start = ((void **) target_image)[0];
+  void *image_end   = ((void **) target_image)[1];
 
-  void **target_image_descr = (void **) lib_descrs[lib_num].second;
-  void *image_start = target_image_descr[0];
-  void *image_end   = target_image_descr[1];
-
-  TRACE ("() host_table_descr { %p, %p, %p, %p }", host_func_start,
-	 host_func_end, host_var_start, host_var_end);
-  TRACE ("() target_image_descr { %p, %p }", image_start, image_end);
+  TRACE ("(target_image = %p { %p, %p })",
+	 target_image, image_start, image_end);
 
   int64_t image_size = (uintptr_t) image_end - (uintptr_t) image_start;
   TargetImage *image
@@ -279,94 +281,87 @@ load_lib_and_get_table (int device, int lib_num, mapping_table *&table,
     }
 
   image->size = image_size;
-  sprintf (image->name, "lib%010d.so", lib_num);
+  sprintf (image->name, "lib%010d.so", num_images++);
   memcpy (image->data, image_start, image->size);
 
   TRACE ("() __offload_register_image %s { %p, %d }",
 	 image->name, image_start, image->size);
   __offload_register_image (image);
 
-  int tgt_num_funcs = 0;
-  int tgt_num_vars = 0;
-  void **tgt_table = NULL;
-  get_target_table (device, tgt_num_funcs, tgt_num_vars, tgt_table);
-  free (image);
-
-  /* The func table contains only addresses, the var table contains addresses
-     and corresponding sizes.  */
-  int host_num_funcs = host_func_end - host_func_start;
-  int host_num_vars  = (host_var_end - host_var_start) / 2;
-  TRACE ("() host_num_funcs = %d, tgt_num_funcs = %d",
-	 host_num_funcs, tgt_num_funcs);
-  TRACE ("() host_num_vars = %d, tgt_num_vars = %d",
-	 host_num_vars, tgt_num_vars);
-  if (host_num_funcs != tgt_num_funcs)
+  /* Receive tables for target_image from all devices.  */
+  DevAddrVect dev_table;
+  for (int dev = 0; dev < num_devices; dev++)
     {
-      fprintf (stderr, "%s: Can't map target functions\n", __FILE__);
-      exit (1);
-    }
-  if (host_num_vars != tgt_num_vars)
-    {
-      fprintf (stderr, "%s: Can't map target variables\n", __FILE__);
-      exit (1);
-    }
+      int num_funcs = 0;
+      int num_vars = 0;
+      void **table = NULL;
 
-  table = (mapping_table *) realloc (table, (table_size + host_num_funcs
-					     + host_num_vars)
-					    * sizeof (mapping_table));
-  if (table == NULL)
-    {
-      fprintf (stderr, "%s: Can't allocate memory\n", __FILE__);
-      exit (1);
-    }
+      get_target_table (dev, num_funcs, num_vars, table);
 
-  for (int i = 0; i < host_num_funcs; i++)
-    {
-      mapping_table t;
-      t.host_start = (uintptr_t) host_func_start[i];
-      t.host_end = t.host_start + 1;
-      t.tgt_start = (uintptr_t) tgt_table[i];
-      t.tgt_end = t.tgt_start + 1;
-
-      TRACE ("() lib %d, func %d:\t0x%llx -- 0x%llx",
-	     lib_num, i, t.host_start, t.tgt_start);
-
-      table[table_size++] = t;
-    }
+      AddrVect curr_dev_table;
 
-  for (int i = 0; i < host_num_vars * 2; i += 2)
-    {
-      mapping_table t;
-      t.host_start = (uintptr_t) host_var_start[i];
-      t.host_end = t.host_start + (uintptr_t) host_var_start[i+1];
-      t.tgt_start = (uintptr_t) tgt_table[tgt_num_funcs+i];
-      t.tgt_end = t.tgt_start + (uintptr_t) tgt_table[tgt_num_funcs+i+1];
+      for (int i = 0; i < num_funcs; i++)
+	{
+	  addr_pair tgt_addr;
+	  tgt_addr.start = (uintptr_t) table[i];
+	  tgt_addr.end = tgt_addr.start + 1;
+	  TRACE ("() func %d:\t0x%llx..0x%llx", i,
+		 tgt_addr.start, tgt_addr.end);
+	  curr_dev_table.push_back (tgt_addr);
+	}
 
-      TRACE ("() lib %d, var %d:\t0x%llx (%d) -- 0x%llx (%d)", lib_num, i/2,
-	     t.host_start, t.host_end - t.host_start,
-	     t.tgt_start, t.tgt_end - t.tgt_start);
+      for (int i = 0; i < num_vars; i++)
+	{
+	  addr_pair tgt_addr;
+	  tgt_addr.start = (uintptr_t) table[num_funcs+i*2];
+	  tgt_addr.end = tgt_addr.start + (uintptr_t) table[num_funcs+i*2+1];
+	  TRACE ("() var %d:\t0x%llx..0x%llx", i, tgt_addr.start, tgt_addr.end);
+	  curr_dev_table.push_back (tgt_addr);
+	}
 
-      table[table_size++] = t;
+      dev_table.push_back (curr_dev_table);
     }
 
-  delete [] tgt_table;
+  address_table->insert (std::make_pair (target_image, dev_table));
+
+  free (image);
 }
 
 extern "C" int
-GOMP_OFFLOAD_get_table (int device, void *result)
+GOMP_OFFLOAD_load_image (int device, void *target_image, addr_pair **result)
 {
-  TRACE ("(num_libraries = %d)", num_libraries);
+  TRACE ("(device = %d, target_image = %p)", device, target_image);
 
-  mapping_table *table = NULL;
-  int table_size = 0;
+  /* If target_image is already present in address_table, then there is no need
+     to offload it.  */
+  if (address_table->count (target_image) == 0)
+    offload_image (target_image);
 
-  for (int i = 0; i < num_libraries; i++)
-    load_lib_and_get_table (device, i, table, table_size);
+  AddrVect *curr_dev_table = &(*address_table)[target_image][device];
+  int table_size = curr_dev_table->size ();
+  addr_pair *table = (addr_pair *) malloc (table_size * sizeof (addr_pair));
+  if (table == NULL)
+    {
+      fprintf (stderr, "%s: Can't allocate memory\n", __FILE__);
+      exit (1);
+    }
 
-  *(void **) result = table;
+  std::copy (curr_dev_table->begin (), curr_dev_table->end (), table);
+  *result = table;
   return table_size;
 }
 
+extern "C" void
+GOMP_OFFLOAD_unload_image (int device, void *target_image)
+{
+  TRACE ("(device = %d, target_image = %p)", device, target_image);
+
+  /* TODO: Currently liboffloadmic doesn't support __offload_unregister_image
+     for libraries.  */
+
+  address_table->erase (target_image);
+}
+
 extern "C" void *
 GOMP_OFFLOAD_alloc (int device, size_t size)
 {


Thanks,
  -- Ilya


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]