This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [RFC] Offloading Support in libgomp


On Fri, Sep 13, 2013 at 03:29:30PM +0400, Michael V. Zolotukhin wrote:
> Here is the first patch for adding plugins support in libgomp - could you please
> take a look at it?
> 
> I changed configure.ac to add dl-library, but I am not sure if I regenerated all
> related to configure files properly.  I'd appreciate your help here, if I did
> it wrong.

The configure stuff looks reasonable.

> --- a/libgomp/target.c
> +++ b/libgomp/target.c
> @@ -28,6 +28,45 @@
>  #include "libgomp.h"
>  #include <stdlib.h>
>  #include <string.h>
> +#include <dirent.h>

But I doubt dirent.h is portable to all targets we support, so I believe it
needs another configure test, and perhaps we want to define some macro
whether we actually support offloading at all (HAVE_DLFCN_H would be one
precondition, HAVE_DIRENT_H (with opendir etc.) another one (for this the
question is if we are building libgomp with LFS flags also, i.e. opendir64
etc. if available) another requirement we have is that sizeof (void *) ==
sizeof (uintptr_t), etc.

>  static int
>  resolve_device (int device)
> @@ -49,6 +88,7 @@ GOMP_target (int device, void (*fn) (void *), const char *fnname,
>  	     size_t mapnum, void **hostaddrs, size_t *sizes,
>  	     unsigned char *kinds)
>  {
> +  (void) pthread_once (&gomp_is_initialized, gomp_target_init);

resolve_device should be changed to return struct gomp_device_descr *
(or NULL for host fallback), and this pthread_once done inside of
resolve_device, not in all the callers.

> +static bool
> +gomp_check_plugin_file_name (const char *fname)
> +{
> +  const char *prefix = "libgomp-plugin-";
> +  const char *suffix = ".so.1";
> +  if (!fname)
> +    return false;
> +  if (strncmp (fname, prefix, strlen (prefix)) != 0)
> +    return false;
> +  if (strncmp (fname + strnlen (fname, NAME_MAX + 1) - strlen (suffix),

I'm afraid strnlen isn't sufficiently portable.  Why don't you just use
strlen?

> +  /* Check if all required functions are available in the plugin and store
> +     their handlers.
> +     TODO: check for other routines as well.  */
> +  *(void **) (&device->device_available_func) = dlsym (device->plugin_handle,
> +						       "device_available");

Aliasing violation, don't do that.

FYI, I'm attaching a WIP patch with the splay tree stuff, debugging
target-1.c with OMP_DEFAULT_DEVICE=257 right now (with all tgtv related
stuff removed), but hitting some error regarding OMP_CLAUSE_MAP_POINTER
reallocation, supposedly a bug on the compiler side.  But e.g. fn2 and fn3
already seem to pass with that, only fn4 is problematic.

There are various FIXMEs in the patch, the routines that create
target_mem_desc should actually get an extra struct gomp_device_descr *
argument, store it into *tgt and then the spots where I'm using
gomp_malloc/free/memcpy for device allocation/deallocation/to/from
data transfer should be adjusted to use callbacks from the plugin.

After the fname to void * __OPENMP_OFFLOAD__ or whatever change
for GOMP_target, I think we need to pass the same argument to
GOMP_target_data and GOMP_target_update too, pass it through
to resolve_device and that will actually need to also find out
if the selected target has corresponding offload support compiled in,
and will need to upload the DSO to target if not done already,
and register into the splay tree all the static vars
(and if any of that fails, return NULL for host fallback).

--- libgomp/target.c.jj	2013-09-09 17:41:02.290429613 +0200
+++ libgomp/target.c	2013-09-13 13:17:31.703502392 +0200
@@ -1,4 +1,4 @@
-/* Copyright (C) 2013 Free Software Foundation, Inc.
+/* Copyright (C) 1998-2013 Free Software Foundation, Inc.
    Contributed by Jakub Jelinek <jakub@redhat.com>.
 
    This file is part of the GNU OpenMP Library (libgomp).
@@ -26,15 +26,567 @@
    creation and termination.  */
 
 #include "libgomp.h"
+#include <stdbool.h>
 #include <stdlib.h>
 #include <string.h>
 
+/* The splay tree code copied from include/splay-tree.h and adjusted,
+   so that all the data lives directly in splay_tree_node_s structure
+   and no extra allocations are needed.  */
+
+/* For an easily readable description of splay-trees, see:
+
+     Lewis, Harry R. and Denenberg, Larry.  Data Structures and Their
+     Algorithms.  Harper-Collins, Inc.  1991.  
+
+   The major feature of splay trees is that all basic tree operations
+   are amortized O(log n) time for a tree with n nodes.  */
+
+/* Forward declaration for a node in the tree.  */
+typedef struct splay_tree_node_s *splay_tree_node;
+typedef splay_tree_node *splay_tree;
+
+struct target_mem_desc {
+  /* Reference count.  */
+  uintptr_t refcount;
+  /* All the splay nodes allocated together.  */
+  splay_tree_node array;
+  /* Start of the target region.  */
+  uintptr_t tgt_start;
+  /* End of the targer region.  */
+  uintptr_t tgt_end;
+  /* Handle to free.  */
+  void *to_free;
+  /* Previous target_mem_desc.  */
+  struct target_mem_desc *prev;
+  /* Number of items in following list.  */
+  size_t list_count;
+  /* List of splay nodes to remove (or decrease refcount)
+     at the end of region.  */
+  splay_tree_node list[];
+};
+
+/* The nodes in the splay tree.  */
+struct splay_tree_node_s {
+  /* Address of the host object.  */
+  uintptr_t host_start;
+  /* Address immediately after the host object.  */
+  uintptr_t host_end;
+  /* Descriptor of the target memory.  */
+  struct target_mem_desc *tgt;
+  /* Offset from tgt->tgt_start to the start of the target object.  */
+  uintptr_t tgt_offset;
+  /* Reference count.  */
+  uintptr_t refcount;
+  /* True if data should be copied from device to host at the end.  */
+  bool copy_from;
+  /* The left and right children, respectively.  */
+  splay_tree_node left;
+  splay_tree_node right;
+};
+
+/* Rotate the edge joining the left child N with its parent P.  PP is the
+   grandparents' pointer to P.  */
+
+static inline void
+rotate_left (splay_tree_node *pp, splay_tree_node p, splay_tree_node n)
+{
+  splay_tree_node tmp;
+  tmp = n->right;
+  n->right = p;
+  p->left = tmp;
+  *pp = n;
+}
+
+/* Rotate the edge joining the right child N with its parent P.  PP is the
+   grandparents' pointer to P.  */
+
+static inline void
+rotate_right (splay_tree_node *pp, splay_tree_node p, splay_tree_node n)
+{
+  splay_tree_node tmp;
+  tmp = n->left;
+  n->left = p;
+  p->right = tmp;
+  *pp = n;
+}
+
+static int
+splay_compare (splay_tree_node x, splay_tree_node y)
+{
+  if (x->host_start == x->host_end
+      && y->host_start == y->host_end)
+    return 0;
+  if (x->host_end <= y->host_start)
+    return -1;
+  if (x->host_start >= y->host_end)
+    return 1;
+  return 0;
+}
+
+/* Bottom up splay of NODE.  */
+
+static void
+splay_tree_splay (splay_tree sp, splay_tree_node node)
+{
+  if (*sp == NULL)
+    return;
+
+  do {
+    int cmp1, cmp2;
+    splay_tree_node n, c;
+
+    n = *sp;
+    cmp1 = splay_compare (node, n);
+
+    /* Found.  */
+    if (cmp1 == 0)
+      return;
+
+    /* Left or right?  If no child, then we're done.  */
+    if (cmp1 < 0)
+      c = n->left;
+    else
+      c = n->right;
+    if (!c)
+      return;
+
+    /* Next one left or right?  If found or no child, we're done
+       after one rotation.  */
+    cmp2 = splay_compare (node, c);
+    if (cmp2 == 0
+        || (cmp2 < 0 && !c->left)
+        || (cmp2 > 0 && !c->right))
+      {
+	if (cmp1 < 0)
+	  rotate_left (sp, n, c);
+	else
+	  rotate_right (sp, n, c);
+        return;
+      }
+
+    /* Now we have the four cases of double-rotation.  */
+    if (cmp1 < 0 && cmp2 < 0)
+      {
+	rotate_left (&n->left, c, c->left);
+	rotate_left (sp, n, n->left);
+      }
+    else if (cmp1 > 0 && cmp2 > 0)
+      {
+	rotate_right (&n->right, c, c->right);
+	rotate_right (sp, n, n->right);
+      }
+    else if (cmp1 < 0 && cmp2 > 0)
+      {
+	rotate_right (&n->left, c, c->right);
+	rotate_left (sp, n, n->left);
+      }
+    else if (cmp1 > 0 && cmp2 < 0)
+      {
+	rotate_left (&n->right, c, c->left);
+	rotate_right (sp, n, n->right);
+      }
+  } while (1);
+}
+
+/* Insert a new NODE into SP.  The NODE shouldn't exist in the tree.  */
+
+void
+splay_tree_insert (splay_tree sp, splay_tree_node node)
+{
+  int comparison = 0;
+
+  splay_tree_splay (sp, node);
+
+  if (*sp)
+    comparison = splay_compare (*sp, node);
+
+  if (*sp && comparison == 0)
+    abort ();
+  else 
+    {
+      /* Insert it at the root.  */
+      if (*sp == NULL)
+	node->left = node->right = NULL;
+      else if (comparison < 0)
+	{
+	  node->left = *sp;
+	  node->right = node->left->right;
+	  node->left->right = NULL;
+	}
+      else
+	{
+	  node->right = *sp;
+	  node->left = node->right->left;
+	  node->right->left = NULL;
+	}
+
+      *sp = node;
+    }
+}
+
+/* Remove NODE from SP.  It is not an error if it did not exist.  */
+
+void
+splay_tree_remove (splay_tree sp, splay_tree_node node)
+{
+  splay_tree_splay (sp, node);
+
+  if (*sp && splay_compare (*sp, node) == 0)
+    {
+      splay_tree_node left, right;
+
+      left = (*sp)->left;
+      right = (*sp)->right;
+
+      /* One of the children is now the root.  Doesn't matter much
+	 which, so long as we preserve the properties of the tree.  */
+      if (left)
+	{
+	  *sp = left;
+
+	  /* If there was a right child as well, hang it off the 
+	     right-most leaf of the left child.  */
+	  if (right)
+	    {
+	      while (left->right)
+		left = left->right;
+	      left->right = right;
+	    }
+	}
+      else
+	*sp = right;
+    }
+}
+
+/* Lookup NODE in SP, returning VALUE if present, and NULL 
+   otherwise.  */
+
+splay_tree_node
+splay_tree_lookup (splay_tree sp, splay_tree_node node)
+{
+  splay_tree_splay (sp, node);
+
+  if (*sp && splay_compare (*sp, node) == 0)
+    return *sp;
+  else
+    return NULL;
+}
+
+attribute_hidden int
+gomp_get_num_devices (void)
+{
+  /* FIXME: Scan supported accelerators when called the first time.  */
+  return 0;
+}
+
 static int
 resolve_device (int device)
 {
+  if (device == -1)
+    {
+      struct gomp_task_icv *icv = gomp_icv (false);
+      device = icv->default_device_var;
+    }
+  /* FIXME: Temporary hack for testing non-shared address spaces on host.  */
+  if (device == 257)
+    return 257;
+  if (device >= gomp_get_num_devices ())
+    return -1;
   return -1;
 }
 
+/* These variables would be per-accelerator (which doesn't have shared address
+   space.  */
+static splay_tree_node dev_splay_tree;
+static gomp_mutex_t dev_env_lock;
+
+/* Handle the case where splay_tree_lookup found oldn for newn.
+   Helper function of gomp_map_vars.  */
+
+static inline void
+gomp_map_vars_existing (splay_tree_node oldn, splay_tree_node newn,
+			unsigned char kind)
+{
+  if (oldn->host_start > newn->host_start
+      || oldn->host_end < newn->host_end)
+    gomp_fatal ("Trying to map into device [%p..%p) object when"
+		"[%p..%p) is already mapped",
+		(void *) newn->host_start, (void *) newn->host_end,
+		(void *) oldn->host_start, (void *) oldn->host_end);
+  if (((kind & 7) == 2 || (kind & 7) == 3)
+      && !oldn->copy_from
+      && oldn->host_start == newn->host_start
+      && oldn->host_end == newn->host_end)
+    oldn->copy_from = true;
+  oldn->refcount++;
+}
+
+static struct target_mem_desc *
+gomp_map_vars (size_t mapnum, void **hostaddrs, size_t *sizes,
+	       unsigned char *kinds, bool is_target)
+{
+  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
+  struct splay_tree_node_s cur_node;
+  struct target_mem_desc *tgt
+    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
+  tgt->list_count = mapnum;
+  tgt->refcount = 1;
+
+  if (mapnum == 0)
+    return tgt;
+
+  tgt_align = sizeof (void *);
+  tgt_size = 0;
+  if (is_target)
+    {
+      size_t align = 4 * sizeof (void *);
+      tgt_align = align;
+      tgt_size = mapnum * sizeof (void *);
+    }
+
+  gomp_mutex_lock (&dev_env_lock);
+  for (i = 0; i < mapnum; i++)
+    {
+      cur_node.host_start = (uintptr_t) hostaddrs[i];
+      if ((kinds[i] & 7) != 4)
+	cur_node.host_end = cur_node.host_start + sizes[i];
+      else
+	cur_node.host_end = cur_node.host_start + sizeof (void *);
+      splay_tree_node n = splay_tree_lookup (&dev_splay_tree, &cur_node);
+      if (n)
+	{
+	  tgt->list[i] = n;
+	  gomp_map_vars_existing (n, &cur_node, kinds[i]);
+	}
+      else
+	{
+	  size_t align = (size_t) 1 << (kinds[i] >> 3);
+	  tgt->list[i] = NULL;
+	  not_found_cnt++;
+	  if (tgt_align < align)
+	    tgt_align = align;
+	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
+	  tgt_size += cur_node.host_end - cur_node.host_start;
+	}
+    }
+
+  if (not_found_cnt || is_target)
+    {
+      /* FIXME: This would be accelerator memory allocation, not
+	 host, and should allocate tgt_align aligned tgt_size block
+	 of memory.  */
+      tgt->to_free = gomp_malloc (tgt_size + tgt_align - 1);
+      tgt->tgt_start = (uintptr_t) tgt->to_free;
+      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
+      tgt->tgt_end = tgt->tgt_start + tgt_size;
+    }
+
+  tgt_size = 0;
+  if (is_target)
+    tgt_size = mapnum * sizeof (void *);
+
+  if (not_found_cnt)
+    {
+      tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
+      splay_tree_node array = tgt->array;
+
+      for (i = 0; i < mapnum; i++)
+	if (tgt->list[i] == NULL)
+	  {
+	    array->host_start = (uintptr_t) hostaddrs[i];
+	    if ((kinds[i] & 7) != 4)
+	      array->host_end = array->host_start + sizes[i];
+	    else
+	      array->host_end = array->host_start + sizeof (void *);
+	    splay_tree_node n = splay_tree_lookup (&dev_splay_tree, array);
+	    if (n)
+	      {
+		tgt->list[i] = n;
+		gomp_map_vars_existing (n, array, kinds[i]);
+	      }
+	    else
+	      {
+		size_t align = (size_t) 1 << (kinds[i] >> 3);
+		tgt->list[i] = array;
+		tgt_size = (tgt_size + align - 1) & ~(align - 1);
+		array->tgt = tgt;
+		array->tgt_offset = tgt_size;
+		tgt_size += array->host_end - array->host_start;
+		if ((kinds[i] & 7) == 2 || (kinds[i] & 7) == 3)
+		  array->copy_from = true;
+		array->refcount = 1;
+		tgt->refcount++;
+		array->left = NULL;
+		array->right = NULL;
+		splay_tree_insert (&dev_splay_tree, array);
+		switch (kinds[i] & 7)
+		  {
+		  case 0: /* ALLOC */
+		  case 2: /* FROM */
+		    break;
+		  case 1: /* TO */
+		  case 3: /* TOFROM */
+		    /* FIXME: This is supposed to be copy from host to device
+		       memory.  Perhaps add some smarts, like if copying
+		       several adjacent fields from host to target, use some
+		       host buffer to avoid sending each var individually.  */
+		    memcpy ((void *) (tgt->tgt_start + array->tgt_offset),
+			    (void *) array->host_start,
+			    array->host_end - array->host_start);
+		    break;
+		  case 4: /* POINTER */
+		    cur_node.host_start
+		      = (uintptr_t) *(void **) array->host_start;
+		    /* Add bias to the pointer value.  */
+		    cur_node.host_start += sizes[i];
+		    cur_node.host_end = cur_node.host_start + 1;
+		    n = splay_tree_lookup (&dev_splay_tree, &cur_node);
+		    if (n == NULL)
+		      {
+			/* Could be possibly zero size array section.  */
+			cur_node.host_end--;
+			n = splay_tree_lookup (&dev_splay_tree, &cur_node);
+			if (n == NULL)
+			  {
+			    cur_node.host_start--;
+			    n = splay_tree_lookup (&dev_splay_tree, &cur_node);
+			    cur_node.host_start++;
+			  }
+		      }
+		    if (n == NULL)
+		      gomp_fatal ("Pointer target of array section "
+				  "wasn't mapped");
+		    cur_node.host_start -= n->host_start;
+		    cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
+					  + cur_node.host_start;
+		    /* At this point tgt_offset is target address of the
+		       array section.  Now subtract bias to get what we want
+		       to initialize the pointer with.  */
+		    cur_node.tgt_offset -= sizes[i];
+		    /* FIXME: host to device copy, see above FIXME comment.  */
+		    memcpy ((void *) (tgt->tgt_start + array->tgt_offset),
+			    (void *) &cur_node.tgt_offset,
+			    sizeof (void *));
+		    break;
+		  }
+		array++;
+	      }
+	  }
+    }
+  if (is_target)
+    {
+      for (i = 0; i < mapnum; i++)
+	{
+	  cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
+				+ tgt->list[i]->tgt_offset;
+	  /* FIXME: host to device copy, see above FIXME comment.  */
+	  memcpy ((void *) (tgt->tgt_start + i * sizeof (void *)),
+		  (void *) &cur_node.tgt_offset,
+		  sizeof (void *));
+	}
+    }
+
+  gomp_mutex_unlock (&dev_env_lock);
+  return tgt;
+}
+
+static void
+gomp_unmap_tgt (struct target_mem_desc *tgt)
+{
+  /* FIXME: Deallocate on target the tgt->tgt_start .. tgt->tgt_end
+     region.  */
+  if (tgt->tgt_end)
+    free (tgt->to_free);
+
+  free (tgt->array);
+  free (tgt);
+}
+
+static void
+gomp_unmap_vars (struct target_mem_desc *tgt)
+{
+  if (tgt->list_count == 0)
+    {
+      free (tgt);
+      return;
+    }
+
+  size_t i;
+  gomp_mutex_lock (&dev_env_lock);
+  for (i = 0; i < tgt->list_count; i++)
+    if (tgt->list[i]->refcount > 1)
+      tgt->list[i]->refcount--;
+    else
+      {
+	splay_tree_node n = tgt->list[i];
+	if (n->copy_from)
+	  /* FIXME: device to host copy.  */
+	  memcpy ((void *) n->host_start,
+		  (void *) (n->tgt->tgt_start + n->tgt_offset),
+		  n->host_end - n->host_start);
+	splay_tree_remove (&dev_splay_tree, n);
+	if (n->tgt->refcount > 1)
+	  n->tgt->refcount--;
+	else
+	  gomp_unmap_tgt (n->tgt);
+      }
+
+  if (tgt->refcount > 1)
+    tgt->refcount--;
+  else
+    gomp_unmap_tgt (tgt);
+  gomp_mutex_unlock (&dev_env_lock);
+}
+
+static void
+gomp_update (size_t mapnum, void **hostaddrs, size_t *sizes,
+	     unsigned char *kinds)
+{
+  size_t i;
+  struct splay_tree_node_s cur_node;
+
+  if (mapnum == 0)
+    return;
+
+  gomp_mutex_lock (&dev_env_lock);
+  for (i = 0; i < mapnum; i++)
+    if (sizes[i])
+      {
+	cur_node.host_start = (uintptr_t) hostaddrs[i];
+	cur_node.host_end = cur_node.host_start + sizes[i];
+	splay_tree_node n = splay_tree_lookup (&dev_splay_tree, &cur_node);
+	if (n)
+	  {
+	    if (n->host_start > cur_node.host_start
+		|| n->host_end < cur_node.host_end)
+	      gomp_fatal ("Trying to update [%p..%p) object when"
+			  "only [%p..%p) is mapped",
+			  (void *) cur_node.host_start,
+			  (void *) cur_node.host_end,
+			  (void *) n->host_start,
+			  (void *) n->host_end);
+	    if ((kinds[i] & 7) == 1)
+	      /* FIXME: host to device copy.  */
+	      memcpy ((void *) (n->tgt->tgt_start + n->tgt_offset
+				+ cur_node.host_start - n->host_start),
+		      (void *) cur_node.host_start,
+		      cur_node.host_end - cur_node.host_start);
+	    else if ((kinds[i] & 7) == 2)
+	      /* FIXME: device to host copy.  */
+	      memcpy ((void *) cur_node.host_start,
+		      (void *) (n->tgt->tgt_start + n->tgt_offset
+				+ cur_node.host_start - n->host_start),
+		      cur_node.host_end - cur_node.host_start);
+	  }
+	else
+	  gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
+		      (void *) cur_node.host_start,
+		      (void *) cur_node.host_end);
+      }
+  gomp_mutex_unlock (&dev_env_lock);
+}
+
 /* Called when encountering a target directive.  If DEVICE
    is -1, it means use device-var ICV.  If it is -2 (or any other value
    larger than last available hw device, use host fallback.
@@ -49,32 +601,77 @@ GOMP_target (int device, void (*fn) (voi
 	     size_t mapnum, void **hostaddrs, size_t *sizes,
 	     unsigned char *kinds)
 {
-  if (resolve_device (device) == -1)
+  device = resolve_device (device);
+  if (device == -1)
     {
+      /* Host fallback.  */
       fn (hostaddrs);
       return;
     }
+  if (device == 257)
+    {
+      struct target_mem_desc *tgt
+	= gomp_map_vars (mapnum, hostaddrs, sizes, kinds, true);
+      fn ((void *) tgt->tgt_start);
+      gomp_unmap_vars (tgt);
+    }
 }
 
 void
 GOMP_target_data (int device, size_t mapnum, void **hostaddrs, size_t *sizes,
 		  unsigned char *kinds)
 {
-  if (resolve_device (device) == -1)
-    return;
+  device = resolve_device (device);
+  if (device == -1)
+    {
+      /* Host fallback.  */
+      struct gomp_task_icv *icv = gomp_icv (false);
+      if (icv->target_data)
+	{
+	  /* Even when doing a host fallback, if there are any active
+	     #pragma omp target data constructs, need to remember the
+	     new #pragma omp target data, otherwise GOMP_target_end_data
+	     would get out of sync.  */
+	  struct target_mem_desc *tgt
+	    = gomp_map_vars (0, NULL, NULL, NULL, false);
+	  tgt->prev = icv->target_data;
+	  icv->target_data = tgt;
+	}
+      return;
+    }
+
+  if (device == 257)
+    {
+      struct target_mem_desc *tgt
+	= gomp_map_vars (mapnum, hostaddrs, sizes, kinds, false);
+      struct gomp_task_icv *icv = gomp_icv (true);
+      tgt->prev = icv->target_data;
+      icv->target_data = tgt;
+    }
 }
 
 void
 GOMP_target_end_data (void)
 {
+  struct gomp_task_icv *icv = gomp_icv (false);
+  if (icv->target_data)
+    {
+      struct target_mem_desc *tgt = icv->target_data;
+      icv->target_data = tgt->prev;
+      gomp_unmap_vars (tgt);
+    }
 }
 
 void
 GOMP_target_update (int device, size_t mapnum, void **hostaddrs, size_t *sizes,
 		    unsigned char *kinds)
 {
-  if (resolve_device (device) == -1)
+  device = resolve_device (device);
+  if (device == -1)
     return;
+
+  if (device == 257)
+    gomp_update (mapnum, hostaddrs, sizes, kinds);
 }
 
 void
--- libgomp/libgomp.h.jj	2013-09-09 17:41:02.388429108 +0200
+++ libgomp/libgomp.h	2013-09-13 12:19:13.489052710 +0200
@@ -214,18 +214,23 @@ struct gomp_team_state
   unsigned long static_trip;
 };
 
-/* These are the OpenMP 3.0 Internal Control Variables described in
+struct target_mem_desc;
+
+/* These are the OpenMP 4.0 Internal Control Variables described in
    section 2.3.1.  Those described as having one copy per task are
    stored within the structure; those described as having one copy
    for the whole program are (naturally) global variables.  */
-
+   
 struct gomp_task_icv
 {
   unsigned long nthreads_var;
   enum gomp_schedule_type run_sched_var;
   int run_sched_modifier;
+  int default_device_var;
   bool dyn_var;
   bool nest_var;
+  /* Internal ICV.  */
+  struct target_mem_desc *target_data;
 };
 
 extern struct gomp_task_icv gomp_global_icv;
@@ -496,6 +501,10 @@ extern void gomp_team_start (void (*) (v
 			     struct gomp_team *);
 extern void gomp_team_end (void);
 
+/* target.c */
+
+extern int gomp_get_num_devices (void);
+
 /* work.c */
 
 extern void gomp_init_work_share (struct gomp_work_share *, bool, unsigned);
--- libgomp/env.c.jj	2013-09-09 17:41:02.335429381 +0200
+++ libgomp/env.c	2013-09-12 17:39:42.435446713 +0200
@@ -56,6 +56,7 @@ struct gomp_task_icv gomp_global_icv = {
   .nthreads_var = 1,
   .run_sched_var = GFS_DYNAMIC,
   .run_sched_modifier = 1,
+  .default_device_var = 0,
   .dyn_var = false,
   .nest_var = false
 };
@@ -188,6 +189,24 @@ parse_unsigned_long (const char *name, u
   return false;
 }
 
+/* Parse a positive int environment variable.  Return true if one was
+   present and it was successfully parsed.  */
+
+static bool
+parse_int (const char *name, int *pvalue, bool allow_zero)
+{
+  unsigned long value;
+  if (!parse_unsigned_long (name, &value, allow_zero))
+    return false;
+  if (value > INT_MAX)
+    {
+      gomp_error ("Invalid value for environment variable %s", name);
+      return false;
+    }
+  *pvalue = (int) value;
+  return true;
+}
+
 /* Parse an unsigned long list environment variable.  Return true if one was
    present and it was successfully parsed.  */
 
@@ -658,8 +677,9 @@ handle_omp_display_env (bool proc_bind,
 
 /* FIXME: Unimplemented OpenMP 4.0 environment variables.
   fprintf (stderr, "  OMP_PLACES = ''\n");
-  fprintf (stderr, "  OMP_CANCELLATION = ''\n");
-  fprintf (stderr, "  OMP_DEFAULT_DEVICE = ''\n"); */
+  fprintf (stderr, "  OMP_CANCELLATION = ''\n"); */
+  fprintf (stderr, "  OMP_DEFAULT_DEVICE = '%d'\n",
+	   gomp_global_icv.default_device_var);
 
   if (verbose)
     {
@@ -699,6 +719,7 @@ initialize_env (void)
   parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var);
   parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var);
   parse_boolean ("OMP_PROC_BIND", &bind_var);
+  parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true);
   parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var,
 		       true);
   parse_unsigned_long ("OMP_THREAD_LIMIT", &gomp_thread_limit_var, false);
@@ -881,36 +902,41 @@ omp_get_proc_bind (void)
 void
 omp_set_default_device (int device_num)
 {
-  (void) device_num;
+  struct gomp_task_icv *icv = gomp_icv (true);
+  icv->default_device_var = device_num >= 0 ? device_num : 0;
 }
 
 int
 omp_get_default_device (void)
 {
-  return 0;
+  struct gomp_task_icv *icv = gomp_icv (false);
+  return icv->default_device_var;
 }
 
 int
 omp_get_num_devices (void)
 {
-  return 0;
+  return gomp_get_num_devices ();
 }
 
 int
 omp_get_num_teams (void)
 {
+  /* Hardcoded to 1 on host, MIC, HSAIL?  Maybe variable on PTX.  */
   return 1;
 }
 
 int
 omp_get_team_num (void)
 {
+  /* Hardcoded to 0 on host, MIC, HSAIL?  Maybe variable on PTX.  */
   return 0;
 }
 
 int
 omp_is_initial_device (void)
 {
+  /* Hardcoded to 1 on host, should be 0 on MIC, HSAIL, PTX.  */
   return 1;
 }
 


	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]