This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [RFC] Offloading Support in libgomp


On Fri, Sep 13, 2013 at 05:41:03PM +0200, Marek Polacek wrote:
> On Fri, Sep 13, 2013 at 05:35:27PM +0200, Jakub Jelinek wrote:
> > On Fri, Sep 13, 2013 at 03:15:56PM +0200, Jakub Jelinek wrote:
> > > On Fri, Sep 13, 2013 at 05:11:09PM +0400, Michael V. Zolotukhin wrote:
> > > > > FYI, I'm attaching a WIP patch with the splay tree stuff.
> > > > Thanks, I'll take a look.  By the way, isn't it better to move splay-tree
> > > > implementation to a separate file?
> > > 
> > > As it is just a few routines, heavily modified from include/splay-tree.h
> > > (e.g. the data structures contain all the target.c specific stuff), and will be
> > > used just in target.c, I think it is fine to keep it in target.c.
> > 
> > Anyway, here is an updated patch that moves the splay stuff into
> > splay-tree.h and cleans up a bunch of other things.
> > 
> > Will commit once the http://gcc.gnu.org/ml/gcc-patches/2013-09/msg01044.html
> > issue is resolved.
> > 
> > 2013-09-13  Jakub Jelinek  <jakub@redhat.com>
> > 
> > 	* ipa-prop.c (ipa_compute_jump_functions_for_edge): Return early
> > 	for internal calls.
> 
> Seems like a wrong patch is attached.

You're right, here is the right one.

2013-09-13  Jakub Jelinek  <jakub@redhat.com>

	* splay-tree.h: New file.
	* target.c: Include stdbool.h.
	(splay_tree_node, splay_tree, splay_tree_key): New typedefs.
	(struct target_mem_desc, struct splay_tree_key_s): New structures.
	(splay_compare): New inline function.
	(gomp_get_num_devices): New function.
	(resolve_device): Use default_device_var ICV.  Add temporarily
	magic testing device number 257.
	(dev_splay_tree, dev_env_lock): New variables.
	(gomp_map_vars_existing, gomp_map_vars, gomp_unmap_tgt,
	gomp_unmap_vars, gomp_update): New functions.
	(GOMP_target, GOMP_target_data, GOMP_target_end_data,
	GOMP_target_update): Add support for magic testing device number 257.
	* libgomp.h (struct target_mem_desc): Forward declare.
	(struct gomp_task_icv): Add default_device_var and target_data.
	(gomp_get_num_devices): New prototype.
	* env.c (gomp_global_icv): Add default_device_var initializer.
	(parse_int): New function.
	(handle_omp_display_env): Print OMP_DEFAULT_DEVICE.
	(initialize_env): Initialize default_device_var.
	(omp_set_default_device): Set default_device_var ICV.
	(omp_get_default_device): Query default_device_var ICV.
	(omp_get_num_devices): Call gomp_get_num_devices.
	(omp_get_num_teams, omp_get_team_num, omp_is_initial_device): Add
	comments.

--- libgomp/splay-tree.h.jj	2013-09-13 16:32:48.381973559 +0200
+++ libgomp/splay-tree.h	2013-09-13 16:41:38.059701560 +0200
@@ -0,0 +1,232 @@
+/* A splay-tree datatype.
+   Copyright 1998-2013
+   Free Software Foundation, Inc.
+   Contributed by Mark Mitchell (mark@markmitchell.com).
+
+   This file is part of the GNU OpenMP Library (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* The splay tree code copied from include/splay-tree.h and adjusted,
+   so that all the data lives directly in splay_tree_node_s structure
+   and no extra allocations are needed.
+
+   Files including this header should before including it add:
+typedef struct splay_tree_node_s *splay_tree_node;
+typedef struct splay_tree_s *splay_tree;
+typedef struct splay_tree_key_s *splay_tree_key;
+   define splay_tree_key_s structure, and define
+   splay_compare inline function.  */
+
+/* For an easily readable description of splay-trees, see:
+
+     Lewis, Harry R. and Denenberg, Larry.  Data Structures and Their
+     Algorithms.  Harper-Collins, Inc.  1991.
+
+   The major feature of splay trees is that all basic tree operations
+   are amortized O(log n) time for a tree with n nodes.  */
+
+/* The nodes in the splay tree.  */
+struct splay_tree_node_s {
+  struct splay_tree_key_s key;
+  /* The left and right children, respectively.  */
+  splay_tree_node left;
+  splay_tree_node right;
+};
+
+/* The splay tree.  */
+struct splay_tree_s {
+  splay_tree_node root;
+};
+
+/* Rotate the edge joining the left child N with its parent P.  PP is the
+   grandparents' pointer to P.  */
+
+static inline void
+rotate_left (splay_tree_node *pp, splay_tree_node p, splay_tree_node n)
+{
+  splay_tree_node tmp;
+  tmp = n->right;
+  n->right = p;
+  p->left = tmp;
+  *pp = n;
+}
+
+/* Rotate the edge joining the right child N with its parent P.  PP is the
+   grandparents' pointer to P.  */
+
+static inline void
+rotate_right (splay_tree_node *pp, splay_tree_node p, splay_tree_node n)
+{
+  splay_tree_node tmp;
+  tmp = n->left;
+  n->left = p;
+  p->right = tmp;
+  *pp = n;
+}
+
+/* Bottom up splay of KEY.  */
+
+static void
+splay_tree_splay (splay_tree sp, splay_tree_key key)
+{
+  if (sp->root == NULL)
+    return;
+
+  do {
+    int cmp1, cmp2;
+    splay_tree_node n, c;
+
+    n = sp->root;
+    cmp1 = splay_compare (key, &n->key);
+
+    /* Found.  */
+    if (cmp1 == 0)
+      return;
+
+    /* Left or right?  If no child, then we're done.  */
+    if (cmp1 < 0)
+      c = n->left;
+    else
+      c = n->right;
+    if (!c)
+      return;
+
+    /* Next one left or right?  If found or no child, we're done
+       after one rotation.  */
+    cmp2 = splay_compare (key, &c->key);
+    if (cmp2 == 0
+	|| (cmp2 < 0 && !c->left)
+	|| (cmp2 > 0 && !c->right))
+      {
+	if (cmp1 < 0)
+	  rotate_left (&sp->root, n, c);
+	else
+	  rotate_right (&sp->root, n, c);
+	return;
+      }
+
+    /* Now we have the four cases of double-rotation.  */
+    if (cmp1 < 0 && cmp2 < 0)
+      {
+	rotate_left (&n->left, c, c->left);
+	rotate_left (&sp->root, n, n->left);
+      }
+    else if (cmp1 > 0 && cmp2 > 0)
+      {
+	rotate_right (&n->right, c, c->right);
+	rotate_right (&sp->root, n, n->right);
+      }
+    else if (cmp1 < 0 && cmp2 > 0)
+      {
+	rotate_right (&n->left, c, c->right);
+	rotate_left (&sp->root, n, n->left);
+      }
+    else if (cmp1 > 0 && cmp2 < 0)
+      {
+	rotate_left (&n->right, c, c->left);
+	rotate_right (&sp->root, n, n->right);
+      }
+  } while (1);
+}
+
+/* Insert a new NODE into SP.  The NODE shouldn't exist in the tree.  */
+
+static void
+splay_tree_insert (splay_tree sp, splay_tree_node node)
+{
+  int comparison = 0;
+
+  splay_tree_splay (sp, &node->key);
+
+  if (sp->root)
+    comparison = splay_compare (&sp->root->key, &node->key);
+
+  if (sp->root && comparison == 0)
+    abort ();
+  else
+    {
+      /* Insert it at the root.  */
+      if (sp->root == NULL)
+	node->left = node->right = NULL;
+      else if (comparison < 0)
+	{
+	  node->left = sp->root;
+	  node->right = node->left->right;
+	  node->left->right = NULL;
+	}
+      else
+	{
+	  node->right = sp->root;
+	  node->left = node->right->left;
+	  node->right->left = NULL;
+	}
+
+      sp->root = node;
+    }
+}
+
+/* Remove node with KEY from SP.  It is not an error if it did not exist.  */
+
+static void
+splay_tree_remove (splay_tree sp, splay_tree_key key)
+{
+  splay_tree_splay (sp, key);
+
+  if (sp->root && splay_compare (&sp->root->key, key) == 0)
+    {
+      splay_tree_node left, right;
+
+      left = sp->root->left;
+      right = sp->root->right;
+
+      /* One of the children is now the root.  Doesn't matter much
+	 which, so long as we preserve the properties of the tree.  */
+      if (left)
+	{
+	  sp->root = left;
+
+	  /* If there was a right child as well, hang it off the
+	     right-most leaf of the left child.  */
+	  if (right)
+	    {
+	      while (left->right)
+		left = left->right;
+	      left->right = right;
+	    }
+	}
+      else
+	sp->root = right;
+    }
+}
+
+/* Lookup KEY in SP, returning NODE if present, and NULL
+   otherwise.  */
+
+static splay_tree_key
+splay_tree_lookup (splay_tree sp, splay_tree_key key)
+{
+  splay_tree_splay (sp, key);
+
+  if (sp->root && splay_compare (&sp->root->key, key) == 0)
+    return &sp->root->key;
+  else
+    return NULL;
+}
--- libgomp/target.c.jj	2013-09-09 17:41:02.290429613 +0200
+++ libgomp/target.c	2013-09-13 16:41:24.514770386 +0200
@@ -26,15 +26,383 @@
    creation and termination.  */
 
 #include "libgomp.h"
+#include <stdbool.h>
 #include <stdlib.h>
 #include <string.h>
 
+/* Forward declaration for a node in the tree.  */
+typedef struct splay_tree_node_s *splay_tree_node;
+typedef struct splay_tree_s *splay_tree;
+typedef struct splay_tree_key_s *splay_tree_key;
+
+struct target_mem_desc {
+  /* Reference count.  */
+  uintptr_t refcount;
+  /* All the splay nodes allocated together.  */
+  splay_tree_node array;
+  /* Start of the target region.  */
+  uintptr_t tgt_start;
+  /* End of the targer region.  */
+  uintptr_t tgt_end;
+  /* Handle to free.  */
+  void *to_free;
+  /* Previous target_mem_desc.  */
+  struct target_mem_desc *prev;
+  /* Number of items in following list.  */
+  size_t list_count;
+  /* List of splay keys to remove (or decrease refcount)
+     at the end of region.  */
+  splay_tree_key list[];
+};
+
+struct splay_tree_key_s {
+  /* Address of the host object.  */
+  uintptr_t host_start;
+  /* Address immediately after the host object.  */
+  uintptr_t host_end;
+  /* Descriptor of the target memory.  */
+  struct target_mem_desc *tgt;
+  /* Offset from tgt->tgt_start to the start of the target object.  */
+  uintptr_t tgt_offset;
+  /* Reference count.  */
+  uintptr_t refcount;
+  /* True if data should be copied from device to host at the end.  */
+  bool copy_from;
+};
+
+/* The comparison function.  */
+
+static int
+splay_compare (splay_tree_key x, splay_tree_key y)
+{
+  if (x->host_start == x->host_end
+      && y->host_start == y->host_end)
+    return 0;
+  if (x->host_end <= y->host_start)
+    return -1;
+  if (x->host_start >= y->host_end)
+    return 1;
+  return 0;
+}
+
+#include "splay-tree.h"
+
+attribute_hidden int
+gomp_get_num_devices (void)
+{
+  /* FIXME: Scan supported accelerators when called the first time.  */
+  return 0;
+}
+
 static int
 resolve_device (int device)
 {
+  if (device == -1)
+    {
+      struct gomp_task_icv *icv = gomp_icv (false);
+      device = icv->default_device_var;
+    }
+  /* FIXME: Temporary hack for testing non-shared address spaces on host.  */
+  if (device == 257)
+    return 257;
+  if (device >= gomp_get_num_devices ())
+    return -1;
   return -1;
 }
 
+/* These variables would be per-accelerator (which doesn't have shared address
+   space.  */
+static struct splay_tree_s dev_splay_tree;
+static gomp_mutex_t dev_env_lock;
+
+/* Handle the case where splay_tree_lookup found oldn for newn.
+   Helper function of gomp_map_vars.  */
+
+static inline void
+gomp_map_vars_existing (splay_tree_key oldn, splay_tree_key newn,
+			unsigned char kind)
+{
+  if (oldn->host_start > newn->host_start
+      || oldn->host_end < newn->host_end)
+    gomp_fatal ("Trying to map into device [%p..%p) object when"
+		"[%p..%p) is already mapped",
+		(void *) newn->host_start, (void *) newn->host_end,
+		(void *) oldn->host_start, (void *) oldn->host_end);
+  if (((kind & 7) == 2 || (kind & 7) == 3)
+      && !oldn->copy_from
+      && oldn->host_start == newn->host_start
+      && oldn->host_end == newn->host_end)
+    oldn->copy_from = true;
+  oldn->refcount++;
+}
+
+static struct target_mem_desc *
+gomp_map_vars (size_t mapnum, void **hostaddrs, size_t *sizes,
+	       unsigned char *kinds, bool is_target)
+{
+  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
+  struct splay_tree_key_s cur_node;
+  struct target_mem_desc *tgt
+    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
+  tgt->list_count = mapnum;
+  tgt->refcount = 1;
+
+  if (mapnum == 0)
+    return tgt;
+
+  tgt_align = sizeof (void *);
+  tgt_size = 0;
+  if (is_target)
+    {
+      size_t align = 4 * sizeof (void *);
+      tgt_align = align;
+      tgt_size = mapnum * sizeof (void *);
+    }
+
+  gomp_mutex_lock (&dev_env_lock);
+  for (i = 0; i < mapnum; i++)
+    {
+      cur_node.host_start = (uintptr_t) hostaddrs[i];
+      if ((kinds[i] & 7) != 4)
+	cur_node.host_end = cur_node.host_start + sizes[i];
+      else
+	cur_node.host_end = cur_node.host_start + sizeof (void *);
+      splay_tree_key n = splay_tree_lookup (&dev_splay_tree, &cur_node);
+      if (n)
+	{
+	  tgt->list[i] = n;
+	  gomp_map_vars_existing (n, &cur_node, kinds[i]);
+	}
+      else
+	{
+	  size_t align = (size_t) 1 << (kinds[i] >> 3);
+	  tgt->list[i] = NULL;
+	  not_found_cnt++;
+	  if (tgt_align < align)
+	    tgt_align = align;
+	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
+	  tgt_size += cur_node.host_end - cur_node.host_start;
+	}
+    }
+
+  if (not_found_cnt || is_target)
+    {
+      /* FIXME: This would be accelerator memory allocation, not
+	 host, and should allocate tgt_align aligned tgt_size block
+	 of memory.  */
+      tgt->to_free = gomp_malloc (tgt_size + tgt_align - 1);
+      tgt->tgt_start = (uintptr_t) tgt->to_free;
+      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
+      tgt->tgt_end = tgt->tgt_start + tgt_size;
+    }
+
+  tgt_size = 0;
+  if (is_target)
+    tgt_size = mapnum * sizeof (void *);
+
+  if (not_found_cnt)
+    {
+      tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
+      splay_tree_node array = tgt->array;
+
+      for (i = 0; i < mapnum; i++)
+	if (tgt->list[i] == NULL)
+	  {
+	    splay_tree_key k = &array->key;
+	    k->host_start = (uintptr_t) hostaddrs[i];
+	    if ((kinds[i] & 7) != 4)
+	      k->host_end = k->host_start + sizes[i];
+	    else
+	      k->host_end = k->host_start + sizeof (void *);
+	    splay_tree_key n
+	      = splay_tree_lookup (&dev_splay_tree, k);
+	    if (n)
+	      {
+		tgt->list[i] = n;
+		gomp_map_vars_existing (n, k, kinds[i]);
+	      }
+	    else
+	      {
+		size_t align = (size_t) 1 << (kinds[i] >> 3);
+		tgt->list[i] = k;
+		tgt_size = (tgt_size + align - 1) & ~(align - 1);
+		k->tgt = tgt;
+		k->tgt_offset = tgt_size;
+		tgt_size += k->host_end - k->host_start;
+		if ((kinds[i] & 7) == 2 || (kinds[i] & 7) == 3)
+		  k->copy_from = true;
+		k->refcount = 1;
+		tgt->refcount++;
+		array->left = NULL;
+		array->right = NULL;
+		splay_tree_insert (&dev_splay_tree, array);
+		switch (kinds[i] & 7)
+		  {
+		  case 0: /* ALLOC */
+		  case 2: /* FROM */
+		    break;
+		  case 1: /* TO */
+		  case 3: /* TOFROM */
+		    /* FIXME: This is supposed to be copy from host to device
+		       memory.  Perhaps add some smarts, like if copying
+		       several adjacent fields from host to target, use some
+		       host buffer to avoid sending each var individually.  */
+		    memcpy ((void *) (tgt->tgt_start + k->tgt_offset),
+			    (void *) k->host_start,
+			    k->host_end - k->host_start);
+		    break;
+		  case 4: /* POINTER */
+		    cur_node.host_start
+		      = (uintptr_t) *(void **) k->host_start;
+		    /* Add bias to the pointer value.  */
+		    cur_node.host_start += sizes[i];
+		    cur_node.host_end = cur_node.host_start + 1;
+		    n = splay_tree_lookup (&dev_splay_tree, &cur_node);
+		    if (n == NULL)
+		      {
+			/* Could be possibly zero size array section.  */
+			cur_node.host_end--;
+			n = splay_tree_lookup (&dev_splay_tree, &cur_node);
+			if (n == NULL)
+			  {
+			    cur_node.host_start--;
+			    n = splay_tree_lookup (&dev_splay_tree, &cur_node);
+			    cur_node.host_start++;
+			  }
+		      }
+		    if (n == NULL)
+		      gomp_fatal ("Pointer target of array section "
+				  "wasn't mapped");
+		    cur_node.host_start -= n->host_start;
+		    cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
+					  + cur_node.host_start;
+		    /* At this point tgt_offset is target address of the
+		       array section.  Now subtract bias to get what we want
+		       to initialize the pointer with.  */
+		    cur_node.tgt_offset -= sizes[i];
+		    /* FIXME: host to device copy, see above FIXME comment.  */
+		    memcpy ((void *) (tgt->tgt_start + k->tgt_offset),
+			    (void *) &cur_node.tgt_offset,
+			    sizeof (void *));
+		    break;
+		  }
+		array++;
+	      }
+	  }
+    }
+  if (is_target)
+    {
+      for (i = 0; i < mapnum; i++)
+	{
+	  cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
+				+ tgt->list[i]->tgt_offset;
+	  /* FIXME: host to device copy, see above FIXME comment.  */
+	  memcpy ((void *) (tgt->tgt_start + i * sizeof (void *)),
+		  (void *) &cur_node.tgt_offset,
+		  sizeof (void *));
+	}
+    }
+
+  gomp_mutex_unlock (&dev_env_lock);
+  return tgt;
+}
+
+static void
+gomp_unmap_tgt (struct target_mem_desc *tgt)
+{
+  /* FIXME: Deallocate on target the tgt->tgt_start .. tgt->tgt_end
+     region.  */
+  if (tgt->tgt_end)
+    free (tgt->to_free);
+
+  free (tgt->array);
+  free (tgt);
+}
+
+static void
+gomp_unmap_vars (struct target_mem_desc *tgt)
+{
+  if (tgt->list_count == 0)
+    {
+      free (tgt);
+      return;
+    }
+
+  size_t i;
+  gomp_mutex_lock (&dev_env_lock);
+  for (i = 0; i < tgt->list_count; i++)
+    if (tgt->list[i]->refcount > 1)
+      tgt->list[i]->refcount--;
+    else
+      {
+	splay_tree_key k = tgt->list[i];
+	if (k->copy_from)
+	  /* FIXME: device to host copy.  */
+	  memcpy ((void *) k->host_start,
+		  (void *) (k->tgt->tgt_start + k->tgt_offset),
+		  k->host_end - k->host_start);
+	splay_tree_remove (&dev_splay_tree, k);
+	if (k->tgt->refcount > 1)
+	  k->tgt->refcount--;
+	else
+	  gomp_unmap_tgt (k->tgt);
+      }
+
+  if (tgt->refcount > 1)
+    tgt->refcount--;
+  else
+    gomp_unmap_tgt (tgt);
+  gomp_mutex_unlock (&dev_env_lock);
+}
+
+static void
+gomp_update (size_t mapnum, void **hostaddrs, size_t *sizes,
+	     unsigned char *kinds)
+{
+  size_t i;
+  struct splay_tree_key_s cur_node;
+
+  if (mapnum == 0)
+    return;
+
+  gomp_mutex_lock (&dev_env_lock);
+  for (i = 0; i < mapnum; i++)
+    if (sizes[i])
+      {
+	cur_node.host_start = (uintptr_t) hostaddrs[i];
+	cur_node.host_end = cur_node.host_start + sizes[i];
+	splay_tree_key n = splay_tree_lookup (&dev_splay_tree, &cur_node);
+	if (n)
+	  {
+	    if (n->host_start > cur_node.host_start
+		|| n->host_end < cur_node.host_end)
+	      gomp_fatal ("Trying to update [%p..%p) object when"
+			  "only [%p..%p) is mapped",
+			  (void *) cur_node.host_start,
+			  (void *) cur_node.host_end,
+			  (void *) n->host_start,
+			  (void *) n->host_end);
+	    if ((kinds[i] & 7) == 1)
+	      /* FIXME: host to device copy.  */
+	      memcpy ((void *) (n->tgt->tgt_start + n->tgt_offset
+				+ cur_node.host_start - n->host_start),
+		      (void *) cur_node.host_start,
+		      cur_node.host_end - cur_node.host_start);
+	    else if ((kinds[i] & 7) == 2)
+	      /* FIXME: device to host copy.  */
+	      memcpy ((void *) cur_node.host_start,
+		      (void *) (n->tgt->tgt_start + n->tgt_offset
+				+ cur_node.host_start - n->host_start),
+		      cur_node.host_end - cur_node.host_start);
+	  }
+	else
+	  gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
+		      (void *) cur_node.host_start,
+		      (void *) cur_node.host_end);
+      }
+  gomp_mutex_unlock (&dev_env_lock);
+}
+
 /* Called when encountering a target directive.  If DEVICE
    is -1, it means use device-var ICV.  If it is -2 (or any other value
    larger than last available hw device, use host fallback.
@@ -49,32 +417,77 @@ GOMP_target (int device, void (*fn) (voi
 	     size_t mapnum, void **hostaddrs, size_t *sizes,
 	     unsigned char *kinds)
 {
-  if (resolve_device (device) == -1)
+  device = resolve_device (device);
+  if (device == -1)
     {
+      /* Host fallback.  */
       fn (hostaddrs);
       return;
     }
+  if (device == 257)
+    {
+      struct target_mem_desc *tgt
+	= gomp_map_vars (mapnum, hostaddrs, sizes, kinds, true);
+      fn ((void *) tgt->tgt_start);
+      gomp_unmap_vars (tgt);
+    }
 }
 
 void
 GOMP_target_data (int device, size_t mapnum, void **hostaddrs, size_t *sizes,
 		  unsigned char *kinds)
 {
-  if (resolve_device (device) == -1)
-    return;
+  device = resolve_device (device);
+  if (device == -1)
+    {
+      /* Host fallback.  */
+      struct gomp_task_icv *icv = gomp_icv (false);
+      if (icv->target_data)
+	{
+	  /* Even when doing a host fallback, if there are any active
+	     #pragma omp target data constructs, need to remember the
+	     new #pragma omp target data, otherwise GOMP_target_end_data
+	     would get out of sync.  */
+	  struct target_mem_desc *tgt
+	    = gomp_map_vars (0, NULL, NULL, NULL, false);
+	  tgt->prev = icv->target_data;
+	  icv->target_data = tgt;
+	}
+      return;
+    }
+
+  if (device == 257)
+    {
+      struct target_mem_desc *tgt
+	= gomp_map_vars (mapnum, hostaddrs, sizes, kinds, false);
+      struct gomp_task_icv *icv = gomp_icv (true);
+      tgt->prev = icv->target_data;
+      icv->target_data = tgt;
+    }
 }
 
 void
 GOMP_target_end_data (void)
 {
+  struct gomp_task_icv *icv = gomp_icv (false);
+  if (icv->target_data)
+    {
+      struct target_mem_desc *tgt = icv->target_data;
+      icv->target_data = tgt->prev;
+      gomp_unmap_vars (tgt);
+    }
 }
 
 void
 GOMP_target_update (int device, size_t mapnum, void **hostaddrs, size_t *sizes,
 		    unsigned char *kinds)
 {
-  if (resolve_device (device) == -1)
+  device = resolve_device (device);
+  if (device == -1)
     return;
+
+  if (device == 257)
+    gomp_update (mapnum, hostaddrs, sizes, kinds);
 }
 
 void
--- libgomp/libgomp.h.jj	2013-09-09 17:41:02.388429108 +0200
+++ libgomp/libgomp.h	2013-09-13 12:19:13.489052710 +0200
@@ -214,18 +214,23 @@ struct gomp_team_state
   unsigned long static_trip;
 };
 
-/* These are the OpenMP 3.0 Internal Control Variables described in
+struct target_mem_desc;
+
+/* These are the OpenMP 4.0 Internal Control Variables described in
    section 2.3.1.  Those described as having one copy per task are
    stored within the structure; those described as having one copy
    for the whole program are (naturally) global variables.  */
-
+   
 struct gomp_task_icv
 {
   unsigned long nthreads_var;
   enum gomp_schedule_type run_sched_var;
   int run_sched_modifier;
+  int default_device_var;
   bool dyn_var;
   bool nest_var;
+  /* Internal ICV.  */
+  struct target_mem_desc *target_data;
 };
 
 extern struct gomp_task_icv gomp_global_icv;
@@ -496,6 +501,10 @@ extern void gomp_team_start (void (*) (v
 			     struct gomp_team *);
 extern void gomp_team_end (void);
 
+/* target.c */
+
+extern int gomp_get_num_devices (void);
+
 /* work.c */
 
 extern void gomp_init_work_share (struct gomp_work_share *, bool, unsigned);
--- libgomp/env.c.jj	2013-09-09 17:41:02.335429381 +0200
+++ libgomp/env.c	2013-09-12 17:39:42.435446713 +0200
@@ -56,6 +56,7 @@ struct gomp_task_icv gomp_global_icv = {
   .nthreads_var = 1,
   .run_sched_var = GFS_DYNAMIC,
   .run_sched_modifier = 1,
+  .default_device_var = 0,
   .dyn_var = false,
   .nest_var = false
 };
@@ -188,6 +189,24 @@ parse_unsigned_long (const char *name, u
   return false;
 }
 
+/* Parse a positive int environment variable.  Return true if one was
+   present and it was successfully parsed.  */
+
+static bool
+parse_int (const char *name, int *pvalue, bool allow_zero)
+{
+  unsigned long value;
+  if (!parse_unsigned_long (name, &value, allow_zero))
+    return false;
+  if (value > INT_MAX)
+    {
+      gomp_error ("Invalid value for environment variable %s", name);
+      return false;
+    }
+  *pvalue = (int) value;
+  return true;
+}
+
 /* Parse an unsigned long list environment variable.  Return true if one was
    present and it was successfully parsed.  */
 
@@ -658,8 +677,9 @@ handle_omp_display_env (bool proc_bind,
 
 /* FIXME: Unimplemented OpenMP 4.0 environment variables.
   fprintf (stderr, "  OMP_PLACES = ''\n");
-  fprintf (stderr, "  OMP_CANCELLATION = ''\n");
-  fprintf (stderr, "  OMP_DEFAULT_DEVICE = ''\n"); */
+  fprintf (stderr, "  OMP_CANCELLATION = ''\n"); */
+  fprintf (stderr, "  OMP_DEFAULT_DEVICE = '%d'\n",
+	   gomp_global_icv.default_device_var);
 
   if (verbose)
     {
@@ -699,6 +719,7 @@ initialize_env (void)
   parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var);
   parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var);
   parse_boolean ("OMP_PROC_BIND", &bind_var);
+  parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true);
   parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var,
 		       true);
   parse_unsigned_long ("OMP_THREAD_LIMIT", &gomp_thread_limit_var, false);
@@ -881,36 +902,41 @@ omp_get_proc_bind (void)
 void
 omp_set_default_device (int device_num)
 {
-  (void) device_num;
+  struct gomp_task_icv *icv = gomp_icv (true);
+  icv->default_device_var = device_num >= 0 ? device_num : 0;
 }
 
 int
 omp_get_default_device (void)
 {
-  return 0;
+  struct gomp_task_icv *icv = gomp_icv (false);
+  return icv->default_device_var;
 }
 
 int
 omp_get_num_devices (void)
 {
-  return 0;
+  return gomp_get_num_devices ();
 }
 
 int
 omp_get_num_teams (void)
 {
+  /* Hardcoded to 1 on host, MIC, HSAIL?  Maybe variable on PTX.  */
   return 1;
 }
 
 int
 omp_get_team_num (void)
 {
+  /* Hardcoded to 0 on host, MIC, HSAIL?  Maybe variable on PTX.  */
   return 0;
 }
 
 int
 omp_is_initial_device (void)
 {
+  /* Hardcoded to 1 on host, should be 0 on MIC, HSAIL, PTX.  */
   return 1;
 }
 


	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]