[gomp4.5] Handle #pragma omp declare target link

Ilya Verbin iverbin@gmail.com
Mon Nov 30 20:42:00 GMT 2015


On Mon, Nov 30, 2015 at 13:04:59 +0100, Jakub Jelinek wrote:
> On Fri, Nov 27, 2015 at 07:50:09PM +0300, Ilya Verbin wrote:
> > +	  /* Most significant bit of the size marks such vars.  */
> > +	  unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
> > +	  isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node) * 8 - 1);
> 
> That supposedly should be BITS_PER_UNIT instead of 8.

Fixed.

> > diff --git a/gcc/varpool.c b/gcc/varpool.c
> > index 36f19a6..cbd1e05 100644
> > --- a/gcc/varpool.c
> > +++ b/gcc/varpool.c
> > @@ -561,17 +561,21 @@ varpool_node::assemble_decl (void)
> >       are not real variables, but just info for debugging and codegen.
> >       Unfortunately at the moment emutls is not updating varpool correctly
> >       after turning real vars into value_expr vars.  */
> > +#ifndef ACCEL_COMPILER
> >    if (DECL_HAS_VALUE_EXPR_P (decl)
> >        && !targetm.have_tls)
> >      return false;
> > +#endif
> >  
> >    /* Hard register vars do not need to be output.  */
> >    if (DECL_HARD_REGISTER (decl))
> >      return false;
> >  
> > +#ifndef ACCEL_COMPILER
> >    gcc_checking_assert (!TREE_ASM_WRITTEN (decl)
> >  		       && TREE_CODE (decl) == VAR_DECL
> >  		       && !DECL_HAS_VALUE_EXPR_P (decl));
> > +#endif
> 
> This looks wrong, both of these clearly could affect anything with
> DECL_HAS_VALUE_EXPR_P, not just the link vars.
> So, if you need to handle the "omp declare target link" vars specially,
> you should only handle those specially and nothing else.  And please try to
> explain why.

Actually these ifndefs are not needed, because assemble_decl never will be
called by accel compiler for original link vars.  I've added a check into
output_in_order, but missed a second place where assemble_decl is called -
symbol_table::output_variables.  So, fixed now.

> > @@ -1005,13 +1026,18 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
> >    for (i = 0; i < num_vars; i++)
> >      {
> >        struct addr_pair *target_var = &target_table[num_funcs + i];
> > -      if (target_var->end - target_var->start
> > -	  != (uintptr_t) host_var_table[i * 2 + 1])
> > +      uintptr_t target_size = target_var->end - target_var->start;
> > +
> > +      /* Most significant bit of the size marks "omp declare target link"
> > +	 variables.  */
> > +      bool is_link = target_size & (1ULL << (sizeof (uintptr_t) * 8 - 1));
> 
> __CHAR_BIT__ here instead of 8?

Fixed.

> > @@ -1019,7 +1045,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
> >        k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1];
> >        k->tgt = tgt;
> >        k->tgt_offset = target_var->start;
> > -      k->refcount = REFCOUNT_INFINITY;
> > +      k->refcount = is_link ? REFCOUNT_LINK : REFCOUNT_INFINITY;
> >        k->async_refcount = 0;
> >        array->left = NULL;
> >        array->right = NULL;
> 
> Do we need to do anything in gomp_unload_image_from_device ?
> I mean at least in questionable programs that for link vars don't decrement
> the refcount of the var that replaced the link var to 0 first before
> dlclosing the library.
> At least host_var_table[j * 2 + 1] will have the MSB set, so we need to
> handle it differently.  Perhaps for that case perform a lookup, and if we
> get something which has link_map non-NULL, first perform as if there is
> target exit data delete (var) on it first?

You're right, it doesn't deallocate memory on the device if DSO leaves nonzero
refcount.  And currently host compiler doesn't set MSB in host_var_table, it's
set only by accel compiler.  But it's possible to do splay_tree_lookup for each
var to determine whether is it linked or not, like in the patch bellow.
Or do you prefer to set the bit in host compiler too?  It requires
lookup_attribute ("omp declare target link") for all vars in the table during
compilation, but allows to do splay_tree_lookup at run-time only for vars with
MSB set in host_var_table.
Unfortunately, calling gomp_exit_data from gomp_unload_image_from_device works
only for DSO, but it crashed when an executable leaves nonzero refcount, because
target device may be already uninitialized from plugin's __run_exit_handlers
(and it is in case of intelmic), so gomp_exit_data cannot run free_func.
Is it possible do add some atexit (...) to libgomp, which will set shutting_down
flag, and just do nothing in gomp_unload_image_from_device if it is set?


diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 369574f..b73caa1 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -822,6 +822,8 @@ const struct attribute_spec c_common_attribute_table[] =
 			      handle_simd_attribute, false },
   { "omp declare target",     0, 0, true, false, false,
 			      handle_omp_declare_target_attribute, false },
+  { "omp declare target link", 0, 0, true, false, false,
+			      handle_omp_declare_target_attribute, false },
   { "alloc_align",	      1, 1, false, true, true,
 			      handle_alloc_align_attribute, false },
   { "assume_aligned",	      1, 2, false, true, true,
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index f73d9a7..8bc70f0 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -2204,6 +2204,13 @@ output_in_order (bool no_reorder)
 	  break;
 
 	case ORDER_VAR:
+#ifdef ACCEL_COMPILER
+	  /* Do not assemble "omp declare target link" vars.  */
+	  if (DECL_HAS_VALUE_EXPR_P (nodes[i].u.v->decl)
+	      && lookup_attribute ("omp declare target link",
+				   DECL_ATTRIBUTES (nodes[i].u.v->decl)))
+	    break;
+#endif
 	  nodes[i].u.v->assemble_decl ();
 	  break;
 
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 7fff12f..040e7d8 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -7861,7 +7861,9 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
 	  n = splay_tree_lookup (ctx->variables, (splay_tree_key) decl);
 	  if ((ctx->region_type & ORT_TARGET) != 0
 	      && !(n->value & GOVD_SEEN)
-	      && GOMP_MAP_ALWAYS_P (OMP_CLAUSE_MAP_KIND (c)) == 0)
+	      && GOMP_MAP_ALWAYS_P (OMP_CLAUSE_MAP_KIND (c)) == 0
+	      && !lookup_attribute ("omp declare target link",
+				    DECL_ATTRIBUTES (decl)))
 	    {
 	      remove = true;
 	      /* For struct element mapping, if struct is never referenced
diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
index b1e2d6e..37aa197 100644
--- a/gcc/lto/lto.c
+++ b/gcc/lto/lto.c
@@ -49,6 +49,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "params.h"
 #include "ipa-utils.h"
 #include "gomp-constants.h"
+#include "stringpool.h"
+#include "fold-const.h"
 
 
 /* Number of parallel tasks to run, -1 if we want to use GNU Make jobserver.  */
@@ -3218,6 +3220,37 @@ lto_init (void)
 #endif
 }
 
+/* Create artificial pointers for "omp declare target link" vars.  */
+
+static void
+offload_handle_link_vars (void)
+{
+#ifdef ACCEL_COMPILER
+  varpool_node *var;
+  FOR_EACH_VARIABLE (var)
+    if (lookup_attribute ("omp declare target link",
+			  DECL_ATTRIBUTES (var->decl)))
+      {
+	tree type = build_pointer_type (TREE_TYPE (var->decl));
+	tree link_ptr_var = make_node (VAR_DECL);
+	TREE_TYPE (link_ptr_var) = type;
+	TREE_USED (link_ptr_var) = 1;
+	TREE_STATIC (link_ptr_var) = 1;
+	DECL_MODE (link_ptr_var) = TYPE_MODE (type);
+	DECL_SIZE (link_ptr_var) = TYPE_SIZE (type);
+	DECL_SIZE_UNIT (link_ptr_var) = TYPE_SIZE_UNIT (type);
+	DECL_ARTIFICIAL (link_ptr_var) = 1;
+	tree var_name = DECL_ASSEMBLER_NAME (var->decl);
+	char *new_name
+	  = ACONCAT ((IDENTIFIER_POINTER (var_name), "_linkptr", NULL));
+	DECL_NAME (link_ptr_var) = get_identifier (new_name);
+	SET_DECL_ASSEMBLER_NAME (link_ptr_var, DECL_NAME (link_ptr_var));
+	SET_DECL_VALUE_EXPR (var->decl, build_simple_mem_ref (link_ptr_var));
+	DECL_HAS_VALUE_EXPR_P (var->decl) = 1;
+      }
+#endif
+}
+
 
 /* Main entry point for the GIMPLE front end.  This front end has
    three main personalities:
@@ -3266,6 +3299,8 @@ lto_main (void)
 
   if (!seen_error ())
     {
+      offload_handle_link_vars ();
+
       /* If WPA is enabled analyze the whole call graph and create an
 	 optimization plan.  Otherwise, read in all the function
 	 bodies and continue with optimization.  */
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index f17a828..d2f12dd 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -2010,7 +2010,8 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
 	  decl = OMP_CLAUSE_DECL (c);
 	  /* Global variables with "omp declare target" attribute
 	     don't need to be copied, the receiver side will use them
-	     directly.  */
+	     directly.  However, global variables with "omp declare target link"
+	     attribute need to be copied.  */
 	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
 	      && DECL_P (decl)
 	      && ((OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FIRSTPRIVATE_POINTER
@@ -2018,7 +2019,9 @@ scan_sharing_clauses (tree clauses, omp_context *ctx)
 		       != GOMP_MAP_FIRSTPRIVATE_REFERENCE))
 		  || TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
 	      && is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx))
-	      && varpool_node::get_create (decl)->offloadable)
+	      && varpool_node::get_create (decl)->offloadable
+	      && !lookup_attribute ("omp declare target link",
+				    DECL_ATTRIBUTES (decl)))
 	    break;
 	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
 	      && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER)
@@ -18485,13 +18488,36 @@ add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
   for (unsigned i = 0; i < len; i++)
     {
       tree it = (*v_decls)[i];
-      bool is_function = TREE_CODE (it) != VAR_DECL;
+      bool is_var = TREE_CODE (it) == VAR_DECL;
+      bool is_link_var
+	= is_var && DECL_HAS_VALUE_EXPR_P (it)
+	  && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));
+
+      tree size = NULL_TREE;
+      if (is_var)
+	size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));
+
+      tree addr;
+      if (!is_link_var)
+	addr = build_fold_addr_expr (it);
+      else
+	{
+	  tree value_expr = DECL_VALUE_EXPR (it);
+	  tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
+	  varpool_node::finalize_decl (link_ptr_decl);
+	  /* For "omp declare target link" var use address of the pointer
+	     instead of address of the var.  */
+	  addr = build_fold_addr_expr (link_ptr_decl);
+	  /* Most significant bit of the size marks such vars.  */
+	  unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
+	  isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node)
+			    * BITS_PER_UNIT - 1);
+	  size = wide_int_to_tree (const_ptr_type_node, isize);
+	}
 
-      CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, build_fold_addr_expr (it));
-      if (!is_function)
-	CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE,
-				fold_convert (const_ptr_type_node,
-					      DECL_SIZE_UNIT (it)));
+      CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr);
+      if (is_var)
+	CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size);
     }
 }
 
@@ -19728,4 +19754,84 @@ make_pass_oacc_device_lower (gcc::context *ctxt)
   return new pass_oacc_device_lower (ctxt);
 }
 
+/* "omp declare target link" handling pass.  */
+
+namespace {
+
+const pass_data pass_data_omp_target_link =
+{
+  GIMPLE_PASS,			/* type */
+  "omptargetlink",		/* name */
+  OPTGROUP_NONE,		/* optinfo_flags */
+  TV_NONE,			/* tv_id */
+  PROP_ssa,			/* properties_required */
+  0,				/* properties_provided */
+  0,				/* properties_destroyed */
+  0,				/* todo_flags_start */
+  TODO_update_ssa,		/* todo_flags_finish */
+};
+
+class pass_omp_target_link : public gimple_opt_pass
+{
+public:
+  pass_omp_target_link (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_omp_target_link, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *fun)
+    {
+#ifdef ACCEL_COMPILER
+      tree attrs = DECL_ATTRIBUTES (fun->decl);
+      return lookup_attribute ("omp declare target", attrs)
+	     || lookup_attribute ("omp target entrypoint", attrs);
+#else
+      (void) fun;
+      return false;
+#endif
+    }
+
+  virtual unsigned execute (function *);
+};
+
+/* Callback for walk_gimple_stmt used to scan for link var operands.  */
+
+static tree
+find_link_var_op (tree *tp, int *walk_subtrees, void *)
+{
+  tree t = *tp;
+
+  if (TREE_CODE (t) == VAR_DECL && DECL_HAS_VALUE_EXPR_P (t)
+      && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)))
+    {
+      *walk_subtrees = 0;
+      return t;
+    }
+
+  return NULL_TREE;
+}
+
+unsigned
+pass_omp_target_link::execute (function *fun)
+{
+  basic_block bb;
+  FOR_EACH_BB_FN (bb, fun)
+    {
+      gimple_stmt_iterator gsi;
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL))
+	  gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
+    }
+
+  return 0;
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_omp_target_link (gcc::context *ctxt)
+{
+  return new pass_omp_target_link (ctxt);
+}
+
 #include "gt-omp-low.h"
diff --git a/gcc/passes.def b/gcc/passes.def
index 28cb4c1..1f9b5d4 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -170,6 +170,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_fixup_cfg);
   NEXT_PASS (pass_lower_eh_dispatch);
   NEXT_PASS (pass_oacc_device_lower);
+  NEXT_PASS (pass_omp_target_link);
   NEXT_PASS (pass_all_optimizations);
   PUSH_INSERT_PASSES_WITHIN (pass_all_optimizations)
       NEXT_PASS (pass_remove_cgraph_callee_edges);
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 9704918..ce1f3eb 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -415,6 +415,7 @@ extern gimple_opt_pass *make_pass_lower_omp (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_diagnose_omp_blocks (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_expand_omp (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_expand_omp_ssa (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_omp_target_link (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_oacc_device_lower (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_object_sizes (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_strlen (gcc::context *ctxt);
diff --git a/gcc/varpool.c b/gcc/varpool.c
index 36f19a6..f269826 100644
--- a/gcc/varpool.c
+++ b/gcc/varpool.c
@@ -738,6 +738,13 @@ symbol_table::output_variables (void)
       /* Handled in output_in_order.  */
       if (node->no_reorder)
 	continue;
+#ifdef ACCEL_COMPILER
+      /* Do not assemble "omp declare target link" vars.  */
+      if (DECL_HAS_VALUE_EXPR_P (node->decl)
+	  && lookup_attribute ("omp declare target link",
+			       DECL_ATTRIBUTES (node->decl)))
+	continue;
+#endif
       if (node->assemble_decl ())
         changed = true;
     }
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index c467f97..ea63248 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -817,6 +817,9 @@ struct target_mem_desc {
 
 /* Special value for refcount - infinity.  */
 #define REFCOUNT_INFINITY (~(uintptr_t) 0)
+/* Special value for refcount - tgt_offset contains target address of the
+   artificial pointer to "omp declare target link" object.  */
+#define REFCOUNT_LINK (~(uintptr_t) 1)
 
 struct splay_tree_key_s {
   /* Address of the host object.  */
@@ -831,6 +834,8 @@ struct splay_tree_key_s {
   uintptr_t refcount;
   /* Asynchronous reference count.  */
   uintptr_t async_refcount;
+  /* Pointer to the original mapping of "omp declare target link" object.  */
+  splay_tree_key link_key;
 };
 
 /* The comparison function.  */
diff --git a/libgomp/target.c b/libgomp/target.c
index cf9d0e6..bad78a0 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -453,7 +453,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 	}
       else
 	n = splay_tree_lookup (mem_map, &cur_node);
-      if (n)
+      if (n && n->refcount != REFCOUNT_LINK)
 	gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
 				kind & typemask);
       else
@@ -617,11 +617,19 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 	    else
 	      k->host_end = k->host_start + sizeof (void *);
 	    splay_tree_key n = splay_tree_lookup (mem_map, k);
-	    if (n)
+	    if (n && n->refcount != REFCOUNT_LINK)
 	      gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
 				      kind & typemask);
 	    else
 	      {
+		k->link_key = NULL;
+		if (n && n->refcount == REFCOUNT_LINK)
+		  {
+		    /* Replace target address of the pointer with target address
+		       of mapped object in the splay tree.  */
+		    splay_tree_remove (mem_map, n);
+		    k->link_key = n;
+		  }
 		size_t align = (size_t) 1 << (kind >> rshift);
 		tgt->list[i].key = k;
 		k->tgt = tgt;
@@ -741,6 +749,16 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
 				kind);
 		  }
+
+		if (k->link_key)
+		  {
+		    /* Set link pointer on target to the device address of the
+		       mapped object.  */
+		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
+		    devicep->host2dev_func (devicep->target_id,
+					    (void *) n->tgt_offset,
+					    &tgt_addr, sizeof (void *));
+		  }
 		array++;
 	      }
 	  }
@@ -866,6 +884,9 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
       if (do_unmap)
 	{
 	  splay_tree_remove (&devicep->mem_map, k);
+	  if (k->link_key)
+	    splay_tree_insert (&devicep->mem_map,
+			       (splay_tree_node) k->link_key);
 	  if (k->tgt->refcount > 1)
 	    k->tgt->refcount--;
 	  else
@@ -937,6 +958,76 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
   gomp_mutex_unlock (&devicep->lock);
 }
 
+static void
+gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
+		void **hostaddrs, size_t *sizes, unsigned short *kinds,
+		bool do_lock)
+{
+  const int typemask = 0xff;
+  size_t i;
+  if (do_lock)
+    gomp_mutex_lock (&devicep->lock);
+  for (i = 0; i < mapnum; i++)
+    {
+      struct splay_tree_key_s cur_node;
+      unsigned char kind = kinds[i] & typemask;
+      switch (kind)
+	{
+	case GOMP_MAP_FROM:
+	case GOMP_MAP_ALWAYS_FROM:
+	case GOMP_MAP_DELETE:
+	case GOMP_MAP_RELEASE:
+	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
+	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
+	  cur_node.host_start = (uintptr_t) hostaddrs[i];
+	  cur_node.host_end = cur_node.host_start + sizes[i];
+	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
+			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
+	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
+	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
+	  if (!k)
+	    continue;
+
+	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
+	    k->refcount--;
+	  if ((kind == GOMP_MAP_DELETE
+	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
+	      && k->refcount != REFCOUNT_INFINITY)
+	    k->refcount = 0;
+
+	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
+	      || kind == GOMP_MAP_ALWAYS_FROM)
+	    devicep->dev2host_func (devicep->target_id,
+				    (void *) cur_node.host_start,
+				    (void *) (k->tgt->tgt_start + k->tgt_offset
+					      + cur_node.host_start
+					      - k->host_start),
+				    cur_node.host_end - cur_node.host_start);
+	  if (k->refcount == 0)
+	    {
+	      splay_tree_remove (&devicep->mem_map, k);
+	      if (k->link_key)
+		splay_tree_insert (&devicep->mem_map,
+				   (splay_tree_node) k->link_key);
+	      if (k->tgt->refcount > 1)
+		k->tgt->refcount--;
+	      else
+		gomp_unmap_tgt (k->tgt);
+	    }
+
+	  break;
+	default:
+	  if (do_lock)
+	    gomp_mutex_unlock (&devicep->lock);
+	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
+		      kind);
+	}
+    }
+
+  if (do_lock)
+    gomp_mutex_unlock (&devicep->lock);
+}
+
 /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
    And insert to splay tree the mapping between addresses from HOST_TABLE and
    from loaded target image.  We rely in the host and device compiler
@@ -996,6 +1087,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
       k->tgt_offset = target_table[i].start;
       k->refcount = REFCOUNT_INFINITY;
       k->async_refcount = 0;
+      k->link_key = NULL;
       array->left = NULL;
       array->right = NULL;
       splay_tree_insert (&devicep->mem_map, array);
@@ -1005,13 +1097,19 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
   for (i = 0; i < num_vars; i++)
     {
       struct addr_pair *target_var = &target_table[num_funcs + i];
-      if (target_var->end - target_var->start
-	  != (uintptr_t) host_var_table[i * 2 + 1])
+      uintptr_t target_size = target_var->end - target_var->start;
+
+      /* Most significant bit of the size marks "omp declare target link"
+	 variables.  */
+      bool is_link
+	= target_size & (1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1));
+
+      if (!is_link && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
 	{
 	  gomp_mutex_unlock (&devicep->lock);
 	  if (is_register_lock)
 	    gomp_mutex_unlock (&register_lock);
-	  gomp_fatal ("Can't map target variables (size mismatch)");
+	  gomp_fatal ("Cannot map target variables (size mismatch)");
 	}
 
       splay_tree_key k = &array->key;
@@ -1019,8 +1117,9 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
       k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1];
       k->tgt = tgt;
       k->tgt_offset = target_var->start;
-      k->refcount = REFCOUNT_INFINITY;
+      k->refcount = is_link ? REFCOUNT_LINK : REFCOUNT_INFINITY;
       k->async_refcount = 0;
+      k->link_key = NULL;
       array->left = NULL;
       array->right = NULL;
       splay_tree_insert (&devicep->mem_map, array);
@@ -1071,14 +1170,24 @@ gomp_unload_image_from_device (struct gomp_device_descr *devicep,
       splay_tree_remove (&devicep->mem_map, &k);
     }
 
+  bool has_link_var = false;
   for (j = 0; j < num_vars; j++)
     {
       k.host_start = (uintptr_t) host_var_table[j * 2];
       k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
+      splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
+      if (n->link_key)
+	{
+	  n->link_key = NULL;
+	  has_link_var = true;
+	  unsigned short kind = GOMP_MAP_DELETE;
+	  gomp_exit_data (devicep, 1, &host_var_table[j * 2],
+			  (size_t *) &host_var_table[j * 2 + 1], &kind, false);
+	}
       splay_tree_remove (&devicep->mem_map, &k);
     }
 
-  if (node)
+  if (node && !has_link_var)
     {
       free (node->tgt);
       free (node);
@@ -1586,69 +1695,6 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
   gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
 }
 
-static void
-gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
-		void **hostaddrs, size_t *sizes, unsigned short *kinds)
-{
-  const int typemask = 0xff;
-  size_t i;
-  gomp_mutex_lock (&devicep->lock);
-  for (i = 0; i < mapnum; i++)
-    {
-      struct splay_tree_key_s cur_node;
-      unsigned char kind = kinds[i] & typemask;
-      switch (kind)
-	{
-	case GOMP_MAP_FROM:
-	case GOMP_MAP_ALWAYS_FROM:
-	case GOMP_MAP_DELETE:
-	case GOMP_MAP_RELEASE:
-	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
-	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
-	  cur_node.host_start = (uintptr_t) hostaddrs[i];
-	  cur_node.host_end = cur_node.host_start + sizes[i];
-	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
-			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
-	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
-	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
-	  if (!k)
-	    continue;
-
-	  if (k->refcount > 0 && k->refcount != REFCOUNT_INFINITY)
-	    k->refcount--;
-	  if ((kind == GOMP_MAP_DELETE
-	       || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION)
-	      && k->refcount != REFCOUNT_INFINITY)
-	    k->refcount = 0;
-
-	  if ((kind == GOMP_MAP_FROM && k->refcount == 0)
-	      || kind == GOMP_MAP_ALWAYS_FROM)
-	    devicep->dev2host_func (devicep->target_id,
-				    (void *) cur_node.host_start,
-				    (void *) (k->tgt->tgt_start + k->tgt_offset
-					      + cur_node.host_start
-					      - k->host_start),
-				    cur_node.host_end - cur_node.host_start);
-	  if (k->refcount == 0)
-	    {
-	      splay_tree_remove (&devicep->mem_map, k);
-	      if (k->tgt->refcount > 1)
-		k->tgt->refcount--;
-	      else
-		gomp_unmap_tgt (k->tgt);
-	    }
-
-	  break;
-	default:
-	  gomp_mutex_unlock (&devicep->lock);
-	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
-		      kind);
-	}
-    }
-
-  gomp_mutex_unlock (&devicep->lock);
-}
-
 void
 GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
 			     size_t *sizes, unsigned short *kinds,
@@ -1718,7 +1764,7 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
 	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
 		       true, GOMP_MAP_VARS_ENTER_DATA);
   else
-    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds);
+    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, true);
 }
 
 bool
@@ -1778,7 +1824,7 @@ gomp_target_task_fn (void *data)
 		       &ttask->kinds[i], true, GOMP_MAP_VARS_ENTER_DATA);
   else
     gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
-		    ttask->kinds);
+		    ttask->kinds, true);
   return false;
 }
 
diff --git a/libgomp/testsuite/libgomp.c/target-link-1.c b/libgomp/testsuite/libgomp.c/target-link-1.c
new file mode 100644
index 0000000..681677c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-link-1.c
@@ -0,0 +1,63 @@
+struct S { int s, t; };
+
+int a = 1, b = 1;
+double c[27];
+struct S d = { 8888, 8888 };
+#pragma omp declare target link (a) to (b) link (c, d)
+
+int
+foo (void)
+{
+  return a++ + b++;
+}
+
+int
+bar (int n)
+{
+  int *p1 = &a;
+  int *p2 = &b;
+  c[n] += 2.0;
+  d.s -= 2;
+  d.t -= 2;
+  return *p1 + *p2 + d.s + d.t;
+}
+
+#pragma omp declare target (foo, bar)
+
+int
+main ()
+{
+  a = b = 2;
+  d.s = 17;
+  d.t = 18;
+
+  int res, n = 10;
+  #pragma omp target map (to: a, b, c, d) map (from: res)
+  {
+    res = foo () + foo ();
+    c[n] = 3.0;
+    res += bar (n);
+  }
+
+  int shared_mem = 0;
+  #pragma omp target map (alloc: shared_mem)
+    shared_mem = 1;
+
+  if ((shared_mem && res != (2 + 2) + (3 + 3) + (4 + 4 + 15 + 16))
+      || (!shared_mem && res != (2 + 1) + (3 + 2) + (4 + 3 + 15 + 16)))
+    __builtin_abort ();
+
+  #pragma omp target enter data map (to: c)
+  #pragma omp target update from (c)
+  res = (int) (c[n] + 0.5);
+  if ((shared_mem && res != 5) || (!shared_mem && res != 0))
+    __builtin_abort ();
+
+  #pragma omp target map (to: a, b) map (from: res)
+    res = foo ();
+
+  if ((shared_mem && res != 4 + 4) || (!shared_mem && res != 2 + 3))
+    __builtin_abort ();
+
+  return 0;
+}


  -- Ilya



More information about the Gcc-patches mailing list