This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[gomp-nvptx 11/13] pick GOMP_target_ext changes from the hsa branch


This adds necessary plumbing to spawn multiple teams.

To be reverted on this branch prior to merge.
---
 gcc/builtin-types.def                            |   7 +-
 gcc/fortran/types.def                            |   5 +-
 gcc/omp-builtins.def                             |   2 +-
 gcc/omp-low.c                                    | 149 ++++++++++++++++---
 include/gomp-constants.h                         |  21 +++
 libgomp/libgomp.h                                |  12 +-
 libgomp/libgomp_g.h                              |   3 +-
 libgomp/oacc-host.c                              |   3 +-
 libgomp/target.c                                 | 179 +++++++++++++++++------
 libgomp/task.c                                   |   3 +-
 liboffloadmic/plugin/libgomp-plugin-intelmic.cpp |   4 +-
 11 files changed, 299 insertions(+), 89 deletions(-)

diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def
index c68fb19..33bee1d 100644
--- a/gcc/builtin-types.def
+++ b/gcc/builtin-types.def
@@ -555,10 +555,9 @@ DEF_FUNCTION_TYPE_9 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT,
 		     BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
 		     BT_PTR_FN_VOID_PTR_PTR, BT_LONG, BT_LONG,
 		     BT_BOOL, BT_UINT, BT_PTR, BT_INT)
-
-DEF_FUNCTION_TYPE_10 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT,
-		      BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR,
-		      BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_INT, BT_INT)
+DEF_FUNCTION_TYPE_9 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR,
+		     BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR,
+		     BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_PTR)
 
 DEF_FUNCTION_TYPE_11 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG,
 		      BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
diff --git a/gcc/fortran/types.def b/gcc/fortran/types.def
index a37e856..5838f04 100644
--- a/gcc/fortran/types.def
+++ b/gcc/fortran/types.def
@@ -220,10 +220,9 @@ DEF_FUNCTION_TYPE_9 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT,
 		     BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
 		     BT_PTR_FN_VOID_PTR_PTR, BT_LONG, BT_LONG,
 		     BT_BOOL, BT_UINT, BT_PTR, BT_INT)
-
-DEF_FUNCTION_TYPE_10 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT,
+DEF_FUNCTION_TYPE_9 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR,
 		      BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR,
-		      BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_INT, BT_INT)
+		      BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_PTR)
 
 DEF_FUNCTION_TYPE_11 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG,
 		      BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def
index 35f5014..35c2724 100644
--- a/gcc/omp-builtins.def
+++ b/gcc/omp-builtins.def
@@ -341,7 +341,7 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SINGLE_COPY_START, "GOMP_single_copy_start",
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SINGLE_COPY_END, "GOMP_single_copy_end",
 		  BT_FN_VOID_PTR, ATTR_NOTHROW_LEAF_LIST)
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET, "GOMP_target_ext",
-		  BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT,
+		  BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR,
 		  ATTR_NOTHROW_LIST)
 DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET_DATA, "GOMP_target_data_ext",
 		  BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST)
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 8996b8d..2e02c6f 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -12731,6 +12731,130 @@ mark_loops_in_oacc_kernels_region (basic_block region_entry,
     loop->in_oacc_kernels_region = true;
 }
 
+/* Build target argument identifier from the DEVICE identifier, value
+   identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */
+
+static tree
+get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
+{
+  tree t = build_int_cst (integer_type_node, device);
+  if (subseqent_param)
+    t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
+		     build_int_cst (integer_type_node,
+				    GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
+  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
+		   build_int_cst (integer_type_node, id));
+  return t;
+}
+
+/* Like above but return it in type that can be directly stored as an element
+   of the argument array.  */
+
+static tree
+get_target_argument_identifier (int device, bool subseqent_param, int id)
+{
+  tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
+  return fold_convert (ptr_type_node, t);
+}
+
+/* Return a target argument consisiting of DEVICE identifier, value identifier
+   ID, and the actual VALUE.  */
+
+static tree
+get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
+			   tree value)
+{
+  tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
+			fold_convert (integer_type_node, value),
+			build_int_cst (unsigned_type_node,
+				       GOMP_TARGET_ARG_VALUE_SHIFT));
+  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
+		   get_target_argument_identifier_1 (device, false, id));
+  t = fold_convert (ptr_type_node, t);
+  return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
+}
+
+/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
+   push one argument to ARGS with bot the DEVICE, ID and VALUE embeded in it,
+   otherwise push an iedntifier (with DEVICE and ID) and the VALUE in two
+   arguments.  */
+
+static void
+push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
+					 int id, tree value, vec <tree> *args)
+{
+  if (tree_fits_shwi_p (value)
+      && tree_to_shwi (value) > -(1 << 15)
+      && tree_to_shwi (value) < (1 << 15))
+    args->quick_push (get_target_argument_value (gsi, device, id, value));
+  else
+    {
+      args->quick_push (get_target_argument_identifier (device, true, id));
+      value = fold_convert (ptr_type_node, value);
+      value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
+					GSI_SAME_STMT);
+      args->quick_push (value);
+    }
+}
+
+/* Create an array of arguments that is then passed to GOMP_target.   */
+
+static tree
+get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
+{
+  auto_vec <tree, 6> args;
+  tree clauses = gimple_omp_target_clauses (tgt_stmt);
+  tree t, c = find_omp_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
+  if (c)
+    t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
+  else
+    t = integer_minus_one_node;
+  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
+					   GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
+
+  c = find_omp_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
+  if (c)
+    t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
+  else
+    t = integer_minus_one_node;
+  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
+					   GOMP_TARGET_ARG_THREAD_LIMIT, t,
+					   &args);
+
+#if 0
+  /* Add HSA-specific grid sizes, if available.  */
+  if (find_omp_clause (gimple_omp_target_clauses (tgt_stmt),
+		       OMP_CLAUSE__GRIDDIM_))
+    {
+      t = get_target_argument_identifier (GOMP_DEVICE_HSA, true,
+					  GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES);
+      args.quick_push (t);
+      args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
+    }
+#endif
+
+  /* Produce more, perhaps device specific, arguments here.  */
+
+  tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
+							  args.length () + 1),
+				  ".omp_target_args");
+  for (unsigned i = 0; i < args.length (); i++)
+    {
+      tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
+			 build_int_cst (integer_type_node, i),
+			 NULL_TREE, NULL_TREE);
+      gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
+			 GSI_SAME_STMT);
+    }
+  tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
+		     build_int_cst (integer_type_node, args.length ()),
+		     NULL_TREE, NULL_TREE);
+  gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
+		     GSI_SAME_STMT);
+  TREE_ADDRESSABLE (argarray) = 1;
+  return build_fold_addr_expr (argarray);
+}
+
 /* Expand the GIMPLE_OMP_TARGET starting at REGION.  */
 
 static void
@@ -13148,30 +13272,7 @@ expand_omp_target (struct omp_region *region)
 	depend = build_int_cst (ptr_type_node, 0);
       args.quick_push (depend);
       if (start_ix == BUILT_IN_GOMP_TARGET)
-	{
-	  c = find_omp_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
-	  if (c)
-	    {
-	      t = fold_convert (integer_type_node,
-				OMP_CLAUSE_NUM_TEAMS_EXPR (c));
-	      t = force_gimple_operand_gsi (&gsi, t, true, NULL,
-					    true, GSI_SAME_STMT);
-	    }
-	  else
-	    t = integer_minus_one_node;
-	  args.quick_push (t);
-	  c = find_omp_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
-	  if (c)
-	    {
-	      t = fold_convert (integer_type_node,
-				OMP_CLAUSE_THREAD_LIMIT_EXPR (c));
-	      t = force_gimple_operand_gsi (&gsi, t, true, NULL,
-					    true, GSI_SAME_STMT);
-	    }
-	  else
-	    t = integer_minus_one_node;
-	  args.quick_push (t);
-	}
+	args.quick_push (get_target_arguments (&gsi, entry_stmt));
       break;
     case BUILT_IN_GOACC_PARALLEL:
       {
diff --git a/include/gomp-constants.h b/include/gomp-constants.h
index dffd631..fef27e4 100644
--- a/include/gomp-constants.h
+++ b/include/gomp-constants.h
@@ -228,4 +228,25 @@ enum gomp_map_kind
 #define GOMP_LAUNCH_OP(X) (((X) >> GOMP_LAUNCH_OP_SHIFT) & 0xffff)
 #define GOMP_LAUNCH_OP_MAX 0xffff
 
+/* Bitmask to apply in order to find out the intended device of a target
+   argument.  */
+#define GOMP_TARGET_ARG_DEVICE_MASK		((1 << 7) - 1)
+/* The target argument is significant for all devices.  */
+#define GOMP_TARGET_ARG_DEVICE_ALL		0
+
+/* Flag set when the subsequent element in the device-specific argument
+   values.  */
+#define GOMP_TARGET_ARG_SUBSEQUENT_PARAM	(1 << 7)
+
+/* Bitmask to apply to a target argument to find out the value identifier.  */
+#define GOMP_TARGET_ARG_ID_MASK			(((1 << 8) - 1) << 8)
+/* Target argument index of NUM_TEAMS.  */
+#define GOMP_TARGET_ARG_NUM_TEAMS		(1 << 8)
+/* Target argument index of THREAD_LIMIT.  */
+#define GOMP_TARGET_ARG_THREAD_LIMIT		(2 << 8)
+
+/* If the value is directly embeded in target argument, it should be a 16-bit
+   at most and shifted by this many bits.  */
+#define GOMP_TARGET_ARG_VALUE_SHIFT		16
+
 #endif
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 31ffba0..1d137f1 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -499,6 +499,10 @@ struct gomp_target_task
   struct target_mem_desc *tgt;
   struct gomp_task *task;
   struct gomp_team *team;
+  /* Copies of firstprivate mapped data for shared memory accelerators.  */
+  void *firstprivate_copies;
+  /* Device-specific target arguments.  */
+  void **args;
   void *hostaddrs[];
 };
 
@@ -765,7 +769,8 @@ extern void gomp_task_maybe_wait_for_dependencies (void **);
 extern bool gomp_create_target_task (struct gomp_device_descr *,
 				     void (*) (void *), size_t, void **,
 				     size_t *, unsigned short *, unsigned int,
-				     void **, enum gomp_target_task_state);
+				     void **, void **,
+				     enum gomp_target_task_state);
 
 static void inline
 gomp_finish_task (struct gomp_task *task)
@@ -939,8 +944,9 @@ struct gomp_device_descr
   void *(*dev2host_func) (int, void *, const void *, size_t);
   void *(*host2dev_func) (int, void *, const void *, size_t);
   void *(*dev2dev_func) (int, void *, const void *, size_t);
-  void (*run_func) (int, void *, void *);
-  void (*async_run_func) (int, void *, void *, void *);
+  bool (*can_run_func) (void *);
+  void (*run_func) (int, void *, void *, void **);
+  void (*async_run_func) (int, void *, void *, void **, void *);
 
   /* Splay tree containing information about mapped memory regions.  */
   struct splay_tree_s mem_map;
diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h
index c238e6a..9c90d59 100644
--- a/libgomp/libgomp_g.h
+++ b/libgomp/libgomp_g.h
@@ -278,8 +278,7 @@ extern void GOMP_single_copy_end (void *);
 extern void GOMP_target (int, void (*) (void *), const void *,
 			 size_t, void **, size_t *, unsigned char *);
 extern void GOMP_target_ext (int, void (*) (void *), size_t, void **, size_t *,
-			     unsigned short *, unsigned int, void **,
-			     int, int);
+			     unsigned short *, unsigned int, void **, void **);
 extern void GOMP_target_data (int, const void *,
 			      size_t, void **, size_t *, unsigned char *);
 extern void GOMP_target_data_ext (int, size_t, void **, size_t *,
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
index 9874804..a769211 100644
--- a/libgomp/oacc-host.c
+++ b/libgomp/oacc-host.c
@@ -123,7 +123,8 @@ host_host2dev (int n __attribute__ ((unused)),
 }
 
 static void
-host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars)
+host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars,
+	  void **args __attribute__((unused)))
 {
   void (*fn)(void *) = (void (*)(void *)) fn_ptr;
 
diff --git a/libgomp/target.c b/libgomp/target.c
index cf9d0e6..f990a9e 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -1261,15 +1261,38 @@ gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
   *thr = old_thr;
 }
 
-/* Host fallback with firstprivate map-type handling.  */
+/* Calculate alignment and size requirements of a private copy of data shared
+   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */
 
-static void
-gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum,
-				   void **hostaddrs, size_t *sizes,
-				   unsigned short *kinds)
+static inline void
+calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
+				     unsigned short *kinds, size_t *tgt_align,
+				     size_t *tgt_size)
 {
-  size_t i, tgt_align = 0, tgt_size = 0;
-  char *tgt = NULL;
+  size_t i;
+  for (i = 0; i < mapnum; i++)
+    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
+      {
+	size_t align = (size_t) 1 << (kinds[i] >> 8);
+	if (*tgt_align < align)
+	  *tgt_align = align;
+	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
+	*tgt_size += sizes[i];
+      }
+}
+
+/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */
+
+static inline void
+copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
+			size_t *sizes, unsigned short *kinds, size_t tgt_align,
+			size_t tgt_size)
+{
+  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
+  if (al)
+    tgt += tgt_align - al;
+  tgt_size = 0;
+  size_t i;
   for (i = 0; i < mapnum; i++)
     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
       {
@@ -1277,28 +1300,53 @@ gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum,
 	if (tgt_align < align)
 	  tgt_align = align;
 	tgt_size = (tgt_size + align - 1) & ~(align - 1);
-	tgt_size += sizes[i];
+	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
+	hostaddrs[i] = tgt + tgt_size;
+	tgt_size = tgt_size + sizes[i];
       }
+}
+
+/* Host fallback with firstprivate map-type handling.  */
+
+static void
+gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum,
+				   void **hostaddrs, size_t *sizes,
+				   unsigned short *kinds)
+{
+  size_t tgt_align = 0, tgt_size = 0;
+  calculate_firstprivate_requirements (mapnum, sizes, kinds, &tgt_align,
+				       &tgt_size);
   if (tgt_align)
     {
-      tgt = gomp_alloca (tgt_size + tgt_align - 1);
-      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
-      if (al)
-	tgt += tgt_align - al;
-      tgt_size = 0;
-      for (i = 0; i < mapnum; i++)
-	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
-	  {
-	    size_t align = (size_t) 1 << (kinds[i] >> 8);
-	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
-	    memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
-	    hostaddrs[i] = tgt + tgt_size;
-	    tgt_size = tgt_size + sizes[i];
-	  }
+      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
+      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, tgt_align,
+			      tgt_size);
     }
   gomp_target_fallback (fn, hostaddrs);
 }
 
+/* Handle firstprivate map-type for shared memory devices and the host
+   fallback.  Return the pointer of firstprivate copies which has to be freed
+   after use.  */
+
+static void *
+gomp_target_unshare_firstprivate (size_t mapnum, void **hostaddrs,
+				  size_t *sizes, unsigned short *kinds)
+{
+  size_t tgt_align = 0, tgt_size = 0;
+  char *tgt = NULL;
+
+  calculate_firstprivate_requirements (mapnum, sizes, kinds, &tgt_align,
+				       &tgt_size);
+  if (tgt_align)
+    {
+      tgt = gomp_malloc (tgt_size + tgt_align - 1);
+      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, tgt_align,
+			      tgt_size);
+    }
+  return tgt;
+}
+
 /* Helper function of GOMP_target{,_ext} routines.  */
 
 static void *
@@ -1348,7 +1396,8 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
   struct target_mem_desc *tgt_vars
     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
 		     GOMP_MAP_VARS_TARGET);
-  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
+  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
+		     NULL);
   gomp_unmap_vars (tgt_vars, true);
 }
 
@@ -1356,6 +1405,15 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
    and several arguments have been added:
    FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
    DEPEND is array of dependencies, see GOMP_task for details.
+
+   ARGS is a pointer to an array consisting of a variable number of both
+   device-independent and device-specific arguments, which can take one two
+   elements where the first specifies for which device it is intended, the type
+   and optionally also the value.  If the value is not present in the first
+   one, the whole second element the actual value.  The last element of the
+   array is a single NULL.  Among the device independent can be for example
+   NUM_TEAMS and THREAD_LIMIT.
+
    NUM_TEAMS is positive if GOMP_teams will be called in the body with
    that value, or 1 if teams construct is not present, or 0, if
    teams construct does not have num_teams clause and so the choice is
@@ -1369,14 +1427,10 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
 void
 GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
 		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
-		 unsigned int flags, void **depend, int num_teams,
-		 int thread_limit)
+		 unsigned int flags, void **depend, void **args)
 {
   struct gomp_device_descr *devicep = resolve_device (device);
 
-  (void) num_teams;
-  (void) thread_limit;
-
   if (flags & GOMP_TARGET_FLAG_NOWAIT)
     {
       struct gomp_thread *thr = gomp_thread ();
@@ -1413,7 +1467,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
 	  && !thr->task->final_task)
 	{
 	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
-				   sizes, kinds, flags, depend,
+				   sizes, kinds, flags, depend, args,
 				   GOMP_TARGET_TASK_BEFORE_MAP);
 	  return;
 	}
@@ -1430,20 +1484,33 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
 	gomp_task_maybe_wait_for_dependencies (depend);
     }
 
+  void *fn_addr;
   if (devicep == NULL
-      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
+      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
     {
       gomp_target_fallback_firstprivate (fn, mapnum, hostaddrs, sizes, kinds);
       return;
     }
 
-  void *fn_addr = gomp_get_target_fn_addr (devicep, fn);
-
-  struct target_mem_desc *tgt_vars
-    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
-		     GOMP_MAP_VARS_TARGET);
-  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
-  gomp_unmap_vars (tgt_vars, true);
+  struct target_mem_desc *tgt_vars;
+  void *fpc = NULL;
+  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+    {
+      fpc = gomp_target_unshare_firstprivate (mapnum, hostaddrs, sizes, kinds);
+      tgt_vars = NULL;
+    }
+  else
+    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
+			      true, GOMP_MAP_VARS_TARGET);
+  devicep->run_func (devicep->target_id, fn_addr,
+		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
+		     args);
+  if (tgt_vars)
+    gomp_unmap_vars (tgt_vars, true);
+  else
+    free (fpc);
 }
 
 /* Host fallback for GOMP_target_data{,_ext} routines.  */
@@ -1552,7 +1619,7 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
 	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
 					   mapnum, hostaddrs, sizes, kinds,
 					   flags | GOMP_TARGET_FLAG_UPDATE,
-					   depend, GOMP_TARGET_TASK_DATA))
+					   depend, NULL, GOMP_TARGET_TASK_DATA))
 		return;
 	    }
 	  else
@@ -1673,7 +1740,7 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
 	    {
 	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
 					   mapnum, hostaddrs, sizes, kinds,
-					   flags, depend,
+					   flags, depend, NULL,
 					   GOMP_TARGET_TASK_DATA))
 		return;
 	    }
@@ -1729,8 +1796,11 @@ gomp_target_task_fn (void *data)
 
   if (ttask->fn != NULL)
     {
+      void *fn_addr;
       if (devicep == NULL
-	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
+	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
 	{
 	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
 	  gomp_target_fallback_firstprivate (ttask->fn, ttask->mapnum,
@@ -1745,19 +1815,31 @@ gomp_target_task_fn (void *data)
 	  return false;
 	}
 
-      void *fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn);
-      ttask->tgt
-	= gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, NULL,
-			 ttask->sizes, ttask->kinds, true,
-			 GOMP_MAP_VARS_TARGET);
+      void *actual_arguments;
+      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+	{
+	  ttask->tgt = NULL;
+	  ttask->firstprivate_copies
+	    = gomp_target_unshare_firstprivate (ttask->mapnum, ttask->hostaddrs,
+						ttask->sizes, ttask->kinds);
+	  actual_arguments = ttask->hostaddrs;
+	}
+      else
+	{
+	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
+				      NULL, ttask->sizes, ttask->kinds, true,
+				      GOMP_MAP_VARS_TARGET);
+	  actual_arguments = (void *) ttask->tgt->tgt_start;
+	}
       ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
 
-      devicep->async_run_func (devicep->target_id, fn_addr,
-			       (void *) ttask->tgt->tgt_start, (void *) ttask);
+      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
+			       ttask->args, (void *) ttask);
       return true;
     }
   else if (devicep == NULL
-	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
     return false;
 
   size_t i;
@@ -2225,6 +2307,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
     {
       DLSYM (run);
       DLSYM (async_run);
+      DLSYM_OPT (can_run, can_run);
       DLSYM (dev2dev);
     }
   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
diff --git a/libgomp/task.c b/libgomp/task.c
index 620facd..f3b05e5 100644
--- a/libgomp/task.c
+++ b/libgomp/task.c
@@ -593,7 +593,7 @@ bool
 gomp_create_target_task (struct gomp_device_descr *devicep,
 			 void (*fn) (void *), size_t mapnum, void **hostaddrs,
 			 size_t *sizes, unsigned short *kinds,
-			 unsigned int flags, void **depend,
+			 unsigned int flags, void **depend, void **args,
 			 enum gomp_target_task_state state)
 {
   struct gomp_thread *thr = gomp_thread ();
@@ -653,6 +653,7 @@ gomp_create_target_task (struct gomp_device_descr *devicep,
   ttask->devicep = devicep;
   ttask->fn = fn;
   ttask->mapnum = mapnum;
+  ttask->args = args;
   memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
   ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
   memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
diff --git a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
index f8c1725..48599dd 100644
--- a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
+++ b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
@@ -539,7 +539,7 @@ GOMP_OFFLOAD_dev2dev (int device, void *dst_ptr, const void *src_ptr,
 
 extern "C" void
 GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
-			void *async_data)
+			void **, void *async_data)
 {
   TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p, async_data = %p)", device,
 	 tgt_fn, tgt_vars, async_data);
@@ -555,7 +555,7 @@ GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
 }
 
 extern "C" void
-GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars)
+GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars, void **)
 {
   TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p)", device, tgt_fn, tgt_vars);
 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]