This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[hsa 2/12] Modifications to libgomp proper


Hi,

The patch below contains all changes to libgomp files.  First, it adds
a new constant identifying HSA devices and a structure that is shared
between libgomp and the compiler when kernels are invoked from other
kernels via dynamic parallelism.

Second, it modifies the GOMP_target_41 function so that it can also take
kernel attributes (essentially the grid dimension) as a parameter and
pass it on to the HSA libgomp plugin.  Because we do want HSAIL
generation to gracefully fail and use host fallback in that case, the
same function calls the host implementation if it cannot map the
requested function to an accelerated one or if a new callback
can_run_func indicates there is a problem.

We need a new hook because we use it to check for linking errors, which
we cannot do when incrementally loading registered images.  We also
want to handle linking errors, so that when we cannot emit HSAIL for a
function called from a kernel (possibly in a different compilation
unit), we also resort to host fallback.

Last but not least, the patch removes data remapping when the selected
device is capable of sharing memory with the host.

Thanks,

Martin


2015-11-02  Martin Jambor  <mjambor@suse.cz>
	    Martin Liska  <mliska@suse.cz>

include/
	* gomp-constants.h (GOMP_DEVICE_HSA): New macro.

libgomp/
	* libgomp-plugin.h (offload_target_type): New element
	OFFLOAD_TARGET_TYPE_HSA.
	* libgomp.h (gomp_device_descr): Extra parameter of run_func, new
	field can_run_func.
	* libgomp_g.h (GOMP_target_41): Add an extra parameter.
	* oacc-host.c (host_run): Add an extra unused parameter.
	* target.c (gomp_get_target_fn_addr): Allow failure if device shares
	memory.
	(GOMP_target): Assert failure did not happen.  Add extra parameter to
	call of run_func.
	(GOMP_target_41): Added an extra parameter, pass it to run_func.
	Allow host fallback if device shares memory.  Do not remap data if
	device has shared memory.
	(GOMP_target_data): Run host fallback if device has shared memory.
	(gomp_load_plugin_for_device): Also attempt to load can_run_func.


diff --git a/include/gomp-constants.h b/include/gomp-constants.h
index f834dec..46d52b3 100644
--- a/include/gomp-constants.h
+++ b/include/gomp-constants.h
@@ -160,6 +160,7 @@ enum gomp_map_kind
 #define GOMP_DEVICE_NOT_HOST		4
 #define GOMP_DEVICE_NVIDIA_PTX		5
 #define GOMP_DEVICE_INTEL_MIC		6
+#define GOMP_DEVICE_HSA			7
 
 #define GOMP_DEVICE_ICV			-1
 #define GOMP_DEVICE_HOST_FALLBACK	-2
@@ -212,4 +213,35 @@ enum gomp_map_kind
 #define GOMP_LAUNCH_OP(X) (((X) >> GOMP_LAUNCH_OP_SHIFT) & 0xffff)
 #define GOMP_LAUNCH_OP_MAX 0xffff
 
+/* HSA specific data structures.  */
+
+/* HSA kernel dispatch is collection of information needed for
+   a kernel dispatch.  */
+
+struct hsa_kernel_dispatch
+{
+  /* Pointer to a command queue associated with a kernel dispatch agent.  */
+  void *queue;
+  /* Pointer to reserved memory for OMP data struct copying.  */
+  void *omp_data_memory;
+  /* Pointer to a memory space used for kernel arguments passing.  */
+  void *kernarg_address;
+  /* Kernel object.  */
+  uint64_t object;
+  /* Synchronization signal used for dispatch synchronization.  */
+  uint64_t signal;
+  /* Private segment size.  */
+  uint32_t private_segment_size;
+  /* Group segment size.  */
+  uint32_t group_segment_size;
+  /* Number of children kernel dispatches.  */
+  uint64_t kernel_dispatch_count;
+  /* Number of threads.  */
+  uint32_t omp_num_threads;
+  /* Debug purpose argument.  */
+  uint64_t debug;
+  /* Kernel dispatch structures created for children kernel dispatches.  */
+  struct hsa_kernel_dispatch **children_dispatches;
+};
+
 #endif
diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h
index 24fbb94..acf6eb7 100644
--- a/libgomp/libgomp-plugin.h
+++ b/libgomp/libgomp-plugin.h
@@ -48,7 +48,8 @@ enum offload_target_type
   OFFLOAD_TARGET_TYPE_HOST = 2,
   /* OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3 removed.  */
   OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5,
-  OFFLOAD_TARGET_TYPE_INTEL_MIC = 6
+  OFFLOAD_TARGET_TYPE_INTEL_MIC = 6,
+  OFFLOAD_TARGET_TYPE_HSA = 7
 };
 
 /* Auxiliary struct, used for transferring pairs of addresses from plugin
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 9c8b1fb..0ad42d2 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -876,7 +876,8 @@ struct gomp_device_descr
   void *(*dev2host_func) (int, void *, const void *, size_t);
   void *(*host2dev_func) (int, void *, const void *, size_t);
   void *(*dev2dev_func) (int, void *, const void *, size_t);
-  void (*run_func) (int, void *, void *);
+  void (*run_func) (int, void *, void *, const void *);
+  bool (*can_run_func) (void *);
 
   /* Splay tree containing information about mapped memory regions.  */
   struct splay_tree_s mem_map;
diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h
index c28ad21..adb9bcc 100644
--- a/libgomp/libgomp_g.h
+++ b/libgomp/libgomp_g.h
@@ -250,7 +250,8 @@ extern void GOMP_single_copy_end (void *);
 extern void GOMP_target (int, void (*) (void *), const void *,
 			 size_t, void **, size_t *, unsigned char *);
 extern void GOMP_target_41 (int, void (*) (void *), size_t, void **, size_t *,
-			  unsigned short *, unsigned int, void **);
+			    unsigned short *, unsigned int, void **,
+			    const void *);
 extern void GOMP_target_data (int, const void *,
 			      size_t, void **, size_t *, unsigned char *);
 extern void GOMP_target_data_41 (int, size_t, void **, size_t *,
diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c
index 8e4ba04..c0c4d52 100644
--- a/libgomp/oacc-host.c
+++ b/libgomp/oacc-host.c
@@ -123,7 +123,8 @@ host_host2dev (int n __attribute__ ((unused)),
 }
 
 static void
-host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars)
+host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars,
+	  const void* kern_launch __attribute__ ((unused)))
 {
   void (*fn)(void *) = (void (*)(void *)) fn_ptr;
 
diff --git a/libgomp/target.c b/libgomp/target.c
index b767410..404faa4 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -1248,7 +1248,12 @@ gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
       splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
       gomp_mutex_unlock (&devicep->lock);
       if (tgt_fn == NULL)
-	gomp_fatal ("Target function wasn't mapped");
+	{
+	  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+	    return NULL;
+	  else
+	    gomp_fatal ("Target function wasn't mapped");
+	}
 
       return (void *) tgt_fn->tgt_offset;
     }
@@ -1276,6 +1281,7 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
     return gomp_target_fallback (fn, hostaddrs);
 
   void *fn_addr = gomp_get_target_fn_addr (devicep, fn);
+  assert (fn_addr);
 
   struct target_mem_desc *tgt_vars
     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
@@ -1288,7 +1294,8 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
       thr->place = old_thr.place;
       thr->ts.place_partition_len = gomp_places_list_len;
     }
-  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
+  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
+		     NULL);
   gomp_free_thread (thr);
   *thr = old_thr;
   gomp_unmap_vars (tgt_vars, true);
@@ -1297,7 +1304,7 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
 void
 GOMP_target_41 (int device, void (*fn) (void *), size_t mapnum,
 		void **hostaddrs, size_t *sizes, unsigned short *kinds,
-		unsigned int flags, void **depend)
+		unsigned int flags, void **depend, const void *kernel_launch)
 {
   struct gomp_device_descr *devicep = resolve_device (device);
 
@@ -1312,8 +1319,16 @@ GOMP_target_41 (int device, void (*fn) (void *), size_t mapnum,
 	gomp_task_maybe_wait_for_dependencies (depend);
     }
 
+  void *fn_addr = NULL;
+  bool host_fallback = false;
   if (devicep == NULL
-      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
+      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
+      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
+    host_fallback = true;
+
+  if (host_fallback
+      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
     {
       size_t i, tgt_align = 0, tgt_size = 0;
       char *tgt = NULL;
@@ -1343,15 +1358,20 @@ GOMP_target_41 (int device, void (*fn) (void *), size_t mapnum,
 		tgt_size = tgt_size + sizes[i];
 	      }
 	}
-      gomp_target_fallback (fn, hostaddrs);
-      return;
-    }
 
-  void *fn_addr = gomp_get_target_fn_addr (devicep, fn);
+      if (host_fallback)
+	{
+	  gomp_target_fallback (fn, hostaddrs);
+	  return;
+	}
+    }
 
-  struct target_mem_desc *tgt_vars
-    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
-		     GOMP_MAP_VARS_TARGET);
+  struct target_mem_desc *tgt_vars;
+  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+    tgt_vars = NULL;
+  else
+    tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
+			      true, GOMP_MAP_VARS_TARGET);
   struct gomp_thread old_thr, *thr = gomp_thread ();
   old_thr = *thr;
   memset (thr, '\0', sizeof (*thr));
@@ -1360,10 +1380,13 @@ GOMP_target_41 (int device, void (*fn) (void *), size_t mapnum,
       thr->place = old_thr.place;
       thr->ts.place_partition_len = gomp_places_list_len;
     }
-  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
+  devicep->run_func (devicep->target_id, fn_addr,
+		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
+		     kernel_launch);
   gomp_free_thread (thr);
   *thr = old_thr;
-  gomp_unmap_vars (tgt_vars, true);
+  if (tgt_vars)
+    gomp_unmap_vars (tgt_vars, true);
 }
 
 /* Host fallback for GOMP_target_data{,_41} routines.  */
@@ -1393,6 +1416,7 @@ GOMP_target_data (int device, const void *unused, size_t mapnum,
   struct gomp_device_descr *devicep = resolve_device (device);
 
   if (devicep == NULL
+      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
     return gomp_target_data_fallback ();
 
@@ -2112,6 +2136,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
     {
       DLSYM (run);
+      DLSYM_OPT (can_run, can_run);
       DLSYM (dev2dev);
     }
   if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]