This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [gomp4.1] depend nowait support for target {update,{enter,exit} data}


On Thu, Oct 15, 2015 at 16:01:56 +0200, Jakub Jelinek wrote:
> On Fri, Oct 02, 2015 at 10:28:01PM +0300, Ilya Verbin wrote:
> > Here is my WIP patch.  target.c part is obviously incorrect, but it demonstrates
> > a possible libgomp <-> plugin interface for running a target task function
> > asynchronously and checking whether it is completed or not.
> > (Refactored liboffloadmic/runtime/emulator from trunk is required to run
> > target-tmp.c testcase.)
> 
> > diff --git a/libgomp/target.c b/libgomp/target.c
> > index 77bd442..31f034c 100644
> > --- a/libgomp/target.c
> > +++ b/libgomp/target.c
> > @@ -45,6 +45,10 @@
> >  #include "plugin-suffix.h"
> >  #endif
> >  
> > +/* FIXME: TMP */
> > +#include <stdio.h>
> > +#include <unistd.h>
> 
> I hope you mean to remove this later on.

Sure, this is just a prototype, not for committing.


> > @@ -1227,6 +1231,44 @@ gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
> >    *thr = old_thr;
> >  }
> >  
> > +/* Host fallback with firstprivate map-type handling.  */
> > +
> > +static void
> > +gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum,
> > +				   void **hostaddrs, size_t *sizes,
> > +				   unsigned short *kinds)
> > +{
> > +  size_t i, tgt_align = 0, tgt_size = 0;
> > +  char *tgt = NULL;
> > +  for (i = 0; i < mapnum; i++)
> > +    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
> > +      {
> > +	size_t align = (size_t) 1 << (kinds[i] >> 8);
> > +	if (tgt_align < align)
> > +	  tgt_align = align;
> > +	tgt_size = (tgt_size + align - 1) & ~(align - 1);
> > +	tgt_size += sizes[i];
> > +      }
> > +  if (tgt_align)
> > +    {
> > +      tgt = gomp_alloca (tgt_size + tgt_align - 1);
> > +      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
> > +      if (al)
> > +	tgt += tgt_align - al;
> > +      tgt_size = 0;
> > +      for (i = 0; i < mapnum; i++)
> > +	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
> > +	  {
> > +	    size_t align = (size_t) 1 << (kinds[i] >> 8);
> > +	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
> > +	    memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
> > +	    hostaddrs[i] = tgt + tgt_size;
> > +	    tgt_size = tgt_size + sizes[i];
> > +	  }
> > +    }
> > +  gomp_target_fallback (fn, hostaddrs);
> > +}
> 
> This is ok.
> 
> >  /* Helper function of GOMP_target{,_41} routines.  */
> >  
> >  static void *
> > @@ -1311,40 +1353,19 @@ GOMP_target_41 (int device, void (*fn) (void *), size_t mapnum,
> >    if (devicep == NULL
> >        || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
> >      {
> > -      size_t i, tgt_align = 0, tgt_size = 0;
> > -      char *tgt = NULL;
> > -      for (i = 0; i < mapnum; i++)
> > -	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
> > -	  {
> > -	    size_t align = (size_t) 1 << (kinds[i] >> 8);
> > -	    if (tgt_align < align)
> > -	      tgt_align = align;
> > -	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
> > -	    tgt_size += sizes[i];
> > -	  }
> > -      if (tgt_align)
> > -	{
> > -	  tgt = gomp_alloca (tgt_size + tgt_align - 1);
> > -	  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
> > -	  if (al)
> > -	    tgt += tgt_align - al;
> > -	  tgt_size = 0;
> > -	  for (i = 0; i < mapnum; i++)
> > -	    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
> > -	      {
> > -		size_t align = (size_t) 1 << (kinds[i] >> 8);
> > -		tgt_size = (tgt_size + align - 1) & ~(align - 1);
> > -		memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
> > -		hostaddrs[i] = tgt + tgt_size;
> > -		tgt_size = tgt_size + sizes[i];
> > -	      }
> > -	}
> > -      gomp_target_fallback (fn, hostaddrs);
> > +      gomp_target_fallback_firstprivate (fn, mapnum, hostaddrs, sizes, kinds);
> >        return;
> >      }
> 
> This too.

I will commit this small part to gomp-4_5-branch separately.


> > diff --git a/libgomp/testsuite/libgomp.c/target-tmp.c b/libgomp/testsuite/libgomp.c/target-tmp.c
> > new file mode 100644
> > index 0000000..23a739c
> > --- /dev/null
> > +++ b/libgomp/testsuite/libgomp.c/target-tmp.c
> > @@ -0,0 +1,40 @@
> > +#include <stdio.h>
> > +#include <unistd.h>
> > +
> > +#pragma omp declare target
> > +void foo (int n)
> > +{
> > +  printf ("Start tgt %d\n", n);
> > +  usleep (5000000);
> 
> 5s is too long.  Not to mention that not sure if PTX can do printf
> and especially usleep.

This testcase is also for demonstration only.


> > diff --git a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
> > index 26ac6fe..c843710 100644
> > --- a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
> > +++ b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
> ...
> > +/* Set of asynchronously running target tasks.  */
> > +static std::set<const void *> *async_tasks;
> > +
> >  /* Thread-safe registration of the main image.  */
> >  static pthread_once_t main_image_is_registered = PTHREAD_ONCE_INIT;
> >  
> > +/* Mutex for protecting async_tasks.  */
> > +static pthread_mutex_t async_tasks_lock = PTHREAD_MUTEX_INITIALIZER;
> > +
> >  static VarDesc vd_host2tgt = {
> >    { 1, 1 },		      /* dst, src			      */
> >    { 1, 0 },		      /* in, out			      */
> > @@ -156,6 +163,8 @@ init (void)
> >  
> >  out:
> >    address_table = new ImgDevAddrMap;
> > +  async_tasks = new std::set<const void *>;
> > +  pthread_mutex_init (&async_tasks_lock, NULL);
> 
> PTHREAD_MUTEX_INITIALIZER should already initialize the lock.
> But, do you really need async_tasks and the lock?  Better store
> something into some plugin's owned field in target_task struct and
> let the plugin callback be passed address of that field rather than the
> whole target_task?

OK, that should work.


> > diff --git a/liboffloadmic/runtime/offload_host.cpp b/liboffloadmic/runtime/offload_host.cpp
> > index 08f626f..8cee12c 100644
> > --- a/liboffloadmic/runtime/offload_host.cpp
> > +++ b/liboffloadmic/runtime/offload_host.cpp
> > @@ -64,6 +64,9 @@ static void __offload_fini_library(void);
> >  #define GET_OFFLOAD_NUMBER(timer_data) \
> >      timer_data? timer_data->offload_number : 0
> >  
> > +extern "C" void
> > +__gomp_offload_intelmic_async_completed (const void *);
> > +
> >  extern "C" {
> >  #ifdef TARGET_WINNT
> >  // Windows does not support imports from libraries without actually
> > @@ -2507,7 +2510,7 @@ extern "C" {
> >          const void *info
> >      )
> >      {
> > -	/* TODO: Call callback function, pass info.  */
> > +	__gomp_offload_intelmic_async_completed (info);
> >      }
> >  }
> 
> Is this for the emul only, or KNL only, or both?

This is for both.  liboffloadmic doesn't know whether target process is running
on real KNL or on host using emul, only underlying libcoi matters.

> In any case, not sure how it works, this is in liboffloadmic.so and
> the function defined in the plugin?

Yes, this is in liboffloadmic_host.so, and the function is defined in the
plugin.  Both are loaded into the host process.
We can replace it by a callback directly into libgomp, if needed.

  -- Ilya


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]