This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [gomp4 10/14] libgomp: arrange a team of pre-started threads via gomp_nvptx_main


On Tue, Oct 20, 2015 at 09:34:32PM +0300, Alexander Monakov wrote:
> diff --git a/libgomp/config/nvptx/team.c b/libgomp/config/nvptx/team.c
> deleted file mode 100644
> index e69de29..0000000
> diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
> index 1454adf..f25b265 100644
> --- a/libgomp/libgomp.h
> +++ b/libgomp/libgomp.h
> @@ -483,7 +483,15 @@ enum gomp_cancel_kind
>  
>  /* ... and here is that TLS data.  */
>  
> -#if defined HAVE_TLS || defined USE_EMUTLS
> +#if defined __nvptx__
> +extern struct gomp_thread *nvptx_thrs;

What kind of address space is this variable?  It should be
a per-CTA var, so that different teams have different, and
simultaneous target regions have different too.

> +static inline struct gomp_thread *gomp_thread (void)
> +{
> +  int tid;
> +  asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
> +  return nvptx_thrs + tid;
> +}
> +#elif defined HAVE_TLS || defined USE_EMUTLS

Other than that, yes, such bit of nvptx specific stuff is acceptable here.

>  extern __thread struct gomp_thread gomp_tls_data;
>  static inline struct gomp_thread *gomp_thread (void)
>  {
> diff --git a/libgomp/team.c b/libgomp/team.c
> index 7671b05..5b74532 100644
> --- a/libgomp/team.c
> +++ b/libgomp/team.c
> @@ -30,6 +30,7 @@
>  #include <stdlib.h>
>  #include <string.h>
>  
> +#ifdef LIBGOMP_USE_PTHREADS
>  /* This attribute contains PTHREAD_CREATE_DETACHED.  */
>  pthread_attr_t gomp_thread_attr;
>  
> @@ -43,6 +44,7 @@ __thread struct gomp_thread gomp_tls_data;
>  #else
>  pthread_key_t gomp_tls_key;
>  #endif
> +#endif

I'm surprised that for team.c you chose to adjust the shared source,
rather than copy and remove all the cruft you don't need/want.

That includes the LIBGOMP_USE_PTHREADS guarded parts, all the thread binding
stuff etc.  I'd like to see at least for comparison how much actually
remained in there.

> @@ -58,6 +60,52 @@ struct gomp_thread_start_data
>    bool nested;
>  };
>  
> +#ifdef __nvptx__
> +struct gomp_thread *nvptx_thrs;
> +
> +static struct gomp_thread_pool *gomp_new_thread_pool (void);
> +static void *gomp_thread_start (void *);
> +
> +void __attribute__((kernel))
> +gomp_nvptx_main (void (*fn) (void *), void *fn_data)
> +{
> +  int ntids, tid, laneid;
> +  asm ("mov.u32 %0, %%laneid;" : "=r" (laneid));
> +  if (laneid)
> +    return;
> +  static struct gomp_thread_pool *pool;
> +  asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
> +  asm ("mov.u32 %0, %%ntid.y;" : "=r"(ntids));
> +  if (tid == 0)
> +    {
> +      gomp_global_icv.nthreads_var = ntids;
> +
> +      nvptx_thrs = gomp_malloc_cleared (ntids * sizeof (*nvptx_thrs));
> +
> +      pool = gomp_new_thread_pool ();
> +      pool->threads = gomp_malloc (ntids * sizeof (*pool->threads));
> +      pool->threads[0] = nvptx_thrs;
> +      pool->threads_size = ntids;
> +      pool->threads_used = ntids;
> +      gomp_barrier_init (&pool->threads_dock, ntids);
> +
> +      nvptx_thrs[0].thread_pool = pool;
> +      asm ("bar.sync 0;");
> +      fn (fn_data);
> +
> +      gomp_free_thread (nvptx_thrs);
> +      free (nvptx_thrs);
> +    }
> +  else
> +    {
> +      struct gomp_thread_start_data tsdata = {0};
> +      tsdata.ts.team_id = tid;
> +      asm ("bar.sync 0;");
> +      tsdata.thread_pool = pool;
> +      gomp_thread_start (&tsdata);
> +    }
> +}
> +#endif

If nvptx is going to use the toplevel team.c, then at least this should not
go in there, it is sufficiently large, and you can just stick it into
config/nvptx/team.c and include the toplevel team.c from there.

> @@ -88,7 +138,8 @@ gomp_thread_start (void *xdata)
>    thr->task = data->task;
>    thr->place = data->place;
>  
> -  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;
> +  if (thr->ts.team)
> +    thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

Why this?  Lots of gomp_thread_start other places assume thr->ts.team is
non-NULL.  And, this isn't even guarded with __nvptx__, we don't want
to slow down host thread start.

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]