[PATCH] [WIP] openmp: Add OpenMP 5.0 task detach clause support

Jakub Jelinek jakub@redhat.com
Thu Dec 10 14:38:40 GMT 2020


On Wed, Dec 09, 2020 at 05:37:24PM +0000, Kwok Cheung Yeung wrote:
> --- a/gcc/c/c-typeck.c
> +++ b/gcc/c/c-typeck.c
> @@ -14942,6 +14942,11 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
>  	  pc = &OMP_CLAUSE_CHAIN (c);
>  	  continue;
>  
> +	case OMP_CLAUSE_DETACH:
> +	  t = OMP_CLAUSE_DECL (c);
> +	  pc = &OMP_CLAUSE_CHAIN (c);
> +	  continue;
> +

If you wouldn't need to do anything for C for the detach clause, just would
just add:
	case OMP_CLAUSE_DETACH:
at the end of the case list that starts below:
>  	case OMP_CLAUSE_IF:
>  	case OMP_CLAUSE_NUM_THREADS:
>  	case OMP_CLAUSE_NUM_TEAMS:

But you actually do need to do something, even for C.

There are two restrictions:
- At most one detach clause can appear on the directive.
- If a detach clause appears on the directive, then a mergeable clause cannot appear on the same directive.
that should be checked and diagnosed.  One place to do that would be
like usually in all the FEs separately, that would mean adding
  bool mergeable_seen = false, detach_seen = false;
vars and for those clauses setting the *_seen, plus for DETACH
already complain if detach_seen is already true and remove the clause.
And at the end of the loop if mergeable_seen && detach_seen, diagnose
and remove one of them (perhaps better detach clause).
There is the optional second loop that can be used for the removal...

Testcase coverage should include:
  #pragma omp task detach (x) detach (y)
as well as
  #pragma omp task mergeable detach (x)
and
  #pragma omp task detach (x) mergeable
(and likewise for Fortran).

> +  if (cp_lexer_next_token_is_not (parser->lexer, CPP_NAME))
> +    {
> +      cp_parser_error (parser, "expected identifier");
> +      return list;
> +    }
> +
> +  location_t id_loc = cp_lexer_peek_token (parser->lexer)->location;
> +  tree t, identifier = cp_parser_identifier (parser);
> +
> +  if (identifier == error_mark_node)
> +    t = error_mark_node;
> +  else
> +    {
> +      t = cp_parser_lookup_name_simple
> +	    (parser, identifier,
> +	     cp_lexer_peek_token (parser->lexer)->location);
> +      if (t == error_mark_node)
> +	cp_parser_name_lookup_error (parser, identifier, t, NLE_NULL,
> +				     id_loc);

The above doesn't match what cp_parser_omp_var_list_no_open does,
in particular it should use cp_parser_id_expression
instead of cp_parser_identifier etc.

> +      else
> +	{
> +	  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (t));
> +	  if (!INTEGRAL_TYPE_P (type)
> +	      || TREE_CODE (type) != ENUMERAL_TYPE
> +	      || DECL_NAME (TYPE_NAME (type))
> +		   != get_identifier ("omp_event_handle_t"))
> +	    {
> +	      error_at (id_loc, "%<detach%> clause event handle "
> +			    "has type %qT rather than "
> +			    "%<omp_event_handle_t%>",
> +			    type);
> +	      return list;

You can't do this here for C++, it needs to be done in finish_omp_clauses
instead and only be done if the type is not a dependent type.
Consider (e.g. should be in testsuite)
template <typename T>
void
foo ()
{
  T t;
  #pragma omp task detach (t)
  ;
}

template <typename T>
void
bar ()
{
  T t;
  #pragma omp task detach (t)
  ;
}

void
baz ()
{
  foo <omp_event_handle_t> ();
  bar <int> (); // Instantiating this should error
}

> @@ -7394,6 +7394,9 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
>  		}
>  	    }
>  	  break;
> +	case OMP_CLAUSE_DETACH:
> +	  t = OMP_CLAUSE_DECL (c);
> +	  break;
>  

Again, restriction checking here, plus check the type if it is
non-dependent, otherwise defer that checking for finish_omp_clauses when
it will not be dependent anymore.

I think you need to handle OMP_CLAUSE_DETACH in cp/pt.c too.

> --- a/gcc/gimplify.c
> +++ b/gcc/gimplify.c
> @@ -9733,6 +9733,19 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
>  	    }
>  	  break;
>  
> +	case OMP_CLAUSE_DETACH:
> +	  decl = OMP_CLAUSE_DECL (c);
> +	  if (outer_ctx)
> +	    {
> +	      splay_tree_node on
> +		= splay_tree_lookup (outer_ctx->variables,
> +				     (splay_tree_key)decl);
> +	      if (on == NULL || (on->value & GOVD_DATA_SHARE_CLASS) == 0)
> +		omp_firstprivatize_variable (outer_ctx, decl);
> +	      omp_notice_variable (outer_ctx, decl, true);
> +	    }
> +	  break;

I don't understand this.  My reading of:
"The event-handle will be considered as if it was specified on a
firstprivate clause. The use of a variable in a detach clause expression of a task
construct causes an implicit reference to the variable in all enclosing
constructs."
is that we should do:
      case OMP_CLAUSE_DETACH:
	decl = OMP_CLAUSE_DECL (c);
	goto do_notice;
which does the second sentence, and for the first sentence I believe it
talks about the task construct rather than about the outer construct.
So (again, something for testsuite):
void
foo (void)
{
  omp_event_handle_t t;
  #pragma omp parallel master default (none) /* { dg-error "..." } */
  {
    #pragma omp task detach (t)
    ;
  }
}
The dg-error should be the usual error about t being referenced in the
construct but not specified in the data sharing clauses on parallel.
And then
void
bar (void)
{
  omp_event_handle_t t;
  #pragma omp task detach (t) default (none)
  omp_fullfill_event (t); // This should be ok, above first sentence says
// that it is as if firstprivate (t)
}

But I think it is actually even stronger than that,
  #pragma omp task detach (t) firstprivate (t)
and
  #pragma omp task detach (t) shared (t)
etc. should be invalid too (at least in pedantic reading).
I guess we should ask on omp-lang.  If it actually works as firstprivate
(t), perhaps we should handle it that way already in the FEs.
> --- a/gcc/omp-expand.c
> +++ b/gcc/omp-expand.c
> @@ -762,6 +762,7 @@ expand_task_call (struct omp_region *region, basic_block bb,
>    tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
>    tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
>    tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
> +  tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);
>  
>    unsigned int iflags
>      = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
> @@ -853,6 +854,11 @@ expand_task_call (struct omp_region *region, basic_block bb,
>      priority = integer_zero_node;
>  
>    gsi = gsi_last_nondebug_bb (bb);
> +
> +  detach = detach
> +      ? fold_convert (pointer_sized_int_node, OMP_CLAUSE_DETACH_EXPR (detach))
> +      : null_pointer_node;

Formatting is wrong, would need to be
  detach = (detach
	    ? fold_convert (pointer_sized_int_node,
			    OMP_CLAUSE_DETACH_EXPR (detach))
	    : ...);
Now, null_pointer_node doesn't have pointer_sized_int type, so there would
be type mismatch.  But additionally, let's talk about how it should be
implemented.  I'd say best would be if:
  #pragma omp task detach (x)
  ;
expands as
  GOMP_task (..., ..., ..., ..., ..., ..., other_flags | GOMP_TASK_FLAG_DETACH, ..., ..., &x);
where GOMP_task allocates the structure for the struct gomp_task the way it
usually does, will have something in that structure include a gomp_sem_t and
and a flag that detach is needed (and if detach is needed gomp_sem_init that
semaphore) and stores to *detach the address of that semaphore or so.
Then you don't need a separate call and allocate something separately.
The GOMP_TASK_FLAG_DETACH is needed, because we keep extending GOMP_task
rather than creating new versions of that, and so to stay (at least on
ABIs I care about) ABI compatible we must not assume detach will have
meaningful value if GOMP_TASK_FLAG_DETACH bit is not set in flags.
I think we can make the 10th argument to BUILT_IN_GOMP_TASK just void *
e.g. to simplify omp-builtins.def and builtin-types.def etc., and just
cast it to omp_eventhandler_t * inside of GOMP_task.

And, when we pass &x to GOMP_task, we need the detach decl to be
addressable, so best in the FEs ensure it is addressable by calling
*mark_addressable there.

> --- a/gcc/tree.c
> +++ b/gcc/tree.c
> @@ -299,6 +299,7 @@ unsigned const char omp_clause_num_ops[] =
>    1, /* OMP_CLAUSE_LINK  */
>    2, /* OMP_CLAUSE_FROM  */
>    2, /* OMP_CLAUSE_TO  */
> +  1, /* OMP_CLAUSE_DETACH  */
>    2, /* OMP_CLAUSE_MAP  */
>    1, /* OMP_CLAUSE_USE_DEVICE_PTR  */
>    1, /* OMP_CLAUSE_USE_DEVICE_ADDR  */

This position for OMP_CLAUSE_DETACH is incorrect (but so is
USE_DEVICE_*/IS_DEVICE*/INCLUSIVE/EXCLUSIVE it seems), given:
#define OMP_CLAUSE_SIZE(NODE)                                           \
  OMP_CLAUSE_OPERAND (OMP_CLAUSE_RANGE_CHECK (OMP_CLAUSE_CHECK (NODE),  \
                                              OMP_CLAUSE_FROM,          \
                                              OMP_CLAUSE__CACHE_), 1)
I think for OMP_CLAUSE_SIZE we just want from/to/map/_cache_ and nothing
else, so I'd move detach/use*/is*/*clusive right before from clause.
Here 3 times and in tree-core.h to match.
We want OMP_CLAUSE_DECL to work for them, which is OMP_CLAUSE_PRIVATE ..
OMP_CLAUSE__SCANTEMP_.  

> +#define OMP_CLAUSE_DETACH_EXPR(NODE) \
> +  OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_DETACH),0)

No need for that, OMP_CLAUSE_DECL can be used for that.
It is not an expression, but decl anyway.

I don't see tree-nested.c changes, that is needed too.

> @@ -556,6 +557,13 @@
>          end interface
>  
>          interface
> +          subroutine omp_fulfill_event (event)
> +            use omp_lib_kinds
> +            integer (kind=omp_event_handle_kind), value, intent(in) :: event

Too long line?

> diff --git a/libgomp/priority_queue.h b/libgomp/priority_queue.h
> index 0ad78f5..c6fd80d 100644
> --- a/libgomp/priority_queue.h
> +++ b/libgomp/priority_queue.h
> @@ -113,6 +113,8 @@ enum priority_queue_type
>    PQ_IGNORED = 999
>  };
>  
> +typedef bool (*priority_queue_predicate)(struct gomp_task *);

Formatting, space before (.
> +
>  /* Priority queue implementation prototypes.  */
>  
>  extern bool priority_queue_task_in_queue_p (enum priority_queue_type,
> @@ -122,6 +124,9 @@ extern void priority_queue_dump (enum priority_queue_type,
>  				 struct priority_queue *);
>  extern void priority_queue_verify (enum priority_queue_type,
>  				   struct priority_queue *, bool);
> +extern struct gomp_task *priority_queue_find (enum priority_queue_type,
> +					      struct priority_queue *,
> +					      priority_queue_predicate);
>  extern void priority_tree_remove (enum priority_queue_type,
>  				  struct priority_queue *,
>  				  struct priority_node *);
> diff --git a/libgomp/task.c b/libgomp/task.c
> index a95067c..ae1fcf7 100644
> --- a/libgomp/task.c
> +++ b/libgomp/task.c
> @@ -29,6 +29,7 @@
>  #include "libgomp.h"
>  #include <stdlib.h>
>  #include <string.h>
> +#include <stdio.h>
>  #include "gomp-constants.h"
>  
>  typedef struct gomp_task_depend_entry *hash_entry_type;
> @@ -86,6 +87,7 @@ gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
>    task->dependers = NULL;
>    task->depend_hash = NULL;
>    task->depend_count = 0;
> +  task->detach_event = NULL;
>  }
>  
>  /* Clean up a task, after completing it.  */
> @@ -326,6 +328,28 @@ gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
>      }
>  }
>  
> +uintptr_t
> +GOMP_new_event ()
> +{
> +  struct gomp_allow_completion_event *event;
> +
> +  event = (struct gomp_allow_completion_event *)
> +	    gomp_malloc (sizeof (struct gomp_allow_completion_event));
> +  event->fulfilled = false;
> +  gomp_sem_init (&event->completion_sem, 0);
> +
> +  gomp_debug (0, "GOMP_new_event: %p\n", event);
> +
> +  return (uintptr_t) event;
> +}
> +
> +static bool
> +task_fulfilled_p (struct gomp_task *task)
> +{
> +  return __atomic_load_n (&task->detach_event->fulfilled,
> +			  __ATOMIC_RELAXED);
> +}

I don't understand the fulfilled and sem duality, I think there should be
a flag that the flag has detach at all, and then just a semaphore, the
semaphore state itself would indicate if it is fulfilled or not.

> @@ -347,11 +371,14 @@ gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
>  void
>  GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
>  	   long arg_size, long arg_align, bool if_clause, unsigned flags,
> -	   void **depend, int priority)
> +	   void **depend, int priority, uintptr_t detach)
>  {
>    struct gomp_thread *thr = gomp_thread ();
>    struct gomp_team *team = thr->ts.team;
>  
> +  struct gomp_allow_completion_event *detach_event =
> +    detach ? (struct gomp_allow_completion_event *) detach : NULL;

Formatting (no = at the end of line), but as I said above, I'd prefer
to do it differently.

> +void omp_fulfill_event(omp_event_handle_t event)

Formatting.
omp_fulfill_event on separate line from void,
and space before (.

> +{
> +  struct gomp_allow_completion_event *ev =
> +		(struct gomp_allow_completion_event *) event;

Formatting, no = at the end of line and the (struct ... is indented
wierdly.

	Jakub



More information about the Gcc-patches mailing list