[OpenACC 7/11] execution model

Jakub Jelinek jakub@redhat.com
Thu Oct 22 09:32:00 GMT 2015


On Wed, Oct 21, 2015 at 03:42:26PM -0400, Nathan Sidwell wrote:
> +/*  Flags for an OpenACC loop.  */
> +
> +enum oacc_loop_flags
> +  {

Weird formatting.  I see either
enum foobarbaz {
  e1 = ...,
  e2 = ...
};
or
enum foobarbaz
{
  e1 = ...,
  e2 = ...
};
styles being used heavily, but not this one.

> +    OLF_SEQ	= 1u << 0,  /* Explicitly sequential  */
> +    OLF_AUTO	= 1u << 1,	/* Compiler chooses axes.  */
> +    OLF_INDEPENDENT = 1u << 2,	/* Iterations are known independent.  */
> +    OLF_GANG_STATIC = 1u << 3,	/* Gang partitioning is static (has op). */
> +
> +    /* Explicitly specified loop axes.  */
> +    OLF_DIM_BASE = 4,
> +    OLF_DIM_GANG   = 1u << (OLF_DIM_BASE + GOMP_DIM_GANG),
> +    OLF_DIM_WORKER = 1u << (OLF_DIM_BASE + GOMP_DIM_WORKER),
> +    OLF_DIM_VECTOR = 1u << (OLF_DIM_BASE + GOMP_DIM_VECTOR),
> +
> +    OLF_MAX = OLF_DIM_BASE + GOMP_DIM_MAX
> +  };
> +

> +  if (checking)
> +    {
> +      if (has_seq && (this_mask || has_auto))
> +	error_at (gimple_location (stmt), "%<seq%> overrides other OpenACC loop specifiers");
> +      else if (has_auto && this_mask)
> +	error_at (gimple_location (stmt), "%<auto%> conflicts with other OpenACC loop specifiers");
> +
> +      if (this_mask & outer_mask)
> +	error_at (gimple_location (stmt), "inner loop uses same  OpenACC parallelism as containing loop");

Too long lines.  Plus 2 spaces into one.
> +	    if (check && OMP_CLAUSE_OPERAND (c, 0))
> +	      error_at (gimple_location (stmt),
> +			"argument not permitted on %<%s%> clause in"

%qs instead of %<%s%> ?

> @@ -5769,6 +5885,166 @@ lower_send_shared_vars (gimple_seq *ilis
>      }
>  }
>  
> +/* Emit an OpenACC head marker call, encapulating the partitioning and
> +   other information that must be processed by the target compiler.
> +   Return the maximum number of dimensions the associated loop might
> +   be partitioned over.  */
> +
> +static unsigned
> +lower_oacc_head_mark (location_t loc, tree clauses,
> +		      gimple_seq *seq, omp_context *ctx)
> +{
> +  unsigned levels = 0;
> +  unsigned tag = 0;
> +  tree gang_static = NULL_TREE;
> +  auto_vec<tree, 1> args;

If you usually push there 3 or 4 arguments, wouldn't it be better to
just use auto_vec<tree, 4> args; instead?

> +  if (gang_static)
> +    {
> +      if (DECL_P  (gang_static))

Formatting, too many spaces.

> +  tree marker = build_int_cst
> +    (integer_type_node, (head ? IFN_UNIQUE_OACC_HEAD_MARK
> +			 : IFN_UNIQUE_OACC_TAIL_MARK));

I really don't like putting the arguments on a different from
the function name, unless you have to.
Here you can easily do say
  enum internal_fn marker_val = head ? IFN_UNIQUE_OACC_HEAD_MARK
				     : IFN_UNIQUE_OACC_TAIL_MARK;
  tree marker = build_int_cst (integer_type_node, marker_val);
same number of lines, easier to read.

> +  gcall *call = gimple_build_call_internal
> +    (IFN_UNIQUE, 1 + (tofollow != NULL_TREE), marker, tofollow);

Similarly.

> +      gcall *fork = gimple_build_call_internal
> +	(IFN_UNIQUE, 2,
> +	 build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_FORK), place);
> +      gcall *join = gimple_build_call_internal
> +	(IFN_UNIQUE, 2,
> +	 build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_JOIN), place);

Likewise.  Just use a 
    tree t = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_FORK);
    gcall *fork = gimple_build_call_internal (IFN_UNIQUE, 2, t, place);
etc.

> +      expr = build2 (TRUNC_MOD_EXPR, ivar_type, ivar,
> +		     fold_convert (ivar_type, collapse->iters));
> +      expr = build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
> +		     collapse->step);
> +      expr = build2 (plus_code, iter_type, collapse->base,
> +		     fold_convert (plus_type, expr));

Shouldn't these be fold_build2 instead?
Of course Richi would prefer gimple_build, but omp-low.c has already
way too much of fold_build2 + force_gimple_operand_gsi code around
that it is fine with me this way.

>  /* An unduplicable, uncombinable function.  Generally used to preserve
>     a CFG property in the face of jump threading, tail merging or
>     other such optimizations.  The first argument distinguishes
>     between uses.  Other arguments are as needed for use.  The return
>     type depends on use too.  */
>  DEF_INTERNAL_FN (UNIQUE, ECF_NOTHROW | ECF_LEAF, NULL)
>  #define IFN_UNIQUE_UNSPEC 0  /* Undifferentiated UNIQUE.  */
> +
> +/* FORK and JOIN mark the points at which OpenACC partitioned
> +   execution is entered or exited.  They take an INTEGER_CST argument,
> +   indicating the axis of forking or joining and return nothing.  */
> +#define IFN_UNIQUE_OACC_FORK 1
> +#define IFN_UNIQUE_OACC_JOIN 2
> +/* HEAD_MARK and TAIL_MARK are used to demark the sequence entering or
> +   leaving partitioned execution.  */
> +#define IFN_UNIQUE_OACC_HEAD_MARK 3
> +#define IFN_UNIQUE_OACC_TAIL_MARK 4

Shouldn't these be in an enum, to make debugging easier?

	Jakub



More information about the Gcc-patches mailing list