This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [Patch 2/5] OpenACC tile clause support, omp-low parts


On Thu, Nov 10, 2016 at 06:45:10PM +0800, Chung-Lin Tang wrote:
> 2016-XX-XX  Nathan Sidwell  <nathan@codesourcery.com>
> 
>         * internal-fn.def (GOACC_DIM_POS): Comment may be overly conservative.
>         (GOACC_TILE): New.
>         * internal-fn.c (expand_GOACC_TILE): New.
> 
>         * omp-low.c (struct omp_for_data): Add tiling field.
>         (struct oacc_loop): Change 'ifns' to vector of call stmts,
>         add e_mask field.

Please avoid using 8 spaces instead of a tab in ChangeLog.

> 	dump partitioning.
> 	(oacc_loop_auto_partitions): Add outer_assign parm. Assign all but
>         vector partitioning to outer loops.  Assign 2 partitions to loops
>         when available. Add TILE handling.
>         (oacc_loop_partition): Adjust oacc_loop_auto_partitions call.
>         (execite_oacc_device_lower): Process GOACC_TILE fns, ignore unknown specs.

Here too.  execute instead of execite?  And the last line is too long.

> @@ -626,7 +638,8 @@ extract_omp_for_data (gomp_for *for_stmt, struct o
>    int cnt = fd->ordered ? fd->ordered : fd->collapse;
>    for (i = 0; i < cnt; i++)
>      {
> -      if (i == 0 && fd->collapse == 1 && (fd->ordered == 0 || loops == NULL))
> +      if (i == 0 && fd->collapse == 1 && !fd->tiling
> +	  && (fd->ordered == 0 || loops == NULL))
>  	loop = &fd->loop;
>        else if (loops != NULL)
>  	loop = loops + i;

If the condition fits on one line, it can stay as is, if it can't, then
you should use a
	if (i == 0
	    && fd->collapse == 1
	    && !fd->tiling
	    && (fd->ordered == 0 || loops == NULL))
IMHO.

> +	  tree tile = TREE_VALUE (tiling);
> +	  gcall *call = gimple_build_call_internal
> +	    (IFN_GOACC_TILE, 5, num, loop_no, tile,
> +	     /* gwv-outer=*/integer_zero_node,
> +	     /* gwv-inner=*/integer_zero_node);

I don't really like the ( on separate line unless absolutely necessary.
So better:

	  gcall *call
	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no,
					  tile, integer_zero_node,
					  integer_zero_node);

> +	  call = gimple_build_call_internal
> +	    (IFN_GOACC_LOOP, 7,
> +	     build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET),
> +	     dir, e_range, element_s, chunk, e_gwv, chunk);

Similarly.  For the build_int_cst argument just add a temporary with
a short name (e.g. t) and initialize it to build_int_cst before the
gimple_build_call_internal.

> +	  gimple_call_set_lhs (call, e_offset);
> +	  gimple_set_location (call, loc);
> +	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
> +
> +	  call = gimple_build_call_internal
> +	    (IFN_GOACC_LOOP, 7,
> +	     build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND),
> +	     dir, e_range, element_s, chunk, e_gwv, e_offset);
> +	  gimple_call_set_lhs (call, e_bound);
> +	  gimple_set_location (call, loc);
> +	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
> +
> +	  call = gimple_build_call_internal
> +	    (IFN_GOACC_LOOP, 6,
> +	     build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP),
> +	     dir, e_range, element_s, chunk, e_gwv);

And again 2x.

>    if (cont_bb)
>      {
> -      /* We now have one or two nested loops.  Update the loop
> +      /* We now have one,  two or three nested loops.  Update the loop

Only one space after , - we use 2 spaces only after full stop.

> @@ -11537,6 +11712,15 @@ expand_oacc_for (struct omp_region *region, struct
>  	  body_loop->header = body_bb;
>  	  body_loop->latch = cont_bb;
>  	  add_loop (body_loop, parent);
> +
> +	  if (fd->tiling)
> +	    {
> +	      // Insert tiling's element loop

Please use /* */ style comment instead, plus full stop:
	      /* Insert tiling's element loop.  */
> +	      struct loop *inner_loop = alloc_loop ();
> +	      inner_loop->header = elem_body_bb;
> +	      inner_loop->latch = elem_cont_bb;
> +	      add_loop (inner_loop, body_loop);

> +static void
> +oacc_xform_tile (gcall *call)
> +{
> +  gimple_stmt_iterator gsi = gsi_for_stmt (call);
> +  unsigned collapse = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
> +  /* Inner loops have higher loop_nos.  */
> +  unsigned loop_no = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 1));
> +  tree tile_size = gimple_call_arg (call, 2);
> +  unsigned e_mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 4));

Please use
  unsigned collapse = tree_to_uhwi (gimple_call_arg (call, 0));
etc. instead.

> +  tree lhs = gimple_call_lhs (call);
> +  tree type = TREE_TYPE (lhs);
> +  gimple_seq seq = NULL;
> +  tree span = build_int_cst (type, 1);
> +
> +  gcc_assert (!(e_mask
> +		& ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR)
> +		    | GOMP_DIM_MASK (GOMP_DIM_WORKER))));
> +  push_gimplify_context (!seen_error ());
> +  if (
> +#ifndef ACCEL_COMPILER
> +      1 ||
> +#endif
> +      !e_mask)

We don't want || at the end of line.
Perhaps better
#ifndef ACCEL_COMPILER
  e_mask = 0;
#endif
  if (!e_mask)
?

> +	switch (gimple_call_internal_fn (call))
> +	  {
> +	  case IFN_GOACC_LOOP:
> +	    {
> +	      bool is_e = gimple_call_arg (call, 5) == integer_minus_one_node;
> +	      *gimple_call_arg_ptr (call, 5) = is_e ? e_mask_arg : mask_arg;
> +	      if (!is_e)
> +		*gimple_call_arg_ptr (call, 4) = chunk_arg;
> +	    }
> +	    break;
>  
> +	  case IFN_GOACC_TILE:
> +	    *gimple_call_arg_ptr (call, 3) = mask_arg;
> +	    *gimple_call_arg_ptr (call, 4) = e_mask_arg;
> +	    break;

Please use
gimple_call_set_arg (call, idx, val)
instead of
*gimple_call_arg_ptr (call, idx) = val

> +      /* Apply auto partitioning if this is a non-partitioned regular
> +	 loop, or (no more than) single axis tiled loop.  */
> +      bool maybe_auto = !seq_par
> +	&& this_mask == (tiling ? this_mask & -this_mask : 0);

Wrong formatting, the && must go below !seq or better there should be ()
around the whole rhs.  Or maybe even better
      bool maybe_auto
	= !seq_par && this_mask == (tiling ? this_mask & -this_mask : 0);

>    if (loop->child)
>      {
> -      loop->inner = oacc_loop_fixed_partitions (loop->child,
> -						outer_mask | this_mask); 
> +      loop->inner = oacc_loop_fixed_partitions
> +	(loop->child, outer_mask | this_mask | loop->e_mask); 

See above for ( on next line.  Perhaps introduce a temporary holding
outer_mask | this_mask | loop->e_mask
or similar.

> -  if (assign && !loop->mask)
> +  if (loop->child)
> +    loop->inner = oacc_loop_auto_partitions
> +      (loop->child, outer_mask | loop->mask | loop->e_mask,
> +       outer_assign | assign);

Again.

>        /* Determine the outermost partitioning used within this loop. */
>        this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
>        this_mask = least_bit_hwi (this_mask);
>  
>        /* Pick the partitioning just inside that one.  */
>        this_mask >>= 1;
> -
>        /* And avoid picking one use by an outer loop. */
>        this_mask &= ~outer_mask;

Why remove the empty line?

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]