This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [Patch 2/5] OpenACC tile clause support, omp-low parts
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Chung-Lin Tang <chunglin_tang at mentor dot com>
- Cc: gcc-patches <gcc-patches at gcc dot gnu dot org>, Nathan Sidwell <nathan at acm dot org>, Cesar Philippidis <cesar at codesourcery dot com>
- Date: Fri, 11 Nov 2016 11:00:30 +0100
- Subject: Re: [Patch 2/5] OpenACC tile clause support, omp-low parts
- Authentication-results: sourceware.org; auth=none
- References: <c5f37d54-c921-50c0-6b42-b4b6fa5813b0@mentor.com>
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
On Thu, Nov 10, 2016 at 06:45:10PM +0800, Chung-Lin Tang wrote:
> 2016-XX-XX Nathan Sidwell <nathan@codesourcery.com>
>
> * internal-fn.def (GOACC_DIM_POS): Comment may be overly conservative.
> (GOACC_TILE): New.
> * internal-fn.c (expand_GOACC_TILE): New.
>
> * omp-low.c (struct omp_for_data): Add tiling field.
> (struct oacc_loop): Change 'ifns' to vector of call stmts,
> add e_mask field.
Please avoid using 8 spaces instead of a tab in ChangeLog.
> dump partitioning.
> (oacc_loop_auto_partitions): Add outer_assign parm. Assign all but
> vector partitioning to outer loops. Assign 2 partitions to loops
> when available. Add TILE handling.
> (oacc_loop_partition): Adjust oacc_loop_auto_partitions call.
> (execite_oacc_device_lower): Process GOACC_TILE fns, ignore unknown specs.
Here too. execute instead of execite? And the last line is too long.
> @@ -626,7 +638,8 @@ extract_omp_for_data (gomp_for *for_stmt, struct o
> int cnt = fd->ordered ? fd->ordered : fd->collapse;
> for (i = 0; i < cnt; i++)
> {
> - if (i == 0 && fd->collapse == 1 && (fd->ordered == 0 || loops == NULL))
> + if (i == 0 && fd->collapse == 1 && !fd->tiling
> + && (fd->ordered == 0 || loops == NULL))
> loop = &fd->loop;
> else if (loops != NULL)
> loop = loops + i;
If the condition fits on one line, it can stay as is, if it can't, then
you should use a
if (i == 0
&& fd->collapse == 1
&& !fd->tiling
&& (fd->ordered == 0 || loops == NULL))
IMHO.
> + tree tile = TREE_VALUE (tiling);
> + gcall *call = gimple_build_call_internal
> + (IFN_GOACC_TILE, 5, num, loop_no, tile,
> + /* gwv-outer=*/integer_zero_node,
> + /* gwv-inner=*/integer_zero_node);
I don't really like the ( on separate line unless absolutely necessary.
So better:
gcall *call
= gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no,
tile, integer_zero_node,
integer_zero_node);
> + call = gimple_build_call_internal
> + (IFN_GOACC_LOOP, 7,
> + build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET),
> + dir, e_range, element_s, chunk, e_gwv, chunk);
Similarly. For the build_int_cst argument just add a temporary with
a short name (e.g. t) and initialize it to build_int_cst before the
gimple_build_call_internal.
> + gimple_call_set_lhs (call, e_offset);
> + gimple_set_location (call, loc);
> + gsi_insert_before (&gsi, call, GSI_SAME_STMT);
> +
> + call = gimple_build_call_internal
> + (IFN_GOACC_LOOP, 7,
> + build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND),
> + dir, e_range, element_s, chunk, e_gwv, e_offset);
> + gimple_call_set_lhs (call, e_bound);
> + gimple_set_location (call, loc);
> + gsi_insert_before (&gsi, call, GSI_SAME_STMT);
> +
> + call = gimple_build_call_internal
> + (IFN_GOACC_LOOP, 6,
> + build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP),
> + dir, e_range, element_s, chunk, e_gwv);
And again 2x.
> if (cont_bb)
> {
> - /* We now have one or two nested loops. Update the loop
> + /* We now have one, two or three nested loops. Update the loop
Only one space after , - we use 2 spaces only after full stop.
> @@ -11537,6 +11712,15 @@ expand_oacc_for (struct omp_region *region, struct
> body_loop->header = body_bb;
> body_loop->latch = cont_bb;
> add_loop (body_loop, parent);
> +
> + if (fd->tiling)
> + {
> + // Insert tiling's element loop
Please use /* */ style comment instead, plus full stop:
/* Insert tiling's element loop. */
> + struct loop *inner_loop = alloc_loop ();
> + inner_loop->header = elem_body_bb;
> + inner_loop->latch = elem_cont_bb;
> + add_loop (inner_loop, body_loop);
> +static void
> +oacc_xform_tile (gcall *call)
> +{
> + gimple_stmt_iterator gsi = gsi_for_stmt (call);
> + unsigned collapse = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
> + /* Inner loops have higher loop_nos. */
> + unsigned loop_no = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 1));
> + tree tile_size = gimple_call_arg (call, 2);
> + unsigned e_mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 4));
Please use
unsigned collapse = tree_to_uhwi (gimple_call_arg (call, 0));
etc. instead.
> + tree lhs = gimple_call_lhs (call);
> + tree type = TREE_TYPE (lhs);
> + gimple_seq seq = NULL;
> + tree span = build_int_cst (type, 1);
> +
> + gcc_assert (!(e_mask
> + & ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR)
> + | GOMP_DIM_MASK (GOMP_DIM_WORKER))));
> + push_gimplify_context (!seen_error ());
> + if (
> +#ifndef ACCEL_COMPILER
> + 1 ||
> +#endif
> + !e_mask)
We don't want || at the end of line.
Perhaps better
#ifndef ACCEL_COMPILER
e_mask = 0;
#endif
if (!e_mask)
?
> + switch (gimple_call_internal_fn (call))
> + {
> + case IFN_GOACC_LOOP:
> + {
> + bool is_e = gimple_call_arg (call, 5) == integer_minus_one_node;
> + *gimple_call_arg_ptr (call, 5) = is_e ? e_mask_arg : mask_arg;
> + if (!is_e)
> + *gimple_call_arg_ptr (call, 4) = chunk_arg;
> + }
> + break;
>
> + case IFN_GOACC_TILE:
> + *gimple_call_arg_ptr (call, 3) = mask_arg;
> + *gimple_call_arg_ptr (call, 4) = e_mask_arg;
> + break;
Please use
gimple_call_set_arg (call, idx, val)
instead of
*gimple_call_arg_ptr (call, idx) = val
> + /* Apply auto partitioning if this is a non-partitioned regular
> + loop, or (no more than) single axis tiled loop. */
> + bool maybe_auto = !seq_par
> + && this_mask == (tiling ? this_mask & -this_mask : 0);
Wrong formatting, the && must go below !seq or better there should be ()
around the whole rhs. Or maybe even better
bool maybe_auto
= !seq_par && this_mask == (tiling ? this_mask & -this_mask : 0);
> if (loop->child)
> {
> - loop->inner = oacc_loop_fixed_partitions (loop->child,
> - outer_mask | this_mask);
> + loop->inner = oacc_loop_fixed_partitions
> + (loop->child, outer_mask | this_mask | loop->e_mask);
See above for ( on next line. Perhaps introduce a temporary holding
outer_mask | this_mask | loop->e_mask
or similar.
> - if (assign && !loop->mask)
> + if (loop->child)
> + loop->inner = oacc_loop_auto_partitions
> + (loop->child, outer_mask | loop->mask | loop->e_mask,
> + outer_assign | assign);
Again.
> /* Determine the outermost partitioning used within this loop. */
> this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
> this_mask = least_bit_hwi (this_mask);
>
> /* Pick the partitioning just inside that one. */
> this_mask >>= 1;
> -
> /* And avoid picking one use by an outer loop. */
> this_mask &= ~outer_mask;
Why remove the empty line?
Jakub