This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PING][PATCH, 2/2][PR66642] Add empty loop exit block in transform_to_exit_first_loop_alt
- From: Richard Biener <rguenther at suse dot de>
- To: Tom de Vries <Tom_deVries at mentor dot com>
- Cc: "gcc-patches at gnu dot org" <gcc-patches at gnu dot org>
- Date: Mon, 6 Jul 2015 15:44:58 +0200 (CEST)
- Subject: Re: [PING][PATCH, 2/2][PR66642] Add empty loop exit block in transform_to_exit_first_loop_alt
- Authentication-results: sourceware.org; auth=none
- References: <558BB12B dot 7060108 at mentor dot com> <559A54C2 dot 7090604 at mentor dot com>
On Mon, 6 Jul 2015, Tom de Vries wrote:
> On 25/06/15 09:43, Tom de Vries wrote:
> > Hi,
> >
> > I ran into a failure with parloops for reduction loop testcase
> > libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt-3.c. When we
> > exercise the low iteration count loop, the test-case fails.
> >
> > To understand the problem, let's first look at what happens when we use
> > transform_to_exit_first_loop (the original one) instead of
> > transform_to_exit_first_loop_alt (the alternative one, which is
> > currently used, and causing the failure).
> >
> > Before transform_to_exit_first_loop, the low iteration count loop and
> > the main loop share the loop exit block. After
> > transform_to_exit_first_loop, that's not the case anymore, the main loop
> > now has an exit block with a single predecessor. Subsequently,
> > separate_decls_in_region inserts code in the main loop exit block, which
> > is only triggered upon exit of the main loop.
> >
> > However, transform_to_exit_first_loop_alt does not insert such an exit
> > block, and the code inserted by separate_decls_in_region is also active
> > for the low iteration count loop, which results in an incorrect
> > reduction result when the low iteration count loop is used.
> >
> >
> > This patch fixes the problem by making sure
> > transform_to_exit_first_loop_alt adds a new exit block inbetween the
> > main loop header and the old exit block.
> >
> >
> > Bootstrapped and reg-tested on x86_64.
> >
> > OK for trunk?
> >
>
> Ping.
Ok.
Thanks,
Richard.
> Thanks,
> - Tom
>
> > 0002-Add-empty-loop-exit-block-in-transform_to_exit_first.patch
> >
> >
> > Add empty loop exit block in transform_to_exit_first_loop_alt
> >
> > 2015-06-24 Tom de Vries<tom@codesourcery.com>
> >
> > PR tree-optimization/66642
> > * tree-parloops.c (transform_to_exit_first_loop_alt): Update function
> > header comment. Rename split_edge variable to edge_at_split. Split
> > exit edge to create new loop exit bb. Insert loop exit phis in new
> > loop
> > exit bb.
> >
> > * testsuite/libgomp.c/parloops-exit-first-loop-alt-3.c (main): Test
> > low
> > iteration count case.
> > * testsuite/libgomp.c/parloops-exit-first-loop-alt.c (init): New
> > function, factor out of ...
> > (main): ... here. Test low iteration count case.
> > ---
> > gcc/tree-parloops.c | 45
> > ++++++++++++++++------
> > .../libgomp.c/parloops-exit-first-loop-alt-3.c | 5 +++
> > .../libgomp.c/parloops-exit-first-loop-alt.c | 28 +++++++++++++-
> > 3 files changed, 64 insertions(+), 14 deletions(-)
> >
> > diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
> > index df7c351..6c8aaab 100644
> > --- a/gcc/tree-parloops.c
> > +++ b/gcc/tree-parloops.c
> > @@ -1522,7 +1522,7 @@ replace_uses_in_bb_by (tree name, tree val,
> > basic_block bb)
> > goto <bb header>
> >
> > <bb exit>:
> > - sum_z = PHI <sum_b (cond[1])>
> > + sum_z = PHI <sum_b (cond[1]), ...>
> >
> > [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
> > that's <bb header>.
> > @@ -1549,14 +1549,17 @@ replace_uses_in_bb_by (tree name, tree val,
> > basic_block bb)
> > if (ivtmp_c < n + 1)
> > goto <bb header>;
> > else
> > - goto <bb exit>;
> > + goto <bb newexit>;
> >
> > <bb latch>:
> > ivtmp_b = ivtmp_a + 1;
> > goto <bb newheader>
> >
> > + <bb newexit>:
> > + sum_y = PHI <sum_c (newheader)>
> > +
> > <bb exit>:
> > - sum_z = PHI <sum_c (newheader)>
> > + sum_z = PHI <sum_y (newexit), ...>
> >
> >
> > In unified diff format:
> > @@ -1593,9 +1596,12 @@ replace_uses_in_bb_by (tree name, tree val,
> > basic_block bb)
> > - goto <bb header>
> > + goto <bb newheader>
> >
> > ++ <bb newexit>:
> > ++ sum_y = PHI <sum_c (newheader)>
> > +
> > <bb exit>:
> > -- sum_z = PHI <sum_b (cond[1])>
> > -+ sum_z = PHI <sum_c (newheader)>
> > +- sum_z = PHI <sum_b (cond[1]), ...>
> > ++ sum_z = PHI <sum_y (newexit), ...>
> >
> > Note: the example does not show any virtual phis, but these are handled
> > more
> > or less as reductions.
> > @@ -1626,7 +1632,7 @@ transform_to_exit_first_loop_alt (struct loop *loop,
> >
> > /* Create the new_header block. */
> > basic_block new_header = split_block_before_cond_jump (exit->src);
> > - edge split_edge = single_pred_edge (new_header);
> > + edge edge_at_split = single_pred_edge (new_header);
> >
> > /* Redirect entry edge to new_header. */
> > edge entry = loop_preheader_edge (loop);
> > @@ -1643,9 +1649,9 @@ transform_to_exit_first_loop_alt (struct loop *loop,
> > e = redirect_edge_and_branch (post_cond_edge, header);
> > gcc_assert (e == post_cond_edge);
> >
> > - /* Redirect split_edge to latch. */
> > - e = redirect_edge_and_branch (split_edge, latch);
> > - gcc_assert (e == split_edge);
> > + /* Redirect edge_at_split to latch. */
> > + e = redirect_edge_and_branch (edge_at_split, latch);
> > + gcc_assert (e == edge_at_split);
> >
> > /* Set the new loop bound. */
> > gimple_cond_set_rhs (cond_stmt, bound);
> > @@ -1697,21 +1703,36 @@ transform_to_exit_first_loop_alt (struct loop *loop,
> > /* Set the latch arguments of the new phis to ivtmp/sum_b. */
> > flush_pending_stmts (post_inc_edge);
> >
> > - /* Register the reduction exit phis. */
> > + /* Create a new empty exit block, inbetween the new loop header and the
> > old
> > + exit block. The function separate_decls_in_region needs this block to
> > + insert code that is active on loop exit, but not any other path. */
> > + basic_block new_exit_block = split_edge (exit);
> > +
> > + /* Insert and register the reduction exit phis. */
> > for (gphi_iterator gsi = gsi_start_phis (exit_block);
> > !gsi_end_p (gsi);
> > gsi_next (&gsi))
> > {
> > gphi *phi = gsi.phi ();
> > tree res_z = PHI_RESULT (phi);
> > +
> > + /* Now that we have a new exit block, duplicate the phi of the old
> > exit
> > + block in the new exit block to preserve loop-closed ssa. */
> > + edge succ_new_exit_block = single_succ_edge (new_exit_block);
> > + edge pred_new_exit_block = single_pred_edge (new_exit_block);
> > + tree res_y = copy_ssa_name (res_z, phi);
> > + gphi *nphi = create_phi_node (res_y, new_exit_block);
> > + tree res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
> > + add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
> > + add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
> > +
> > if (virtual_operand_p (res_z))
> > continue;
> >
> > - tree res_c = PHI_ARG_DEF_FROM_EDGE (phi, exit);
> > gimple reduc_phi = SSA_NAME_DEF_STMT (res_c);
> > struct reduction_info *red = reduction_phi (reduction_list,
> > reduc_phi);
> > if (red != NULL)
> > - red->keep_res = phi;
> > + red->keep_res = nphi;
> > }
> >
> > /* We're going to cancel the loop at the end of gen_parallel_loop, but
> > until
> > diff --git a/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt-3.c
> > b/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt-3.c
> > index cb5bf9c..bf06e0e 100644
> > --- a/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt-3.c
> > +++ b/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt-3.c
> > @@ -36,5 +36,10 @@ main (void)
> > if (res != 11995)
> > abort ();
> >
> > + /* Test low iteration count case. */
> > + res = f (10);
> > + if (res != 25)
> > + abort ();
> > +
> > return 0;
> > }
> > diff --git a/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt.c
> > b/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt.c
> > index 1c32ea3..35c47d7 100644
> > --- a/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt.c
> > +++ b/libgomp/testsuite/libgomp.c/parloops-exit-first-loop-alt.c
> > @@ -21,8 +21,8 @@ f (unsigned int n)
> > c[i] = a[i] + b[i];
> > }
> >
> > -int
> > -main (void)
> > +static void __attribute__((noclone,noinline))
> > +init (void)
> > {
> > int i, j;
> >
> > @@ -35,6 +35,14 @@ main (void)
> > b[k] = (k * 3) % 7;
> > c[k] = k * 2;
> > }
> > +}
> > +
> > +int
> > +main (void)
> > +{
> > + int i;
> > +
> > + init ();
> >
> > f (N);
> >
> > @@ -46,5 +54,21 @@ main (void)
> > abort ();
> > }
> >
> > + /* Test low iteration count case. */
> > +
> > + init ();
> > +
> > + f (10);
> > +
> > + for (i = 0; i < N; i++)
> > + {
> > + unsigned int actual = c[i];
> > + unsigned int expected = (i < 10
> > + ? i + ((i * 3) % 7)
> > + : i * 2);
> > + if (actual != expected)
> > + abort ();
> > + }
> > +
> > return 0;
> > }
> > -- 1.9.1
> >
>
>
--
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Dilip Upmanyu, Graham Norton, HRB 21284 (AG Nuernberg)