[PATCH] Handle loops with control flow in loop-distribution
Richard Biener
rguenther@suse.de
Fri Sep 13 12:52:00 GMT 2013
The following patch makes loop-distribution able to distribute loops
that have control flow. It does so by adding control dependence
edges to the RDG (removing the need for special-casing the loop
exit condition and its dependencies). With this we can now
distribute the loop of the testcase
for (i = 0; i < 1024; ++i)
{
a[i] = 0;
if (i > 100)
b[i] = i;
}
and generate a memset for zeroing a. The patch does not in any
way add or adjust a cost model, so it remains necessary that
there is a write in each partition (a cost model could also be
that if we were able to factor out a control dependence from
at least one partition then that is worthwhile on its own).
In theory this should expose a greater number of loops to
vectorization (if you'd enable -ftree-loop-distribution, that is).
Bootstrapped (with -O2 -g -ftree-loop-distribution) on
x86_64-unknown-linux-gnu, testing in progress.
I'll leave it for comments until Monday (if anyone cares).
The next restriction to go is that of us only distributing innermost
loops.
Thanks,
Richard.
2013-09-13 Richard Biener <rguenther@suse.de>
* tree-loop-distribution.c (enum rdg_dep_type): Add control_dd.
(dot_rdg_1): Handle control_dd.
(create_edge_for_control_dependence): New function.
(create_rdg_edges): Add control dependences if asked for.
(build_rdg): Likewise.
(generate_loops_for_partition): If there are not necessary
control stmts remove all their dependencies.
(collect_condition_stmts, rdg_flag_loop_exits): Remove.
(distribute_loop): Pass on control dependences.
(tree_loop_distribution): Compute control dependences and remove
restriction on number of loop nodes.
* gcc.dg/tree-ssa/ldist-22.c: New testcase.
Index: gcc/tree-loop-distribution.c
===================================================================
*** gcc/tree-loop-distribution.c.orig 2013-09-13 13:34:49.000000000 +0200
--- gcc/tree-loop-distribution.c 2013-09-13 13:44:09.771379182 +0200
*************** enum rdg_dep_type
*** 92,98 ****
output_dd = 'o',
/* Read After Read (RAR). */
! input_dd = 'i'
};
/* Dependence information attached to an edge of the RDG. */
--- 92,101 ----
output_dd = 'o',
/* Read After Read (RAR). */
! input_dd = 'i',
!
! /* Control dependence (execute conditional on). */
! control_dd = 'c'
};
/* Dependence information attached to an edge of the RDG. */
*************** dot_rdg_1 (FILE *file, struct graph *rdg
*** 218,223 ****
--- 221,230 ----
fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
break;
+ case control_dd:
+ fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
+ break;
+
default:
gcc_unreachable ();
}
*************** create_rdg_edges_for_scalar (struct grap
*** 325,334 ****
}
}
/* Creates the edges of the reduced dependence graph RDG. */
static void
! create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs)
{
int i;
struct data_dependence_relation *ddr;
--- 332,369 ----
}
}
+ /* Creates an edge for the control dependences of BB to the vertex V. */
+
+ static void
+ create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
+ int v, control_dependences *cd)
+ {
+ bitmap_iterator bi;
+ unsigned edge_n;
+ EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
+ 0, edge_n, bi)
+ {
+ basic_block cond_bb = cd->get_edge (edge_n)->src;
+ gimple stmt = last_stmt (cond_bb);
+ if (stmt && is_ctrl_stmt (stmt))
+ {
+ struct graph_edge *e;
+ int c = rdg_vertex_for_stmt (rdg, stmt);
+ if (c < 0)
+ continue;
+
+ e = add_edge (rdg, c, v);
+ e->data = XNEW (struct rdg_edge);
+ RDGE_TYPE (e) = control_dd;
+ RDGE_RELATION (e) = NULL;
+ }
+ }
+ }
+
/* Creates the edges of the reduced dependence graph RDG. */
static void
! create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs, control_dependences *cd)
{
int i;
struct data_dependence_relation *ddr;
*************** create_rdg_edges (struct graph *rdg, vec
*** 345,350 ****
--- 380,400 ----
FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
iter, SSA_OP_DEF)
create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
+
+ if (cd)
+ for (i = 0; i < rdg->n_vertices; i++)
+ {
+ gimple stmt = RDG_STMT (rdg, i);
+ if (gimple_code (stmt) == GIMPLE_PHI)
+ {
+ edge_iterator ei;
+ edge e;
+ FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
+ create_edge_for_control_dependence (rdg, e->src, i, cd);
+ }
+ else
+ create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
+ }
}
/* Build the vertices of the reduced dependence graph RDG. Return false
*************** free_rdg (struct graph *rdg)
*** 465,471 ****
scalar dependence. */
static struct graph *
! build_rdg (vec<loop_p> loop_nest)
{
struct graph *rdg;
vec<gimple> stmts;
--- 515,521 ----
scalar dependence. */
static struct graph *
! build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
{
struct graph *rdg;
vec<gimple> stmts;
*************** build_rdg (vec<loop_p> loop_nest)
*** 497,503 ****
free_rdg (rdg);
return NULL;
}
! create_rdg_edges (rdg, dependence_relations);
dependence_relations.release ();
datarefs.release ();
--- 547,553 ----
free_rdg (rdg);
return NULL;
}
! create_rdg_edges (rdg, dependence_relations, cd);
dependence_relations.release ();
datarefs.release ();
*************** generate_loops_for_partition (struct loo
*** 699,710 ****
&& !is_gimple_debug (stmt)
&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
{
! unlink_stmt_vdef (stmt);
! gsi_remove (&bsi, true);
! release_defs (stmt);
}
! else
! gsi_next (&bsi);
}
}
--- 749,776 ----
&& !is_gimple_debug (stmt)
&& !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
{
! /* Choose an arbitrary path through the empty CFG part
! that this unnecessary control stmt controls. */
! if (gimple_code (stmt) == GIMPLE_COND)
! {
! gimple_cond_make_false (stmt);
! update_stmt (stmt);
! }
! else if (gimple_code (stmt) == GIMPLE_SWITCH)
! {
! gimple_switch_set_index
! (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
! update_stmt (stmt);
! }
! else
! {
! unlink_stmt_vdef (stmt);
! gsi_remove (&bsi, true);
! release_defs (stmt);
! continue;
! }
}
! gsi_next (&bsi);
}
}
*************** rdg_flag_vertex_and_dependent (struct gr
*** 1031,1092 ****
nodes.release ();
}
- /* Initialize CONDS with all the condition statements from the basic
- blocks of LOOP. */
-
- static void
- collect_condition_stmts (struct loop *loop, vec<gimple> *conds)
- {
- unsigned i;
- edge e;
- vec<edge> exits = get_loop_exit_edges (loop);
-
- FOR_EACH_VEC_ELT (exits, i, e)
- {
- gimple cond = last_stmt (e->src);
-
- if (cond)
- conds->safe_push (cond);
- }
-
- exits.release ();
- }
-
- /* Add to PARTITION all the exit condition statements for LOOPS
- together with all their dependent statements determined from
- RDG. */
-
- static void
- rdg_flag_loop_exits (struct graph *rdg, partition_t partition,
- bitmap processed)
- {
- unsigned i;
- bitmap_iterator bi;
- vec<gimple> conds;
- conds.create (3);
-
- EXECUTE_IF_SET_IN_BITMAP (partition->loops, 0, i, bi)
- collect_condition_stmts (get_loop (cfun, i), &conds);
-
- while (!conds.is_empty ())
- {
- gimple cond = conds.pop ();
- int v = rdg_vertex_for_stmt (rdg, cond);
- if (!already_processed_vertex_p (processed, v))
- {
- bitmap saved_loops = BITMAP_ALLOC (NULL);
- bitmap_copy (saved_loops, partition->loops);
- rdg_flag_vertex_and_dependent (rdg, v, partition, processed);
- EXECUTE_IF_AND_COMPL_IN_BITMAP (partition->loops, saved_loops,
- 0, i, bi)
- collect_condition_stmts (get_loop (cfun, i), &conds);
- BITMAP_FREE (saved_loops);
- }
- }
-
- conds.release ();
- }
-
/* Returns a partition with all the statements needed for computing
the vertex V of the RDG, also including the loop exit conditions. */
--- 1097,1102 ----
*************** build_rdg_partition_for_vertex (struct g
*** 1096,1102 ****
partition_t partition = partition_alloc (NULL, NULL);
bitmap processed = BITMAP_ALLOC (NULL);
rdg_flag_vertex_and_dependent (rdg, v, partition, processed);
- rdg_flag_loop_exits (rdg, partition, processed);
BITMAP_FREE (processed);
return partition;
}
--- 1106,1111 ----
*************** partition_contains_all_rw (struct graph
*** 1463,1469 ****
Returns the number of distributed loops. */
static int
! distribute_loop (struct loop *loop, vec<gimple> stmts)
{
struct graph *rdg;
vec<loop_p> loop_nest;
--- 1472,1479 ----
Returns the number of distributed loops. */
static int
! distribute_loop (struct loop *loop, vec<gimple> stmts,
! control_dependences *cd)
{
struct graph *rdg;
vec<loop_p> loop_nest;
*************** distribute_loop (struct loop *loop, vec<
*** 1479,1485 ****
return 0;
}
! rdg = build_rdg (loop_nest);
if (!rdg)
{
if (dump_file && (dump_flags & TDF_DETAILS))
--- 1489,1495 ----
return 0;
}
! rdg = build_rdg (loop_nest, cd);
if (!rdg)
{
if (dump_file && (dump_flags & TDF_DETAILS))
*************** tree_loop_distribution (void)
*** 1631,1636 ****
--- 1641,1647 ----
loop_iterator li;
bool changed = false;
basic_block bb;
+ control_dependences *cd = NULL;
FOR_ALL_BB (bb)
{
*************** tree_loop_distribution (void)
*** 1660,1669 ****
if (!optimize_loop_for_speed_p (loop))
continue;
- /* Only distribute loops with a header and latch for now. */
- if (loop->num_nodes > 2)
- continue;
-
/* Initialize the worklist with stmts we seed the partitions with. */
bbs = get_loop_body_in_dom_order (loop);
for (i = 0; i < loop->num_nodes; ++i)
--- 1671,1676 ----
*************** out:
*** 1697,1703 ****
free (bbs);
if (work_list.length () > 0)
! nb_generated_loops = distribute_loop (loop, work_list);
if (nb_generated_loops > 0)
changed = true;
--- 1704,1718 ----
free (bbs);
if (work_list.length () > 0)
! {
! if (!cd)
! {
! calculate_dominance_info (CDI_POST_DOMINATORS);
! cd = new control_dependences (create_edge_list ());
! free_dominance_info (CDI_POST_DOMINATORS);
! }
! nb_generated_loops = distribute_loop (loop, work_list, cd);
! }
if (nb_generated_loops > 0)
changed = true;
*************** out:
*** 1714,1719 ****
--- 1729,1737 ----
work_list.release ();
}
+ if (cd)
+ delete cd;
+
if (changed)
{
mark_virtual_operands_for_renaming (cfun);
Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-22.c
===================================================================
*** /dev/null 1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/tree-ssa/ldist-22.c 2013-09-13 14:06:48.135500474 +0200
***************
*** 0 ****
--- 1,32 ----
+ /* { dg-do run } */
+ /* { dg-options "-O3 -fdump-tree-ldist-details" } */
+
+ extern void abort (void);
+
+ int a[1024], b[1024];
+
+ void __attribute__((noinline,noclone))
+ foo (void)
+ {
+ int i;
+ for (i = 0; i < 1024; ++i)
+ {
+ a[i] = 0;
+ if (i > 100)
+ b[i] = i;
+ }
+ }
+
+ int main()
+ {
+ b[100] = 1;
+ foo ();
+ if (b[100] != 1 || b[101] != 101)
+ abort ();
+ if (a[0] != 0 || a[101] != 0)
+ abort ();
+ return;
+ }
+
+ /* { dg-final { scan-tree-dump "generated memset zero" "ldist" } } */
+ /* { dg-final { cleanup-tree-dump "ldist" } } */
More information about the Gcc-patches
mailing list