[PATCH] Handle loops with control flow in loop-distribution

Richard Biener rguenther@suse.de
Fri Sep 13 12:52:00 GMT 2013


The following patch makes loop-distribution able to distribute loops
that have control flow.  It does so by adding control dependence
edges to the RDG (removing the need for special-casing the loop
exit condition and its dependencies).  With this we can now
distribute the loop of the testcase

  for (i = 0; i < 1024; ++i)
    {
      a[i] = 0;
      if (i > 100)
        b[i] = i;
    }

and generate a memset for zeroing a.  The patch does not in any
way add or adjust a cost model, so it remains necessary that
there is a write in each partition (a cost model could also be
that if we were able to factor out a control dependence from
at least one partition then that is worthwhile on its own).

In theory this should expose a greater number of loops to
vectorization (if you'd enable -ftree-loop-distribution, that is).

Bootstrapped (with -O2 -g -ftree-loop-distribution) on 
x86_64-unknown-linux-gnu, testing in progress.

I'll leave it for comments until Monday (if anyone cares).

The next restriction to go is that of us only distributing innermost
loops.

Thanks,
Richard.


2013-09-13  Richard Biener  <rguenther@suse.de>

	* tree-loop-distribution.c (enum rdg_dep_type): Add control_dd.
	(dot_rdg_1): Handle control_dd.
	(create_edge_for_control_dependence): New function.
	(create_rdg_edges): Add control dependences if asked for.
	(build_rdg): Likewise.
	(generate_loops_for_partition): If there are not necessary
	control stmts remove all their dependencies.
	(collect_condition_stmts, rdg_flag_loop_exits): Remove.
	(distribute_loop): Pass on control dependences.
	(tree_loop_distribution): Compute control dependences and remove
	restriction on number of loop nodes.

	* gcc.dg/tree-ssa/ldist-22.c: New testcase.

Index: gcc/tree-loop-distribution.c
===================================================================
*** gcc/tree-loop-distribution.c.orig	2013-09-13 13:34:49.000000000 +0200
--- gcc/tree-loop-distribution.c	2013-09-13 13:44:09.771379182 +0200
*************** enum rdg_dep_type
*** 92,98 ****
    output_dd = 'o',
  
    /* Read After Read (RAR).  */
!   input_dd = 'i'
  };
  
  /* Dependence information attached to an edge of the RDG.  */
--- 92,101 ----
    output_dd = 'o',
  
    /* Read After Read (RAR).  */
!   input_dd = 'i',
! 
!   /* Control dependence (execute conditional on).  */
!   control_dd = 'c'
  };
  
  /* Dependence information attached to an edge of the RDG.  */
*************** dot_rdg_1 (FILE *file, struct graph *rdg
*** 218,223 ****
--- 221,230 ----
               fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
               break;
  
+ 	   case control_dd:
+              fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
+              break;
+ 
             default:
               gcc_unreachable ();
             }
*************** create_rdg_edges_for_scalar (struct grap
*** 325,334 ****
      }
  }
  
  /* Creates the edges of the reduced dependence graph RDG.  */
  
  static void
! create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs)
  {
    int i;
    struct data_dependence_relation *ddr;
--- 332,369 ----
      }
  }
  
+ /* Creates an edge for the control dependences of BB to the vertex V.  */
+ 
+ static void
+ create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
+ 				    int v, control_dependences *cd)
+ {
+   bitmap_iterator bi;
+   unsigned edge_n;
+   EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
+ 			    0, edge_n, bi)
+     {
+       basic_block cond_bb = cd->get_edge (edge_n)->src;
+       gimple stmt = last_stmt (cond_bb);
+       if (stmt && is_ctrl_stmt (stmt))
+ 	{
+ 	  struct graph_edge *e;
+ 	  int c = rdg_vertex_for_stmt (rdg, stmt);
+ 	  if (c < 0)
+ 	    continue;
+ 
+ 	  e = add_edge (rdg, c, v);
+ 	  e->data = XNEW (struct rdg_edge);
+ 	  RDGE_TYPE (e) = control_dd;
+ 	  RDGE_RELATION (e) = NULL;
+ 	}
+     }
+ }
+ 
  /* Creates the edges of the reduced dependence graph RDG.  */
  
  static void
! create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs, control_dependences *cd)
  {
    int i;
    struct data_dependence_relation *ddr;
*************** create_rdg_edges (struct graph *rdg, vec
*** 345,350 ****
--- 380,400 ----
      FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
  			      iter, SSA_OP_DEF)
        create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
+ 
+   if (cd)
+     for (i = 0; i < rdg->n_vertices; i++)
+       {
+ 	gimple stmt = RDG_STMT (rdg, i);
+ 	if (gimple_code (stmt) == GIMPLE_PHI)
+ 	  {
+ 	    edge_iterator ei;
+ 	    edge e;
+ 	    FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
+ 	      create_edge_for_control_dependence (rdg, e->src, i, cd);
+ 	  }
+ 	else
+ 	  create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
+       }
  }
  
  /* Build the vertices of the reduced dependence graph RDG.  Return false
*************** free_rdg (struct graph *rdg)
*** 465,471 ****
     scalar dependence.  */
  
  static struct graph *
! build_rdg (vec<loop_p> loop_nest)
  {
    struct graph *rdg;
    vec<gimple> stmts;
--- 515,521 ----
     scalar dependence.  */
  
  static struct graph *
! build_rdg (vec<loop_p> loop_nest, control_dependences *cd)
  {
    struct graph *rdg;
    vec<gimple> stmts;
*************** build_rdg (vec<loop_p> loop_nest)
*** 497,503 ****
        free_rdg (rdg);
        return NULL;
      }
!   create_rdg_edges (rdg, dependence_relations);
    dependence_relations.release ();
    datarefs.release ();
  
--- 547,553 ----
        free_rdg (rdg);
        return NULL;
      }
!   create_rdg_edges (rdg, dependence_relations, cd);
    dependence_relations.release ();
    datarefs.release ();
  
*************** generate_loops_for_partition (struct loo
*** 699,710 ****
  	      && !is_gimple_debug (stmt)
  	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
  	    {
! 	      unlink_stmt_vdef (stmt);
! 	      gsi_remove (&bsi, true);
! 	      release_defs (stmt);
  	    }
! 	  else
! 	    gsi_next (&bsi);
  	}
      }
  
--- 749,776 ----
  	      && !is_gimple_debug (stmt)
  	      && !bitmap_bit_p (partition->stmts, gimple_uid (stmt)))
  	    {
! 	      /* Choose an arbitrary path through the empty CFG part
! 		 that this unnecessary control stmt controls.  */
! 	      if (gimple_code (stmt) == GIMPLE_COND)
! 		{
! 		  gimple_cond_make_false (stmt);
! 		  update_stmt (stmt);
! 		}
! 	      else if (gimple_code (stmt) == GIMPLE_SWITCH)
! 		{
! 		  gimple_switch_set_index
! 		      (stmt, CASE_LOW (gimple_switch_label (stmt, 1)));
! 		  update_stmt (stmt);
! 		}
! 	      else
! 		{
! 		  unlink_stmt_vdef (stmt);
! 		  gsi_remove (&bsi, true);
! 		  release_defs (stmt);
! 		  continue;
! 		}
  	    }
! 	  gsi_next (&bsi);
  	}
      }
  
*************** rdg_flag_vertex_and_dependent (struct gr
*** 1031,1092 ****
    nodes.release ();
  }
  
- /* Initialize CONDS with all the condition statements from the basic
-    blocks of LOOP.  */
- 
- static void
- collect_condition_stmts (struct loop *loop, vec<gimple> *conds)
- {
-   unsigned i;
-   edge e;
-   vec<edge> exits = get_loop_exit_edges (loop);
- 
-   FOR_EACH_VEC_ELT (exits, i, e)
-     {
-       gimple cond = last_stmt (e->src);
- 
-       if (cond)
- 	conds->safe_push (cond);
-     }
- 
-   exits.release ();
- }
- 
- /* Add to PARTITION all the exit condition statements for LOOPS
-    together with all their dependent statements determined from
-    RDG.  */
- 
- static void
- rdg_flag_loop_exits (struct graph *rdg, partition_t partition,
- 		     bitmap processed)
- {
-   unsigned i;
-   bitmap_iterator bi;
-   vec<gimple> conds;
-   conds.create (3);
- 
-   EXECUTE_IF_SET_IN_BITMAP (partition->loops, 0, i, bi)
-     collect_condition_stmts (get_loop (cfun, i), &conds);
- 
-   while (!conds.is_empty ())
-     {
-       gimple cond = conds.pop ();
-       int v = rdg_vertex_for_stmt (rdg, cond);
-       if (!already_processed_vertex_p (processed, v))
- 	{
- 	  bitmap saved_loops = BITMAP_ALLOC (NULL);
- 	  bitmap_copy (saved_loops, partition->loops);
- 	  rdg_flag_vertex_and_dependent (rdg, v, partition, processed);
- 	  EXECUTE_IF_AND_COMPL_IN_BITMAP (partition->loops, saved_loops,
- 					  0, i, bi)
- 	    collect_condition_stmts (get_loop (cfun, i), &conds);
- 	  BITMAP_FREE (saved_loops);
- 	}
-     }
- 
-   conds.release ();
- }
- 
  /* Returns a partition with all the statements needed for computing
     the vertex V of the RDG, also including the loop exit conditions.  */
  
--- 1097,1102 ----
*************** build_rdg_partition_for_vertex (struct g
*** 1096,1102 ****
    partition_t partition = partition_alloc (NULL, NULL);
    bitmap processed = BITMAP_ALLOC (NULL);
    rdg_flag_vertex_and_dependent (rdg, v, partition, processed);
-   rdg_flag_loop_exits (rdg, partition, processed);
    BITMAP_FREE (processed);
    return partition;
  }
--- 1106,1111 ----
*************** partition_contains_all_rw (struct graph
*** 1463,1469 ****
     Returns the number of distributed loops.  */
  
  static int
! distribute_loop (struct loop *loop, vec<gimple> stmts)
  {
    struct graph *rdg;
    vec<loop_p> loop_nest;
--- 1472,1479 ----
     Returns the number of distributed loops.  */
  
  static int
! distribute_loop (struct loop *loop, vec<gimple> stmts,
! 		 control_dependences *cd)
  {
    struct graph *rdg;
    vec<loop_p> loop_nest;
*************** distribute_loop (struct loop *loop, vec<
*** 1479,1485 ****
        return 0;
      }
  
!   rdg = build_rdg (loop_nest);
    if (!rdg)
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
--- 1489,1495 ----
        return 0;
      }
  
!   rdg = build_rdg (loop_nest, cd);
    if (!rdg)
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
*************** tree_loop_distribution (void)
*** 1631,1636 ****
--- 1641,1647 ----
    loop_iterator li;
    bool changed = false;
    basic_block bb;
+   control_dependences *cd = NULL;
  
    FOR_ALL_BB (bb)
      {
*************** tree_loop_distribution (void)
*** 1660,1669 ****
        if (!optimize_loop_for_speed_p (loop))
  	continue;
  
-       /* Only distribute loops with a header and latch for now.  */
-       if (loop->num_nodes > 2)
- 	continue;
- 
        /* Initialize the worklist with stmts we seed the partitions with.  */
        bbs = get_loop_body_in_dom_order (loop);
        for (i = 0; i < loop->num_nodes; ++i)
--- 1671,1676 ----
*************** out:
*** 1697,1703 ****
        free (bbs);
  
        if (work_list.length () > 0)
! 	nb_generated_loops = distribute_loop (loop, work_list);
  
        if (nb_generated_loops > 0)
  	changed = true;
--- 1704,1718 ----
        free (bbs);
  
        if (work_list.length () > 0)
! 	{
! 	  if (!cd)
! 	    {
! 	      calculate_dominance_info (CDI_POST_DOMINATORS);
! 	      cd = new control_dependences (create_edge_list ());
! 	      free_dominance_info (CDI_POST_DOMINATORS);
! 	    }
! 	  nb_generated_loops = distribute_loop (loop, work_list, cd);
! 	}
  
        if (nb_generated_loops > 0)
  	changed = true;
*************** out:
*** 1714,1719 ****
--- 1729,1737 ----
        work_list.release ();
      }
  
+   if (cd)
+     delete cd;
+ 
    if (changed)
      {
        mark_virtual_operands_for_renaming (cfun);
Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-22.c
===================================================================
*** /dev/null	1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/tree-ssa/ldist-22.c	2013-09-13 14:06:48.135500474 +0200
***************
*** 0 ****
--- 1,32 ----
+ /* { dg-do run } */
+ /* { dg-options "-O3 -fdump-tree-ldist-details" } */
+ 
+ extern void abort (void);
+ 
+ int a[1024], b[1024];
+ 
+ void __attribute__((noinline,noclone))
+ foo (void)
+ {
+   int i;
+   for (i = 0; i < 1024; ++i)
+     {
+       a[i] = 0;
+       if (i > 100)
+ 	b[i] = i;
+     }
+ }
+ 
+ int main()
+ {
+   b[100] = 1;
+   foo ();
+   if (b[100] != 1 || b[101] != 101)
+     abort ();
+   if (a[0] != 0 || a[101] != 0)
+     abort ();
+   return;
+ }
+ 
+ /* { dg-final { scan-tree-dump "generated memset zero" "ldist" } } */
+ /* { dg-final { cleanup-tree-dump "ldist" } } */



More information about the Gcc-patches mailing list