This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Graphite middle-end parts review


Comments for the middle-end parts (http://cri.ensmp.fr/people/pop/gcc/1142_middle-end.diff) inline.
Please look carefully as the patch is not quoted.

Thanks,
Richard.


Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi	(.../trunk)	(revision 138275)
+++ gcc/doc/invoke.texi	(.../branches/graphite)	(revision 138569)
@@ -5941,6 +5942,15 @@ at @option{-O} and higher.
 Perform linear loop transformations on tree.  This flag can improve cache
 performance and allow further loop optimizations to take place.
 
+@item -floop-block
+Perform loop blocking transformations on loops.
+
+@item -floop-strip-mine
+Perform loop strip mining transformations on loops.
+
+@item -floop-interchange
+Perform loop interchange transformations on loops.
+

A bit more verbose descriptions would be nice.  While people might
know what interchange is I bet with strip mining they are lost.
Remember this is supposed to be user-level documentation.


Index: gcc/cfgloopmanip.c
===================================================================
--- gcc/cfgloopmanip.c	(.../trunk)	(revision 138275)
+++ gcc/cfgloopmanip.c	(.../branches/graphite)	(revision 138569)
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3.  
 #include "cfglayout.h"
 #include "cfghooks.h"
 #include "output.h"
+#include "tree-flow.h"
 
 static void duplicate_subloops (struct loop *, struct loop *);
 static void copy_loops_to (struct loop **, int,
@@ -466,6 +467,167 @@ scale_loop_frequencies (struct loop *loo
   free (bbs);
 }
 
+/* Recompute dominance information for basic blocks outside LOOP.  */
+
+static void update_dominators_in_loop (struct loop *loop)
+{
+  VEC (basic_block, heap) *dom_bbs = NULL;
+  sbitmap seen;
+  basic_block *body;
+  unsigned i;
+
+  seen = sbitmap_alloc (last_basic_block);
+  sbitmap_zero (seen);
+  body = get_loop_body (loop);
+
+  for (i = 0; i < loop->num_nodes; i++)
+    SET_BIT (seen, body[i]->index);
+
+  for (i = 0; i < loop->num_nodes; i++)
+    {
+      basic_block ldom;
+
+      for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
+	   ldom;
+	   ldom = next_dom_son (CDI_DOMINATORS, ldom))
+	if (!TEST_BIT (seen, ldom->index))
+	  {
+	    SET_BIT (seen, ldom->index);
+	    VEC_safe_push (basic_block, heap, dom_bbs, ldom);
+	  }
+    }
+
+  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
+  free (body);
+  free (seen);
+  VEC_free (basic_block, heap, dom_bbs);
+}

There is exactly the same code in for example loopify ().  Please
make sure to remove these duplicates and call the new helper (and
eventually adjust it to make it fit the other cases) - I grepped
for iterate_fix_dominators to catch them.


+
+/* create_empty_loop_on_edge
+   |
+   |     -------------                 -----------------------           
+   |     |  pred_bb  |                 |  pred_bb            |           
+   |     -------------                 |  IV = INITIAL_VALUE |           
+   |           |                       -----------------------           
+   |           |                       ______    | ENTRY_EDGE            
+   |           | ENTRY_EDGE           /      V   V                       
+   |           |             ====>   |     ------------------               
+   |           |                     |     | loop_header     |               
+   |           V                     |     | IV <= UPPER_BOUND|---          
+   |     -------------               |     ------------------     \         
+   |     |  succ_bb  |               |         |                   \        
+   |     -------------               |         |                    |       
+   |                                 |         V                    | exit_e
+   |                                 |      --------------          |       
+   |                                 |      | loop_latch |          |       
+   |                                 |      |IV += STEP  |          V       
+   |                                 |      --------------      ------------
+   |                                  \       /                 | succ_bb  |
+   |                                   \ ___ /                  ------------
+
+   Creates an empty loop as shown above, the IV_BEFORE is the SSA_NAME
+   that is used before the increment of IV. IV_BEFORE should be used for 
+   adding code to the body that uses the IV.  OUTER is the outer loop in
+   which the new loop should be inserted.

There is no IV_BEFORE in the diagram above (looks like it is INITIAL_VALUE).
The other parameters need documenting (looks like stride vs. STEP
mismatch?).

+*/
+struct loop *
+create_empty_loop_on_edge (edge entry_edge, 
+			   tree initial_value,
+			   tree stride, tree upper_bound,
+			   tree iv,
+			   tree *iv_before,
+			   struct loop *outer)
+{
+  basic_block loop_header, loop_latch, succ_bb, pred_bb;
+  struct loop *loop;
+  int freq;
+  gcov_type cnt;
+  gimple_stmt_iterator gsi;
+  bool insert_after;
+  gimple_seq stmts;
+  gimple cond_expr;
+  tree exit_test;
+  edge exit_e;
+  int prob;
+  tree upper_bound_gimplified;
+  
+  gcc_assert (entry_edge);
+  gcc_assert (initial_value);
+  gcc_assert (stride);
+  gcc_assert (upper_bound);
+  gcc_assert (iv);

Please combine asserts like

   gcc_assert (entry_edge && initial_value && stride && upper_bound && iv);

this reduces code size and runtime.


+  /* Create header, latch and wire up the loop.  */
+  pred_bb = entry_edge->src;
+  loop_header = split_edge (entry_edge);
+  loop_latch = split_edge (single_succ_edge (loop_header));
+  succ_bb = single_succ (loop_latch);
+  make_edge (loop_header, succ_bb, 0);
+  redirect_edge_succ_nodup (single_succ_edge (loop_latch), loop_header);
+
+  /* Set immediate dominator information.  */
+  set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb);
+  set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header);
+  set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header);
+
+  /* Initialize a loop structure and put it in a loop hierarchy.  */
+  loop = alloc_loop ();
+  loop->header = loop_header;
+  loop->latch = loop_latch;
+  add_loop (loop, outer);
+
+  /* TODO: Fix frequencies and counts.  */
+  freq = EDGE_FREQUENCY (entry_edge);
+  cnt = entry_edge->count;
+
+  prob = REG_BR_PROB_BASE / 2;
+
+  scale_loop_frequencies (loop, REG_BR_PROB_BASE - prob, REG_BR_PROB_BASE);
+
+  /* Update dominators.  */
+  update_dominators_in_loop (loop);
+
+  /* Construct IV code in loop.  */
+  initial_value = force_gimple_operand (initial_value, &stmts, true, iv);
+  if (stmts)
+    {
+      gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
+      gsi_commit_edge_inserts ();
+    }
+
+  add_referenced_var (iv);

The caller should have done this.


+  standard_iv_increment_position (loop, &gsi, &insert_after);
+  create_iv (initial_value, stride, iv, loop, &gsi, insert_after,
+	     iv_before, NULL);
+
+  /* Modify edge flags.  */
+  exit_e = single_exit (loop);
+  exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE;
+  single_pred_edge (loop_latch)->flags = EDGE_TRUE_VALUE;
+
+  gsi = gsi_last_bb (exit_e->src);
+
+  upper_bound_gimplified = 
+    force_gimple_operand (upper_bound, &stmts, true, NULL);
+  if (stmts)
+    gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);

There is force_gimple_operand_gsi for this idiom.


+  gsi = gsi_last_bb (exit_e->src);
+  
+  cond_expr = gimple_build_cond 
+    (LE_EXPR, *iv_before, upper_bound_gimplified, NULL_TREE, NULL_TREE);
+
+  exit_test = gimple_cond_lhs (cond_expr);
+  exit_test = force_gimple_operand (exit_test, &stmts, true, NULL);
+  gimple_cond_set_lhs (cond_expr, exit_test);
+  if (stmts)
+    gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);

likewise.

+
+  gsi = gsi_last_bb (exit_e->src);
+  gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT);
+
+  return loop;
+}
+
 /* Make area between HEADER_EDGE and LATCH_EDGE a loop by connecting
    latch to header and update loop tree and dominators
    accordingly. Everything between them plus LATCH_EDGE destination must
Index: gcc/tree-scalar-evolution.c
===================================================================
--- gcc/tree-scalar-evolution.c	(.../trunk)	(revision 138275)
+++ gcc/tree-scalar-evolution.c	(.../branches/graphite)	(revision 138569)
@@ -2052,6 +2052,31 @@ instantiate_scev_1 (struct loop *instant
 	 inside the loop.  */
       res = !flow_bb_inside_loop_p (instantiation_loop, def_bb) 
 	? chrec : chrec_dont_know;
+#if 0
+      if (!flow_bb_inside_loop_p (loop, def_bb))
+	res = chrec;
+      else
+	{
+	  /* res = chrec_dont_know; */
+	  tree def = SSA_NAME_DEF_STMT (chrec);
+
+	  /* Instantiate as much as possible until ending on a phi
+	     node of the same loop, in which case it means that the
+	     definition is varying in that loop.  */
+	  switch (TREE_CODE (def))
+	    {
+	    case MODIFY_EXPR:
+	      res = instantiate_parameters_1 (loop, TREE_OPERAND (def, 1),
+					      flags, cache, size_expr);
+	      break;
+
+	    case PHI_NODE:
+	    default:
+	      res = chrec_dont_know;
+	      break;
+	    }
+	}
+#endif
       set_instantiated_value (cache, chrec, res);
 
       /* To make things even more complicated, instantiate_scev_1
@@ -2212,7 +2237,7 @@ instantiate_scev_1 (struct loop *instant
 
     case SCEV_KNOWN:
       return chrec_known;
-                                     
+
     default:
       break;
     }


The changes to gcc/tree-scalar-evolution.c look completely unneccessary.


Index: gcc/tree-phinodes.c
===================================================================
--- gcc/tree-phinodes.c	(.../trunk)	(revision 138275)
+++ gcc/tree-phinodes.c	(.../branches/graphite)	(revision 138569)
@@ -449,7 +449,6 @@ remove_phi_args (edge e)
     remove_phi_arg_num (gsi_stmt (gsi), e->dest_idx);
 }
 
-
 /* Remove the PHI node pointed-to by iterator GSI from basic block BB.  After
    removal, iterator GSI is updated to point to the next PHI node in the
    sequence. If RELEASE_LHS_P is true, the LHS of this PHI node is released


Likewise.


Index: gcc/cfghooks.c
===================================================================
--- gcc/cfghooks.c	(.../trunk)	(revision 138275)
+++ gcc/cfghooks.c	(.../branches/graphite)	(revision 138569)
@@ -507,41 +507,16 @@ delete_basic_block (basic_block bb)
   expunge_block (bb);
 }
 
-/* Splits edge E and returns the newly created basic block.  */
+/* Updates the dominator information for block RET.  */
 
-basic_block
-split_edge (edge e)
+static void
+update_dominator_information (basic_block ret)
 {
-  basic_block ret;
-  gcov_type count = e->count;
-  int freq = EDGE_FREQUENCY (e);
   edge f;
-  bool irr = (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
-  struct loop *loop;
-  basic_block src = e->src, dest = e->dest;
-
-  if (!cfg_hooks->split_edge)
-    internal_error ("%s does not support split_edge", cfg_hooks->name);
-
-  if (current_loops != NULL)
-    rescan_loop_exit (e, false, true);
-
-  ret = cfg_hooks->split_edge (e);
-  ret->count = count;
-  ret->frequency = freq;
-  single_succ_edge (ret)->probability = REG_BR_PROB_BASE;
-  single_succ_edge (ret)->count = count;
-
-  if (irr)
-    {
-      ret->flags |= BB_IRREDUCIBLE_LOOP;
-      single_pred_edge (ret)->flags |= EDGE_IRREDUCIBLE_LOOP;
-      single_succ_edge (ret)->flags |= EDGE_IRREDUCIBLE_LOOP;
-    }
 
   if (dom_info_available_p (CDI_DOMINATORS))
     set_immediate_dominator (CDI_DOMINATORS, ret, single_pred (ret));
-
+  
   if (dom_info_state (CDI_DOMINATORS) >= DOM_NO_FAST_QUERY)
     {
       /* There are two cases:
@@ -571,6 +546,41 @@ split_edge (edge e)
 	    set_immediate_dominator (CDI_DOMINATORS, single_succ (ret), ret);
 	}
     }
+}
+
+
+/* Splits edge E and returns the newly created basic block.  */
+
+basic_block
+split_edge (edge e)
+{
+  basic_block ret;
+  gcov_type count = e->count;
+  int freq = EDGE_FREQUENCY (e);
+  bool irr = (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
+  struct loop *loop;
+  basic_block src = e->src, dest = e->dest;
+
+  if (!cfg_hooks->split_edge)
+    internal_error ("%s does not support split_edge", cfg_hooks->name);
+
+  if (current_loops != NULL)
+    rescan_loop_exit (e, false, true);
+
+  ret = cfg_hooks->split_edge (e);
+  ret->count = count;
+  ret->frequency = freq;
+  single_succ_edge (ret)->probability = REG_BR_PROB_BASE;
+  single_succ_edge (ret)->count = count;
+
+  if (irr)
+    {
+      ret->flags |= BB_IRREDUCIBLE_LOOP;
+      single_pred_edge (ret)->flags |= EDGE_IRREDUCIBLE_LOOP;
+      single_succ_edge (ret)->flags |= EDGE_IRREDUCIBLE_LOOP;
+    }
+
+  update_dominator_information(ret);
 
   if (current_loops != NULL)
     {


Likewise.  The new function is not used outside.

Index: gcc/tree-chrec.c
===================================================================
--- gcc/tree-chrec.c	(.../trunk)	(revision 138275)
+++ gcc/tree-chrec.c	(.../branches/graphite)	(revision 138569)
@@ -1408,3 +1408,26 @@ scev_direction (const_tree chrec)
   else
     return EV_DIR_GROWS;
 }
+
+/* Iterates over all the components of SCEV, and calls CBCK.  */
+
+void
+for_each_scev_op (tree *scev, bool (*cbck) (tree *, void *), void *data)
+{
+  switch (TREE_CODE_LENGTH (TREE_CODE (*scev)))
+    {
+    case 3:
+      for_each_scev_op (&TREE_OPERAND (*scev, 2), cbck, data);
+
+    case 2:
+      for_each_scev_op (&TREE_OPERAND (*scev, 1), cbck, data);
+      
+    case 1:
+      for_each_scev_op (&TREE_OPERAND (*scev, 0), cbck, data);
+
+    default:
+      cbck (scev, data);
+      break;
+    }
+}
+
Index: gcc/tree-chrec.h
===================================================================
--- gcc/tree-chrec.h	(.../trunk)	(revision 138275)
+++ gcc/tree-chrec.h	(.../branches/graphite)	(revision 138569)
@@ -70,6 +70,7 @@ extern tree evolution_part_in_loop_num (
 extern tree hide_evolution_in_other_loops_than_loop (tree, unsigned);
 extern tree reset_evolution_in_loop (unsigned, tree, tree);
 extern tree chrec_merge (tree, tree);
+extern void for_each_scev_op (tree *, bool (*) (tree *, void *), void *);
 
 /* Observers.  */
 extern bool eq_evolutions_p (const_tree, const_tree);
Index: gcc/vec.h
===================================================================
--- gcc/vec.h	(.../trunk)	(revision 138275)
+++ gcc/vec.h	(.../branches/graphite)	(revision 138569)
@@ -399,6 +399,7 @@ along with GCC; see the file COPYING3.  
    If you need to directly manipulate the array (for instance, you
    want to feed it to qsort), use this accessor.  */
 
+
 #define VEC_address(T,V)		(VEC_OP(T,base,address)(VEC_BASE(V)))
 
 /* Find the first index in the vector not less than the object.


white-space only change.  Remove.

Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h	(.../trunk)	(revision 138275)
+++ gcc/tree-vectorizer.h	(.../branches/graphite)	(revision 138569)
@@ -647,6 +647,7 @@ extern bitmap vect_memsyms_to_rename;
    to force the alignment of data references in the loop.  */
 extern struct loop *slpeel_tree_peel_loop_to_edge 
   (struct loop *, edge, tree, tree, bool, unsigned int, bool);
+struct loop *tree_duplicate_loop_on_edge (struct loop *, edge);
 extern void set_prologue_iterations (basic_block, tree,
 				     struct loop *, unsigned int);
 struct loop *tree_duplicate_loop_on_edge (struct loop *, edge);


This is a bogus hunk.  The declaration is already there three lines below.


Index: gcc/tree-data-ref.c
===================================================================
--- gcc/tree-data-ref.c	(.../trunk)	(revision 138275)
+++ gcc/tree-data-ref.c	(.../branches/graphite)	(revision 138569)
@@ -4827,6 +4829,21 @@ hash_stmt_vertex_del (void *e)
    scalar dependence.  */
 

The comment needs updating - you just copied it from build_rdg.

 struct graph *
+build_empty_rdg (int n_stmts)
+{
+  int nb_data_refs = 10;
+  struct graph *rdg = new_graph (n_stmts);
+
+  rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
+			      eq_stmt_vertex_info, hash_stmt_vertex_del);
+  return rdg;
+}
+
+/* Build the Reduced Dependence Graph (RDG) with one vertex per
+   statement of the loop nest, and one edge per data dependence or
+   scalar dependence.  */
+
+struct graph *
 build_rdg (struct loop *loop)
 {
   int nb_data_refs = 10;
Index: gcc/common.opt
===================================================================
--- gcc/common.opt	(.../trunk)	(revision 138275)
+++ gcc/common.opt	(.../branches/graphite)	(revision 138569)
@@ -543,6 +543,22 @@ Common Report Var(flag_gcse_after_reload
 Perform global common subexpression elimination after register allocation
 has finished
 
+fgraphite
+Common Report Var(flag_graphite)
+Enable in and out of Graphite representation, not modifying the compiled program

This is misleading - it _is_ modifying the program if you add any of the
new options.  Just leave that part of the sentence out.

Index: gcc/tree-flow.h
===================================================================
--- gcc/tree-flow.h	(.../trunk)	(revision 138275)
+++ gcc/tree-flow.h	(.../branches/graphite)	(revision 138569)
@@ -687,6 +687,7 @@ tree copy_var_decl (tree, tree, tree);
 /* Location to track pending stmt for edge insertion.  */
 #define PENDING_STMT(e)	((e)->insns.g)
 
+extern void remove_bb (basic_block bb);

Why do you need to export this?  You should call it via the cfghook
wrapper.

Index: gcc/tree-cfg.c
===================================================================
--- gcc/tree-cfg.c	(.../trunk)	(revision 138275)
+++ gcc/tree-cfg.c	(.../branches/graphite)	(revision 138569)
@@ -104,7 +104,6 @@ static void gimple_cfg2vcg (FILE *);
 /* Flowgraph optimization and cleanup.  */
 static void gimple_merge_blocks (basic_block, basic_block);
 static bool gimple_can_merge_blocks_p (basic_block, basic_block);
-static void remove_bb (basic_block);
 static edge find_taken_edge_computed_goto (basic_block, tree);
 static edge find_taken_edge_cond_expr (basic_block, tree);
 static edge find_taken_edge_switch_expr (basic_block, tree);
@@ -2069,7 +2068,7 @@ remove_phi_nodes_and_edges_for_unreachab
 
 /* Remove statements of basic block BB.  */
 
-static void
+void
 remove_bb (basic_block bb)
 {
   gimple_stmt_iterator i;


See above - no need to export this.


@@ -6108,7 +6107,7 @@ print_loops (FILE *file, int verbosity)
 {
   basic_block bb;
 
-  bb = BASIC_BLOCK (NUM_FIXED_BLOCKS);
+  bb = ENTRY_BLOCK_PTR;
   if (bb && bb->loop_father)
     print_loop_and_siblings (file, bb->loop_father, 0, verbosity);
 }
Index: gcc/passes.c
===================================================================
--- gcc/passes.c	(.../trunk)	(revision 138275)
+++ gcc/passes.c	(.../branches/graphite)	(revision 138569)
@@ -661,6 +661,7 @@ init_optimization_passes (void)
 	  NEXT_PASS (pass_check_data_deps);
 	  NEXT_PASS (pass_loop_distribution);
 	  NEXT_PASS (pass_linear_transform);
+	  NEXT_PASS (pass_graphite_transforms);

So we still have loop-distribution and interchange before graphite?


The middle-end parts look ok otherwise.  Please make the suggested
changes though.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]