Graphite review, graphite parts [1/n]
Richard Guenther
rguenther@suse.de
Mon Aug 11 13:16:00 GMT 2008
On Thu, 7 Aug 2008, Richard Guenther wrote:
>
> This is part one (from unknown N) of the graphite parts review
> http://cri.ensmp.fr/people/pop/gcc/1142_graphite.diff
>
> Find comments inline.
The rest of the graphite.[ch] review follows. It looks like my
comments from the previous review are not addressed at all ...
Richard.
+/* Return true if we can create a data-ref for OP in STMT. */
+
+static bool
+stmt_simple_memref_for_scop_p (struct loop *loop, gimple stmt, tree op)
+{
+ data_reference_p dr = create_data_ref (loop, op, stmt, true);
+ if (!dr)
+ return false;
+
+ free_data_ref (dr);
+ return true;
+}
This asks for a new helper in tree-data-ref.c that avoids creating the
data-ref in the first place. It also looks like that create_data_ref
never will return NULL, so the function always returns true, which
can't be right.
+/* Return true if the operand OP is simple. */
+
+static bool
+is_simple_operand (loop_p loop, gimple stmt, tree op)
+{
+ return !((handled_component_p (op) || INDIRECT_REF_P (op))
+ && !stmt_simple_memref_for_scop_p (loop, stmt, op));
+}
so loads from plain variables do not need checking for stmt_simple_memref_for_scop_p?
+/* Return true only when STMT is simple enough for being handled by Graphite.
+   This depends on outermost_loop, as the parameters are initialized relative
+ to this loop. */
+
+static bool
+stmt_simple_for_scop_p (struct loop *outermost_loop, gimple stmt)
+{
+ basic_block bb = gimple_bb (stmt);
+ struct loop *loop = bb->loop_father;
+
+ /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
+ Calls have side-effects, except those to const or pure
+ functions. */
+ if (gimple_has_volatile_ops (stmt)
+ || (gimple_code (stmt) == GIMPLE_CALL
+ && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
+ || (gimple_code (stmt) == GIMPLE_ASM))
+ return false;
+
+ switch (gimple_code (stmt))
+ {
+ case GIMPLE_RETURN:
+ case GIMPLE_LABEL:
+ return true;
+
+ case GIMPLE_COND:
+ {
+ tree op;
+ ssa_op_iter op_iter;
+ enum tree_code code = gimple_cond_code (stmt);
+
+ /* We can only handle this kind of conditional expressions.
+ For inequalities like "if (i != 3 * k)" we need unions of
+ polyhedrons. Expressions like "if (a)" or "if (a == 15)" need
+ them for the else branch. */
+      if (!(code == LT_EXPR || code == GT_EXPR
+	    || code == LE_EXPR || GE_EXPR))
+	return false;
Note that "|| GE_EXPR" is missing a "code ==" -- GE_EXPR is a nonzero
enum value, so the whole negated condition is always false and this
check never rejects any comparison code.
+
+ if (!outermost_loop)
+ return false;
+
+ FOR_EACH_SSA_TREE_OPERAND (op, stmt, op_iter, SSA_OP_ALL_USES)
+ if (!loop_affine_expr (outermost_loop, loop, op))
+ return false;
+
+ return true;
+ }
+
+ case GIMPLE_ASSIGN:
+ {
+ enum tree_code code = gimple_assign_rhs_code (stmt);
+
+ switch (get_gimple_rhs_class (code))
+ {
+ case GIMPLE_UNARY_RHS:
+ case GIMPLE_SINGLE_RHS:
+ return (is_simple_operand (loop, stmt, gimple_assign_lhs (stmt))
+ && is_simple_operand (loop, stmt, gimple_assign_rhs1 (stmt)));
This will allow structure copies as simple - is this intended?
+ case GIMPLE_BINARY_RHS:
+ return (is_simple_operand (loop, stmt, gimple_assign_lhs (stmt))
+ && is_simple_operand (loop, stmt, gimple_assign_rhs1 (stmt))
+ && is_simple_operand (loop, stmt, gimple_assign_rhs2 (stmt)));
+
+ case GIMPLE_INVALID_RHS:
+ gcc_unreachable ();
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ case GIMPLE_CALL:
+ {
+ size_t i;
+ size_t n = gimple_call_num_args (stmt);
+
+ for (i = 0; i < n; i++)
+ {
+ tree arg = gimple_call_arg (stmt, i);
+
+ if (!(stmt_simple_memref_for_scop_p (loop, stmt, gimple_assign_lhs (stmt))
please factor the lhs handling out of the loop.  Also, gimple_assign_lhs
is the wrong accessor on a GIMPLE_CALL -- use gimple_call_lhs, and note
that the result may be NULL for calls whose value is unused.
+ && stmt_simple_memref_for_scop_p (loop, stmt, arg)))
+ return false;
+ }
+
+ return true;
+ }
+
+ default:
+ /* These nodes cut a new scope. */
+ return false;
+ }
+
+ return false;
+}
+#define GBB_UNKNOWN 0
+#define GBB_LOOP_SING_EXIT_HEADER 1
+#define GBB_LOOP_MULT_EXIT_HEADER 2
+#define GBB_LOOP_EXIT 3
+#define GBB_COND_HEADER 5
+#define GBB_SIMPLE 6
+#define GBB_LAST 7
Use an enum.
+/* Check if 'exit_bb' is a bb, which follows a loop exit edge.
+ ??? Move to cfgloop.c */
^^^^^^^^^^
as I said in the last review.
+static bool
+is_loop_exit (struct loop *loop, basic_block exit_bb)
+{
+ int i;
+ VEC (edge, heap) *exits;
+ edge e;
+ bool is_exit = false;
+
+ if (loop_outer (loop) == NULL)
+ return false;
+
+ exits = get_loop_exit_edges (loop);
+
+ for (i = 0; VEC_iterate (edge, exits, i, e); i++)
+ if (e->dest == exit_bb)
+ is_exit = true;
+
+ VEC_free (edge, heap, exits);
+
+ return is_exit;
+}
+/* End SCOP with basic block EXIT, and split EXIT before STMT when
+ STMT is non NULL. */
+
+static void
+end_scop (scop_p scop, basic_block exit, basic_block *last, gimple stmt)
+{
+ /* XXX: Disable bb splitting, contains a bug (SIGSEGV in polyhedron
+ aermod.f90). */
Is this still the case?
+/* Gather the basic blocks belonging to the SCOP. */
+
+static void
+build_scop_bbs (scop_p scop)
+{
+ basic_block *stack = XNEWVEC (basic_block, n_basic_blocks + 1);
+ sbitmap visited = sbitmap_alloc (last_basic_block);
+ int sp = 0;
+
+ sbitmap_zero (visited);
+ stack[sp++] = SCOP_ENTRY (scop);
+
+ while (sp)
+ {
+ basic_block bb = stack[--sp];
+ int depth = loop_depth (bb->loop_father);
+ int num = bb->loop_father->num;
+ edge_iterator ei;
+ edge e;
+
+ /* Scop's exit is not in the scop. Exclude also bbs, which are
+ dominated by the SCoP exit. These are e.g. loop latches. */
+ if (TEST_BIT (visited, bb->index)
+ || dominated_by_p (CDI_DOMINATORS, bb, SCOP_EXIT (scop))
+ /* Every block in the scop is dominated by scop's entry. */
+ || !dominated_by_p (CDI_DOMINATORS, bb, SCOP_ENTRY (scop))
+ /* Every block in the scop is postdominated by scop's exit. */
+ || (bb != ENTRY_BLOCK_PTR
+ && !dominated_by_p (CDI_POST_DOMINATORS, bb, SCOP_EXIT (scop))))
+ continue;
+
+ build_graphite_bb (scop, bb);
+ SET_BIT (visited, bb->index);
+
+ /* First push the blocks that have to be processed last. */
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (! TEST_BIT (visited, e->dest->index)
+ && (int) loop_depth (e->dest->loop_father) < depth)
+ stack[sp++] = e->dest;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (! TEST_BIT (visited, e->dest->index)
+ && (int) loop_depth (e->dest->loop_father) == depth
+ && e->dest->loop_father->num != num)
+ stack[sp++] = e->dest;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (! TEST_BIT (visited, e->dest->index)
+ && (int) loop_depth (e->dest->loop_father) == depth
+ && e->dest->loop_father->num == num
+ && EDGE_COUNT (e->dest->preds) > 1)
+ stack[sp++] = e->dest;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (! TEST_BIT (visited, e->dest->index)
+ && (int) loop_depth (e->dest->loop_father) == depth
+ && e->dest->loop_father->num == num
+ && EDGE_COUNT (e->dest->preds) == 1)
+ stack[sp++] = e->dest;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (! TEST_BIT (visited, e->dest->index)
+ && (int) loop_depth (e->dest->loop_father) > depth)
+ stack[sp++] = e->dest;
I wonder why it is necessary to walk over all succs multiple times
and why it is not possible to just combine them. It looks like
you might end up putting a dest on the stack multiple times.
+/* Record LOOP as occurring in SCOP.  */
+
+static void
+scop_record_loop (scop_p scop, struct loop *loop)
+{
+ if (!bitmap_bit_p (SCOP_LOOPS (scop), loop->num))
It helps readability and indent level if you write such early outs as
if (bitmap_bit_p (SCOP_LOOPS (scop), loop->num))
return;
+/* Build the loop nests contained in SCOP. */
+
+static void
+build_scop_loop_nests (scop_p scop)
+{
+ unsigned i;
+ graphite_bb_p gb;
+ struct loop *loop0, *loop1;
+
+ for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
+ {
+ struct loop *loop = gbb_loop (gb);
+
+ /* Only add loops, if they are completely contained in the SCoP. */
+ if (loop->header == GBB_BB (gb) && bb_in_scop_p (loop->latch, scop))
Align multiple tests vertically according to the coding conventions
(also in other places).
+ if (loop->header == GBB_BB (gb)
+ && bb_in_scop_p (loop->latch, scop))
+/* Build dynamic schedules for all the BBs. */
+
+static void
+build_scop_dynamic_schedules (scop_p scop)
+{
+ int i, dim, loop_num, row, col;
+ graphite_bb_p gb;
+
+
only one blank line.
+ for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
+ {
+ loop_num = GBB_BB (gb) -> loop_father -> num;
+ if (loop_num != 0)
+ {
+ dim = loop_iteration_vector_dim (loop_num, scop);
+ GBB_DYNAMIC_SCHEDULE (gb) = cloog_matrix_alloc (dim, dim);
+ for (row = 0; row < GBB_DYNAMIC_SCHEDULE (gb)->NbRows; row++)
+ for (col = 0; col < GBB_DYNAMIC_SCHEDULE (gb)->NbColumns; col++)
+ if (row == col)
+ {
+ value_init (GBB_DYNAMIC_SCHEDULE (gb)->p[row][col]);
+ value_set_si (GBB_DYNAMIC_SCHEDULE (gb)->p[row][col], 1);
+ }
+ else
+ {
+ value_init (GBB_DYNAMIC_SCHEDULE (gb)->p[row][col]);
+ value_set_si (GBB_DYNAMIC_SCHEDULE (gb)->p[row][col], 0);
+ }
Better do
+ value_init (GBB_DYNAMIC_SCHEDULE (gb)->p[row][col]);
+ value_set_si (GBB_DYNAMIC_SCHEDULE (gb)->p[row][col], row == col);
?
+/* Get the index for parameter VAR in SCOP. */
+
+static int
+param_index (tree var, scop_p scop)
+{
+ int i;
+ name_tree p;
+ name_tree nvar;
+
+ /* If this is true why not use simply SSA_NAME_VERSION as index? */
+ gcc_assert (TREE_CODE (var) == SSA_NAME);
I see I suggested the name-trick (see first review) during the last
review as well ... (I am actually surprised that all of my past
comments seem to be not addressed)
+/* Scan EXPR and translate it to an inequality vector INEQ that will
+ be inserted in the constraint domain matrix C at row R. N is
+ the number of columns for loop iterators in C. */
+
+static void
+scan_tree_for_params (scop_p s, tree e, CloogMatrix *c, int r, Value k)
+{
+ int cst_col, param_col;
+
+ switch (TREE_CODE (e))
+ {
+ case POLYNOMIAL_CHREC:
+ {
+ tree left = CHREC_LEFT (e);
+ tree right = CHREC_RIGHT (e);
+ int var = CHREC_VARIABLE (e);
+
+ if (TREE_CODE (right) != INTEGER_CST)
+ return;
+
+ if (c)
+ {
+ int var_idx = loop_iteration_vector_dim (var, s);
+ value_init (c->p[r][var_idx]);
+ value_set_si (c->p[r][var_idx], int_cst_value (right));
+ }
+
+ switch (TREE_CODE (left))
+ {
+ case POLYNOMIAL_CHREC:
+ scan_tree_for_params (s, left, c, r, k);
+ return;
+
+ case INTEGER_CST:
+ /* Constant part. */
+ if (c)
+ {
+ cst_col = c->NbColumns - 1;
+ value_init (c->p[r][cst_col]);
+ value_set_si (c->p[r][cst_col], int_cst_value (left));
+ }
+ return;
+
+ default:
+ scan_tree_for_params (s, left, c, r, k);
+ return;
+ }
+ }
+ break;
+
+ case MULT_EXPR:
+ if (chrec_contains_symbols (TREE_OPERAND (e, 0)))
+ {
+ Value val;
+
+ gcc_assert (host_integerp (TREE_OPERAND (e, 1), 0));
+
+ value_init (val);
+ value_set_si (val, int_cst_value (TREE_OPERAND (e, 1)));
+ value_multiply (k, k, val);
+ value_clear (val);
+ scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k);
+ }
+ else
+ {
+ Value val;
+
+ gcc_assert (host_integerp (TREE_OPERAND (e, 0), 0));
+
+ value_init (val);
+ value_set_si (val, int_cst_value (TREE_OPERAND (e, 0)));
+ value_multiply (k, k, val);
+ value_clear (val);
+ scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k);
+ }
+ break;
+
+ case PLUS_EXPR:
+ scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k);
+ scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k);
+ break;
+
+ case MINUS_EXPR:
+ scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k);
+ value_oppose (k, k);
+ scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k);
+ break;
+
+ case SSA_NAME:
+ param_col = param_index (e, s);
+
+ if (c)
+ {
+ param_col += c->NbColumns - scop_nb_params (s) - 1;
+ value_init (c->p[r][param_col]);
+ value_assign (c->p[r][param_col], k);
+ }
+ break;
+
+ case INTEGER_CST:
+ if (c)
+ {
+ cst_col = c->NbColumns - 1;
+ value_init (c->p[r][cst_col]);
+ value_set_si (c->p[r][cst_col], int_cst_value (e));
+ }
+ break;
+
+ case NOP_EXPR:
+ case CONVERT_EXPR:
+ case NON_LVALUE_EXPR:
+ scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k);
+ break;
+
+ default:
+ /* ??? The default cases should be gcc_unreachable (). */
Another past-review thing ...
+struct irp_data
+{
+ struct loop *loop;
+ scop_p scop;
+};
Needs a comment.
+/* Saves in NV the name of variable P->T. */
+
+static void
+save_var_name (char **nv, int i, name_tree p)
+{
+ const char *name = get_name (SSA_NAME_VAR (p->t));
+
+ if (name)
+ {
+ nv[i] = XNEWVEC (char, strlen (name) + 12);
+ sprintf (nv[i], "%s_%d", name, SSA_NAME_VERSION (p->t));
+ }
+ else
+ {
+ nv[i] = XNEWVEC (char, 12);
+ sprintf (nv[i], "T_%d", SSA_NAME_VERSION (p->t));
+ }
+
+ p->name = nv[i];
+}
And you already _do_ use a "fancy" naming scheme!
+/* Returns a graphite_bb from BB. */
+
+static graphite_bb_p
+graphite_bb_from_bb (basic_block bb, scop_p scop)
+{
+ graphite_bb_p gb;
+ int i;
+
+ for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
+ if (GBB_BB (gb) == bb)
+ return gb;
This looks like a lookup that uses linear time.  You can either use
bb->aux or sort SCOP_BBS after the basic-block index.
+/* Converts a GMP constant value to a tree and returns it. */
+
+static tree
+gmp_cst_to_tree (Value v)
+{
+ return build_int_cst (integer_type_node, value_get_si (v));
+}
+
+/* Returns the tree variable from the name that it was given in Cloog
+ representation.
+
+ FIXME: This is a hack, and Cloog should be fixed to not work with
+ variable names represented as "char *string", but with void
+ pointers that could be casted back to a tree. The only problem in
+ doing that is that Cloog's pretty printer still assumes that
+ variable names are char *strings. The solution would be to have a
+ function pointer for pretty-printing that can be redirected to be
+ print_generic_stmt in our case, or fprintf by default.
+ ??? Too ugly to live. */
^^^^^^^^^^^
still the case. If there is no progress with the above use fancy
names.
+static tree
+clast_name_to_gcc (const char *name, VEC (name_tree, heap) *params,
+ loop_iv_stack ivstack)
+{
+ int i;
+ name_tree t;
+ tree iv;
+
+ for (i = 0; VEC_iterate (name_tree, params, i, t); i++)
+ if (!strcmp (name, t->name))
+ return t->t;
+
+ iv = loop_iv_stack_get_iv_from_name (ivstack, name);
+ if (iv)
+ return iv;
+
+ gcc_unreachable ();
+ return NULL_TREE;
+}
+/* Remove all the edges from BB. */
+
+static void
+remove_all_edges (basic_block bb, edge construction_edge)
What is construction_edge, and how is this still "remove_all_edges"?
+{
+ edge e;
+ edge_iterator ei;
+
+ for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
+ {
+ if (e != construction_edge)
+ {
+ remove_edge (e);
+ e = ei_safe_edge (ei);
+ }
+ else
+ ei_next (&ei);
+ }
+
+ for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
+ {
+ if (e != construction_edge)
+ {
+ remove_edge (e);
+ e = ei_safe_edge (ei);
+ }
+ else
+ ei_next (&ei);
+ }
+}
+/* Remove COND_EXPRs from BB. */
+
+static void
+remove_cond_exprs (basic_block bb)
+{
+ gimple_stmt_iterator gsi;
+
+ for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
+ {
+ gimple stmt = gsi_stmt (gsi);
+
+ if (gimple_code (stmt) == GIMPLE_COND)
+ gsi_remove (&gsi, true);
+ else
+ gsi_next (&gsi);
+ }
The only cond_expr in a BB is at the last statement in the BB, so
simply do
if (gimple_code (last_stmt (bb)) == GIMPLE_COND)
...
but of course you need to fix outgoing edges.
+/* Return a vector of all the virtual phi nodes in the current
+ function. */
+
+static VEC (gimple, heap) *
+collect_virtual_phis (void)
+{
+ gimple_stmt_iterator si;
+ gimple_vec phis = VEC_alloc (gimple, heap, 3);
+ basic_block bb;
+
+ FOR_EACH_BB (bb)
+ for (si = gsi_start_phis (bb); !gsi_end_p(si); gsi_next (&si))
+ {
+ gimple phi = gsi_stmt (si);
+
+ /* The phis we moved will have 0 arguments because the
+ original edges were removed. */
+ if (gimple_phi_num_args (phi) == 0)
+ VEC_safe_push (gimple, heap, phis, phi);
+ }
+
+ /* Deallocate if we did not find any. */
+ if (VEC_length (gimple, phis) == 0)
+ {
+ VEC_free (gimple, heap, phis);
+ phis = NULL;
+ }
+
+ return phis;
+}
+
+/* Find a virtual definition for variable VAR in BB. */
+
+static tree
+find_vdef_for_var_in_bb (basic_block bb, tree var)
+{
+ gimple_stmt_iterator gsi;
+ gimple phi;
+ def_operand_p def_var;
+ vuse_vec_p vv;
+ ssa_op_iter op_iter;
+
+ for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
+ FOR_EACH_SSA_VDEF_OPERAND (def_var, vv, gsi_stmt (gsi), op_iter)
+ if (SSA_NAME_VAR (*def_var) == var)
+ return *def_var;
+
+ for (gsi = gsi_start_phis (bb); !gsi_end_p(gsi); gsi_next (&gsi))
+ {
+ phi = gsi_stmt (gsi);
+ if (SSA_NAME_VAR (PHI_RESULT (phi)) == var)
+ return PHI_RESULT (phi);
+ }
+
+ return NULL;
+}
+
+/* Recursive helper. */
+
+static tree
+find_vdef_for_var_1 (basic_block bb, struct pointer_set_t *visited, tree var)
+{
+ tree result = NULL;
+ edge_iterator ei;
+ edge pred_edge;
+
+ if (pointer_set_contains (visited, bb))
+ return NULL;
+
+ pointer_set_insert (visited, bb);
+ result = find_vdef_for_var_in_bb (bb, var);
+
+ if (!result)
+ FOR_EACH_EDGE (pred_edge, ei, bb->preds)
+ if (!result)
+ result = find_vdef_for_var_1 (pred_edge->src, visited, var);
+
+ return result;
+}
+
+/* Finds a virtual definition for variable VAR. */
+
+static tree
+find_vdef_for_var (basic_block bb, tree var)
+{
+ struct pointer_set_t *visited = pointer_set_create ();
+ tree def = find_vdef_for_var_1 (bb, visited, var);
+
+ pointer_set_destroy (visited);
+ return def;
+}
+
+/* Update the virtual phis after loop bodies are moved to new
+ loops. */
+
+static void
+patch_phis_for_virtual_defs (void)
+{
+ int i;
+ gimple phi;
+ VEC (gimple, heap) *virtual_phis = collect_virtual_phis ();
+
+ for (i = 0; VEC_iterate (gimple, virtual_phis, i, phi); i++)
+ {
+ basic_block bb = gimple_bb (phi);
+ edge_iterator ei;
+ edge pred_edge;
+ gimple_stmt_iterator gsi;
+ gimple new_phi;
+
+ /* FIXME: Should PHI_RESULT become gimple_phi_result? */
+ tree phi_result = PHI_RESULT (phi);
+
+ tree var = SSA_NAME_VAR (phi_result);
+
+ new_phi = create_phi_node (phi_result, bb);
+ SSA_NAME_DEF_STMT (phi_result) = new_phi;
+
+ FOR_EACH_EDGE (pred_edge, ei, bb->preds)
+ {
+ tree def = find_vdef_for_var (pred_edge->src, var);
+
+ if (def)
+ add_phi_arg (new_phi, def, pred_edge);
+ else
+ add_phi_arg (new_phi, gimple_default_def (cfun, var), pred_edge);
+ }
+
+ gsi = gsi_for_stmt (phi);
+ remove_phi_node (&gsi, false);
+ }
+}
Why are you doing all this virtual-phi stuff? Why are you not just
renaming all symbols that appear in virtual operands?
+/* Scan the loops and remove the ones that have been marked for
+ removal. */
+
+static void
+remove_dead_loops (void)
+{
+ struct loop *loop, *ploop;
+ loop_iterator li;
+
+ FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
+ {
+ if (loop->header)
+ continue;
+
+ while (loop->inner)
+ {
+ ploop = loop->inner;
+ flow_loop_tree_node_remove (ploop);
+ flow_loop_tree_node_add (loop_outer (loop), ploop);
+ }
+
+ /* Remove the loop and free its data. */
+ delete_loop (loop);
+ }
I don't see how loops are not marked. This seems to remove _all_
loops. The while (loop->inner) loop must be dead - how can an
outer loop be deleted without deleting its inner loops??
+/* GIMPLE Loop Generator: generates loops from STMT in GIMPLE form for
+ the given SCOP. */
+
+static void
+gloog (scop_p scop, struct clast_stmt *stmt)
+{
+ edge new_scop_exit_edge = NULL;
+ basic_block scop_exit = SCOP_EXIT (scop);
+ VEC (name_tree, heap) *ivstack = VEC_alloc (name_tree, heap, 10);
+ edge construction_edge = NULL;
+ VEC (basic_block,heap) *bbs = VEC_alloc (basic_block, heap, 10);
+ basic_block old_scop_exit_idom = get_immediate_dominator (CDI_DOMINATORS, scop_exit);
+
+ if (!can_generate_code (stmt) || !can_generate_for_scop (scop))
+ {
+ cloog_clast_free (stmt);
+ return;
+ }
+
+ construction_edge = get_construction_edge (scop);
+ if (!construction_edge)
+ {
+ cloog_clast_free (stmt);
+ return;
+ }
+
+ gather_blocks_in_sese_region (SCOP_ENTRY (scop), SCOP_EXIT (scop), &bbs);
+ new_scop_exit_edge = translate_clast (scop, SCOP_ENTRY(scop)->loop_father, stmt, construction_edge, &ivstack);
+ redirect_edge_succ (new_scop_exit_edge, scop_exit);
+ cloog_clast_free (stmt);
+
+ if (new_scop_exit_edge->dest == EXIT_BLOCK_PTR)
+ new_scop_exit_edge->flags = 0;
+
+ remove_edges_around_useless_blocks (scop, bbs);
+
+ VEC_free (basic_block, heap, bbs);
+
+ patch_phis_for_virtual_defs ();
+
+ find_unreachable_blocks ();
+ if (old_scop_exit_idom
+ && !(old_scop_exit_idom->flags & BB_REACHABLE))
+ set_immediate_dominator (CDI_DOMINATORS,
+ new_scop_exit_edge->dest,
+ new_scop_exit_edge->src);
+
+ delete_unreachable_blocks();
+ mark_old_loops (scop);
+ remove_dead_loops ();
+ verify_loop_structure ();
This should be inside #ifdef ENABLE_CHECKING
+ calculate_dominance_info (CDI_DOMINATORS);
+ calculate_dominance_info (CDI_POST_DOMINATORS);
This is a no-op unless you free dominator information before. Maybe
you want to use verify_dominators () (inside ENABLE_CHECKING)?
+ rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
+ update_ssa (TODO_update_ssa);
+ verify_ssa (false);
Inside #ifdef ENABLE_CHECKING please.
+ estimate_bb_frequencies ();
No - this will destroy any valid frequency information. If you want
to do this for your new loops you need to properly split and expose
the machinery for loops or basic-blocks. Re-computing it for the
whole function is just wrong.
+static void
+graphite_trans_bb_move_loop (graphite_bb_p gb, int loop,
+ int new_loop_pos)
+{
+ CloogMatrix *domain = GBB_DOMAIN (gb);
+ scop_p scop = GBB_SCOP (gb);
+ int row, j;
+ loop_p tmp_loop_p;
+
+ gcc_assert (loop < gbb_nb_loops (gb));
+ gcc_assert (new_loop_pos < gbb_nb_loops (gb));
+
+
only one blank line.  (please check elsewhere as well)
+ swap_loop_mapped_depth (scop, gb, loop, new_loop_pos);
+
+ /* Update LOOPS vector. */
+ tmp_loop_p = VEC_index (loop_p, GBB_LOOPS (gb), loop);
+ VEC_ordered_remove (loop_p, GBB_LOOPS (gb), loop);
+ VEC_safe_insert (loop_p, heap, GBB_LOOPS (gb), new_loop_pos, tmp_loop_p);
+
+
+ /* Move the domain columns. */
+ if (loop < new_loop_pos)
+ for (row = 0; row < domain->NbRows; row++)
+ {
+ Value tmp;
+ value_init (tmp);
+ value_assign (tmp, domain->p[row][loop + 1]);
+
+ for (j = loop ; j < new_loop_pos - 1; j++)
+ value_assign (domain->p[row][j + 1], domain->p[row][j + 2]);
+
+ value_assign (domain->p[row][new_loop_pos], tmp);
+ value_clear (tmp);
+ }
+ else
+ for (row = 0; row < domain->NbRows; row++)
+ {
+ Value tmp;
+ value_init (tmp);
+ value_assign (tmp, domain->p[row][loop + 1]);
+
+ for (j = loop ; j > new_loop_pos; j--)
+ value_assign (domain->p[row][j + 1], domain->p[row][j]);
+
+ value_assign (domain->p[row][new_loop_pos + 1], tmp);
+ value_clear (tmp);
+ }
looks like two loops with lots of common code - please factor it out.
+/* Strip mines the loop of BB at the position LOOP_DEPTH with STRIDE.
+ Always valid, but not always a performance improvement. */
+
+static void
+graphite_trans_bb_strip_mine (graphite_bb_p gb, int loop_depth, int stride)
+{
+ int row, col;
+ scop_p scop = GBB_SCOP (gb);
+
+ CloogMatrix *domain = GBB_DOMAIN (gb);
+ CloogMatrix *new_domain = cloog_matrix_alloc (domain->NbRows + 3,
+ domain->NbColumns + 1);
+
+ int col_loop_old = loop_depth + 2;
+ int col_loop_strip = col_loop_old - 1;
+ /*int col_loc = col_loop_old + 1; */
Remove commented out code.
+ GBB_STATIC_SCHEDULE (gb) = new_schedule;
+
+ /* XXX: Free old schedule. How? */
This needs to be fixed.
Index: gcc/graphite.h
===================================================================
--- gcc/graphite.h (.../trunk) (revision 0)
+++ gcc/graphite.h (.../branches/graphite) (revision 138569)
@@ -0,0 +1,523 @@
+/* Returns the corresponding loop iterator index for a gimple loop. */
+
+static inline int
+gbb_loop_index (graphite_bb_p gb, loop_p loop)
+{
+ int i;
+ loop_p l;
+
+ for (i = 0; VEC_iterate (loop_p, GBB_LOOPS (gb), i, l); i++)
+ if (loop == l)
+ return i;
Linear walk ...
+/* A SCoP is a Static Control Part of the program, simple enough to be
+ represented in polyhedral form. */
+struct scop
+{
+ /* The entry bb dominates all the bbs of the scop. The exit bb
+ post-dominates all the bbs of the scop. The exit bb
+ potentially contains non affine data accesses, side effect
+ statements or difficult constructs, and thus is not
+ considered part of the scop, but just boundary. The entry bb is
+ considered part of the scop. */
+ basic_block entry, exit;
+
+ /* All the basic blocks in the scope. They have extra information
+ attached to them, in the graphite_bb structure. */
+ VEC (graphite_bb_p, heap) *bbs;
+
+ /* Set for a basic block index when it belongs to this scope. */
+ bitmap bbs_b;
+
+ lambda_vector static_schedule;
+
+ /* Parameters used within the SCOP. */
+ VEC (name_tree, heap) *params;
+
+ /* A collection of old induction variables*/
+ VEC (name_tree, heap) *old_ivs;
+
+ /* Loops completely contained in the scop. */
+ bitmap loops;
+ VEC (loop_p, heap) *loop_nest;
+
+ /* specifies for loop num in loops which corresponding loop depth that num is mapped to
+ in the transformed program */
+ graphite_loops_mapping loops_mapping;
+
+ /* ??? It looks like a global mapping loop_id -> cloog_loop would work. */
+ htab_t loop2cloog_loop;
So - ??? (from last review) - does it work?
+static int
+scop_max_loop_depth (scop_p scop)
+{
+ int i;
+ graphite_bb_p gbb;
+ int max_nb_loops = 0;
+
+ for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gbb); i++)
+ {
+ int nb_loops = gbb_nb_loops (gbb);
+ if (max_nb_loops < nb_loops)
+ max_nb_loops = nb_loops;
+ }
+
+ return max_nb_loops;
+}
This is not a suitable inline function - move this to graphite.c.
+/* Returns the index of LOOP in the domain matrix for the SCOP. */
+
+static inline int
+scop_loop_index (scop_p scop, struct loop *loop)
+{
+ unsigned i;
+ struct loop *l;
+
+ gcc_assert (scop_contains_loop (scop, loop));
+
+ for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, l); i++)
+ if (l == loop)
+ return i;
Linear walk again ...
More information about the Gcc-patches
mailing list