Graphite review, graphite parts [1/n]
Richard Guenther
rguenther@suse.de
Mon Aug 11 13:16:00 GMT 2008
On Thu, 7 Aug 2008, Richard Guenther wrote:
>
> This is part one (from unknown N) of the graphite parts review
> http://cri.ensmp.fr/people/pop/gcc/1142_graphite.diff
>
> Find comments inline.
The rest of the graphite.[ch] review follows. It looks like my
comments from the previous review are not addressed at all ...
Richard.
+/* Return true if we can create a data-ref for OP in STMT. */
+
+static bool
+stmt_simple_memref_for_scop_p (struct loop *loop, gimple stmt, tree op)
+{
+ data_reference_p dr = create_data_ref (loop, op, stmt, true);
+ if (!dr)
+ return false;
+
+ free_data_ref (dr);
+ return true;
+}
This asks for a new helper in tree-data-ref.c that avoids creating the
data-ref in the first place. It also looks like that create_data_ref
never will return NULL, so the function always returns true, which
can't be right.
+/* Return true if the operand OP is simple. */
+
+static bool
+is_simple_operand (loop_p loop, gimple stmt, tree op)
+{
+ return !((handled_component_p (op) || INDIRECT_REF_P (op))
+ && !stmt_simple_memref_for_scop_p (loop, stmt, op));
+}
so loads from plain variables do not need checking for stmt_simple_memref_for_scop_p?
+/* Return true only when STMT is simple enough for being handled by Graphite.
+   This depends on outermost_loop, as the parameters are initialized relative
+ to this loop. */
+
+static bool
+stmt_simple_for_scop_p (struct loop *outermost_loop, gimple stmt)
+{
+ basic_block bb = gimple_bb (stmt);
+ struct loop *loop = bb->loop_father;
+
+ /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
+ Calls have side-effects, except those to const or pure
+ functions. */
+ if (gimple_has_volatile_ops (stmt)
+ || (gimple_code (stmt) == GIMPLE_CALL
+ && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
+ || (gimple_code (stmt) == GIMPLE_ASM))
+ return false;
+
+ switch (gimple_code (stmt))
+ {
+ case GIMPLE_RETURN:
+ case GIMPLE_LABEL:
+ return true;
+
+ case GIMPLE_COND:
+ {
+ tree op;
+ ssa_op_iter op_iter;
+ enum tree_code code = gimple_cond_code (stmt);
+
+ /* We can only handle this kind of conditional expressions.
+ For inequalities like "if (i != 3 * k)" we need unions of
+ polyhedrons. Expressions like "if (a)" or "if (a == 15)" need
+ them for the else branch. */
+      if (!(code == LT_EXPR || code == GT_EXPR
+	    || code == LE_EXPR || GE_EXPR))
+	return false;
Note that "|| GE_EXPR" is missing a "code ==" -- GE_EXPR is a nonzero
enum value, so the whole negated condition is always false and this
check never rejects any comparison code.
+
+ if (!outermost_loop)
+ return false;
+
+ FOR_EACH_SSA_TREE_OPERAND (op, stmt, op_iter, SSA_OP_ALL_USES)
+ if (!loop_affine_expr (outermost_loop, loop, op))
+ return false;
+
+ return true;
+ }
+
+ case GIMPLE_ASSIGN:
+ {
+ enum tree_code code = gimple_assign_rhs_code (stmt);
+
+ switch (get_gimple_rhs_class (code))
+ {
+ case GIMPLE_UNARY_RHS:
+ case GIMPLE_SINGLE_RHS:
+ return (is_simple_operand (loop, stmt, gimple_assign_lhs (stmt))
+ && is_simple_operand (loop, stmt, gimple_assign_rhs1 (stmt)));
This will allow structure copies as simple - is this intended?
+ case GIMPLE_BINARY_RHS:
+ return (is_simple_operand (loop, stmt, gimple_assign_lhs (stmt))
+ && is_simple_operand (loop, stmt, gimple_assign_rhs1 (stmt))
+ && is_simple_operand (loop, stmt, gimple_assign_rhs2 (stmt)));
+
+ case GIMPLE_INVALID_RHS:
+ gcc_unreachable ();
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ case GIMPLE_CALL:
+ {
+ size_t i;
+ size_t n = gimple_call_num_args (stmt);
+
+ for (i = 0; i < n; i++)
+ {
+ tree arg = gimple_call_arg (stmt, i);
+
+ if (!(stmt_simple_memref_for_scop_p (loop, stmt, gimple_assign_lhs (stmt))
please factor the lhs handling out of the loop.  Also, gimple_assign_lhs
is the wrong accessor on a GIMPLE_CALL -- use gimple_call_lhs, and note
that the result may be NULL for calls whose value is unused.
+ && stmt_simple_memref_for_scop_p (loop, stmt, arg)))
+ return false;
+ }
+
+ return true;
+ }
+
+ default:
+ /* These nodes cut a new scope. */
+ return false;
+ }
+
+ return false;
+}
+#define GBB_UNKNOWN 0
+#define GBB_LOOP_SING_EXIT_HEADER 1
+#define GBB_LOOP_MULT_EXIT_HEADER 2
+#define GBB_LOOP_EXIT 3
+#define GBB_COND_HEADER 5
+#define GBB_SIMPLE 6
+#define GBB_LAST 7
Use an enum.
+/* Check if 'exit_bb' is a bb, which follows a loop exit edge.
+ ??? Move to cfgloop.c */
^^^^^^^^^^
as I said in the last review.
+static bool
+is_loop_exit (struct loop *loop, basic_block exit_bb)
+{
+ int i;
+ VEC (edge, heap) *exits;
+ edge e;
+ bool is_exit = false;
+
+ if (loop_outer (loop) == NULL)
+ return false;
+
+ exits = get_loop_exit_edges (loop);
+
+ for (i = 0; VEC_iterate (edge, exits, i, e); i++)
+ if (e->dest == exit_bb)
+ is_exit = true;
+
+ VEC_free (edge, heap, exits);
+
+ return is_exit;
+}
+/* End SCOP with basic block EXIT, and split EXIT before STMT when
+ STMT is non NULL. */
+
+static void
+end_scop (scop_p scop, basic_block exit, basic_block *last, gimple stmt)
+{
+ /* XXX: Disable bb splitting, contains a bug (SIGSEGV in polyhedron
+ aermod.f90). */
Is this still the case?
+/* Gather the basic blocks belonging to the SCOP. */
+
+static void
+build_scop_bbs (scop_p scop)
+{
+ basic_block *stack = XNEWVEC (basic_block, n_basic_blocks + 1);
+ sbitmap visited = sbitmap_alloc (last_basic_block);
+ int sp = 0;
+
+ sbitmap_zero (visited);
+ stack[sp++] = SCOP_ENTRY (scop);
+
+ while (sp)
+ {
+ basic_block bb = stack[--sp];
+ int depth = loop_depth (bb->loop_father);
+ int num = bb->loop_father->num;
+ edge_iterator ei;
+ edge e;
+
+ /* Scop's exit is not in the scop. Exclude also bbs, which are
+ dominated by the SCoP exit. These are e.g. loop latches. */
+ if (TEST_BIT (visited, bb->index)
+ || dominated_by_p (CDI_DOMINATORS, bb, SCOP_EXIT (scop))
+ /* Every block in the scop is dominated by scop's entry. */
+ || !dominated_by_p (CDI_DOMINATORS, bb, SCOP_ENTRY (scop))
+ /* Every block in the scop is postdominated by scop's exit. */
+ || (bb != ENTRY_BLOCK_PTR
+ && !dominated_by_p (CDI_POST_DOMINATORS, bb, SCOP_EXIT (scop))))
+ continue;
+
+ build_graphite_bb (scop, bb);
+ SET_BIT (visited, bb->index);
+
+ /* First push the blocks that have to be processed last. */
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (! TEST_BIT (visited, e->dest->index)
+ && (int) loop_depth (e->dest->loop_father) < depth)
+ stack[sp++] = e->dest;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (! TEST_BIT (visited, e->dest->index)
+ && (int) loop_depth (e->dest->loop_father) == depth
+ && e->dest->loop_father->num != num)
+ stack[sp++] = e->dest;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (! TEST_BIT (visited, e->dest->index)
+ && (int) loop_depth (e->dest->loop_father) == depth
+ && e->dest->loop_father->num == num
+ && EDGE_COUNT (e->dest->preds) > 1)
+ stack[sp++] = e->dest;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (! TEST_BIT (visited, e->dest->index)
+ && (int) loop_depth (e->dest->loop_father) == depth
+ && e->dest->loop_father->num == num
+ && EDGE_COUNT (e->dest->preds) == 1)
+ stack[sp++] = e->dest;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ if (! TEST_BIT (visited, e->dest->index)
+ && (int) loop_depth (e->dest->loop_father) > depth)
+ stack[sp++] = e->dest;
I wonder why it is necessary to walk over all succs multiple times
and why it is not possible to just combine them. It looks like
you might end up putting a dest on the stack multiple times.
+/* Record LOOP as occurring in SCOP.  */
+
+static void
+scop_record_loop (scop_p scop, struct loop *loop)
+{
+ if (!bitmap_bit_p (SCOP_LOOPS (scop), loop->num))
It helps readability and indent level if you write such early outs as
if (bitmap_bit_p (SCOP_LOOPS (scop), loop->num))
return;
+/* Build the loop nests contained in SCOP. */
+
+static void
+build_scop_loop_nests (scop_p scop)
+{
+ unsigned i;
+ graphite_bb_p gb;
+ struct loop *loop0, *loop1;
+
+ for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
+ {
+ struct loop *loop = gbb_loop (gb);
+
+ /* Only add loops, if they are completely contained in the SCoP. */
+ if (loop->header == GBB_BB (gb) && bb_in_scop_p (loop->latch, scop))
Align multiple tests vertically according to the coding conventions
(also in other places).
+ if (loop->header == GBB_BB (gb)
+ && bb_in_scop_p (loop->latch, scop))
+/* Build dynamic schedules for all the BBs. */
+
+static void
+build_scop_dynamic_schedules (scop_p scop)
+{
+ int i, dim, loop_num, row, col;
+ graphite_bb_p gb;
+
+
only one blank line.
+ for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
+ {
+ loop_num = GBB_BB (gb) -> loop_father -> num;
+ if (loop_num != 0)
+ {
+ dim = loop_iteration_vector_dim (loop_num, scop);
+ GBB_DYNAMIC_SCHEDULE (gb) = cloog_matrix_alloc (dim, dim);
+ for (row = 0; row < GBB_DYNAMIC_SCHEDULE (gb)->NbRows; row++)
+ for (col = 0; col < GBB_DYNAMIC_SCHEDULE (gb)->NbColumns; col++)
+ if (row == col)
+ {
+ value_init (GBB_DYNAMIC_SCHEDULE (gb)->p[row][col]);
+ value_set_si (GBB_DYNAMIC_SCHEDULE (gb)->p[row][col], 1);
+ }
+ else
+ {
+ value_init (GBB_DYNAMIC_SCHEDULE (gb)->p[row][col]);
+ value_set_si (GBB_DYNAMIC_SCHEDULE (gb)->p[row][col], 0);
+ }
Better do
+ value_init (GBB_DYNAMIC_SCHEDULE (gb)->p[row][col]);
+ value_set_si (GBB_DYNAMIC_SCHEDULE (gb)->p[row][col], row == col);
?
+/* Get the index for parameter VAR in SCOP. */
+
+static int
+param_index (tree var, scop_p scop)
+{
+ int i;
+ name_tree p;
+ name_tree nvar;
+
+ /* If this is true why not use simply SSA_NAME_VERSION as index? */
+ gcc_assert (TREE_CODE (var) == SSA_NAME);
I see I suggested the name-trick (see first review) during the last
review as well ... (I am actually surprised that all of my past
comments seem to be not addressed)
+/* Scan EXPR and translate it to an inequality vector INEQ that will
+ be inserted in the constraint domain matrix C at row R. N is
+ the number of columns for loop iterators in C. */
+
+static void
+scan_tree_for_params (scop_p s, tree e, CloogMatrix *c, int r, Value k)
+{
+ int cst_col, param_col;
+
+ switch (TREE_CODE (e))
+ {
+ case POLYNOMIAL_CHREC:
+ {
+ tree left = CHREC_LEFT (e);
+ tree right = CHREC_RIGHT (e);
+ int var = CHREC_VARIABLE (e);
+
+ if (TREE_CODE (right) != INTEGER_CST)
+ return;
+
+ if (c)
+ {
+ int var_idx = loop_iteration_vector_dim (var, s);
+ value_init (c->p[r][var_idx]);
+ value_set_si (c->p[r][var_idx], int_cst_value (right));
+ }
+
+ switch (TREE_CODE (left))
+ {
+ case POLYNOMIAL_CHREC:
+ scan_tree_for_params (s, left, c, r, k);
+ return;
+
+ case INTEGER_CST:
+ /* Constant part. */
+ if (c)
+ {
+ cst_col = c->NbColumns - 1;
+ value_init (c->p[r][cst_col]);
+ value_set_si (c->p[r][cst_col], int_cst_value (left));
+ }
+ return;
+
+ default:
+ scan_tree_for_params (s, left, c, r, k);
+ return;
+ }
+ }
+ break;
+
+ case MULT_EXPR:
+ if (chrec_contains_symbols (TREE_OPERAND (e, 0)))
+ {
+ Value val;
+
+ gcc_assert (host_integerp (TREE_OPERAND (e, 1), 0));
+
+ value_init (val);
+ value_set_si (val, int_cst_value (TREE_OPERAND (e, 1)));
+ value_multiply (k, k, val);
+ value_clear (val);
+ scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k);
+ }
+ else
+ {
+ Value val;
+
+ gcc_assert (host_integerp (TREE_OPERAND (e, 0), 0));
+
+ value_init (val);
+ value_set_si (val, int_cst_value (TREE_OPERAND (e, 0)));
+ value_multiply (k, k, val);
+ value_clear (val);
+ scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k);
+ }
+ break;
+
+ case PLUS_EXPR:
+ scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k);
+ scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k);
+ break;
+
+ case MINUS_EXPR:
+ scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k);
+ value_oppose (k, k);
+ scan_tree_for_params (s, TREE_OPERAND (e, 1), c, r, k);
+ break;
+
+ case SSA_NAME:
+ param_col = param_index (e, s);
+
+ if (c)
+ {
+ param_col += c->NbColumns - scop_nb_params (s) - 1;
+ value_init (c->p[r][param_col]);
+ value_assign (c->p[r][param_col], k);
+ }
+ break;
+
+ case INTEGER_CST:
+ if (c)
+ {
+ cst_col = c->NbColumns - 1;
+ value_init (c->p[r][cst_col]);
+ value_set_si (c->p[r][cst_col], int_cst_value (e));
+ }
+ break;
+
+ case NOP_EXPR:
+ case CONVERT_EXPR:
+ case NON_LVALUE_EXPR:
+ scan_tree_for_params (s, TREE_OPERAND (e, 0), c, r, k);
+ break;
+
+ default:
+ /* ??? The default cases should be gcc_unreachable (). */
Another past-review thing ...
+struct irp_data
+{
+ struct loop *loop;
+ scop_p scop;
+};
Needs a comment.
+/* Saves in NV the name of variable P->T. */
+
+static void
+save_var_name (char **nv, int i, name_tree p)
+{
+ const char *name = get_name (SSA_NAME_VAR (p->t));
+
+ if (name)
+ {
+ nv[i] = XNEWVEC (char, strlen (name) + 12);
+ sprintf (nv[i], "%s_%d", name, SSA_NAME_VERSION (p->t));
+ }
+ else
+ {
+ nv[i] = XNEWVEC (char, 12);
+ sprintf (nv[i], "T_%d", SSA_NAME_VERSION (p->t));
+ }
+
+ p->name = nv[i];
+}
And you already _do_ use a "fancy" naming scheme!
+/* Returns a graphite_bb from BB. */
+
+static graphite_bb_p
+graphite_bb_from_bb (basic_block bb, scop_p scop)
+{
+ graphite_bb_p gb;
+ int i;
+
+ for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gb); i++)
+ if (GBB_BB (gb) == bb)
+ return gb;
This looks like a lookup that uses linear time.  You can either use
bb->aux or sort SCOP_BBS after the basic-block index.
+/* Converts a GMP constant value to a tree and returns it. */
+
+static tree
+gmp_cst_to_tree (Value v)
+{
+ return build_int_cst (integer_type_node, value_get_si (v));
+}
+
+/* Returns the tree variable from the name that it was given in Cloog
+ representation.
+
+ FIXME: This is a hack, and Cloog should be fixed to not work with
+ variable names represented as "char *string", but with void
+ pointers that could be casted back to a tree. The only problem in
+ doing that is that Cloog's pretty printer still assumes that
+ variable names are char *strings. The solution would be to have a
+ function pointer for pretty-printing that can be redirected to be
+ print_generic_stmt in our case, or fprintf by default.
+ ??? Too ugly to live. */
^^^^^^^^^^^
still the case. If there is no progress with the above use fancy
names.
+static tree
+clast_name_to_gcc (const char *name, VEC (name_tree, heap) *params,
+ loop_iv_stack ivstack)
+{
+ int i;
+ name_tree t;
+ tree iv;
+
+ for (i = 0; VEC_iterate (name_tree, params, i, t); i++)
+ if (!strcmp (name, t->name))
+ return t->t;
+
+ iv = loop_iv_stack_get_iv_from_name (ivstack, name);
+ if (iv)
+ return iv;
+
+ gcc_unreachable ();
+ return NULL_TREE;
+}
+/* Remove all the edges from BB. */
+
+static void
+remove_all_edges (basic_block bb, edge construction_edge)
What is construction_edge, and how is this still "remove_all_edges"?
+{
+ edge e;
+ edge_iterator ei;
+
+ for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
+ {
+ if (e != construction_edge)
+ {
+ remove_edge (e);
+ e = ei_safe_edge (ei);
+ }
+ else
+ ei_next (&ei);
+ }
+
+ for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
+ {
+ if (e != construction_edge)
+ {
+ remove_edge (e);
+ e = ei_safe_edge (ei);
+ }
+ else
+ ei_next (&ei);
+ }
+}
+/* Remove COND_EXPRs from BB. */
+
+static void
+remove_cond_exprs (basic_block bb)
+{
+ gimple_stmt_iterator gsi;
+
+ for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
+ {
+ gimple stmt = gsi_stmt (gsi);
+
+ if (gimple_code (stmt) == GIMPLE_COND)
+ gsi_remove (&gsi, true);
+ else
+ gsi_next (&gsi);
+ }
The only cond_expr in a BB is at the last statement in the BB, so
simply do
if (gimple_code (last_stmt (bb)) == GIMPLE_COND)
...
but of course you need to fix outgoing edges.
+/* Return a vector of all the virtual phi nodes in the current
+ function. */
+
+static VEC (gimple, heap) *
+collect_virtual_phis (void)
+{
+ gimple_stmt_iterator si;
+ gimple_vec phis = VEC_alloc (gimple, heap, 3);
+ basic_block bb;
+
+ FOR_EACH_BB (bb)
+ for (si = gsi_start_phis (bb); !gsi_end_p(si); gsi_next (&si))
+ {
+ gimple phi = gsi_stmt (si);
+
+ /* The phis we moved will have 0 arguments because the
+ original edges were removed. */
+ if (gimple_phi_num_args (phi) == 0)
+ VEC_safe_push (gimple, heap, phis, phi);
+ }
+
+ /* Deallocate if we did not find any. */
+ if (VEC_length (gimple, phis) == 0)
+ {
+ VEC_free (gimple, heap, phis);
+ phis = NULL;
+ }
+
+ return phis;
+}
+
+/* Find a virtual definition for variable VAR in BB. */
+
+static tree
+find_vdef_for_var_in_bb (basic_block bb, tree var)
+{
+ gimple_stmt_iterator gsi;
+ gimple phi;
+ def_operand_p def_var;
+ vuse_vec_p vv;
+ ssa_op_iter op_iter;
+
+ for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
+ FOR_EACH_SSA_VDEF_OPERAND (def_var, vv, gsi_stmt (gsi), op_iter)
+ if (SSA_NAME_VAR (*def_var) == var)
+ return *def_var;
+
+ for (gsi = gsi_start_phis (bb); !gsi_end_p(gsi); gsi_next (&gsi))
+ {
+ phi = gsi_stmt (gsi);
+ if (SSA_NAME_VAR (PHI_RESULT (phi)) == var)
+ return PHI_RESULT (phi);
+ }
+
+ return NULL;
+}
+
+/* Recursive helper. */
+
+static tree
+find_vdef_for_var_1 (basic_block bb, struct pointer_set_t *visited, tree var)
+{
+ tree result = NULL;
+ edge_iterator ei;
+ edge pred_edge;
+
+ if (pointer_set_contains (visited, bb))
+ return NULL;
+
+ pointer_set_insert (visited, bb);
+ result = find_vdef_for_var_in_bb (bb, var);
+
+ if (!result)
+ FOR_EACH_EDGE (pred_edge, ei, bb->preds)
+ if (!result)
+ result = find_vdef_for_var_1 (pred_edge->src, visited, var);
+
+ return result;
+}
+
+/* Finds a virtual definition for variable VAR. */
+
+static tree
+find_vdef_for_var (basic_block bb, tree var)
+{
+ struct pointer_set_t *visited = pointer_set_create ();
+ tree def = find_vdef_for_var_1 (bb, visited, var);
+
+ pointer_set_destroy (visited);
+ return def;
+}
+
+/* Update the virtual phis after loop bodies are moved to new
+ loops. */
+
+static void
+patch_phis_for_virtual_defs (void)
+{
+ int i;
+ gimple phi;
+ VEC (gimple, heap) *virtual_phis = collect_virtual_phis ();
+
+ for (i = 0; VEC_iterate (gimple, virtual_phis, i, phi); i++)
+ {
+ basic_block bb = gimple_bb (phi);
+ edge_iterator ei;
+ edge pred_edge;
+ gimple_stmt_iterator gsi;
+ gimple new_phi;
+
+ /* FIXME: Should PHI_RESULT become gimple_phi_result? */
+ tree phi_result = PHI_RESULT (phi);
+
+ tree var = SSA_NAME_VAR (phi_result);
+
+ new_phi = create_phi_node (phi_result, bb);
+ SSA_NAME_DEF_STMT (phi_result) = new_phi;
+
+ FOR_EACH_EDGE (pred_edge, ei, bb->preds)
+ {
+ tree def = find_vdef_for_var (pred_edge->src, var);
+
+ if (def)
+ add_phi_arg (new_phi, def, pred_edge);
+ else
+ add_phi_arg (new_phi, gimple_default_def (cfun, var), pred_edge);
+ }
+
+ gsi = gsi_for_stmt (phi);
+ remove_phi_node (&gsi, false);
+ }
+}
Why are you doing all this virtual-phi stuff? Why are you not just
renaming all symbols that appear in virtual operands?
+/* Scan the loops and remove the ones that have been marked for
+ removal. */
+
+static void
+remove_dead_loops (void)
+{
+ struct loop *loop, *ploop;
+ loop_iterator li;
+
+ FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
+ {
+ if (loop->header)
+ continue;
+
+ while (loop->inner)
+ {
+ ploop = loop->inner;
+ flow_loop_tree_node_remove (ploop);
+ flow_loop_tree_node_add (loop_outer (loop), ploop);
+ }
+
+ /* Remove the loop and free its data. */
+ delete_loop (loop);
+ }
I don't see how loops are not marked. This seems to remove _all_
loops. The while (loop->inner) loop must be dead - how can an
outer loop be deleted without deleting its inner loops??
+/* GIMPLE Loop Generator: generates loops from STMT in GIMPLE form for
+ the given SCOP. */
+
+static void
+gloog (scop_p scop, struct clast_stmt *stmt)
+{
+ edge new_scop_exit_edge = NULL;
+ basic_block scop_exit = SCOP_EXIT (scop);
+ VEC (name_tree, heap) *ivstack = VEC_alloc (name_tree, heap, 10);
+ edge construction_edge = NULL;
+ VEC (basic_block,heap) *bbs = VEC_alloc (basic_block, heap, 10);
+ basic_block old_scop_exit_idom = get_immediate_dominator (CDI_DOMINATORS, scop_exit);
+
+ if (!can_generate_code (stmt) || !can_generate_for_scop (scop))
+ {
+ cloog_clast_free (stmt);
+ return;
+ }
+
+ construction_edge = get_construction_edge (scop);
+ if (!construction_edge)
+ {
+ cloog_clast_free (stmt);
+ return;
+ }
+
+ gather_blocks_in_sese_region (SCOP_ENTRY (scop), SCOP_EXIT (scop), &bbs);
+ new_scop_exit_edge = translate_clast (scop, SCOP_ENTRY(scop)->loop_father, stmt, construction_edge, &ivstack);
+ redirect_edge_succ (new_scop_exit_edge, scop_exit);
+ cloog_clast_free (stmt);
+
+ if (new_scop_exit_edge->dest == EXIT_BLOCK_PTR)
+ new_scop_exit_edge->flags = 0;
+
+ remove_edges_around_useless_blocks (scop, bbs);
+
+ VEC_free (basic_block, heap, bbs);
+
+ patch_phis_for_virtual_defs ();
+
+ find_unreachable_blocks ();
+ if (old_scop_exit_idom
+ && !(old_scop_exit_idom->flags & BB_REACHABLE))
+ set_immediate_dominator (CDI_DOMINATORS,
+ new_scop_exit_edge->dest,
+ new_scop_exit_edge->src);
+
+ delete_unreachable_blocks();
+ mark_old_loops (scop);
+ remove_dead_loops ();
+ verify_loop_structure ();
This should be inside #ifdef ENABLE_CHECKING
+ calculate_dominance_info (CDI_DOMINATORS);
+ calculate_dominance_info (CDI_POST_DOMINATORS);
This is a no-op unless you free dominator information before. Maybe
you want to use verify_dominators () (inside ENABLE_CHECKING)?
+ rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
+ update_ssa (TODO_update_ssa);
+ verify_ssa (false);
Inside #ifdef ENABLE_CHECKING please.
+ estimate_bb_frequencies ();
No - this will destroy any valid frequency information. If you want
to do this for your new loops you need to properly split and expose
the machinery for loops or basic-blocks. Re-computing it for the
whole function is just wrong.
+static void
+graphite_trans_bb_move_loop (graphite_bb_p gb, int loop,
+ int new_loop_pos)
+{
+ CloogMatrix *domain = GBB_DOMAIN (gb);
+ scop_p scop = GBB_SCOP (gb);
+ int row, j;
+ loop_p tmp_loop_p;
+
+ gcc_assert (loop < gbb_nb_loops (gb));
+ gcc_assert (new_loop_pos < gbb_nb_loops (gb));
+
+
only one blank line.  (please check elsewhere as well)
+ swap_loop_mapped_depth (scop, gb, loop, new_loop_pos);
+
+ /* Update LOOPS vector. */
+ tmp_loop_p = VEC_index (loop_p, GBB_LOOPS (gb), loop);
+ VEC_ordered_remove (loop_p, GBB_LOOPS (gb), loop);
+ VEC_safe_insert (loop_p, heap, GBB_LOOPS (gb), new_loop_pos, tmp_loop_p);
+
+
+ /* Move the domain columns. */
+ if (loop < new_loop_pos)
+ for (row = 0; row < domain->NbRows; row++)
+ {
+ Value tmp;
+ value_init (tmp);
+ value_assign (tmp, domain->p[row][loop + 1]);
+
+ for (j = loop ; j < new_loop_pos - 1; j++)
+ value_assign (domain->p[row][j + 1], domain->p[row][j + 2]);
+
+ value_assign (domain->p[row][new_loop_pos], tmp);
+ value_clear (tmp);
+ }
+ else
+ for (row = 0; row < domain->NbRows; row++)
+ {
+ Value tmp;
+ value_init (tmp);
+ value_assign (tmp, domain->p[row][loop + 1]);
+
+ for (j = loop ; j > new_loop_pos; j--)
+ value_assign (domain->p[row][j + 1], domain->p[row][j]);
+
+ value_assign (domain->p[row][new_loop_pos + 1], tmp);
+ value_clear (tmp);
+ }
looks like two loops with lots of common code - please factor it out.
+/* Strip mines the loop of BB at the position LOOP_DEPTH with STRIDE.
+ Always valid, but not always a performance improvement. */
+
+static void
+graphite_trans_bb_strip_mine (graphite_bb_p gb, int loop_depth, int stride)
+{
+ int row, col;
+ scop_p scop = GBB_SCOP (gb);
+
+ CloogMatrix *domain = GBB_DOMAIN (gb);
+ CloogMatrix *new_domain = cloog_matrix_alloc (domain->NbRows + 3,
+ domain->NbColumns + 1);
+
+ int col_loop_old = loop_depth + 2;
+ int col_loop_strip = col_loop_old - 1;
+ /*int col_loc = col_loop_old + 1; */
Remove commented out code.
+ GBB_STATIC_SCHEDULE (gb) = new_schedule;
+
+ /* XXX: Free old schedule. How? */
This needs to be fixed.
Index: gcc/graphite.h
===================================================================
--- gcc/graphite.h (.../trunk) (revision 0)
+++ gcc/graphite.h (.../branches/graphite) (revision 138569)
@@ -0,0 +1,523 @@
+/* Returns the corresponding loop iterator index for a gimple loop. */
+
+static inline int
+gbb_loop_index (graphite_bb_p gb, loop_p loop)
+{
+ int i;
+ loop_p l;
+
+ for (i = 0; VEC_iterate (loop_p, GBB_LOOPS (gb), i, l); i++)
+ if (loop == l)
+ return i;
Linear walk ...
+/* A SCoP is a Static Control Part of the program, simple enough to be
+ represented in polyhedral form. */
+struct scop
+{
+ /* The entry bb dominates all the bbs of the scop. The exit bb
+ post-dominates all the bbs of the scop. The exit bb
+ potentially contains non affine data accesses, side effect
+ statements or difficult constructs, and thus is not
+ considered part of the scop, but just boundary. The entry bb is
+ considered part of the scop. */
+ basic_block entry, exit;
+
+ /* All the basic blocks in the scope. They have extra information
+ attached to them, in the graphite_bb structure. */
+ VEC (graphite_bb_p, heap) *bbs;
+
+ /* Set for a basic block index when it belongs to this scope. */
+ bitmap bbs_b;
+
+ lambda_vector static_schedule;
+
+ /* Parameters used within the SCOP. */
+ VEC (name_tree, heap) *params;
+
+ /* A collection of old induction variables*/
+ VEC (name_tree, heap) *old_ivs;
+
+ /* Loops completely contained in the scop. */
+ bitmap loops;
+ VEC (loop_p, heap) *loop_nest;
+
+ /* specifies for loop num in loops which corresponding loop depth that num is mapped to
+ in the transformed program */
+ graphite_loops_mapping loops_mapping;
+
+ /* ??? It looks like a global mapping loop_id -> cloog_loop would work. */
+ htab_t loop2cloog_loop;
So - ??? (from last review) - does it work?
+static int
+scop_max_loop_depth (scop_p scop)
+{
+ int i;
+ graphite_bb_p gbb;
+ int max_nb_loops = 0;
+
+ for (i = 0; VEC_iterate (graphite_bb_p, SCOP_BBS (scop), i, gbb); i++)
+ {
+ int nb_loops = gbb_nb_loops (gbb);
+ if (max_nb_loops < nb_loops)
+ max_nb_loops = nb_loops;
+ }
+
+ return max_nb_loops;
+}
This is not a suitable inline function - move this to graphite.c.
+/* Returns the index of LOOP in the domain matrix for the SCOP. */
+
+static inline int
+scop_loop_index (scop_p scop, struct loop *loop)
+{
+ unsigned i;
+ struct loop *l;
+
+ gcc_assert (scop_contains_loop (scop, loop));
+
+ for (i = 0; VEC_iterate (loop_p, SCOP_LOOP_NEST (scop), i, l); i++)
+ if (l == loop)
+ return i;
Linear walk again ...
More information about the Gcc-patches
mailing list