This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: Graphite review, graphite parts [1/n]
Hi,
On Thu, Aug 21, 2008 at 5:39 PM, Sebastian Pop <sebastian.pop@amd.com> wrote:
> I will diff again the graphite branch against trunk and prepare the
> split patches such that it will be simpler for you to review all the
> changes.
Attached are the middle-end changes from the graphite branch. You can
get the graphite.[ch] files from:
http://gcc.gnu.org/viewcvs/branches/graphite/gcc/graphite.c?view=markup
http://gcc.gnu.org/viewcvs/branches/graphite/gcc/graphite.h?view=markup
Build and testsuite parts are not included as I got an ok from
maintainers for these parts.
Sebastian
Index: gcc/tree-loop-linear.c
===================================================================
--- gcc/tree-loop-linear.c (.../trunk) (revision 138275)
+++ gcc/tree-loop-linear.c (.../branches/graphite) (revision 139415)
@@ -272,7 +272,7 @@ try_interchange_loops (lambda_trans_matr
/* Return the number of nested loops in LOOP_NEST, or 0 if the loops
are not perfectly nested. */
-static unsigned int
+unsigned int
perfect_loop_nest_depth (struct loop *loop_nest)
{
struct loop *temp;
Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi (.../trunk) (revision 138275)
+++ gcc/doc/invoke.texi (.../branches/graphite) (revision 139415)
@@ -333,6 +333,7 @@ Objective-C and Objective-C++ Dialects}.
-finline-small-functions -fipa-cp -fipa-marix-reorg -fipa-pta @gol
-fipa-pure-const -fipa-reference -fipa-struct-reorg @gol
-fipa-type-escape -fivopts -fkeep-inline-functions -fkeep-static-consts @gol
+-floop-block -floop-interchange -floop-strip-mine @gol
-fmerge-all-constants -fmerge-constants -fmodulo-sched @gol
-fmodulo-sched-allow-regmoves -fmove-loop-invariants -fmudflap @gol
-fmudflapir -fmudflapth -fno-branch-count-reg -fno-default-inline @gol
@@ -5941,6 +5942,81 @@ at @option{-O} and higher.
Perform linear loop transformations on tree. This flag can improve cache
performance and allow further loop optimizations to take place.
+@item -floop-interchange
+Perform loop interchange transformations on loops. Interchanging two
+nested loops switches the inner and outer loops. For example, given a
+loop like:
+@smallexample
+DO J = 1, M
+ DO I = 1, N
+ A(J, I) = A(J, I) * C
+ ENDDO
+ENDDO
+@end smallexample
+loop interchange will transform the loop as if the user had written:
+@smallexample
+DO I = 1, N
+ DO J = 1, M
+ A(J, I) = A(J, I) * C
+ ENDDO
+ENDDO
+@end smallexample
+which can be beneficial when @code{N} is larger than the caches,
+because in Fortran, the elements of an array are stored in memory
+contiguously by column, and the original loop iterates over rows,
+potentially creating at each access a cache miss. This optimization
+applies to all the languages supported by GCC and is not limited to
+Fortran.
+
+@item -floop-strip-mine
+Perform loop strip mining transformations on loops. Strip mining
+splits a loop into two nested loops. The outer loop has strides
+equal to the strip size and the inner loop has strides of the
+original loop within a strip. For example, given a loop like:
+@smallexample
+DO I = 1, N
+ A(I) = A(I) + C
+ENDDO
+@end smallexample
+loop strip mining will transform the loop as if the user had written:
+@smallexample
+DO II = 1, N, 4
+ DO I = II, min (II + 4, N)
+ A(I) = A(I) + C
+ ENDDO
+ENDDO
+@end smallexample
+This optimization applies to all the languages supported by GCC and is
+not limited to Fortran.
+
+@item -floop-block
+Perform loop blocking transformations on loops. Blocking strip mines
+each loop in the loop nest such that the memory accesses of the
+element loops fit inside caches. For example, given a loop like:
+@smallexample
+DO I = 1, N
+ DO J = 1, M
+ A(J, I) = B(I) + C(J)
+ ENDDO
+ENDDO
+@end smallexample
+loop blocking will transform the loop as if the user had written:
+@smallexample
+DO II = 1, N, 64
+ DO JJ = 1, M, 64
+ DO I = II, min (II + 64, N)
+ DO J = JJ, min (JJ + 64, M)
+ A(J, I) = B(I) + C(J)
+ ENDDO
+ ENDDO
+ ENDDO
+ENDDO
+@end smallexample
+which can be beneficial when @code{M} is larger than the caches,
+because the innermost loop will iterate over a smaller amount of data
+that can be kept in the caches. This optimization applies to all the
+languages supported by GCC and is not limited to Fortran.
+
@item -fcheck-data-deps
@opindex fcheck-data-deps
Compare the results of several data dependence analyzers. This option
Index: gcc/tree-into-ssa.c
===================================================================
--- gcc/tree-into-ssa.c (.../trunk) (revision 138275)
+++ gcc/tree-into-ssa.c (.../branches/graphite) (revision 139415)
@@ -130,12 +130,6 @@ static bitmap mem_syms_to_rename;
released after we finish updating the SSA web. */
static bitmap names_to_release;
-/* For each block, the PHI nodes that need to be rewritten are stored into
- these vectors. */
-typedef VEC(gimple, heap) *gimple_vec;
-DEF_VEC_P (gimple_vec);
-DEF_VEC_ALLOC_P (gimple_vec, heap);
-
static VEC(gimple_vec, heap) *phis_to_rewrite;
/* The bitmap of non-NULL elements of PHIS_TO_REWRITE. */
Index: gcc/tree-loop-distribution.c
===================================================================
--- gcc/tree-loop-distribution.c (.../trunk) (revision 138275)
+++ gcc/tree-loop-distribution.c (.../branches/graphite) (revision 139415)
@@ -710,17 +710,6 @@ rdg_flag_loop_exits (struct graph *rdg,
}
}
-/* Strongly connected components of the reduced data dependence graph. */
-
-typedef struct rdg_component
-{
- int num;
- VEC (int, heap) *vertices;
-} *rdgc;
-
-DEF_VEC_P (rdgc);
-DEF_VEC_ALLOC_P (rdgc, heap);
-
/* Flag all the nodes of RDG containing memory accesses that could
potentially belong to arrays already accessed in the current
PARTITION. */
@@ -864,9 +853,6 @@ rdg_build_components (struct graph *rdg,
BITMAP_FREE (saved_components);
}
-DEF_VEC_P (bitmap);
-DEF_VEC_ALLOC_P (bitmap, heap);
-
/* Aggregate several components into a useful partition that is
registered in the PARTITIONS vector. Partitions will be
distributed in different loops. */
Index: gcc/cfgloopmanip.c
===================================================================
--- gcc/cfgloopmanip.c (.../trunk) (revision 138275)
+++ gcc/cfgloopmanip.c (.../branches/graphite) (revision 139415)
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3.
#include "cfglayout.h"
#include "cfghooks.h"
#include "output.h"
+#include "tree-flow.h"
static void duplicate_subloops (struct loop *, struct loop *);
static void copy_loops_to (struct loop **, int,
@@ -466,6 +467,160 @@ scale_loop_frequencies (struct loop *loo
free (bbs);
}
+/* Recompute dominance information for basic blocks outside LOOP. */
+
+static void update_dominators_in_loop (struct loop *loop)
+{
+ VEC (basic_block, heap) *dom_bbs = NULL;
+ sbitmap seen;
+ basic_block *body;
+ unsigned i;
+
+ seen = sbitmap_alloc (last_basic_block);
+ sbitmap_zero (seen);
+ body = get_loop_body (loop);
+
+ for (i = 0; i < loop->num_nodes; i++)
+ SET_BIT (seen, body[i]->index);
+
+ for (i = 0; i < loop->num_nodes; i++)
+ {
+ basic_block ldom;
+
+ for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
+ ldom;
+ ldom = next_dom_son (CDI_DOMINATORS, ldom))
+ if (!TEST_BIT (seen, ldom->index))
+ {
+ SET_BIT (seen, ldom->index);
+ VEC_safe_push (basic_block, heap, dom_bbs, ldom);
+ }
+ }
+
+ iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
+ free (body);
+ free (seen);
+ VEC_free (basic_block, heap, dom_bbs);
+}
+
+/* create_empty_loop_on_edge
+ |
+ | ------------- ------------------------
+ | | pred_bb | | pred_bb |
+ | ------------- | IV_0 = INITIAL_VALUE |
+ | | ------------------------
+ | | ______ | ENTRY_EDGE
+ | | ENTRY_EDGE / V V
+ | | ====> | -----------------------------
+ | | | | IV_BEFORE = phi (IV_0, IV) |
+ | | | | loop_header |
+ | V | | IV_BEFORE <= UPPER_BOUND |
+ | ------------- | -----------------------\-----
+ | | succ_bb | | | \
+ | ------------- | | \ exit_e
+ | | V V---------
+ | | -------------- | succ_bb |
+ | | | loop_latch | ----------
+ | | |IV = IV_BEFORE + STRIDE
+ | | --------------
+ | \ /
+ | \ ___ /
+
+ Creates an empty loop as shown above, the IV_BEFORE is the SSA_NAME
+ that is used before the increment of IV. IV_BEFORE should be used for
+ adding code to the body that uses the IV. OUTER is the outer loop in
+ which the new loop should be inserted. */
+
+struct loop *
+create_empty_loop_on_edge (edge entry_edge,
+ tree initial_value,
+ tree stride, tree upper_bound,
+ tree iv,
+ tree *iv_before,
+ struct loop *outer)
+{
+ basic_block loop_header, loop_latch, succ_bb, pred_bb;
+ struct loop *loop;
+ int freq;
+ gcov_type cnt;
+ gimple_stmt_iterator gsi;
+ bool insert_after;
+ gimple_seq stmts;
+ gimple cond_expr;
+ tree exit_test;
+ edge exit_e;
+ int prob;
+ tree upper_bound_gimplified;
+
+ gcc_assert (entry_edge && initial_value && stride && upper_bound && iv);
+
+ /* Create header, latch and wire up the loop. */
+ pred_bb = entry_edge->src;
+ loop_header = split_edge (entry_edge);
+ loop_latch = split_edge (single_succ_edge (loop_header));
+ succ_bb = single_succ (loop_latch);
+ make_edge (loop_header, succ_bb, 0);
+ redirect_edge_succ_nodup (single_succ_edge (loop_latch), loop_header);
+
+ /* Set immediate dominator information. */
+ set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb);
+ set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header);
+ set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header);
+
+ /* Initialize a loop structure and put it in a loop hierarchy. */
+ loop = alloc_loop ();
+ loop->header = loop_header;
+ loop->latch = loop_latch;
+ add_loop (loop, outer);
+
+ /* TODO: Fix frequencies and counts. */
+ freq = EDGE_FREQUENCY (entry_edge);
+ cnt = entry_edge->count;
+
+ prob = REG_BR_PROB_BASE / 2;
+
+ scale_loop_frequencies (loop, REG_BR_PROB_BASE - prob, REG_BR_PROB_BASE);
+
+ /* Update dominators. */
+ update_dominators_in_loop (loop);
+
+ /* Construct IV code in loop. */
+ initial_value = force_gimple_operand (initial_value, &stmts, true, iv);
+ if (stmts)
+ {
+ gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
+ gsi_commit_edge_inserts ();
+ }
+
+ standard_iv_increment_position (loop, &gsi, &insert_after);
+ create_iv (initial_value, stride, iv, loop, &gsi, insert_after,
+ iv_before, NULL);
+
+ /* Modify edge flags. */
+ exit_e = single_exit (loop);
+ exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE;
+ single_pred_edge (loop_latch)->flags = EDGE_TRUE_VALUE;
+
+ gsi = gsi_last_bb (exit_e->src);
+
+ upper_bound_gimplified =
+ force_gimple_operand_gsi (&gsi, upper_bound, true, NULL,
+ false, GSI_NEW_STMT);
+ gsi = gsi_last_bb (exit_e->src);
+
+ cond_expr = gimple_build_cond
+ (LE_EXPR, *iv_before, upper_bound_gimplified, NULL_TREE, NULL_TREE);
+
+ exit_test = gimple_cond_lhs (cond_expr);
+ exit_test = force_gimple_operand_gsi (&gsi, exit_test, true, NULL,
+ false, GSI_NEW_STMT);
+ gimple_cond_set_lhs (cond_expr, exit_test);
+ gsi = gsi_last_bb (exit_e->src);
+ gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT);
+
+ return loop;
+}
+
/* Make area between HEADER_EDGE and LATCH_EDGE a loop by connecting
latch to header and update loop tree and dominators
accordingly. Everything between them plus LATCH_EDGE destination must
@@ -483,10 +638,6 @@ loopify (edge latch_edge, edge header_ed
{
basic_block succ_bb = latch_edge->dest;
basic_block pred_bb = header_edge->src;
- basic_block *body;
- VEC (basic_block, heap) *dom_bbs;
- unsigned i;
- sbitmap seen;
struct loop *loop = alloc_loop ();
struct loop *outer = loop_outer (succ_bb->loop_father);
int freq;
@@ -538,35 +689,7 @@ loopify (edge latch_edge, edge header_ed
}
scale_loop_frequencies (loop, false_scale, REG_BR_PROB_BASE);
scale_loop_frequencies (succ_bb->loop_father, true_scale, REG_BR_PROB_BASE);
-
- /* Update dominators of blocks outside of LOOP. */
- dom_bbs = NULL;
- seen = sbitmap_alloc (last_basic_block);
- sbitmap_zero (seen);
- body = get_loop_body (loop);
-
- for (i = 0; i < loop->num_nodes; i++)
- SET_BIT (seen, body[i]->index);
-
- for (i = 0; i < loop->num_nodes; i++)
- {
- basic_block ldom;
-
- for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
- ldom;
- ldom = next_dom_son (CDI_DOMINATORS, ldom))
- if (!TEST_BIT (seen, ldom->index))
- {
- SET_BIT (seen, ldom->index);
- VEC_safe_push (basic_block, heap, dom_bbs, ldom);
- }
- }
-
- iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
-
- free (body);
- free (seen);
- VEC_free (basic_block, heap, dom_bbs);
+ update_dominators_in_loop (loop);
return loop;
}
Index: gcc/tree-pass.h
===================================================================
--- gcc/tree-pass.h (.../trunk) (revision 138275)
+++ gcc/tree-pass.h (.../branches/graphite) (revision 139415)
@@ -321,6 +321,7 @@ extern struct gimple_opt_pass pass_iv_ca
extern struct gimple_opt_pass pass_scev_cprop;
extern struct gimple_opt_pass pass_empty_loop;
extern struct gimple_opt_pass pass_record_bounds;
+extern struct gimple_opt_pass pass_graphite_transforms;
extern struct gimple_opt_pass pass_if_conversion;
extern struct gimple_opt_pass pass_loop_distribution;
extern struct gimple_opt_pass pass_vectorize;
Index: gcc/tree-chrec.c
===================================================================
--- gcc/tree-chrec.c (.../trunk) (revision 138275)
+++ gcc/tree-chrec.c (.../branches/graphite) (revision 139415)
@@ -1408,3 +1408,26 @@ scev_direction (const_tree chrec)
else
return EV_DIR_GROWS;
}
+
+/* Iterates over all the components of SCEV, and calls CBCK. */
+
+void
+for_each_scev_op (tree *scev, bool (*cbck) (tree *, void *), void *data)
+{
+ switch (TREE_CODE_LENGTH (TREE_CODE (*scev)))
+ {
+ case 3:
+ for_each_scev_op (&TREE_OPERAND (*scev, 2), cbck, data);
+
+ case 2:
+ for_each_scev_op (&TREE_OPERAND (*scev, 1), cbck, data);
+
+ case 1:
+ for_each_scev_op (&TREE_OPERAND (*scev, 0), cbck, data);
+
+ default:
+ cbck (scev, data);
+ break;
+ }
+}
+
Index: gcc/tree-chrec.h
===================================================================
--- gcc/tree-chrec.h (.../trunk) (revision 138275)
+++ gcc/tree-chrec.h (.../branches/graphite) (revision 139415)
@@ -70,6 +70,7 @@ extern tree evolution_part_in_loop_num (
extern tree hide_evolution_in_other_loops_than_loop (tree, unsigned);
extern tree reset_evolution_in_loop (unsigned, tree, tree);
extern tree chrec_merge (tree, tree);
+extern void for_each_scev_op (tree *, bool (*) (tree *, void *), void *);
/* Observers. */
extern bool eq_evolutions_p (const_tree, const_tree);
Index: gcc/gdbinit.in
===================================================================
--- gcc/gdbinit.in (.../trunk) (revision 138275)
+++ gcc/gdbinit.in (.../branches/graphite) (revision 139415)
@@ -40,6 +40,15 @@ Print the tree that is $ in C syntax.
Works only when an inferior is executing.
end
+define pgg
+set debug_gimple_stmt ($)
+end
+
+document pgg
+Print the Gimple statement that is $ in C syntax.
+Works only when an inferior is executing.
+end
+
define pgs
set debug_generic_stmt ($)
end
Index: gcc/timevar.def
===================================================================
--- gcc/timevar.def (.../trunk) (revision 138275)
+++ gcc/timevar.def (.../branches/graphite) (revision 139415)
@@ -125,6 +125,7 @@ DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "
DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops")
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization")
+DEFTIMEVAR (TV_GRAPHITE_TRANSFORMS , "GRAPHITE loop transforms")
DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear")
DEFTIMEVAR (TV_TREE_LOOP_DISTRIBUTION, "tree loop distribution")
DEFTIMEVAR (TV_CHECK_DATA_DEPS , "tree check data dependences")
Index: gcc/tree-ssa-loop.c
===================================================================
--- gcc/tree-ssa-loop.c (.../trunk) (revision 138275)
+++ gcc/tree-ssa-loop.c (.../branches/graphite) (revision 139415)
@@ -287,6 +287,49 @@ struct gimple_opt_pass pass_linear_trans
}
};
+/* GRAPHITE optimizations. */
+
+static unsigned int
+graphite_transforms (void)
+{
+ if (!current_loops)
+ return 0;
+
+ graphite_transform_loops ();
+
+ return 0;
+}
+
+static bool
+gate_graphite_transforms (void)
+{
+ /* Enable -fgraphite pass if any one of the graphite optimization flags
+ is turned on. */
+ if (flag_loop_block || flag_loop_interchange || flag_loop_strip_mine)
+ flag_graphite = 1;
+
+ return flag_graphite != 0;
+}
+
+struct gimple_opt_pass pass_graphite_transforms =
+{
+ {
+ GIMPLE_PASS,
+ "graphite", /* name */
+ gate_graphite_transforms, /* gate */
+ graphite_transforms, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_GRAPHITE_TRANSFORMS, /* tv_id */
+ PROP_cfg | PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_verify_loops /* todo_flags_finish */
+ }
+};
+
/* Check the correctness of the data dependence analyzers. */
static unsigned int
Index: gcc/tree-vectorizer.c
===================================================================
--- gcc/tree-vectorizer.c (.../trunk) (revision 138275)
+++ gcc/tree-vectorizer.c (.../branches/graphite) (revision 139415)
@@ -200,7 +200,7 @@ rename_use_op (use_operand_p op_p)
/* Renames the variables in basic block BB. */
-static void
+void
rename_variables_in_bb (basic_block bb)
{
gimple_stmt_iterator gsi;
Index: gcc/tree-data-ref.c
===================================================================
--- gcc/tree-data-ref.c (.../trunk) (revision 138275)
+++ gcc/tree-data-ref.c (.../branches/graphite) (revision 139415)
@@ -1225,7 +1225,7 @@ disjoint_objects_p (tree a, tree b)
/* Returns false if we can prove that data references A and B do not alias,
true otherwise. */
-static bool
+bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
{
const_tree addr_a = DR_BASE_ADDRESS (a);
@@ -3304,6 +3304,24 @@ access_functions_are_affine_or_constant_
return true;
}
+/* Return true if we can create an affine data-ref for OP in STMT. */
+
+bool
+stmt_simple_memref_p (struct loop *loop, gimple stmt, tree op)
+{
+ data_reference_p dr;
+
+ if (really_constant_p (op))
+ return true;
+
+ dr = create_data_ref (loop, op, stmt, true);
+ if (!access_functions_are_affine_or_constant_p (dr, loop))
+ return false;
+
+ free_data_ref (dr);
+ return true;
+}
+
/* Initializes an equation for an OMEGA problem using the information
contained in the ACCESS_FUN. Returns true when the operation
succeeded.
@@ -4070,9 +4088,9 @@ get_references_in_stmt (gimple stmt, VEC
/* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
reference, returns false, otherwise returns true. NEST is the outermost
- loop of the loop nest in that the references should be analyzed. */
+ loop of the loop nest in which the references should be analyzed. */
-static bool
+bool
find_data_references_in_stmt (struct loop *nest, gimple stmt,
VEC (data_reference_p, heap) **datarefs)
{
@@ -4117,7 +4135,7 @@ find_data_references_in_stmt (struct loo
TODO: This function should be made smarter so that it can handle address
arithmetic as if they were array accesses, etc. */
-static tree
+tree
find_data_references_in_loop (struct loop *loop,
VEC (data_reference_p, heap) **datarefs)
{
@@ -4645,6 +4663,7 @@ create_rdg_edge_for_ddr (struct graph *r
e->data = XNEW (struct rdg_edge);
RDGE_LEVEL (e) = level;
+ RDGE_RELATION (e) = ddr;
/* Determines the type of the data dependence. */
if (DR_IS_READ (dra) && DR_IS_READ (drb))
@@ -4677,6 +4696,7 @@ create_rdg_edges_for_scalar (struct grap
e = add_edge (rdg, idef, use);
e->data = XNEW (struct rdg_edge);
RDGE_TYPE (e) = flow_dd;
+ RDGE_RELATION (e) = NULL;
}
}
@@ -4702,7 +4722,7 @@ create_rdg_edges (struct graph *rdg, VEC
/* Build the vertices of the reduced dependence graph RDG. */
-static void
+void
create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
{
int i, j;
@@ -4827,6 +4847,21 @@ hash_stmt_vertex_del (void *e)
scalar dependence. */
struct graph *
+build_empty_rdg (int n_stmts)
+{
+ int nb_data_refs = 10;
+ struct graph *rdg = new_graph (n_stmts);
+
+ rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
+ eq_stmt_vertex_info, hash_stmt_vertex_del);
+ return rdg;
+}
+
+/* Build the Reduced Dependence Graph (RDG) with one vertex per
+ statement of the loop nest, and one edge per data dependence or
+ scalar dependence. */
+
+struct graph *
build_rdg (struct loop *loop)
{
int nb_data_refs = 10;
@@ -4843,21 +4878,23 @@ build_rdg (struct loop *loop)
&dependence_relations);
if (!known_dependences_p (dependence_relations))
- goto end_rdg;
+ {
+ free_dependence_relations (dependence_relations);
+ free_data_refs (datarefs);
+ VEC_free (gimple, heap, stmts);
+
+ return rdg;
+ }
stmts_from_loop (loop, &stmts);
- rdg = new_graph (VEC_length (gimple, stmts));
+ rdg = build_empty_rdg (VEC_length (gimple, stmts));
rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
eq_stmt_vertex_info, hash_stmt_vertex_del);
create_rdg_vertices (rdg, stmts);
create_rdg_edges (rdg, dependence_relations);
- end_rdg:
- free_dependence_relations (dependence_relations);
- free_data_refs (datarefs);
VEC_free (gimple, heap, stmts);
-
return rdg;
}
Index: gcc/tree-data-ref.h
===================================================================
--- gcc/tree-data-ref.h (.../trunk) (revision 138275)
+++ gcc/tree-data-ref.h (.../branches/graphite) (revision 139415)
@@ -96,6 +96,8 @@ struct dr_alias
bitmap vops;
};
+typedef struct scop *scop_p;
+
/* Each vector of the access matrix represents a linear access
function for a subscript. First elements correspond to the
leftmost indices, ie. for a[i][j] the first vector corresponds to
@@ -170,16 +172,20 @@ struct data_reference
/* Behavior of the memory reference in the innermost loop. */
struct innermost_loop_behavior innermost;
- /* Decomposition to indices for alias analysis. */
+ /* Subscripts of this data reference. */
struct indices indices;
/* Alias information for the data reference. */
struct dr_alias alias;
+ /* The SCoP in which the data reference was analyzed. */
+ scop_p scop;
+
/* Matrix representation for the data access functions. */
struct access_matrix *access_matrix;
};
+#define DR_SCOP(DR) (DR)->scop
#define DR_STMT(DR) (DR)->stmt
#define DR_REF(DR) (DR)->ref
#define DR_BASE_OBJECT(DR) (DR)->indices.base_object
@@ -373,6 +379,8 @@ void dr_analyze_innermost (struct data_r
extern bool compute_data_dependences_for_loop (struct loop *, bool,
VEC (data_reference_p, heap) **,
VEC (ddr_p, heap) **);
+extern tree find_data_references_in_loop (struct loop *,
+ VEC (data_reference_p, heap) **);
extern void print_direction_vector (FILE *, lambda_vector, int);
extern void print_dir_vectors (FILE *, VEC (lambda_vector, heap) *, int);
extern void print_dist_vectors (FILE *, VEC (lambda_vector, heap) *, int);
@@ -392,10 +400,18 @@ extern void free_dependence_relation (st
extern void free_dependence_relations (VEC (ddr_p, heap) *);
extern void free_data_ref (data_reference_p);
extern void free_data_refs (VEC (data_reference_p, heap) *);
+extern bool find_data_references_in_stmt (struct loop *, gimple,
+ VEC (data_reference_p, heap) **);
struct data_reference *create_data_ref (struct loop *, tree, gimple, bool);
-bool find_loop_nest (struct loop *, VEC (loop_p, heap) **);
-void compute_all_dependences (VEC (data_reference_p, heap) *,
- VEC (ddr_p, heap) **, VEC (loop_p, heap) *, bool);
+extern bool find_loop_nest (struct loop *, VEC (loop_p, heap) **);
+extern void compute_all_dependences (VEC (data_reference_p, heap) *,
+ VEC (ddr_p, heap) **, VEC (loop_p, heap) *,
+ bool);
+
+extern void create_rdg_vertices (struct graph *, VEC (gimple, heap) *);
+extern bool dr_may_alias_p (const struct data_reference *,
+ const struct data_reference *);
+extern bool stmt_simple_memref_p (struct loop *, gimple, tree);
/* Return true when the DDR contains two data references that have the
same access functions. */
@@ -511,15 +527,21 @@ typedef struct rdg_edge
/* Type of the dependence. */
enum rdg_dep_type type;
- /* Levels of the dependence: the depth of the loops that
- carry the dependence. */
+ /* Levels of the dependence: the depth of the loops that carry the
+ dependence. */
unsigned level;
+
+ /* Dependence relation between data dependences, NULL when one of
+ the vertices is a scalar. */
+ ddr_p relation;
} *rdg_edge_p;
#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
#define RDGE_LEVEL(E) ((struct rdg_edge *) ((E)->data))->level
+#define RDGE_RELATION(E) ((struct rdg_edge *) ((E)->data))->relation
struct graph *build_rdg (struct loop *);
+struct graph *build_empty_rdg (int);
void free_rdg (struct graph *);
/* Return the index of the variable VAR in the LOOP_NEST array. */
@@ -561,7 +583,21 @@ void lambda_collect_parameters (VEC (dat
bool lambda_compute_access_matrices (VEC (data_reference_p, heap) *,
VEC (tree, heap) *, int);
-/* In tree-data-refs.c */
+/* In tree-data-ref.c */
void split_constant_offset (tree , tree *, tree *);
+/* Strongly connected components of the reduced data dependence graph. */
+
+typedef struct rdg_component
+{
+ int num;
+ VEC (int, heap) *vertices;
+} *rdgc;
+
+DEF_VEC_P (rdgc);
+DEF_VEC_ALLOC_P (rdgc, heap);
+
+DEF_VEC_P (bitmap);
+DEF_VEC_ALLOC_P (bitmap, heap);
+
#endif /* GCC_TREE_DATA_REF_H */
Index: gcc/lambda.h
===================================================================
--- gcc/lambda.h (.../trunk) (revision 138275)
+++ gcc/lambda.h (.../branches/graphite) (revision 139415)
@@ -39,6 +39,9 @@ DEF_VEC_ALLOC_P (lambda_vector_vec_p, he
all vectors are the same length). */
typedef lambda_vector *lambda_matrix;
+DEF_VEC_P (lambda_matrix);
+DEF_VEC_ALLOC_P (lambda_matrix, heap);
+
/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
matrix. Rather than use floats, we simply keep a single DENOMINATOR that
represents the denominator for every element in the matrix. */
@@ -213,6 +216,7 @@ void lambda_loopnest_to_gcc_loopnest (st
lambda_loopnest, lambda_trans_matrix,
struct obstack *);
void remove_iv (gimple);
+tree find_induction_var_from_exit_cond (struct loop *);
static inline void lambda_vector_negate (lambda_vector, lambda_vector, int);
static inline void lambda_vector_mult_const (lambda_vector, lambda_vector, int, int);
@@ -374,6 +378,33 @@ lambda_vector_matrix_mult (lambda_vector
dest[i] += mat[j][i] * vect[j];
}
+/* Compare two vectors returning an integer less than, equal to, or
+ greater than zero if the first argument is considered to be respectively
+ less than, equal to, or greater than the second.
+ We use the lexicographic order. */
+
+static inline int
+lambda_vector_compare (lambda_vector vec1, int length1, lambda_vector vec2,
+ int length2)
+{
+ int min_length;
+ int i;
+
+ if (length1 < length2)
+ min_length = length1;
+ else
+ min_length = length2;
+
+ for (i = 0; i < min_length; i++)
+ if (vec1[i] < vec2[i])
+ return -1;
+ else if (vec1[i] > vec2[i])
+ return 1;
+ else
+ continue;
+
+ return length1 - length2;
+}
/* Print out a vector VEC of length N to OUTFILE. */
Index: gcc/common.opt
===================================================================
--- gcc/common.opt (.../trunk) (revision 138275)
+++ gcc/common.opt (.../branches/graphite) (revision 139415)
@@ -543,6 +543,22 @@ Common Report Var(flag_gcse_after_reload
Perform global common subexpression elimination after register allocation
has finished
+fgraphite
+Common Report Var(flag_graphite)
+Enable in and out of Graphite representation
+
+floop-strip-mine
+Common Report Var(flag_loop_strip_mine) Optimization
+Enable Loop Strip Mining transformation
+
+floop-interchange
+Common Report Var(flag_loop_interchange) Optimization
+Enable Loop Interchange transformation
+
+floop-block
+Common Report Var(flag_loop_block) Optimization
+Enable Loop Blocking transformation
+
fguess-branch-probability
Common Report Var(flag_guess_branch_prob) Optimization
Enable guessing of branch probabilities
Index: gcc/lambda-code.c
===================================================================
--- gcc/lambda-code.c (.../trunk) (revision 138275)
+++ gcc/lambda-code.c (.../branches/graphite) (revision 139415)
@@ -147,7 +147,6 @@ static lambda_lattice lambda_lattice_new
static lambda_lattice lambda_lattice_compute_base (lambda_loopnest,
struct obstack *);
-static tree find_induction_var_from_exit_cond (struct loop *);
static bool can_convert_to_perfect_nest (struct loop *);
/* Create a new lambda body vector. */
@@ -1434,7 +1433,7 @@ gcc_loop_to_lambda_loop (struct loop *lo
/* Given a LOOP, find the induction variable it is testing against in the exit
condition. Return the induction variable if found, NULL otherwise. */
-static tree
+tree
find_induction_var_from_exit_cond (struct loop *loop)
{
gimple expr = get_loop_exit_condition (loop);
Index: gcc/cfgloop.c
===================================================================
--- gcc/cfgloop.c (.../trunk) (revision 138275)
+++ gcc/cfgloop.c (.../branches/graphite) (revision 139415)
@@ -1607,3 +1607,18 @@ single_exit (const struct loop *loop)
else
return NULL;
}
+
+/* Returns true when BB has an edge exiting LOOP. */
+
+bool
+is_loop_exit (struct loop *loop, basic_block bb)
+{
+ edge e;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ if (loop_exit_edge_p (loop, e))
+ return true;
+
+ return false;
+}
Index: gcc/cfgloop.h
===================================================================
--- gcc/cfgloop.h (.../trunk) (revision 138275)
+++ gcc/cfgloop.h (.../branches/graphite) (revision 139415)
@@ -227,6 +227,7 @@ extern int num_loop_insns (const struct
extern int average_num_loop_insns (const struct loop *);
extern unsigned get_loop_level (const struct loop *);
extern bool loop_exit_edge_p (const struct loop *, const_edge);
+extern bool is_loop_exit (struct loop *, basic_block);
extern void mark_loop_exit_edges (void);
/* Loops & cfg manipulation. */
@@ -279,6 +280,8 @@ extern bool can_duplicate_loop_p (const
#define DLTHE_FLAG_COMPLETTE_PEEL 4 /* Update frequencies expecting
a complete peeling. */
+extern struct loop *create_empty_loop_on_edge (edge, tree, tree, tree, tree,
+ tree *, struct loop *);
extern struct loop * duplicate_loop (struct loop *, struct loop *);
extern bool duplicate_loop_to_header_edge (struct loop *, edge,
unsigned, sbitmap, edge,
Index: gcc/tree-flow.h
===================================================================
--- gcc/tree-flow.h (.../trunk) (revision 138275)
+++ gcc/tree-flow.h (.../branches/graphite) (revision 139415)
@@ -1023,6 +1023,7 @@ bool gimple_duplicate_loop_to_header_edg
int);
struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *, edge);
void rename_variables_in_loop (struct loop *);
+void rename_variables_in_bb (basic_block bb);
struct loop *tree_ssa_loop_version (struct loop *, tree,
basic_block *);
tree expand_simple_operations (tree);
@@ -1116,6 +1117,10 @@ bool sra_type_can_be_decomposed_p (tree)
/* In tree-loop-linear.c */
extern void linear_transform_loops (void);
+extern unsigned perfect_loop_nest_depth (struct loop *);
+
+/* In graphite.c */
+extern void graphite_transform_loops (void);
/* In tree-data-ref.c */
extern void tree_check_data_deps (void);
Index: gcc/gimple.h
===================================================================
--- gcc/gimple.h (.../trunk) (revision 138275)
+++ gcc/gimple.h (.../branches/graphite) (revision 139415)
@@ -38,6 +38,12 @@ DEF_VEC_P(gimple_seq);
DEF_VEC_ALLOC_P(gimple_seq,gc);
DEF_VEC_ALLOC_P(gimple_seq,heap);
+/* For each block, the PHI nodes that need to be rewritten are stored into
+ these vectors. */
+typedef VEC(gimple, heap) *gimple_vec;
+DEF_VEC_P (gimple_vec);
+DEF_VEC_ALLOC_P (gimple_vec, heap);
+
enum gimple_code {
#define DEFGSCODE(SYM, STRING, STRUCT) SYM,
#include "gimple.def"
Index: gcc/tree-cfg.c
===================================================================
--- gcc/tree-cfg.c (.../trunk) (revision 138275)
+++ gcc/tree-cfg.c (.../branches/graphite) (revision 139415)
@@ -6108,7 +6108,7 @@ print_loops (FILE *file, int verbosity)
{
basic_block bb;
- bb = BASIC_BLOCK (NUM_FIXED_BLOCKS);
+ bb = ENTRY_BLOCK_PTR;
if (bb && bb->loop_father)
print_loop_and_siblings (file, bb->loop_father, 0, verbosity);
}
Index: gcc/passes.c
===================================================================
--- gcc/passes.c (.../trunk) (revision 138275)
+++ gcc/passes.c (.../branches/graphite) (revision 139415)
@@ -661,6 +661,7 @@ init_optimization_passes (void)
NEXT_PASS (pass_check_data_deps);
NEXT_PASS (pass_loop_distribution);
NEXT_PASS (pass_linear_transform);
+ NEXT_PASS (pass_graphite_transforms);
NEXT_PASS (pass_iv_canon);
NEXT_PASS (pass_if_conversion);
NEXT_PASS (pass_vectorize);