This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: How to handle loop iterator variable?
> In lambda-code.c:1858 you have some code that does a similar renaming:
>
> FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def)
> FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
> propagate_value (use_p, newiv);
>
>
The function replace_uses_by() also does the same renaming and it is
suitable in my case. So I have used it and renamed the iterator
variable. The problem with the iterator variable is solved.
Now I have deleted all the PHI nodes of loop_a and transferred and all
the statements one by one to loop_b.
So the statement
j_12 = D.1190_11 + j_24;
is now present in loop_b, but I am unable to create the phi node for
it in loop_b. Can you please tell me how can I create phi node for
j_24 ?
I am attaching the patch and .c file.
Thanks,
Sandeep.
>
/* Loop fusion. */
/* This pass performs loop fusion: for example, the loops
|loop_1
| A[i] = ...
|endloop_1
|loop_2
| ... = A[i]
|endloop_2
that becomes after fusion:
|loop_1
| A[i] = ...
| ... = A[i]
|endloop_1
*/
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "rtl.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
#include "lambda.h"
#include "langhooks.h"
#include "tree-vectorizer.h"
/* Returns true when a given loop is a parallel loop. */
static bool
is_parallel_loop (struct loop *loop)
{
VEC (ddr_p, heap) * dependence_relations;
VEC (data_reference_p, heap) * datarefs;
lambda_trans_matrix trans;
bool ret = false;
/* If the loop can be reversed, the iterations are independent. */
datarefs = VEC_alloc (data_reference_p, heap, 10);
dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
compute_data_dependences_for_loop (loop, true, &datarefs, &dependence_relations);
trans = lambda_trans_matrix_new (1, 1);
LTM_MATRIX (trans)[0][0] = -1;
if (lambda_transform_legal_p (trans, 1, dependence_relations))
{
ret = true;
}
free_dependence_relations (dependence_relations);
free_data_refs (datarefs);
if (ret == true)
fprintf (dump_file, " loop %d is a parallel loop\n ", loop->num);
return ret;
}
/* Returns true when a given loop is a sequential loop. */
static bool
is_sequential_loop (struct loop *loop)
{
if (is_parallel_loop (loop))
return false;
else
return true;
}
/* Returns true if there is no fusion preventing constraint between two given loops. */
static bool
no_fusion_preventing_constraint (struct loop *loop_a, struct loop *loop_b)
{
bool ret = true;
struct loop *father_loop;
father_loop = find_common_loop (loop_a, loop_b);
struct graph *rdg_father;
rdg_father = build_rdg (father_loop);
struct graph *rdg_a;
rdg_a = build_rdg (loop_a);
struct graph *rdg_b;
rdg_b = build_rdg (loop_b);
return ret;
}
/* Returns true when two loops can be fused. */
static bool
can_fuse_loops (struct loop *loop_a, struct loop *loop_b)
{
if ((is_parallel_loop (loop_a) && is_parallel_loop (loop_b) && no_fusion_preventing_constraint (loop_a, loop_b)) ||
(is_sequential_loop (loop_a) && is_sequential_loop (loop_b) && no_fusion_preventing_constraint (loop_a, loop_b)))
{
fprintf (dump_file, " can fuse loops %d %d ", loop_a->num, loop_b->num);
return true;
}
return false;
}
/* The following function fuses two loops. */
void
fuse_loops2 (struct loop *loop_a, struct loop *loop_b)
{
basic_block *bbs_a, *bbs_b;
struct loop *new_loop;
bbs_a = get_loop_body (loop_a);
bbs_b = get_loop_body (loop_b);
debug_loop (loop_a, 10);
debug_loop (loop_b, 10);
tree_merge_blocks (bbs_b[0], bbs_a[0]);
debug_loop (loop_a, 10);
debug_loop (loop_b, 10);
cancel_loop_tree (loop_a);
}
/* The following function fuses two loops. */
void
fuse_loops (struct loop *loop_a, struct loop *loop_b)
{
debug_loop (loop_a, 10);
debug_loop (loop_b, 10);
block_stmt_iterator bsi_a, bsi_a_last, bsi_b, bsi_b_last, bsi;
bsi_b_last = bsi_last (loop_b->header);
bsi_prev (&bsi_b_last);
tree phi, prev = NULL_TREE, next, use, def;
/* Detects the iterator variable of loop_b. */
for (phi = phi_nodes (loop_b->header);
phi;)
{
next = PHI_CHAIN (phi);
use = PHI_RESULT (phi);
phi = next;
}
/* Detects the iterator variable of loop_a. */
for (phi = phi_nodes (loop_a->header);
phi;)
{
next = PHI_CHAIN (phi);
def = PHI_RESULT (phi);
phi = next;
}
/*for (phi = phi_nodes (loop_a->header);
phi;)
{
next = PHI_CHAIN (phi);
create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)), loop_b->header);
phi = next;
} */
/* Replaces the itaerator variable of loop_a with that of loop_b. */
for (phi = phi_nodes (loop_a->header);
phi;)
{
next = PHI_CHAIN (phi);
replace_uses_by (def, use);
phi = next;
}
/* Remove PHI nodes of loop_a. */
for (phi = phi_nodes (loop_a->header);
phi;)
{
next = PHI_CHAIN (phi);
remove_phi_node (phi, prev, true);
phi = next;
}
/* Transfer all the statements one by one. */
for (bsi = bsi_start (loop_a->header); !bsi_end_p (bsi);)
{
if ((TREE_CODE (bsi_stmt (bsi)) != COND_EXPR) && (TREE_CODE (bsi_stmt (bsi)) != LABEL_EXPR))
{
update_stmt (bsi_stmt (bsi));
bsi_move_before (&bsi, &bsi_b_last);
fprintf (stderr, " transferred one statement. \n ");
fprintf (dump_file, " transferred one statement. \n ");
update_stmt (bsi_stmt (bsi));
}
else
{
bsi_next (&bsi);
}
}
debug_loop (loop_a, 10);
debug_loop (loop_b, 10);
update_ssa (TODO_update_ssa);
fprintf (stderr, "deleting loop %d \n", loop_a->num);
cancel_loop_tree (loop_a);
fprintf (stderr, "deleted loop %d \n", loop_a->num);
}
/* Returns true if loop_a is dependent on loop_b. */
static bool
is_dependent_on (struct loop *loop_a, struct loop *loop_b)
{
bool ret = false;
struct loop *loop_father;
loop_father = find_common_loop (loop_a, loop_b);
VEC (ddr_p, heap) * dependence_relations_a;
VEC (ddr_p, heap) * dependence_relations_b;
VEC (ddr_p, heap) * dependence_relations_father;
VEC (data_reference_p, heap) * datarefs_a;
VEC (data_reference_p, heap) * datarefs_b;
VEC (data_reference_p, heap) * datarefs_father;
datarefs_a = VEC_alloc (data_reference_p, heap, 10);
datarefs_b = VEC_alloc (data_reference_p, heap, 10);
datarefs_father = VEC_alloc (data_reference_p, heap, 10);
dependence_relations_a = VEC_alloc (ddr_p, heap, 10 * 10);
dependence_relations_b = VEC_alloc (ddr_p, heap, 10 * 10);
dependence_relations_father = VEC_alloc (ddr_p, heap, 10 * 10);
compute_data_dependences_for_loop (loop_a, true, &datarefs_a, &dependence_relations_a);
compute_data_dependences_for_loop (loop_b, true, &datarefs_b, &dependence_relations_b);
compute_data_dependences_for_loop (loop_father, true, &datarefs_father, &dependence_relations_father);
debug_data_dependence_relations (dependence_relations_a);
debug_data_dependence_relations (dependence_relations_b);
debug_data_dependence_relations (dependence_relations_father);
fprintf (dump_file, "dumping data dependences of a \n");
dump_data_dependence_relations (dump_file, dependence_relations_a);
fprintf (dump_file, "dumping data dependences of b \n");
dump_data_dependence_relations (dump_file, dependence_relations_b);
fprintf (dump_file, "dumping data dependences of father \n");
dump_data_dependence_relations (dump_file, dependence_relations_father);
free_dependence_relations (dependence_relations_a);
free_data_refs (datarefs_a);
free_dependence_relations (dependence_relations_b);
free_data_refs (datarefs_b);
free_dependence_relations (dependence_relations_father);
free_data_refs (datarefs_father);
return true;
}
/* Structure to hold the list of partitions. */
static struct partition
{
int num;
int list[10];
int present;
char type;
} ;
/* Create a graph original_graph. It's nodes are loops and it's edges are data dependencies. */
static struct graph *
create_original_graph (int num_loops)
{
struct graph *g = new_graph (num_loops);
struct loop *loop;
loop_iterator li;
FOR_EACH_LOOP (li, loop, 0)
{
struct partition *p = XNEW (struct partition);
p->num = 0;
p->list[p->num] = loop->num;
p->num++;
p->present = 1;
if (is_parallel_loop (loop))
{
p->type = 'P';
g->vertices[loop->num].data = p;
}
else
{
p->type = 'S';
g->vertices[loop->num].data = p;
}
}
FOR_EACH_LOOP (li, loop, 0)
{
struct loop *loop_dummy;
loop_iterator li_dummy;
FOR_EACH_LOOP (li_dummy, loop_dummy, 0)
{
if (loop != loop_dummy)
{
if (is_dependent_on (loop, loop_dummy))
{
if (no_fusion_preventing_constraint (loop, loop_dummy))
{
struct graph_edge *ge = add_edge (g, loop_dummy->num, loop->num);
ge->data = "FPE";
}
else
add_edge (g, loop_dummy->num, loop->num);
}
}
}
}
return g;
}
/* Create a component graph from the original graph. The parameter t denotes type of the component graph -
parallel or sequential. */
static struct graph*
create_component_graph (char t, int num_loops)
{
struct graph *og = create_original_graph (num_loops);
struct graph *g = new_graph (num_loops);
struct partition *p, *q, *r;
int i;
char opp;
if (t == 'P')
opp = 'S';
else
opp = 'P';
struct loop *loop;
loop_iterator li;
for (i = 1; i < num_loops; i++)
{
p = XNEW (struct partition);
p = og->vertices[i].data;
if (p->type == t)
{
q = XNEW (struct partition);
q->num = 0;
q->list[q->num] = i;
q->num++;
q->present = 1;
q->type = t;
g->vertices[i].data = q;
}
}
if (t == 'P')
FOR_EACH_LOOP (li, loop, 0)
{
struct loop *loop_dummy;
loop_iterator li_dummy;
FOR_EACH_LOOP (li_dummy, loop_dummy, 0)
{
if (loop != loop_dummy)
{
if (is_dependent_on (loop, loop_dummy)&& (is_parallel_loop (loop)) && (is_parallel_loop (loop_dummy)))
{
if (no_fusion_preventing_constraint (loop, loop_dummy))
{
struct graph_edge *ge = add_edge (g, loop_dummy->num, loop->num);
ge->data = "FPE";
}
else
add_edge (g, loop_dummy->num, loop->num);
}
}
}
}
if (t =='S')
FOR_EACH_LOOP (li, loop, 0)
{
struct loop *loop_dummy;
loop_iterator li_dummy;
FOR_EACH_LOOP (li_dummy, loop_dummy, 0)
{
if (loop != loop_dummy)
{
if (is_dependent_on (loop, loop_dummy)&& (is_sequential_loop (loop)) && (is_sequential_loop (loop_dummy)))
{
if (no_fusion_preventing_constraint (loop, loop_dummy))
{
struct graph_edge *ge = add_edge (g, loop_dummy->num, loop->num);
ge->data = "FPE";
}
else
add_edge (g, loop_dummy->num, loop->num);
}
}
}
}
for (i = 1; i < num_loops; i++)
{
p = XNEW (struct partition);
p = og->vertices[i].data;
if (p->type == opp)
{
struct graph_edge *ges;
for (ges = og->vertices[i].pred; ges; ges = ges->pred_next)
{
int source = ges->src;
struct graph_edge *ged;
for (ged = og->vertices[i].succ; ged; ged = ged->succ_next)
{
int destin = ged->dest;
q = XNEW (struct partition);
q = og->vertices[source].data;
r = XNEW (struct partition);
r = og->vertices[destin].data;
if ((q->type == t) && (r->type == t) && (q->list[0] != r->list[0]))
{
struct graph_edge *ge = add_edge (g, source, destin);
ge->data = "FPE";
}
}
}
}
}
return g;
}
/* Partitions the graph according to rules. And fuse the loops corresponding to the partitions. */
static void
do_partitions (struct graph *g)
{
int num = g->n_vertices;
int i;
struct partition *p, *q;
for (i = 1; i<num; i++)
{
p = XNEW (struct partition);
p = g->vertices[i].data;
struct graph_edge *ge;
if (p)
if (p->present == 1)
for (ge = g->vertices[i].pred; ge; ge = ge->pred_next)
{
q = XNEW (struct partition);
q = g->vertices[ge->src].data;
if (ge->data != "FPE")
{
p->list[p->num] = q->list[0];
p->num++;
q->present = 0;
}
}
}
}
/* Performs loop fusion according to the given graph. */
static void
fuse_according_to_graph (struct graph *g)
{
int num = g->n_vertices;
int i, j;
struct partition *p;
struct loop *loop;
loop_iterator li;
for (i = 1; i<num; i++)
{
p = XNEW (struct partition);
p = g->vertices[i].data;
if (p)
if (p->present == 1)
{
for (j = (p->num)-1; j > 0 ; j--)
{
FOR_EACH_LOOP (li, loop, 0)
{
if ((loop->num == p->list[j]) && (loop->next->num == p->list[j-1]))
fuse_loops (loop, loop->next);
}
}
}
}
}
/* Performs legal loops fusion in the current function. */
static unsigned int
tree_loop_fusion (void)
{
if (!current_loops)
return 0;
int num_loops = number_of_loops();
struct loop *loop;
loop_iterator li;
/* For all the loops in the program pick consecutive loops loop_a and loop_b. */
FOR_EACH_LOOP (li, loop, 0)
{
if (loop->next)
{
//if (can_fuse_loops (loop, loop->next))
{
/*if (is_dependent_on (loop, loop->next))
fprintf (stderr, "loop %d is dependent on loop %d \n\n", loop->num, loop->next->num);
else
fprintf (stderr, "loop %d is not dependent on loop %d \n\n", loop->num, loop->next->num);*/
//fuse_loops2 (loop, loop->next);
fuse_loops (loop, loop->next);
}
}
}
/* struct graph *original_graph_1 = new_graph (num_loops);
original_graph_1 = create_original_graph (num_loops);
dump_graph (dump_file, original_graph_1);
dump_graph (stderr, original_graph_1);
fprintf (dump_file, "printed the original_graph\n\n");
fprintf (stderr, "printed the original_graph\n\n");
struct graph *component_graph_parallel = create_component_graph ('P', num_loops);
dump_graph (dump_file, component_graph_parallel);
dump_graph (stderr, component_graph_parallel);
fprintf (dump_file, "printed the component parallel graph\n\n");
fprintf (stderr, "printed the component parallel graph\n\n");
dump_graph (dump_file, original_graph_1);
dump_graph (stderr, original_graph_1);
fprintf (dump_file, "printed the original_graph\n\n");
fprintf (stderr, "printed the original_graph\n\n");
do_partitions (component_graph_parallel);
dump_graph (dump_file, component_graph_parallel);
dump_graph (stderr, component_graph_parallel);
fprintf (dump_file, "printed the partitioned component parallel graph\n");
fprintf (stderr, "printed the partitioned component parallel graph\n");
fuse_according_to_graph (component_graph_parallel);*/
return 0;
}
static bool
gate_tree_loop_fusion (void)
{
return flag_tree_loop_fusion != 0;
}
struct gimple_opt_pass pass_loop_fusion =
{
{
GIMPLE_PASS,
"lfusion", /* name */
gate_tree_loop_fusion, /* gate */
tree_loop_fusion, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
TV_TREE_LOOP_FUSION, /* tv_id */
PROP_cfg | PROP_ssa, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
TODO_dump_func | TODO_verify_loops | TODO_update_ssa, /* todo_flags_finish */
}
};
Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi (revision 134556)
+++ doc/invoke.texi (working copy)
@@ -355,6 +355,7 @@
-fstrict-aliasing -fstrict-overflow -fthread-jumps -ftracer -ftree-ccp @gol
-ftree-ch -ftree-copy-prop -ftree-copyrename -ftree-dce @gol
-ftree-dominator-opts -ftree-dse -ftree-fre -ftree-loop-im @gol
+-ftree-loop-fusion @gol
-ftree-loop-distribution @gol
-ftree-loop-ivcanon -ftree-loop-linear -ftree-loop-optimize @gol
-ftree-parallelize-loops=@var{n} -ftree-pre -ftree-reassoc -ftree-salias @gol
@@ -5888,6 +5889,9 @@
Compare the results of several data dependence analyzers. This option
is used for debugging the data dependence analyzers.
+@item -ftree-loop-fusion
+Perform loop fusion.
+
@item -ftree-loop-distribution
Perform loop distribution. This flag can improve cache performance on
big loop bodies and allow further loop optimizations, like
Index: tree-pass.h
===================================================================
--- tree-pass.h (revision 134556)
+++ tree-pass.h (working copy)
@@ -287,6 +287,7 @@
extern struct gimple_opt_pass pass_empty_loop;
extern struct gimple_opt_pass pass_record_bounds;
extern struct gimple_opt_pass pass_if_conversion;
+extern struct gimple_opt_pass pass_loop_fusion;
extern struct gimple_opt_pass pass_loop_distribution;
extern struct gimple_opt_pass pass_vectorize;
extern struct gimple_opt_pass pass_complete_unroll;
Index: tree-loop-fusion.c
===================================================================
--- tree-loop-fusion.c (revision 0)
+++ tree-loop-fusion.c (revision 0)
@@ -0,0 +1,589 @@
+/* Loop fusion. */
+
+/* This pass performs loop fusion: for example, the loops
+
+ |loop_1
+ | A[i] = ...
+ |endloop_1
+
+
+ |loop_2
+ | ... = A[i]
+ |endloop_2
+
+ that becomes after fusion:
+
+ |loop_1
+ | A[i] = ...
+ | ... = A[i]
+ |endloop_1
+
+*/
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "ggc.h"
+#include "tree.h"
+#include "target.h"
+
+#include "rtl.h"
+#include "basic-block.h"
+#include "diagnostic.h"
+#include "tree-flow.h"
+#include "tree-dump.h"
+#include "timevar.h"
+#include "cfgloop.h"
+#include "expr.h"
+#include "optabs.h"
+#include "tree-chrec.h"
+#include "tree-data-ref.h"
+#include "tree-scalar-evolution.h"
+#include "tree-pass.h"
+#include "lambda.h"
+#include "langhooks.h"
+#include "tree-vectorizer.h"
+
+
+
+
+/* Returns true when a given loop is a parallel loop. */
+
+static bool
+is_parallel_loop (struct loop *loop)
+{
+ VEC (ddr_p, heap) * dependence_relations;
+ VEC (data_reference_p, heap) * datarefs;
+ lambda_trans_matrix trans;
+ bool ret = false;
+
+ /* If the loop can be reversed, the iterations are independent. */
+ datarefs = VEC_alloc (data_reference_p, heap, 10);
+ dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
+ compute_data_dependences_for_loop (loop, true, &datarefs, &dependence_relations);
+ trans = lambda_trans_matrix_new (1, 1);
+ LTM_MATRIX (trans)[0][0] = -1;
+
+ if (lambda_transform_legal_p (trans, 1, dependence_relations))
+ {
+ ret = true;
+ }
+
+ free_dependence_relations (dependence_relations);
+ free_data_refs (datarefs);
+ if (ret == true)
+ fprintf (dump_file, " loop %d is a parallel loop\n ", loop->num);
+ return ret;
+}
+
+/* Returns true when a given loop is a sequential loop. */
+
+static bool
+is_sequential_loop (struct loop *loop)
+{
+ if (is_parallel_loop (loop))
+ return false;
+ else
+ return true;
+}
+
+/* Returns true if there is no fusion preventing constraint between two given loops. */
+
+static bool
+no_fusion_preventing_constraint (struct loop *loop_a, struct loop *loop_b)
+{
+ bool ret = true;
+ struct loop *father_loop;
+ father_loop = find_common_loop (loop_a, loop_b);
+ struct graph *rdg_father;
+ rdg_father = build_rdg (father_loop);
+ struct graph *rdg_a;
+ rdg_a = build_rdg (loop_a);
+ struct graph *rdg_b;
+ rdg_b = build_rdg (loop_b);
+ return ret;
+}
+
+/* Returns true when two loops can be fused. */
+
+static bool
+can_fuse_loops (struct loop *loop_a, struct loop *loop_b)
+{
+ if ((is_parallel_loop (loop_a) && is_parallel_loop (loop_b) && no_fusion_preventing_constraint (loop_a, loop_b)) ||
+ (is_sequential_loop (loop_a) && is_sequential_loop (loop_b) && no_fusion_preventing_constraint (loop_a, loop_b)))
+ {
+ fprintf (dump_file, " can fuse loops %d %d ", loop_a->num, loop_b->num);
+ return true;
+ }
+
+ return false;
+}
+
+/* The following function fuses two loops. */
+
+void
+fuse_loops2 (struct loop *loop_a, struct loop *loop_b)
+{
+ basic_block *bbs_a, *bbs_b;
+ struct loop *new_loop;
+ bbs_a = get_loop_body (loop_a);
+ bbs_b = get_loop_body (loop_b);
+ debug_loop (loop_a, 10);
+ debug_loop (loop_b, 10);
+ tree_merge_blocks (bbs_b[0], bbs_a[0]);
+ debug_loop (loop_a, 10);
+ debug_loop (loop_b, 10);
+ cancel_loop_tree (loop_a);
+
+}
+
+
+/* The following function fuses two loops. */
+
+void
+fuse_loops (struct loop *loop_a, struct loop *loop_b)
+{
+ debug_loop (loop_a, 10);
+ debug_loop (loop_b, 10);
+ block_stmt_iterator bsi_a, bsi_a_last, bsi_b, bsi_b_last, bsi;
+ bsi_b_last = bsi_last (loop_b->header);
+ bsi_prev (&bsi_b_last);
+ tree phi, prev = NULL_TREE, next, use, def;
+
+ /* Detects the iterator variable of loop_b. */
+ for (phi = phi_nodes (loop_b->header);
+ phi;)
+ {
+ next = PHI_CHAIN (phi);
+ use = PHI_RESULT (phi);
+ phi = next;
+ }
+
+ /* Detects the iterator variable of loop_a. */
+ for (phi = phi_nodes (loop_a->header);
+ phi;)
+ {
+ next = PHI_CHAIN (phi);
+ def = PHI_RESULT (phi);
+ phi = next;
+ }
+
+ /*for (phi = phi_nodes (loop_a->header);
+ phi;)
+ {
+ next = PHI_CHAIN (phi);
+ create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)), loop_b->header);
+ phi = next;
+ } */
+
+ /* Replaces the itaerator variable of loop_a with that of loop_b. */
+ for (phi = phi_nodes (loop_a->header);
+ phi;)
+ {
+ next = PHI_CHAIN (phi);
+ replace_uses_by (def, use);
+ phi = next;
+ }
+
+ /* Remove PHI nodes of loop_a. */
+ for (phi = phi_nodes (loop_a->header);
+ phi;)
+ {
+ next = PHI_CHAIN (phi);
+ remove_phi_node (phi, prev, true);
+ phi = next;
+ }
+
+ /* Transfer all the statements one by one. */
+ for (bsi = bsi_start (loop_a->header); !bsi_end_p (bsi);)
+ {
+ if ((TREE_CODE (bsi_stmt (bsi)) != COND_EXPR) && (TREE_CODE (bsi_stmt (bsi)) != LABEL_EXPR))
+ {
+ update_stmt (bsi_stmt (bsi));
+ bsi_move_before (&bsi, &bsi_b_last);
+ fprintf (stderr, " transferred one statement. \n ");
+ fprintf (dump_file, " transferred one statement. \n ");
+ update_stmt (bsi_stmt (bsi));
+ }
+ else
+ {
+ bsi_next (&bsi);
+ }
+ }
+
+ debug_loop (loop_a, 10);
+ debug_loop (loop_b, 10);
+ update_ssa (TODO_update_ssa);
+ fprintf (stderr, "deleting loop %d \n", loop_a->num);
+ cancel_loop_tree (loop_a);
+ fprintf (stderr, "deleted loop %d \n", loop_a->num);
+}
+
+
+/* Returns true if loop_a is dependent on loop_b. */
+
+static bool
+is_dependent_on (struct loop *loop_a, struct loop *loop_b)
+{
+ bool ret = false;
+ struct loop *loop_father;
+ loop_father = find_common_loop (loop_a, loop_b);
+
+ VEC (ddr_p, heap) * dependence_relations_a;
+ VEC (ddr_p, heap) * dependence_relations_b;
+ VEC (ddr_p, heap) * dependence_relations_father;
+
+ VEC (data_reference_p, heap) * datarefs_a;
+ VEC (data_reference_p, heap) * datarefs_b;
+ VEC (data_reference_p, heap) * datarefs_father;
+
+ datarefs_a = VEC_alloc (data_reference_p, heap, 10);
+ datarefs_b = VEC_alloc (data_reference_p, heap, 10);
+ datarefs_father = VEC_alloc (data_reference_p, heap, 10);
+
+ dependence_relations_a = VEC_alloc (ddr_p, heap, 10 * 10);
+ dependence_relations_b = VEC_alloc (ddr_p, heap, 10 * 10);
+ dependence_relations_father = VEC_alloc (ddr_p, heap, 10 * 10);
+
+ compute_data_dependences_for_loop (loop_a, true, &datarefs_a, &dependence_relations_a);
+ compute_data_dependences_for_loop (loop_b, true, &datarefs_b, &dependence_relations_b);
+ compute_data_dependences_for_loop (loop_father, true, &datarefs_father, &dependence_relations_father);
+
+ debug_data_dependence_relations (dependence_relations_a);
+ debug_data_dependence_relations (dependence_relations_b);
+ debug_data_dependence_relations (dependence_relations_father);
+
+ fprintf (dump_file, "dumping data dependences of a \n");
+ dump_data_dependence_relations (dump_file, dependence_relations_a);
+ fprintf (dump_file, "dumping data dependences of b \n");
+ dump_data_dependence_relations (dump_file, dependence_relations_b);
+ fprintf (dump_file, "dumping data dependences of father \n");
+ dump_data_dependence_relations (dump_file, dependence_relations_father);
+
+ free_dependence_relations (dependence_relations_a);
+ free_data_refs (datarefs_a);
+ free_dependence_relations (dependence_relations_b);
+ free_data_refs (datarefs_b);
+ free_dependence_relations (dependence_relations_father);
+ free_data_refs (datarefs_father);
+
+ return true;
+}
+
+/* Structure to hold the list of partitions. */
+
+static struct partition
+{
+ int num;
+ int list[10];
+ int present;
+ char type;
+} ;
+
+
+/* Create a graph original_graph. It's nodes are loops and it's edges are data dependencies. */
+
+static struct graph *
+create_original_graph (int num_loops)
+{
+ struct graph *g = new_graph (num_loops);
+ struct loop *loop;
+ loop_iterator li;
+ FOR_EACH_LOOP (li, loop, 0)
+ {
+ struct partition *p = XNEW (struct partition);
+ p->num = 0;
+ p->list[p->num] = loop->num;
+ p->num++;
+ p->present = 1;
+
+ if (is_parallel_loop (loop))
+ {
+ p->type = 'P';
+ g->vertices[loop->num].data = p;
+ }
+ else
+ {
+ p->type = 'S';
+ g->vertices[loop->num].data = p;
+ }
+ }
+ FOR_EACH_LOOP (li, loop, 0)
+ {
+ struct loop *loop_dummy;
+ loop_iterator li_dummy;
+ FOR_EACH_LOOP (li_dummy, loop_dummy, 0)
+ {
+ if (loop != loop_dummy)
+ {
+ if (is_dependent_on (loop, loop_dummy))
+ {
+ if (no_fusion_preventing_constraint (loop, loop_dummy))
+ {
+ struct graph_edge *ge = add_edge (g, loop_dummy->num, loop->num);
+ ge->data = "FPE";
+ }
+ else
+ add_edge (g, loop_dummy->num, loop->num);
+ }
+ }
+ }
+ }
+
+
+ return g;
+}
+
+/* Create a component graph from the original graph. The parameter t denotes type of the component graph -
+ parallel or sequential. */
+
+static struct graph*
+create_component_graph (char t, int num_loops)
+{
+ struct graph *og = create_original_graph (num_loops);
+ struct graph *g = new_graph (num_loops);
+ struct partition *p, *q, *r;
+ int i;
+ char opp;
+ if (t == 'P')
+ opp = 'S';
+ else
+ opp = 'P';
+ struct loop *loop;
+ loop_iterator li;
+
+
+ for (i = 1; i < num_loops; i++)
+ {
+ p = XNEW (struct partition);
+ p = og->vertices[i].data;
+ if (p->type == t)
+ {
+ q = XNEW (struct partition);
+ q->num = 0;
+ q->list[q->num] = i;
+ q->num++;
+ q->present = 1;
+ q->type = t;
+ g->vertices[i].data = q;
+ }
+ }
+ if (t == 'P')
+ FOR_EACH_LOOP (li, loop, 0)
+ {
+ struct loop *loop_dummy;
+ loop_iterator li_dummy;
+ FOR_EACH_LOOP (li_dummy, loop_dummy, 0)
+ {
+ if (loop != loop_dummy)
+ {
+ if (is_dependent_on (loop, loop_dummy)&& (is_parallel_loop (loop)) && (is_parallel_loop (loop_dummy)))
+ {
+ if (no_fusion_preventing_constraint (loop, loop_dummy))
+ {
+ struct graph_edge *ge = add_edge (g, loop_dummy->num, loop->num);
+ ge->data = "FPE";
+ }
+ else
+ add_edge (g, loop_dummy->num, loop->num);
+ }
+ }
+ }
+ }
+ if (t =='S')
+ FOR_EACH_LOOP (li, loop, 0)
+ {
+ struct loop *loop_dummy;
+ loop_iterator li_dummy;
+ FOR_EACH_LOOP (li_dummy, loop_dummy, 0)
+ {
+ if (loop != loop_dummy)
+ {
+ if (is_dependent_on (loop, loop_dummy)&& (is_sequential_loop (loop)) && (is_sequential_loop (loop_dummy)))
+ {
+ if (no_fusion_preventing_constraint (loop, loop_dummy))
+ {
+ struct graph_edge *ge = add_edge (g, loop_dummy->num, loop->num);
+ ge->data = "FPE";
+ }
+ else
+ add_edge (g, loop_dummy->num, loop->num);
+ }
+ }
+ }
+ }
+
+ for (i = 1; i < num_loops; i++)
+ {
+ p = XNEW (struct partition);
+ p = og->vertices[i].data;
+ if (p->type == opp)
+ {
+ struct graph_edge *ges;
+ for (ges = og->vertices[i].pred; ges; ges = ges->pred_next)
+ {
+ int source = ges->src;
+ struct graph_edge *ged;
+ for (ged = og->vertices[i].succ; ged; ged = ged->succ_next)
+ {
+ int destin = ged->dest;
+ q = XNEW (struct partition);
+ q = og->vertices[source].data;
+ r = XNEW (struct partition);
+ r = og->vertices[destin].data;
+ if ((q->type == t) && (r->type == t) && (q->list[0] != r->list[0]))
+ {
+ struct graph_edge *ge = add_edge (g, source, destin);
+ ge->data = "FPE";
+ }
+ }
+ }
+ }
+ }
+ return g;
+
+}
+
+
+/* Partitions the graph according to rules. And fuse the loops corresponding to the partitions. */
+
+static void
+do_partitions (struct graph *g)
+{
+ int num = g->n_vertices;
+ int i;
+ struct partition *p, *q;
+ for (i = 1; i<num; i++)
+ {
+ p = XNEW (struct partition);
+ p = g->vertices[i].data;
+ struct graph_edge *ge;
+ if (p)
+ if (p->present == 1)
+ for (ge = g->vertices[i].pred; ge; ge = ge->pred_next)
+ {
+ q = XNEW (struct partition);
+ q = g->vertices[ge->src].data;
+ if (ge->data != "FPE")
+ {
+ p->list[p->num] = q->list[0];
+ p->num++;
+ q->present = 0;
+ }
+ }
+ }
+
+}
+
+/* Performs loop fusion according to the given graph. */
+
+static void
+fuse_according_to_graph (struct graph *g)
+{
+ int num = g->n_vertices;
+ int i, j;
+ struct partition *p;
+ struct loop *loop;
+ loop_iterator li;
+ for (i = 1; i<num; i++)
+ {
+ p = XNEW (struct partition);
+ p = g->vertices[i].data;
+ if (p)
+ if (p->present == 1)
+ {
+ for (j = (p->num)-1; j > 0 ; j--)
+ {
+ FOR_EACH_LOOP (li, loop, 0)
+ {
+ if ((loop->num == p->list[j]) && (loop->next->num == p->list[j-1]))
+ fuse_loops (loop, loop->next);
+ }
+ }
+ }
+ }
+
+}
+
+
+/* Performs legal loops fusion in the current function. */
+
+static unsigned int
+tree_loop_fusion (void)
+{
+ if (!current_loops)
+ return 0;
+ int num_loops = number_of_loops();
+ struct loop *loop;
+ loop_iterator li;
+
+ /* For all the loops in the program pick consecutive loops loop_a and loop_b. */
+ FOR_EACH_LOOP (li, loop, 0)
+ {
+ if (loop->next)
+ {
+ //if (can_fuse_loops (loop, loop->next))
+ {
+ /*if (is_dependent_on (loop, loop->next))
+ fprintf (stderr, "loop %d is dependent on loop %d \n\n", loop->num, loop->next->num);
+ else
+ fprintf (stderr, "loop %d is not dependent on loop %d \n\n", loop->num, loop->next->num);*/
+ //fuse_loops2 (loop, loop->next);
+ fuse_loops (loop, loop->next);
+ }
+ }
+
+ }
+
+ /* struct graph *original_graph_1 = new_graph (num_loops);
+ original_graph_1 = create_original_graph (num_loops);
+ dump_graph (dump_file, original_graph_1);
+ dump_graph (stderr, original_graph_1);
+ fprintf (dump_file, "printed the original_graph\n\n");
+ fprintf (stderr, "printed the original_graph\n\n");
+ struct graph *component_graph_parallel = create_component_graph ('P', num_loops);
+ dump_graph (dump_file, component_graph_parallel);
+ dump_graph (stderr, component_graph_parallel);
+ fprintf (dump_file, "printed the component parallel graph\n\n");
+ fprintf (stderr, "printed the component parallel graph\n\n");
+ dump_graph (dump_file, original_graph_1);
+ dump_graph (stderr, original_graph_1);
+ fprintf (dump_file, "printed the original_graph\n\n");
+ fprintf (stderr, "printed the original_graph\n\n");
+ do_partitions (component_graph_parallel);
+ dump_graph (dump_file, component_graph_parallel);
+ dump_graph (stderr, component_graph_parallel);
+ fprintf (dump_file, "printed the partitioned component parallel graph\n");
+ fprintf (stderr, "printed the partitioned component parallel graph\n");
+ fuse_according_to_graph (component_graph_parallel);*/
+
+
+ return 0;
+}
+
+static bool
+gate_tree_loop_fusion (void)
+{
+ return flag_tree_loop_fusion != 0;
+}
+
+struct gimple_opt_pass pass_loop_fusion =
+{
+ {
+ GIMPLE_PASS,
+ "lfusion", /* name */
+ gate_tree_loop_fusion, /* gate */
+ tree_loop_fusion, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_TREE_LOOP_FUSION, /* tv_id */
+ PROP_cfg | PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_func | TODO_verify_loops | TODO_update_ssa, /* todo_flags_finish */
+ }
+};
+
Index: timevar.def
===================================================================
--- timevar.def (revision 134556)
+++ timevar.def (working copy)
@@ -125,6 +125,7 @@
DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops")
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization")
DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear")
+DEFTIMEVAR (TV_TREE_LOOP_FUSION , "tree loop fusion")
DEFTIMEVAR (TV_TREE_LOOP_DISTRIBUTION, "tree loop distribution")
DEFTIMEVAR (TV_CHECK_DATA_DEPS , "tree check data dependences")
DEFTIMEVAR (TV_TREE_PREFETCH , "tree prefetching")
Index: common.opt
===================================================================
--- common.opt (revision 134556)
+++ common.opt (working copy)
@@ -1115,6 +1115,10 @@
Common Report Var(flag_tree_fre) Optimization
Enable Full Redundancy Elimination (FRE) on trees
+ftree-loop-fusion
+Common Report Var(flag_tree_loop_fusion) Optimization
+Enable loop fusion on trees
+
ftree-loop-distribution
Common Report Var(flag_tree_loop_distribution) Optimization
Enable loop distribution on trees
Index: Makefile.in
===================================================================
--- Makefile.in (revision 134556)
+++ Makefile.in (working copy)
@@ -1147,6 +1147,7 @@
tree-if-conv.o \
tree-into-ssa.o \
tree-iterator.o \
+ tree-loop-fusion.o \
tree-loop-distribution.o \
tree-loop-linear.o \
tree-nested.o \
@@ -2289,6 +2290,11 @@
$(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
tree-pass.h $(TREE_DATA_REF_H) $(SCEV_H) $(EXPR_H) $(LAMBDA_H) \
$(TARGET_H) tree-chrec.h $(OBSTACK_H)
+tree-loop-fusion.o: tree-loop-fusion.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) \
+ $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
+ tree-pass.h $(TREE_DATA_REF_H) $(SCEV_H) $(EXPR_H) \
+ $(TARGET_H) tree-chrec.h tree-vectorizer.h
tree-loop-distribution.o: tree-loop-distribution.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TM_H) $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) \
$(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
Index: tree-cfg.c
===================================================================
--- tree-cfg.c (revision 134556)
+++ tree-cfg.c (working copy)
@@ -104,7 +104,7 @@
static inline void change_bb_for_stmt (tree t, basic_block bb);
/* Flowgraph optimization and cleanup. */
-static void tree_merge_blocks (basic_block, basic_block);
+extern void tree_merge_blocks (basic_block, basic_block);
static bool tree_can_merge_blocks_p (basic_block, basic_block);
static void remove_bb (basic_block);
static edge find_taken_edge_computed_goto (basic_block, tree);
@@ -1277,7 +1277,7 @@
/* Merge block B into block A. */
-static void
+void
tree_merge_blocks (basic_block a, basic_block b)
{
block_stmt_iterator bsi;
Index: passes.c
===================================================================
--- passes.c (revision 134556)
+++ passes.c (working copy)
@@ -627,6 +627,7 @@
NEXT_PASS (pass_empty_loop);
NEXT_PASS (pass_record_bounds);
NEXT_PASS (pass_check_data_deps);
+ NEXT_PASS (pass_loop_fusion);
NEXT_PASS (pass_loop_distribution);
NEXT_PASS (pass_linear_transform);
NEXT_PASS (pass_iv_canon);