This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: How to handle loop iterator variable?


> In lambda-code.c:1858 you have some code that does a similar renaming:
>
> FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def)
>  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
>    propagate_value (use_p, newiv);
>
>
The function replace_uses_by() also does the same renaming and it is
suitable in my case. So I have used it and renamed the iterator
variable. The problem with the iterator variable is solved.

Now I have deleted all the PHI nodes of loop_a and transferred and all
the statements one by one to loop_b.
So the statement
j_12 = D.1190_11 + j_24;
is now present in loop_b, but I am unable to create the phi node for
it in loop_b. Can you please tell me how can I create phi node for
j_24 ?

I am attaching the patch and .c file.

Thanks,
Sandeep.

>
/* Loop fusion. */

/* This pass performs loop fusion: for example, the loops

	|loop_1
	|  A[i] = ...
	|endloop_1


	|loop_2
	|  ... = A[i]
	|endloop_2

   that becomes after fusion:

	|loop_1
	|  A[i] = ...
	|  ... = A[i]
	|endloop_1

*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"

#include "rtl.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-pass.h"
#include "lambda.h"
#include "langhooks.h"
#include "tree-vectorizer.h"




/* Returns true when a given loop is a parallel loop.  */

static bool
is_parallel_loop (struct loop *loop)
{
  VEC (ddr_p, heap) * dependence_relations;
  VEC (data_reference_p, heap) * datarefs;
  lambda_trans_matrix trans;
  bool ret = false;
  
  /* If the loop can be reversed, the iterations are independent.  */
  datarefs = VEC_alloc (data_reference_p, heap, 10);
  dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
  compute_data_dependences_for_loop (loop, true, &datarefs, &dependence_relations);
  trans = lambda_trans_matrix_new (1, 1);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
    }

  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);
  if (ret == true)
    fprintf (dump_file, " loop %d is a parallel loop\n ", loop->num);
  return ret;
}

/* Returns true when a given loop is a sequential loop.  */

static bool
is_sequential_loop (struct loop *loop)
{
  if (is_parallel_loop (loop))
    return false;
  else
    return true;
}

/* Returns true if there is no fusion preventing constraint between two given loops.  */

static bool
no_fusion_preventing_constraint (struct loop *loop_a, struct loop *loop_b)
{
  bool ret = true;
  struct loop *father_loop;
  father_loop = find_common_loop (loop_a, loop_b);
  struct graph *rdg_father;
  rdg_father = build_rdg (father_loop);
  struct graph *rdg_a;
  rdg_a = build_rdg (loop_a);
  struct graph *rdg_b;
  rdg_b = build_rdg (loop_b);
  return ret;
}

/* Returns true when two loops can be fused.  */

static bool
can_fuse_loops (struct loop *loop_a, struct loop *loop_b)
{
  if ((is_parallel_loop (loop_a) && is_parallel_loop (loop_b) && no_fusion_preventing_constraint (loop_a, loop_b)) || 
      (is_sequential_loop (loop_a) && is_sequential_loop (loop_b) && no_fusion_preventing_constraint (loop_a, loop_b)))
    {
      fprintf (dump_file, " can fuse loops %d %d ", loop_a->num, loop_b->num);
      return true;
    }

  return false;
}

/* The following function fuses two loops.  */

void
fuse_loops2 (struct loop *loop_a, struct loop *loop_b)
{
  basic_block *bbs_a, *bbs_b;
  struct loop *new_loop;
  bbs_a = get_loop_body (loop_a);
  bbs_b = get_loop_body (loop_b);
  debug_loop (loop_a, 10);
  debug_loop (loop_b, 10); 
  tree_merge_blocks (bbs_b[0], bbs_a[0]);
  debug_loop (loop_a, 10);
  debug_loop (loop_b, 10); 
  cancel_loop_tree (loop_a);
  
}


/* The following function fuses two loops.  */

void
fuse_loops (struct loop *loop_a, struct loop *loop_b)
{
  debug_loop (loop_a, 10);
  debug_loop (loop_b, 10); 
  block_stmt_iterator bsi_a, bsi_a_last, bsi_b, bsi_b_last, bsi;
  bsi_b_last = bsi_last (loop_b->header);
  bsi_prev (&bsi_b_last);
  tree phi, prev = NULL_TREE, next, use, def;

  /* Detects the iterator variable of loop_b.  */
  for (phi = phi_nodes (loop_b->header);
       phi;) 
    {
      next = PHI_CHAIN (phi);
      use = PHI_RESULT (phi);
      phi = next;
    } 

  /* Detects the iterator variable of loop_a.  */
  for (phi = phi_nodes (loop_a->header);
       phi;) 
    {
      next = PHI_CHAIN (phi);
      def = PHI_RESULT (phi);
      phi = next;
    } 
  
  /*for (phi = phi_nodes (loop_a->header);
       phi;)
    {
      next = PHI_CHAIN (phi);
      create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)), loop_b->header);
      phi = next;
    } */

  /* Replaces the itaerator variable of loop_a with that of loop_b.  */
  for (phi = phi_nodes (loop_a->header);
       phi;) 
    {
      next = PHI_CHAIN (phi);
      replace_uses_by (def, use);
      phi = next;
    } 

  /* Remove PHI nodes of loop_a.  */
  for (phi = phi_nodes (loop_a->header);
       phi;)
    {
      next = PHI_CHAIN (phi);
      remove_phi_node (phi, prev, true);
      phi = next;
    } 

  /* Transfer all the statements one by one.  */
  for (bsi = bsi_start (loop_a->header); !bsi_end_p (bsi);)
    {
      if ((TREE_CODE (bsi_stmt (bsi)) != COND_EXPR) && (TREE_CODE (bsi_stmt (bsi)) != LABEL_EXPR))
        {
          update_stmt (bsi_stmt (bsi)); 
	  bsi_move_before (&bsi, &bsi_b_last);
          fprintf (stderr, " transferred one statement. \n ");  
          fprintf (dump_file, " transferred one statement. \n ");  
          update_stmt (bsi_stmt (bsi)); 
        } 
      else
        {
	  bsi_next (&bsi);
        }
     } 
  
  debug_loop (loop_a, 10);
  debug_loop (loop_b, 10); 
  update_ssa (TODO_update_ssa);
  fprintf (stderr, "deleting loop %d \n", loop_a->num);
  cancel_loop_tree (loop_a);
  fprintf (stderr, "deleted loop %d \n", loop_a->num);
}


/* Returns true if  loop_a is dependent on loop_b.  */

static bool
is_dependent_on (struct loop *loop_a, struct loop *loop_b)
{
  bool ret = false;
  struct loop *loop_father;
  loop_father = find_common_loop (loop_a, loop_b);

  VEC (ddr_p, heap) * dependence_relations_a;
  VEC (ddr_p, heap) * dependence_relations_b;
  VEC (ddr_p, heap) * dependence_relations_father;

  VEC (data_reference_p, heap) * datarefs_a;
  VEC (data_reference_p, heap) * datarefs_b;
  VEC (data_reference_p, heap) * datarefs_father;
  
  datarefs_a = VEC_alloc (data_reference_p, heap, 10);
  datarefs_b = VEC_alloc (data_reference_p, heap, 10);
  datarefs_father = VEC_alloc (data_reference_p, heap, 10);

  dependence_relations_a = VEC_alloc (ddr_p, heap, 10 * 10);
  dependence_relations_b = VEC_alloc (ddr_p, heap, 10 * 10);
  dependence_relations_father = VEC_alloc (ddr_p, heap, 10 * 10);

  compute_data_dependences_for_loop (loop_a, true, &datarefs_a, &dependence_relations_a); 
  compute_data_dependences_for_loop (loop_b, true, &datarefs_b, &dependence_relations_b);
  compute_data_dependences_for_loop (loop_father, true, &datarefs_father, &dependence_relations_father);
  
  debug_data_dependence_relations (dependence_relations_a);
  debug_data_dependence_relations (dependence_relations_b);
  debug_data_dependence_relations (dependence_relations_father);
  
  fprintf (dump_file, "dumping data dependences of a \n");
  dump_data_dependence_relations (dump_file, dependence_relations_a);
  fprintf (dump_file, "dumping data dependences of b \n");
  dump_data_dependence_relations (dump_file, dependence_relations_b);
  fprintf (dump_file, "dumping data dependences of father \n");
  dump_data_dependence_relations (dump_file, dependence_relations_father); 

  free_dependence_relations (dependence_relations_a);
  free_data_refs (datarefs_a);
  free_dependence_relations (dependence_relations_b);
  free_data_refs (datarefs_b);
  free_dependence_relations (dependence_relations_father);
  free_data_refs (datarefs_father);

  return true;
}

/* Structure to hold the list of partitions.  */

static struct partition 
{
  int num;
  int list[10];
  int present;
  char type; 
} ;


/* Create a graph original_graph. It's nodes are loops and it's edges are data dependencies.  */

static struct graph *
create_original_graph (int num_loops)
{
  struct graph *g = new_graph (num_loops);
  struct loop *loop;
  loop_iterator li;
  FOR_EACH_LOOP (li, loop, 0)
    {
      struct partition *p = XNEW (struct partition);
      p->num = 0;
      p->list[p->num] = loop->num;
      p->num++;
      p->present = 1;
      
      if (is_parallel_loop (loop))
        {
          p->type = 'P';
          g->vertices[loop->num].data = p;
        }
      else
        {
          p->type = 'S';
          g->vertices[loop->num].data = p;
        } 
    }
   FOR_EACH_LOOP (li, loop, 0)
     {
       struct loop *loop_dummy;
       loop_iterator li_dummy;
       FOR_EACH_LOOP (li_dummy, loop_dummy, 0)
         {
           if (loop != loop_dummy)
             {
               if (is_dependent_on (loop, loop_dummy))
                 {
                   if (no_fusion_preventing_constraint (loop, loop_dummy))
                     {
                       struct graph_edge *ge = add_edge (g, loop_dummy->num, loop->num);
                       ge->data = "FPE";
                     } 
                   else 
                     add_edge (g, loop_dummy->num, loop->num);
                  }
             }
         }  
     }

   
  return g;  
}

/* Create a component graph from the original graph. The parameter t denotes type of the component graph -
   parallel or sequential.  */

static struct graph*
create_component_graph (char t, int num_loops)
{
  struct graph *og = create_original_graph (num_loops);
  struct graph *g = new_graph (num_loops);
  struct partition *p, *q, *r;
  int i;
  char opp; 
  if (t == 'P')
    opp = 'S';
  else
    opp = 'P';
  struct loop *loop;
  loop_iterator li;
  

  for (i = 1; i < num_loops; i++)
    {
      p = XNEW (struct partition);
      p = og->vertices[i].data;
      if (p->type == t)
        {
          q = XNEW (struct partition);
          q->num = 0;
          q->list[q->num] = i;
          q->num++;
          q->present = 1;
          q->type = t;
          g->vertices[i].data = q;
        } 
    } 
  if (t == 'P') 
  FOR_EACH_LOOP (li, loop, 0)
     {
       struct loop *loop_dummy;
       loop_iterator li_dummy;
       FOR_EACH_LOOP (li_dummy, loop_dummy, 0)
         {
           if (loop != loop_dummy)
             {
               if (is_dependent_on (loop, loop_dummy)&& (is_parallel_loop (loop)) && (is_parallel_loop (loop_dummy)))
                 {
                   if (no_fusion_preventing_constraint (loop, loop_dummy))
                     {
                       struct graph_edge *ge = add_edge (g, loop_dummy->num, loop->num);
                       ge->data = "FPE";
                     } 
                   else 
                     add_edge (g, loop_dummy->num, loop->num);
                  }
             }
         }  
     }
  if (t =='S') 
  FOR_EACH_LOOP (li, loop, 0)
     {
       struct loop *loop_dummy;
       loop_iterator li_dummy;
       FOR_EACH_LOOP (li_dummy, loop_dummy, 0)
         {
           if (loop != loop_dummy)
             {
               if (is_dependent_on (loop, loop_dummy)&& (is_sequential_loop (loop)) && (is_sequential_loop (loop_dummy)))
                 {
                   if (no_fusion_preventing_constraint (loop, loop_dummy))
                     {
                       struct graph_edge *ge = add_edge (g, loop_dummy->num, loop->num);
                       ge->data = "FPE";
                     } 
                   else 
                     add_edge (g, loop_dummy->num, loop->num);
                  }
             }
         }  
     }
  
  for (i = 1; i < num_loops; i++)
    {
      p = XNEW (struct partition);
      p = og->vertices[i].data;  
      if (p->type == opp)
        { 
          struct graph_edge *ges;
          for (ges = og->vertices[i].pred; ges; ges = ges->pred_next)
            {
              int source = ges->src;
              struct graph_edge *ged; 
              for (ged = og->vertices[i].succ; ged; ged = ged->succ_next)
                { 
                  int destin = ged->dest;
                  q = XNEW (struct partition);
                  q = og->vertices[source].data;
                  r = XNEW (struct partition);
                  r = og->vertices[destin].data;
                  if ((q->type == t) && (r->type == t) && (q->list[0] != r->list[0]))
                    {
                      struct graph_edge *ge = add_edge (g, source, destin); 
                      ge->data = "FPE";
                    }
                 }
             }
          }
        }
   return g;
  
}


/* Partitions the graph according to rules. And fuse the loops corresponding to the partitions.  */

static void 
do_partitions (struct graph *g)
{
  int num = g->n_vertices;
  int i;
  struct partition *p, *q; 
  for (i = 1; i<num; i++)
    {
      p = XNEW (struct partition);
      p = g->vertices[i].data;
      struct graph_edge *ge;
      if (p) 
      if (p->present == 1)
        for (ge = g->vertices[i].pred; ge; ge = ge->pred_next)
          {
            q = XNEW (struct partition);
            q = g->vertices[ge->src].data;
            if (ge->data != "FPE") 
              {
               p->list[p->num] = q->list[0];
               p->num++;
               q->present = 0;
              }
           }
    } 

}

/* Performs loop fusion according to the given graph.  */

static void
fuse_according_to_graph (struct graph *g)
{
  int num = g->n_vertices;
  int i, j;
  struct partition *p; 
  struct loop *loop;
  loop_iterator li;
  for (i = 1; i<num; i++)
    {
      p = XNEW (struct partition);
      p = g->vertices[i].data;
      if (p)
        if (p->present == 1)
          {
            for (j = (p->num)-1; j > 0 ; j--)
              {
                FOR_EACH_LOOP (li, loop, 0)
                  {
                    if ((loop->num == p->list[j]) && (loop->next->num == p->list[j-1]))
                      fuse_loops (loop, loop->next);
                  }
              }
          } 
    } 

}


/* Performs legal loops fusion in the current function.  */

static unsigned int
tree_loop_fusion (void)
{
  if (!current_loops)
    return 0;
  int num_loops = number_of_loops();
  struct loop *loop;
  loop_iterator li;
   
  /* For all the loops in the program pick consecutive loops loop_a and loop_b.  */
  FOR_EACH_LOOP (li, loop, 0)
    {
      if (loop->next)
        {
	  //if (can_fuse_loops (loop, loop->next))
            {
              /*if (is_dependent_on (loop, loop->next))
                fprintf (stderr, "loop %d is dependent on loop %d \n\n", loop->num, loop->next->num);
              else
                fprintf (stderr, "loop %d is not dependent on loop %d \n\n", loop->num, loop->next->num);*/ 
    	      //fuse_loops2 (loop, loop->next);
              fuse_loops (loop, loop->next);
            }
	}
            
     }

 /* struct graph *original_graph_1 = new_graph (num_loops);
  original_graph_1 = create_original_graph (num_loops);
  dump_graph (dump_file, original_graph_1);
  dump_graph (stderr, original_graph_1);
  fprintf (dump_file, "printed the original_graph\n\n"); 
  fprintf (stderr, "printed the original_graph\n\n");
  struct graph *component_graph_parallel = create_component_graph ('P', num_loops);
  dump_graph (dump_file, component_graph_parallel);
  dump_graph (stderr, component_graph_parallel);
  fprintf (dump_file, "printed the component parallel graph\n\n"); 
  fprintf (stderr, "printed the component parallel graph\n\n");
  dump_graph (dump_file, original_graph_1);
  dump_graph (stderr, original_graph_1);
  fprintf (dump_file, "printed the original_graph\n\n"); 
  fprintf (stderr, "printed the original_graph\n\n");
  do_partitions (component_graph_parallel);
  dump_graph (dump_file, component_graph_parallel);
  dump_graph (stderr, component_graph_parallel);
  fprintf (dump_file, "printed the partitioned component parallel graph\n"); 
  fprintf (stderr, "printed the partitioned component parallel graph\n");
  fuse_according_to_graph (component_graph_parallel);*/
  
  
  return 0;
}

static bool
gate_tree_loop_fusion (void)
{
  return flag_tree_loop_fusion != 0;
}

struct gimple_opt_pass pass_loop_fusion =
{
 {
  GIMPLE_PASS, 
  "lfusion",		        	/* name */
  gate_tree_loop_fusion,                /* gate */
  tree_loop_fusion,                     /* execute */
  NULL,			        	/* sub */
  NULL,			        	/* next */
  0,				        /* static_pass_number */
  TV_TREE_LOOP_FUSION,                  /* tv_id */
  PROP_cfg | PROP_ssa,		        /* properties_required */
  0,				        /* properties_provided */
  0,				        /* properties_destroyed */
  0,				        /* todo_flags_start */
  TODO_dump_func | TODO_verify_loops | TODO_update_ssa,   /* todo_flags_finish */
 }
};

Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi	(revision 134556)
+++ doc/invoke.texi	(working copy)
@@ -355,6 +355,7 @@
 -fstrict-aliasing -fstrict-overflow -fthread-jumps -ftracer -ftree-ccp @gol
 -ftree-ch -ftree-copy-prop -ftree-copyrename -ftree-dce @gol
 -ftree-dominator-opts -ftree-dse -ftree-fre -ftree-loop-im @gol
+-ftree-loop-fusion @gol
 -ftree-loop-distribution @gol
 -ftree-loop-ivcanon -ftree-loop-linear -ftree-loop-optimize @gol
 -ftree-parallelize-loops=@var{n} -ftree-pre -ftree-reassoc -ftree-salias @gol
@@ -5888,6 +5889,9 @@
 Compare the results of several data dependence analyzers.  This option
 is used for debugging the data dependence analyzers.
 
+@item -ftree-loop-fusion
+Perform loop fusion.
+
 @item -ftree-loop-distribution
 Perform loop distribution.  This flag can improve cache performance on
 big loop bodies and allow further loop optimizations, like
Index: tree-pass.h
===================================================================
--- tree-pass.h	(revision 134556)
+++ tree-pass.h	(working copy)
@@ -287,6 +287,7 @@
 extern struct gimple_opt_pass pass_empty_loop;
 extern struct gimple_opt_pass pass_record_bounds;
 extern struct gimple_opt_pass pass_if_conversion;
+extern struct gimple_opt_pass pass_loop_fusion; 
 extern struct gimple_opt_pass pass_loop_distribution;
 extern struct gimple_opt_pass pass_vectorize;
 extern struct gimple_opt_pass pass_complete_unroll;
Index: tree-loop-fusion.c
===================================================================
--- tree-loop-fusion.c	(revision 0)
+++ tree-loop-fusion.c	(revision 0)
@@ -0,0 +1,589 @@
+/* Loop fusion. */
+
+/* This pass performs loop fusion: for example, the loops
+
+	|loop_1
+	|  A[i] = ...
+	|endloop_1
+
+
+	|loop_2
+	|  ... = A[i]
+	|endloop_2
+
+   that becomes after fusion:
+
+	|loop_1
+	|  A[i] = ...
+	|  ... = A[i]
+	|endloop_1
+
+*/
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "ggc.h"
+#include "tree.h"
+#include "target.h"
+
+#include "rtl.h"
+#include "basic-block.h"
+#include "diagnostic.h"
+#include "tree-flow.h"
+#include "tree-dump.h"
+#include "timevar.h"
+#include "cfgloop.h"
+#include "expr.h"
+#include "optabs.h"
+#include "tree-chrec.h"
+#include "tree-data-ref.h"
+#include "tree-scalar-evolution.h"
+#include "tree-pass.h"
+#include "lambda.h"
+#include "langhooks.h"
+#include "tree-vectorizer.h"
+
+
+
+
+/* Returns true when a given loop is a parallel loop.  */
+
+static bool
+is_parallel_loop (struct loop *loop)
+{
+  VEC (ddr_p, heap) * dependence_relations;
+  VEC (data_reference_p, heap) * datarefs;
+  lambda_trans_matrix trans;
+  bool ret = false;
+  
+  /* If the loop can be reversed, the iterations are independent.  */
+  datarefs = VEC_alloc (data_reference_p, heap, 10);
+  dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
+  compute_data_dependences_for_loop (loop, true, &datarefs, &dependence_relations);
+  trans = lambda_trans_matrix_new (1, 1);
+  LTM_MATRIX (trans)[0][0] = -1;
+
+  if (lambda_transform_legal_p (trans, 1, dependence_relations))
+    {
+      ret = true;
+    }
+
+  free_dependence_relations (dependence_relations);
+  free_data_refs (datarefs);
+  if (ret == true)
+    fprintf (dump_file, " loop %d is a parallel loop\n ", loop->num);
+  return ret;
+}
+
+/* Returns true when a given loop is a sequential loop.  */
+
+static bool
+is_sequential_loop (struct loop *loop)
+{
+  if (is_parallel_loop (loop))
+    return false;
+  else
+    return true;
+}
+
+/* Returns true if there is no fusion preventing constraint between two given loops.  */
+
+static bool
+no_fusion_preventing_constraint (struct loop *loop_a, struct loop *loop_b)
+{
+  bool ret = true;
+  struct loop *father_loop;
+  father_loop = find_common_loop (loop_a, loop_b);
+  struct graph *rdg_father;
+  rdg_father = build_rdg (father_loop);
+  struct graph *rdg_a;
+  rdg_a = build_rdg (loop_a);
+  struct graph *rdg_b;
+  rdg_b = build_rdg (loop_b);
+  return ret;
+}
+
+/* Returns true when two loops can be fused.  */
+
+static bool
+can_fuse_loops (struct loop *loop_a, struct loop *loop_b)
+{
+  if ((is_parallel_loop (loop_a) && is_parallel_loop (loop_b) && no_fusion_preventing_constraint (loop_a, loop_b)) || 
+      (is_sequential_loop (loop_a) && is_sequential_loop (loop_b) && no_fusion_preventing_constraint (loop_a, loop_b)))
+    {
+      fprintf (dump_file, " can fuse loops %d %d ", loop_a->num, loop_b->num);
+      return true;
+    }
+
+  return false;
+}
+
+/* The following function fuses two loops.  */
+
+void
+fuse_loops2 (struct loop *loop_a, struct loop *loop_b)
+{
+  basic_block *bbs_a, *bbs_b;
+  struct loop *new_loop;
+  bbs_a = get_loop_body (loop_a);
+  bbs_b = get_loop_body (loop_b);
+  debug_loop (loop_a, 10);
+  debug_loop (loop_b, 10); 
+  tree_merge_blocks (bbs_b[0], bbs_a[0]);
+  debug_loop (loop_a, 10);
+  debug_loop (loop_b, 10); 
+  cancel_loop_tree (loop_a);
+  
+}
+
+
+/* The following function fuses two loops.  */
+
+void
+fuse_loops (struct loop *loop_a, struct loop *loop_b)
+{
+  debug_loop (loop_a, 10);
+  debug_loop (loop_b, 10); 
+  block_stmt_iterator bsi_a, bsi_a_last, bsi_b, bsi_b_last, bsi;
+  bsi_b_last = bsi_last (loop_b->header);
+  bsi_prev (&bsi_b_last);
+  tree phi, prev = NULL_TREE, next, use, def;
+
+  /* Detects the iterator variable of loop_b.  */
+  for (phi = phi_nodes (loop_b->header);
+       phi;) 
+    {
+      next = PHI_CHAIN (phi);
+      use = PHI_RESULT (phi);
+      phi = next;
+    } 
+
+  /* Detects the iterator variable of loop_a.  */
+  for (phi = phi_nodes (loop_a->header);
+       phi;) 
+    {
+      next = PHI_CHAIN (phi);
+      def = PHI_RESULT (phi);
+      phi = next;
+    } 
+  
+  /*for (phi = phi_nodes (loop_a->header);
+       phi;)
+    {
+      next = PHI_CHAIN (phi);
+      create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)), loop_b->header);
+      phi = next;
+    } */
+
+  /* Replaces the itaerator variable of loop_a with that of loop_b.  */
+  for (phi = phi_nodes (loop_a->header);
+       phi;) 
+    {
+      next = PHI_CHAIN (phi);
+      replace_uses_by (def, use);
+      phi = next;
+    } 
+
+  /* Remove PHI nodes of loop_a.  */
+  for (phi = phi_nodes (loop_a->header);
+       phi;)
+    {
+      next = PHI_CHAIN (phi);
+      remove_phi_node (phi, prev, true);
+      phi = next;
+    } 
+
+  /* Transfer all the statements one by one.  */
+  for (bsi = bsi_start (loop_a->header); !bsi_end_p (bsi);)
+    {
+      if ((TREE_CODE (bsi_stmt (bsi)) != COND_EXPR) && (TREE_CODE (bsi_stmt (bsi)) != LABEL_EXPR))
+        {
+          update_stmt (bsi_stmt (bsi)); 
+	  bsi_move_before (&bsi, &bsi_b_last);
+          fprintf (stderr, " transferred one statement. \n ");  
+          fprintf (dump_file, " transferred one statement. \n ");  
+          update_stmt (bsi_stmt (bsi)); 
+        } 
+      else
+        {
+	  bsi_next (&bsi);
+        }
+     } 
+  
+  debug_loop (loop_a, 10);
+  debug_loop (loop_b, 10); 
+  update_ssa (TODO_update_ssa);
+  fprintf (stderr, "deleting loop %d \n", loop_a->num);
+  cancel_loop_tree (loop_a);
+  fprintf (stderr, "deleted loop %d \n", loop_a->num);
+}
+
+
+/* Returns true if  loop_a is dependent on loop_b.  */
+
+static bool
+is_dependent_on (struct loop *loop_a, struct loop *loop_b)
+{
+  bool ret = false;
+  struct loop *loop_father;
+  loop_father = find_common_loop (loop_a, loop_b);
+
+  VEC (ddr_p, heap) * dependence_relations_a;
+  VEC (ddr_p, heap) * dependence_relations_b;
+  VEC (ddr_p, heap) * dependence_relations_father;
+
+  VEC (data_reference_p, heap) * datarefs_a;
+  VEC (data_reference_p, heap) * datarefs_b;
+  VEC (data_reference_p, heap) * datarefs_father;
+  
+  datarefs_a = VEC_alloc (data_reference_p, heap, 10);
+  datarefs_b = VEC_alloc (data_reference_p, heap, 10);
+  datarefs_father = VEC_alloc (data_reference_p, heap, 10);
+
+  dependence_relations_a = VEC_alloc (ddr_p, heap, 10 * 10);
+  dependence_relations_b = VEC_alloc (ddr_p, heap, 10 * 10);
+  dependence_relations_father = VEC_alloc (ddr_p, heap, 10 * 10);
+
+  compute_data_dependences_for_loop (loop_a, true, &datarefs_a, &dependence_relations_a); 
+  compute_data_dependences_for_loop (loop_b, true, &datarefs_b, &dependence_relations_b);
+  compute_data_dependences_for_loop (loop_father, true, &datarefs_father, &dependence_relations_father);
+  
+  debug_data_dependence_relations (dependence_relations_a);
+  debug_data_dependence_relations (dependence_relations_b);
+  debug_data_dependence_relations (dependence_relations_father);
+  
+  fprintf (dump_file, "dumping data dependences of a \n");
+  dump_data_dependence_relations (dump_file, dependence_relations_a);
+  fprintf (dump_file, "dumping data dependences of b \n");
+  dump_data_dependence_relations (dump_file, dependence_relations_b);
+  fprintf (dump_file, "dumping data dependences of father \n");
+  dump_data_dependence_relations (dump_file, dependence_relations_father); 
+
+  free_dependence_relations (dependence_relations_a);
+  free_data_refs (datarefs_a);
+  free_dependence_relations (dependence_relations_b);
+  free_data_refs (datarefs_b);
+  free_dependence_relations (dependence_relations_father);
+  free_data_refs (datarefs_father);
+
+  return true;
+}
+
+/* Structure to hold the list of partitions.  */
+
+static struct partition 
+{
+  int num;
+  int list[10];
+  int present;
+  char type; 
+} ;
+
+
+/* Create a graph original_graph. It's nodes are loops and it's edges are data dependencies.  */
+
+static struct graph *
+create_original_graph (int num_loops)
+{
+  struct graph *g = new_graph (num_loops);
+  struct loop *loop;
+  loop_iterator li;
+  FOR_EACH_LOOP (li, loop, 0)
+    {
+      struct partition *p = XNEW (struct partition);
+      p->num = 0;
+      p->list[p->num] = loop->num;
+      p->num++;
+      p->present = 1;
+      
+      if (is_parallel_loop (loop))
+        {
+          p->type = 'P';
+          g->vertices[loop->num].data = p;
+        }
+      else
+        {
+          p->type = 'S';
+          g->vertices[loop->num].data = p;
+        } 
+    }
+   FOR_EACH_LOOP (li, loop, 0)
+     {
+       struct loop *loop_dummy;
+       loop_iterator li_dummy;
+       FOR_EACH_LOOP (li_dummy, loop_dummy, 0)
+         {
+           if (loop != loop_dummy)
+             {
+               if (is_dependent_on (loop, loop_dummy))
+                 {
+                   if (no_fusion_preventing_constraint (loop, loop_dummy))
+                     {
+                       struct graph_edge *ge = add_edge (g, loop_dummy->num, loop->num);
+                       ge->data = "FPE";
+                     } 
+                   else 
+                     add_edge (g, loop_dummy->num, loop->num);
+                  }
+             }
+         }  
+     }
+
+   
+  return g;  
+}
+
+/* Create a component graph from the original graph. The parameter t denotes type of the component graph -
+   parallel or sequential.  */
+
+static struct graph*
+create_component_graph (char t, int num_loops)
+{
+  struct graph *og = create_original_graph (num_loops);
+  struct graph *g = new_graph (num_loops);
+  struct partition *p, *q, *r;
+  int i;
+  char opp; 
+  if (t == 'P')
+    opp = 'S';
+  else
+    opp = 'P';
+  struct loop *loop;
+  loop_iterator li;
+  
+
+  for (i = 1; i < num_loops; i++)
+    {
+      p = XNEW (struct partition);
+      p = og->vertices[i].data;
+      if (p->type == t)
+        {
+          q = XNEW (struct partition);
+          q->num = 0;
+          q->list[q->num] = i;
+          q->num++;
+          q->present = 1;
+          q->type = t;
+          g->vertices[i].data = q;
+        } 
+    } 
+  if (t == 'P') 
+  FOR_EACH_LOOP (li, loop, 0)
+     {
+       struct loop *loop_dummy;
+       loop_iterator li_dummy;
+       FOR_EACH_LOOP (li_dummy, loop_dummy, 0)
+         {
+           if (loop != loop_dummy)
+             {
+               if (is_dependent_on (loop, loop_dummy)&& (is_parallel_loop (loop)) && (is_parallel_loop (loop_dummy)))
+                 {
+                   if (no_fusion_preventing_constraint (loop, loop_dummy))
+                     {
+                       struct graph_edge *ge = add_edge (g, loop_dummy->num, loop->num);
+                       ge->data = "FPE";
+                     } 
+                   else 
+                     add_edge (g, loop_dummy->num, loop->num);
+                  }
+             }
+         }  
+     }
+  if (t =='S') 
+  FOR_EACH_LOOP (li, loop, 0)
+     {
+       struct loop *loop_dummy;
+       loop_iterator li_dummy;
+       FOR_EACH_LOOP (li_dummy, loop_dummy, 0)
+         {
+           if (loop != loop_dummy)
+             {
+               if (is_dependent_on (loop, loop_dummy)&& (is_sequential_loop (loop)) && (is_sequential_loop (loop_dummy)))
+                 {
+                   if (no_fusion_preventing_constraint (loop, loop_dummy))
+                     {
+                       struct graph_edge *ge = add_edge (g, loop_dummy->num, loop->num);
+                       ge->data = "FPE";
+                     } 
+                   else 
+                     add_edge (g, loop_dummy->num, loop->num);
+                  }
+             }
+         }  
+     }
+  
+  for (i = 1; i < num_loops; i++)
+    {
+      p = XNEW (struct partition);
+      p = og->vertices[i].data;  
+      if (p->type == opp)
+        { 
+          struct graph_edge *ges;
+          for (ges = og->vertices[i].pred; ges; ges = ges->pred_next)
+            {
+              int source = ges->src;
+              struct graph_edge *ged; 
+              for (ged = og->vertices[i].succ; ged; ged = ged->succ_next)
+                { 
+                  int destin = ged->dest;
+                  q = XNEW (struct partition);
+                  q = og->vertices[source].data;
+                  r = XNEW (struct partition);
+                  r = og->vertices[destin].data;
+                  if ((q->type == t) && (r->type == t) && (q->list[0] != r->list[0]))
+                    {
+                      struct graph_edge *ge = add_edge (g, source, destin); 
+                      ge->data = "FPE";
+                    }
+                 }
+             }
+          }
+        }
+   return g;
+  
+}
+
+
+/* Partitions the graph according to rules. And fuse the loops corresponding to the partitions.  */
+
+static void 
+do_partitions (struct graph *g)
+{
+  int num = g->n_vertices;
+  int i;
+  struct partition *p, *q; 
+  for (i = 1; i<num; i++)
+    {
+      p = XNEW (struct partition);
+      p = g->vertices[i].data;
+      struct graph_edge *ge;
+      if (p) 
+      if (p->present == 1)
+        for (ge = g->vertices[i].pred; ge; ge = ge->pred_next)
+          {
+            q = XNEW (struct partition);
+            q = g->vertices[ge->src].data;
+            if (ge->data != "FPE") 
+              {
+               p->list[p->num] = q->list[0];
+               p->num++;
+               q->present = 0;
+              }
+           }
+    } 
+
+}
+
+/* Performs loop fusion according to the given graph.  */
+
+static void
+fuse_according_to_graph (struct graph *g)
+{
+  int num = g->n_vertices;
+  int i, j;
+  struct partition *p; 
+  struct loop *loop;
+  loop_iterator li;
+  for (i = 1; i<num; i++)
+    {
+      p = XNEW (struct partition);
+      p = g->vertices[i].data;
+      if (p)
+        if (p->present == 1)
+          {
+            for (j = (p->num)-1; j > 0 ; j--)
+              {
+                FOR_EACH_LOOP (li, loop, 0)
+                  {
+                    if ((loop->num == p->list[j]) && (loop->next->num == p->list[j-1]))
+                      fuse_loops (loop, loop->next);
+                  }
+              }
+          } 
+    } 
+
+}
+
+
+/* Performs legal loops fusion in the current function.  */
+
+static unsigned int
+tree_loop_fusion (void)
+{
+  if (!current_loops)
+    return 0;
+  int num_loops = number_of_loops();
+  struct loop *loop;
+  loop_iterator li;
+   
+  /* For all the loops in the program pick consecutive loops loop_a and loop_b.  */
+  FOR_EACH_LOOP (li, loop, 0)
+    {
+      if (loop->next)
+        {
+	  //if (can_fuse_loops (loop, loop->next))
+            {
+              /*if (is_dependent_on (loop, loop->next))
+                fprintf (stderr, "loop %d is dependent on loop %d \n\n", loop->num, loop->next->num);
+              else
+                fprintf (stderr, "loop %d is not dependent on loop %d \n\n", loop->num, loop->next->num);*/ 
+    	      //fuse_loops2 (loop, loop->next);
+              fuse_loops (loop, loop->next);
+            }
+	}
+            
+     }
+
+ /* struct graph *original_graph_1 = new_graph (num_loops);
+  original_graph_1 = create_original_graph (num_loops);
+  dump_graph (dump_file, original_graph_1);
+  dump_graph (stderr, original_graph_1);
+  fprintf (dump_file, "printed the original_graph\n\n"); 
+  fprintf (stderr, "printed the original_graph\n\n");
+  struct graph *component_graph_parallel = create_component_graph ('P', num_loops);
+  dump_graph (dump_file, component_graph_parallel);
+  dump_graph (stderr, component_graph_parallel);
+  fprintf (dump_file, "printed the component parallel graph\n\n"); 
+  fprintf (stderr, "printed the component parallel graph\n\n");
+  dump_graph (dump_file, original_graph_1);
+  dump_graph (stderr, original_graph_1);
+  fprintf (dump_file, "printed the original_graph\n\n"); 
+  fprintf (stderr, "printed the original_graph\n\n");
+  do_partitions (component_graph_parallel);
+  dump_graph (dump_file, component_graph_parallel);
+  dump_graph (stderr, component_graph_parallel);
+  fprintf (dump_file, "printed the partitioned component parallel graph\n"); 
+  fprintf (stderr, "printed the partitioned component parallel graph\n");
+  fuse_according_to_graph (component_graph_parallel);*/
+  
+  
+  return 0;
+}
+
+static bool
+gate_tree_loop_fusion (void)
+{
+  return flag_tree_loop_fusion != 0;
+}
+
+struct gimple_opt_pass pass_loop_fusion =
+{
+ {
+  GIMPLE_PASS, 
+  "lfusion",		        	/* name */
+  gate_tree_loop_fusion,                /* gate */
+  tree_loop_fusion,                     /* execute */
+  NULL,			        	/* sub */
+  NULL,			        	/* next */
+  0,				        /* static_pass_number */
+  TV_TREE_LOOP_FUSION,                  /* tv_id */
+  PROP_cfg | PROP_ssa,		        /* properties_required */
+  0,				        /* properties_provided */
+  0,				        /* properties_destroyed */
+  0,				        /* todo_flags_start */
+  TODO_dump_func | TODO_verify_loops | TODO_update_ssa,   /* todo_flags_finish */
+ }
+};
+
Index: timevar.def
===================================================================
--- timevar.def	(revision 134556)
+++ timevar.def	(working copy)
@@ -125,6 +125,7 @@
 DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops")
 DEFTIMEVAR (TV_TREE_VECTORIZATION    , "tree vectorization")
 DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear")
+DEFTIMEVAR (TV_TREE_LOOP_FUSION      , "tree loop fusion") 
 DEFTIMEVAR (TV_TREE_LOOP_DISTRIBUTION, "tree loop distribution")
 DEFTIMEVAR (TV_CHECK_DATA_DEPS       , "tree check data dependences")
 DEFTIMEVAR (TV_TREE_PREFETCH	     , "tree prefetching")
Index: common.opt
===================================================================
--- common.opt	(revision 134556)
+++ common.opt	(working copy)
@@ -1115,6 +1115,10 @@
 Common Report Var(flag_tree_fre) Optimization
 Enable Full Redundancy Elimination (FRE) on trees
 
+ftree-loop-fusion
+Common Report Var(flag_tree_loop_fusion) Optimization
+Enable loop fusion on trees
+
 ftree-loop-distribution
 Common Report Var(flag_tree_loop_distribution) Optimization
 Enable loop distribution on trees
Index: Makefile.in
===================================================================
--- Makefile.in	(revision 134556)
+++ Makefile.in	(working copy)
@@ -1147,6 +1147,7 @@
 	tree-if-conv.o \
 	tree-into-ssa.o \
 	tree-iterator.o \
+        tree-loop-fusion.o \
 	tree-loop-distribution.o \
 	tree-loop-linear.o \
 	tree-nested.o \
@@ -2289,6 +2290,11 @@
    $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
    tree-pass.h $(TREE_DATA_REF_H) $(SCEV_H) $(EXPR_H) $(LAMBDA_H) \
    $(TARGET_H) tree-chrec.h $(OBSTACK_H)
+tree-loop-fusion.o: tree-loop-fusion.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+   $(TM_H) $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) \
+   $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
+   tree-pass.h $(TREE_DATA_REF_H) $(SCEV_H) $(EXPR_H) \
+   $(TARGET_H) tree-chrec.h tree-vectorizer.h
 tree-loop-distribution.o: tree-loop-distribution.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) \
    $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
Index: tree-cfg.c
===================================================================
--- tree-cfg.c	(revision 134556)
+++ tree-cfg.c	(working copy)
@@ -104,7 +104,7 @@
 static inline void change_bb_for_stmt (tree t, basic_block bb);
 
 /* Flowgraph optimization and cleanup.  */
-static void tree_merge_blocks (basic_block, basic_block);
+extern void tree_merge_blocks (basic_block, basic_block);
 static bool tree_can_merge_blocks_p (basic_block, basic_block);
 static void remove_bb (basic_block);
 static edge find_taken_edge_computed_goto (basic_block, tree);
@@ -1277,7 +1277,7 @@
 
 /* Merge block B into block A.  */
 
-static void
+void
 tree_merge_blocks (basic_block a, basic_block b)
 {
   block_stmt_iterator bsi;
Index: passes.c
===================================================================
--- passes.c	(revision 134556)
+++ passes.c	(working copy)
@@ -627,6 +627,7 @@
 	  NEXT_PASS (pass_empty_loop);
 	  NEXT_PASS (pass_record_bounds);
 	  NEXT_PASS (pass_check_data_deps);
+          NEXT_PASS (pass_loop_fusion);
 	  NEXT_PASS (pass_loop_distribution);
 	  NEXT_PASS (pass_linear_transform);
 	  NEXT_PASS (pass_iv_canon);

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]