This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

[patch] lcm,gcse changes for nearly full flow graphs


Hi,

some weeks ago I (and Brad) profiled the compiler a little bit on some of
Brads testcases, and one of the remaining (after elimination of the 
problem with dominators) biggest time sinks was gcse and its various data
collecting passes. The patch below addresses some of them to reduce the
time to a more normal value.

First it changes gcse to insert newly found register setters in front of
the already found. Thats no problem as later the whole list is anyway
handled as set, and makes that insertion O(n) instead O(n^2).

Second it changes the worklists of nodes from a stack to a queue, and
thereby reduces the number of calls to sbitmap_intersection_of_preds() and
friends.

Now there is one problem left (pre_expr_reaches_here_p_work) which calls
itself 89169411 times while called from outside only 48788 times (for
Brads testcase all.i). The fix here would be a little bit more involved.

All in all these changes make a compile on all.i went down from 878 to 396
seconds (on Brads machine, with an unprofiled cc1 I believe). The compiler
bootstraps with them on alpha and i386 (not slower as without the
changes).

Btw: I sent in my assignment some weeks ago. Is it still not there?

Ciao,
Michael.
--

	* gcse.c (record_one_set): Prepend instead of append onto
	reg_set_table, making it O(n) instead O(n^2).
	* lcm.c (compute_antinout_edge,compute_laterin,compute_available):
	Use a queue instead of a stack as worklist.


Index: gcse.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/gcse.c,v
retrieving revision 1.89
diff -u -r1.89 gcse.c
--- gcse.c	2000/06/19 01:40:32	1.89
+++ gcse.c	2000/06/23 23:14:29
@@ -1136,21 +1136,8 @@
 						   sizeof (struct reg_set));
   bytes_used += sizeof (struct reg_set);
   new_reg_info->insn = insn;
-  new_reg_info->next = NULL;
-  if (reg_set_table[regno] == NULL)
-    reg_set_table[regno] = new_reg_info;
-  else
-    {
-      reg_info_ptr1 = reg_info_ptr2 = reg_set_table[regno];
-      /* ??? One could keep a "last" pointer to speed this up.  */
-      while (reg_info_ptr1 != NULL)
-	{
-	  reg_info_ptr2 = reg_info_ptr1;
-	  reg_info_ptr1 = reg_info_ptr1->next;
-	}
-
-      reg_info_ptr2->next = new_reg_info;
-    }
+  new_reg_info->next = reg_set_table[regno];
+  reg_set_table[regno] = new_reg_info;
 }
 
 /* Called from compute_sets via note_stores to handle one SET or CLOBBER in
Index: lcm.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/lcm.c,v
retrieving revision 1.16
diff -u -r1.16 lcm.c
--- lcm.c	2000/05/28 23:40:19	1.16
+++ lcm.c	2000/06/23 23:14:29
@@ -108,12 +108,13 @@
 {
   int bb;
   edge e;
-  basic_block *worklist, *tos;
+  basic_block *worklist, *qin, *qout, *qend;
+  unsigned int qlen;
 
   /* Allocate a worklist array/queue.  Entries are only added to the
      list if they were not already on the list.  So the size is
      bounded by the number of basic blocks.  */
-  tos = worklist
+  qin = qout = worklist
     = (basic_block *) xmalloc (sizeof (basic_block) * n_basic_blocks);
 
   /* We want a maximal solution, so make an optimistic initialization of
@@ -122,11 +123,15 @@
 
   /* Put every block on the worklist; this is necessary because of the
      optimistic initialization of ANTIN above.  */
-  for (bb = 0; bb < n_basic_blocks; bb++)
+  for (bb = n_basic_blocks-1; bb >= 0; bb--)
     {
-      *tos++ = BASIC_BLOCK (bb);
+      *qin++ = BASIC_BLOCK (bb);
       BASIC_BLOCK (bb)->aux = BASIC_BLOCK (bb);
     }
+  
+  qin = worklist;
+  qend = &worklist[n_basic_blocks];
+  qlen = n_basic_blocks;
 
   /* Mark blocks which are predecessors of the exit block so that we
      can easily identify them below.  */
@@ -134,11 +139,15 @@
     e->src->aux = EXIT_BLOCK_PTR;
 
   /* Iterate until the worklist is empty.  */
-  while (tos != worklist)
+  while (qlen)
     {
       /* Take the first entry off the worklist.  */
-      basic_block b = *--tos;
+      basic_block b = *qout++;
       bb = b->index;
+      qlen--;
+
+      if (qout >= qend)
+        qout = worklist;
 
       if (b->aux == EXIT_BLOCK_PTR)
 	/* Do not clear the aux field for blocks which are predecessors of
@@ -160,12 +169,15 @@
 	for (e = b->pred; e; e = e->pred_next)
 	  if (!e->src->aux && e->src != ENTRY_BLOCK_PTR)
 	    {
-	      *tos++ = e->src;
+	      *qin++ = e->src;
 	      e->src->aux = e;
+	      qlen++;
+	      if (qin >= qend)
+	        qin = worklist;
 	    }
     }
 
-  free (tos);
+  free (worklist);
 }
 
 /* Compute the earliest vector for edge based lcm.  */
@@ -246,14 +258,15 @@
 {
   int bb, num_edges, i;
   edge e;
-  basic_block *worklist, *tos;
+  basic_block *worklist, *qin, *qout, *qend;
+  unsigned int qlen;
 
   num_edges = NUM_EDGES (edge_list);
 
   /* Allocate a worklist array/queue.  Entries are only added to the
      list if they were not already on the list.  So the size is
      bounded by the number of basic blocks.  */
-  tos = worklist
+  qin = qout = worklist
     = (basic_block *) xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));
 
   /* Initialize a mapping from each edge to its index.  */
@@ -281,19 +294,28 @@
 
   /* Add all the blocks to the worklist.  This prevents an early exit from
      the loop given our optimistic initialization of LATER above.  */
-  for (bb = n_basic_blocks - 1; bb >= 0; bb--)
+  for (bb = 0; bb < n_basic_blocks; bb++)
     {
       basic_block b = BASIC_BLOCK (bb);
-      *tos++ = b;
+      *qin++ = b;
       b->aux = b;
     }
+  qin = worklist;
+  /* Note that we do not use the last allocated element for our queue,
+     as EXIT_BLOCK is never inserted into it. In fact the above allocation
+     of n_basic_blocks+1 elements is not encessary. */
+  qend = &worklist[n_basic_blocks];
+  qlen = n_basic_blocks;
 
   /* Iterate until the worklist is empty.  */
-  while (tos != worklist)
+  while (qlen)
     {
       /* Take the first entry off the worklist.  */
-      basic_block b = *--tos;
+      basic_block b = *qout++;
       b->aux = NULL;
+      qlen--;
+      if (qout >= qend)
+        qout = worklist;
 
       /* Compute the intersection of LATERIN for each incoming edge to B.  */
       bb = b->index;
@@ -311,8 +333,11 @@
 	       to add the target of the outgoing edge to the worklist.  */
 	    && e->dest != EXIT_BLOCK_PTR && e->dest->aux == 0)
 	  {
-	    *tos++ = e->dest;
+	    *qin++ = e->dest;
 	    e->dest->aux = e;
+	    qlen++;
+	    if (qin >= qend)
+	      qin = worklist;
 	  }
     }
 
@@ -325,7 +350,7 @@
 		     laterin[n_basic_blocks],
 		     later[(size_t) e->aux]);
 
-  free (tos);
+  free (worklist);
 }
 
 /* Compute the insertion and deletion points for edge based LCM.  */
@@ -465,12 +490,13 @@
 {
   int bb;
   edge e;
-  basic_block *worklist, *tos;
+  basic_block *worklist, *qin, *qout, *qend;
+  unsigned int qlen;
 
   /* Allocate a worklist array/queue.  Entries are only added to the
      list if they were not already on the list.  So the size is
      bounded by the number of basic blocks.  */
-  tos = worklist
+  qin = qout = worklist
     = (basic_block *) xmalloc (sizeof (basic_block) * n_basic_blocks);
 
   /* We want a maximal solution.  */
@@ -478,11 +504,15 @@
 
   /* Put every block on the worklist; this is necessary because of the
      optimistic initialization of AVOUT above.  */
-  for (bb = n_basic_blocks - 1; bb >= 0; bb--)
+  for (bb = 0; bb < n_basic_blocks; bb++)
     {
-      *tos++ = BASIC_BLOCK (bb);
+      *qin++ = BASIC_BLOCK (bb);
       BASIC_BLOCK (bb)->aux = BASIC_BLOCK (bb);
     }
+  
+  qin = worklist;
+  qend = &worklist[n_basic_blocks];
+  qlen = n_basic_blocks;
 
   /* Mark blocks which are successors of the entry block so that we
      can easily identify them below.  */
@@ -490,11 +520,15 @@
     e->dest->aux = ENTRY_BLOCK_PTR;
 
   /* Iterate until the worklist is empty.  */
-  while (tos != worklist)
+  while (qlen)
     {
       /* Take the first entry off the worklist.  */
-      basic_block b = *--tos;
+      basic_block b = *qout++;
       bb = b->index;
+      qlen--;
+
+      if (qout >= qend)
+        qout = worklist;
 
       /* If one of the predecessor blocks is the ENTRY block, then the
 	 intersection of avouts is the null set.  We can identify such blocks
@@ -518,12 +552,16 @@
 	for (e = b->succ; e; e = e->succ_next)
 	  if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR)
 	    {
-	      *tos++ = e->dest;
+	      *qin++ = e->dest;
 	      e->dest->aux = e;
+	      qlen++;
+
+	      if (qin >= qend)
+	        qin = worklist;
 	    }
     }
 
-  free (tos);
+  free (worklist);
 }
 
 /* Compute the farthest vector for edge based lcm.  */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]