This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] PR optimization/12158


Hi,

This PR features a testcase for which calculate_global_regs_live from flow.c 
doesn't terminate (endless oscillation) at -O on HP-PA on the 3.3 branch.  
See http://gcc.gnu.org/ml/gcc/2003-12/msg00698.html for a description.

While being relatively rare, this is a well-known problem, reported here:
   http://gcc.gnu.org/ml/gcc/2001-02/msg00476.html
and worked around here:
   http://gcc.gnu.org/ml/gcc-patches/2002-05/msg02253.html

Zdenek posted a patch to fix it in January:
   http://gcc.gnu.org/ml/gcc-patches/2003-01/msg02059.html
which was not reviewed because of lack of testcase.

The testcase for PR opt/12158 is attached: when run on it in the 
aforementioned circumstances, the compiler never stops.  If Zdenek's patch 
is applied (it applies cleanly on the 3.3 branch), the compiler does stop.

I think it should be applied on mainline (although the testcase doesn't 
trigger an endless loop there) because:
- it fixes a real problem,
- it makes it possible to remove Richard's workaround, thus speeding up a bit 
the compiler (Zdenek mesured a 1-2% speed-up back in January),
- it was bootstrapped/regtested on i586-redhat-linux-gnu (mainline except 
Ada) without a hitch today.

I've attached a slightly modified version (I revamped a little the big 
comment and reformatted the code in a few places).

But I think it is probably too risky for the 3.3 branch, so the PR should be 
closed as WONTFIX.


2003-12-22  Zdenek Dvorak  <rakdver@atrey.karlin.mff.cuni.cz>

	* flow.c (MAX_LIVENESS_ROUNDS): New constant.
	(update_life_info_in_dirty_blocks): Remove 2002-05-28 workaround.
	(calculate_global_regs_live): Make sure the loop will terminate
	when the initial sets are not empty.


-- 
Eric Botcazou
Index: flow.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/flow.c,v
retrieving revision 1.572
diff -u -p -r1.572 flow.c
--- flow.c	20 Dec 2003 02:39:43 -0000	1.572
+++ flow.c	22 Dec 2003 13:20:18 -0000
@@ -164,6 +164,10 @@ Software Foundation, 59 Temple Place - S
 #endif
 #endif
 
+/* This should depend on the maximal loop depth in the function,
+   but this value should be high enough for any real cases.  */
+#define MAX_LIVENESS_ROUNDS 20
+
 /* Nonzero if the second flow pass has completed.  */
 int flow2_completed;
 
@@ -754,21 +758,10 @@ update_life_info_in_dirty_blocks (enum u
   sbitmap_zero (update_life_blocks);
   FOR_EACH_BB (bb)
     {
-      if (extent == UPDATE_LIFE_LOCAL)
-	{
-	  if (bb->flags & BB_DIRTY)
-	    {
-	      SET_BIT (update_life_blocks, bb->index);
-	      n++;
-	    }
-	}
-      else
+      if (bb->flags & BB_DIRTY)
 	{
-	  /* ??? Bootstrap with -march=pentium4 fails to terminate
-	     with only a partial life update.  */
 	  SET_BIT (update_life_blocks, bb->index);
-	  if (bb->flags & BB_DIRTY)
-	    n++;
+	  n++;
 	}
     }
 
@@ -1036,10 +1029,14 @@ mark_regs_live_at_end (regset set)
 static void
 calculate_global_regs_live (sbitmap blocks_in, sbitmap blocks_out, int flags)
 {
-  basic_block *queue, *qhead, *qtail, *qend, bb;
+  basic_block *queue, *qhead, *qtail, *qend, bb, pbb;
   regset tmp, new_live_at_end, invalidated_by_call;
   regset_head tmp_head, invalidated_by_call_head;
   regset_head new_live_at_end_head;
+  regset made_dead;
+  regset_head made_dead_head;
+  bool failure_strategy_required = false;
+  int *block_accesses;
   int i;
 
   /* Some passes used to forget clear aux field of basic block causing
@@ -1053,6 +1050,7 @@ calculate_global_regs_live (sbitmap bloc
   tmp = INITIALIZE_REG_SET (tmp_head);
   new_live_at_end = INITIALIZE_REG_SET (new_live_at_end_head);
   invalidated_by_call = INITIALIZE_REG_SET (invalidated_by_call_head);
+  made_dead = INITIALIZE_REG_SET (made_dead_head);
 
   /* Inconveniently, this is only readily available in hard reg set form.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; ++i)
@@ -1087,6 +1085,8 @@ calculate_global_regs_live (sbitmap bloc
 	}
     }
 
+  block_accesses = xcalloc (last_basic_block, sizeof (int));
+  
   /* We clean aux when we remove the initially-enqueued bbs, but we
      don't enqueue ENTRY and EXIT initially, so clean them upfront and
      unconditionally.  */
@@ -1112,7 +1112,38 @@ calculate_global_regs_live (sbitmap bloc
      from GLOBAL_LIVE_AT_START.  In the former case, the register
      could go away only if it disappeared from GLOBAL_LIVE_AT_START
      for one of the successor blocks.  By induction, that cannot
-     occur.  */
+     occur.
+
+     ??? This reasoning doesn't work if we start from non-empty initial
+     GLOBAL_LIVE_AT_START sets.  And there are actually two problems:
+       1) Updating may not terminate (endless oscillation).
+       2) Even if it does (and it usually does), the resulting information
+	  may be inaccurate.  Consider for example the following case:
+
+	  a = ...;
+	  while (...) {...}  -- 'a' not mentioned at all
+	  ... = a;
+
+	  If the use of 'a' is deleted between two calculations of liveness
+	  information and the initial sets are not cleared, the information
+	  about a's liveness will get stuck inside the loop and the set will
+	  appear not to be dead.
+
+     We do not attempt to solve 2) -- the information is conservatively
+     correct (i.e. we never claim that something live is dead) and the
+     amount of optimization opportunities missed due to this problem is
+     not significant.
+
+     1) is more serious.  In order to fix it, we monitor the number of times
+     each block is processed.  Once one of the blocks has been processed more
+     than MAX_LIVENESS_ROUNDS times, we use the following strategy: when a
+     register disappears from one of the sets, we add it to the MADE_DEAD
+     set, remove all registers in this set from all GLOBAL_LIVE_AT_* sets and
+     add the blocks with changed sets into the queue.  Thus we are guaranteed
+     to terminate (the worst case corresponds to all registers in MADE_DEAD,
+     in which case the original reasoning above is valid), but in general we
+     only fix up a few offending registers.  */
+
   while (qhead != qtail)
     {
       int rescan, changed;
@@ -1124,6 +1155,14 @@ calculate_global_regs_live (sbitmap bloc
 	qhead = queue;
       bb->aux = NULL;
 
+      /* Should we start using the failure strategy?  */
+      if (bb != ENTRY_BLOCK_PTR)
+	{
+	  block_accesses[bb->index]++;
+	  if (block_accesses[bb->index] > MAX_LIVENESS_ROUNDS)
+	    failure_strategy_required = true;
+	}
+
       /* Begin by propagating live_at_start from the successor blocks.  */
       CLEAR_REG_SET (new_live_at_end);
 
@@ -1279,6 +1318,63 @@ calculate_global_regs_live (sbitmap bloc
 	  if (REG_SET_EQUAL_P (bb->global_live_at_start, new_live_at_end))
 	    continue;
 
+	  if (failure_strategy_required)
+	    {
+	      /* Get the list of registers that were removed from the
+		 bb->global_live_at_start set.  */
+	      bitmap_clear (tmp);
+	      if (bitmap_operation (tmp, bb->global_live_at_start,
+				    new_live_at_end, BITMAP_AND_COMPL))
+		{
+		  /* It should not happen that one of the registers we removed
+		     last time disappears again before any other register.  */
+		  if (! bitmap_operation (made_dead, made_dead,
+		  			  tmp, BITMAP_IOR))
+		    abort ();
+
+		  /* Now remove the registers from all sets.  */
+		  FOR_EACH_BB (pbb)
+		    {
+		      int pchanged = 0;
+
+		      pchanged |= bitmap_operation (pbb->global_live_at_start,
+						    pbb->global_live_at_start,
+						    made_dead,
+						    BITMAP_AND_COMPL);
+
+		      pchanged |= bitmap_operation (pbb->global_live_at_end,
+						    pbb->global_live_at_end,
+						    made_dead,
+						    BITMAP_AND_COMPL);
+
+		      if (! pchanged)
+			continue;
+
+		      /* Note the (possible) change.  */
+		      if (blocks_out)
+			SET_BIT (blocks_out, pbb->index);
+
+		      /* Makes sure to really rescan the block.  */
+		      if (pbb->local_set)
+			{
+			  FREE_REG_SET (pbb->local_set);
+			  FREE_REG_SET (pbb->cond_local_set);
+			  pbb->local_set = NULL;
+			}
+
+		      /* Add it to the queue.  */
+		      if (pbb->aux == NULL)
+			{
+			  *qtail++ = pbb;
+			  if (qtail == qend)
+			    qtail = queue;
+			  pbb->aux = pbb;
+			}
+		    }
+		  continue;
+		}
+	    } /* end of failure_strategy_required */
+
 	  COPY_REG_SET (bb->global_live_at_start, new_live_at_end);
 	}
 
@@ -1300,6 +1396,7 @@ calculate_global_regs_live (sbitmap bloc
   FREE_REG_SET (tmp);
   FREE_REG_SET (new_live_at_end);
   FREE_REG_SET (invalidated_by_call);
+  FREE_REG_SET (made_dead);
 
   if (blocks_out)
     {
@@ -1319,6 +1416,7 @@ calculate_global_regs_live (sbitmap bloc
 	}
     }
 
+  free (block_accesses);
   free (queue);
 }
 
int lam_errfunc (int, int, int);
void lam_mkpt (int);
void MPI_Send (int, int, int, int, int, int);

int bcast_lin (int buff, int count, int datatype, int comm, int err,
	       int longproto, int nreqs, int nacks, int done)
{
  int i;
  struct { int MPI_SOURCE; } stats[16];
  if (longproto)
      return lam_errfunc (comm, 15, err);
  
  while (nacks < nreqs)
    {
      if (err != 0)
	{
	  lam_mkpt (comm);
	  return lam_errfunc (comm, 15, err);
	}
      for (i = 0; i < done; i++)
	{
	  MPI_Send (buff, count, datatype, stats[i].MPI_SOURCE, 15, comm);
	  if (err)
	    {
	      lam_mkpt (comm);
	      return lam_errfunc (comm, 15, err);
	    }
        }
    }
}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]