This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[tree-ssa] Speedup count_or_remove_death_notes


I was investigating why a pending change of mine was having a negative
compile-time performance impact and stumbled across some lameness in
count_or_remove_death_notes.

Basically I was seeing times in the scheduler jump from ~3 seconds to ~7
seconds.  I found this to be rather odd since the patch just recycles PHI
nodes.  Presumably it's some kind of cache effect since recycling PHIs
does certainly change memory characteristics of the compiler.

I poked at things with oprofile for a few minutes without getting anywhere --
eventually I realized that the code in count_or_remove_death notes was
horribly inefficient and I might as well just fix it, then return to the
PHI recycling patch.

Basically count_or_remove_death_notes looks something like this

FOR_EACH_BB
  {
    if (blocks && TEST_BIT (blocks, bb->index))
    [ ... ]
  }


So let's assume we have ~5k blocks and we're in sched2 where we have single
block regions.  Imagine the effect of this code:


      for (rgn = 0; rgn < nr_regions; rgn++)
        {
          int b;
                                                                                
          sbitmap_zero (blocks);
          for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
            SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);
                                                                                
          deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
        }


ie, we end up calling count_or_remove_death_notes once per block and for
each of those calls we have to walk through the entire sbitmap to find
the one and only one bit that is set.  Egad.  Worse yet, we do it twice.
Once before scheduling and once after since we're verifying the number of
registers killed hasn't changed.

This patch changes count_or_remove_death_notes to use 
EXECUTE_IF_SET_IN_SBITMAP which drastically reduces the overhead
for this routine.

Long term we might want to change this routine to accept a bitmap
rather than an sbitmap.  I suspect the bitmap is very sparse, and
thus using a bitmap rather than an sbitmap may result in further
compile time and memory consumption improvements.

Note the extreme lameness only shows up when ENABLE_CHECKING since otherwise
we don't bother to sanity check that the number of registers killed 
before and after scheduling is the same.


	* flow.c (count_or_remove_death_notes_bb): New.  Extracted from
	count_or_remove_death_notes.
	(count_or_remove_death_notes): Use EXECUTE_IF_SET_IN_SBITMAP.
	
Index: flow.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/flow.c,v
retrieving revision 1.534.2.19
diff -c -3 -p -r1.534.2.19 flow.c
*** flow.c	25 Nov 2003 02:09:44 -0000	1.534.2.19
--- flow.c	25 Nov 2003 05:57:02 -0000
*************** static void add_to_mem_set_list (struct 
*** 326,331 ****
--- 326,332 ----
  static int invalidate_mems_from_autoinc (rtx *, void *);
  static void invalidate_mems_from_set (struct propagate_block_info *, rtx);
  static void clear_log_links (sbitmap);
+ static int count_or_remove_death_notes_bb (basic_block, int);
  
  
  void
*************** int
*** 4168,4232 ****
  count_or_remove_death_notes (sbitmap blocks, int kill)
  {
    int count = 0;
    basic_block bb;
  
!   FOR_EACH_BB_REVERSE (bb)
      {
!       rtx insn;
  
!       if (blocks && ! TEST_BIT (blocks, bb->index))
! 	continue;
  
!       for (insn = bb->head;; insn = NEXT_INSN (insn))
  	{
! 	  if (INSN_P (insn))
! 	    {
! 	      rtx *pprev = &REG_NOTES (insn);
! 	      rtx link = *pprev;
  
! 	      while (link)
  		{
! 		  switch (REG_NOTE_KIND (link))
  		    {
! 		    case REG_DEAD:
! 		      if (GET_CODE (XEXP (link, 0)) == REG)
! 			{
! 			  rtx reg = XEXP (link, 0);
! 			  int n;
! 
! 			  if (REGNO (reg) >= FIRST_PSEUDO_REGISTER)
! 			    n = 1;
! 			  else
! 			    n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
! 			  count += n;
! 			}
! 		      /* Fall through.  */
! 
! 		    case REG_UNUSED:
! 		      if (kill)
! 			{
! 			  rtx next = XEXP (link, 1);
! 			  free_EXPR_LIST_node (link);
! 			  *pprev = link = next;
! 			  break;
! 			}
! 		      /* Fall through.  */
! 
! 		    default:
! 		      pprev = &XEXP (link, 1);
! 		      link = *pprev;
  		      break;
  		    }
  		}
  	    }
- 
- 	  if (insn == bb->end)
- 	    break;
  	}
      }
  
    return count;
  }
  /* Clear LOG_LINKS fields of insns in a selected blocks or whole chain
     if blocks is NULL.  */
  
--- 4169,4263 ----
  count_or_remove_death_notes (sbitmap blocks, int kill)
  {
    int count = 0;
+   int i;
    basic_block bb;
  
!   
!   /* This used to be a loop over all the blocks with a membership test
!      inside the loop.  That can be amazingly expensive on a large CFG
!      when only a small number of bits are set in BLOCKs (for example,
!      the calls from the scheduler typically have very few bits set).
! 
!      For extra credit, someone should convert BLOCKS to a bitmap rather
!      than an sbitmap.  */
!   if (blocks)
!     {
!       EXECUTE_IF_SET_IN_SBITMAP (blocks, 0, i,
! 	{
! 	  count += count_or_remove_death_notes_bb (BASIC_BLOCK (i), kill);
! 	});
!     }
!   else
      {
!       FOR_EACH_BB (bb)
! 	{
! 	  count += count_or_remove_death_notes_bb (bb, kill);
! 	}
!     }
  
!   return count;
! }
!   
! /* Optionally removes all the REG_DEAD and REG_UNUSED notes from basic
!    block BB.  Returns a count of the number of registers that died.  */
  
! static int
! count_or_remove_death_notes_bb (basic_block bb, int kill)
! {
!   int count = 0;
!   rtx insn;
! 
!   for (insn = bb->head;; insn = NEXT_INSN (insn))
!     {
!       if (INSN_P (insn))
  	{
! 	  rtx *pprev = &REG_NOTES (insn);
! 	  rtx link = *pprev;
  
! 	  while (link)
! 	    {
! 	      switch (REG_NOTE_KIND (link))
  		{
! 		case REG_DEAD:
! 		  if (GET_CODE (XEXP (link, 0)) == REG)
! 		    {
! 		      rtx reg = XEXP (link, 0);
! 		      int n;
! 
! 		      if (REGNO (reg) >= FIRST_PSEUDO_REGISTER)
! 		        n = 1;
! 		      else
! 		        n = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
! 		      count += n;
! 		    }
! 
! 		  /* Fall through.  */
! 
! 		case REG_UNUSED:
! 		  if (kill)
  		    {
! 		      rtx next = XEXP (link, 1);
! 		      free_EXPR_LIST_node (link);
! 		      *pprev = link = next;
  		      break;
  		    }
+ 		  /* Fall through.  */
+ 
+ 		default:
+ 		  pprev = &XEXP (link, 1);
+ 		  link = *pprev;
+ 		  break;
  		}
  	    }
  	}
+ 
+       if (insn == bb->end)
+ 	break;
      }
  
    return count;
  }
+ 
  /* Clear LOG_LINKS fields of insns in a selected blocks or whole chain
     if blocks is NULL.  */
  







Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]