This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

More curefull aligning of branches



Hi,
this patch implements an algorithm to set branch alignments more curefully than we
were doing by now.  It aligns only loops executed at least 5 times and only
the hot spot loops.  Additionally it takes care to avoid jump alignments in
jumps that are rarely executed or where the predecesor is likely to be in
the cache anyway.

I've checked that the patch helps in small programms fitting cache, does not
hurt on gcc and reduces code size.  Originally we used about 9% of cc1 image
size for alignmetns, now we do use 3% saving roughtly 137KB (7% of text segment
size)

Honza

Thu Jul 26 16:15:27 CEST 2001  Jan Hubicka  <jh@suse.cz>
	* final.c (compute_alignments): New function.
	(shorted_branches): Do not initialize label_align array; do
	not call init_insn_lengths; do not compute alignments for branches;
	always ensure that the label_align is initialized before
	reading value.
	* output.h (compute_alignments): Declare.
	* toplev.c (rest_of_compilation): Call compute_alignments.

*** final.c.nop	Thu Jul 26 03:03:10 2001
--- final.c	Thu Jul 26 16:14:49 2001
*************** insn_current_reference_address (branch)
*** 930,935 ****
--- 930,1014 ----
  }
  #endif /* HAVE_ATTR_length */
  
+ void
+ compute_alignments ()
+ {
+   int i;
+   int log, max_skip, max_log;
+ 
+ 
+   /* We must do some computations even when not actually shortening, in
+      order to get the alignment information for the labels.  */
+ 
+   init_insn_lengths ();
+ 
+   max_labelno = max_label_num ();
+   min_labelno = get_first_label_num ();
+   label_align = (struct label_alignment *) xcalloc ((max_labelno - min_labelno + 1),
+ 	     sizeof (struct label_alignment));
+ 
+   /* If not optimizing or optimizing for size, don't assign any alignments.  */
+   for (i = 0; i < n_basic_blocks; i++)
+     {
+       basic_block bb = BASIC_BLOCK (i);
+       rtx label = bb->head;
+       int fallthru_frequency = 0, branch_frequency = 0, has_fallthru = 0;
+       edge e;
+ 
+       if (GET_CODE (label) != CODE_LABEL)
+ 	continue;
+       max_log = LABEL_ALIGN (insn);
+       max_skip = LABEL_ALIGN_MAX_SKIP;
+ 
+       for (e = bb->pred; e; e = e->pred_next)
+ 	{
+ 	  if (e->flags & EDGE_FALLTHRU)
+ 	    has_fallthru = 1, fallthru_frequency += EDGE_FREQUENCY (e);
+ 	  else
+ 	    branch_frequency += EDGE_FREQUENCY (e);
+ 	}
+ 
+       /* There are two purposes to align block with no fallthru incomming edge:
+ 	 1) to avoid fetch stalls when branch destination is near cache boundary
+ 	 2) to improve cache effciency in case the previous block is not executed
+ 	    (so it does not need to be in the cache).
+ 
+ 	 We to catch first case, we align frequently executed blocks.
+ 	 To catch the second, we align blocks that are executed more frequently
+ 	 than the predecesor and the predecesor is likely to not be executed
+ 	 when function is called.  */
+ 
+       if (!has_fallthru
+ 	  && (branch_frequency > BB_FREQ_MAX / 10
+ 	      || (bb->frequency > BASIC_BLOCK (i - 1)->frequency * 10
+ 		  && (BASIC_BLOCK (i - 1)->frequency
+ 		      <= ENTRY_BLOCK_PTR->frequency / 2))))
+ 	{
+ 	  log = LABEL_ALIGN_AFTER_BARRIER (insn);
+ 	  if (max_log < log)
+ 	    {
+ 	      max_log = log;
+ 	      max_skip = LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP;
+ 	    }
+ 	}
+       /* In case block is frequent and reached mostly by non-fallthru edge,
+ 	 align it.  It is most likely an first block of loop.  */
+       if (has_fallthru
+ 	  && branch_frequency + fallthru_frequency > BB_FREQ_MAX / 10
+ 	  && branch_frequency > fallthru_frequency * 5)
+ 	{
+ 	  log = LOOP_ALIGN (label);
+ 	  if (max_log < log)
+ 	    {
+ 	      max_log = log;
+ 	      max_skip = LOOP_ALIGN_MAX_SKIP;
+ 	    }
+ 	}
+       LABEL_TO_ALIGNMENT (label) = max_log;
+       LABEL_TO_MAX_SKIP (label) = max_skip;
+     }
+ }
+ 
  /* Make a pass over all insns and compute their actual lengths by shortening
     any branches of variable length if possible.  */
  
*************** shorten_branches (first)
*** 967,985 ****
  
  #endif
  
-   /* We must do some computations even when not actually shortening, in
-      order to get the alignment information for the labels.  */
- 
-   init_insn_lengths ();
- 
    /* Compute maximum UID and allocate label_align / uid_shuid.  */
    max_uid = get_max_uid ();
  
-   max_labelno = max_label_num ();
-   min_labelno = get_first_label_num ();
-   label_align = (struct label_alignment *)
-     xcalloc ((max_labelno - min_labelno + 1), sizeof (struct label_alignment));
- 
    uid_shuid = (int *) xmalloc (max_uid * sizeof *uid_shuid);
  
    /* Initialize label_align and set up uid_shuid to be strictly
--- 1046,1054 ----
*************** shorten_branches (first)
*** 996,1019 ****
        int log;
  
        INSN_SHUID (insn) = i++;
!       if (INSN_P (insn))
! 	{
! 	  /* reorg might make the first insn of a loop being run once only,
!              and delete the label in front of it.  Then we want to apply
!              the loop alignment to the new label created by reorg, which
!              is separated by the former loop start insn from the
! 	     NOTE_INSN_LOOP_BEG.  */
! 	}
!       else if (GET_CODE (insn) == CODE_LABEL)
  	{
  	  rtx next;
  
! 	  log = LABEL_ALIGN (insn);
! 	  if (max_log < log)
! 	    {
! 	      max_log = log;
! 	      max_skip = LABEL_ALIGN_MAX_SKIP;
! 	    }
  	  next = NEXT_INSN (insn);
  	  /* ADDR_VECs only take room if read-only data goes into the text
  	     section.  */
--- 1065,1076 ----
        int log;
  
        INSN_SHUID (insn) = i++;
!       if (GET_CODE (insn) == CODE_LABEL)
  	{
  	  rtx next;
  
! 	  if (CODE_LABEL_NUMBER (insn) > max_labelno)
! 	    continue;
  	  next = NEXT_INSN (insn);
  	  /* ADDR_VECs only take room if read-only data goes into the text
  	     section.  */
*************** shorten_branches (first)
*** 1028,1098 ****
  		if (GET_CODE (nextbody) == ADDR_VEC
  		    || GET_CODE (nextbody) == ADDR_DIFF_VEC)
  		  {
  		    log = ADDR_VEC_ALIGN (next);
  		    if (max_log < log)
  		      {
  			max_log = log;
  			max_skip = LABEL_ALIGN_MAX_SKIP;
  		      }
  		  }
  	      }
- 	  LABEL_TO_ALIGNMENT (insn) = max_log;
- 	  LABEL_TO_MAX_SKIP (insn) = max_skip;
- 	  max_log = 0;
- 	  max_skip = 0;
- 	}
-       else if (GET_CODE (insn) == BARRIER)
- 	{
- 	  rtx label;
- 
- 	  for (label = insn; label && ! INSN_P (label);
- 	       label = NEXT_INSN (label))
- 	    if (GET_CODE (label) == CODE_LABEL)
- 	      {
- 		log = LABEL_ALIGN_AFTER_BARRIER (insn);
- 		if (max_log < log)
- 		  {
- 		    max_log = log;
- 		    max_skip = LABEL_ALIGN_AFTER_BARRIER_MAX_SKIP;
- 		  }
- 		break;
- 	      }
  	}
-       /* Again, we allow NOTE_INSN_LOOP_BEG - INSN - CODE_LABEL
- 	 sequences in order to handle reorg output efficiently.  */
-       else if (GET_CODE (insn) == NOTE
- 	       && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
- 	{
- 	  rtx label;
- 	  int nest = 0;
- 
- 	  /* Search for the label that starts the loop.
- 	     Don't skip past the end of the loop, since that could
- 	     lead to putting an alignment where it does not belong.
- 	     However, a label after a nested (non-)loop would be OK.  */
- 	  for (label = insn; label; label = NEXT_INSN (label))
- 	    {
- 	      if (GET_CODE (label) == NOTE
- 		  && NOTE_LINE_NUMBER (label) == NOTE_INSN_LOOP_BEG)
- 		nest++;
- 	      else if (GET_CODE (label) == NOTE
- 		       && NOTE_LINE_NUMBER (label) == NOTE_INSN_LOOP_END
- 		       && --nest == 0)
- 		break;
- 	      else if (GET_CODE (label) == CODE_LABEL)
- 		{
- 		  log = LOOP_ALIGN (label);
- 		  if (max_log < log)
- 		    {
- 		      max_log = log;
- 		      max_skip = LOOP_ALIGN_MAX_SKIP;
- 		    }
- 		  break;
- 		}
- 	    }
- 	}
-       else
- 	continue;
      }
  #ifdef HAVE_ATTR_length
  
--- 1085,1104 ----
  		if (GET_CODE (nextbody) == ADDR_VEC
  		    || GET_CODE (nextbody) == ADDR_DIFF_VEC)
  		  {
+ 		    max_log = LABEL_ALIGN (insn);
+ 		    max_skip = LABEL_ALIGN_MAX_SKIP;
+ 
  		    log = ADDR_VEC_ALIGN (next);
  		    if (max_log < log)
  		      {
  			max_log = log;
  			max_skip = LABEL_ALIGN_MAX_SKIP;
  		      }
+ 		    LABEL_TO_ALIGNMENT (insn) = max_log;
+ 		    LABEL_TO_MAX_SKIP (insn) = max_skip;
  		  }
  	      }
  	}
      }
  #ifdef HAVE_ATTR_length
  
*************** shorten_branches (first)
*** 1119,1125 ****
      {
        int uid = INSN_UID (seq);
        int log;
!       log = (GET_CODE (seq) == CODE_LABEL ? LABEL_TO_ALIGNMENT (seq) : 0);
        uid_align[uid] = align_tab[0];
        if (log)
  	{
--- 1125,1133 ----
      {
        int uid = INSN_UID (seq);
        int log;
!       log = (GET_CODE (seq) == CODE_LABEL
! 	     && CODE_LABEL_NUMBER (seq) <= max_labelno
! 	     ? LABEL_TO_ALIGNMENT (seq) : 0);
        uid_align[uid] = align_tab[0];
        if (log)
  	{
*************** shorten_branches (first)
*** 1168,1174 ****
  		  max = shuid;
  		  max_lab = lab;
  		}
! 	      if (min_align > LABEL_TO_ALIGNMENT (lab))
  		min_align = LABEL_TO_ALIGNMENT (lab);
  	    }
  	  XEXP (pat, 2) = gen_rtx_LABEL_REF (VOIDmode, min_lab);
--- 1176,1184 ----
  		  max = shuid;
  		  max_lab = lab;
  		}
! 	      if (CODE_LABEL_NUMBER (lab) > max_labelno
! 		min_align = 0;
! 	      else if (min_align > LABEL_TO_ALIGNMENT (lab))
  		min_align = LABEL_TO_ALIGNMENT (lab);
  	    }
  	  XEXP (pat, 2) = gen_rtx_LABEL_REF (VOIDmode, min_lab);
*************** shorten_branches (first)
*** 1195,1201 ****
  
        insn_lengths[uid] = 0;
  
!       if (GET_CODE (insn) == CODE_LABEL)
  	{
  	  int log = LABEL_TO_ALIGNMENT (insn);
  	  if (log)
--- 1205,1212 ----
  
        insn_lengths[uid] = 0;
  
!       if (GET_CODE (insn) == CODE_LABEL
! 	  && CODE_LABEL_NUMBER (insn) <= max_labelno)
  	{
  	  int log = LABEL_TO_ALIGNMENT (insn);
  	  if (log)
*************** shorten_branches (first)
*** 1304,1310 ****
  
  	  uid = INSN_UID (insn);
  
! 	  if (GET_CODE (insn) == CODE_LABEL)
  	    {
  	      int log = LABEL_TO_ALIGNMENT (insn);
  	      if (log > insn_current_align)
--- 1315,1322 ----
  
  	  uid = INSN_UID (insn);
  
! 	  if (GET_CODE (insn) == CODE_LABEL
! 	      && CODE_LABEL_NUMBER (insn) <= max_labelno)
  	    {
  	      int log = LABEL_TO_ALIGNMENT (insn);
  	      if (log > insn_current_align)
*************** shorten_branches (first)
*** 1352,1358 ****
  		   prev = PREV_INSN (prev))
  		if (varying_length[INSN_UID (prev)] & 2)
  		  {
! 		    rel_align = LABEL_TO_ALIGNMENT (prev);
  		    break;
  		  }
  
--- 1364,1373 ----
  		   prev = PREV_INSN (prev))
  		if (varying_length[INSN_UID (prev)] & 2)
  		  {
! 		    if (CODE_LABEL_NUMBER (prev) <= max_labelno)
! 		      rel_align = LABEL_TO_ALIGNMENT (prev);
! 		    else
! 		      rel_align = 0;
  		    break;
  		  }
  
*** output.h.nop	Thu Jul 26 16:13:30 2001
--- output.h	Thu Jul 26 16:13:33 2001
*************** along with GNU CC; see the file COPYING.
*** 20,25 ****
--- 20,28 ----
  the Free Software Foundation, 59 Temple Place - Suite 330,
  Boston, MA 02111-1307, USA.  */
  
+ /* Compute branch alignments based on frequency information in the CFG.  */
+ extern void compute_alignments  PARAMS ((void))
+ 
  /* Initialize data in final at the beginning of a compilation.  */
  extern void init_final		PARAMS ((const char *));
  
*** toplev.c.nop	Thu Jul 26 02:44:34 2001
--- toplev.c	Thu Jul 26 15:10:45 2001
*************** rest_of_compilation (decl)
*** 3594,3602 ****
--- 3594,3607 ----
  	  cleanup_cfg (CLEANUP_EXPENSIVE | CLEANUP_POST_REGSTACK);
  	}
  
+       /* Output final version of CFG.  */
+       if (rtl_dump_file)
+         dump_flow_info (rtl_dump_file);
+ 
        close_dump_file (DFI_bbro, print_rtl_with_bb, insns);
        timevar_pop (TV_REORDER_BLOCKS);
      }
+   compute_alignments ();
  
    /* If a machine dependent reorganization is needed, call it.  */
  #ifdef MACHINE_DEPENDENT_REORG


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]