This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

avoid code bloat in i386 prologues/epilogues


Hi,
One of the causes of code growth in recent development was my change to
use moves instead of pushes in prologues and epilogues.  This change,
according to performance testing, caused important speedups on both Athlon
and PPC, but also caused code growth of about 6%.

This patch cuts this down to about 2.5% by estimating the number of insns
executed at each invocation of a function, so fast prologues are used
only for functions that exit early - the only case where it is profitable.

This idea is a little bit difficult to tune, as most of the code overhead
of the prologues was in small functions, so the threshold needs some care.
30 was my second try (after 10, where I measured a performance degradation)
and seems to work well.

On an unrelated note, I also fix -mno-accumulate-outgoing-args so that it
works again.
Honza

Mon Jul 30 13:09:49 CEST 2001  Jan Hubicka  <jh@suse.cz>

	* function.h (struct function): Add avg_insn_count field.
	* predict.c (count_avg_insns): New function.
	(estimate_bb_frequencies): Call it.
	* i386.c (FAST_PROLOGUE_INSN_COUNT): New constant.
	(ix86_expand_prologue): Use fast prologue only for fast functions.
	(ix86_expand_epilogue): Likewise.
	* i386.h (no-accumulate-outgoing-args): Use proper mask.

Index: function.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/function.h,v
retrieving revision 1.63
diff -c -3 -p -r1.63 function.h
*** function.h	2001/06/21 16:50:55	1.63
--- function.h	2001/07/30 09:52:29
*************** struct function
*** 477,482 ****
--- 477,486 ----
  
    /* Nonzero if the current function needs an lsda for exception handling.  */
    unsigned int uses_eh_lsda : 1;
+ 
+   /* Average number of instructions executed at each invocation of this
+      function.  */
+   unsigned int avg_insn_count;
  };
  
  /* The function currently being compiled.  */
Index: predict.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/predict.c,v
retrieving revision 1.27
diff -c -3 -p -r1.27 predict.c
*** predict.c	2001/07/10 03:50:25	1.27
--- predict.c	2001/07/30 09:52:46
*************** static void estimate_loops_at_level	 PAR
*** 64,69 ****
--- 64,70 ----
  static void propagate_freq		 PARAMS ((basic_block));
  static void estimate_bb_frequencies	 PARAMS ((struct loops *));
  static void counts_to_freqs		 PARAMS ((void));
+ static void count_avg_insns		 PARAMS ((void));
  
  /* Information we hold about each branch predictor.
     Filled using information from predict.def.  */
*************** counts_to_freqs ()
*** 718,724 ****
--- 754,796 ----
  		       / count_max);
      }
  }
+ 
+ /* Estimate the average number of insns executed per invocation of the
+    current function and store it in cfun->avg_insn_count.  Every insn is
+    weighted by the frequency of its basic block; the weighted total is
+    then divided by the frequency of the entry block.  */
+ static void
+ count_avg_insns ()
+ {
+   unsigned int sum = 0;
+   int i;
  
+   /* A zero entry block frequency would mean dividing by zero below, so
+      no average can be computed; record BB_FREQ_MAX instead.  */
+   if (ENTRY_BLOCK_PTR->frequency == 0)
+     {
+       cfun->avg_insn_count = BB_FREQ_MAX;
+       return;
+     }
+ 
+   for (i = 0; i < n_basic_blocks; i++)
+     {
+       basic_block bb = BASIC_BLOCK (i);
+       rtx insn;
+ 
+       for (insn = bb->head; insn != NEXT_INSN (bb->end);
+ 	   insn = NEXT_INSN (insn))
+ 	{
+ 	  /* Add the block frequency once for each insn accepted by INSN_P.  */
+ 	  if (INSN_P (insn))
+ 	    sum += bb->frequency;
+ 	}
+     }
+   cfun->avg_insn_count = sum / ENTRY_BLOCK_PTR->frequency;
+   if (rtl_dump_file)
+     fprintf (rtl_dump_file, "avg_insn_count :%u\n", cfun->avg_insn_count);
+ }
+ 
  /* Estimate basic blocks frequency by given branch probabilities.  */
  static void
  estimate_bb_frequencies (loops)
*************** estimate_bb_frequencies (loops)
*** 733,738 ****
--- 798,804 ----
    if (flag_branch_probabilities)
      {
        counts_to_freqs ();
+       count_avg_insns ();
        return;
      }
  
*************** estimate_bb_frequencies (loops)
*** 830,835 ****
--- 896,903 ----
        bb->frequency = (BLOCK_INFO (bb)->frequency * BB_FREQ_MAX / freq_max
  		       + 0.5);
      }
+ 
+   count_avg_insns ();
  
    free (ei);
    free (bi);
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.288
diff -c -3 -p -r1.288 i386.c
*** i386.c	2001/07/22 21:42:35	1.288
--- i386.c	2001/07/30 09:54:04
*************** Boston, MA 02111-1307, USA.  */
*** 43,48 ****
--- 43,53 ----
  #include "target.h"
  #include "target-def.h"
  
+ /* If the average insn count of a single function invocation is lower
+    than this constant, emit fast (but longer) prologue and epilogue
+    code.  */
+ #define FAST_PROLOGUE_INSN_COUNT 30
+ 
  #ifndef CHECK_STACK_LIMIT
  #define CHECK_STACK_LIMIT -1
  #endif
*************** ix86_expand_prologue ()
*** 2627,2633 ****
  				  || current_function_uses_const_pool)
  		      && !TARGET_64BIT);
    struct ix86_frame frame;
!   int use_mov = (TARGET_PROLOGUE_USING_MOVE && !optimize_size);
    HOST_WIDE_INT allocate;
  
    ix86_compute_frame_layout (&frame);
--- 2632,2639 ----
  				  || current_function_uses_const_pool)
  		      && !TARGET_64BIT);
    struct ix86_frame frame;
!   int use_mov = (TARGET_PROLOGUE_USING_MOVE && !optimize_size
! 		 && cfun->avg_insn_count < FAST_PROLOGUE_INSN_COUNT);
    HOST_WIDE_INT allocate;
  
    ix86_compute_frame_layout (&frame);
*************** ix86_expand_epilogue (style)
*** 2768,2776 ****
--- 2774,2784 ----
       tuning in future.  */
    if ((!sp_valid && frame.nregs <= 1)
        || (TARGET_EPILOGUE_USING_MOVE && !optimize_size
+ 	  && cfun->avg_insn_count < FAST_PROLOGUE_INSN_COUNT
  	  && (frame.nregs > 1 || frame.to_allocate))
        || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
        || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
+ 	  && cfun->avg_insn_count < FAST_PROLOGUE_INSN_COUNT
  	  && frame.nregs == 1)
        || style == 2)
      {
*************** ix86_expand_epilogue (style)
*** 2818,2824 ****
  		    GEN_INT (frame.to_allocate
  			     + frame.nregs * UNITS_PER_WORD)));
        /* If not an i386, mov & pop is faster than "leave".  */
!       else if (TARGET_USE_LEAVE || optimize_size)
  	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
        else
  	{
--- 2826,2833 ----
  		    GEN_INT (frame.to_allocate
  			     + frame.nregs * UNITS_PER_WORD)));
        /* If not an i386, mov & pop is faster than "leave".  */
!       else if (TARGET_USE_LEAVE || optimize_size
! 	       || cfun->avg_insn_count > FAST_PROLOGUE_INSN_COUNT)
  	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
        else
  	{
Index: config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.h,v
retrieving revision 1.195
diff -c -3 -p -r1.195 i386.h
*** i386.h	2001/07/15 16:59:06	1.195
--- i386.h	2001/07/30 09:54:05
*************** extern const int x86_epilogue_using_move
*** 330,336 ****
      N_("Do not use push instructions to save outgoing arguments") },	      \
    { "accumulate-outgoing-args",	MASK_ACCUMULATE_OUTGOING_ARGS,		      \
      N_("Use push instructions to save outgoing arguments") },		      \
!   { "no-accumulate-outgoing-args",-MASK_ACCUMULATE_OUTGOING_ARGS,	      \
      N_("Do not use push instructions to save outgoing arguments") },	      \
    { "mmx",			 MASK_MMX, N_("Support MMX builtins") },      \
    { "no-mmx",			-MASK_MMX,				      \
--- 330,336 ----
      N_("Do not use push instructions to save outgoing arguments") },	      \
    { "accumulate-outgoing-args",	MASK_ACCUMULATE_OUTGOING_ARGS,		      \
      N_("Use push instructions to save outgoing arguments") },		      \
!   { "no-accumulate-outgoing-args",MASK_NO_ACCUMULATE_OUTGOING_ARGS,	      \
      N_("Do not use push instructions to save outgoing arguments") },	      \
    { "mmx",			 MASK_MMX, N_("Support MMX builtins") },      \
    { "no-mmx",			-MASK_MMX,				      \


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]