This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Re: code growth


> On Thu, Aug 23, 2001 at 03:48:59PM +0200, Jan Hubicka wrote:
> > > I think this should be given some arbitrary upper bound (100?)
> > > that just means "large".
> > Actually it does, see the code - when I can't represent the frequencies,
> > I just fall into BLOCK_FREQ_BASE.
> 
> Not in the code I'm looking at.  There, you iterate over every
> single instruction in the function.  
> 
> > Note that I am summing normarized frequencies, that are all BLOCK_FREQ_BASE
> > limited, this means that I count maximally 10000 for each instructions that
> > should not be problem for overflows.
> 
> Which limits you to a potential maximum of 400k instructions before
> overflow occurs.  You think functions are always smaller than that?
> 
> Besides: what use is it to iterate over all those thousands
> of instructions just to say "yes, this function is large"?
> 
> I think I'd really prefer this be a boolean flag than any sort
> of absolute count.
> 
> Also, I just noticed that you're not counting real instructions,
> but anything in the insn stream.  Which means you're counting 
> notes, which means that you'll get different answers depending
> on whether debugging is enabled.  Use active_insn_p.

Hi,
here is the updated patch - I've fixed the overflow (I was off by two
orders of magnitude when I guessed that I was safe from overflows) and
replaced INSN_P by active_insn_p (as I see, I don't want to count USE
and CLOBBER instructions).

I am still not sure about the boolean flag. I don't see a way to compute it
independently in predict.c without a target macro specifying the threshold,
which looks uglier than the current choice (as the value has a pretty well
understandable meaning).

I also believe that later we may find a use for the value - such as
for function inlining (we want to inline fast functions with a low
average count).

The other way I see is to move the loop directly into i386.c, so that I
compute it in the epilogue expander, but IMO that makes the somewhat strong
assumption that the CFG is correct and the frequencies are up to date at
the time the epilogue is emitted.  This should hold now, so I leave it to
you to choose which looks like the better alternative.

Honza

Bootstrapping/regtesting i686

Mon Jul 30 13:09:49 CEST 2001  Jan Hubicka  <jh@suse.cz>

	* function.h (struct function): Add avg_insn_count field.
	* predict.c (count_avg_insns): New function.
	(estimate_bb_frequencies): Call it.
	* i386.c (FAST_PROLOGUE_INSN_COUNT): New constant.
	(ix86_expand_prologue): Use fast prologue only for fast functions.
	(ix86_expand_epilogue): Likewise.
	* i386.h (no-accumulate-outgoing-args): Use proper mask.

Index: function.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/function.h,v
retrieving revision 1.64
diff -c -3 -p -r1.64 function.h
*** function.h	2001/08/22 14:35:11	1.64
--- function.h	2001/08/24 13:25:53
*************** struct function
*** 477,482 ****
--- 477,486 ----
  
    /* Nonzero if the current function needs an lsda for exception handling.  */
    unsigned int uses_eh_lsda : 1;
+ 
+   /* Average number of instructions executed at each invocation of this
+      function.  BB_FREQ_MAX in case function is too large for estimating.  */
+   unsigned int avg_insn_count;
  };
  
  /* The function currently being compiled.  */
Index: predict.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/predict.c,v
retrieving revision 1.38
diff -c -3 -p -r1.38 predict.c
*** predict.c	2001/08/22 14:35:31	1.38
--- predict.c	2001/08/24 13:25:53
*************** static void estimate_loops_at_level	 PAR
*** 64,69 ****
--- 64,70 ----
  static void propagate_freq		 PARAMS ((basic_block));
  static void estimate_bb_frequencies	 PARAMS ((struct loops *));
  static void counts_to_freqs		 PARAMS ((void));
+ static void count_avg_insns		 PARAMS ((void));
  
  /* Information we hold about each branch predictor.
     Filled using information from predict.def.  */
*************** counts_to_freqs ()
*** 781,787 ****
--- 782,825 ----
  		       / count_max);
      }
  }
+ static void
+ count_avg_insns()
+ {
+   unsigned int sum = 0;
+   int i;
+   int limit;
+ 
+   /* Frequencies are out of range.  We can't say more.  */
+   if (ENTRY_BLOCK_PTR->frequency == 0)
+     {
+       cfun->avg_insn_count = BB_FREQ_MAX;
+       return;
+     }
+     
+   /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
+   limit = ENTRY_BLOCK_PTR->frequency * BB_FREQ_MAX;
+   for (i = 0; i < n_basic_blocks; i++)
+     {
+       basic_block bb = BASIC_BLOCK (i);
+       rtx insn;
  
+       for (insn = bb->head; insn != NEXT_INSN (bb->end) && sum < limit;
+ 	   insn = NEXT_INSN (insn))
+ 	{
+ 	  if (active_insn_p (insn))
+ 	    {
+ 	      sum += bb->frequency;
+ 	    }
+ 	}
+     }
+   if (sum >= limit)
+     cfun->avg_insn_count = BB_FREQ_MAX;
+   else
+     cfun->avg_insn_count = sum / ENTRY_BLOCK_PTR->frequency;
+   if (rtl_dump_file)
+     fprintf (rtl_dump_file, "avg_insn_count :%i\n", cfun->avg_insn_count);
+ }
+ 
  /* Estimate basic blocks frequency by given branch probabilities.  */
  static void
  estimate_bb_frequencies (loops)
*************** estimate_bb_frequencies (loops)
*** 797,802 ****
--- 835,841 ----
    if (flag_branch_probabilities)
      {
        counts_to_freqs ();
+       count_avg_insns ();
        return;
      }
  
*************** estimate_bb_frequencies (loops)
*** 894,899 ****
--- 933,940 ----
        bb->frequency = (BLOCK_INFO (bb)->frequency * BB_FREQ_MAX / freq_max
  		       + 0.5);
      }
+ 
+   count_avg_insns ();
  
    free (ei);
    free (bi);
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.299
diff -c -3 -p -r1.299 i386.c
*** i386.c	2001/08/18 20:25:52	1.299
--- i386.c	2001/08/24 13:25:57
*************** Boston, MA 02111-1307, USA.  */
*** 43,48 ****
--- 43,53 ----
  #include "target.h"
  #include "target-def.h"
  
+ /* In case the average insn count for single function invocation is
+    lower than this constant, emit fast (but longer) prologue and
+    epilogue code.  */
+ #define FAST_PROLOGUE_INSN_COUNT 30
+ 
  #ifndef CHECK_STACK_LIMIT
  #define CHECK_STACK_LIMIT -1
  #endif
*************** ix86_expand_prologue ()
*** 2653,2659 ****
  				  || current_function_uses_const_pool)
  		      && !TARGET_64BIT);
    struct ix86_frame frame;
!   int use_mov = (TARGET_PROLOGUE_USING_MOVE && !optimize_size);
    HOST_WIDE_INT allocate;
  
    ix86_compute_frame_layout (&frame);
--- 2658,2665 ----
  				  || current_function_uses_const_pool)
  		      && !TARGET_64BIT);
    struct ix86_frame frame;
!   int use_mov = (TARGET_PROLOGUE_USING_MOVE && !optimize_size
! 		 && cfun->avg_insn_count < FAST_PROLOGUE_INSN_COUNT);
    HOST_WIDE_INT allocate;
  
    ix86_compute_frame_layout (&frame);
*************** ix86_expand_epilogue (style)
*** 2794,2802 ****
--- 2800,2810 ----
       tuning in future.  */
    if ((!sp_valid && frame.nregs <= 1)
        || (TARGET_EPILOGUE_USING_MOVE && !optimize_size
+ 	  && cfun->avg_insn_count < FAST_PROLOGUE_INSN_COUNT
  	  && (frame.nregs > 1 || frame.to_allocate))
        || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
        || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
+ 	  && cfun->avg_insn_count < FAST_PROLOGUE_INSN_COUNT
  	  && frame.nregs == 1)
        || style == 2)
      {
*************** ix86_expand_epilogue (style)
*** 2844,2850 ****
  		    GEN_INT (frame.to_allocate
  			     + frame.nregs * UNITS_PER_WORD)));
        /* If not an i386, mov & pop is faster than "leave".  */
!       else if (TARGET_USE_LEAVE || optimize_size)
  	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
        else
  	{
--- 2852,2859 ----
  		    GEN_INT (frame.to_allocate
  			     + frame.nregs * UNITS_PER_WORD)));
        /* If not an i386, mov & pop is faster than "leave".  */
!       else if (TARGET_USE_LEAVE || optimize_size
! 	       || cfun->avg_insn_count > FAST_PROLOGUE_INSN_COUNT)
  	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
        else
  	{
Index: config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.h,v
retrieving revision 1.198
diff -c -3 -p -r1.198 i386.h
*** i386.h	2001/08/18 20:25:52	1.198
--- i386.h	2001/08/24 13:25:58
*************** extern const int x86_epilogue_using_move
*** 330,336 ****
      N_("Do not use push instructions to save outgoing arguments") },	      \
    { "accumulate-outgoing-args",	MASK_ACCUMULATE_OUTGOING_ARGS,		      \
      N_("Use push instructions to save outgoing arguments") },		      \
!   { "no-accumulate-outgoing-args",-MASK_ACCUMULATE_OUTGOING_ARGS,	      \
      N_("Do not use push instructions to save outgoing arguments") },	      \
    { "mmx",			 MASK_MMX, N_("Support MMX builtins") },      \
    { "no-mmx",			-MASK_MMX,				      \
--- 330,336 ----
      N_("Do not use push instructions to save outgoing arguments") },	      \
    { "accumulate-outgoing-args",	MASK_ACCUMULATE_OUTGOING_ARGS,		      \
      N_("Use push instructions to save outgoing arguments") },		      \
!   { "no-accumulate-outgoing-args",MASK_NO_ACCUMULATE_OUTGOING_ARGS,	      \
      N_("Do not use push instructions to save outgoing arguments") },	      \
    { "mmx",			 MASK_MMX, N_("Support MMX builtins") },      \
    { "no-mmx",			-MASK_MMX,				      \


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]