code growth

Jan Hubicka jh@suse.cz
Sat Aug 25 06:44:00 GMT 2001


> On Fri, Aug 24, 2001 at 03:31:35PM +0200, Jan Hubicka wrote:
> > The other way I see is to move the loop directly into i386.c, so that I
> > compute it in the epilogue expander, but IMO that makes the somewhat strong
> > assumption that the CFG is correct and the frequencies are up to date at
> > the time the epilogue is emitted.  This should hold now, so I leave it to
> > you to choose which looks like the better alternative.
> 
> I think the most reasonable thing is to leave the function in predict.c,
> but call it from the i386 prologue expander.  Pass in "limit"
> as a parameter.
Here it is.  Oops, I've called it "threshold", but I guess that is OK.

Sat Aug 25 15:42:17 CEST 2001  Jan Hubicka  <jh@suse.cz>
	* predict.c (expensive_function_p): New.
	* rtl.h (expensive_function_p): Declare.
	* i386.c (FAST_PROLOGUE_INSN_COUNT): New constant.
	(use_fast_prologue_epilogue): New static variable.
	(expand_prologue): Set it; emit short prologues if unset.
	(expand_epilogue): Likewise.
Index: predict.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/predict.c,v
retrieving revision 1.38
diff -c -3 -p -r1.38 predict.c
*** predict.c	2001/08/22 14:35:31	1.38
--- predict.c	2001/08/25 13:39:34
*************** counts_to_freqs ()
*** 780,785 ****
--- 779,829 ----
        bb->frequency = ((bb->count * BB_FREQ_MAX + count_max / 2)
  		       / count_max);
      }
+ }
+ 
+ /* Return true if the function is likely to be expensive, so there is no
+    point in optimizing the performance of its prologue and epilogue, or in
+    inlining it, at the expense of code size growth.  THRESHOLD is the
+    average number of instructions the function may execute per invocation
+    and still be considered not expensive; it must not exceed BB_FREQ_MAX.  */
+ bool
+ expensive_function_p (threshold)
+ 	int threshold;
+ {
+   unsigned int sum = 0;
+   int i;
+   unsigned int limit;
+ 
+   /* Scaled frequencies make the estimate inaccurate for large thresholds.  */
+   if (threshold > BB_FREQ_MAX)
+     abort ();
+ 
+   /* Frequencies are out of range.  This either means that the function
+      contains an internal loop executing more than BB_FREQ_MAX times or
+      profile feedback is available and the function was never executed.  */
+   if (ENTRY_BLOCK_PTR->frequency == 0)
+     return true;
+ 
+   /* At most BB_FREQ_MAX^2, so no overflow.  SUM adds the block frequency of
+      every active insn; exceeding LIMIT means the function is expensive.  */
+   limit = ENTRY_BLOCK_PTR->frequency * threshold;
+   for (i = 0; i < n_basic_blocks; i++)
+     {
+       basic_block bb = BASIC_BLOCK (i);
+       rtx insn;
+ 
+       for (insn = bb->head; insn != NEXT_INSN (bb->end);
+ 	   insn = NEXT_INSN (insn))
+ 	{
+ 	  if (active_insn_p (insn))
+ 	    {
+ 	      sum += bb->frequency;
+ 	      if (sum > limit)
+ 		return true;
+ 	    }
+ 	}
+     }
+   return false;
  }
  
  /* Estimate basic blocks frequency by given branch probabilities.  */
Index: rtl.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/rtl.h,v
retrieving revision 1.291
diff -c -3 -p -r1.291 rtl.h
*** rtl.h	2001/08/22 14:51:31	1.291
--- rtl.h	2001/08/25 13:39:34
*************** extern void if_convert			PARAMS ((int));
*** 2028,2031 ****
--- 2028,2032 ----
  
  /* In predict.c */
  extern void invert_br_probabilities	PARAMS ((rtx));
+ extern bool expensive_function_p	PARAMS ((int));
  #endif /* ! GCC_RTL_H */
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.299
diff -c -3 -p -r1.299 i386.c
*** i386.c	2001/08/18 20:25:52	1.299
--- i386.c	2001/08/25 13:39:36
*************** const int x86_accumulate_outgoing_args =
*** 319,324 ****
--- 319,332 ----
  const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
  const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
  
+ /* In case the average insn count for a single function invocation is
+    lower than this constant, emit fast (but longer) prologue and
+    epilogue code.  */
+ #define FAST_PROLOGUE_INSN_COUNT 30
+ /* Set by prologue expander and used by epilogue expander to determine
+    the style used.  */
+ static int use_fast_prologue_epilogue;
+ 
  #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
  
  const char * const hi_reg_name[] = HI_REGISTER_NAMES;
*************** ix86_expand_prologue ()
*** 2653,2661 ****
  				  || current_function_uses_const_pool)
  		      && !TARGET_64BIT);
    struct ix86_frame frame;
!   int use_mov = (TARGET_PROLOGUE_USING_MOVE && !optimize_size);
    HOST_WIDE_INT allocate;
  
    ix86_compute_frame_layout (&frame);
  
    /* Note: AT&T enter does NOT have reversed args.  Enter is probably
--- 2661,2675 ----
  				  || current_function_uses_const_pool)
  		      && !TARGET_64BIT);
    struct ix86_frame frame;
!   int use_mov = 0;
    HOST_WIDE_INT allocate;
  
+   if (TARGET_PROLOGUE_USING_MOVE && !optimize_size)
+     {
+       use_fast_prologue_epilogue
+ 	 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
+       use_mov = use_fast_prologue_epilogue;
+     }
    ix86_compute_frame_layout (&frame);
  
    /* Note: AT&T enter does NOT have reversed args.  Enter is probably
*************** ix86_expand_epilogue (style)
*** 2794,2803 ****
       tuning in future.  */
    if ((!sp_valid && frame.nregs <= 1)
        || (TARGET_EPILOGUE_USING_MOVE && !optimize_size
  	  && (frame.nregs > 1 || frame.to_allocate))
        || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
        || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
! 	  && frame.nregs == 1)
        || style == 2)
      {
        /* Restore registers.  We can use ebp or esp to address the memory
--- 2808,2818 ----
       tuning in future.  */
    if ((!sp_valid && frame.nregs <= 1)
        || (TARGET_EPILOGUE_USING_MOVE && !optimize_size
+ 	  && use_fast_prologue_epilogue
  	  && (frame.nregs > 1 || frame.to_allocate))
        || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
        || (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
! 	  && use_fast_prologue_epilogue && frame.nregs == 1)
        || style == 2)
      {
        /* Restore registers.  We can use ebp or esp to address the memory
*************** ix86_expand_epilogue (style)
*** 2844,2850 ****
  		    GEN_INT (frame.to_allocate
  			     + frame.nregs * UNITS_PER_WORD)));
        /* If not an i386, mov & pop is faster than "leave".  */
!       else if (TARGET_USE_LEAVE || optimize_size)
  	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
        else
  	{
--- 2859,2865 ----
  		    GEN_INT (frame.to_allocate
  			     + frame.nregs * UNITS_PER_WORD)));
        /* If not an i386, mov & pop is faster than "leave".  */
!       else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
  	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
        else
  	{



More information about the Gcc-patches mailing list