code growth
Jan Hubicka
jh@suse.cz
Sat Aug 25 06:44:00 GMT 2001
> On Fri, Aug 24, 2001 at 03:31:35PM +0200, Jan Hubicka wrote:
> > The other way I see is to move the loop directly into i386.c, so I will
> > compute it in the epilogue expander, but IMO it makes a somewhat strong
> > assumption that the CFG is correct and frequencies are up to date at the
> > time the epilogue is emitted. This should hold now, so I leave it to
> > your choice which looks like the better alternative.
>
> I think the most reasonable thing is to leave the function in predict.c,
> but call it from the i386 prologue expander. Pass in "limit"
> as a parameter.
Here it is. Oops, I've called it "threshold", but I guess that is OK.
Sat Aug 25 15:42:17 CEST 2001 Jan Hubicka <jh@suse.cz>
* predict.c (expensive_function_p): New.
* rtl.h (expensive_function_p): Declare.
* i386.c (FAST_PROLOGUE_INSN_COUNT): New constant.
(use_fast_prologue_epilogue): New static variable.
(expand_prologue): Set it; emit short prologues if unset.
(expand_epilogue): Likewise.
Index: predict.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/predict.c,v
retrieving revision 1.38
diff -c -3 -p -r1.38 predict.c
*** predict.c 2001/08/22 14:35:31 1.38
--- predict.c 2001/08/25 13:39:34
*************** counts_to_freqs ()
*** 780,785 ****
--- 779,829 ----
bb->frequency = ((bb->count * BB_FREQ_MAX + count_max / 2)
/ count_max);
}
+ }
+
+ /* Return true if the function is likely to be expensive, so there is no
+ point in optimizing the performance of its prologue or epilogue, or in
+ inlining, at the expense of code size growth. THRESHOLD is the limit on
+ the number of instructions the function may execute on average and still
+ be considered not expensive. */
+ bool
+ expensive_function_p (threshold)
+ int threshold;
+ {
+ unsigned int sum = 0;
+ int i;
+ int limit;
+
+ /* We cannot compute accurately for large thresholds due to scaled
+ frequencies. */
+ if (threshold > BB_FREQ_MAX)
+ abort ();
+
+ /* Frequencies are out of range. This either means that the function
+ contains an internal loop executing more than BB_FREQ_MAX times, or profile
+ feedback is available and the function has not been executed at all. */
+ if (ENTRY_BLOCK_PTR->frequency == 0)
+ return true;
+
+ /* At most BB_FREQ_MAX^2, so overflow won't happen. */
+ limit = ENTRY_BLOCK_PTR->frequency * threshold;
+ for (i = 0; i < n_basic_blocks; i++)
+ {
+ basic_block bb = BASIC_BLOCK (i);
+ rtx insn;
+
+ for (insn = bb->head; insn != NEXT_INSN (bb->end);
+ insn = NEXT_INSN (insn))
+ {
+ if (active_insn_p (insn))
+ {
+ sum += bb->frequency;
+ if (sum > limit)
+ return true;
+ }
+ }
+ }
+ return false;
}
/* Estimate basic blocks frequency by given branch probabilities. */
Index: rtl.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/rtl.h,v
retrieving revision 1.291
diff -c -3 -p -r1.291 rtl.h
*** rtl.h 2001/08/22 14:51:31 1.291
--- rtl.h 2001/08/25 13:39:34
*************** extern void if_convert PARAMS ((int));
*** 2028,2031 ****
--- 2028,2032 ----
/* In predict.c */
extern void invert_br_probabilities PARAMS ((rtx));
+ extern bool expensive_function_p PARAMS ((int));
#endif /* ! GCC_RTL_H */
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.299
diff -c -3 -p -r1.299 i386.c
*** i386.c 2001/08/18 20:25:52 1.299
--- i386.c 2001/08/25 13:39:36
*************** const int x86_accumulate_outgoing_args =
*** 319,324 ****
--- 319,332 ----
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
+ /* In case the average insn count for a single function invocation is
+ lower than this constant, emit fast (but longer) prologue and
+ epilogue code. */
+ #define FAST_PROLOGUE_INSN_COUNT 30
+ /* Set by prologue expander and used by epilogue expander to determine
+ the style used. */
+ static int use_fast_prologue_epilogue;
+
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
const char * const hi_reg_name[] = HI_REGISTER_NAMES;
*************** ix86_expand_prologue ()
*** 2653,2661 ****
|| current_function_uses_const_pool)
&& !TARGET_64BIT);
struct ix86_frame frame;
! int use_mov = (TARGET_PROLOGUE_USING_MOVE && !optimize_size);
HOST_WIDE_INT allocate;
ix86_compute_frame_layout (&frame);
/* Note: AT&T enter does NOT have reversed args. Enter is probably
--- 2661,2675 ----
|| current_function_uses_const_pool)
&& !TARGET_64BIT);
struct ix86_frame frame;
! int use_mov = 0;
HOST_WIDE_INT allocate;
+ if (TARGET_PROLOGUE_USING_MOVE && !optimize_size)
+ {
+ use_fast_prologue_epilogue
+ = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
+ use_mov = use_fast_prologue_epilogue;
+ }
ix86_compute_frame_layout (&frame);
/* Note: AT&T enter does NOT have reversed args. Enter is probably
*************** ix86_expand_epilogue (style)
*** 2794,2803 ****
tuning in future. */
if ((!sp_valid && frame.nregs <= 1)
|| (TARGET_EPILOGUE_USING_MOVE && !optimize_size
&& (frame.nregs > 1 || frame.to_allocate))
|| (frame_pointer_needed && !frame.nregs && frame.to_allocate)
|| (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
! && frame.nregs == 1)
|| style == 2)
{
/* Restore registers. We can use ebp or esp to address the memory
--- 2808,2818 ----
tuning in future. */
if ((!sp_valid && frame.nregs <= 1)
|| (TARGET_EPILOGUE_USING_MOVE && !optimize_size
+ && use_fast_prologue_epilogue
&& (frame.nregs > 1 || frame.to_allocate))
|| (frame_pointer_needed && !frame.nregs && frame.to_allocate)
|| (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
! && use_fast_prologue_epilogue && frame.nregs == 1)
|| style == 2)
{
/* Restore registers. We can use ebp or esp to address the memory
*************** ix86_expand_epilogue (style)
*** 2844,2850 ****
GEN_INT (frame.to_allocate
+ frame.nregs * UNITS_PER_WORD)));
/* If not an i386, mov & pop is faster than "leave". */
! else if (TARGET_USE_LEAVE || optimize_size)
emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
else
{
--- 2859,2865 ----
GEN_INT (frame.to_allocate
+ frame.nregs * UNITS_PER_WORD)));
/* If not an i386, mov & pop is faster than "leave". */
! else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
else
{
More information about the Gcc-patches
mailing list