This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
avoid code bloat in i386 prologues/epilogues
- To: gcc-patches at gcc dot gnu dot org, rth at cygnus dot com, patches at x86-64 dot org
- Subject: avoid code bloat in i386 prologues/epilogues
- From: Jan Hubicka <jh at suse dot cz>
- Date: Mon, 30 Jul 2001 13:15:05 +0200
Hi,
one of the causes of code growth in recent development was my change to
use moves instead of pushes in prologues and epilogues. This change,
according to performance testing, caused important speedups on both Athlon
and PPC, but also caused code growth of about 6%.
This patch cuts this down to about 2.5% by estimating the number of insns
executed at each invocation of a function, so fast prologues are used
only for functions exiting early - the only case where it is profitable.
This idea is a little bit difficult to tune, as most of the code-size
overhead of the fast prologues came from small functions, so the threshold
needs some care. 30 was my second try (after 10, where I measured
performance degradation) and seems to work well.
On an unrelated note, I also fix -mno-accumulate-outgoing-args so that it
works again.
Honza
Mon Jul 30 13:09:49 CEST 2001 Jan Hubicka <jh@suse.cz>
* function.h (avg_insn_count): New field.
* predict.c (count_avg_insns): New function.
(estimate_bb_frequencies): Call it.
* i386.c (FAST_PROLOGUE_INSN_COUNT): New constant.
(ix86_expand_prologue): Use fast prologue only for fast functions.
(ix86_expand_epilogue): Likewise.
* i386.h (no-accumulate-outgoing-args): Use proper mask.
Index: function.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/function.h,v
retrieving revision 1.63
diff -c -3 -p -r1.63 function.h
*** function.h 2001/06/21 16:50:55 1.63
--- function.h 2001/07/30 09:52:29
*************** struct function
*** 477,482 ****
--- 477,486 ----
/* Nonzero if the current function needs an lsda for exception handling. */
unsigned int uses_eh_lsda : 1;
+
+ /* Average number of instructions executed at each invocation of this
+ function. */
+ unsigned int avg_insn_count;
};
/* The function currently being compiled. */
Index: predict.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/predict.c,v
retrieving revision 1.27
diff -c -3 -p -r1.27 predict.c
*** predict.c 2001/07/10 03:50:25 1.27
--- predict.c 2001/07/30 09:52:46
*************** static void estimate_loops_at_level PAR
*** 64,69 ****
--- 64,70 ----
static void propagate_freq PARAMS ((basic_block));
static void estimate_bb_frequencies PARAMS ((struct loops *));
static void counts_to_freqs PARAMS ((void));
+ static void count_avg_insns PARAMS ((void));
/* Information we hold about each branch predictor.
Filled using information from predict.def. */
*************** counts_to_freqs ()
*** 718,724 ****
--- 754,789 ----
/ count_max);
}
}
+ static void
+ count_avg_insns()
+ {
+ unsigned int sum = 0;
+ int i;
+ /* Frequencies are out of range. We can't say more. */
+ if (ENTRY_BLOCK_PTR->frequency == 0)
+ {
+ cfun->avg_insn_count = BB_FREQ_MAX;
+ return;
+ }
+
+ for (i = 0; i < n_basic_blocks; i++)
+ {
+ basic_block bb = BASIC_BLOCK (i);
+ rtx insn;
+
+ for (insn = bb->head; insn != NEXT_INSN (bb->end);
+ insn = NEXT_INSN (insn))
+ {
+ if (INSN_P (insn))
+ sum += bb->frequency;
+ }
+ }
+ cfun->avg_insn_count = sum / ENTRY_BLOCK_PTR->frequency;
+ if (rtl_dump_file)
+ fprintf (rtl_dump_file, "avg_insn_count :%i\n", cfun->avg_insn_count);
+ }
+
/* Estimate basic blocks frequency by given branch probabilities. */
static void
estimate_bb_frequencies (loops)
*************** estimate_bb_frequencies (loops)
*** 733,738 ****
--- 798,804 ----
if (flag_branch_probabilities)
{
counts_to_freqs ();
+ count_avg_insns ();
return;
}
*************** estimate_bb_frequencies (loops)
*** 830,835 ****
--- 896,903 ----
bb->frequency = (BLOCK_INFO (bb)->frequency * BB_FREQ_MAX / freq_max
+ 0.5);
}
+
+ count_avg_insns ();
free (ei);
free (bi);
Index: config/i386/i386.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.c,v
retrieving revision 1.288
diff -c -3 -p -r1.288 i386.c
*** i386.c 2001/07/22 21:42:35 1.288
--- i386.c 2001/07/30 09:54:04
*************** Boston, MA 02111-1307, USA. */
*** 43,48 ****
--- 43,53 ----
#include "target.h"
#include "target-def.h"
+ /* In case the average insn count for single function invocation is
+ lower than this constant, emit fast (but longer) prologue and
+ epilogue code. */
+ #define FAST_PROLOGUE_INSN_COUNT 30
+
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT -1
#endif
*************** ix86_expand_prologue ()
*** 2627,2633 ****
|| current_function_uses_const_pool)
&& !TARGET_64BIT);
struct ix86_frame frame;
! int use_mov = (TARGET_PROLOGUE_USING_MOVE && !optimize_size);
HOST_WIDE_INT allocate;
ix86_compute_frame_layout (&frame);
--- 2632,2639 ----
|| current_function_uses_const_pool)
&& !TARGET_64BIT);
struct ix86_frame frame;
! int use_mov = (TARGET_PROLOGUE_USING_MOVE && !optimize_size
! && cfun->avg_insn_count < FAST_PROLOGUE_INSN_COUNT);
HOST_WIDE_INT allocate;
ix86_compute_frame_layout (&frame);
*************** ix86_expand_epilogue (style)
*** 2768,2776 ****
--- 2774,2784 ----
tuning in future. */
if ((!sp_valid && frame.nregs <= 1)
|| (TARGET_EPILOGUE_USING_MOVE && !optimize_size
+ && cfun->avg_insn_count < FAST_PROLOGUE_INSN_COUNT
&& (frame.nregs > 1 || frame.to_allocate))
|| (frame_pointer_needed && !frame.nregs && frame.to_allocate)
|| (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
+ && cfun->avg_insn_count < FAST_PROLOGUE_INSN_COUNT
&& frame.nregs == 1)
|| style == 2)
{
*************** ix86_expand_epilogue (style)
*** 2818,2824 ****
GEN_INT (frame.to_allocate
+ frame.nregs * UNITS_PER_WORD)));
/* If not an i386, mov & pop is faster than "leave". */
! else if (TARGET_USE_LEAVE || optimize_size)
emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
else
{
--- 2826,2833 ----
GEN_INT (frame.to_allocate
+ frame.nregs * UNITS_PER_WORD)));
/* If not an i386, mov & pop is faster than "leave". */
! else if (TARGET_USE_LEAVE || optimize_size
! || cfun->avg_insn_count > FAST_PROLOGUE_INSN_COUNT)
emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
else
{
Index: config/i386/i386.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/i386/i386.h,v
retrieving revision 1.195
diff -c -3 -p -r1.195 i386.h
*** i386.h 2001/07/15 16:59:06 1.195
--- i386.h 2001/07/30 09:54:05
*************** extern const int x86_epilogue_using_move
*** 330,336 ****
N_("Do not use push instructions to save outgoing arguments") }, \
{ "accumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS, \
N_("Use push instructions to save outgoing arguments") }, \
! { "no-accumulate-outgoing-args",-MASK_ACCUMULATE_OUTGOING_ARGS, \
N_("Do not use push instructions to save outgoing arguments") }, \
{ "mmx", MASK_MMX, N_("Support MMX builtins") }, \
{ "no-mmx", -MASK_MMX, \
--- 330,336 ----
N_("Do not use push instructions to save outgoing arguments") }, \
{ "accumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS, \
N_("Use push instructions to save outgoing arguments") }, \
! { "no-accumulate-outgoing-args",MASK_NO_ACCUMULATE_OUTGOING_ARGS, \
N_("Do not use push instructions to save outgoing arguments") }, \
{ "mmx", MASK_MMX, N_("Support MMX builtins") }, \
{ "no-mmx", -MASK_MMX, \