+Sat Aug 25 23:07:35 CEST 2001 Jan Hubicka <jh@suse.cz>
+
+ * predict.c (expensive_function_p): New.
+ * rtl.h (expensive_function_p): Declare.
+ * i386.c (FAST_PROLOGUE_INSN_COUNT): New constant.
+ (use_fast_prologue_epilogue): New static variable.
+ (expand_prologue): Set it; emit short prologues if unset.
+ (expand_epilogue): Likewise.
+
2001-08-22 Geoffrey Keating <geoffk@redhat.com>
* config.gcc: Add stormy16-*-elf case.
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
+/* In case the average insn count for a single function invocation is
+ lower than this constant, emit fast (but longer) prologue and
+ epilogue code. */
+#define FAST_PROLOGUE_INSN_COUNT 30
+/* Set by prologue expander and used by epilogue expander to determine
+ the style used. */
+static int use_fast_prologue_epilogue;
+
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
const char * const hi_reg_name[] = HI_REGISTER_NAMES;
|| current_function_uses_const_pool)
&& !TARGET_64BIT);
struct ix86_frame frame;
- int use_mov = (TARGET_PROLOGUE_USING_MOVE && !optimize_size);
+ int use_mov = 0;
HOST_WIDE_INT allocate;
+ if (TARGET_PROLOGUE_USING_MOVE && !optimize_size)
+ {
+ use_fast_prologue_epilogue
+ = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
+ use_mov = use_fast_prologue_epilogue;
+ }
ix86_compute_frame_layout (&frame);
/* Note: AT&T enter does NOT have reversed args. Enter is probably
tuning in future. */
if ((!sp_valid && frame.nregs <= 1)
|| (TARGET_EPILOGUE_USING_MOVE && !optimize_size
+ && use_fast_prologue_epilogue
&& (frame.nregs > 1 || frame.to_allocate))
|| (frame_pointer_needed && !frame.nregs && frame.to_allocate)
|| (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
- && frame.nregs == 1)
+ && use_fast_prologue_epilogue && frame.nregs == 1)
|| style == 2)
{
/* Restore registers. We can use ebp or esp to address the memory
GEN_INT (frame.to_allocate
+ frame.nregs * UNITS_PER_WORD)));
/* If not an i386, mov & pop is faster than "leave". */
- else if (TARGET_USE_LEAVE || optimize_size)
+ else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
else
{
}
}
+/* Return true if the function is likely to be expensive, so there is no
+ point in optimizing the performance of its prologue and epilogue or in
+ inlining at the expense of code size growth. THRESHOLD is the limit on
+ the number of instructions the function can execute on average and still
+ be considered not expensive. */
+bool
+expensive_function_p (threshold)
+ int threshold;
+{
+ unsigned int sum = 0;
+ int i;
+ int limit;
+
+ /* We cannot compute accurately for large thresholds due to scaled
+ frequencies. */
+ if (threshold > BB_FREQ_MAX)
+ abort ();
+
+ /* Frequencies are out of range. This either means that the function
+ contains an internal loop executing more than BB_FREQ_MAX times, or that
+ profile feedback is available and the function has not been executed
+ at all. Treat the function as expensive in both cases. */
+ if (ENTRY_BLOCK_PTR->frequency == 0)
+ return true;
+
+ /* LIMIT is the entry block frequency scaled by THRESHOLD. It is
+ maximally BB_FREQ_MAX^2, so overflow won't happen. Each active insn
+ then adds the frequency of its basic block to SUM, and the function is
+ reported as expensive as soon as SUM exceeds LIMIT. */
+ limit = ENTRY_BLOCK_PTR->frequency * threshold;
+ for (i = 0; i < n_basic_blocks; i++)
+ {
+ basic_block bb = BASIC_BLOCK (i);
+ rtx insn;
+
+ for (insn = bb->head; insn != NEXT_INSN (bb->end);
+ insn = NEXT_INSN (insn))
+ {
+ if (active_insn_p (insn))
+ {
+ sum += bb->frequency;
+ if (sum > limit)
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
/* Estimate basic blocks frequency by given branch probabilities. */
static void
estimate_bb_frequencies (loops)