This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[rtlopt] trace scheduling
- From: Jan Hubicka <jh at suse dot cz>
- To: gcc-patches at gcc dot gnu dot org
- Date: Sun, 22 Dec 2002 18:22:01 +0100
- Subject: [rtlopt] trace scheduling
Hi,
this is updated version of trace scheduler brought from cfg branch. I
would be interested in any benchmarks about it's performance relative to
the local scheduler pass we have right now.
There are two options -fsuperblock-scheduling and -ftrace-scheduling.
First enables scheduling without code duplication, second does a lot of
code duplication and should be probably compared relative to -ftracer.
Honza
Sun Dec 22 20:20:48 CET 2002 Jan Hubicka <jh@suse.cz>
* basic-block.h (inside_basic_block_p, control_flow_insn_p): Declare.
* cfgbuild.c (inside_basic_block_p, control_flow_insn_p): Make global.
* invoke.texi (-fschedule-traces, -fschedule-superblocks): Document
* flags.h (flag_superblock_scheduling, flag_trace_scheduling): Declare.
* haifa-sched.c (unlink_other_notes): Unlink basic block notes.
* params.def (tracer-min-branch-probability-feedback): Fix default value.
* schedule-ebbs.c (params.h, profile.h): Include.
(schedule_ebb): Return last basic block in the sequence.
(fix_basic_block_boundaries, add_missing_blocks): New static functions.
(schedule_ebbs): Use TRACER_MIN_BRANCH_PROBABILITY to discover traces;
Use CFG for profile information.
* flags.c (flag_superblock_scheduling, flag_trace_scheduling): New global
variables.
(lang_independent_options): Add -fschedule-superblocks, -fschedule-traces.
(rest_of_compilation): Suppress crossjumping when trace scheduling is on;
invoke superblock scheduling when asked for.
(parse_options_and_default_flags): Default to superblock scheduling for -O2
and trace scheduling for -O3.
(process_options): Imply tracer and superblock scheduling by trace scheduling.
Imply 2nd scheduler pass by superblock scheduling.
diff -Nrc3p gcc.orig/basic-block.h gcc/basic-block.h
*** gcc.orig/basic-block.h Sun Dec 22 17:57:34 2002
--- gcc/basic-block.h Sun Dec 22 18:59:52 2002
*************** extern void fixup_abnormal_edges PARAMS
*** 805,810 ****
--- 805,812 ----
extern bool can_hoist_insn_p PARAMS ((rtx, rtx, regset));
extern rtx hoist_insn_after PARAMS ((rtx, rtx, rtx, rtx));
extern rtx hoist_insn_to_edge PARAMS ((rtx, edge, rtx, rtx));
+ extern bool inside_basic_block_p PARAMS ((rtx));
+ extern bool control_flow_insn_p PARAMS ((rtx));
/* In dominance.c */
diff -Nrc3p gcc.orig/cfgbuild.c gcc/cfgbuild.c
*** gcc.orig/cfgbuild.c Sun Dec 22 17:57:34 2002
--- gcc/cfgbuild.c Sun Dec 22 19:00:03 2002
*************** static void make_label_edge PARAMS ((sb
*** 58,70 ****
static void make_eh_edge PARAMS ((sbitmap *, basic_block, rtx));
static void find_bb_boundaries PARAMS ((basic_block));
static void compute_outgoing_frequencies PARAMS ((basic_block));
- static bool inside_basic_block_p PARAMS ((rtx));
- static bool control_flow_insn_p PARAMS ((rtx));
/* Return true if insn is something that should be contained inside basic
block. */
! static bool
inside_basic_block_p (insn)
rtx insn;
{
--- 58,68 ----
static void make_eh_edge PARAMS ((sbitmap *, basic_block, rtx));
static void find_bb_boundaries PARAMS ((basic_block));
static void compute_outgoing_frequencies PARAMS ((basic_block));
/* Return true if insn is something that should be contained inside basic
block. */
! bool
inside_basic_block_p (insn)
rtx insn;
{
*************** inside_basic_block_p (insn)
*** 97,103 ****
/* Return true if INSN may cause control flow transfer, so it should be last in
the basic block. */
! static bool
control_flow_insn_p (insn)
rtx insn;
{
--- 95,101 ----
/* Return true if INSN may cause control flow transfer, so it should be last in
the basic block. */
! bool
control_flow_insn_p (insn)
rtx insn;
{
diff -Nrc3p gcc.orig/doc/invoke.texi gcc/doc/invoke.texi
*** gcc.orig/doc/invoke.texi Sun Dec 22 17:57:56 2002
--- gcc/doc/invoke.texi Sun Dec 22 20:18:38 2002
*************** in the following sections.
*** 285,291 ****
-freduce-all-givs -fregmove -frename-registers @gol
-freorder-blocks -freorder-functions @gol
-frerun-cse-after-loop -frerun-loop-opt @gol
! -fschedule-insns -fschedule-insns2 @gol
-fno-sched-interblock -fno-sched-spec -fsched-spec-load @gol
-fsched-spec-load-dangerous -fsignaling-nans @gol
-fsingle-precision-constant -fssa -fssa-ccp -fssa-dce @gol
--- 285,291 ----
-freduce-all-givs -fregmove -frename-registers @gol
-freorder-blocks -freorder-functions @gol
-frerun-cse-after-loop -frerun-loop-opt @gol
! -fschedule-insns -fschedule-insns2 -fschedule-superblocks -fschedule-traces @gol
-fno-sched-interblock -fno-sched-spec -fsched-spec-load @gol
-fsched-spec-load-dangerous -fsignaling-nans @gol
-fsingle-precision-constant -fssa -fssa-ccp -fssa-dce @gol
*************** instruction scheduling after register al
*** 3869,3874 ****
--- 3869,3889 ----
especially useful on machines with a relatively small number of
registers and where memory load instructions take more than one cycle.
+ @item -fschedule-superblocks
+ @opindex fschedule-superblocks
+ Do schedule superblocks in the second scheduler pass. This allows more of
+ motion to be done and should result in faster code than
+ @code{-fschedule-insns2}, however number of the motion is speculative and
+ without accurate profile it may cause too many of dead computations to happen.
+
+ @item -fschedule-traces
+ @opindex fschedule-traces
+ Same as @code{-fschedule-superblocks} but does imply @code{-ftracer} to perform
+ code duplication to enlarge superblock size and disables first crossjumping
+ pass. The programs should be faster but results are even more dependent on the
+ quality of profile. Also the late crossjumping pass has much fewer
+ oppurtunities for code unifications, so the programs are significantly larger.
+
@item -fno-sched-interblock
@opindex fno-sched-interblock
Don't schedule instructions across basic blocks. This is normally
diff -Nrc3p gcc.orig/flags.h gcc/flags.h
*** gcc.orig/flags.h Sun Dec 22 17:57:35 2002
--- gcc/flags.h Sun Dec 22 18:04:10 2002
*************** extern int flag_shared_data;
*** 423,432 ****
/* flag_schedule_insns means schedule insns within basic blocks (before
local_alloc).
flag_schedule_insns_after_reload means schedule insns after
! global_alloc. */
extern int flag_schedule_insns;
extern int flag_schedule_insns_after_reload;
/* The following flags have effect only for scheduling before register
allocation:
--- 423,439 ----
/* flag_schedule_insns means schedule insns within basic blocks (before
local_alloc).
flag_schedule_insns_after_reload means schedule insns after
! global_alloc.
! flag_superblock_scheduling enables scheduling of superblocks instead of
! local scheduling after reload.
! flag_trace_scheduling enables tracer and disables post reload crossjumping
! so the traces gets scheduled properly.
! */
extern int flag_schedule_insns;
extern int flag_schedule_insns_after_reload;
+ extern int flag_superblock_scheduling;
+ extern int flag_trace_scheduling;
/* The following flags have effect only for scheduling before register
allocation:
diff -Nrc3p gcc.orig/haifa-sched.c gcc/haifa-sched.c
*** gcc.orig/haifa-sched.c Sun Dec 22 17:57:36 2002
--- gcc/haifa-sched.c Sun Dec 22 18:04:10 2002
*************** unlink_other_notes (insn, tail)
*** 1228,1233 ****
--- 1228,1234 ----
/* See sched_analyze to see how these are handled. */
if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG
&& NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END
+ && NOTE_LINE_NUMBER (insn) != NOTE_INSN_BASIC_BLOCK
&& NOTE_LINE_NUMBER (insn) != NOTE_INSN_EH_REGION_BEG
&& NOTE_LINE_NUMBER (insn) != NOTE_INSN_EH_REGION_END)
{
diff -Nrc3p gcc.orig/params.def gcc/params.def
*** gcc.orig/params.def Sun Dec 22 17:57:37 2002
--- gcc/params.def Sun Dec 22 19:52:24 2002
*************** DEFPARAM(TRACER_MIN_BRANCH_PROBABILITY_F
*** 184,190 ****
"tracer-min-branch-probability-feedback",
"Stop forward growth if the probability of best edge is less than \
this threshold (in percents). Used when profile feedback is available",
! 30)
DEFPARAM(TRACER_MIN_BRANCH_PROBABILITY,
"tracer-min-branch-probability",
"Stop forward growth if the probability of best edge is less than \
--- 184,190 ----
"tracer-min-branch-probability-feedback",
"Stop forward growth if the probability of best edge is less than \
this threshold (in percents). Used when profile feedback is available",
! 80)
DEFPARAM(TRACER_MIN_BRANCH_PROBABILITY,
"tracer-min-branch-probability",
"Stop forward growth if the probability of best edge is less than \
diff -Nrc3p gcc.orig/sched-ebb.c gcc/sched-ebb.c
*** gcc.orig/sched-ebb.c Sun Dec 22 17:57:38 2002
--- gcc/sched-ebb.c Sun Dec 22 20:06:25 2002
*************** Software Foundation, 59 Temple Place - S
*** 39,44 ****
--- 39,46 ----
#include "toplev.h"
#include "recog.h"
#include "cfglayout.h"
+ #include "params.h"
+ #include "profile.h"
#include "sched-int.h"
/* The number of insns to be scheduled in total. */
*************** static const char *ebb_print_insn PARAMS
*** 55,61 ****
static int rank PARAMS ((rtx, rtx));
static int contributes_to_priority PARAMS ((rtx, rtx));
static void compute_jump_reg_dependencies PARAMS ((rtx, regset));
! static void schedule_ebb PARAMS ((rtx, rtx));
/* Return nonzero if there are more insns that should be scheduled. */
--- 57,65 ----
static int rank PARAMS ((rtx, rtx));
static int contributes_to_priority PARAMS ((rtx, rtx));
static void compute_jump_reg_dependencies PARAMS ((rtx, regset));
! static basic_block schedule_ebb PARAMS ((rtx, rtx));
! static basic_block fix_basic_block_boundaries PARAMS ((basic_block, basic_block, rtx, rtx));
! static void add_missing_bbs PARAMS ((rtx, basic_block, basic_block));
/* Return nonzero if there are more insns that should be scheduled. */
*************** static struct sched_info ebb_sched_info
*** 199,216 ****
0, 1
};
/* Schedule a single extended basic block, defined by the boundaries HEAD
and TAIL. */
! static void
schedule_ebb (head, tail)
rtx head, tail;
{
int n_insns;
struct deps tmp_deps;
if (no_real_insns_p (head, tail))
! return;
init_deps_global ();
--- 203,356 ----
0, 1
};
+ /* It is possible that ebb scheduling elliminated some blocks.
+ Place blocks from FIRST to LAST before BEFORE. */
+
+ static void
+ add_missing_bbs (before, first, last)
+ rtx before;
+ basic_block first, last;
+ {
+ for (; last != first->prev_bb; last = last->prev_bb)
+ {
+ before = emit_note_before (NOTE_INSN_BASIC_BLOCK, before);
+ NOTE_BASIC_BLOCK (before) = last;
+ last->head = before;
+ last->end = before;
+ update_bb_for_insn (last);
+ }
+ }
+
+ /* Fixup the CFG after EBB scheduling. Re-recognize the basic
+ block boundaries in between HEAD and TAIL and update basic block
+ structures between BB and LAST. */
+
+ static basic_block
+ fix_basic_block_boundaries (bb, last, head, tail)
+ basic_block bb, last;
+ rtx head, tail;
+ {
+ rtx insn = head;
+ rtx last_inside = bb->head;
+ rtx aftertail = NEXT_INSN (tail);
+
+ head = bb->head;
+
+ for (; insn != aftertail; insn = NEXT_INSN (insn))
+ {
+ if (GET_CODE (insn) == CODE_LABEL)
+ abort ();
+ /* Create new basic blocks just before first insn. */
+ if (inside_basic_block_p (insn))
+ {
+ if (!last_inside)
+ {
+ rtx note;
+
+ /* Re-emit the basic block note for newly found BB header. */
+ if (GET_CODE (insn) == CODE_LABEL)
+ {
+ note = emit_note_after (NOTE_INSN_BASIC_BLOCK, insn);
+ head = insn;
+ last_inside = note;
+ }
+ else
+ {
+ note = emit_note_before (NOTE_INSN_BASIC_BLOCK, insn);
+ head = note;
+ last_inside = insn;
+ }
+ }
+ else
+ last_inside = insn;
+ }
+ /* Control flow instruction terminate basic block. It is possible
+ that we've elliminated some basic blocks (made them empty).
+ Find the proper basic block using BLOCK_FOR_INSN and arrange things in
+ a sensible way by inserting empty basic blocks as needed. */
+ if (control_flow_insn_p (insn) || (insn == tail && last_inside))
+ {
+ basic_block curr_bb = BLOCK_FOR_INSN (insn);
+ rtx note;
+
+ if (!control_flow_insn_p (insn))
+ curr_bb = last;
+ if (bb == last->next_bb)
+ {
+ edge f;
+ rtx h;
+
+ /* An obscure special case, where we do have partially dead
+ instruction scheduled after last control flow instruction.
+ In this case we can create new basic block. It is
+ always exactly one basic block last in the sequence. Handle
+ it by splitting the edge and repositioning the block.
+ This is somewhat hackish, but at least avoid cut&paste
+
+ Safter sollution can be to bring the code into sequence,
+ do the split and re-emit it back in case this will ever
+ trigger problem. */
+ f = bb->prev_bb->succ;
+ while (f && !(f->flags & EDGE_FALLTHRU))
+ f = f->succ_next;
+
+ if (f)
+ {
+ last = curr_bb = split_edge (f);
+ h = curr_bb->head;
+ curr_bb->head = head;
+ curr_bb->end = insn;
+ /* Edge splitting created missplaced BASIC_BLOCK note, kill
+ it. */
+ delete_insn (h);
+ }
+ /* It may happen that code got moved past unconditional jump in
+ case the code is completely dead. Kill it. */
+ else
+ {
+ rtx next = next_nonnote_insn (insn);
+ delete_insn_chain (head, insn);
+ /* We keep some notes in the way that may split barrier from the
+ jump. */
+ if (GET_CODE (next) == BARRIER)
+ {
+ emit_barrier_after (prev_nonnote_insn (head));
+ delete_insn (next);
+ }
+ }
+ }
+ else
+ {
+ curr_bb->head = head;
+ curr_bb->end = insn;
+ add_missing_bbs (curr_bb->head, bb, curr_bb->prev_bb);
+ }
+ note = GET_CODE (head) == CODE_LABEL ? NEXT_INSN (head) : head;
+ NOTE_BASIC_BLOCK (note) = curr_bb;
+ update_bb_for_insn (curr_bb);
+ bb = curr_bb->next_bb;
+ last_inside = NULL;
+ }
+ }
+ add_missing_bbs (last->next_bb->head, bb, last);
+ return bb->prev_bb;
+ }
+
/* Schedule a single extended basic block, defined by the boundaries HEAD
and TAIL. */
! static basic_block
schedule_ebb (head, tail)
rtx head, tail;
{
int n_insns;
+ basic_block b;
struct deps tmp_deps;
+ basic_block first_bb = BLOCK_FOR_INSN (head);
+ basic_block last_bb = BLOCK_FOR_INSN (tail);
if (no_real_insns_p (head, tail))
! return BLOCK_FOR_INSN (tail);
init_deps_global ();
*************** schedule_ebb (head, tail)
*** 270,277 ****
--- 410,419 ----
if (write_symbols != NO_DEBUG)
restore_line_notes (head, tail);
+ b = fix_basic_block_boundaries (first_bb, last_bb, head, tail);
finish_deps_global ();
+ return b;
}
/* The one entry point in this file. DUMP_FILE is the dump file for
*************** schedule_ebbs (dump_file)
*** 282,287 ****
--- 424,436 ----
FILE *dump_file;
{
basic_block bb;
+ int probability_cutoff;
+
+ if (profile_info.count_profiles_merged && flag_branch_probabilities)
+ probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK);
+ else
+ probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY);
+ probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;
/* Taking care of this degenerate case makes the rest of
this code simpler. */
*************** schedule_ebbs (dump_file)
*** 313,329 ****
break;
if (! e)
break;
! if (GET_CODE (tail) == JUMP_INSN)
! {
! rtx x = find_reg_note (tail, REG_BR_PROB, 0);
! if (x)
! {
! int pred_val = INTVAL (XEXP (x, 0));
! if (pred_val > REG_BR_PROB_BASE / 2)
! break;
! }
! }
!
bb = bb->next_bb;
}
--- 462,469 ----
break;
if (! e)
break;
! if (e->probability < probability_cutoff)
! break;
bb = bb->next_bb;
}
*************** schedule_ebbs (dump_file)
*** 341,351 ****
break;
}
! schedule_ebb (head, tail);
}
! /* It doesn't make much sense to try and update life information here - we
! probably messed up even the flow graph. */
/* Reposition the prologue and epilogue notes in case we moved the
prologue/epilogue insns. */
--- 481,491 ----
break;
}
! bb = schedule_ebb (head, tail);
}
! /* Updating life info can be done by local propagation over the modified
! superblocks. */
/* Reposition the prologue and epilogue notes in case we moved the
prologue/epilogue insns. */
*************** schedule_ebbs (dump_file)
*** 356,359 ****
--- 496,503 ----
rm_redundant_line_notes ();
sched_finish ();
+
+ #ifdef ENABLE_CHECKING
+ verify_flow_info ();
+ #endif
}
diff -Nrc3p gcc.orig/toplev.c gcc/toplev.c
*** gcc.orig/toplev.c Sun Dec 22 17:57:35 2002
--- gcc/toplev.c Sun Dec 22 20:06:17 2002
*************** int flag_pedantic_errors = 0;
*** 745,750 ****
--- 747,754 ----
global_alloc. */
int flag_schedule_insns = 0;
+ int flag_superblock_scheduling = 0;
+ int flag_trace_scheduling = 0;
int flag_schedule_insns_after_reload = 0;
/* The following flags have effect only for scheduling before register
*************** static const lang_independent_options f_
*** 1069,1074 ****
--- 1073,1082 ----
N_("Delete useless null pointer checks") },
{"schedule-insns", &flag_schedule_insns, 1,
N_("Reschedule instructions before register allocation") },
+ {"schedule-superblocks", &flag_superblock_scheduling, 1,
+ N_("When scheduling, do superblock sheduling") },
+ {"schedule-traces", &flag_trace_scheduling, 1,
+ N_("When scheduling, do trace sheduling") },
{"schedule-insns2", &flag_schedule_insns_after_reload, 1,
N_("Reschedule instructions after register allocation") },
{"sched-interblock",&flag_schedule_interblock, 1,
*************** rest_of_compilation (decl)
*** 3375,3381 ****
{
life_analysis (insns, rtl_dump_file, PROP_FINAL);
cleanup_cfg (CLEANUP_EXPENSIVE | CLEANUP_UPDATE_LIFE
! | (flag_crossjumping ? CLEANUP_CROSSJUMP : 0));
/* This is kind of a heuristic. We need to run combine_stack_adjustments
even for machines with possibly nonzero RETURN_POPS_ARGS
--- 3383,3390 ----
{
life_analysis (insns, rtl_dump_file, PROP_FINAL);
cleanup_cfg (CLEANUP_EXPENSIVE | CLEANUP_UPDATE_LIFE
! | (flag_crossjumping && !flag_trace_scheduling
! ? CLEANUP_CROSSJUMP : 0));
/* This is kind of a heuristic. We need to run combine_stack_adjustments
even for machines with possibly nonzero RETURN_POPS_ARGS
*************** rest_of_compilation (decl)
*** 3447,3455 ****
split_all_insns (1);
! schedule_insns (rtl_dump_file);
! close_dump_file (DFI_sched2, print_rtl_with_bb, insns);
timevar_pop (TV_SCHED2);
ggc_collect ();
--- 3456,3474 ----
split_all_insns (1);
! if (flag_superblock_scheduling)
! {
! schedule_ebbs (rtl_dump_file);
! /* No liveness updating code yet, but it should be easy to do */
! count_or_remove_death_notes (NULL, 1);
! life_analysis (get_insns (), rtl_dump_file, PROP_FINAL);
! cleanup_cfg (CLEANUP_EXPENSIVE | CLEANUP_UPDATE_LIFE
! | (flag_crossjumping ? CLEANUP_CROSSJUMP : 0));
! }
! else
! schedule_insns (rtl_dump_file);
! close_dump_file (DFI_sched2, print_rtl_with_bb, get_insns ());
timevar_pop (TV_SCHED2);
ggc_collect ();
*************** parse_options_and_default_flags (argc, a
*** 4846,4851 ****
--- 4865,4871 ----
#ifdef INSN_SCHEDULING
flag_schedule_insns = 1;
flag_schedule_insns_after_reload = 1;
+ flag_superblock_scheduling = 1;
#endif
flag_regmove = 1;
flag_strict_aliasing = 1;
*************** parse_options_and_default_flags (argc, a
*** 4858,4863 ****
--- 4878,4886 ----
{
flag_inline_functions = 1;
flag_rename_registers = 1;
+ #ifdef INSN_SCHEDULING
+ flag_trace_scheduling = 1;
+ #endif
}
if (optimize < 2 || optimize_size)
*************** process_options ()
*** 5037,5042 ****
--- 5060,5069 ----
flag_asynchronous_unwind_tables = 1;
if (flag_asynchronous_unwind_tables)
flag_unwind_tables = 1;
+ if (flag_trace_scheduling)
+ flag_tracer = flag_superblock_scheduling = 1;
+ if (flag_superblock_scheduling)
+ flag_schedule_insns_after_reload = 1;
/* Warn about options that are not supported on this machine. */
#ifndef INSN_SCHEDULING