This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[dataflow] PATCH to make small performance improvement and more ia64 fixes.
- From: Kenneth Zadeck <zadeck at naturalbridge dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>, "Berlin, Daniel" <dberlin at dberlin dot org>, "Zadeck, Kenneth" <zadeck at naturalbridge dot com>
- Date: Fri, 04 Aug 2006 12:39:24 -0400
- Subject: [dataflow] PATCH to make small performance improvement and more ia64 fixes.
This patch removes a large number of calls to either find the postorder
numbering or delete unreachable blocks. This saves about .5% in compile
time.
This patch also fixes more ia-64 problems.
This has been bootstrapped on x86_64-unknown-linux-gnu. The ia-64 is
now awaiting a merge with the mainline. They both fail bootstrap in the
same way.
Kenny
2006-08-04 Kenneth Zadeck <zadeck@naturalbridge.com>
* tree-pass.h (pass_rtl_dse): Split into pass_rtl_dse1,
pass_rtl_dse2, pass_rtl_dse3.
* passes.c (init_optimization_passes): Ditto.
* timevar.def (TV_DSE): Split into TV_DSE1, TV_DSE2, and TV_DSE3.
(TV_THREAD_PROLOGUE_AND_EPILOGUE): Made text shorter to improve
readability.
* df.core.c (df_init, df_finish1, df_analyze_problem, df_analyze):
Made postorder and instance variable of df.
(df_finish1): Made tolerant of being passed NULL instance.
(df_get_n_blocks, df_get_postorder): New functions.
* cfganal (post_order_compute): Added delete_unreachable
parameter and code to delete unreachable blocks.
* local_alloc (rest_of_handle_local_alloc): Removed unnecessary
call to delete_unreachable_blocks.
* df.h (postorder, n_blocks): New instance variables.
(df_get_n_blocks, df_get_postorder): New functions.
* sched-rgn.c (extend_rgns): Added extra parameter to
post_order_compute.
* basic-block.h (post_order_compute): Ditto.
* dce.c (fast_dce, init_rs_dflow): Now uses postorder and n_blocks
from df.
(pass_rtl_dse): Split into pass_rtl_dse1,
pass_rtl_dse2, pass_rtl_dse3.
* sched-ebb.c (schedule-ebbs): Added return value.
* haifa-sched.c (add_block): Made df parameter unused and fixed
incorrect assert.
Index: tree-pass.h
===================================================================
--- tree-pass.h (revision 115812)
+++ tree-pass.h (working copy)
@@ -341,7 +341,9 @@ extern struct tree_opt_pass pass_jump2;
extern struct tree_opt_pass pass_cse;
extern struct tree_opt_pass pass_fast_rtl_dce;
extern struct tree_opt_pass pass_rtl_dce;
-extern struct tree_opt_pass pass_rtl_dse;
+extern struct tree_opt_pass pass_rtl_dse1;
+extern struct tree_opt_pass pass_rtl_dse2;
+extern struct tree_opt_pass pass_rtl_dse3;
extern struct tree_opt_pass pass_gcse;
extern struct tree_opt_pass pass_jump_bypass;
extern struct tree_opt_pass pass_profiling;
Index: passes.c
===================================================================
--- passes.c (revision 115812)
+++ passes.c (working copy)
@@ -648,7 +648,7 @@ init_optimization_passes (void)
NEXT_PASS (pass_loop2);
NEXT_PASS (pass_web);
NEXT_PASS (pass_cse2);
- NEXT_PASS (pass_rtl_dse);
+ NEXT_PASS (pass_rtl_dse1);
NEXT_PASS (pass_rtl_fwprop_addr);
NEXT_PASS (pass_regclass_init);
NEXT_PASS (pass_subregs_of_mode_init);
@@ -657,7 +657,7 @@ init_optimization_passes (void)
NEXT_PASS (pass_initialize_subregs);
NEXT_PASS (pass_no_new_pseudos);
NEXT_PASS (pass_combine);
- NEXT_PASS (pass_rtl_dse);
+ NEXT_PASS (pass_rtl_dse2);
NEXT_PASS (pass_if_after_combine);
NEXT_PASS (pass_fast_rtl_dce);
NEXT_PASS (pass_partition_blocks);
@@ -679,7 +679,7 @@ init_optimization_passes (void)
NEXT_PASS (pass_split_after_reload);
NEXT_PASS (pass_branch_target_load_optimize1);
NEXT_PASS (pass_thread_prologue_and_epilogue);
- NEXT_PASS (pass_rtl_dse);
+ NEXT_PASS (pass_rtl_dse3);
NEXT_PASS (pass_rtl_seqabstr);
NEXT_PASS (pass_stack_adjustments);
NEXT_PASS (pass_peephole2);
Index: timevar.def
===================================================================
--- timevar.def (revision 115812)
+++ timevar.def (working copy)
@@ -131,7 +131,9 @@ DEFTIMEVAR (TV_JUMP , "
DEFTIMEVAR (TV_FWPROP , "forward prop")
DEFTIMEVAR (TV_CSE , "CSE")
DEFTIMEVAR (TV_DCE , "dead code elimination")
-DEFTIMEVAR (TV_DSE , "dead store elimination")
+DEFTIMEVAR (TV_DSE1 , "dead store elim1")
+DEFTIMEVAR (TV_DSE2 , "dead store elim2")
+DEFTIMEVAR (TV_DSE3 , "dead store elim3")
DEFTIMEVAR (TV_LOOP , "loop analysis")
DEFTIMEVAR (TV_GCSE , "global CSE")
DEFTIMEVAR (TV_CPROP1 , "CPROP 1")
@@ -158,7 +160,7 @@ DEFTIMEVAR (TV_GLOBAL_ALLOC , "
DEFTIMEVAR (TV_RELOAD_CSE_REGS , "reload CSE regs")
DEFTIMEVAR (TV_SEQABSTR , "sequence abstraction")
DEFTIMEVAR (TV_GCSE_AFTER_RELOAD , "load CSE after reload")
-DEFTIMEVAR (TV_THREAD_PROLOGUE_AND_EPILOGUE, "thread prologue and epilogue")
+DEFTIMEVAR (TV_THREAD_PROLOGUE_AND_EPILOGUE, "thread pro- & epilogue")
DEFTIMEVAR (TV_IFCVT2 , "if-conversion 2")
DEFTIMEVAR (TV_PEEPHOLE2 , "peephole 2")
DEFTIMEVAR (TV_RENAME_REGISTERS , "rename registers")
Index: df-core.c
===================================================================
--- df-core.c (revision 115812)
+++ df-core.c (working copy)
@@ -330,6 +330,7 @@ df_init (int flags)
gcc_assert (!df_current_instance);
df_current_instance = df;
+ df->postorder = NULL;
return df;
}
@@ -508,12 +509,17 @@ df_finish1 (struct df *df)
{
int i;
+ if (!df)
+ return;
+
for (i = 0; i < df->num_problems_defined; i++)
{
struct dataflow *dflow = df->problems_in_order[i];
dflow->problem->free_fun (df->problems_in_order[i]);
}
+ if (df->postorder)
+ free (df->postorder);
free (df);
df_current_instance = NULL;
}
@@ -805,19 +811,17 @@ df_analyze_problem (struct dataflow *dfl
void
df_analyze (struct df *df)
{
- int *postorder = XNEWVEC (int, last_basic_block);
bitmap current_all_blocks = BITMAP_ALLOC (NULL);
- int n_blocks;
int i;
bool everything;
- n_blocks = post_order_compute (postorder, true);
+ if (df->postorder)
+ free (df->postorder);
+ df->postorder = XNEWVEC (int, last_basic_block);
+ df->n_blocks = post_order_compute (df->postorder, true, true);
- if (n_blocks != n_basic_blocks)
- delete_unreachable_blocks ();
-
- for (i = 0; i < n_blocks; i++)
- bitmap_set_bit (current_all_blocks, postorder[i]);
+ for (i = 0; i < df->n_blocks; i++)
+ bitmap_set_bit (current_all_blocks, df->postorder[i]);
/* No one called df_rescan_blocks, so do it. */
if (!df->blocks_to_scan)
@@ -831,7 +835,8 @@ df_analyze (struct df *df)
{
everything = false;
bitmap_and_into (df->blocks_to_analyze, current_all_blocks);
- n_blocks = df_prune_to_subcfg (postorder, n_blocks, df->blocks_to_analyze);
+ df->n_blocks = df_prune_to_subcfg (df->postorder,
+ df->n_blocks, df->blocks_to_analyze);
BITMAP_FREE (current_all_blocks);
}
else
@@ -846,7 +851,7 @@ df_analyze (struct df *df)
df_analyze_problem (df->problems_in_order[i],
df->blocks_to_analyze, df->blocks_to_analyze,
df->blocks_to_scan,
- postorder, n_blocks, false);
+ df->postorder, df->n_blocks, false);
if (everything)
{
@@ -856,7 +861,25 @@ df_analyze (struct df *df)
BITMAP_FREE (df->blocks_to_scan);
df->blocks_to_scan = NULL;
- free (postorder);
+}
+
+
+/* Return the number of basic blocks from the last call to df_analyze. */
+
+int
+df_get_n_blocks (struct df *df)
+{
+ gcc_assert (df->postorder);
+ return df->n_blocks;
+}
+
+
+/* Return a pointer to the array of basic blocks from the last call to df_analyze. */
+int *
+df_get_postorder (struct df *df)
+{
+ gcc_assert (df->postorder);
+ return df->postorder;
}
static struct df_problem user_problem;
Index: cfganal.c
===================================================================
--- cfganal.c (revision 115812)
+++ cfganal.c (working copy)
@@ -645,16 +645,20 @@ connect_infinite_loops_to_exit (void)
return;
}
-/* Compute reverse top sort order.
- This is computing a post order numbering of the graph. */
+/* Compute reverse top sort order. This is computing a post order
+ numbering of the graph. If INCLUDE_ENTRY_EXIT is true, then then
+ ENTRY_BLOCK and EXIT_BLOCK are included. If DELETE_UNREACHABLE is
+ true, unreachable blocks are deleted. */
int
-post_order_compute (int *post_order, bool include_entry_exit)
+post_order_compute (int *post_order, bool include_entry_exit,
+ bool delete_unreachable)
{
edge_iterator *stack;
int sp;
int post_order_num = 0;
sbitmap visited;
+ int count;
if (include_entry_exit)
post_order[post_order_num++] = EXIT_BLOCK;
@@ -699,7 +703,7 @@ post_order_compute (int *post_order, boo
else
{
if (ei_one_before_end_p (ei) && src != ENTRY_BLOCK_PTR)
- post_order[post_order_num++] = src->index;
+ post_order[post_order_num++] = src->index;
if (!ei_one_before_end_p (ei))
ei_next (&stack[sp - 1]);
@@ -709,7 +713,29 @@ post_order_compute (int *post_order, boo
}
if (include_entry_exit)
- post_order[post_order_num++] = ENTRY_BLOCK;
+ {
+ post_order[post_order_num++] = ENTRY_BLOCK;
+ count = post_order_num;
+ }
+ else
+ count = post_order_num + 2;
+
+ /* Delete the unreachable blocks if some were found and we are
+ supposed to do it. */
+ if (delete_unreachable && (count != n_basic_blocks))
+ {
+ basic_block b;
+ basic_block next_bb;
+ for (b = ENTRY_BLOCK_PTR->next_bb; b != EXIT_BLOCK_PTR; b = next_bb)
+ {
+ next_bb = b->next_bb;
+
+ if (!(TEST_BIT (visited, b->index)))
+ delete_basic_block (b);
+ }
+
+ tidy_fallthru_edges ();
+ }
free (stack);
sbitmap_free (visited);
Index: local-alloc.c
===================================================================
--- local-alloc.c (revision 115812)
+++ local-alloc.c (working copy)
@@ -2529,8 +2529,6 @@ rest_of_handle_local_alloc (void)
rebuild_jump_labels (get_insns ());
purge_all_dead_edges ();
- delete_unreachable_blocks ();
-
timevar_pop (TV_JUMP);
}
Index: df.h
===================================================================
--- df.h (revision 115812)
+++ df.h (working copy)
@@ -400,6 +400,8 @@ struct df
bitmap eh_block_artificial_uses;
bitmap entry_block_defs; /* The set of hardware registers live on entry to the function. */
bitmap exit_block_uses; /* The set of hardware registers used in exit block. */
+ int *postorder; /* The current set of basic blocks in postorder. */
+ int n_blocks; /* The number of blocks in postorder. */
};
#define DF_SCAN_BB_INFO(DF, BB) (df_scan_get_bb_info((DF)->problems_by_index[DF_SCAN],(BB)->index))
@@ -632,6 +634,8 @@ extern void df_delete_basic_block (struc
extern void df_finish1 (struct df *);
extern void df_analyze_problem (struct dataflow *, bitmap, bitmap, bitmap, int *, int, bool);
extern void df_analyze (struct df *);
+extern int df_get_n_blocks (struct df *);
+extern int *df_get_postorder (struct df *);
extern void df_simple_iterative_dataflow (enum df_flow_dir, df_init_function,
df_confluence_function_0, df_confluence_function_n,
df_transfer_function, bitmap, int *, int);
Index: sched-rgn.c
===================================================================
--- sched-rgn.c (revision 115812)
+++ sched-rgn.c (working copy)
@@ -1030,7 +1030,7 @@ extend_rgns (int *degree, int *idxp, sbi
max_hdr = xmalloc (last_basic_block * sizeof (*max_hdr));
order = xmalloc (last_basic_block * sizeof (*order));
- post_order_compute (order, false);
+ post_order_compute (order, false, false);
for (i = nblocks - 1; i >= 0; i--)
{
Index: basic-block.h
===================================================================
--- basic-block.h (revision 115812)
+++ basic-block.h (working copy)
@@ -517,7 +517,7 @@ extern edge redirect_edge_succ_nodup (ed
extern void redirect_edge_pred (edge, basic_block);
extern basic_block create_basic_block_structure (rtx, rtx, rtx, basic_block);
extern void clear_bb_flags (void);
-extern int post_order_compute (int *, bool);
+extern int post_order_compute (int *, bool, bool);
extern int pre_and_rev_post_order_compute (int *, int *, bool);
extern int dfs_enumerate_from (basic_block, int,
bool (*)(basic_block, void *),
Index: dce.c
===================================================================
--- dce.c (revision 115812)
+++ dce.c (working copy)
@@ -561,7 +561,8 @@ dce_process_block (basic_block bb, bool
static void
fast_dce (bool df_delete)
{
- int *postorder = xmalloc (sizeof (int) *last_basic_block);
+ int *postorder = df_get_postorder (dce_df);
+ int n_blocks = df_get_n_blocks (dce_df);
int i;
/* The set of blocks that have been seen on this iteration. */
bitmap processed = BITMAP_ALLOC (NULL);
@@ -574,15 +575,11 @@ fast_dce (bool df_delete)
bitmap all_blocks = BITMAP_ALLOC (NULL);
bool global_changed = true;
struct dataflow * dflow = dce_df->problems_by_index[DF_LR];
- int n_blocks;
+
int loop_count = 0;
prescan_insns_for_dce (true);
- n_blocks = post_order_compute (postorder, true);
- if (n_blocks != n_basic_blocks)
- delete_unreachable_blocks ();
-
for (i = 0; i < n_blocks; i++)
bitmap_set_bit (all_blocks, postorder[i]);
@@ -649,7 +646,6 @@ fast_dce (bool df_delete)
delete_unmarked_insns (df_delete);
- free (postorder);
BITMAP_FREE (processed);
BITMAP_FREE (changed);
BITMAP_FREE (redo_out);
@@ -968,8 +964,8 @@ init_rs_dflow (void)
out_vec = XNEWVEC (bitmap, last_basic_block);
gen_vec = XNEWVEC (bitmap, last_basic_block);
kill_vec = XNEWVEC (bitmap, last_basic_block);
- postorder = XNEWVEC (int, last_basic_block);
- n_blocks = post_order_compute (postorder, true);
+ n_blocks = df_get_n_blocks (dce_df);
+ postorder = df_get_postorder (dce_df);
iterating = BITMAP_ALLOC (NULL);
num_stores = VEC_length (store_info, stores.stores);
@@ -1013,7 +1009,6 @@ end_rs_dflow (void)
XDELETE (max_in_luid);
BITMAP_FREE (iterating);
- XDELETE (postorder);
XDELETE (kill_vec);
XDELETE (gen_vec);
XDELETE (out_vec);
@@ -1637,15 +1632,53 @@ gate_dse (void)
return optimize > 0 && flag_new_dce;
}
-struct tree_opt_pass pass_rtl_dse =
+struct tree_opt_pass pass_rtl_dse1 =
+{
+ "dse1", /* name */
+ gate_dse, /* gate */
+ rest_of_handle_dse, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_DSE1, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_func |
+ TODO_df_finish |
+ TODO_ggc_collect, /* todo_flags_finish */
+ 'w' /* letter */
+};
+
+struct tree_opt_pass pass_rtl_dse2 =
+{
+ "dse2", /* name */
+ gate_dse, /* gate */
+ rest_of_handle_dse, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_DSE2, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_func |
+ TODO_df_finish |
+ TODO_ggc_collect, /* todo_flags_finish */
+ 'w' /* letter */
+};
+
+struct tree_opt_pass pass_rtl_dse3 =
{
- "dse", /* name */
+ "dse3", /* name */
gate_dse, /* gate */
rest_of_handle_dse, /* execute */
NULL, /* sub */
NULL, /* next */
0, /* static_pass_number */
- TV_DSE, /* tv_id */
+ TV_DSE3, /* tv_id */
0, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
Index: sched-ebb.c
===================================================================
--- sched-ebb.c (revision 115861)
+++ sched-ebb.c (working copy)
@@ -554,7 +554,7 @@ schedule_ebbs (void)
/* Taking care of this degenerate case makes the rest of
this code simpler. */
if (n_basic_blocks == NUM_FIXED_BLOCKS)
- return;
+ return NULL;
/* We need current_sched_info in init_dependency_caches, which is
invoked via sched_init. */
Index: haifa-sched.c
===================================================================
--- haifa-sched.c (revision 115812)
+++ haifa-sched.c (working copy)
@@ -4271,13 +4271,10 @@ extend_bb (basic_block bb)
If EBB is EXIT_BLOCK_PTR, then BB is recovery block.
If EBB is NULL, then BB should be a new region. */
void
-add_block (struct df *df, basic_block bb, basic_block ebb)
+add_block (struct df *df ATTRIBUTE_UNUSED, basic_block bb, basic_block ebb)
{
- gcc_assert (current_sched_info->flags & DETACH_LIFE_INFO
- && DF_LIVE_IN (df, bb) == NULL
- && DF_LIVE_OUT (df, bb) == NULL);
-
- extend_bb (bb);
+ gcc_assert (current_sched_info->flags & DETACH_LIFE_INFO);
+ extend_bb (bb);
glat_start[bb->index] = 0;
glat_end[bb->index] = 0;