This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[RFC] Run fast RTL DCE pass at -O0
- From: Eric Botcazou <ebotcazou at adacore dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Sat, 18 Apr 2009 17:22:40 +0200
- Subject: [RFC] Run fast RTL DCE pass at -O0
Hi,
flow.c used to run (integrated) DCE at -O0; this was ditched with the switch
to full DF. As a result, we measured significant code size increases at -O0,
for example 10 MB on a big Ada application (85 MB).
This patch adds a fast RTL DCE pass at -O0; in our 4.3-based compiler, this
entirely fixed the code size regression with no increase in compilation time.
Tested (GCC, GDB) on i586-suse-linux. Thoughts?
2009-04-18 Eric Botcazou <ebotcazou@adacore.com>
* tree-pass.h (pass_fast_rtl_dce_no_opt): Declare.
* dce.c (insn_is_nop): New function.
(delete_unmarked_insns): Do not delete NOPs at -O0.
(gate_fast_dce_no_opt): New gate function.
(pass_fast_rtl_dce_no_opt): New pass.
* passes.c (init_optimization_passes): Register it.
--
Eric Botcazou
Index: tree-pass.h
===================================================================
--- tree-pass.h (revision 146259)
+++ tree-pass.h (working copy)
@@ -431,6 +431,7 @@ extern struct rtl_opt_pass pass_jump2;
extern struct rtl_opt_pass pass_lower_subreg;
extern struct rtl_opt_pass pass_cse;
extern struct rtl_opt_pass pass_fast_rtl_dce;
+extern struct rtl_opt_pass pass_fast_rtl_dce_no_opt;
extern struct rtl_opt_pass pass_ud_rtl_dce;
extern struct rtl_opt_pass pass_rtl_dce;
extern struct rtl_opt_pass pass_rtl_dse1;
Index: passes.c
===================================================================
--- passes.c (revision 146259)
+++ passes.c (working copy)
@@ -766,6 +766,7 @@ init_optimization_passes (void)
NEXT_PASS (pass_split_all_insns);
NEXT_PASS (pass_lower_subreg2);
NEXT_PASS (pass_df_initialize_no_opt);
+ NEXT_PASS (pass_fast_rtl_dce_no_opt);
NEXT_PASS (pass_stack_ptr_mod);
NEXT_PASS (pass_mode_switching);
NEXT_PASS (pass_see);
Index: dce.c
===================================================================
--- dce.c (revision 146259)
+++ dce.c (working copy)
@@ -501,6 +501,36 @@ delete_corresponding_reg_eq_notes (rtx i
}
+/* Return true if INSN contains only a formal nop pattern.
+
+ NOTE: This function is coded for speed and thus not meant to be
+ bullet-proof. Its failure to recognize a nop pattern shouldn't
+ be allowed to generate wrong code. */
+
+static bool
+insn_is_nop (const_rtx insn)
+{
+ rtx pat;
+
+ if (!INSN_P (insn))
+ return false;
+
+ pat = PATTERN (insn);
+
+ /* This is the pattern used by almost all back-ends. */
+ if (pat == const0_rtx)
+ return true;
+
+ /* This is the pattern used by the SPU back-end. */
+ if (GET_CODE (pat) == UNSPEC_VOLATILE
+ && XVECLEN (pat, 0) == 1
+ && XVECEXP (pat, 0, 0) == const0_rtx)
+ return true;
+
+ return false;
+}
+
+
/* Delete every instruction that hasn't been marked. */
static void
@@ -522,6 +552,10 @@ delete_unmarked_insns (void)
else if (marked_insn_p (insn))
continue;
+ /* Do not delete NOPs created to carry source line info at -O0. */
+ if (!optimize && insn_is_nop (insn))
+ continue;
+
/* Beware that reaching a dbg counter limit here can result
in miscompiled file. This occurs when a group of insns
must be deleted together, typically because the kept insn
@@ -1115,8 +1149,7 @@ run_fast_dce (void)
static bool
gate_fast_dce (void)
{
- return optimize > 0 && flag_dce
- && dbg_cnt (dce_fast);
+ return optimize > 0 && flag_dce && dbg_cnt (dce_fast);
}
struct rtl_opt_pass pass_fast_rtl_dce =
@@ -1140,6 +1173,35 @@ struct rtl_opt_pass pass_fast_rtl_dce =
}
};
+
+static bool
+gate_fast_dce_no_opt (void)
+{
+ return optimize == 0 && flag_dce && dbg_cnt (dce_fast);
+}
+
+struct rtl_opt_pass pass_fast_rtl_dce_no_opt =
+{
+ {
+ RTL_PASS,
+ "dce", /* name */
+ gate_fast_dce_no_opt, /* gate */
+ rest_of_handle_fast_dce, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_DCE, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_func |
+ TODO_df_finish | TODO_verify_rtl_sharing |
+ TODO_ggc_collect /* todo_flags_finish */
+ }
+};
+
+
struct rtl_opt_pass pass_fast_rtl_byte_dce =
{
{