This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
patch/RFC: PR optimization/8423
- From: Zack Weinberg <zack at codesourcery dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Tue, 5 Nov 2002 13:55:00 -0800
- Subject: patch/RFC: PR optimization/8423
PR optimization/8423 reports a deficiency in optimizing
__builtin_constant_p. To wit, if you have
#define btest(x) __builtin_constant_p(x) ? "1" : "0"
...
{
int size = sizeof (int);
foo (btest (size));
foo (btest (size));
}
the first call to foo() will pass it "1", but the second will pass "0".
Initial RTL generation produces code of the form
(set (reg A) (const_int 4)) ; sizeof(int)
(set (reg B) (constant_p_rtx (reg A)))
; if-then-else sequence here based on value of reg B, setting up foo's
; argument
(call "foo")
(set (reg C) (constant_p_rtx (reg A)))
; another if-then-else sequence
(call "foo")
Presently, the only pass that does anything with CONSTANT_P_RTX is
local CSE. fold_rtx() can figure out that (reg A) == (const_int 4)
and therefore (constant_p_rtx (reg A)) -> (const_int 1) when it is
processing the insn that sets reg B. However, since local CSE -- by
design -- forgets all register values at extended basic block
boundaries, it cannot figure out that reg A is constant when it is
processing the insn that sets reg C. And fold_rtx always folds out
CONSTANT_P_RTX, whether or not it knows the operand is constant, so no
later pass gets a crack at it.
If I change fold_rtx so that it does not eliminate CONSTANT_P_RTX if
it can't prove it constant, then the insn
(set (reg C) (constant_p_rtx (reg A)))
survives unchanged to the GCSE pass, which _can_ figure out that reg A
is constant at that point. It replaces this with
(set (reg C) (constant_p_rtx (const_int 4)))
Then the post-GCSE CSE pass folds that down to
(set (reg C) (const_int 1))
and the testcase is compiled correctly (i.e. both calls to btest have
the same result). However, the CFG is left a mess which does not get
properly cleaned up until combine. Also, now we don't ever reduce
(constant_p_rtx (value)), where value is _not_ constant, to 0, which
causes problems later on.
Staring at toplev.c for awhile leads me to believe that the right fix
for this is to run GCSE _before_ CSE, and make GCSE aware of
CONSTANT_P_RTX. The appended patch does just that. Since this is a
drastic change, I will also check what overall effect this has on code
quality by examining bootstrap times and the size of the generated
compiler.
Comments?
zw
* gcse.c (try_replace_reg, cprop_jump): Call simplify_rtx on
the replacement before doing anything else.
* simplify-rtx.c (simplify_replace_rtx, simplify_rtx):
Handle CONSTANT_P_RTX.
* toplev.c (enum dump_file_index, dump_file array): GCSE
happens before CSE.
(rest_of_compilation): Move GCSE pass before CSE pass.
Reorganize code to match.
===================================================================
Index: gcse.c
--- gcse.c 21 Oct 2002 17:51:57 -0000 1.218.4.4
+++ gcse.c 5 Nov 2002 21:53:05 -0000
@@ -3922,6 +3922,11 @@ try_replace_reg (from, to, insn)
int success = 0;
rtx set = single_set (insn);
+ /* Always try to simplify TO a little on its own. */
+ rtx simpler_to = simplify_rtx (to);
+ if (simpler_to)
+ to = simpler_to;
+
validate_replace_src_group (from, to, insn);
if (num_changes_pending () && apply_change_group ())
success = 1;
@@ -4045,6 +4050,11 @@ cprop_jump (bb, setcc, jump, from, src)
{
rtx new, new_set;
rtx set = pc_set (jump);
+
+ /* Always try to simplify SRC a little on its own. */
+ rtx simpler_src = simplify_rtx (src);
+ if (simpler_src)
+ src = simpler_src;
/* First substitute in the INSN condition as the SET_SRC of the JUMP,
then substitute that given values in this expanded JUMP. */
===================================================================
Index: simplify-rtx.c
--- simplify-rtx.c 5 Nov 2002 19:11:55 -0000 1.118.4.8
+++ simplify-rtx.c 5 Nov 2002 21:53:05 -0000
@@ -289,7 +289,7 @@ simplify_replace_rtx (x, old, new)
}
case 'x':
- /* The only case we try to handle is a SUBREG. */
+ /* We know about SUBREG and CONSTANT_P_RTX. */
if (code == SUBREG)
{
rtx exp;
@@ -301,6 +301,18 @@ simplify_replace_rtx (x, old, new)
if (exp)
x = exp;
}
+ else if (code == CONSTANT_P_RTX)
+ {
+ /* Don't eliminate constant_p_rtx if it would evaluate to 0;
+ we may be able to figure it out later. */
+ rtx exp;
+ do
+ exp = XEXP (x, 0);
+ while (GET_CODE (exp) == CONSTANT_P_RTX);
+
+ if (CONSTANT_P (exp))
+ x = const1_rtx;
+ }
return x;
case 'o':
@@ -2729,11 +2741,23 @@ simplify_rtx (x)
: GET_MODE (XEXP (x, 1))),
XEXP (x, 0), XEXP (x, 1));
case 'x':
- /* The only case we try to handle is a SUBREG. */
+ /* We know about SUBREG and CONSTANT_P_RTX. */
if (code == SUBREG)
return simplify_gen_subreg (mode, SUBREG_REG (x),
GET_MODE (SUBREG_REG (x)),
SUBREG_BYTE (x));
+ else if (code == CONSTANT_P_RTX)
+ {
+ /* Don't eliminate constant_p_rtx if it would evaluate to 0;
+ we may be able to figure it out later. */
+ rtx exp;
+ do
+ exp = XEXP (x, 0);
+ while (GET_CODE (exp) == CONSTANT_P_RTX);
+
+ if (CONSTANT_P (exp))
+ return const1_rtx;
+ }
return NULL;
default:
return NULL;
===================================================================
Index: toplev.c
--- toplev.c 5 Nov 2002 19:11:55 -0000 1.668.4.10
+++ toplev.c 5 Nov 2002 21:53:07 -0000
@@ -226,9 +226,9 @@ enum dump_file_index
DFI_ssa_dce,
DFI_ussa,
DFI_null,
+ DFI_gcse,
DFI_cse,
DFI_addressof,
- DFI_gcse,
DFI_loop,
DFI_cfg,
DFI_bp,
@@ -275,9 +275,9 @@ static struct dump_file_info dump_file[D
{ "ssadce", 'X', 1, 0, 0 },
{ "ussa", 'e', 1, 0, 0 }, /* Yes, duplicate enable switch. */
{ "null", 'u', 0, 0, 0 },
+ { "gcse", 'G', 1, 0, 0 },
{ "cse", 's', 0, 0, 0 },
{ "addressof", 'F', 0, 0, 0 },
- { "gcse", 'G', 1, 0, 0 },
{ "loop", 'L', 1, 0, 0 },
{ "ce1", 'C', 1, 0, 0 },
{ "cfg", 'f', 1, 0, 0 },
@@ -2795,38 +2795,72 @@ rest_of_compilation (decl)
ggc_collect ();
- /* Perform common subexpression elimination.
- Nonzero value from `cse_main' means that jumps were simplified
- and some code may now be unreachable, so do
- jump optimization again. */
+ /* Perform global common subexpression elimination. */
+ if (optimize > 0 && flag_gcse)
+ {
+ timevar_push (TV_GCSE);
+ open_dump_file (DFI_gcse, decl);
+
+ tem = gcse_main (insns, rtl_dump_file);
+
+ timevar_push (TV_JUMP);
+ purge_all_dead_edges (0);
+ rebuild_jump_labels (insns);
+ delete_trivially_dead_insns (insns, max_reg_num ());
+
+ if (tem)
+ cleanup_cfg (CLEANUP_EXPENSIVE | CLEANUP_PRE_LOOP);
+ timevar_pop (TV_JUMP);
+
+ close_dump_file (DFI_gcse, print_rtl_with_bb, insns);
+
+ ggc_collect ();
+#ifdef ENABLE_CHECKING
+ verify_flow_info ();
+#endif
+ timevar_pop (TV_GCSE);
+ }
+
+ /* Perform local common subexpression elimination. If expensive
+ optimizations are enabled, do this iteratively until it stops
+ modifying the CFG. */
if (optimize > 0)
{
+ timevar_push (TV_CSE);
open_dump_file (DFI_cse, decl);
if (rtl_dump_file)
dump_flow_info (rtl_dump_file);
- timevar_push (TV_CSE);
- reg_scan (insns, max_reg_num (), 1);
+ do
+ {
+ reg_scan (insns, max_reg_num (), 1);
+ tem = cse_main (insns, max_reg_num (), 0, rtl_dump_file);
- tem = cse_main (insns, max_reg_num (), 0, rtl_dump_file);
- if (tem)
- rebuild_jump_labels (insns);
- purge_all_dead_edges (0);
+ timevar_push (TV_JUMP);
+ if (tem)
+ rebuild_jump_labels (insns);
+ purge_all_dead_edges (0);
- delete_trivially_dead_insns (insns, max_reg_num ());
+ delete_trivially_dead_insns (insns, max_reg_num ());
- /* If we are not running more CSE passes, then we are no longer
- expecting CSE to be run. But always rerun it in a cheap mode. */
- cse_not_expected = !flag_rerun_cse_after_loop && !flag_gcse;
+ /* If we are not running more CSE passes, then we are no
+ longer expecting CSE to be run. */
+ cse_not_expected =
+ (flag_expensive_optimizations ? !tem : 1)
+ && !flag_rerun_cse_after_loop;
- if (tem || optimize > 1)
- cleanup_cfg (CLEANUP_EXPENSIVE | CLEANUP_PRE_LOOP);
- /* Try to identify useless null pointer tests and delete them. */
+ if (tem || optimize > 1)
+ cleanup_cfg (CLEANUP_EXPENSIVE | CLEANUP_PRE_LOOP);
+ timevar_pop (TV_JUMP);
+ }
+ while (flag_expensive_optimizations && tem);
+
+ /* Again try to identify useless null pointer tests and delete
+ them. */
if (flag_delete_null_pointer_checks)
{
timevar_push (TV_JUMP);
-
if (delete_null_pointer_checks (insns))
cleanup_cfg (CLEANUP_EXPENSIVE | CLEANUP_PRE_LOOP);
timevar_pop (TV_JUMP);
@@ -2836,8 +2870,13 @@ rest_of_compilation (decl)
removed a bunch more instructions. */
renumber_insns (rtl_dump_file);
- timevar_pop (TV_CSE);
close_dump_file (DFI_cse, print_rtl_with_bb, insns);
+
+ ggc_collect ();
+#ifdef ENABLE_CHECKING
+ verify_flow_info ();
+#endif
+ timevar_pop (TV_CSE);
}
open_dump_file (DFI_addressof, decl);
@@ -2850,69 +2889,6 @@ rest_of_compilation (decl)
close_dump_file (DFI_addressof, print_rtl, insns);
ggc_collect ();
-
- /* Perform global cse. */
-
- if (optimize > 0 && flag_gcse)
- {
- int save_csb, save_cfj;
- int tem2 = 0;
-
- timevar_push (TV_GCSE);
- open_dump_file (DFI_gcse, decl);
-
- tem = gcse_main (insns, rtl_dump_file);
- rebuild_jump_labels (insns);
- delete_trivially_dead_insns (insns, max_reg_num ());
-
- save_csb = flag_cse_skip_blocks;
- save_cfj = flag_cse_follow_jumps;
- flag_cse_skip_blocks = flag_cse_follow_jumps = 0;
-
- /* If -fexpensive-optimizations, re-run CSE to clean up things done
- by gcse. */
- if (flag_expensive_optimizations)
- {
- timevar_push (TV_CSE);
- reg_scan (insns, max_reg_num (), 1);
- tem2 = cse_main (insns, max_reg_num (), 0, rtl_dump_file);
- purge_all_dead_edges (0);
- delete_trivially_dead_insns (insns, max_reg_num ());
- timevar_pop (TV_CSE);
- cse_not_expected = !flag_rerun_cse_after_loop;
- }
-
- /* If gcse or cse altered any jumps, rerun jump optimizations to clean
- things up. Then possibly re-run CSE again. */
- while (tem || tem2)
- {
- tem = tem2 = 0;
- timevar_push (TV_JUMP);
- rebuild_jump_labels (insns);
- cleanup_cfg (CLEANUP_EXPENSIVE | CLEANUP_PRE_LOOP);
- timevar_pop (TV_JUMP);
-
- if (flag_expensive_optimizations)
- {
- timevar_push (TV_CSE);
- reg_scan (insns, max_reg_num (), 1);
- tem2 = cse_main (insns, max_reg_num (), 0, rtl_dump_file);
- purge_all_dead_edges (0);
- delete_trivially_dead_insns (insns, max_reg_num ());
- timevar_pop (TV_CSE);
- }
- }
-
- close_dump_file (DFI_gcse, print_rtl_with_bb, insns);
- timevar_pop (TV_GCSE);
-
- ggc_collect ();
- flag_cse_skip_blocks = save_csb;
- flag_cse_follow_jumps = save_cfj;
-#ifdef ENABLE_CHECKING
- verify_flow_info ();
-#endif
- }
/* Move constant computations out of loops. */