This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[tcb] Merge PHI nodes -- 7.4% reduction of PHI nodes (Take 2)
- From: Kazu Hirata <kazu at cs dot umass dot edu>
- To: gcc-patches at gcc dot gnu dot org
- Cc: dnovillo at redhat dot com, dberlin at dberlin dot org, rakdver at atrey dot karlin dot mff dot cuni dot cz
- Date: Mon, 20 Sep 2004 16:05:56 -0400 (EDT)
- Subject: [tcb] Merge PHI nodes -- 7.4% reduction of PHI nodes (Take 2)
Hi,
Attached is a revised version of the patch posted at:
http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01888.html
In this iteration,
o I use split_edge instead of tree_split_edge, which I used to
externify.
o I don't call compute_immediate_uses() as often, not for each BB.
o I don't call split_edge() as often, more than 50% reduction.
o I added some comments.
I have not
o seen if I can integrate this into thread_jumps() in tree-cfg.c, or
o removed a call to free_dominance_info().
Unfortunately, compile time goes up a little bit with this patch. On
three-run average,
combine.c 13.343 sec -> 13.443 sec (0.75% up)
fold-const.c 27.270 sec -> 27.427 sec (0.58% up)
This PHI merge pass itself takes only 0.03 second, excluding the time
spent on cleanup_tree_cfg(). Even though I call cleanup_tree_cfg()
more often, the time spent on that has gone done. I have yet to find
out which pass is taking longer with my patch.
Tested on i686-pc-linux-gnu. Comments?
Kazu Hirata
2004-09-20 Kazu Hirata <kazu@cs.umass.edu>
PR tree-optimization/15349
* Makefile.in (OBJS-common): Add tree-ssa-mergephi.o.
* timevar.def (TV_TREE_MERGEPHI): New.
* tree-optimize.c (init_tree_optimization_passes): Add
pass_mergephi.
* tree-pass.h: Add a prototype for pass_mergephi.
* tree-ssa-mergephi.h: New.
Index: Makefile.in
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Makefile.in,v
retrieving revision 1.1396
diff -u -r1.1396 Makefile.in
--- Makefile.in 18 Sep 2004 01:07:21 -0000 1.1396
+++ Makefile.in 20 Sep 2004 11:56:16 -0000
@@ -924,7 +924,7 @@
varasm.o varray.o vec.o version.o vmsdbgout.o xcoffout.o alloc-pool.o \
et-forest.o cfghooks.o bt-load.o pretty-print.o $(GGC) web.o passes.o \
rtl-profile.o tree-profile.o rtlhooks.o cfgexpand.o lambda-mat.o \
- lambda-trans.o lambda-code.o tree-loop-linear.o
+ lambda-trans.o lambda-code.o tree-loop-linear.o tree-ssa-mergephi.o
OBJS-md = $(out_object_file)
OBJS-archive = $(EXTRA_OBJS) $(host_hook_obj) tree-inline.o \
@@ -1617,6 +1617,9 @@
tree-ssa-forwprop.o : tree-ssa-forwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TM_H) errors.h $(GGC_H) $(TREE_H) $(RTL_H) $(TM_P_H) $(BASIC_BLOCK_H) \
$(TREE_FLOW_H) tree-pass.h $(TREE_DUMP_H)
+tree-ssa-mergephi.o : tree-ssa-mergephi.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) errors.h $(GGC_H) $(TREE_H) $(RTL_H) $(TM_P_H) $(BASIC_BLOCK_H) \
+ $(TREE_FLOW_H) tree-pass.h $(TREE_DUMP_H)
tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TM_H) errors.h $(GGC_H) $(TREE_H) $(RTL_H) $(TM_P_H) $(BASIC_BLOCK_H) \
$(TREE_FLOW_H) tree-pass.h $(TREE_DUMP_H) langhooks.h $(FLAGS_H)
Index: timevar.def
===================================================================
RCS file: /cvs/gcc/gcc/gcc/timevar.def,v
retrieving revision 1.36
diff -u -r1.36 timevar.def
--- timevar.def 8 Sep 2004 15:28:55 -0000 1.36
+++ timevar.def 20 Sep 2004 11:56:17 -0000
@@ -77,6 +77,7 @@
DEFTIMEVAR (TV_TREE_PRE , "tree PRE")
DEFTIMEVAR (TV_TREE_FRE , "tree FRE")
DEFTIMEVAR (TV_TREE_PHIOPT , "tree linearize phis")
+DEFTIMEVAR (TV_TREE_MERGEPHI , "tree phi merge")
DEFTIMEVAR (TV_TREE_FORWPROP , "tree forward propagate")
DEFTIMEVAR (TV_TREE_DCE , "tree conservative DCE")
DEFTIMEVAR (TV_TREE_CD_DCE , "tree aggressive DCE")
Index: tree-optimize.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-optimize.c,v
retrieving revision 2.49
diff -u -r2.49 tree-optimize.c
--- tree-optimize.c 18 Sep 2004 21:53:00 -0000 2.49
+++ tree-optimize.c 20 Sep 2004 11:56:17 -0000
@@ -354,6 +354,7 @@
NEXT_PASS (pass_redundant_phi);
NEXT_PASS (pass_dce);
NEXT_PASS (pass_forwprop);
+ NEXT_PASS (pass_mergephi);
NEXT_PASS (pass_phiopt);
NEXT_PASS (pass_may_alias);
NEXT_PASS (pass_tail_recursion);
Index: tree-pass.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-pass.h,v
retrieving revision 2.15
diff -u -r2.15 tree-pass.h
--- tree-pass.h 9 Sep 2004 20:53:37 -0000 2.15
+++ tree-pass.h 20 Sep 2004 11:56:17 -0000
@@ -150,6 +150,7 @@
extern struct tree_opt_pass pass_warn_function_return;
extern struct tree_opt_pass pass_phiopt;
extern struct tree_opt_pass pass_forwprop;
+extern struct tree_opt_pass pass_mergephi;
extern struct tree_opt_pass pass_redundant_phi;
extern struct tree_opt_pass pass_dse;
extern struct tree_opt_pass pass_nrv;
--- /dev/null 2003-09-15 09:40:47.000000000 -0400
+++ tree-ssa-mergephi.c 2004-09-20 07:56:02.000000000 -0400
@@ -0,0 +1,357 @@
+/* Merge PHI nodes
+ Copyright (C) 2004 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "errors.h"
+#include "ggc.h"
+#include "tree.h"
+#include "rtl.h"
+#include "tm_p.h"
+#include "basic-block.h"
+#include "timevar.h"
+#include "diagnostic.h"
+#include "tree-flow.h"
+#include "tree-pass.h"
+#include "tree-dump.h"
+
+/* This pass performs merges PHI nodes if one feeds into another. For
+ example, suppose we have the following:
+
+ goto <bb 9> (<L9>);
+
+<L8>:;
+ tem_17 = foo ();
+
+ # tem_6 = PHI <tem_17(8), tem_23(7)>;
+<L9>:;
+
+ # tem_3 = PHI <tem_6(9), tem_2(5)>;
+<L10>:;
+
+ Then we merge the first PHI node into the second one like so:
+
+ goto <bb 9> (<L10>);
+
+<L8>:;
+ tem_17 = foo ();
+
+ # tem_3 = PHI <tem_23(7), tem_2(5), tem_17(8)>;
+<L10>:;
+*/
+
+/* Bitmap of variables for which we want immediate uses. This is set
+ by record_single_argument_cond_exprs and tested in need_imm_uses_for. */
+static bitmap vars;
+
+/* Function indicating whether we ought to include information for 'var'
+ when calculating immediate uses. */
+
+static bool
+need_imm_uses_for (tree var)
+{
+ return bitmap_bit_p (vars, SSA_NAME_VERSION (var));
+}
+
+/* Return true if BB is a forwarder block with PHI nodes. */
+
+static bool
+tree_forwarder_block_with_phi_p (basic_block bb)
+{
+ block_stmt_iterator bsi;
+ edge e;
+
+ /* BB must have a single outgoing normal edge. Otherwise it can not be
+ a forwarder block. */
+ if (!bb->succ
+ || bb->succ->succ_next
+ || bb->succ->dest == EXIT_BLOCK_PTR
+ || (bb->succ->flags & EDGE_ABNORMAL)
+ || bb == ENTRY_BLOCK_PTR)
+ return false;
+
+ /* Successors of the entry block are not forwarders. */
+ for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
+ if (e->dest == bb)
+ return false;
+
+ /* BB must have some PHI nodes. */
+ if (!phi_nodes (bb))
+ return false;
+
+ /* Now walk through the statements. We can ignore labels, anything else
+ means this is not a forwarder block. */
+ for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+ {
+ tree stmt = bsi_stmt (bsi);
+
+ switch (TREE_CODE (stmt))
+ {
+ case LABEL_EXPR:
+ if (DECL_NONLOCAL (LABEL_EXPR_LABEL (stmt)))
+ return false;
+ break;
+
+ default:
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/* Return true if we can actually merge those PHI nodes at BB into its
+ sole successor. */
+
+static bool
+phi_mergeable_p (basic_block bb)
+{
+ tree phi;
+
+ /* See if the results of all PHI nodes at BB feed into nothing
+ but those PHI nodes at BB->SUCC->DEST. */
+ for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
+ {
+ dataflow_t df = get_immediate_uses (phi);
+ int num_uses = num_immediate_uses (df);
+ int i, j;
+
+ for (i = 0; i < num_uses; i++)
+ {
+ tree stmt = immediate_use (df, i);
+
+ if (TREE_CODE (stmt) != PHI_NODE
+ || bb_for_stmt (stmt) != bb->succ->dest)
+ return false;
+
+ /* Make sure PHI_RESULT (phi) does not appear in any
+ arguments of STMT other than the one from BB. */
+ for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
+ {
+ tree arg = PHI_ARG_DEF (stmt, j);
+ if (arg == PHI_RESULT (phi)
+ && PHI_ARG_EDGE (stmt, j)->src != bb)
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+/* Redirect edge E to basic block DEST. Return true if successful. */
+
+static bool
+redirect_phi_edge (edge e, basic_block dest)
+{
+ /* The original edge from E->DEST to DEST. */
+ edge old_edge = e->dest->succ;
+ /* The new edge to DEST. */
+ edge new_edge;
+ tree phi;
+
+ /* We don't know what to do with an abonormal edge. */
+ if (e->flags & EDGE_ABNORMAL)
+ return false;
+
+ /* Make sure we have a forwarder block by splitting E. */
+ e = split_edge (e)->succ;
+
+ new_edge = ssa_redirect_edge (e, dest);
+
+ /* Add to the PHI nodes at DEST each PHI argument removed at the
+ destination of E. */
+ for (phi = phi_nodes (dest); phi; phi = TREE_CHAIN (phi))
+ {
+ int n = phi_arg_from_edge (phi, old_edge);
+ tree val = PHI_ARG_DEF (phi, n);
+ tree var;
+
+ /* Add NEW_ARG to PHI nodes wherever OLD_ARG appears. */
+ for (var = PENDING_STMT (e); var; var = TREE_CHAIN (var))
+ {
+ tree old_arg = TREE_PURPOSE (var);
+ tree new_arg = TREE_VALUE (var);
+
+ if (operand_equal_p (old_arg, val, 0))
+ {
+ add_phi_arg (&phi, new_arg, new_edge);
+ break;
+ }
+ }
+ if (!var)
+ add_phi_arg (&phi, val, new_edge);
+ }
+
+ PENDING_STMT (e) = NULL;
+
+ return true;
+}
+
+/* Merge the PHI nodes at BB into those at BB_SUCC. Return true if
+ successful. */
+
+static bool
+merge_phi_nodes_1 (basic_block bb, basic_block bb_succ)
+{
+ edge e, next;
+ bool something_changed = false;
+
+ /* Redirect each incoming edge to BB to BB_SUCC. */
+ for (e = bb->pred; e; e = next)
+ {
+ next = e->pred_next;
+ something_changed |= redirect_phi_edge (e, bb_succ);
+ }
+
+ return something_changed;
+}
+
+/* Merge PHI nodes. We do the job in 4 steps.
+
+ 1. Find a set S of basic blocks such that each basic block B in S
+ has a single successor block C where both B and C have PHI
+ nodes.
+
+ 2. For each basic block B in S, compute the immediate uses of the
+ results of B's PHI nodes.
+
+ 3. Find a subset S' of S such that for each basic block B in S',
+ the results of B's PHI nodes are neither used outside the
+ arguments of C's PHI nodes, or used in the arguments for those
+ incoming edges other than the one from B.
+
+ 4. For each basic block B in S', redirect incoming edges to B to
+ B's sole successor. Note that B's sole successor may not be the
+ same as C as we modify the control flow graph.
+
+ 5. Repeat all of the above until we reach a fixed point. */
+
+static void
+tree_ssa_merge_phi_nodes (void)
+{
+ basic_block bb;
+ bool something_changed;
+ varray_type phi_worklist_1;
+ varray_type phi_worklist_2;
+
+ vars = BITMAP_XMALLOC ();
+ VARRAY_BB_INIT (phi_worklist_1, 10, "PHI worklist 1");
+ VARRAY_BB_INIT (phi_worklist_2, 10, "PHI worklist 2");
+
+ do
+ {
+ something_changed = false;
+
+ /* Find all PHI nodes that we may be able to merge. */
+ FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
+ {
+ tree phi;
+
+ /* Look for a basic block with PHI nodes. */
+ if (!tree_forwarder_block_with_phi_p (bb))
+ continue;
+
+ /* We have to feed into another block with PHI nodes. */
+ if (!phi_nodes (bb->succ->dest))
+ continue;
+
+ for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
+ bitmap_set_bit (vars, SSA_NAME_VERSION (PHI_RESULT (phi)));
+
+ VARRAY_PUSH_BB (phi_worklist_1, bb);
+ }
+
+ /* Now prune PHI_WORKLIST_1 into PHI_WORKLIST_2 by copying only PHI
+ nodes that we are likely to be able to merge. */
+ if (VARRAY_ACTIVE_SIZE (phi_worklist_1) > 0)
+ {
+ /* Now compute immediate uses for all the PHI nodes we care
+ about. */
+ compute_immediate_uses (TDFA_USE_OPS | TDFA_USE_VOPS,
+ need_imm_uses_for);
+
+ /* We've computed immediate uses, so we can/must clear the
+ VARS bitmap for the next iteration. */
+ bitmap_clear (vars);
+
+ while (VARRAY_ACTIVE_SIZE (phi_worklist_1) > 0)
+ {
+ bb = VARRAY_TOP_BB (phi_worklist_1);
+ VARRAY_POP (phi_worklist_1);
+
+ /* See if the results of all PHI nodes at BB feed into nothing
+ but those PHI nodes at BB->SUCC->DEST. */
+ if (phi_mergeable_p (bb))
+ VARRAY_PUSH_BB (phi_worklist_2, bb);
+ }
+
+ free_df ();
+
+ while (VARRAY_ACTIVE_SIZE (phi_worklist_2) > 0)
+ {
+ bb = VARRAY_TOP_BB (phi_worklist_2);
+ VARRAY_POP (phi_worklist_2);
+
+ /* Check that we still have PHI nodes at BB's sole
+ successor because it may have lost PHI nodes due to
+ the previous iterations of this loop. */
+ if (!phi_nodes (bb->succ->dest))
+ continue;
+
+ something_changed |= merge_phi_nodes_1 (bb, bb->succ->dest);
+ }
+ }
+
+ if (something_changed)
+ {
+ free_dominance_info (CDI_DOMINATORS);
+ cleanup_tree_cfg ();
+ }
+ }
+ while (something_changed);
+
+ BITMAP_XFREE (vars);
+}
+
+static bool
+gate_mergephi (void)
+{
+ return 1;
+}
+
+struct tree_opt_pass pass_mergephi = {
+ "mergephi", /* name */
+ gate_mergephi, /* gate */
+ tree_ssa_merge_phi_nodes, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_TREE_MERGEPHI, /* tv_id */
+ PROP_cfg | PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_func | TODO_ggc_collect /* todo_flags_finish */
+ | TODO_verify_ssa,
+ 0 /* letter */
+};