This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[tcb] Merge PHI nodes -- 7.4% reduction of PHI nodes (Take 2)


Hi,

Attached is a revised version of the patch posted at:

http://gcc.gnu.org/ml/gcc-patches/2004-09/msg01888.html

In this iteration,

o I use split_edge instead of tree_split_edge, which I used to
  externify.
o I don't call compute_immediate_uses() as often, not for each BB.
o I don't call split_edge() as often, more than 50% reduction.
o I added some comments.

I have not

o seen if I can integrate this into thread_jumps() in tree-cfg.c, or
o removed a call to free_dominance_info().

Unfortunately, compile time goes up a little bit with this patch.  On
three-run average,

combine.c    13.343 sec -> 13.443 sec (0.75% up)
fold-const.c 27.270 sec -> 27.427 sec (0.58% up)

This PHI merge pass itself takes only 0.03 second, excluding the time
spent on cleanup_tree_cfg().  Even though I call cleanup_tree_cfg()
more often, the time spent on that has gone done.  I have yet to find
out which pass is taking longer with my patch.

Tested on i686-pc-linux-gnu.  Comments?

Kazu Hirata

2004-09-20  Kazu Hirata  <kazu@cs.umass.edu>

	PR tree-optimization/15349
	* Makefile.in (OBJS-common): Add tree-ssa-mergephi.o.
	* timevar.def (TV_TREE_MERGEPHI): New.
	* tree-optimize.c (init_tree_optimization_passes): Add
	pass_mergephi.
	* tree-pass.h: Add a prototype for pass_mergephi.
	* tree-ssa-mergephi.h: New.

Index: Makefile.in
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Makefile.in,v
retrieving revision 1.1396
diff -u -r1.1396 Makefile.in
--- Makefile.in	18 Sep 2004 01:07:21 -0000	1.1396
+++ Makefile.in	20 Sep 2004 11:56:16 -0000
@@ -924,7 +924,7 @@
  varasm.o varray.o vec.o version.o vmsdbgout.o xcoffout.o alloc-pool.o	   \
  et-forest.o cfghooks.o bt-load.o pretty-print.o $(GGC) web.o passes.o	   \
  rtl-profile.o tree-profile.o rtlhooks.o cfgexpand.o lambda-mat.o          \
- lambda-trans.o	lambda-code.o tree-loop-linear.o
+ lambda-trans.o	lambda-code.o tree-loop-linear.o tree-ssa-mergephi.o
 
 OBJS-md = $(out_object_file)
 OBJS-archive = $(EXTRA_OBJS) $(host_hook_obj) tree-inline.o		   \
@@ -1617,6 +1617,9 @@
 tree-ssa-forwprop.o : tree-ssa-forwprop.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) errors.h $(GGC_H) $(TREE_H) $(RTL_H) $(TM_P_H) $(BASIC_BLOCK_H) \
    $(TREE_FLOW_H) tree-pass.h $(TREE_DUMP_H)
+tree-ssa-mergephi.o : tree-ssa-mergephi.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+   $(TM_H) errors.h $(GGC_H) $(TREE_H) $(RTL_H) $(TM_P_H) $(BASIC_BLOCK_H) \
+   $(TREE_FLOW_H) tree-pass.h $(TREE_DUMP_H)
 tree-ssa-phiopt.o : tree-ssa-phiopt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) errors.h $(GGC_H) $(TREE_H) $(RTL_H) $(TM_P_H) $(BASIC_BLOCK_H) \
    $(TREE_FLOW_H) tree-pass.h $(TREE_DUMP_H) langhooks.h $(FLAGS_H)
Index: timevar.def
===================================================================
RCS file: /cvs/gcc/gcc/gcc/timevar.def,v
retrieving revision 1.36
diff -u -r1.36 timevar.def
--- timevar.def	8 Sep 2004 15:28:55 -0000	1.36
+++ timevar.def	20 Sep 2004 11:56:17 -0000
@@ -77,6 +77,7 @@
 DEFTIMEVAR (TV_TREE_PRE		     , "tree PRE")
 DEFTIMEVAR (TV_TREE_FRE		     , "tree FRE")
 DEFTIMEVAR (TV_TREE_PHIOPT	     , "tree linearize phis")
+DEFTIMEVAR (TV_TREE_MERGEPHI	     , "tree phi merge")
 DEFTIMEVAR (TV_TREE_FORWPROP	     , "tree forward propagate")
 DEFTIMEVAR (TV_TREE_DCE		     , "tree conservative DCE")
 DEFTIMEVAR (TV_TREE_CD_DCE	     , "tree aggressive DCE")
Index: tree-optimize.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-optimize.c,v
retrieving revision 2.49
diff -u -r2.49 tree-optimize.c
--- tree-optimize.c	18 Sep 2004 21:53:00 -0000	2.49
+++ tree-optimize.c	20 Sep 2004 11:56:17 -0000
@@ -354,6 +354,7 @@
   NEXT_PASS (pass_redundant_phi);
   NEXT_PASS (pass_dce);
   NEXT_PASS (pass_forwprop);
+  NEXT_PASS (pass_mergephi);
   NEXT_PASS (pass_phiopt);
   NEXT_PASS (pass_may_alias);
   NEXT_PASS (pass_tail_recursion);
Index: tree-pass.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/tree-pass.h,v
retrieving revision 2.15
diff -u -r2.15 tree-pass.h
--- tree-pass.h	9 Sep 2004 20:53:37 -0000	2.15
+++ tree-pass.h	20 Sep 2004 11:56:17 -0000
@@ -150,6 +150,7 @@
 extern struct tree_opt_pass pass_warn_function_return;
 extern struct tree_opt_pass pass_phiopt;
 extern struct tree_opt_pass pass_forwprop;
+extern struct tree_opt_pass pass_mergephi;
 extern struct tree_opt_pass pass_redundant_phi;
 extern struct tree_opt_pass pass_dse;
 extern struct tree_opt_pass pass_nrv;
--- /dev/null	2003-09-15 09:40:47.000000000 -0400
+++ tree-ssa-mergephi.c	2004-09-20 07:56:02.000000000 -0400
@@ -0,0 +1,357 @@
+/* Merge PHI nodes
+   Copyright (C) 2004 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "errors.h"
+#include "ggc.h"
+#include "tree.h"
+#include "rtl.h"
+#include "tm_p.h"
+#include "basic-block.h"
+#include "timevar.h"
+#include "diagnostic.h"
+#include "tree-flow.h"
+#include "tree-pass.h"
+#include "tree-dump.h"
+
+/* This pass performs merges PHI nodes if one feeds into another.  For
+   example, suppose we have the following:
+
+  goto <bb 9> (<L9>);
+
+<L8>:;
+  tem_17 = foo ();
+
+  # tem_6 = PHI <tem_17(8), tem_23(7)>;
+<L9>:;
+
+  # tem_3 = PHI <tem_6(9), tem_2(5)>;
+<L10>:;
+
+  Then we merge the first PHI node into the second one like so:
+
+  goto <bb 9> (<L10>);
+
+<L8>:;
+  tem_17 = foo ();
+
+  # tem_3 = PHI <tem_23(7), tem_2(5), tem_17(8)>;
+<L10>:;
+*/
+
+/* Bitmap of variables for which we want immediate uses.  This is set
+   by record_single_argument_cond_exprs and tested in need_imm_uses_for.  */
+static bitmap vars;
+
+/* Function indicating whether we ought to include information for 'var'
+   when calculating immediate uses.  */
+
+static bool
+need_imm_uses_for (tree var)
+{
+  return bitmap_bit_p (vars, SSA_NAME_VERSION (var));
+}
+
+/* Return true if BB is a forwarder block with PHI nodes.  */
+
+static bool
+tree_forwarder_block_with_phi_p (basic_block bb)
+{
+  block_stmt_iterator bsi;
+  edge e;
+
+  /* BB must have a single outgoing normal edge.  Otherwise it can not be
+     a forwarder block.  */
+  if (!bb->succ
+      || bb->succ->succ_next
+      || bb->succ->dest == EXIT_BLOCK_PTR
+      || (bb->succ->flags & EDGE_ABNORMAL)
+      || bb == ENTRY_BLOCK_PTR)
+    return false;
+
+  /* Successors of the entry block are not forwarders.  */
+  for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
+    if (e->dest == bb)
+      return false;
+
+  /* BB must have some PHI nodes.  */
+  if (!phi_nodes (bb))
+    return false;
+
+  /* Now walk through the statements.  We can ignore labels, anything else
+     means this is not a forwarder block.  */
+  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
+    {
+      tree stmt = bsi_stmt (bsi);
+
+      switch (TREE_CODE (stmt))
+	{
+	case LABEL_EXPR:
+	  if (DECL_NONLOCAL (LABEL_EXPR_LABEL (stmt)))
+	    return false;
+	  break;
+
+	default:
+	  return false;
+	}
+    }
+
+  return true;
+}
+
+/* Return true if we can actually merge those PHI nodes at BB into its
+   sole successor.  */
+
+static bool
+phi_mergeable_p (basic_block bb)
+{
+  tree phi;
+
+  /* See if the results of all PHI nodes at BB feed into nothing
+     but those PHI nodes at BB->SUCC->DEST.  */
+  for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
+    {
+      dataflow_t df = get_immediate_uses (phi);
+      int num_uses = num_immediate_uses (df);
+      int i, j;
+
+      for (i = 0; i < num_uses; i++)
+	{
+	  tree stmt = immediate_use (df, i);
+
+	  if (TREE_CODE (stmt) != PHI_NODE
+	      || bb_for_stmt (stmt) != bb->succ->dest)
+	    return false;
+
+	  /* Make sure PHI_RESULT (phi) does not appear in any
+	     arguments of STMT other than the one from BB.  */
+	  for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
+	    {
+	      tree arg = PHI_ARG_DEF (stmt, j);
+	      if (arg == PHI_RESULT (phi)
+		  && PHI_ARG_EDGE (stmt, j)->src != bb)
+		return false;
+	    }
+	}
+    }
+
+  return true;
+}
+
+/* Redirect edge E to basic block DEST.  Return true if successful.  */
+
+static bool
+redirect_phi_edge (edge e, basic_block dest)
+{
+  /* The original edge from E->DEST to DEST.  */
+  edge old_edge = e->dest->succ;
+  /* The new edge to DEST.  */
+  edge new_edge;
+  tree phi;
+
+  /* We don't know what to do with an abonormal edge.  */
+  if (e->flags & EDGE_ABNORMAL)
+    return false;
+
+  /* Make sure we have a forwarder block by splitting E.  */
+  e = split_edge (e)->succ;
+
+  new_edge = ssa_redirect_edge (e, dest);
+
+  /* Add to the PHI nodes at DEST each PHI argument removed at the
+     destination of E.  */
+  for (phi = phi_nodes (dest); phi; phi = TREE_CHAIN (phi))
+    {
+      int n = phi_arg_from_edge (phi, old_edge);
+      tree val = PHI_ARG_DEF (phi, n);
+      tree var;
+
+      /* Add NEW_ARG to PHI nodes wherever OLD_ARG appears.  */
+      for (var = PENDING_STMT (e); var; var = TREE_CHAIN (var))
+	{
+	  tree old_arg = TREE_PURPOSE (var);
+	  tree new_arg = TREE_VALUE (var);
+
+	  if (operand_equal_p (old_arg, val, 0))
+	    {
+	      add_phi_arg (&phi, new_arg, new_edge);
+	      break;
+	    }
+	}
+      if (!var)
+	add_phi_arg (&phi, val, new_edge);
+    }
+
+  PENDING_STMT (e) = NULL;
+
+  return true;
+}
+
+/* Merge the PHI nodes at BB into those at BB_SUCC.  Return true if
+   successful.  */
+
+static bool
+merge_phi_nodes_1 (basic_block bb, basic_block bb_succ)
+{
+  edge e, next;
+  bool something_changed = false;
+
+  /* Redirect each incoming edge to BB to BB_SUCC.  */
+  for (e = bb->pred; e; e = next)
+    {
+      next = e->pred_next;
+      something_changed |= redirect_phi_edge (e, bb_succ);
+    }
+
+  return something_changed;
+}
+
+/* Merge PHI nodes.  We do the job in 4 steps.
+
+   1. Find a set S of basic blocks such that each basic block B in S
+      has a single successor block C where both B and C have PHI
+      nodes.
+
+   2. For each basic block B in S, compute the immediate uses of the
+      results of B's PHI nodes.
+
+   3. Find a subset S' of S such that for each basic block B in S',
+      the results of B's PHI nodes are neither used outside the
+      arguments of C's PHI nodes, or used in the arguments for those
+      incoming edges other than the one from B.
+
+   4. For each basic block B in S', redirect incoming edges to B to
+      B's sole successor.  Note that B's sole successor may not be the
+      same as C as we modify the control flow graph.
+
+   5. Repeat all of the above until we reach a fixed point.  */
+
+static void
+tree_ssa_merge_phi_nodes (void)
+{
+  basic_block bb;
+  bool something_changed;
+  varray_type phi_worklist_1;
+  varray_type phi_worklist_2;
+
+  vars = BITMAP_XMALLOC ();
+  VARRAY_BB_INIT (phi_worklist_1, 10, "PHI worklist 1");
+  VARRAY_BB_INIT (phi_worklist_2, 10, "PHI worklist 2");
+
+  do
+    {
+      something_changed = false;
+
+      /* Find all PHI nodes that we may be able to merge.  */
+      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
+	{
+	  tree phi;
+
+	  /* Look for a basic block with PHI nodes.  */
+	  if (!tree_forwarder_block_with_phi_p (bb))
+	    continue;
+
+	  /* We have to feed into another block with PHI nodes.  */
+	  if (!phi_nodes (bb->succ->dest))
+	    continue;
+
+	  for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
+	    bitmap_set_bit (vars, SSA_NAME_VERSION (PHI_RESULT (phi)));
+
+	  VARRAY_PUSH_BB (phi_worklist_1, bb);
+	}
+
+      /* Now prune PHI_WORKLIST_1 into PHI_WORKLIST_2 by copying only PHI
+	 nodes that we are likely to be able to merge.  */
+      if (VARRAY_ACTIVE_SIZE (phi_worklist_1) > 0)
+	{
+	  /* Now compute immediate uses for all the PHI nodes we care
+	     about.  */
+	  compute_immediate_uses (TDFA_USE_OPS | TDFA_USE_VOPS,
+				  need_imm_uses_for);
+
+	  /* We've computed immediate uses, so we can/must clear the
+	     VARS bitmap for the next iteration.  */
+	  bitmap_clear (vars);
+
+	  while (VARRAY_ACTIVE_SIZE (phi_worklist_1) > 0)
+	    {
+	      bb = VARRAY_TOP_BB (phi_worklist_1);
+	      VARRAY_POP (phi_worklist_1);
+
+	      /* See if the results of all PHI nodes at BB feed into nothing
+		 but those PHI nodes at BB->SUCC->DEST.  */
+	      if (phi_mergeable_p (bb))
+		VARRAY_PUSH_BB (phi_worklist_2, bb);
+	    }
+
+	  free_df ();
+
+	  while (VARRAY_ACTIVE_SIZE (phi_worklist_2) > 0)
+	    {
+	      bb = VARRAY_TOP_BB (phi_worklist_2);
+	      VARRAY_POP (phi_worklist_2);
+
+	      /* Check that we still have PHI nodes at BB's sole
+		 successor because it may have lost PHI nodes due to
+		 the previous iterations of this loop.  */
+	      if (!phi_nodes (bb->succ->dest))
+		continue;
+
+	      something_changed |= merge_phi_nodes_1 (bb, bb->succ->dest);
+	    }
+	}
+
+      if (something_changed)
+	{
+	  free_dominance_info (CDI_DOMINATORS);
+	  cleanup_tree_cfg ();
+	}
+    }
+  while (something_changed);
+
+  BITMAP_XFREE (vars);
+}
+
+static bool
+gate_mergephi (void)
+{
+  return 1;
+}
+
+struct tree_opt_pass pass_mergephi = {
+  "mergephi",			/* name */
+  gate_mergephi,		/* gate */
+  tree_ssa_merge_phi_nodes,	/* execute */
+  NULL,				/* sub */
+  NULL,				/* next */
+  0,				/* static_pass_number */
+  TV_TREE_MERGEPHI,		/* tv_id */
+  PROP_cfg | PROP_ssa,		/* properties_required */
+  0,				/* properties_provided */
+  0,				/* properties_destroyed */
+  0,				/* todo_flags_start */
+  TODO_dump_func | TODO_ggc_collect	/* todo_flags_finish */
+  | TODO_verify_ssa,
+  0				/* letter */
+};


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]