[PATCH] Fix PR63155 (again)


This fixes yet another bottleneck in the SSA propagator where the way
we process the worklists (in particular the BB one) causes excessive
re-processing of PHI nodes.  The following patch prioritizes forward
progress over iteration, as that ensures the maximum set of possible
backedges is executable when re-processing PHIs.  Implementation-wise
this is done by using two worklists each for BBs and for SSA edges,
making sure not to go back in the RPO iteration.
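
To illustrate the scheduling idea outside of the propagator, here is a
small standalone sketch (not GCC code; the names work, work_back and
schedule as well as the integer "items" are made up for illustration).
Work whose RPO index is not behind the current position stays on the
current worklist; work that would move us backwards goes on the "back"
worklist, which is only swapped in once the current worklist is drained:

#include <cstdio>
#include <set>
#include <utility>

static std::set<int> work;       /* current-iteration worklist (RPO indices) */
static std::set<int> work_back;  /* backedge destinations, next iteration */
static int curr_order;           /* RPO index currently being simulated */

/* Queue an item, preferring forward progress over iteration.  */

static void
schedule (int rpo_index)
{
  if (rpo_index < curr_order)
    work_back.insert (rpo_index);
  else
    work.insert (rpo_index);
}

int
main ()
{
  curr_order = 0;
  schedule (3);
  schedule (7);
  while (1)
    {
      if (work.empty ())
        {
          if (work_back.empty ())
            break;
          /* Current iteration done, start the next one over the
             deferred backedge destinations.  */
          std::swap (work, work_back);
          continue;
        }
      curr_order = *work.begin ();
      work.erase (work.begin ());
      std::printf ("simulating item at RPO index %d\n", curr_order);
      if (curr_order == 7)
        schedule (3);  /* pretend 7 -> 3 is a backedge */
    }
  return 0;
}

On this toy input the sketch simulates items 3 and 7 in RPO order and
only afterwards, via the swapped-in back worklist, re-simulates item 3.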

This improves compile-time for the new "Small testcase more
similar to original environment" from 197s to 27s.

Not iterating SSA cycles to completion before processing uses might
end up processing more stmts, but for the testcase in question going
back to processing SSA cycles first brings compile-time back up to 35s.
That is likely because doing so still isn't the ideal way of iterating
over SSA (which would be processing SCCs...).  Other testcases
might not be so happy about this specific change; if there are any
such ones we might want to consider SCC-based iteration for the
SSA propagator...
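
Purely as an aside, SCC-based iteration would roughly mean computing the
strongly-connected components of the SSA def-use web, visiting them in
topological order and iterating each SCC to a fixed point before moving
on.  A hedged sketch of just that driver loop (the SCC computation
itself, e.g. Tarjan's algorithm, is left out, and the names
sccs_in_topo_order and visit_stmt are made up):

#include <functional>
#include <vector>

/* Visit each SCC of the def-use web in topological order, re-visiting
   its members until no lattice value changes, so earlier SCCs never
   need to be revisited because of later ones.  */

static void
propagate_by_scc (const std::vector<std::vector<int> > &sccs_in_topo_order,
                  const std::function<bool (int)> &visit_stmt)
{
  for (const std::vector<int> &scc : sccs_in_topo_order)
    {
      bool changed;
      do
        {
          changed = false;
          for (int stmt : scc)
            changed |= visit_stmt (stmt);
        }
      while (changed);
    }
}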

Note that CCP is still the most expensive pass at -O1 for the
testcase even after the patch, but backprop follows close behind.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Note GCC 4.8 compiled this in less than 1s; the abnormal edges
we generate for setjmp really hurt (but it's the testcase's fault
for implementing a try/catch with setjmp inside a state machine...
which makes me wonder how a C++ variant with EH would perform).

Richard.

From 80083abe998e0f75782d206ceda72de88fcf0563 Mon Sep 17 00:00:00 2001
From: Richard Guenther <rguenther@suse.de>
Date: Fri, 5 Oct 2018 12:31:44 +0200
Subject: [PATCH] fix-pr63155-8

2018-10-05  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/63155
	* tree-ssa-ccp.c (ccp_propagate::visit_phi): Avoid excess
	vertical space in dumpfiles.
	* tree-ssa-propagate.h
	(ssa_propagation_engine::process_ssa_edge_worklist): Remove.
	* tree-ssa-propagate.c (cfg_blocks_back): New global.
	(ssa_edge_worklist_back): Likewise.
	(curr_order): Likewise.
	(cfg_blocks_get): Remove abstraction.
	(cfg_blocks_add): Likewise.
	(cfg_blocks_empty_p): Likewise.
	(add_ssa_edge): Add to current or next worklist based on
	RPO index.
	(add_control_edge): Likewise.
	(ssa_propagation_engine::process_ssa_edge_worklist): Fold
	into ...
	(ssa_propagation_engine::ssa_propagate): ... here.  Unify
	iteration from CFG and SSA edge worklist so we process
	everything in RPO order, prioritizing forward progress
	over iteration.
	(ssa_prop_init): Allocate new worklists, do not dump
	immediate uses.
	(ssa_prop_fini): Free new worklists.

diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
index 95368a5c79d..d8a069be529 100644
--- a/gcc/tree-ssa-ccp.c
+++ b/gcc/tree-ssa-ccp.c
@@ -1119,7 +1119,7 @@ ccp_propagate::visit_phi (gphi *phi)
       if (dump_file && (dump_flags & TDF_DETAILS))
 	{
 	  fprintf (dump_file,
-	      "\n    Argument #%d (%d -> %d %sexecutable)\n",
+	      "\tArgument #%d (%d -> %d %sexecutable)\n",
 	      i, e->src->index, e->dest->index,
 	      (e->flags & EDGE_EXECUTABLE) ? "" : "not ");
 	}
diff --git a/gcc/tree-ssa-propagate.c b/gcc/tree-ssa-propagate.c
index 140b153d5a1..4cb0fbaed15 100644
--- a/gcc/tree-ssa-propagate.c
+++ b/gcc/tree-ssa-propagate.c
@@ -108,51 +108,26 @@
      [3] Advanced Compiler Design and Implementation,
 	 Steven Muchnick, Morgan Kaufmann, 1997, Section 12.6  */
 
-/* Worklist of control flow edge destinations.  This contains
+/* Worklists of control flow edge destinations.  This contains
    the CFG order number of the blocks so we can iterate in CFG
-   order by visiting in bit-order.  */
+   order by visiting in bit-order.  We use two worklists to
+   first make forward progress before iterating.  */
 static bitmap cfg_blocks;
+static bitmap cfg_blocks_back;
 static int *bb_to_cfg_order;
 static int *cfg_order_to_bb;
 
-/* Worklist of SSA edges which will need reexamination as their
+/* Worklists of SSA edges which will need reexamination as their
    definition has changed.  SSA edges are def-use edges in the SSA
    web.  For each D-U edge, we store the target statement or PHI node
-   UID in a bitmap.  UIDs order stmts in execution order.   */
+   UID in a bitmap.  UIDs order stmts in execution order.  We use
+   two worklists to first make forward progress before iterating.  */
 static bitmap ssa_edge_worklist;
+static bitmap ssa_edge_worklist_back;
 static vec<gimple *> uid_to_stmt;
 
-/* Return true if the block worklist empty.  */
-
-static inline bool
-cfg_blocks_empty_p (void)
-{
-  return bitmap_empty_p (cfg_blocks);
-}
-
-
-/* Add a basic block to the worklist.  The block must not be the ENTRY
-   or EXIT block.  */
-
-static void
-cfg_blocks_add (basic_block bb)
-{
-  gcc_assert (bb != ENTRY_BLOCK_PTR_FOR_FN (cfun)
-	      && bb != EXIT_BLOCK_PTR_FOR_FN (cfun));
-  bitmap_set_bit (cfg_blocks, bb_to_cfg_order[bb->index]);
-}
-
-
-/* Remove a block from the worklist.  */
-
-static basic_block
-cfg_blocks_get (void)
-{
-  gcc_assert (!cfg_blocks_empty_p ());
-  int order_index = bitmap_first_set_bit (cfg_blocks);
-  bitmap_clear_bit (cfg_blocks, order_index);
-  return BASIC_BLOCK_FOR_FN (cfun, cfg_order_to_bb [order_index]);
-}
+/* Current RPO index in the iteration.  */
+static int curr_order;
 
 
 /* We have just defined a new value for VAR.  If IS_VARYING is true,
@@ -182,8 +157,15 @@ add_ssa_edge (tree var)
 	       & EDGE_EXECUTABLE))
 	continue;
 
-      if (prop_simulate_again_p (use_stmt)
-	  && bitmap_set_bit (ssa_edge_worklist, gimple_uid (use_stmt)))
+      if (!prop_simulate_again_p (use_stmt))
+	continue;
+
+      bitmap worklist;
+      if (bb_to_cfg_order[gimple_bb (use_stmt)->index] < curr_order)
+	worklist = ssa_edge_worklist_back;
+      else
+	worklist = ssa_edge_worklist;
+      if (bitmap_set_bit (worklist, gimple_uid (use_stmt)))
 	{
 	  uid_to_stmt[gimple_uid (use_stmt)] = use_stmt;
 	  if (dump_file && (dump_flags & TDF_DETAILS))
@@ -211,7 +193,11 @@ add_control_edge (edge e)
 
   e->flags |= EDGE_EXECUTABLE;
 
-  cfg_blocks_add (bb);
+  int bb_order = bb_to_cfg_order[bb->index];
+  if (bb_order < curr_order)
+    bitmap_set_bit (cfg_blocks_back, bb_order);
+  else
+    bitmap_set_bit (cfg_blocks, bb_order);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
     fprintf (dump_file, "Adding destination of edge (%d -> %d) to worklist\n",
@@ -318,33 +304,6 @@ ssa_propagation_engine::simulate_stmt (gimple *stmt)
     }
 }
 
-/* Process an SSA edge worklist.  WORKLIST is the SSA edge worklist to
-   drain.  This pops statements off the given WORKLIST and processes
-   them until one statement was simulated or there are no more statements
-   on WORKLIST.  We take a pointer to WORKLIST because it may be reallocated
-   when an SSA edge is added to it in simulate_stmt.  Return true if a stmt
-   was simulated.  */
-
-void
-ssa_propagation_engine::process_ssa_edge_worklist (void)
-{
-  /* Process the next entry from the worklist.  */
-  unsigned stmt_uid = bitmap_first_set_bit (ssa_edge_worklist);
-  bitmap_clear_bit (ssa_edge_worklist, stmt_uid);
-  gimple *stmt = uid_to_stmt[stmt_uid];
-
-  /* We should not have stmts in not yet simulated BBs on the worklist.  */
-  gcc_assert (gimple_bb (stmt)->flags & BB_VISITED);
-
-  if (dump_file && (dump_flags & TDF_DETAILS))
-    {
-      fprintf (dump_file, "\nSimulating statement: ");
-      print_gimple_stmt (dump_file, stmt, 0, dump_flags);
-    }
-
-  simulate_stmt (stmt);
-}
-
 
 /* Simulate the execution of BLOCK.  Evaluate the statement associated
    with each variable reference inside the block.  */
@@ -422,6 +381,7 @@ ssa_prop_init (void)
 
   /* Worklists of SSA edges.  */
   ssa_edge_worklist = BITMAP_ALLOC (NULL);
+  ssa_edge_worklist_back = BITMAP_ALLOC (NULL);
 
   /* Worklist of basic-blocks.  */
   bb_to_cfg_order = XNEWVEC (int, last_basic_block_for_fn (cfun) + 1);
@@ -431,9 +391,7 @@ ssa_prop_init (void)
   for (int i = 0; i < n; ++i)
     bb_to_cfg_order[cfg_order_to_bb[i]] = i;
   cfg_blocks = BITMAP_ALLOC (NULL);
-
-  if (dump_file && (dump_flags & TDF_DETAILS))
-    dump_immediate_uses (dump_file);
+  cfg_blocks_back = BITMAP_ALLOC (NULL);
 
   /* Initially assume that every edge in the CFG is not executable.
      (including the edges coming out of the entry block).  Mark blocks
@@ -479,9 +437,11 @@ static void
 ssa_prop_fini (void)
 {
   BITMAP_FREE (cfg_blocks);
+  BITMAP_FREE (cfg_blocks_back);
   free (bb_to_cfg_order);
   free (cfg_order_to_bb);
   BITMAP_FREE (ssa_edge_worklist);
+  BITMAP_FREE (ssa_edge_worklist_back);
   uid_to_stmt.release ();
 }
 
@@ -796,21 +756,62 @@ ssa_propagation_engine::ssa_propagate (void)
 {
   ssa_prop_init ();
 
-  /* Iterate until the worklists are empty.  */
-  while (! cfg_blocks_empty_p ()
-	 || ! bitmap_empty_p (ssa_edge_worklist))
+  curr_order = 0;
+
+  /* Iterate until the worklists are empty.  We iterate both blocks
+     and stmts in RPO order, using sets of two worklists to first
+     complete the current iteration before iterating over backedges.  */
+  while (1)
     {
-      /* First simulate whole blocks.  */
-      if (! cfg_blocks_empty_p ())
+      int next_block_order = (bitmap_empty_p (cfg_blocks)
+			      ? -1 : bitmap_first_set_bit (cfg_blocks));
+      int next_stmt_uid = (bitmap_empty_p (ssa_edge_worklist)
+			   ? -1 : bitmap_first_set_bit (ssa_edge_worklist));
+      if (next_block_order == -1 && next_stmt_uid == -1)
 	{
-	  /* Pull the next block to simulate off the worklist.  */
-	  basic_block dest_block = cfg_blocks_get ();
-	  simulate_block (dest_block);
+	  if (bitmap_empty_p (cfg_blocks_back)
+	      && bitmap_empty_p (ssa_edge_worklist_back))
+	    break;
+
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "Regular worklists empty, now processing "
+		     "backedge destinations\n");
+	  std::swap (cfg_blocks, cfg_blocks_back);
+	  std::swap (ssa_edge_worklist, ssa_edge_worklist_back);
 	  continue;
 	}
 
-      /* Then simulate from the SSA edge worklist.  */
-      process_ssa_edge_worklist ();
+      int next_stmt_bb_order = -1;
+      gimple *next_stmt = NULL;
+      if (next_stmt_uid != -1)
+	{
+	  next_stmt = uid_to_stmt[next_stmt_uid];
+	  next_stmt_bb_order = bb_to_cfg_order[gimple_bb (next_stmt)->index];
+	}
+
+      /* Pull the next block to simulate off the worklist if it comes first.  */
+      if (next_block_order != -1
+	  && (next_stmt_bb_order == -1
+	      || next_block_order <= next_stmt_bb_order))
+	{
+	  curr_order = next_block_order;
+	  bitmap_clear_bit (cfg_blocks, next_block_order);
+	  basic_block bb
+	    = BASIC_BLOCK_FOR_FN (cfun, cfg_order_to_bb [next_block_order]);
+	  simulate_block (bb);
+	}
+      /* Else simulate from the SSA edge worklist.  */
+      else
+	{
+	  curr_order = next_stmt_bb_order;
+	  bitmap_clear_bit (ssa_edge_worklist, next_stmt_uid);
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "\nSimulating statement: ");
+	      print_gimple_stmt (dump_file, next_stmt, 0, dump_flags);
+	    }
+	  simulate_stmt (next_stmt);
+	}
     }
 
   ssa_prop_fini ();
diff --git a/gcc/tree-ssa-propagate.h b/gcc/tree-ssa-propagate.h
index 10c48d87ff8..56e1b1c1379 100644
--- a/gcc/tree-ssa-propagate.h
+++ b/gcc/tree-ssa-propagate.h
@@ -94,9 +94,7 @@ class ssa_propagation_engine
  private:
   /* Internal implementation details.  */
   void simulate_stmt (gimple *stmt);
-  void process_ssa_edge_worklist (void);
   void simulate_block (basic_block);
-
 };
 
 class substitute_and_fold_engine

