This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH][RFC] Fix PR63155 (some more)


The second testcase in the above PR runs into our O(N) bitmap element
search limitation and spends 8s (60%) of the compile-time in the SSA propagator
engine (when optimizing).  The patch improves that to 0.9s (15%).  For the 
first testcase it isn't that bad but still the patch improves CCP from 36% to 
14%.

The "solution" is to use sbitmap instead of a bitmap to avoid
the linearity when doing add_ssa_edge.  We pay for that (but not
actually with the testcases) with a linear-time bitmap_first_set_bit
in process_ssa_edge_worklist.  I do not (yet...) have a testcase
that overall gets slower with this approach.  I suppose using
std::set<int> would "solve" the complexity issue but we'd pay
back with horribly inefficient memory use.  Similarly with
our sparse bitmap implementation which lacks an ordered
first_set_bit (it only can get any set bit fast, breaking optimal
iteration order).

If we'd only had a O(log n) search sparse bitmap implementation ...
(Steven posted patches to switch bitmap from/to such one but IIRC
that at least lacked bitmap_first_set_bit).

Bootstrapped and tested on x86_64-unknown-linux-gnu.

OK for trunk?

Thanks,
Richard.

2018-09-19  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/63155
	* tree-ssa-propagate.h
	(ssa_propagation_engine::process_ssa_edge_worklist): Change
	prototype.
	* tree-ssa-propagate.c (ssa_edge_worklist): Turn into an sbitmap.
	(add_ssa_edge): Adjust.
	(ssa_propagation_engine::process_ssa_edge_worklist): If there
	was nothing to do return false.
	(ssa_prop_init): Allocate and clear ssa_edge_worklist.
	(ssa_prop_fini): Adjust.
	(ssa_propagation_engine::ssa_propagate): Elide the check
	for an empty ssa_edge_worklist by using the
	process_ssa_edge_worklist return value.

Index: gcc/tree-ssa-propagate.h
===================================================================
--- gcc/tree-ssa-propagate.h	(revision 264418)
+++ gcc/tree-ssa-propagate.h	(working copy)
@@ -94,7 +94,7 @@ class ssa_propagation_engine
  private:
   /* Internal implementation details.  */
   void simulate_stmt (gimple *stmt);
-  void process_ssa_edge_worklist (void);
+  bool process_ssa_edge_worklist (void);
   void simulate_block (basic_block);
 
 };
Index: gcc/tree-ssa-propagate.c
===================================================================
--- gcc/tree-ssa-propagate.c	(revision 264418)
+++ gcc/tree-ssa-propagate.c	(working copy)
@@ -119,7 +119,7 @@ static int *cfg_order_to_bb;
    definition has changed.  SSA edges are def-use edges in the SSA
    web.  For each D-U edge, we store the target statement or PHI node
    UID in a bitmap.  UIDs order stmts in execution order.   */
-static bitmap ssa_edge_worklist;
+static sbitmap ssa_edge_worklist;
 static vec<gimple *> uid_to_stmt;
 
 /* Return true if the block worklist empty.  */
@@ -175,8 +175,9 @@ add_ssa_edge (tree var)
 	continue;
 
       if (prop_simulate_again_p (use_stmt)
-	  && bitmap_set_bit (ssa_edge_worklist, gimple_uid (use_stmt)))
+	  && !bitmap_bit_p (ssa_edge_worklist, gimple_uid (use_stmt)))
 	{
+	  bitmap_set_bit (ssa_edge_worklist, gimple_uid (use_stmt));
 	  uid_to_stmt[gimple_uid (use_stmt)] = use_stmt;
 	  if (dump_file && (dump_flags & TDF_DETAILS))
 	    {
@@ -317,11 +318,14 @@ ssa_propagation_engine::simulate_stmt (g
    when an SSA edge is added to it in simulate_stmt.  Return true if a stmt
    was simulated.  */
 
-void
+bool
 ssa_propagation_engine::process_ssa_edge_worklist (void)
 {
   /* Process the next entry from the worklist.  */
-  unsigned stmt_uid = bitmap_first_set_bit (ssa_edge_worklist);
+  int stmt_uid = bitmap_first_set_bit (ssa_edge_worklist);
+  if (stmt_uid == -1)
+    return false;
+
   bitmap_clear_bit (ssa_edge_worklist, stmt_uid);
   gimple *stmt = uid_to_stmt[stmt_uid];
 
@@ -335,6 +339,7 @@ ssa_propagation_engine::process_ssa_edge
     }
 
   simulate_stmt (stmt);
+  return true;
 }
 
 
@@ -412,9 +417,6 @@ ssa_prop_init (void)
   edge_iterator ei;
   basic_block bb;
 
-  /* Worklists of SSA edges.  */
-  ssa_edge_worklist = BITMAP_ALLOC (NULL);
-
   /* Worklist of basic-blocks.  */
   bb_to_cfg_order = XNEWVEC (int, last_basic_block_for_fn (cfun) + 1);
   cfg_order_to_bb = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
@@ -455,6 +457,10 @@ ssa_prop_init (void)
     }
   uid_to_stmt.safe_grow (gimple_stmt_max_uid (cfun));
 
+  /* Worklists of SSA edges.  */
+  ssa_edge_worklist = sbitmap_alloc (gimple_stmt_max_uid (cfun));
+  bitmap_clear (ssa_edge_worklist);
+
   /* Seed the algorithm by adding the successors of the entry block to the
      edge worklist.  */
   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
@@ -473,7 +479,8 @@ ssa_prop_fini (void)
   BITMAP_FREE (cfg_blocks);
   free (bb_to_cfg_order);
   free (cfg_order_to_bb);
-  BITMAP_FREE (ssa_edge_worklist);
+  sbitmap_free (ssa_edge_worklist);
+  ssa_edge_worklist = NULL;
   uid_to_stmt.release ();
 }
 
@@ -789,8 +796,7 @@ ssa_propagation_engine::ssa_propagate (v
   ssa_prop_init ();
 
   /* Iterate until the worklists are empty.  */
-  while (! cfg_blocks_empty_p ()
-	 || ! bitmap_empty_p (ssa_edge_worklist))
+  while (1)
     {
       /* First simulate whole blocks.  */
       if (! cfg_blocks_empty_p ())
@@ -802,7 +808,10 @@ ssa_propagation_engine::ssa_propagate (v
 	}
 
       /* Then simulate from the SSA edge worklist.  */
-      process_ssa_edge_worklist ();
+      if (process_ssa_edge_worklist ())
+	continue;
+
+      break;
     }
 
   ssa_prop_fini ();


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]