This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH][RFC] Fix PR63155 (some more)
- From: Richard Biener <rguenther at suse dot de>
- To: gcc-patches at gcc dot gnu dot org
- Date: Wed, 19 Sep 2018 15:06:30 +0200 (CEST)
- Subject: [PATCH][RFC] Fix PR63155 (some more)
The second testcase in the above PR runs into our O(N) bitmap element
search limitation and spends 8s (60%) of the compile-time in the SSA propagator
engine (when optimizing). The patch improves that to 0.9s (15%). For the
first testcase it isn't that bad but still the patch improves CCP from 36% to
14%.
The "solution" is to use sbitmap instead of a bitmap to avoid
the linearity when doing add_ssa_edge. We pay for that (but not
actually with the testcases) with a linear-time bitmap_first_set_bit
in process_ssa_edge_worklist. I do not (yet...) have a testcase
that overall gets slower with this approach. I suppose using
std::set<int> would "solve" the complexity issue but we'd pay
back with horribly inefficient memory use. Similarly with
our sparse bitmap implementation which lacks an ordered
first_set_bit (it only can get any set bit fast, breaking optimal
iteration order).
If we'd only had a O(log n) search sparse bitmap implementation ...
(Steven posted patches to switch bitmap from/to such one but IIRC
that at least lacked bitmap_first_set_bit).
Bootstrapped and tested on x86_64-unknown-linux-gnu.
OK for trunk?
Thanks,
Richard.
2018-09-19 Richard Biener <rguenther@suse.de>
PR tree-optimization/63155
* tree-ssa-propagate.h
(ssa_propagation_engine::process_ssa_edge_worklist): Change
prototype.
* tree-ssa-propagate.c (ssa_edge_worklist): Turn into an sbitmap.
(add_ssa_edge): Adjust.
(ssa_propagation_engine::process_ssa_edge_worklist): If there
was nothing to do return false.
(ssa_prop_init): Allocate and clear ssa_edge_worklist.
(ssa_prop_fini): Adjust.
(ssa_propagation_engine::ssa_propagate): Elide the check
for an empty ssa_edge_worklist by using the
process_ssa_edge_worklist return value.
Index: gcc/tree-ssa-propagate.h
===================================================================
--- gcc/tree-ssa-propagate.h (revision 264418)
+++ gcc/tree-ssa-propagate.h (working copy)
@@ -94,7 +94,7 @@ class ssa_propagation_engine
private:
/* Internal implementation details. */
void simulate_stmt (gimple *stmt);
- void process_ssa_edge_worklist (void);
+ bool process_ssa_edge_worklist (void);
void simulate_block (basic_block);
};
Index: gcc/tree-ssa-propagate.c
===================================================================
--- gcc/tree-ssa-propagate.c (revision 264418)
+++ gcc/tree-ssa-propagate.c (working copy)
@@ -119,7 +119,7 @@ static int *cfg_order_to_bb;
definition has changed. SSA edges are def-use edges in the SSA
web. For each D-U edge, we store the target statement or PHI node
UID in a bitmap. UIDs order stmts in execution order. */
-static bitmap ssa_edge_worklist;
+static sbitmap ssa_edge_worklist;
static vec<gimple *> uid_to_stmt;
/* Return true if the block worklist empty. */
@@ -175,8 +175,9 @@ add_ssa_edge (tree var)
continue;
if (prop_simulate_again_p (use_stmt)
- && bitmap_set_bit (ssa_edge_worklist, gimple_uid (use_stmt)))
+ && !bitmap_bit_p (ssa_edge_worklist, gimple_uid (use_stmt)))
{
+ bitmap_set_bit (ssa_edge_worklist, gimple_uid (use_stmt));
uid_to_stmt[gimple_uid (use_stmt)] = use_stmt;
if (dump_file && (dump_flags & TDF_DETAILS))
{
@@ -317,11 +318,14 @@ ssa_propagation_engine::simulate_stmt (g
when an SSA edge is added to it in simulate_stmt. Return true if a stmt
was simulated. */
-void
+bool
ssa_propagation_engine::process_ssa_edge_worklist (void)
{
/* Process the next entry from the worklist. */
- unsigned stmt_uid = bitmap_first_set_bit (ssa_edge_worklist);
+ int stmt_uid = bitmap_first_set_bit (ssa_edge_worklist);
+ if (stmt_uid == -1)
+ return false;
+
bitmap_clear_bit (ssa_edge_worklist, stmt_uid);
gimple *stmt = uid_to_stmt[stmt_uid];
@@ -335,6 +339,7 @@ ssa_propagation_engine::process_ssa_edge
}
simulate_stmt (stmt);
+ return true;
}
@@ -412,9 +417,6 @@ ssa_prop_init (void)
edge_iterator ei;
basic_block bb;
- /* Worklists of SSA edges. */
- ssa_edge_worklist = BITMAP_ALLOC (NULL);
-
/* Worklist of basic-blocks. */
bb_to_cfg_order = XNEWVEC (int, last_basic_block_for_fn (cfun) + 1);
cfg_order_to_bb = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
@@ -455,6 +457,10 @@ ssa_prop_init (void)
}
uid_to_stmt.safe_grow (gimple_stmt_max_uid (cfun));
+ /* Worklists of SSA edges. */
+ ssa_edge_worklist = sbitmap_alloc (gimple_stmt_max_uid (cfun));
+ bitmap_clear (ssa_edge_worklist);
+
/* Seed the algorithm by adding the successors of the entry block to the
edge worklist. */
FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
@@ -473,7 +479,8 @@ ssa_prop_fini (void)
BITMAP_FREE (cfg_blocks);
free (bb_to_cfg_order);
free (cfg_order_to_bb);
- BITMAP_FREE (ssa_edge_worklist);
+ sbitmap_free (ssa_edge_worklist);
+ ssa_edge_worklist = NULL;
uid_to_stmt.release ();
}
@@ -789,8 +796,7 @@ ssa_propagation_engine::ssa_propagate (v
ssa_prop_init ();
/* Iterate until the worklists are empty. */
- while (! cfg_blocks_empty_p ()
- || ! bitmap_empty_p (ssa_edge_worklist))
+ while (1)
{
/* First simulate whole blocks. */
if (! cfg_blocks_empty_p ())
@@ -802,7 +808,10 @@ ssa_propagation_engine::ssa_propagate (v
}
/* Then simulate from the SSA edge worklist. */
- process_ssa_edge_worklist ();
+ if (process_ssa_edge_worklist ())
+ continue;
+
+ break;
}
ssa_prop_fini ();