[PATCH] Improve RPO VN for PR91257
Richard Biener
rguenther@suse.de
Mon Jul 29 13:07:00 GMT 2019
The following patch addresses a known issue with managing availability
in RPO VN - this is just the first time a testcase actually shows
measurable cycles here. It's hammering libc malloc/free for
allocating one-element vectors. So the following finally goes the
way the comment suggests and hooks the availability chain off
vn_ssa_aux, also converting it to a single-linked list of elements
we allocate from the same obstack. Having only a single hash-table
is probably also more cache-friendly.
This cuts down VN time by ~20% on the testcase (but overall
compile-time much less).
The remaining "slowness" is GC, out-of-SSA and the SSA propagator
passes (via the SSA propagator engine). All known and hard to fix.
Bootstrap / regtest running on x86_64-unknown-linux-gnu.
Richard.
2019-07-29 Richard Biener <rguenther@suse.de>
PR tree-optimization/91257
* tree-ssa-sccvn.h (struct vn_avail): New.
(struct vn_ssa_aux): Add avail member.
* tree-ssa-sccvn.c (class rpo_elim): Remove m_rpo_avail
member, add m_avail_freelist one.
(rpo_elim::~rpo_elim): Remove.
(rpo_elim::eliminate_avail): Adjust to new avail tracking
data structure.
(rpo_elim::eliminate_push_avail): Likewise.
(do_unwind): Likewise.
(do_rpo_vn): Likewise.
Index: gcc/tree-ssa-sccvn.h
===================================================================
--- gcc/tree-ssa-sccvn.h (revision 273792)
+++ gcc/tree-ssa-sccvn.h (working copy)
@@ -193,6 +193,25 @@ vn_constant_eq_with_type (tree c1, tree
&& types_compatible_p (TREE_TYPE (c1), TREE_TYPE (c2)));
}
+/* Instead of having a local availability lattice for each basic-block
+ and availability at X defined as union of the local availabilities
+ at X and its dominators we're turning this upside down and track
+ availability per value given values are usually made available at very
+ few points.
+ So we have a chain of LOCATION, LEADER entries where LOCATION is
+ specifying the basic-block LEADER is made available for VALUE.
+ We prepend to this chain in RPO order thus for iteration we can simply
+ remove the last entries.
+ LOCATION is the basic-block index and LEADER is its SSA name version. */
+struct vn_avail
+{
+ vn_avail *next;
+ /* The basic-block LEADER is made available. */
+ int location;
+ /* The LEADER for the value we are chained on. */
+ int leader;
+};
+
typedef struct vn_ssa_aux
{
/* SSA name this vn_ssa_aux is associated with in the lattice. */
@@ -202,6 +221,10 @@ typedef struct vn_ssa_aux
/* Statements to insert if needs_insertion is true. */
gimple_seq expr;
+ /* AVAIL entries, last in RPO order is first. This is only tracked
+ for SSA names also serving as values (NAME == VALNUM). */
+ vn_avail *avail;
+
/* Unique identifier that all expressions with the same value have. */
unsigned int value_id;
Index: gcc/tree-ssa-sccvn.c
===================================================================
--- gcc/tree-ssa-sccvn.c (revision 273792)
+++ gcc/tree-ssa-sccvn.c (working copy)
@@ -2126,36 +2126,17 @@ class rpo_elim : public eliminate_dom_wa
{
public:
rpo_elim(basic_block entry_)
- : eliminate_dom_walker (CDI_DOMINATORS, NULL), entry (entry_) {}
- ~rpo_elim();
+ : eliminate_dom_walker (CDI_DOMINATORS, NULL), entry (entry_),
+ m_avail_freelist (NULL) {}
virtual tree eliminate_avail (basic_block, tree op);
virtual void eliminate_push_avail (basic_block, tree);
basic_block entry;
- /* Instead of having a local availability lattice for each
- basic-block and availability at X defined as union of
- the local availabilities at X and its dominators we're
- turning this upside down and track availability per
- value given values are usually made available at very
- few points (at least one).
- So we have a value -> vec<location, leader> map where
- LOCATION is specifying the basic-block LEADER is made
- available for VALUE. We push to this vector in RPO
- order thus for iteration we can simply pop the last
- entries.
- LOCATION is the basic-block index and LEADER is its
- SSA name version. */
- /* ??? We'd like to use auto_vec here with embedded storage
- but that doesn't play well until we can provide move
- constructors and use std::move on hash-table expansion.
- So for now this is a bit more expensive than necessary.
- We eventually want to switch to a chaining scheme like
- for hashtable entries for unwinding which would make
- making the vector part of the vn_ssa_aux structure possible. */
- typedef hash_map<tree, vec<std::pair<int, int> > > rpo_avail_t;
- rpo_avail_t m_rpo_avail;
+ /* Freelist of avail entries which are allocated from the vn_ssa_aux
+ obstack. */
+ vn_avail *m_avail_freelist;
};
/* Global RPO state for access from hooks. */
@@ -6197,14 +6178,6 @@ vn_lookup_simplify_result (gimple_match_
return res;
}
-rpo_elim::~rpo_elim ()
-{
- /* Release the avail vectors. */
- for (rpo_avail_t::iterator i = m_rpo_avail.begin ();
- i != m_rpo_avail.end (); ++i)
- (*i).second.release ();
-}
-
/* Return a leader for OPs value that is valid at BB. */
tree
@@ -6220,16 +6193,15 @@ rpo_elim::eliminate_avail (basic_block b
{
if (SSA_NAME_IS_DEFAULT_DEF (valnum))
return valnum;
- vec<std::pair<int, int> > *av = m_rpo_avail.get (valnum);
- if (!av || av->is_empty ())
+ vn_avail *av = VN_INFO (valnum)->avail;
+ if (!av)
return NULL_TREE;
- int i = av->length () - 1;
- if ((*av)[i].first == bb->index)
+ if (av->location == bb->index)
/* On tramp3d 90% of the cases are here. */
- return ssa_name ((*av)[i].second);
+ return ssa_name (av->leader);
do
{
- basic_block abb = BASIC_BLOCK_FOR_FN (cfun, (*av)[i].first);
+ basic_block abb = BASIC_BLOCK_FOR_FN (cfun, av->location);
/* ??? During elimination we have to use availability at the
definition site of a use we try to replace. This
is required to not run into inconsistencies because
@@ -6243,7 +6215,7 @@ rpo_elim::eliminate_avail (basic_block b
executable. */
if (dominated_by_p_w_unex (bb, abb))
{
- tree leader = ssa_name ((*av)[i].second);
+ tree leader = ssa_name (av->leader);
/* Prevent eliminations that break loop-closed SSA. */
if (loops_state_satisfies_p (LOOP_CLOSED_SSA)
&& ! SSA_NAME_IS_DEFAULT_DEF (leader)
@@ -6265,8 +6237,9 @@ rpo_elim::eliminate_avail (basic_block b
/* ??? Can we somehow skip to the immediate dominator
RPO index (bb_to_rpo)? Again, maybe not worth, on
tramp3d the worst number of elements in the vector is 9. */
+ av = av->next;
}
- while (--i >= 0);
+ while (av);
}
else if (valnum != VN_TOP)
/* valnum is is_gimple_min_invariant. */
@@ -6290,15 +6263,19 @@ rpo_elim::eliminate_push_avail (basic_bl
print_generic_expr (dump_file, valnum);
fprintf (dump_file, "\n");
}
- bool existed;
- vec<std::pair<int, int> > &av = m_rpo_avail.get_or_insert (valnum, &existed);
- if (!existed)
- {
- new (&av) vec<std::pair<int, int> >;
- av = vNULL;
- av.reserve_exact (2);
+ vn_ssa_aux_t value = VN_INFO (valnum);
+ vn_avail *av;
+ if (m_avail_freelist)
+ {
+ av = m_avail_freelist;
+ m_avail_freelist = m_avail_freelist->next;
}
- av.safe_push (std::make_pair (bb->index, SSA_NAME_VERSION (leader)));
+ else
+ av = XOBNEW (&vn_ssa_aux_obstack, vn_avail);
+ av->location = bb->index;
+ av->leader = SSA_NAME_VERSION (leader);
+ av->next = value->avail;
+ value->avail = av;
}
/* Valueization hook for RPO VN plus required state. */
@@ -6780,15 +6757,17 @@ do_unwind (unwind_state *to, int rpo_idx
/* Prune [rpo_idx, ] from avail. */
/* ??? This is O(number-of-values-in-region) which is
O(region-size) rather than O(iteration-piece). */
- for (rpo_elim::rpo_avail_t::iterator i
- = avail.m_rpo_avail.begin ();
- i != avail.m_rpo_avail.end (); ++i)
+ for (hash_table<vn_ssa_aux_hasher>::iterator i = vn_ssa_aux_hash->begin ();
+ i != vn_ssa_aux_hash->end (); ++i)
{
- while (! (*i).second.is_empty ())
+ while ((*i)->avail)
{
- if (bb_to_rpo[(*i).second.last ().first] < rpo_idx)
+ if (bb_to_rpo[(*i)->avail->location] < rpo_idx)
break;
- (*i).second.pop ();
+ vn_avail *av = (*i)->avail;
+ (*i)->avail = (*i)->avail->next;
+ av->next = avail.m_avail_freelist;
+ avail.m_avail_freelist = av;
}
}
}
@@ -7184,11 +7163,16 @@ do_rpo_vn (function *fn, edge entry, bit
max_visited = rpo_state[i].visited;
}
unsigned nvalues = 0, navail = 0;
- for (rpo_elim::rpo_avail_t::iterator i = avail.m_rpo_avail.begin ();
- i != avail.m_rpo_avail.end (); ++i)
+ for (hash_table<vn_ssa_aux_hasher>::iterator i = vn_ssa_aux_hash->begin ();
+ i != vn_ssa_aux_hash->end (); ++i)
{
nvalues++;
- navail += (*i).second.length ();
+ vn_avail *av = (*i)->avail;
+ while (av)
+ {
+ navail++;
+ av = av->next;
+ }
}
statistics_counter_event (cfun, "RPO blocks", n);
statistics_counter_event (cfun, "RPO blocks visited", nblk);
More information about the Gcc-patches
mailing list