This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PR 84149] From 3b967c133f7f62e3c2951a971d8690d242364f5d Mon Sep 17 00:00:00 2001
- From: Martin Jambor <mjambor at suse dot cz>
- From: Martin Jambor <mjambor at suse dot cz>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: Jan Hubicka <jh at suse dot cz>
- Date: Mon, 9 Apr 2018 14:18:27 +0200
- Subject: [PR 84149] From 3b967c133f7f62e3c2951a971d8690d242364f5d Mon Sep 17 00:00:00 2001
Hi,
changes in predictors caused that LTO IPA-CP no longer clones the qsort
implementation for both of the comparators used in 505.mcf_r from SPEC
2017, and this resulted in quite some slowdown of the
benchmark.
I have tried various ways of teaching IPA-CP that the remaining contexts
all have a single constant, but all of them required that (I either
reorder stuff on a higher level or) I special case self-recursive edges
and so I decided to do it consistently even in the analysis phase in the
patch below. Consequently, using default parameters, IPA-CP now knows
that it is beneficial to clone for both comparators again. However, I
have verified that when I tweak --param ipa-cp-eval-threshold so that
only one clone is created, the one created "for all remaining contexts"
knows that they only contain one comparator.
Because IPA-CP now can count self-recursive edges among those bringing a
value when cloning (as opposed to next time around when we come to the node
in the SCC) and we need to make sure the original node keeps its
self-recursive edge pointing to itself, create_specialized_node must
make sure that such edges are handled properly and redirect only clones
of these edges, rather than relying on cgraphclones.c machinery.
The patch passes bootstrap, LTO-bootstrap and testing on x86_64-linux, I
have also verified the 505.mcf_r regression is gone and that it can
compile all of SPEC 2017 and 2006 with LTO (well, the config I used
failed for three benchmarks, but so it did on trunk). I have also LTO
built Firefox with it and it browsed a few pages fine too. OK for
trunk?
Thanks,
Martin
2018-04-08 Martin Jambor <mjambor@suse.cz>
PR ipa/84149
* ipa-cp.c (propagate_vals_across_pass_through): Expand comment.
(cgraph_edge_brings_value_p): New parameter dest_val, check if it is
not the same as the source val.
(cgraph_edge_brings_value_p): New parameter.
(gather_edges_for_value): Pass destination value to
cgraph_edge_brings_value_p.
(perhaps_add_new_callers): Likewise.
(get_info_about_necessary_edges): Likewise and exclude values brought
only by self-recursive edges.
(create_specialized_node): Redirect only clones of self-calling edges.
(self_recursive_pass_through_p): New function.
(find_more_scalar_values_for_callers_subset): Use it.
(find_aggregate_values_for_callers_subset): Likewise.
(known_aggs_to_agg_replacement_list): Removed.
(decide_whether_version_node): Re-calculate known constants for all
remaining context clones.
---
gcc/ipa-cp.c | 121 ++++++++++++++++++++++++++++++++++++++---------------------
1 file changed, 78 insertions(+), 43 deletions(-)
diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index ee41a8d55b7..6a9a695fc2c 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -1601,7 +1601,9 @@ propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
/* Do not create new values when propagating within an SCC because if there
are arithmetic functions with circular dependencies, there is infinite
- number of them and we would just make lattices bottom. */
+ number of them and we would just make lattices bottom. If this condition
+ is ever relaxed we have to detect self-feeding recursive calls in
+ cgraph_edge_brings_value_p in a smarter way. */
if ((ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
&& ipa_edge_within_scc (cs))
ret = dest_lat->set_contains_variable ();
@@ -3470,12 +3472,12 @@ same_node_or_its_all_contexts_clone_p (cgraph_node *node, cgraph_node *dest)
return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
}
-/* Return true if edge CS does bring about the value described by SRC to node
- DEST or its clone for all contexts. */
+/* Return true if edge CS does bring about the value described by SRC to
+ DEST_VAL of node DEST or its clone for all contexts. */
static bool
cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
- cgraph_node *dest)
+ cgraph_node *dest, ipcp_value<tree> *dest_val)
{
struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
enum availability availability;
@@ -3485,7 +3487,9 @@ cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
|| availability <= AVAIL_INTERPOSABLE
|| caller_info->node_dead)
return false;
- if (!src->val)
+ /* At the moment we do not propagate over arithmetic jump functions in SCCs,
+ so it is safe to detect self-feeding recursive calls in this way. */
+ if (!src->val || src->val == dest_val)
return true;
if (caller_info->ipcp_orig_node)
@@ -3521,13 +3525,14 @@ cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
}
}
-/* Return true if edge CS does bring about the value described by SRC to node
- DEST or its clone for all contexts. */
+/* Return true if edge CS does bring about the value described by SRC to
+ DST_VAL of node DEST or its clone for all contexts. */
static bool
cgraph_edge_brings_value_p (cgraph_edge *cs,
ipcp_value_source<ipa_polymorphic_call_context> *src,
- cgraph_node *dest)
+ cgraph_node *dest,
+ ipcp_value<ipa_polymorphic_call_context> *)
{
struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
cgraph_node *real_dest = cs->callee->function_symbol ();
@@ -3558,9 +3563,10 @@ get_next_cgraph_edge_clone (struct cgraph_edge *cs)
return next_edge_clone[cs->uid];
}
-/* Given VAL that is intended for DEST, iterate over all its sources and if
- they still hold, add their edge frequency and their number into *FREQUENCY
- and *CALLER_COUNT respectively. */
+/* Given VAL that is intended for DEST, iterate over all its sources and if any
+ of them is viable and hot, return true. In that case, for those that still
+ hold, add their edge frequency and their number into *FREQUENCY and
+ *CALLER_COUNT respectively. */
template <typename valtype>
static bool
@@ -3572,24 +3578,32 @@ get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
int freq = 0, count = 0;
profile_count cnt = profile_count::zero ();
bool hot = false;
+ bool non_self_recursive = false;
for (src = val->sources; src; src = src->next)
{
struct cgraph_edge *cs = src->cs;
while (cs)
{
- if (cgraph_edge_brings_value_p (cs, src, dest))
+ if (cgraph_edge_brings_value_p (cs, src, dest, val))
{
count++;
freq += cs->frequency ();
if (cs->count.ipa ().initialized_p ())
cnt += cs->count.ipa ();
hot |= cs->maybe_hot_p ();
+ if (cs->caller != dest)
+ non_self_recursive = true;
}
cs = get_next_cgraph_edge_clone (cs);
}
}
+ /* If the only edges bringing a value are self-recursive ones, do not bother
+ evaluating it. */
+ if (!non_self_recursive)
+ return false;
+
*freq_sum = freq;
*count_sum = cnt;
*caller_count = count;
@@ -3613,7 +3627,7 @@ gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
struct cgraph_edge *cs = src->cs;
while (cs)
{
- if (cgraph_edge_brings_value_p (cs, src, dest))
+ if (cgraph_edge_brings_value_p (cs, src, dest, val))
ret.quick_push (cs);
cs = get_next_cgraph_edge_clone (cs);
}
@@ -3833,9 +3847,28 @@ create_specialized_node (struct cgraph_node *node,
vec_safe_push (replace_trees, replace_map);
}
}
+ auto_vec<cgraph_edge *, 2> self_recursive_calls;
+ for (i = callers.length () - 1; i >= 0; i--)
+ {
+ cgraph_edge *cs = callers[i];
+ if (cs->caller == node)
+ {
+ self_recursive_calls.safe_push (cs);
+ callers.unordered_remove (i);
+ }
+ }
new_node = node->create_virtual_clone (callers, replace_trees,
args_to_skip, "constprop");
+
+ for (unsigned j = 0; j < self_recursive_calls.length (); j++)
+ {
+ cgraph_edge *cs = next_edge_clone[self_recursive_calls[j]->uid];
+ gcc_checking_assert (cs);
+ gcc_assert (cs->caller == new_node);
+ cs->redirect_callee_duplicating_thunks (new_node);
+ }
+
ipa_set_node_agg_value_chain (new_node, aggvals);
for (av = aggvals; av; av = av->next)
new_node->maybe_create_reference (av->value, NULL);
@@ -3868,6 +3901,22 @@ create_specialized_node (struct cgraph_node *node,
return new_node;
}
+/* Return true, if JFUNC, which describes a i-th parameter of call CS, is a
+ simple no-operation pass-through function to itself. */
+
+static bool
+self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i)
+{
+ enum availability availability;
+ if (cs->caller == cs->callee->function_symbol (&availability)
+ && availability > AVAIL_INTERPOSABLE
+ && jfunc->type == IPA_JF_PASS_THROUGH
+ && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR
+ && ipa_get_jf_pass_through_formal_id (jfunc) == i)
+ return true;
+ return false;
+}
+
/* Given a NODE, and a subset of its CALLERS, try to populate blanks slots in
KNOWN_CSTS with constants that are also known for all of the CALLERS. */
@@ -3895,6 +3944,9 @@ find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
struct ipa_jump_func *jump_func;
tree t;
+ if (IPA_NODE_REF (cs->caller)->node_dead)
+ continue;
+
if (i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs))
|| (i == 0
&& call_passes_through_thunk_p (cs))
@@ -3905,6 +3957,9 @@ find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
break;
}
jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i);
+ if (self_recursive_pass_through_p (cs, jump_func, i))
+ continue;
+
t = ipa_value_from_jfunc (IPA_NODE_REF (cs->caller), jump_func, type);
if (!t
|| (newval
@@ -4297,6 +4352,10 @@ find_aggregate_values_for_callers_subset (struct cgraph_node *node,
FOR_EACH_VEC_ELT (callers, j, cs)
{
+ struct ipa_jump_func *jfunc
+ = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i);
+ if (self_recursive_pass_through_p (cs, jfunc, i))
+ continue;
inter = intersect_aggregates_with_edge (cs, i, inter);
if (!inter.exists ())
@@ -4327,33 +4386,6 @@ find_aggregate_values_for_callers_subset (struct cgraph_node *node,
return res;
}
-/* Turn KNOWN_AGGS into a list of aggregate replacement values. */
-
-static struct ipa_agg_replacement_value *
-known_aggs_to_agg_replacement_list (vec<ipa_agg_jump_function> known_aggs)
-{
- struct ipa_agg_replacement_value *res;
- struct ipa_agg_replacement_value **tail = &res;
- struct ipa_agg_jump_function *aggjf;
- struct ipa_agg_jf_item *item;
- int i, j;
-
- FOR_EACH_VEC_ELT (known_aggs, i, aggjf)
- FOR_EACH_VEC_SAFE_ELT (aggjf->items, j, item)
- {
- struct ipa_agg_replacement_value *v;
- v = ggc_alloc<ipa_agg_replacement_value> ();
- v->index = i;
- v->offset = item->offset;
- v->value = item->value;
- v->by_ref = aggjf->by_ref;
- *tail = v;
- tail = &v->next;
- }
- *tail = NULL;
- return res;
-}
-
/* Determine whether CS also brings all scalar values that the NODE is
specialized for. */
@@ -4479,7 +4511,7 @@ perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
struct cgraph_edge *cs = src->cs;
while (cs)
{
- if (cgraph_edge_brings_value_p (cs, src, node)
+ if (cgraph_edge_brings_value_p (cs, src, node, val)
&& cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
&& cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
{
@@ -4744,6 +4776,10 @@ decide_whether_version_node (struct cgraph_node *node)
"for all known contexts.\n", node->dump_name ());
callers = node->collect_callers ();
+ find_more_scalar_values_for_callers_subset (node, known_csts, callers);
+ find_more_contexts_for_caller_subset (node, &known_contexts, callers);
+ ipa_agg_replacement_value *aggvals
+ = find_aggregate_values_for_callers_subset (node, callers);
if (!known_contexts_useful_p (known_contexts))
{
@@ -4751,8 +4787,7 @@ decide_whether_version_node (struct cgraph_node *node)
known_contexts = vNULL;
}
clone = create_specialized_node (node, known_csts, known_contexts,
- known_aggs_to_agg_replacement_list (known_aggs),
- callers);
+ aggvals, callers);
info = IPA_NODE_REF (node);
info->do_clone_for_all_contexts = false;
IPA_NODE_REF (clone)->is_all_contexts_clone = true;
--
2.16.3