[PATCH] Support multi-versioning on self-recursive function (ipa/92133)
Feng Xue OS
fxue@os.amperecomputing.com
Thu Oct 17 08:35:00 GMT 2019
IPA does not allow constant propagation on parameter that is used to control
function recursion.
recur_fn (i)
{
if ( !terminate_recursion (i))
{
...
recur_fn (i + 1);
...
}
...
}
This patch is composed to enable multi-versioning for self-recursive function,
and versioning copies is limited by a specified option.
Feng
---
diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index 045072e02ec..6255a766e4d 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -229,7 +229,9 @@ public:
inline bool set_contains_variable ();
bool add_value (valtype newval, cgraph_edge *cs,
ipcp_value<valtype> *src_val = NULL,
- int src_idx = 0, HOST_WIDE_INT offset = -1);
+ int src_idx = 0, HOST_WIDE_INT offset = -1,
+ ipcp_value<valtype> **val_pos_p = NULL,
+ bool unlimited = false);
void print (FILE * f, bool dump_sources, bool dump_benefits);
};
@@ -1579,22 +1581,37 @@ allocate_and_init_ipcp_value (ipa_polymorphic_call_context source)
/* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same
meaning. OFFSET -1 means the source is scalar and not a part of an
- aggregate. */
+ aggregate. If non-NULL, VAL_POS_P specifies position in value list,
+ after which newly created ipcp_value will be inserted, and it is also
+ used to record address of the added ipcp_value before function returns.
+ UNLIMITED means whether value count should not exceed the limit given
+ by PARAM_IPA_CP_VALUE_LIST_SIZE. */
template <typename valtype>
bool
ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
ipcp_value<valtype> *src_val,
- int src_idx, HOST_WIDE_INT offset)
+ int src_idx, HOST_WIDE_INT offset,
+ ipcp_value<valtype> **val_pos_p,
+ bool unlimited)
{
ipcp_value<valtype> *val;
+ if (val_pos_p)
+ {
+ for (val = values; val && val != *val_pos_p; val = val->next);
+ gcc_checking_assert (val);
+ }
+
if (bottom)
return false;
for (val = values; val; val = val->next)
if (values_equal_for_ipcp_p (val->value, newval))
{
+ if (val_pos_p)
+ *val_pos_p = val;
+
if (ipa_edge_within_scc (cs))
{
ipcp_value_source<valtype> *s;
@@ -1609,7 +1626,8 @@ ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
return false;
}
- if (values_count == PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE))
+ if (!unlimited
+ && values_count == PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE))
{
/* We can only free sources, not the values themselves, because sources
of other values in this SCC might point to them. */
@@ -1623,6 +1641,9 @@ ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
}
}
+ if (val_pos_p)
+ *val_pos_p = NULL;
+
values = NULL;
return set_to_bottom ();
}
@@ -1630,8 +1651,54 @@ ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
values_count++;
val = allocate_and_init_ipcp_value (newval);
val->add_source (cs, src_val, src_idx, offset);
- val->next = values;
- values = val;
+ if (val_pos_p)
+ {
+ val->next = (*val_pos_p)->next;
+ (*val_pos_p)->next = val;
+ *val_pos_p = val;
+ }
+ else
+ {
+ val->next = values;
+ values = val;
+ }
+
+ return true;
+}
+
+/* Return true, if a ipcp_value VAL is orginated from parameter value of
+ self-feeding recursive function by applying non-passthrough arithmetic
+ transformation. */
+
+static bool
+self_recursively_generated_p (ipcp_value<tree> *val)
+{
+ class ipa_node_params *info = NULL;
+
+ for (ipcp_value_source<tree> *src = val->sources; src; src = src->next)
+ {
+ cgraph_edge *cs = src->cs;
+
+ if (!src->val || cs->caller != cs->callee->function_symbol ()
+ || src->val == val)
+ return false;
+
+ if (!info)
+ info = IPA_NODE_REF (cs->caller);
+
+ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info,
+ src->index);
+ ipcp_lattice<tree> *src_lat = src->offset == -1 ? &plats->itself
+ : plats->aggs;
+ ipcp_value<tree> *src_val;
+
+ for (src_val = src_lat->values; src_val && src_val != val;
+ src_val = src_val->next);
+
+ if (!src_val)
+ return false;
+ }
+
return true;
}
@@ -1649,20 +1716,72 @@ propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
ipcp_value<tree> *src_val;
bool ret = false;
- /* Do not create new values when propagating within an SCC because if there
- are arithmetic functions with circular dependencies, there is infinite
- number of them and we would just make lattices bottom. If this condition
- is ever relaxed we have to detect self-feeding recursive calls in
- cgraph_edge_brings_value_p in a smarter way. */
+ /* Due to circular dependencies, propagating within an SCC through arithmetic
+ transformation would create infinite number of values. But for
+ self-feeding recursive function, we could allow propagation in a limited
+ count, and this can enable a simple kind of recursive function versioning.
+ For other scenario, we would just make lattices bottom. */
if ((ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
&& ipa_edge_within_scc (cs))
- ret = dest_lat->set_contains_variable ();
+ {
+ if (src_lat != dest_lat)
+ return dest_lat->set_contains_variable ();
+
+ auto_vec<ipcp_value<tree> *, 8> val_seeds;
+
+ for (src_val = src_lat->values; src_val; src_val = src_val->next)
+ {
+ /* Now we do not use self-recursively generated value as propagation
+ source, this is absolutely conservative, but could avoid explosion
+ of lattice's value space, especially when one recursive function
+ calls another recursive. */
+ if (self_recursively_generated_p (src_val))
+ {
+ /* If the lattice has already been propagated for the call site,
+ no need to do that again. */
+ for (ipcp_value_source<tree> *s = src_val->sources; s;
+ s = s->next)
+ if (s->cs == cs)
+ return dest_lat->set_contains_variable ();
+ }
+ else
+ val_seeds.safe_push (src_val);
+ }
+
+ int clone_copies = PARAM_VALUE (PARAM_IPA_CP_MAX_RECURSION_COPIES);
+ int i;
+
+ /* Recursively generate lattice values with a limited count. */
+ FOR_EACH_VEC_ELT (val_seeds, i, src_val)
+ {
+ for (int j = 1; j < clone_copies; j++)
+ {
+ tree cstval = ipa_get_jf_pass_through_result (jfunc,
+ src_val->value,
+ parm_type);
+ if (!cstval)
+ break;
+
+ /* Try to place the new lattice value after its source, which
+ can decrease iterations of propagate stage. */
+ ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
+ -1, &src_val, true);
+ gcc_checking_assert (src_val);
+ }
+ }
+ ret |= dest_lat->set_contains_variable ();
+ }
else
for (src_val = src_lat->values; src_val; src_val = src_val->next)
{
+ /* Now we do not use self-recursively generated value as propagation
+ source, otherwise it is easy to make value space of normal lattice
+ overflow. */
+ if (self_recursively_generated_p (src_val))
+ continue;
+
tree cstval = ipa_get_jf_pass_through_result (jfunc, src_val->value,
parm_type);
-
if (cstval)
ret |= dest_lat->add_value (cstval, cs, src_val, src_idx);
else
@@ -3635,6 +3754,9 @@ get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
hot |= cs->maybe_hot_p ();
if (cs->caller != dest)
non_self_recursive = true;
+ else if (src->val)
+ gcc_assert (values_equal_for_ipcp_p (src->val->value,
+ val->value));
}
cs = get_next_cgraph_edge_clone (cs);
}
diff --git a/gcc/params.def b/gcc/params.def
index 322c37f8b96..3e242e09f01 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -1135,6 +1135,11 @@ DEFPARAM (PARAM_IPA_CP_RECURSION_PENALTY,
"are evaluated for cloning.",
40, 0, 100)
+DEFPARAM (PARAM_IPA_CP_MAX_RECURSION_COPIES,
+ "ipa-cp-max-recursion-copies",
+ "Maximum function versioning copies for a self-recursive function.",
+ 8, 0, 0)
+
DEFPARAM (PARAM_IPA_CP_SINGLE_CALL_PENALTY,
"ipa-cp-single-call-penalty",
"Percentage penalty functions containing a single call to another "
diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c
new file mode 100644
index 00000000000..7c1c01047c6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-ipa-cp-details -fno-early-inlining" } */
+
+int fn();
+
+int data[100];
+
+int recur_fn (int i)
+{
+ int j;
+
+ if (i == 6)
+ {
+ fn();
+ fn();
+ fn();
+ fn();
+ fn();
+ fn();
+ fn();
+ fn();
+ fn();
+ fn();
+ fn();
+ fn();
+ return 10;
+ }
+
+ data[i] = i;
+
+ for (j = 0; j < 100; j++)
+ recur_fn (i + 1);
+
+ return i;
+}
+
+int main ()
+{
+ int i;
+
+ for (i = 0; i < 100; i++)
+ recur_fn (1) + recur_fn (-5);
+
+ return 1;
+}
+
+/* { dg-final { scan-ipa-dump-times "Creating a specialized node of recur_fn/\[0-9\]*\\." 12 "cp" } } */
--
2.17.1
More information about the Gcc-patches
mailing list