[PATCH] Support multi-versioning on self-recursive function (ipa/92133)

luoxhu luoxhu@linux.ibm.com
Thu Oct 24 06:17:00 GMT 2019


Hi,


On 2019/10/17 16:23, Feng Xue OS wrote:
> IPA does not allow constant propagation on parameter that is used to control
> function recursion.
> 
> recur_fn (i)
> {
>    if ( !terminate_recursion (i))
>      {
>        ...
>        recur_fn (i + 1);
>        ...
>      }
>    ...
> }
> 
> This patch is composed to enable multi-versioning for self-recursive function,
> and versioning copies is limited by a specified option.
> 
> Feng
> ---
> diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
> index 045072e02ec..6255a766e4d 100644
> --- a/gcc/ipa-cp.c
> +++ b/gcc/ipa-cp.c
> @@ -229,7 +229,9 @@ public:
>     inline bool set_contains_variable ();
>     bool add_value (valtype newval, cgraph_edge *cs,
>   		  ipcp_value<valtype> *src_val = NULL,
> -		  int src_idx = 0, HOST_WIDE_INT offset = -1);
> +		  int src_idx = 0, HOST_WIDE_INT offset = -1,
> +		  ipcp_value<valtype> **val_pos_p = NULL,
> +		  bool unlimited = false);
>     void print (FILE * f, bool dump_sources, bool dump_benefits);
>   };
>   
> @@ -1579,22 +1581,37 @@ allocate_and_init_ipcp_value (ipa_polymorphic_call_context source)
>   /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it.  CS,
>      SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same
>      meaning.  OFFSET -1 means the source is scalar and not a part of an
> -   aggregate.  */
> +   aggregate.  If non-NULL, VAL_POS_P specifies position in value list,
> +   after which newly created ipcp_value will be inserted, and it is also
> +   used to record address of the added ipcp_value before function returns.
> +   UNLIMITED means whether value count should not exceed the limit given
> +   by PARAM_IPA_CP_VALUE_LIST_SIZE.  */
>   
>   template <typename valtype>
>   bool
>   ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
>   				  ipcp_value<valtype> *src_val,
> -				  int src_idx, HOST_WIDE_INT offset)
> +				  int src_idx, HOST_WIDE_INT offset,
> +				  ipcp_value<valtype> **val_pos_p,
> +				  bool unlimited)
>   {
>     ipcp_value<valtype> *val;
>   
> +  if (val_pos_p)
> +    {
> +      for (val = values; val && val != *val_pos_p; val = val->next);
> +      gcc_checking_assert (val);
> +    }
> +
>     if (bottom)
>       return false;
>   
>     for (val = values; val; val = val->next)
>       if (values_equal_for_ipcp_p (val->value, newval))
>         {
> +	if (val_pos_p)
> +	  *val_pos_p = val;
> +
>   	if (ipa_edge_within_scc (cs))
>   	  {
>   	    ipcp_value_source<valtype> *s;
> @@ -1609,7 +1626,8 @@ ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
>   	return false;
>         }
>   
> -  if (values_count == PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE))
> +  if (!unlimited
> +      && values_count == PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE))
>       {
>         /* We can only free sources, not the values themselves, because sources
>   	 of other values in this SCC might point to them.   */
> @@ -1623,6 +1641,9 @@ ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
>   	    }
>   	}
>   
> +      if (val_pos_p)
> +	*val_pos_p = NULL;
> +
>         values = NULL;
>         return set_to_bottom ();
>       }
> @@ -1630,8 +1651,54 @@ ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
>     values_count++;
>     val = allocate_and_init_ipcp_value (newval);
>     val->add_source (cs, src_val, src_idx, offset);
> -  val->next = values;
> -  values = val;
> +  if (val_pos_p)
> +    {
> +      val->next = (*val_pos_p)->next;
> +      (*val_pos_p)->next = val;
> +      *val_pos_p = val;
> +    }
> +  else
> +    {
> +      val->next = values;
> +      values = val;
> +    }
> +
> +  return true;
> +}
> +
> +/* Return true, if a ipcp_value VAL is orginated from parameter value of
> +   self-feeding recursive function by applying non-passthrough arithmetic
> +   transformation.  */
> +
> +static bool
> +self_recursively_generated_p (ipcp_value<tree> *val)
> +{
> +  class ipa_node_params *info = NULL;
> +
> +  for (ipcp_value_source<tree> *src = val->sources; src; src = src->next)
> +    {
> +      cgraph_edge *cs = src->cs;
> +
> +      if (!src->val || cs->caller != cs->callee->function_symbol ()
> +	  || src->val == val)
> +	return false;
> +
> +      if (!info)
> +	info = IPA_NODE_REF (cs->caller);
> +
> +      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info,
> +								src->index);
> +      ipcp_lattice<tree> *src_lat = src->offset == -1 ? &plats->itself
> +						      : plats->aggs;

Thanks for the patch.
This function doesn't handle the by-ref case after rebasing this patch to your
previous ipa-cp by-ref of arith patch as below (Also some conflicts need fix):

foo (int * p) { ...  return foo(*(&p) + 1); }

It will cause value explosion.

Secondly, the self_recursive doesn't check or break if the value is above than
the recursive boundary, which seems would generate redundant nodes?


IMHO, It may be helpful if adding more comments before the return and dump log
for dump-ipa-cp-all-details when adding new values and new sources like below
for debugging and others can understand the code easier? :)

newval, src_val, src_val->sources,  *src_val->sources,  caller,  callee
1, (ipcp_value<tree_node*> *) 0x12bc1828, (ipcp_value_source<tree_node*> *) 0x12bae800,  {offset = -1, cs = 0x3fffb5602768, val = 0x0, next = 0x0, index = 0}, "main", recur_fn"
2, (ipcp_value<tree_node*> *) 0x12bc1880, (ipcp_value_source<tree_node*> *) 0x12bae830,  {offset = -1, cs = 0x3fffb5602630, val = 0x12bc1828, next = 0x0, index = 0},"recur_fn","recur_fn"
...
1, (ipcp_value<tree_node*> *) 0x12bc1828, (ipcp_value_source<tree_node*> *) 0x12baea70, {offset = -1, cs = 0x3fffb5602630, val = 0x12bc1c48, next = 0x12bae800, index = 0}  "recur_fn","recur_fn"
2, (ipcp_value<tree_node*> *) 0x12bc1880, (ipcp_value_source<tree_node*> *) 0x12bae830, {offset = -1, cs = 0x3fffb5602630, val = 0x12bc1828, next = 0x0, index = 0}  "recur_fn","recur_fn"


Xiong Hu
Thanks

> +      ipcp_value<tree> *src_val;
> +
> +      for (src_val = src_lat->values; src_val && src_val != val;
> +	   src_val = src_val->next);
> +
> +      if (!src_val)
> +	return false;
> +    }
> +
>     return true;
>   }
>   
> @@ -1649,20 +1716,72 @@ propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
>     ipcp_value<tree> *src_val;
>     bool ret = false;
>   
> -  /* Do not create new values when propagating within an SCC because if there
> -     are arithmetic functions with circular dependencies, there is infinite
> -     number of them and we would just make lattices bottom.  If this condition
> -     is ever relaxed we have to detect self-feeding recursive calls in
> -     cgraph_edge_brings_value_p in a smarter way.  */
> +  /* Due to circular dependencies, propagating within an SCC through arithmetic
> +     transformation would create infinite number of values.  But for
> +     self-feeding recursive function, we could allow propagation in a limited
> +     count, and this can enable a simple kind of recursive function versioning.
> +     For other scenario, we would just make lattices bottom.  */
>     if ((ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
>         && ipa_edge_within_scc (cs))
> -    ret = dest_lat->set_contains_variable ();
> +    {
> +      if (src_lat != dest_lat)
> +	return dest_lat->set_contains_variable ();
> +
> +      auto_vec<ipcp_value<tree> *, 8> val_seeds;
> +
> +      for (src_val = src_lat->values; src_val; src_val = src_val->next)
> +	{
> +	  /* Now we do not use self-recursively generated value as propagation
> +	     source, this is absolutely conservative, but could avoid explosion
> +	     of lattice's value space, especially when one recursive function
> +	     calls another recursive.  */
> +	  if (self_recursively_generated_p (src_val))
> +	    {
> +	      /* If the lattice has already been propagated for the call site,
> +		 no need to do that again.  */
> +	      for (ipcp_value_source<tree> *s = src_val->sources; s;
> +		   s = s->next)
> +		if (s->cs == cs)
> +		  return dest_lat->set_contains_variable ();
> +	    }
> +	  else
> +	    val_seeds.safe_push (src_val);
> +	}
> +
> +      int clone_copies = PARAM_VALUE (PARAM_IPA_CP_MAX_RECURSION_COPIES);
> +      int i;
> +
> +      /* Recursively generate lattice values with a limited count.  */
> +      FOR_EACH_VEC_ELT (val_seeds, i, src_val)
> +	{
> +	  for (int j = 1; j < clone_copies; j++)
> +	    {
> +	      tree cstval = ipa_get_jf_pass_through_result (jfunc,
> +	      						    src_val->value,
> +							    parm_type);
> +	      if (!cstval)
> +		break;
> +
> +	      /* Try to place the new lattice value after its source, which
> +		 can decrease iterations of propagate stage.  */
> +	      ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
> +					  -1, &src_val, true);
> +	      gcc_checking_assert (src_val);
> +	    }
> +	}
> +      ret |= dest_lat->set_contains_variable ();
> +    }
>     else
>       for (src_val = src_lat->values; src_val; src_val = src_val->next)
>         {
> +	  /* Now we do not use self-recursively generated value as propagation
> +	     source, otherwise it is easy to make value space of normal lattice
> +	     overflow.  */
> +	if (self_recursively_generated_p (src_val))
> +	  continue;
> +
>   	tree cstval = ipa_get_jf_pass_through_result (jfunc, src_val->value,
>   						      parm_type);
> -
>   	if (cstval)
>   	  ret |= dest_lat->add_value (cstval, cs, src_val, src_idx);
>   	else
> @@ -3635,6 +3754,9 @@ get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
>   	      hot |= cs->maybe_hot_p ();
>   	      if (cs->caller != dest)
>   		non_self_recursive = true;
> +	      else if (src->val)
> +		gcc_assert (values_equal_for_ipcp_p (src->val->value,
> +						     val->value));
>   	    }
>   	  cs = get_next_cgraph_edge_clone (cs);
>   	}
> diff --git a/gcc/params.def b/gcc/params.def
> index 322c37f8b96..3e242e09f01 100644
> --- a/gcc/params.def
> +++ b/gcc/params.def
> @@ -1135,6 +1135,11 @@ DEFPARAM (PARAM_IPA_CP_RECURSION_PENALTY,
>   	  "are evaluated for cloning.",
>   	  40, 0, 100)
>   
> +DEFPARAM (PARAM_IPA_CP_MAX_RECURSION_COPIES,
> +	  "ipa-cp-max-recursion-copies",
> +	  "Maximum function versioning copies for a self-recursive function.",
> +	  8, 0, 0)
> +
>   DEFPARAM (PARAM_IPA_CP_SINGLE_CALL_PENALTY,
>   	  "ipa-cp-single-call-penalty",
>   	  "Percentage penalty functions containing a single call to another "
> diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c
> new file mode 100644
> index 00000000000..7c1c01047c6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c
> @@ -0,0 +1,47 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-ipa-cp-details -fno-early-inlining" } */
> +
> +int fn();
> +
> +int data[100];
> +
> +int recur_fn (int i)
> +{
> +  int j;
> +
> +  if (i == 6)
> +    {
> +      fn();
> +      fn();
> +      fn();
> +      fn();
> +      fn();
> +      fn();
> +      fn();
> +      fn();
> +      fn();
> +      fn();
> +      fn();
> +      fn();
> +      return 10;
> +    }
> +
> +  data[i] = i;
> +
> +  for (j = 0; j < 100; j++)
> +    recur_fn (i + 1);
> +
> +  return i;
> +}
> +
> +int main ()
> +{
> +  int i;
> +
> +  for (i = 0; i < 100; i++)
> +    recur_fn (1) + recur_fn (-5);
> +
> +  return 1;
> +}
> +
> +/* { dg-final { scan-ipa-dump-times "Creating a specialized node of recur_fn/\[0-9\]*\\." 12 "cp" } } */
> 



More information about the Gcc-patches mailing list