[PATCH] Find constant definition for by-ref argument using dominance information (PR ipa/90401)

Richard Biener richard.guenther@gmail.com
Wed Jun 5 10:15:00 GMT 2019


On Wed, Jun 5, 2019 at 10:59 AM Feng Xue OS <fxue@os.amperecomputing.com> wrote:
>
> IPA-CP can not identify a constant by-ref argument to a function, if definition
> of the argument is not in same basic block where the callsite lies in. This is
> because IPA-CP only does local search in the callsite basic block.So this patch
> implemented an enhanced algorithm, which uses dominance information to guide
> traverse in virtual SSA web, to find out constant definitions in dominating basic
> block.
>
> Feng
>
> ---
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 526ed45be89..cc076c337af 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,14 @@
> +2019-06-04  Feng Xue  <fxue@os.amperecomputing.com>
> +
> +       PR ipa/90401
> +       * ipa-prop.c (add_to_agg_contents_list): New function.
> +       (clobber_by_agg_contents_list_p): Likewise.
> +       (extract_mem_content): Likewise.
> +       (strictly_dominated_by_ssa_p): Likewise.
> +       (get_place_in_agg_contents_list): Delete.
> +       (determine_known_aggregate_parts): Renamed from
> +       determine_locally_known_aggregate_parts.
> +
>  2019-06-04  Segher Boessenkool  <segher@kernel.crashing.org>
>
>         * config/rs6000/constraints.md (define_register_constraint "wp"):
> diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c
> index d86c2f3db55..405cdf7730b 100644
> --- a/gcc/ipa-prop.c
> +++ b/gcc/ipa-prop.c
> @@ -1458,7 +1458,7 @@ get_ssa_def_if_simple_copy (tree rhs)
>    return rhs;
>  }
>
> -/* Simple linked list, describing known contents of an aggregate beforere
> +/* Simple linked list, describing known contents of an aggregate before
>     call.  */
>
>  struct ipa_known_agg_contents_list
> @@ -1471,41 +1471,64 @@ struct ipa_known_agg_contents_list
>    struct ipa_known_agg_contents_list *next;
>  };
>
> -/* Find the proper place in linked list of ipa_known_agg_contents_list
> -   structures where to put a new one with the given LHS_OFFSET and LHS_SIZE,
> -   unless there is a partial overlap, in which case return NULL, or such
> -   element is already there, in which case set *ALREADY_THERE to true.  */
> +/* Add a known content item into a linked list of ipa_known_agg_contents_list,
> +   in which all elements are sorted ascendingly by offset. When ALLOW_DUP is
> +   false, insert the item only if there is no duplicate one (with same offset
> +   and size) in the list. And if the item is added, return true. */
>
> -static struct ipa_known_agg_contents_list **
> -get_place_in_agg_contents_list (struct ipa_known_agg_contents_list **list,
> -                               HOST_WIDE_INT lhs_offset,
> -                               HOST_WIDE_INT lhs_size,
> -                               bool *already_there)
> +static inline bool
> +add_to_agg_contents_list (struct ipa_known_agg_contents_list **plist,
> +                          struct ipa_known_agg_contents_list *item,
> +                          bool allow_dup = true)
>  {
> -  struct ipa_known_agg_contents_list **p = list;
> -  while (*p && (*p)->offset < lhs_offset)
> +  struct ipa_known_agg_contents_list *list = *plist;
> +
> +  for (; list; list = list->next)
>      {
> -      if ((*p)->offset + (*p)->size > lhs_offset)
> -       return NULL;
> -      p = &(*p)->next;
> +      if (list->offset > item->offset)
> +        break;
> +
> +      if (list->offset == item->offset && list->size == item->size
> +          && !allow_dup)
> +        return false;
> +
> +      plist = &list->next;
>      }
>
> -  if (*p && (*p)->offset < lhs_offset + lhs_size)
> +  item->next = list;
> +  *plist = item;
> +  return true;
> +}
> +
> +/* Check whether a given known content is clobbered by certain element in
> +   a linked list of ipa_known_agg_contents_list. A special case is that
> +   we can ignore those constant items completely same as the given item,
> +   that is they have same offset/size/value. */
> +
> +static inline bool
> +clobber_by_agg_contents_list_p (struct ipa_known_agg_contents_list *list,
> +                                struct ipa_known_agg_contents_list *item)
> +{
> +  for (; list; list = list->next)
>      {
> -      if ((*p)->offset == lhs_offset && (*p)->size == lhs_size)
> -       /* We already know this value is subsequently overwritten with
> -          something else.  */
> -       *already_there = true;
> -      else
> -       /* Otherwise this is a partial overlap which we cannot
> -          represent.  */
> -       return NULL;
> +      if (list->offset > item->offset)
> +        return list->offset < item->offset + item->size;
> +
> +      /* For the constant item, we can skip comparison with identical items in
> +         the list, because its content remains unchanged after clobbering. */
> +      if (list->offset == item->offset && list->size == item->size
> +          && list->constant && item->constant
> +          && operand_equal_p (list->constant, item->constant, 0))
> +        continue;
> +
> +      if (list->offset + list->size > item->offset)
> +        return true;
>      }
> -  return p;
> +  return false;
>  }
>
>  /* Build aggregate jump function from LIST, assuming there are exactly
> -   CONST_COUNT constant entries there and that th offset of the passed argument
> +   CONST_COUNT constant entries there and that offset of the passed argument
>     is ARG_OFFSET and store it into JFUNC.  */
>
>  static void
> @@ -1528,6 +1551,66 @@ build_agg_jump_func_from_list (struct ipa_known_agg_contents_list *list,
>      }
>  }
>
> +/* If STMT is a memory store to the object whose address is BASE, extract
> +   information (offset, size, and value) into CONTENT, and return true,
> +   otherwise we conservatively assume the whole object is modified with
> +   unknown content, and return false. CHECK_REF means that access to object
> +   is expected to be in form of MEM_REF expression. */
> +
> +static bool
> +extract_mem_content (gimple *stmt, tree base, bool check_ref,
> +                     struct ipa_known_agg_contents_list *content)
> +{
> +  HOST_WIDE_INT lhs_offset, lhs_size;
> +  tree lhs, rhs, lhs_base;
> +  bool reverse;
> +
> +  if (!gimple_assign_single_p (stmt))
> +    return false;
> +
> +  lhs = gimple_assign_lhs (stmt);
> +  rhs = gimple_assign_rhs1 (stmt);
> +
> +  if (!is_gimple_reg_type (TREE_TYPE (rhs))
> +      || TREE_CODE (lhs) == BIT_FIELD_REF
> +      || contains_bitfld_component_ref_p (lhs))
> +    return false;
> +
> +  lhs_base = get_ref_base_and_extent_hwi (lhs, &lhs_offset,
> +                                          &lhs_size, &reverse);
> +  if (!lhs_base)
> +    return false;
> +
> +  if (check_ref)
> +    {
> +      if (TREE_CODE (lhs_base) != MEM_REF
> +          || TREE_OPERAND (lhs_base, 0) != base
> +          || !integer_zerop (TREE_OPERAND (lhs_base, 1)))
> +        return false;
> +    }
> +  else if (lhs_base != base)
> +    return false;
> +
> +  rhs = get_ssa_def_if_simple_copy (rhs);
> +
> +  content->size = lhs_size;
> +  content->offset = lhs_offset;
> +  content->constant = is_gimple_ip_invariant (rhs) ? rhs : NULL_TREE;
> +  content->next = NULL;
> +
> +  return true;
> +}
> +
> +/* Return true if defintion of SSA name strictly dominates basic block BB. */
> +
> +static inline bool
> +strictly_dominated_by_ssa_p (basic_block bb, tree ssa)
> +{
> +  basic_block ssa_bb = gimple_bb (SSA_NAME_DEF_STMT (ssa));
> +
> +  return bb != ssa_bb && dominated_by_p (CDI_DOMINATORS, bb, ssa_bb);
> +}
> +
>  /* Traverse statements from CALL backwards, scanning whether an aggregate given
>     in ARG is filled in with constant values.  ARG can either be an aggregate
>     expression or a pointer to an aggregate.  ARG_TYPE is the type of the
> @@ -1535,19 +1618,22 @@ build_agg_jump_func_from_list (struct ipa_known_agg_contents_list *list,
>     subsequently stored.  */
>
>  static void
> -determine_locally_known_aggregate_parts (gcall *call, tree arg,
> -                                        tree arg_type,
> -                                        struct ipa_jump_func *jfunc)
> +determine_known_aggregate_parts (gcall *call, tree arg,
> +                                tree arg_type,
> +                                struct ipa_jump_func *jfunc)
>  {
> -  struct ipa_known_agg_contents_list *list = NULL;
> +  struct ipa_known_agg_contents_list *list = NULL, *all_list = NULL;
> +  basic_block call_bb = gimple_bb (call);
> +  hash_set <tree> visited;
> +  auto_vec <tree> worklist;
>    int item_count = 0, const_count = 0;
> +  int ipa_max_agg_items = PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS);
>    HOST_WIDE_INT arg_offset, arg_size;
> -  gimple_stmt_iterator gsi;
>    tree arg_base;
>    bool check_ref, by_ref;
>    ao_ref r;
>
> -  if (PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS) == 0)
> +  if (ipa_max_agg_items == 0)
>      return;
>
>    /* The function operates in three stages.  First, we prepare check_ref, r,
> @@ -1606,82 +1692,139 @@ determine_locally_known_aggregate_parts (gcall *call, tree arg,
>        ao_ref_init (&r, arg);
>      }
>
> -  /* Second stage walks back the BB, looks at individual statements and as long
> -     as it is confident of how the statements affect contents of the
> -     aggregates, it builds a sorted linked list of ipa_agg_jf_list structures
> -     describing it.  */
> -  gsi = gsi_for_stmt (call);
> -  gsi_prev (&gsi);
> -  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
> -    {
> -      struct ipa_known_agg_contents_list *n, **p;
> -      gimple *stmt = gsi_stmt (gsi);
> -      HOST_WIDE_INT lhs_offset, lhs_size;
> -      tree lhs, rhs, lhs_base;
> -      bool reverse;
> -
> -      if (!stmt_may_clobber_ref_p_1 (stmt, &r))
> -       continue;
> -      if (!gimple_assign_single_p (stmt))
> -       break;
> -
> -      lhs = gimple_assign_lhs (stmt);
> -      rhs = gimple_assign_rhs1 (stmt);
> -      if (!is_gimple_reg_type (TREE_TYPE (rhs))
> -         || TREE_CODE (lhs) == BIT_FIELD_REF
> -         || contains_bitfld_component_ref_p (lhs))
> -       break;
> -
> -      lhs_base = get_ref_base_and_extent_hwi (lhs, &lhs_offset,
> -                                             &lhs_size, &reverse);
> -      if (!lhs_base)
> -       break;
> -
> -      if (check_ref)
> -       {
> -         if (TREE_CODE (lhs_base) != MEM_REF
> -             || TREE_OPERAND (lhs_base, 0) != arg_base
> -             || !integer_zerop (TREE_OPERAND (lhs_base, 1)))
> -           break;
> -       }
> -      else if (lhs_base != arg_base)
> -       {
> -         if (DECL_P (lhs_base))
> -           continue;
> -         else
> -           break;
> -       }
> -
> -      bool already_there = false;
> -      p = get_place_in_agg_contents_list (&list, lhs_offset, lhs_size,
> -                                         &already_there);
> -      if (!p)
> -       break;
> -      if (already_there)
> -       continue;
> -
> -      rhs = get_ssa_def_if_simple_copy (rhs);
> -      n = XALLOCA (struct ipa_known_agg_contents_list);
> -      n->size = lhs_size;
> -      n->offset = lhs_offset;
> -      if (is_gimple_ip_invariant (rhs))
> -       {
> -         n->constant = rhs;
> -         const_count++;
> -       }
> -      else
> -       n->constant = NULL_TREE;
> -      n->next = *p;
> -      *p = n;
> -
> -      item_count++;
> -      if (const_count == PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS)
> -         || item_count == 2 * PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS))
> -       break;
> -    }
> +  /* Second stage walks back from the call statement, looks at individual
> +     virtual operand and as long as it is confident of how definition
> +     statement of the virtual operand affects content of the aggregate, it
> +     builds a sorted linked list of ipa_agg_jf_list structures describing it.
> +
> +     Actually, the stage involves kind of global reachability analysis on
> +     in-memory variables definitions. A complete resolution requires iterative
> +     dataflow equation computation, which is somewhat complex and seems to be
> +     overkill to this collecting-constant task.
> +
> +     So here we use a simpler algorithm that we believe can archive nearly
> +     same result in most situations. In the algorithm, we traverse virtual SSA
> +     web backwards starting from the call, by stepping from one dominating
> +     virtual operand (its definition dominates the call) to another dominating
> +     one. Before a dom virtual operand is processed, we will ensure all those
> +     non-dom virtual operands, which are backwards reachable from previous dom
> +     virtual operand, have been processed. But only dom virtual operand is
> +     regarded as possible constant value provider to the call argument. And
> +     non-dom virtual operand is just checked to see whether it kills
> +     definitions of some dom virtual operands. */
> +
> +  for (tree dom_vuse = gimple_vuse (call); dom_vuse; )
> +    {
> +      gimple *stmt = SSA_NAME_DEF_STMT (dom_vuse);
> +      bool exist = visited.add (dom_vuse);
> +
> +      gcc_assert (!exist);
> +
> +      if (gimple_code (stmt) != GIMPLE_PHI)
> +        {
> +          if (stmt_may_clobber_ref_p_1 (stmt, &r))
> +            {
> +              struct ipa_known_agg_contents_list *content =
> +                         XALLOCA (struct ipa_known_agg_contents_list);
> +
> +              if (!extract_mem_content (stmt, arg_base, check_ref, content))
> +                break;
> +
> +              /* Now we get a dom virtual operand, and need to check whether
> +                 its value can finally reach the call. And put it into the
> +                 content list only if it is unique. */
> +              if (!clobber_by_agg_contents_list_p (all_list, content)
> +                  && add_to_agg_contents_list (&list, content, false))
> +                {
> +                  struct ipa_known_agg_contents_list *copy =
> +                             XALLOCA (struct ipa_known_agg_contents_list);
> +
> +                  *copy = *content;
> +                  content = copy;
> +
> +                  item_count +=1;
> +                  const_count += !!(content->constant);
> +
> +                  if (item_count == 2 * ipa_max_agg_items
> +                      || const_count == ipa_max_agg_items)
> +                    break;
> +                }
> +              add_to_agg_contents_list (&all_list, content);
> +            }
> +          dom_vuse = gimple_vuse (stmt);
> +          continue;
> +        }
> +
> +      /* We get to a merge point in virtual SSA web, then go through all
> +         non-dom virtual operands before handling next dom virtual operand. */
> +      worklist.safe_push (dom_vuse);
> +      dom_vuse = NULL_TREE;
> +
> +      do
> +        {
> +          tree vuse = worklist.pop ();
> +          gimple *stmt = SSA_NAME_DEF_STMT (vuse);
> +          auto_vec<tree, 6> append;
> +
> +          /* Non-dom virtual operand should not be the DEFAULT definition. */
> +          gcc_assert (!SSA_NAME_IS_DEFAULT_DEF (vuse));
> +
> +          if (gimple_code (stmt) != GIMPLE_PHI)
> +            {
> +              if (stmt_may_clobber_ref_p_1 (stmt, &r))
> +                {
> +                  struct ipa_known_agg_contents_list *content =
> +                             XALLOCA (struct ipa_known_agg_contents_list);
> +
> +                  if (!extract_mem_content (stmt, arg_base, check_ref, content))
> +                    goto finish;
> +
> +                  add_to_agg_contents_list (&all_list, content);

I don't think your PHI handling works correct.  If you have

   agg.part1 = 0;
   if ()
     agg.part2 = 1;
   call (agg);

then you seem to end up above for agg.part2 because you push that onto the
worklist below.

> +                }
> +              append.safe_push (gimple_vuse (stmt));
> +            }
> +          else
> +            {
> +              for (unsigned i = 0; i < gimple_phi_num_args (stmt); ++i)
> +                {
> +                  tree phi_arg = gimple_phi_arg_def (stmt, i);
> +
> +                  if (SSA_NAME_IS_DEFAULT_DEF (phi_arg))
> +                    goto finish;
> +
> +                  append.safe_push (phi_arg);

Not sure why you first push to append and then move parts to the
worklist below - it seems to only complicate code.

What you want to do is (probably) use get_continuation_for_phi
which for a virtual PHI node and a reference computes a
virtual operand you can continue your processing with.  Thus sth
like

   bitmap vistied = NULL;
   dom_vuse = get_continuation_for_phi (stmt, &ref, counter /* limit walking */,
  &visited, false, NULL, NULL);
  if (visited)
    BITMAP_FREE (visited);

and have ref be

  ao_ref ref = ao_ref_init (arg);

where you then can remove the worklist alltogether.

You need to limit work you want to do, otherwise it becomes quite
expensive - assign an overall limit of alias queries, get_continuation_for_phi
will decrement counter for each query it does and return NULL if
counter reaches zero.  Each extract_mem_content call should be
accounted as well.

Richard.

> +                }
> +            }
> +
> +          for (unsigned i = 0; i < append.length (); ++i)
> +            {
> +              if (visited.add (vuse = append[i]))
> +                continue;
> +
> +              if (SSA_NAME_IS_DEFAULT_DEF (vuse)
> +                  || strictly_dominated_by_ssa_p (call_bb, vuse))
> +                {
> +                  /* Found a new dom virtual operand, stop going further until
> +                     all pending non-dom virtual operands are processed. */
> +                  gcc_assert (!dom_vuse);
> +                  dom_vuse = vuse;
> +                }
> +              else
> +                worklist.safe_push (vuse);
> +            }
> +        } while (!worklist.is_empty ());
> +
> +      gcc_assert (dom_vuse);
> +
> +      /* It is asserted that unprocessed dom virtual operand should be in
> +         non-visited state. */
> +      visited.remove (dom_vuse);
> +    }
> +
> +finish:
>
>    /* Third stage just goes over the list and creates an appropriate vector of
> -     ipa_agg_jf_item structures out of it, of sourse only if there are
> +     ipa_agg_jf_item structures out of it, of course only if there are
>       any known constants to begin with.  */
>
>    if (const_count)
> @@ -1691,6 +1834,7 @@ determine_locally_known_aggregate_parts (gcall *call, tree arg,
>      }
>  }
>
> +
>  /* Return the Ith param type of callee associated with call graph
>     edge E.  */
>
> @@ -1797,7 +1941,7 @@ ipa_set_jfunc_vr (ipa_jump_func *jf, enum value_range_kind type,
>    jf->m_vr = ipa_get_value_range (type, min, max);
>  }
>
> -/* Assign to JF a pointer to a value_range just liek TMP but either fetch a
> +/* Assign to JF a pointer to a value_range just like TMP but either fetch a
>     copy from ipa_vr_hash_table or allocate a new on in GC memory.  */
>
>  static void
> @@ -1978,7 +2122,7 @@ ipa_compute_jump_functions_for_edge (struct ipa_func_body_info *fbi,
>               || !ipa_get_jf_ancestor_agg_preserved (jfunc))
>           && (AGGREGATE_TYPE_P (TREE_TYPE (arg))
>               || POINTER_TYPE_P (param_type)))
> -       determine_locally_known_aggregate_parts (call, arg, param_type, jfunc);
> +       determine_known_aggregate_parts (call, arg, param_type, jfunc);
>      }
>    if (!useful_context)
>      vec_free (args->polymorphic_call_contexts);
> diff --git a/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c
> new file mode 100644
> index 00000000000..131c4e265bb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c
> @@ -0,0 +1,78 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-ipa-cp-details -fno-inline" } */
> +
> +int data1;
> +
> +int callee1(int *v)
> +{
> +  if (*v < 2)
> +    return 0;
> +  else
> +    {
> +      int t = data1;
> +
> +      data1 = *v;
> +      *v = t;
> +
> +      return 1;
> +    }
> +}
> +
> +int __attribute((pure)) callee2(int *v)
> +{
> +  if (*v < 2)
> +    return 0;
> +  else
> +    {
> +      data1 = v[0] + v[2];
> +
> +      return 1;
> +    }
> +}
> +
> +int caller1(int c, int *r)
> +{
> +  int a = 1;
> +
> +  if (c)
> +    return callee1(&a);
> +  else
> +    {
> +      *r = 2;
> +      return callee1(r);
> +    }
> +}
> +
> +int data2[200];
> +int data3;
> +
> +int __attribute((const)) gen_cond(int);
> +
> +int caller2(void)
> +{
> +  int i, j;
> +  int sum = 0;
> +  int a[8];
> +
> +  a[0] = 3;
> +  for (i = 0; i < 100; i++)
> +    {
> +      if (gen_cond (i))
> +        continue;
> +
> +      a[2] = 4;
> +      for (j = 0; j < 100; j++)
> +        {
> +          data2[i + j] = (i ^ j) + data3;
> +
> +          sum += callee2(a);
> +        }
> +    }
> +
> +  return sum;
> +}
> +
> +/* { dg-final { scan-ipa-dump-times "offset: 0, cst: 1" 1 "cp" } } */
> +/* { dg-final { scan-ipa-dump-times "offset: 0, cst: 2" 1 "cp" } } */
> +/* { dg-final { scan-ipa-dump-times "offset: 0, cst: 3" 1 "cp" } } */
> +/* { dg-final { scan-ipa-dump-times "offset: 64, cst: 4" 1 "cp" } } */
>



More information about the Gcc-patches mailing list