[PATCH] Implement a context aware points-to analyzer for use in evrp.

Richard Biener richard.guenther@gmail.com
Mon Jun 7 13:30:48 GMT 2021


On Mon, Jun 7, 2021 at 12:10 PM Aldy Hernandez via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> The substitute_and_fold_engine which evrp uses is expecting symbolics
> from value_of_expr / value_on_edge / etc, which ranger does not provide.
> In some cases, these provide important folding cues, as in the case of
> aliases for pointers.  For example, legacy evrp may return [&foo, &foo]
> for the value of "bar" where bar is on an edge where bar == &foo, or
> when bar has been globally set to &foo.  This information is then used
> by the subst & fold engine to propagate the known value of bar.
>
> Currently this is a major source of discrepancies between evrp and
> ranger.  Of the 284 cases legacy evrp is getting over ranger, 237 are
> for pointer equality as discussed above.
>
> This patch implements a context aware points-to class which
> ranger-evrp can use to query what a pointer is currently pointing to.
> With it, we reduce the 284 cases legacy evrp is getting to 47.
>
> The API for the points-to analyzer is the following:
>
> class points_to_analyzer
> {
> public:
>   points_to_analyzer (gimple_ranger *r);
>   ~points_to_analyzer ();
>   void enter (basic_block);
>   void leave (basic_block);
>   void visit_stmt (gimple *stmt);
>   tree get_points_to (tree name) const;
> ...
> };
>
> The enter(), leave(), and visit_stmt() methods are meant to be called
> from a DOM walk.   At any point throughout the walk, one can call
> get_points_to() to get whatever an SSA is pointing to.
>
> If this class is useful to others, we could place it in a more generic
> location.
>
> Tested on x86-64 Linux with a regular bootstrap/tests and by comparing
> EVRP folds over ranger before and after this patch.

Hmm, but why call it "points-to" - when I look at the implementation
it's really about equivalences.  Thus,

 if (var1_2 == var2_3)

could be handled the same way.  Also "points-to" implies (to me)
that &p[1] and &p[2] point to the same object but your points-to
is clearly tracking equivalences only.

So maybe at least rename it to pointer_equiv_analyzer?  ISTR
propagating random (symbolic) equivalences has issues.

Thanks,
Richard.

> gcc/ChangeLog:
>
>         * gimple-ssa-evrp.c (class ssa_equiv_stack): New.
>         (ssa_equiv_stack::ssa_equiv_stack): New.
>         (ssa_equiv_stack::~ssa_equiv_stack): New.
>         (ssa_equiv_stack::enter): New.
>         (ssa_equiv_stack::leave): New.
>         (ssa_equiv_stack::push_replacement): New.
>         (ssa_equiv_stack::get_replacement): New.
>         (is_pointer_ssa): New.
>         (class points_to_analyzer): New.
>         (points_to_analyzer::points_to_analyzer): New.
>         (points_to_analyzer::~points_to_analyzer): New.
>         (points_to_analyzer::set_global_points_to): New.
>         (points_to_analyzer::set_cond_points_to): New.
>         (points_to_analyzer::get_points_to): New.
>         (points_to_analyzer::enter): New.
>         (points_to_analyzer::leave): New.
>         (points_to_analyzer::get_points_to_expr): New.
>         (pta_valueize): New.
>         (points_to_analyzer::visit_stmt): New.
>         (points_to_analyzer::visit_edge): New.
>         (hybrid_folder::value_of_expr): Call PTA.
>         (hybrid_folder::value_on_edge): Same.
>         (hybrid_folder::pre_fold_bb): New.
>         (hybrid_folder::post_fold_bb): New.
>         (hybrid_folder::pre_fold_stmt): New.
>         (rvrp_folder::pre_fold_bb): New.
>         (rvrp_folder::post_fold_bb): New.
>         (rvrp_folder::pre_fold_stmt): New.
>         (rvrp_folder::value_of_expr): Call PTA.
>         (rvrp_folder::value_on_edge): Same.
> ---
>  gcc/gimple-ssa-evrp.c | 352 +++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 350 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/gimple-ssa-evrp.c b/gcc/gimple-ssa-evrp.c
> index 118d10365a0..6ce32d7b620 100644
> --- a/gcc/gimple-ssa-evrp.c
> +++ b/gcc/gimple-ssa-evrp.c
> @@ -42,6 +42,305 @@ along with GCC; see the file COPYING3.  If not see
>  #include "vr-values.h"
>  #include "gimple-ssa-evrp-analyze.h"
>  #include "gimple-range.h"
> +#include "fold-const.h"
> +
> +// Unwindable SSA equivalence table for pointers.
> +//
> +// The main query point is get_replacement() which returns what a given SSA can
> +// be replaced with in the current scope.
> +
> +class ssa_equiv_stack
> +{
> +public:
> +  ssa_equiv_stack ();
> +  ~ssa_equiv_stack ();
> +  void enter (basic_block);
> +  void leave (basic_block);
> +  void push_replacement (tree name, tree replacement);
> +  tree get_replacement (tree name) const;
> +
> +private:
> +  auto_vec<std::pair <tree, tree>> m_stack;
> +  tree *m_replacements;
> +  const std::pair <tree, tree> m_marker = std::make_pair (NULL, NULL);
> +};
> +
> +ssa_equiv_stack::ssa_equiv_stack ()
> +{
> +  m_replacements = new tree[num_ssa_names] ();
> +}
> +
> +ssa_equiv_stack::~ssa_equiv_stack ()
> +{
> +  m_stack.release ();
> +  delete m_replacements;
> +}
> +
> +// Pushes a marker at the given point.
> +
> +void
> +ssa_equiv_stack::enter (basic_block)
> +{
> +  m_stack.safe_push (m_marker);
> +}
> +
> +// Pops the stack to the last marker, while performing replacements along the
> +// way.
> +
> +void
> +ssa_equiv_stack::leave (basic_block)
> +{
> +  gcc_checking_assert (!m_stack.is_empty ());
> +  while (m_stack.last () != m_marker)
> +    {
> +      std::pair<tree, tree> e = m_stack.pop ();
> +      m_replacements[SSA_NAME_VERSION (e.first)] = e.second;
> +    }
> +  m_stack.pop ();
> +}
> +
> +// Set the equivalence of NAME to REPLACEMENT.
> +
> +void
> +ssa_equiv_stack::push_replacement (tree name, tree replacement)
> +{
> +  tree old = m_replacements[SSA_NAME_VERSION (name)];
> +  m_replacements[SSA_NAME_VERSION (name)] = replacement;
> +  m_stack.safe_push (std::make_pair (name, old));
> +}
> +
> +// Return the equivalence of NAME.
> +
> +tree
> +ssa_equiv_stack::get_replacement (tree name) const
> +{
> +  return m_replacements[SSA_NAME_VERSION (name)];
> +}
> +
> +// Return TRUE if EXPR is an SSA holding a pointer.
> +
> +static bool inline
> +is_pointer_ssa (tree expr)
> +{
> +  return TREE_CODE (expr) == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (expr));
> +}
> +
> +// Simple context-aware points-to analyzer that returns what an SSA name
> +// points-to at a given point during a walk of the IL.
> +//
> +// Note that global points-to take priority over conditional points-to.  That
> +// is, p = q takes priority over a later p == t.
> +//
> +// This class is meant to be called during a DOM walk.
> +
> +class points_to_analyzer
> +{
> +public:
> +  points_to_analyzer (gimple_ranger *r);
> +  ~points_to_analyzer ();
> +  void enter (basic_block);
> +  void leave (basic_block);
> +  void visit_stmt (gimple *stmt);
> +  tree get_points_to (tree ssa) const;
> +
> +private:
> +  void visit_edge (edge e);
> +  tree get_points_to_expr (tree_code code, tree expr) const;
> +  void set_global_points_to (tree ssa, tree pointee);
> +  void set_cond_points_to (tree ssa, tree pointee);
> +
> +  gimple_ranger *m_ranger;
> +  // Global points-to replacements indexed by SSA_NAME_VERSION.
> +  tree *m_global_points;
> +  // Conditional points-to replacements.
> +  ssa_equiv_stack m_cond_points;
> +};
> +
> +points_to_analyzer::points_to_analyzer (gimple_ranger *r)
> +{
> +  m_ranger = r;
> +  m_global_points = new tree[num_ssa_names] ();
> +}
> +
> +points_to_analyzer::~points_to_analyzer ()
> +{
> +  delete m_global_points;
> +}
> +
> +// Set the global points-to for SSA to POINTEE.
> +
> +void
> +points_to_analyzer::set_global_points_to (tree ssa, tree pointee)
> +{
> +  m_global_points[SSA_NAME_VERSION (ssa)] = pointee;
> +}
> +
> +// Set the conditional points-to for SSA to POINTEE.
> +
> +void
> +points_to_analyzer::set_cond_points_to (tree ssa, tree pointee)
> +{
> +  m_cond_points.push_replacement (ssa, pointee);
> +}
> +
> +// Return the current points-to information for SSA, or NULL if none is
> +// available.  Note that global info takes priority over conditional info.
> +
> +tree
> +points_to_analyzer::get_points_to (tree ssa) const
> +{
> +  tree ret = m_global_points[SSA_NAME_VERSION (ssa)];
> +  if (ret)
> +    return ret;
> +  return m_cond_points.get_replacement (ssa);
> +}
> +
> +// Method to be called on entry to a BB.
> +
> +void
> +points_to_analyzer::enter (basic_block bb)
> +{
> +  m_cond_points.enter (bb);
> +
> +  for (gphi_iterator iter = gsi_start_phis (bb);
> +       !gsi_end_p (iter);
> +       gsi_next (&iter))
> +    {
> +      gphi *phi = iter.phi ();
> +      tree lhs = gimple_phi_result (phi);
> +      if (!POINTER_TYPE_P (TREE_TYPE (lhs)))
> +       continue;
> +      tree arg0 = gimple_phi_arg_def (phi, 0);
> +      if (TREE_CODE (arg0) == SSA_NAME && !is_gimple_min_invariant (arg0))
> +       arg0 = get_points_to (arg0);
> +      if (arg0 && is_gimple_min_invariant (arg0))
> +       {
> +         // If all the PHI args point to the same place, set the
> +         // points-to info for the PHI result.  This can happen for
> +         // passes that create redundant PHIs like PHI<&foo, &foo> or
> +         // PHI<&foo>.
> +         for (size_t i = 1; i < gimple_phi_num_args (phi); ++i)
> +           {
> +             tree argi = gimple_phi_arg_def (phi, i);
> +             if (TREE_CODE (argi) == SSA_NAME
> +                 && !is_gimple_min_invariant (argi))
> +               argi = get_points_to (argi);
> +             if (!argi || !operand_equal_p (arg0, argi))
> +               return;
> +           }
> +         set_global_points_to (lhs, arg0);
> +       }
> +    }
> +
> +  edge pred = single_pred_edge_ignoring_loop_edges (bb, false);
> +  if (pred)
> +    visit_edge (pred);
> +}
> +
> +// Method to be called on exit from a BB.
> +
> +void
> +points_to_analyzer::leave (basic_block bb)
> +{
> +  m_cond_points.leave (bb);
> +}
> +
> +// Helper function to return the points-to information for EXPR from a gimple
> +// statement with CODE.  This returns either the cached points-to info for an
> +// SSA, or an invariant in case EXPR is one (i.e. &foo).  Returns NULL if EXPR
> +// is neither an SSA nor an invariant.
> +
> +tree
> +points_to_analyzer::get_points_to_expr (tree_code code, tree expr) const
> +{
> +  if (code == SSA_NAME)
> +    return get_points_to (expr);
> +
> +  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
> +      && is_gimple_min_invariant (expr))
> +    return expr;
> +
> +  return NULL;
> +}
> +
> +// Hack to provide context to the gimple fold callback.
> +static struct
> +{
> +  gimple *m_stmt;
> +  gimple_ranger *m_ranger;
> +  points_to_analyzer *m_pta;
> +} x_fold_context;
> +
> +// Gimple fold callback.
> +static tree
> +pta_valueize (tree name)
> +{
> +  tree ret
> +    = x_fold_context.m_ranger->value_of_expr (name, x_fold_context.m_stmt);
> +
> +  if (!ret && is_pointer_ssa (name))
> +    ret = x_fold_context.m_pta->get_points_to (name);
> +
> +  return ret ? ret : name;
> +}
> +
> +// Method to be called on gimple statements during traversal of the IL.
> +
> +void
> +points_to_analyzer::visit_stmt (gimple *stmt)
> +{
> +  if (gimple_code (stmt) != GIMPLE_ASSIGN)
> +    return;
> +
> +  tree lhs = gimple_assign_lhs (stmt);
> +  if (!is_pointer_ssa (lhs))
> +    return;
> +
> +  // Try to get the points-to info from the cache.
> +  tree rhs = gimple_assign_rhs1 (stmt);
> +  rhs = get_points_to_expr (gimple_assign_rhs_code (stmt), rhs);
> +  if (rhs)
> +    {
> +      set_global_points_to (lhs, rhs);
> +      return;
> +    }
> +
> +  // If we couldn't find anything, try fold.
> +  x_fold_context = { stmt, m_ranger, this};
> +  rhs = gimple_fold_stmt_to_constant_1 (stmt, pta_valueize, pta_valueize);
> +  if (rhs)
> +    {
> +      rhs = get_points_to_expr (TREE_CODE (rhs), rhs);
> +      if (rhs)
> +       {
> +         set_global_points_to (lhs, rhs);
> +         return;
> +       }
> +    }
> +}
> +
> +// If the edge in E is a conditional that sets a pointer equality, set the
> +// conditional points-to information.
> +
> +void
> +points_to_analyzer::visit_edge (edge e)
> +{
> +  gimple *stmt = last_stmt (e->src);
> +  tree lhs;
> +  // Recognize: x_13 [==,!=] &foo.
> +  if (stmt
> +      && gimple_code (stmt) == GIMPLE_COND
> +      && (lhs = gimple_cond_lhs (stmt))
> +      && TREE_CODE (lhs) == SSA_NAME
> +      && POINTER_TYPE_P (TREE_TYPE (lhs))
> +      && TREE_CODE (gimple_cond_rhs (stmt)) == ADDR_EXPR)
> +    {
> +      tree_code code = gimple_cond_code (stmt);
> +      if ((code == EQ_EXPR && e->flags & EDGE_TRUE_VALUE)
> +         || ((code == NE_EXPR && e->flags & EDGE_FALSE_VALUE)))
> +       set_cond_points_to (lhs, gimple_cond_rhs (stmt));
> +    }
> +}
>
>  // This is the classic EVRP folder which uses a dominator walk and pushes
>  // ranges into the next block if it is a single predecessor block.
> @@ -120,6 +419,7 @@ public:
>    {
>      m_ranger = enable_ranger (cfun);
>      m_simplifier.set_range_query (m_ranger);
> +    m_pta = new points_to_analyzer (m_ranger);
>    }
>
>    ~rvrp_folder ()
> @@ -129,16 +429,23 @@ public:
>
>      m_ranger->export_global_ranges ();
>      disable_ranger (cfun);
> +    delete m_pta;
>    }
>
>    tree value_of_expr (tree name, gimple *s = NULL) OVERRIDE
>    {
> -    return m_ranger->value_of_expr (name, s);
> +    tree ret = m_ranger->value_of_expr (name, s);
> +    if (!ret && is_pointer_ssa (name))
> +      ret = m_pta->get_points_to (name);
> +    return ret;
>    }
>
>    tree value_on_edge (edge e, tree name) OVERRIDE
>    {
> -    return m_ranger->value_on_edge (e, name);
> +    tree ret = m_ranger->value_on_edge (e, name);
> +    if (!ret && is_pointer_ssa (name))
> +      ret = m_pta->get_points_to (name);
> +    return ret;
>    }
>
>    tree value_of_stmt (gimple *s, tree name = NULL) OVERRIDE
> @@ -146,6 +453,21 @@ public:
>      return m_ranger->value_of_stmt (s, name);
>    }
>
> +  void pre_fold_bb (basic_block bb) OVERRIDE
> +  {
> +    m_pta->enter (bb);
> +  }
> +
> +  void post_fold_bb (basic_block bb) OVERRIDE
> +  {
> +    m_pta->leave (bb);
> +  }
> +
> +  void pre_fold_stmt (gimple *stmt) OVERRIDE
> +  {
> +    m_pta->visit_stmt (stmt);
> +  }
> +
>    bool fold_stmt (gimple_stmt_iterator *gsi) OVERRIDE
>    {
>      return m_simplifier.simplify (gsi);
> @@ -155,6 +477,7 @@ private:
>    DISABLE_COPY_AND_ASSIGN (rvrp_folder);
>    gimple_ranger *m_ranger;
>    simplify_using_ranges m_simplifier;
> +  points_to_analyzer *m_pta;
>  };
>
>  // In a hybrid folder, start with an EVRP folder, and add the required
> @@ -186,6 +509,7 @@ public:
>         first = m_ranger;
>         second = &m_range_analyzer;
>        }
> +    m_pta = new points_to_analyzer (m_ranger);
>    }
>
>    ~hybrid_folder ()
> @@ -195,6 +519,7 @@ public:
>
>      m_ranger->export_global_ranges ();
>      disable_ranger (cfun);
> +    delete m_pta;
>    }
>
>    bool fold_stmt (gimple_stmt_iterator *gsi) OVERRIDE
> @@ -213,6 +538,24 @@ public:
>        return false;
>      }
>
> +  void pre_fold_stmt (gimple *stmt) OVERRIDE
> +  {
> +    evrp_folder::pre_fold_stmt (stmt);
> +    m_pta->visit_stmt (stmt);
> +  }
> +
> +  void pre_fold_bb (basic_block bb) OVERRIDE
> +  {
> +    evrp_folder::pre_fold_bb (bb);
> +    m_pta->enter (bb);
> +  }
> +
> +  void post_fold_bb (basic_block bb) OVERRIDE
> +  {
> +    evrp_folder::post_fold_bb (bb);
> +    m_pta->leave (bb);
> +  }
> +
>    tree value_of_expr (tree name, gimple *) OVERRIDE;
>    tree value_on_edge (edge, tree name) OVERRIDE;
>    tree value_of_stmt (gimple *, tree name) OVERRIDE;
> @@ -222,6 +565,7 @@ private:
>    gimple_ranger *m_ranger;
>    range_query *first;
>    range_query *second;
> +  points_to_analyzer *m_pta;
>    tree choose_value (tree evrp_val, tree ranger_val);
>  };
>
> @@ -231,6 +575,8 @@ hybrid_folder::value_of_expr (tree op, gimple *stmt)
>  {
>    tree evrp_ret = evrp_folder::value_of_expr (op, stmt);
>    tree ranger_ret = m_ranger->value_of_expr (op, stmt);
> +  if (!ranger_ret && is_pointer_ssa (op))
> +    ranger_ret = m_pta->get_points_to (op);
>    return choose_value (evrp_ret, ranger_ret);
>  }
>
> @@ -241,6 +587,8 @@ hybrid_folder::value_on_edge (edge e, tree op)
>    // via hybrid_folder::value_of_expr, but without an edge.
>    tree evrp_ret = evrp_folder::value_of_expr (op, NULL);
>    tree ranger_ret = m_ranger->value_on_edge (e, op);
> +  if (!ranger_ret && is_pointer_ssa (op))
> +    ranger_ret = m_pta->get_points_to (op);
>    return choose_value (evrp_ret, ranger_ret);
>  }
>
> --
> 2.31.1
>


More information about the Gcc-patches mailing list