[PATCH] extend cselim to check non-trapping for more references (PR tree-optimizaton/89430)

Richard Biener rguenther@suse.de
Tue Jun 2 11:29:09 GMT 2020


On Fri, 29 May 2020, Hao Liu OS wrote:

> Hi Richard,
> 
> Thanks for your comments. It's a good idea to simplify the code and remove get_inner_reference. I've updated the patch accordingly. I also simplified the code to ignore other loads, which can not help to check if a store can be trapped. 
> 
> About tests:
>     1.  All previously XFAIL tests (gcc.dg/tree-ssa/pr89430-*) for pr89430 are passed with this patch. 
>     2.  ssa-pre-17.c is failed as cselim optimizes the conditional store, so "-fno-tree-cselim" is added. That case is added as a new test case for pr89430.
>     3.  Other test cases (including the regression test for pr94734) in gcc testsuit are not affected by this patch, according to gcc "make check".
>     4.  Some other benchmarks are also tested for correctness and performance. The performance regression mentioned in pr89430 can be fixed. 
>  
> Review, please.

Looks mostly good now, some more comments inline below

> gcc/ChangeLog:
> 
>     PR tree-optimization/89430
>     * tree-ssa-phiopt.c (cond_store_replacement): Extend non-trap checking to
>     support ARRAY_REFs and COMPONENT_REFs.  Support a special case: if there is
>     a dominating load of local variable without address escape, a store is not
>     trapped (as local stack is always writable).  Other loads are ignored for
>     simplicity, as they don't help to check if a store can be trapped.
> 
> gcc/testsuite/ChangeLog:
> 
>     PR tree-optimization/89430
>     * gcc.dg/tree-ssa/pr89430-1.c: Remove xfail.
>     * gcc.dg/tree-ssa/pr89430-2.c: Remove xfail.
>     * gcc.dg/tree-ssa/pr89430-5.c: Remove xfail.
>     * gcc.dg/tree-ssa/pr89430-6.c: Remove xfail.
>     * gcc.dg/tree-ssa/pr89430-7-comp-ref.c: New test.
>     * gcc.dg/tree-ssa/ssa-pre-17.c: Add -fno-tree-cselim.
> 
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c     |   2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c     |   2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c     |   2 +-
>  gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c     |   2 +-
>  .../gcc.dg/tree-ssa/pr89430-7-comp-ref.c      |  17 +++
>  gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c    |   2 +-
>  gcc/tree-ssa-phiopt.c                         | 140 +++++++++---------
>  7 files changed, 91 insertions(+), 76 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
> 
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
> index ce242ba569b..8ee1850ac63 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
> @@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) {
>          return a[0]+a[1];
>  }
>  
> -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
> index 90ae36bfce2..9b96875ac7a 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
> @@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) {
>          return a[0]+a[1];
>  }
>  
> -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
> index c633cbe947d..b2d04119381 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
> @@ -13,4 +13,4 @@ int test(int b, int k) {
>      return a.data[0] + a.data[1];
>  }
>  
> -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
> index 7cad563128d..8d3c4f7cc6a 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
> @@ -16,4 +16,4 @@ int test(int b, int k) {
>      return a.data[0].x + a.data[1].x;
>  }
>  
> -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
> new file mode 100644
> index 00000000000..c35a2afc70b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-cselim-details" } */
> +
> +typedef union {
> +  int i;
> +  float f;
> +} U;
> +
> +int foo(U *u, int b, int i)
> +{
> +  u->i = 0;
> +  if (b)
> +    u->i = i;
> +  return u->i;
> +}
> +
> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
> index 09313716598..a06f339f0bb 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-pre-stats" } */
> +/* { dg-options "-O2 -fdump-tree-pre-stats -fno-tree-cselim" } */
>  
>  typedef union {
>    int i;
> diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
> index b1e0dce93d8..7c67b75486c 100644
> --- a/gcc/tree-ssa-phiopt.c
> +++ b/gcc/tree-ssa-phiopt.c
> @@ -1969,43 +1969,44 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,
>    return true;
>  }
>  
> -/* Auxiliary functions to determine the set of memory accesses which
> +/* Auxiliary functions to determine the set of memory write accesses which
>     can't trap because they are preceded by accesses to the same memory
> -   portion.  We do that for MEM_REFs, so we only need to track
> -   the SSA_NAME of the pointer indirectly referenced.  The algorithm
> +   portion.  We do that for MEM_REFs/ARRAY_REFs/COMPONENT_REFs.  The algorithm
>     simply is a walk over all instructions in dominator order.  When
> -   we see an MEM_REF we determine if we've already seen a same
> +   we see an *_REF we determine if we've already seen a same
>     ref anywhere up to the root of the dominator tree.  If we do the
>     current access can't trap.  If we don't see any dominating access
>     the current access might trap, but might also make later accesses
>     non-trapping, so we remember it.  We need to be careful with loads
>     or stores, for instance a load might not trap, while a store would,
>     so if we see a dominating read access this doesn't mean that a later
> -   write access would not trap.  Hence we also need to differentiate the
> -   type of access(es) seen.
> +   write access would not trap.
>  
> -   ??? We currently are very conservative and assume that a load might
> -   trap even if a store doesn't (write-only memory).  This probably is
> -   overly conservative.  */
> +   We care about following memory accesses:
> +     1) a store.
> +     2) a load of local variable without address-taken (as local stack is
> +	always writable).  It helps to optimize a conditoinal store with
> +	a dominating load.
>  
> -/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF
> -   through it was seen, which would constitute a no-trap region for
> -   same accesses.  */
> -struct name_to_bb
> +   other loads are ignored as we don't know if the accessed memory is writable,
> +   so they don't help conditional store replacement.  */
> +
> +/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which
> +   basic block an *_REF through it was seen, which would constitute a
> +   no-trap region for same accesses.  */
> +struct ref_to_bb
>  {
> -  unsigned int ssa_name_ver;
> +  tree exp;
>    unsigned int phase;
> -  bool store;
> -  HOST_WIDE_INT offset, size;
>    basic_block bb;
>  };
>  
>  /* Hashtable helpers.  */
>  
> -struct ssa_names_hasher : free_ptr_hash <name_to_bb>
> +struct refs_hasher : free_ptr_hash<ref_to_bb>
>  {
> -  static inline hashval_t hash (const name_to_bb *);
> -  static inline bool equal (const name_to_bb *, const name_to_bb *);
> +  static inline hashval_t hash (const ref_to_bb *);
> +  static inline bool equal (const ref_to_bb *, const ref_to_bb *);
>  };
>  
>  /* Used for quick clearing of the hash-table when we see calls.
> @@ -2015,28 +2016,27 @@ static unsigned int nt_call_phase;
>  /* The hash function.  */
>  
>  inline hashval_t
> -ssa_names_hasher::hash (const name_to_bb *n)
> +refs_hasher::hash (const ref_to_bb *n)
>  {
> -  return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
> -         ^ (n->offset << 6) ^ (n->size << 3);
> +  inchash::hash hstate;
> +  inchash::add_expr (n->exp, hstate);
> +  return hstate.end ();
>  }
>  
>  /* The equality function of *P1 and *P2.  */
>  
>  inline bool
> -ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2)
> +refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2)
>  {
> -  return n1->ssa_name_ver == n2->ssa_name_ver
> -         && n1->store == n2->store
> -         && n1->offset == n2->offset
> -         && n1->size == n2->size;
> +  return operand_equal_p (n1->exp, n2->exp, 0);

So I figured we're going to regress some cases here that do not
have semantically agreeing MEM_REFs, like MEM<double>(s_1)
and MEM<long>(s_1) they would not compare equal but did before,
just considering offset and size.  So we'd like to pass
OEP_ADDRESS_OF as flag argument to operand_equal_p here
and to add_expr above.  This part would then just check whether
the accesses are to the same address.

This means you have to preserve the ->size member (and check it).

The rest of the patch looks good to me.

Thanks,
Richard.

>  }
>  
>  class nontrapping_dom_walker : public dom_walker
>  {
>  public:
>    nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)
> -    : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {}
> +    : dom_walker (direction), m_nontrapping (ps), m_seen_refs (128)
> +  {}
>  
>    virtual edge before_dom_children (basic_block);
>    virtual void after_dom_children (basic_block);
> @@ -2053,7 +2053,7 @@ private:
>    hash_set<tree> *m_nontrapping;
>  
>    /* The hash table for remembering what we've seen.  */
> -  hash_table<ssa_names_hasher> m_seen_ssa_names;
> +  hash_table<refs_hasher> m_seen_refs;
>  };
>  
>  /* Called by walk_dominator_tree, when entering the block BB.  */
> @@ -2102,65 +2102,63 @@ nontrapping_dom_walker::after_dom_children (basic_block bb)
>  }
>  
>  /* We see the expression EXP in basic block BB.  If it's an interesting
> -   expression (an MEM_REF through an SSA_NAME) possibly insert the
> -   expression into the set NONTRAP or the hash table of seen expressions.
> -   STORE is true if this expression is on the LHS, otherwise it's on
> -   the RHS.  */
> +   expression of:
> +     1) MEM_REF
> +     2) ARRAY_REF
> +     3) COMPONENT_REF
> +   possibly insert the expression into the set NONTRAP or the hash table
> +   of seen expressions.  STORE is true if this expression is on the LHS,
> +   otherwise it's on the RHS.  */
>  void
>  nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store)
>  {
> -  HOST_WIDE_INT size;
> -
> -  if (TREE_CODE (exp) == MEM_REF
> -      && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
> -      && tree_fits_shwi_p (TREE_OPERAND (exp, 1))
> -      && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
> +  if (TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF
> +      || TREE_CODE (exp) == COMPONENT_REF)
>      {
> -      tree name = TREE_OPERAND (exp, 0);
> -      struct name_to_bb map;
> -      name_to_bb **slot;
> -      struct name_to_bb *n2bb;
> +      struct ref_to_bb map;
> +      ref_to_bb **slot;
> +      struct ref_to_bb *r2bb;
>        basic_block found_bb = 0;
>  
> -      /* Try to find the last seen MEM_REF through the same
> -         SSA_NAME, which can trap.  */
> -      map.ssa_name_ver = SSA_NAME_VERSION (name);
> -      map.phase = 0;
> -      map.bb = 0;
> -      map.store = store;
> -      map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));
> -      map.size = size;
> -
> -      slot = m_seen_ssa_names.find_slot (&map, INSERT);
> -      n2bb = *slot;
> -      if (n2bb && n2bb->phase >= nt_call_phase)
> -        found_bb = n2bb->bb;
> -
> -      /* If we've found a trapping MEM_REF, _and_ it dominates EXP
> -         (it's in a basic block on the path from us to the dominator root)
> +      if (!store)
> +	{
> +	  tree base = get_base_address (exp);
> +	  /* Only record a LOAD of local variable without address-taken, as
> +	     the local stack is always writable.  This allows cselim on a STORE
> +	     with a dominating LOAD.  */
> +	  if (!auto_var_p (base) || TREE_ADDRESSABLE (base))
> +	    return;
> +	}
> +
> +      /* Try to find the last seen *_REF, which can trap.  */
> +      map.exp = exp;
> +      slot = m_seen_refs.find_slot (&map, INSERT);
> +      r2bb = *slot;
> +      if (r2bb && r2bb->phase >= nt_call_phase)
> +	found_bb = r2bb->bb;
> +
> +      /* If we've found a trapping *_REF, _and_ it dominates EXP
> +	 (it's in a basic block on the path from us to the dominator root)
>  	 then we can't trap.  */
>        if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
>  	{
>  	  m_nontrapping->add (exp);
>  	}
>        else
> -        {
> +	{
>  	  /* EXP might trap, so insert it into the hash table.  */
> -	  if (n2bb)
> +	  if (r2bb)
>  	    {
> -	      n2bb->phase = nt_call_phase;
> -	      n2bb->bb = bb;
> +	      r2bb->phase = nt_call_phase;
> +	      r2bb->bb = bb;
>  	    }
>  	  else
>  	    {
> -	      n2bb = XNEW (struct name_to_bb);
> -	      n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
> -	      n2bb->phase = nt_call_phase;
> -	      n2bb->bb = bb;
> -	      n2bb->store = store;
> -	      n2bb->offset = map.offset;
> -	      n2bb->size = size;
> -	      *slot = n2bb;
> +	      r2bb = XNEW (struct ref_to_bb);
> +	      r2bb->phase = nt_call_phase;
> +	      r2bb->bb = bb;
> +	      r2bb->exp = exp;
> +	      *slot = r2bb;
>  	    }
>  	}
>      }
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)


More information about the Gcc-patches mailing list