Use modref summary to DSE calls to non-pure functions

Richard Biener richard.guenther@gmail.com
Fri Nov 12 12:53:26 GMT 2021


On Fri, Nov 12, 2021 at 12:39 PM Jan Hubicka <hubicka@kam.mff.cuni.cz> wrote:
>
> Hi,
> this is updated patch.  It moves the summary walk checking if we can
> possibly suceed on dse to summary->finalize member function so it is done
> once per summary and refactors dse_optimize_call to be called from
> dse_optimize_stmt after early checks.
>
> I did not try to handle the special case of parm_offset_known but we can
> do it incrementally.  I think initializing range with offset being
> polyin64_int_min and max_size unkonwn as suggested is going to work.
> I am bit worried this is in bits so we have 2^61 range instead of 2^64
> but I guess once can not offset pointer back and forth in valid program?

Not sure indeed.  I'd only special-case when the call argument is
&decl, then the start offset can be simply zero.

> Bootstrapped/regtested x86_64-linux, ltobootstrap in progress, OK if it
> succeeds?

OK.

Thanks,
Richard.

> gcc/ChangeLog:
>
>         * ipa-modref.c (modref_summary::modref_summary): Clear new flags.
>         (modref_summary::dump): Dump try_dse.
>         (modref_summary::finalize): Add FUN attribute; compute try-dse.
>         (analyze_function): Update.
>         (read_section): Update.
>         (update_signature): Update.
>         (pass_ipa_modref::execute): Update.
>         * ipa-modref.h (struct modref_summary):
>         * tree-ssa-alias.c (ao_ref_init_from_ptr_and_range): Export.
>         * tree-ssa-alias.h (ao_ref_init_from_ptr_and_range): Declare.
>         * tree-ssa-dse.c: Include cgraph.h, ipa-modref-tree.h and
>         ipa-modref.h
>         (dse_optimize_call): New function.
>         (dse_optimize_stmt): Use it.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/tree-ssa/modref-dse-1.c: New test.
>         * gcc.dg/tree-ssa/modref-dse-2.c: New test.
>
> diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
> index c6efacb0e20..ea6a27ae767 100644
> --- a/gcc/ipa-modref.c
> +++ b/gcc/ipa-modref.c
> @@ -276,7 +276,8 @@ static GTY(()) fast_function_summary <modref_summary_lto *, va_gc>
>
>  modref_summary::modref_summary ()
>    : loads (NULL), stores (NULL), retslot_flags (0), static_chain_flags (0),
> -    writes_errno (false), side_effects (false)
> +    writes_errno (false), side_effects (false), global_memory_read (false),
> +    global_memory_written (false), try_dse (false)
>  {
>  }
>
> @@ -605,6 +606,8 @@ modref_summary::dump (FILE *out)
>      fprintf (out, "  Global memory read\n");
>    if (global_memory_written)
>      fprintf (out, "  Global memory written\n");
> +  if (try_dse)
> +    fprintf (out, "  Try dse\n");
>    if (arg_flags.length ())
>      {
>        for (unsigned int i = 0; i < arg_flags.length (); i++)
> @@ -661,12 +664,56 @@ modref_summary_lto::dump (FILE *out)
>  }
>
>  /* Called after summary is produced and before it is used by local analysis.
> -   Can be called multiple times in case summary needs to update signature.  */
> +   Can be called multiple times in case summary needs to update signature.
> +   FUN is decl of function summary is attached to.  */
>  void
> -modref_summary::finalize ()
> +modref_summary::finalize (tree fun)
>  {
>    global_memory_read = !loads || loads->global_access_p ();
>    global_memory_written = !stores || stores->global_access_p ();
> +
> +  /* We can do DSE if we know function has no side effects and
> +     we can analyse all stores.  Disable dse if there are too many
> +     stores to try.  */
> +  if (side_effects || global_memory_written || writes_errno)
> +    try_dse = false;
> +  else
> +    {
> +      try_dse = true;
> +      size_t i, j, k;
> +      int num_tests = 0, max_tests
> +               = opt_for_fn (fun, param_modref_max_tests);
> +      modref_base_node <alias_set_type> *base_node;
> +      modref_ref_node <alias_set_type> *ref_node;
> +      modref_access_node *access_node;
> +      FOR_EACH_VEC_SAFE_ELT (stores->bases, i, base_node)
> +       {
> +         if (base_node->every_ref)
> +           {
> +             try_dse = false;
> +             break;
> +           }
> +         FOR_EACH_VEC_SAFE_ELT (base_node->refs, j, ref_node)
> +           {
> +             if (base_node->every_ref)
> +               {
> +                 try_dse = false;
> +                 break;
> +               }
> +             FOR_EACH_VEC_SAFE_ELT (ref_node->accesses, k, access_node)
> +               if (num_tests++ > max_tests
> +                   || !access_node->parm_offset_known)
> +                 {
> +                   try_dse = false;
> +                   break;
> +                 }
> +             if (!try_dse)
> +               break;
> +           }
> +         if (!try_dse)
> +           break;
> +       }
> +    }
>  }
>
>  /* Get function summary for FUNC if it exists, return NULL otherwise.  */
> @@ -2803,7 +2850,7 @@ analyze_function (function *f, bool ipa)
>        summary = NULL;
>      }
>    else if (summary)
> -    summary->finalize ();
> +    summary->finalize (current_function_decl);
>    if (summary_lto && !summary_lto->useful_p (ecf_flags))
>      {
>        summaries_lto->remove (fnode);
> @@ -3524,7 +3571,7 @@ read_section (struct lto_file_decl_data *file_data, const char *data,
>             }
>         }
>        if (flag_ltrans)
> -       modref_sum->finalize ();
> +       modref_sum->finalize (node->decl);
>        if (dump_file)
>         {
>           fprintf (dump_file, "Read modref for %s\n",
> @@ -3682,7 +3729,7 @@ update_signature (struct cgraph_node *node)
>         r_lto->dump (dump_file);
>      }
>    if (r)
> -    r->finalize ();
> +    r->finalize (node->decl);
>    return;
>  }
>
> @@ -4907,7 +4954,7 @@ pass_ipa_modref::execute (function *)
>         for (struct cgraph_node *cur = component_node; cur;
>              cur = ((struct ipa_dfs_info *) cur->aux)->next_cycle)
>           if (modref_summary *sum = optimization_summaries->get (cur))
> -           sum->finalize ();
> +           sum->finalize (cur->decl);
>        if (dump_file)
>         modref_propagate_dump_scc (component_node);
>      }
> diff --git a/gcc/ipa-modref.h b/gcc/ipa-modref.h
> index 9a8d565d770..43353ad2ef4 100644
> --- a/gcc/ipa-modref.h
> +++ b/gcc/ipa-modref.h
> @@ -38,13 +38,14 @@ struct GTY(()) modref_summary
>    /* Flags coputed by finalize method.  */
>    unsigned global_memory_read : 1;
>    unsigned global_memory_written : 1;
> +  unsigned try_dse : 1;
>
>
>    modref_summary ();
>    ~modref_summary ();
>    void dump (FILE *);
>    bool useful_p (int ecf_flags, bool check_flags = true);
> -  void finalize ();
> +  void finalize (tree);
>  };
>
>  modref_summary *get_modref_function_summary (cgraph_node *func);
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-1.c b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-1.c
> new file mode 100644
> index 00000000000..1f80cc57c57
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-1.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-dse1-details"  } */
> +volatile int *ptr;
> +struct a {
> +       int a,b,c;
> +} a;
> +__attribute__((noinline))
> +static int init (struct a*a)
> +{
> +       a->a=0;
> +       a->b=1;
> +}
> +__attribute__((noinline))
> +static int use (struct a*a)
> +{
> +       if (a->c != 3)
> +               *ptr=5;
> +}
> +
> +void
> +main(void)
> +{
> +       struct a a;
> +       init (&a);
> +       a.c=3;
> +       use (&a);
> +}
> +/* { dg-final { scan-tree-dump "Deleted dead store: init" "dse1" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-2.c b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-2.c
> new file mode 100644
> index 00000000000..e41d065a5e3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-2.c
> @@ -0,0 +1,31 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-dse2-details"  } */
> +volatile int *ptr;
> +struct a {
> +       int a,b,c;
> +} a;
> +__attribute__((noinline))
> +static int init (struct a*a)
> +{
> +       a->a=0;
> +       a->b=1;
> +       a->c=1;
> +}
> +__attribute__((noinline))
> +static int use (struct a*a)
> +{
> +       if (a->c != 3)
> +               *ptr=5;
> +}
> +
> +void
> +main(void)
> +{
> +       struct a a;
> +       init (&a);
> +       a.c=3;
> +       use (&a);
> +}
> +/* Only DSE2 is tracking live bytes needed to figure out that store to c is
> +   also dead above.  */
> +/* { dg-final { scan-tree-dump "Deleted dead store: init" "dse2" } } */
> diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c
> index 17ff6bb582c..2965902912f 100644
> --- a/gcc/tree-ssa-alias.c
> +++ b/gcc/tree-ssa-alias.c
> @@ -782,7 +782,7 @@ ao_ref_alias_ptr_type (ao_ref *ref)
>     The access is assumed to be only to or after of the pointer target adjusted
>     by the offset, not before it (even in the case RANGE_KNOWN is false).  */
>
> -static void
> +void
>  ao_ref_init_from_ptr_and_range (ao_ref *ref, tree ptr,
>                                 bool range_known,
>                                 poly_int64 offset,
> diff --git a/gcc/tree-ssa-alias.h b/gcc/tree-ssa-alias.h
> index 275dea10397..64d4865f58d 100644
> --- a/gcc/tree-ssa-alias.h
> +++ b/gcc/tree-ssa-alias.h
> @@ -111,6 +111,9 @@ ao_ref::max_size_known_p () const
>  /* In tree-ssa-alias.c  */
>  extern void ao_ref_init (ao_ref *, tree);
>  extern void ao_ref_init_from_ptr_and_size (ao_ref *, tree, tree);
> +extern void ao_ref_init_from_ptr_and_range (ao_ref *, tree, bool,
> +                                           poly_int64, poly_int64,
> +                                           poly_int64);
>  extern tree ao_ref_base (ao_ref *);
>  extern alias_set_type ao_ref_alias_set (ao_ref *);
>  extern alias_set_type ao_ref_base_alias_set (ao_ref *);
> diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
> index 27287fe88ee..0e8c4ed1435 100644
> --- a/gcc/tree-ssa-dse.c
> +++ b/gcc/tree-ssa-dse.c
> @@ -40,6 +40,9 @@ along with GCC; see the file COPYING3.  If not see
>  #include "gimplify.h"
>  #include "tree-eh.h"
>  #include "cfganal.h"
> +#include "cgraph.h"
> +#include "ipa-modref-tree.h"
> +#include "ipa-modref.h"
>
>  /* This file implements dead store elimination.
>
> @@ -1027,6 +1030,101 @@ delete_dead_or_redundant_assignment (gimple_stmt_iterator *gsi, const char *type
>    release_defs (stmt);
>  }
>
> +/* Try to prove, using modref summary, that all memory written to by a call is
> +   dead and remove it.  Assume that if return value is written to memory
> +   it is already proved to be dead.  */
> +
> +static bool
> +dse_optimize_call (gimple_stmt_iterator *gsi, sbitmap live_bytes)
> +{
> +  gcall *stmt = dyn_cast <gcall *> (gsi_stmt (*gsi));
> +
> +  if (!stmt)
> +    return false;
> +
> +  tree callee = gimple_call_fndecl (stmt);
> +
> +  if (!callee)
> +    return false;
> +
> +  /* Pure/const functions are optimized by normal DCE
> +     or handled as store above.  */
> +  int flags = gimple_call_flags (stmt);
> +  if ((flags & (ECF_PURE|ECF_CONST|ECF_NOVOPS))
> +      && !(flags & (ECF_LOOPING_CONST_OR_PURE)))
> +    return false;
> +
> +  cgraph_node *node = cgraph_node::get (callee);
> +  if (!node)
> +    return false;
> +
> +  if (stmt_could_throw_p (cfun, stmt)
> +      && !cfun->can_delete_dead_exceptions)
> +    return false;
> +
> +  /* If return value is used the call is not dead.  */
> +  tree lhs = gimple_call_lhs (stmt);
> +  if (lhs && TREE_CODE (lhs) == SSA_NAME)
> +    {
> +      imm_use_iterator ui;
> +      gimple *use_stmt;
> +      FOR_EACH_IMM_USE_STMT (use_stmt, ui, lhs)
> +       if (!is_gimple_debug (use_stmt))
> +         return false;
> +    }
> +
> +  /* Verify that there are no side-effects except for return value
> +     and memory writes tracked by modref.  */
> +  modref_summary *summary = get_modref_function_summary (node);
> +  if (!summary || !summary->try_dse)
> +    return false;
> +
> +  modref_base_node <alias_set_type> *base_node;
> +  modref_ref_node <alias_set_type> *ref_node;
> +  modref_access_node *access_node;
> +  size_t i, j, k;
> +  bool by_clobber_p = false;
> +
> +  /* Walk all memory writes and verify that they are dead.  */
> +  FOR_EACH_VEC_SAFE_ELT (summary->stores->bases, i, base_node)
> +    FOR_EACH_VEC_SAFE_ELT (base_node->refs, j, ref_node)
> +      FOR_EACH_VEC_SAFE_ELT (ref_node->accesses, k, access_node)
> +       {
> +         gcc_checking_assert (access_node->parm_offset_known);
> +
> +         tree arg;
> +         if (access_node->parm_index == MODREF_STATIC_CHAIN_PARM)
> +           arg = gimple_call_chain (stmt);
> +         else
> +           arg = gimple_call_arg (stmt, access_node->parm_index);
> +
> +         ao_ref ref;
> +         poly_offset_int off = (poly_offset_int)access_node->offset
> +               + ((poly_offset_int)access_node->parm_offset
> +                  << LOG2_BITS_PER_UNIT);
> +         poly_int64 off2;
> +         if (!off.to_shwi (&off2))
> +           return false;
> +         ao_ref_init_from_ptr_and_range
> +                (&ref, arg, true, off2, access_node->size,
> +                 access_node->max_size);
> +         ref.ref_alias_set = ref_node->ref;
> +         ref.base_alias_set = base_node->base;
> +
> +         bool byte_tracking_enabled
> +             = setup_live_bytes_from_ref (&ref, live_bytes);
> +         enum dse_store_status store_status;
> +
> +         store_status = dse_classify_store (&ref, stmt,
> +                                            byte_tracking_enabled,
> +                                            live_bytes, &by_clobber_p);
> +         if (store_status != DSE_STORE_DEAD)
> +           return false;
> +       }
> +  delete_dead_or_redundant_assignment (gsi, "dead", need_eh_cleanup);
> +  return true;
> +}
> +
>  /* Attempt to eliminate dead stores in the statement referenced by BSI.
>
>     A dead store is a store into a memory location which will later be
> @@ -1050,8 +1148,13 @@ dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes)
>      return;
>
>    ao_ref ref;
> +  /* If this is not a store we can still remove dead call using
> +     modref summary.  */
>    if (!initialize_ao_ref_for_dse (stmt, &ref))
> -    return;
> +    {
> +      dse_optimize_call (gsi, live_bytes);
> +      return;
> +    }
>
>    /* We know we have virtual definitions.  We can handle assignments and
>       some builtin calls.  */
> @@ -1162,6 +1265,9 @@ dse_optimize_stmt (function *fun, gimple_stmt_iterator *gsi, sbitmap live_bytes)
>           || (stmt_could_throw_p (fun, stmt)
>               && !fun->can_delete_dead_exceptions)))
>      {
> +      /* See if we can remove complete call.  */
> +      if (dse_optimize_call (gsi, live_bytes))
> +       return;
>        /* Make sure we do not remove a return slot we cannot reconstruct
>          later.  */
>        if (gimple_call_return_slot_opt_p (as_a <gcall *>(stmt))


More information about the Gcc-patches mailing list