[PATCH] Proposal for IPA init() constant propagation

Erick Ochoa erick.ochoa@theobroma-systems.com
Thu Jan 23 16:13:00 GMT 2020


Forgot to attach the test cases.

On 2020-01-23 10:20 a.m., Erick Ochoa wrote:
> Hi Jan,
> 
> Here is a patch I just rebased against
> 
> commit 2589beb1d1a065f75a5515c9e2698de12a421913 (origin/master, origin/HEAD, master)
> Author: GCC Administrator <gccadmin@gcc.gnu.org>
> Date:   Sun Jan 19 00:16:30 2020 +0000
> 
> I also include some test cases.
> To run test cases, just set your path for the gcc
> executable compiled with the patch and run make all.
> The test verify the semantics of C code haven't changed.
> 
> If ./test $number prints out several "SUCCESS", we haven't
> changed the semantics.
> If there is at least 1 FAILURE, the semantics have changed.
> I just ran and there is all "SUCCESS".
> 
> To verify that the variables have indeed been propagated,
> you can do: grep -n -R 'Eliminated' and it should eliminate
> variables prefixed with "SUCCESS_{0,1,2,3}".
> Eliminating variables prefixed with "FAILURE_" is indicative
> of unsound changes. After running the tests, I confirm the
> patch eliminated the variables targeted.
> 
> 
> Please let me know if you have any questions.
> 
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index b1423d1dbfd..a9bdc162e63 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -1400,6 +1400,7 @@ OBJS = \
>  	internal-fn.o \
>  	ipa-cp.o \
>  	ipa-sra.o \
> +	ipa-initcall-cp.o \
>  	ipa-devirt.o \
>  	ipa-fnsummary.o \
>  	ipa-polymorphic-call.o \
> diff --git a/gcc/cgraph.c b/gcc/cgraph.c
> index 95b523d6be5..82cdf660f07 100644
> --- a/gcc/cgraph.c
> +++ b/gcc/cgraph.c
> @@ -3626,6 +3626,7 @@ cgraph_node::get_untransformed_body (void)
>    name = lto_get_decl_name_mapping (file_data, name);
>    struct lto_in_decl_state *decl_state
>  	 = lto_get_function_in_decl_state (file_data, decl);
> +  //gcc_assert (decl_state != NULL);
>  
>    cgraph_node *origin = this;
>    while (origin->clone_of)
> diff --git a/gcc/cgraph.h b/gcc/cgraph.h
> index 0ace13df1f9..e9c96fc5d32 100644
> --- a/gcc/cgraph.h
> +++ b/gcc/cgraph.h
> @@ -937,9 +937,11 @@ struct GTY((tag ("SYMTAB_FUNCTION"))) cgraph_node : public symtab_node
>        split_part (false), indirect_call_target (false), local (false),
>        versionable (false), can_change_signature (false),
>        redefined_extern_inline (false), tm_may_enter_irr (false),
> -      ipcp_clone (false), m_uid (uid), m_summary_id (-1)
> +      ipcp_clone (false), m_uid (uid), m_summary_id (-1), skip_ipa_cp(false)
>    {}
>  
> +  bool skip_ipa_cp;
> +
>    /* Remove the node from cgraph and all inline clones inlined into it.
>       Skip however removal of FORBIDDEN_NODE and return true if it needs to be
>       removed.  This allows to call the function from outer loop walking clone
> diff --git a/gcc/common.opt b/gcc/common.opt
> index 5692cd04374..6ee9852d6b4 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -3376,4 +3376,10 @@ fipa-ra
>  Common Report Var(flag_ipa_ra) Optimization
>  Use caller save register across calls if possible.
>  
> +finitcall-cp
> +Common Report Var(flag_initcall_cp) TBD.
> +
> +finitcall-cp-dryrun
> +Common Report Var(flag_initcall_cp_dryrun) TBD.
> +
>  ; This comment is to ensure we retain the blank line above.
> diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
> index 17da1d8e8a7..13311c24e43 100644
> --- a/gcc/ipa-cp.c
> +++ b/gcc/ipa-cp.c
> @@ -1188,7 +1188,7 @@ initialize_node_lattices (struct cgraph_node *node)
>  
>    gcc_checking_assert (node->has_gimple_body_p ());
>  
> -  if (!ipa_get_param_count (info))
> +  if (!ipa_get_param_count (info) || node->skip_ipa_cp)
>      disable = true;
>    else if (node->local)
>      {
> diff --git a/gcc/ipa-initcall-cp.c b/gcc/ipa-initcall-cp.c
> new file mode 100644
> index 00000000000..264bf38549d
> --- /dev/null
> +++ b/gcc/ipa-initcall-cp.c
> @@ -0,0 +1,1467 @@
> +/* Initcall constant propagation pass.  */
> +
> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "backend.h"
> +#include "tree.h"
> +#include "tree-eh.h"
> +#include "gimple.h"
> +#include "gimple-expr.h"
> +#include "gimple-iterator.h"
> +#include "predict.h"
> +#include "alloc-pool.h"
> +#include "tree-pass.h"
> +#include "cgraph.h"
> +#include "diagnostic.h"
> +#include "fold-const.h"
> +#include "gimple-fold.h"
> +#include "symbol-summary.h"
> +#include "tree-vrp.h"
> +#include "ipa-prop.h"
> +#include "tree-pretty-print.h"
> +#include "tree-inline.h"
> +#include "ipa-fnsummary.h"
> +#include "ipa-utils.h"
> +#include "tree-ssa-ccp.h"
> +#include "stringpool.h"
> +#include "attribs.h"
> +#include "gimple-pretty-print.h"
> +#include "ssa.h"
> +
> +bool
> +reach_nodes_via_bb1 (cgraph_node *n2);
> +static bool
> +bb1_dominates_bb2 (basic_block, basic_block, cgraph_node *);
> +static bool
> +write_before_call (struct ipa_ref *, struct ipa_ref *);
> +static bool
> +call_before_read (struct ipa_ref *, struct ipa_ref *);
> +static hash_map<const char *, unsigned> *clone_num_suffixes1;
> +static hash_map<cgraph_node *, cgraph_node *> *func_to_clone;
> +static vec<struct cgraph_node *>
> +get_nondominated_callees (cgraph_node *caller, cgraph_node *callee,
> +			  bool *exitEarly = NULL);
> +
> +static bool
> +comdat_can_be_unshared_p_1 (symtab_node *node)
> +{
> +  if (!node->externally_visible)
> +    return true;
> +  if (node->address_can_be_compared_p ())
> +    {
> +      struct ipa_ref *ref;
> +
> +      for (unsigned int i = 0; node->iterate_referring (i, ref); i++)
> +	if (ref->address_matters_p ())
> +	  return false;
> +    }
> +
> +  /* If the symbol is used in some weird way,
> +   * better to not touch it.  */
> +  if (node->force_output)
> +    return false;
> +
> +  /* Explicit instantiations needs to be output when possibly
> +     used externally.  */
> +  if (node->forced_by_abi && TREE_PUBLIC (node->decl)
> +      && (node->resolution != LDPR_PREVAILING_DEF_IRONLY
> +	  && !flag_whole_program))
> +    return false;
> +
> +  /* Non-readonly and volatile variables cannot be duplicated.  */
> +  if (is_a<varpool_node *> (node)
> +      && (!TREE_READONLY (node->decl) || TREE_THIS_VOLATILE (node->decl)))
> +    return false;
> +  return true;
> +}
> +
> +/* COMDAT functions must be shared only if they have address taken,
> +   otherwise we can produce our own private implementation with
> +   -fwhole-program.
> +   Return true when turning COMDAT function static cannot lead to wrong
> +   code when the resulting object links with a library defining same
> +   COMDAT.
> +
> +   Virtual functions do have their addresses taken from the vtables,
> +   but in C++ there is no way to compare their addresses
> +   for equality.  */
> +
> +static bool
> +comdat_can_be_unshared_p (symtab_node *node)
> +{
> +  if (!comdat_can_be_unshared_p_1 (node))
> +    return false;
> +  if (node->same_comdat_group)
> +    {
> +      symtab_node *next;
> +
> +      /* If more than one function is in the same COMDAT group, it must
> +	 be shared even if just one function in the comdat group has
> +	 address taken.  */
> +      for (next = node->same_comdat_group; next != node;
> +	   next = next->same_comdat_group)
> +	if (!comdat_can_be_unshared_p_1 (next))
> +	  return false;
> +    }
> +  return true;
> +}
> +
> +/* Return true when function NODE should
> + * be considered externally visible.  */
> +
> +static bool
> +cgraph_externally_visible_p (struct cgraph_node *node, bool whole_program)
> +{
> +  while (node->transparent_alias && node->definition)
> +    node = node->get_alias_target ();
> +  if (!node->definition)
> +    return false;
> +  if (!TREE_PUBLIC (node->decl) || DECL_EXTERNAL (node->decl))
> +    return false;
> +
> +  /* Do not try to localize built-in functions yet.
> +    One of problems is that we
> +     end up mangling their asm for
> +     WHOPR that makes it impossible to call them
> +     using the implicit built-in declarations
> +     anymore.  Similarly this enables
> +     us to remove them as unreachable before
> +     actual calls may appear during
> +     expansion or folding.  */
> +  if (fndecl_built_in_p (node->decl))
> +    return true;
> +
> +  /* If linker counts on us, we must preserve the function.  */
> +  if (node->used_from_object_file_p ())
> +    return true;
> +  if (DECL_PRESERVE_P (node->decl))
> +    return true;
> +  if (lookup_attribute ("externally_visible", DECL_ATTRIBUTES (node->decl)))
> +    return true;
> +  if (lookup_attribute ("noipa", DECL_ATTRIBUTES (node->decl)))
> +    return true;
> +  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
> +      && lookup_attribute ("dllexport", DECL_ATTRIBUTES (node->decl)))
> +    return true;
> +  if (node->resolution == LDPR_PREVAILING_DEF_IRONLY)
> +    return false;
> +  /* When doing LTO or whole program, we can bring COMDAT functions
> +     static.
> +     This improves code quality and we know we will duplicate them at
> +     most twice
> +     (in the case that we are not using plugin and link with object
> +     file implementing same COMDAT)  */
> +  if (((in_lto_p || whole_program) && !flag_incremental_link)
> +      && DECL_COMDAT (node->decl) && comdat_can_be_unshared_p (node))
> +    return false;
> +
> +  /* When doing link time optimizations,
> +   * hidden symbols become local.  */
> +  if ((in_lto_p && !flag_incremental_link)
> +      && (DECL_VISIBILITY (node->decl) == VISIBILITY_HIDDEN
> +	  || DECL_VISIBILITY (node->decl) == VISIBILITY_INTERNAL)
> +      /* Be sure that node is defined in IR file, not in other object
> +	 file.  In that case we don't set
> +	 used_from_other_object_file.  */
> +      && node->definition)
> +    ;
> +  else if (!whole_program)
> +    return true;
> +
> +  if (MAIN_NAME_P (DECL_NAME (node->decl)))
> +    return true;
> +
> +  return false;
> +}
> +
> +typedef vec<struct ipa_ref *> ipa_ref_vec;
> +
> +// XXX: Move to ipa-ref.h
> +static const char *
> +stringify_ipa_ref_use (ipa_ref_use use)
> +{
> +  switch (use)
> +    {
> +    case IPA_REF_LOAD:
> +      return "IPA_REF_LOAD";
> +      break;
> +    case IPA_REF_STORE:
> +      return "IPA_REF_STORE";
> +      break;
> +    case IPA_REF_ADDR:
> +      return "IPA_REF_ADDR";
> +      break;
> +    case IPA_REF_ALIAS:
> +      return "IPA_REF_ALIAS";
> +      break;
> +    default:
> +      return "<unknown>";
> +    }
> +}
> +
> +static void
> +load_function_body_of_ipa_ref (cgraph_node *node)
> +{
> +  if (in_lto_p)
> +    {
> +      cgraph_node *f_cnode2 = node->ultimate_alias_target ();
> +      if (dump_file)
> +	{
> +	  fprintf (dump_file, "%s: for function '%s'\n", __func__,
> +		   f_cnode2->dump_asm_name ());
> +	  dump_node (f_cnode2->decl, TDF_DETAILS, dump_file);
> +	}
> +      f_cnode2->get_untransformed_body ();
> +    }
> +}
> +
> +static void
> +load_function_body_of_ipa_ref (struct ipa_ref *ref)
> +{
> +  /* IPA passes don't get the function bodies during LTO.  */
> +  if (in_lto_p)
> +    {
> +      symtab_node *f_node = ref->referring;
> +      cgraph_node *f_cnode = dyn_cast<cgraph_node *> (f_node);
> +      load_function_body_of_ipa_ref (f_cnode);
> +    }
> +}
> +
> +static void
> +dump_vnode_ipa_ref (ipa_ref *ref)
> +{
> +  fprintf (dump_file, "Reference type: %s\n", stringify_ipa_ref_use (ref->use));
> +
> +  symtab_node *f_node = ref->referring;
> +  fprintf (dump_file, "Reference function: %s\n", f_node->dump_asm_name ());
> +
> +  fprintf (dump_file, "Gimple stmt (@0x%lx, lto_stmt_uid: %u):\n",
> +	   (long) ref->stmt, ref->lto_stmt_uid);
> +  load_function_body_of_ipa_ref (ref);
> +  print_gimple_stmt (dump_file, ref->stmt, 0, TDF_NONE);
> +}
> +
> +// XXX: Move to tree.h
> +static bool
> +tree_code_is_cst (tree op)
> +{
> +  // XXX: use cprop_constant_p () here?
> +  int code = TREE_CODE (op);
> +  if (code == INTEGER_CST || code == REAL_CST || code == COMPLEX_CST
> +      || code == VECTOR_CST)
> +    return true;
> +  return false;
> +}
> +
> +/* Return true of all ops of assignment are constants.  */
> +static bool
> +gimple_assign_is_single_const (gimple *stmt)
> +{
> +  tree op;
> +
> +  gcc_assert (gimple_code (stmt) == GIMPLE_ASSIGN);
> +
> +  if (gimple_has_volatile_ops (stmt))
> +    {
> +      if (dump_file)
> +	fprintf (dump_file, "has volatile ops!\n");
> +      return false;
> +    }
> +
> +  if (gimple_num_ops (stmt) != 2)
> +    {
> +      if (dump_file)
> +	fprintf (dump_file, "wrong num of ops: %u!\n", gimple_num_ops (stmt));
> +      return false;
> +    }
> +
> +  op = gimple_op (stmt, 1);
> +  if (!tree_code_is_cst (op))
> +    {
> +      if (dump_file)
> +	fprintf (dump_file, "op is not cst!\n");
> +      return false;
> +    }
> +
> +  return true;
> +}
> +
> +// XXX: Move to ipa-utils.c
> +// XXX: from/to could be const
> +static bool
> +path_exists_dfs (hash_set<cgraph_node *> *visited, cgraph_node *current,
> +		 cgraph_node *target)
> +{
> +  if (current == target)
> +    return true;
> +
> +  visited->add (current);
> +
> +  cgraph_edge *cs;
> +  for (cs = current->callees; cs; cs = cs->next_callee)
> +    {
> +      cgraph_node *callee = cs->callee->function_symbol ();
> +      if (!visited->contains (callee))
> +	{
> +	  if (path_exists_dfs (visited, callee, target))
> +	    {
> +	      return true;
> +	    }
> +	}
> +    }
> +  return false;
> +}
> +
> +// XXX: Move to ipa-utils.c
> +// XXX: from/to could be const
> +static bool
> +path_exists (cgraph_node *from, cgraph_node *to)
> +{
> +  hash_set<cgraph_node *> visited;
> +  bool found_path = path_exists_dfs (&visited, from, to);
> +  visited.empty ();
> +
> +  if (dump_file)
> +    {
> +      fprintf (dump_file, "Found ");
> +      if (!found_path)
> +	fprintf (dump_file, "*no* ");
> +      fprintf (dump_file, "path from %s to %s.\n", from->name (), to->name ());
> +    }
> +  return found_path;
> +}
> +
> +static vec<struct cgraph_node *>
> +get_nondominated_callees (cgraph_node *caller, cgraph_node *callee,
> +			  bool *exitEarly)
> +{
> +  // assumptions:
> +  //  * there is a callsite from caller to callee
> +
> +  if (in_lto_p)
> +    caller->get_untransformed_body ();
> +
> +  function *func = DECL_STRUCT_FUNCTION (caller->decl);
> +  basic_block bb;
> +  bool found = false;
> +  FOR_EACH_BB_FN (bb, func)
> +    {
> +      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
> +	   gsi_next (&gsi))
> +	{
> +	  gimple *stmt = gsi_stmt (gsi);
> +	  if (gimple_code (stmt) != GIMPLE_CALL)
> +	    continue;
> +
> +	  // The question here is, what happens if we don't have
> +	  // a tree that is of the type
> +	  // var = call()
> +	  // and instead we have a tree of the type
> +	  // call()
> +	  // are these trees even possible?
> +	  // I would prefer if they are all the same...
> +	  if (dump_file)
> +	    print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
> +	  tree rhs = gimple_op (stmt, 1);
> +	  if (TREE_CODE (rhs) == RECORD_TYPE)
> +	    {
> +	      gcc_assert (exitEarly);
> +	      *exitEarly = true;
> +	      return vNULL;
> +	    }
> +	  tree callee_decl = gimple_call_fndecl (stmt);
> +	  if (!callee_decl)
> +	    {
> +	      gcc_assert (exitEarly);
> +	      *exitEarly = true;
> +	      return vNULL;
> +	    }
> +	  cgraph_node *callee_node = cgraph_node::get (callee_decl);
> +	  if (callee_node != callee)
> +	    continue;
> +
> +	  found = true;
> +	}
> +      if (found)
> +	break;
> +    }
> +
> +  gcc_assert (found);
> +
> +  // At this point in the program, we hold a valid bb.
> +  // The callee is located
> +  vec<struct cgraph_node *> callees = vNULL;
> +  basic_block bb_c;
> +  FOR_EACH_BB_FN (bb_c, func)
> +    {
> +      bool self_dominates = bb == bb_c;
> +      bool bb_dominates_bbc = bb1_dominates_bb2 (bb, bb_c, caller);
> +      if (bb_dominates_bbc && !self_dominates)
> +	continue;
> +
> +      for (gimple_stmt_iterator gsi = gsi_start_bb (bb_c); !gsi_end_p (gsi);
> +	   gsi_next (&gsi))
> +	{
> +	  gimple *stmt = gsi_stmt (gsi);
> +	  if (gimple_code (stmt) != GIMPLE_CALL)
> +	    continue;
> +
> +	  tree callee_decl = gimple_call_fndecl (stmt);
> +	  cgraph_node *callee_node = cgraph_node::get (callee_decl);
> +
> +	  if (fndecl_built_in_p (callee_node->decl))
> +	    continue;
> +
> +	  if (self_dominates && callee_node == callee)
> +	    {
> +	      break;
> +	    }
> +
> +	  callees.safe_push (callee_node);
> +	}
> +    }
> +  return callees;
> +}
> +
> +// XXX: Move to class symtab_node in cgraph.h
> +static ipa_ref *
> +symtab_node_iterate_address_uses (symtab_node *n, unsigned i, ipa_ref *&ref)
> +{
> +  ipa_ref *retval = NULL;
> +  for (int i = 0; n->iterate_referring (i, retval); i++)
> +    if (ref->use == IPA_REF_ADDR)
> +      return ref;
> +  return NULL;
> +}
> +
> +// XXX: Move to class symtab_node in cgraph.h
> +static bool
> +symtab_node_address_matters_p (symtab_node *n)
> +{
> +  struct ipa_ref *ref;
> +
> +  if (!n->address_can_be_compared_p ())
> +    return false;
> +  if (n->externally_visible || n->force_output)
> +    return true;
> +
> +  for (unsigned int i = 0; n->iterate_referring (i, ref); i++)
> +    if (ref->address_matters_p ())
> +      return true;
> +  return false;
> +}
> +
> +static bool
> +bb1_dominates_bb2 (basic_block bb1, basic_block bb2, cgraph_node *cnode)
> +{
> +  // self dominance
> +  if (bb1 == bb2)
> +    return true;
> +
> +  push_cfun (DECL_STRUCT_FUNCTION (cnode->decl));
> +
> +  /* If dominator info is not available, we need to calculate it.  */
> +  if (!dom_info_available_p (CDI_DOMINATORS))
> +    calculate_dominance_info (CDI_DOMINATORS);
> +
> +  /* Check if the read is dominated by the write.  */
> +  bool ret = dominated_by_p (CDI_DOMINATORS, bb2, bb1);
> +
> +  /* Restore cfun.  */
> +  pop_cfun ();
> +
> +  return ret;
> +}
> +
> +static bool
> +write_before_read_in_function (struct ipa_ref *write, struct ipa_ref *read)
> +{
> +  basic_block w_bb = gimple_bb (write->stmt);
> +  basic_block r_bb = gimple_bb (read->stmt);
> +
> +  if (w_bb != r_bb)
> +    {
> +      /*
> +       * The dominator framework operates on cfun.
> +       * Therefore we need to set cfun accordingly.
> +       */
> +      symtab_node *w_node = write->referring;
> +      cgraph_node *w_cnode = dyn_cast<cgraph_node *> (w_node);
> +      return bb1_dominates_bb2 (w_bb, r_bb, w_cnode);
> +    }
> +
> +  gimple_stmt_iterator gsi;
> +  for (gsi = gsi_start_bb (w_bb); !gsi_end_p (gsi); gsi_next (&gsi))
> +    {
> +      if (gsi_stmt (gsi) == write->stmt)
> +	return true;
> +      if (gsi_stmt (gsi) == read->stmt)
> +	return false;
> +    }
> +
> +  /* Neither write nor read found it BB.  */
> +  gcc_assert (1);
> +
> +  return false;
> +}
> +
> +/*
> + * DFS over callees and return if callee is a or b.
> + */
> +static cgraph_node *
> +find_cgraph_in_callee_set (cgraph_node *n, hash_set<cgraph_node *> set,
> +			   cgraph_node *a, cgraph_node *b)
> +{
> +  cgraph_edge *cs;
> +  for (cs = n->callees; cs; cs = cs->next_callee)
> +    {
> +      cgraph_node *callee = cs->callee->function_symbol ();
> +      if (dump_file)
> +	fprintf (dump_file, "Child of %s: %s\n", n->dump_asm_name (),
> +		 callee->dump_asm_name ());
> +      if (callee == a)
> +	return a;
> +      if (callee == b)
> +	return b;
> +      if (!set.contains (callee))
> +	continue;
> +      return find_cgraph_in_callee_set (callee, set, a, b);
> +    }
> +  return NULL;
> +}
> +
> +/* Walks back along caller relations until main is found.  */
> +static cgraph_node *
> +get_ancestor_main_dfs (hash_set<cgraph_node *> *visited,
> +		       vec<cgraph_node *> *path, cgraph_node *node)
> +{
> +  if (MAIN_NAME_P (DECL_NAME (node->decl)))
> +    {
> +      path->safe_push (node);
> +      return node;
> +    }
> +
> +  visited->add (node);
> +
> +  cgraph_edge *cs;
> +  for (cs = node->callers; cs; cs = cs->next_caller)
> +    {
> +      cgraph_node *caller = cs->caller->function_symbol ();
> +      if (!visited->contains (caller))
> +	{
> +	  cgraph_node *main = get_ancestor_main_dfs (visited, path, caller);
> +	  if (main)
> +	    {
> +	      path->safe_push (node);
> +	      return main;
> +	    }
> +	}
> +    }
> +  return NULL;
> +}
> +
> +static cgraph_node *
> +get_path_from_main_to (cgraph_node *node, vec<cgraph_node *> *path)
> +{
> +  hash_set<cgraph_node *> visited;
> +  cgraph_node *main = get_ancestor_main_dfs (&visited, path, node);
> +  visited.empty ();
> +  return main;
> +}
> +
> +/*
> + * Verifying that a stmt s1 is dominated by a stmt s2
> + * across function borders is not trivial with the available
> + * infrastructure (dominator algorithm on bb level plus cgraph).
> + * If we rule out external calls/callbacks, then we still need
> + * to guarantee a write on each possible path to the read.
> + *
> + * The implemented solution to this problem, which is of course
> + * too strict,
> + * but also not too compute/memory intensive is as follows:
> + *
> + * - Check if write is reachable by main () by only looking into
> + *   the first bb in each function on the path.
> + * - All call stmts between main () and write must not possibly
> + *   reach a read.  We consider indirect call statements as
> + *   possible reaching read (unless we can prove opposite).
> + *
> + * The external calls/callbacks are ruled out as follows:
> + *
> + * - all possible ancestors of read must not be external visible
> + * - all possible ancestors of read must not be function pointers
> + * - Something else?
> + */
> +static bool
> +write_before_read_across_function_borders (struct ipa_ref *write,
> +					   struct ipa_ref *read)
> +{
> +  symtab_node *w_node = write->referring;
> +  cgraph_node *w_cnode = dyn_cast<cgraph_node *> (w_node);
> +  symtab_node *r_node = read->referring;
> +  cgraph_node *r_cnode = dyn_cast<cgraph_node *> (r_node);
> +  cgraph_node *main;
> +
> +  /* Get main () function.  */
> +  vec<cgraph_node *> pw = vNULL;
> +  main = get_path_from_main_to (w_cnode, &pw);
> +  if (!main)
> +    {
> +      if (dump_file)
> +	fprintf (dump_file, "main () is not ancestor of write -> skipping.\n");
> +      return false;
> +    }
> +
> +  vec<cgraph_node *> pr = vNULL;
> +  cgraph_node *main_to_read = get_path_from_main_to (r_cnode, &pr);
> +  if (!main_to_read)
> +    {
> +      if (dump_file)
> +	fprintf (dump_file, "main () is not ancestor of read -> skipping.\n");
> +      return false;
> +    }
> +
> +  // TODO: is write guaranteed to happen?
> +  // if it is not guaranteed to happen, we shouldn't modify anything!
> +  // The very first approximation. There should be a path from main to w_cnode
> +  // such that only the first basic block is actually inspected.
> +  // Future work will involve looking at unconditional paths.
> +  // I think right now, all paths are reachable from first basic block
> +  if (!reach_nodes_via_bb1 (w_cnode))
> +    return false;
> +
> +  int i;
> +  cgraph_node *node_in_pr;
> +  FOR_EACH_VEC_ELT (pr, i, node_in_pr)
> +    {
> +      // I think main has to be externally visible.
> +      if (node_in_pr == main)
> +	continue;
> +
> +      /* Assure that all paths to read are not externally visible.  */
> +      if (cgraph_externally_visible_p (node_in_pr, flag_whole_program))
> +	{
> +	  if (dump_file)
> +	    fprintf (dump_file, "%s is externally visible...  skipping\n",
> +		     node_in_pr->dump_asm_name ());
> +	  return false;
> +	}
> +
> +      /* Assure that all paths to read are not
> +       * used as function pointers.  */
> +      if (node_in_pr->address_taken)
> +	{
> +	  if (dump_file)
> +	    fprintf (dump_file, "%s might be function pointer...  skipping\n",
> +		     node_in_pr->dump_asm_name ());
> +	  return false;
> +	}
> +    }
> +
> +  cgraph_node *caller = main;
> +  cgraph_node *node_in_pw;
> +  FOR_EACH_VEC_ELT (pw, i, node_in_pw)
> +    {
> +      gcc_assert (w_cnode != r_cnode);
> +      if (node_in_pw == r_cnode && path_exists (r_cnode, w_cnode))
> +	{
> +	  return call_before_read (write, read);
> +	}
> +
> +      if (node_in_pw == w_cnode && path_exists (w_cnode, r_cnode))
> +	{
> +	  return write_before_call (write, read);
> +	}
> +
> +      if (node_in_pw == main)
> +	{
> +	  continue;
> +	}
> +
> +      if (dump_file)
> +	fprintf (dump_file, "get_nondominated_callees from %s to %s\n",
> +		 caller->dump_asm_name (), node_in_pw->dump_asm_name ());
> +
> +      bool exitEarly = false;
> +      vec<cgraph_node *> non_dominated_callees
> +	= get_nondominated_callees (caller, node_in_pw, &exitEarly);
> +      if (exitEarly)
> +	return false;
> +      cgraph_node *non_dominated_callee;
> +      int j;
> +      FOR_EACH_VEC_ELT (non_dominated_callees, j, non_dominated_callee)
> +	{
> +	  if (dump_file)
> +	    fprintf (dump_file, "callee %d %s\n", j,
> +		     non_dominated_callee->dump_asm_name ());
> +	  if (path_exists (non_dominated_callee, r_cnode))
> +	    {
> +	      return false;
> +	    }
> +	}
> +
> +      caller = node_in_pw;
> +    }
> +  return true;
> +}
> +
> +static vec<struct cgraph_node *>
> +get_bb1_callees (cgraph_node *c, cgraph_node *w_cnode)
> +{
> +  vec<struct cgraph_node *> callees = vNULL;
> +  if (fndecl_built_in_p (c->decl))
> +    return vNULL;
> +
> +  if (dump_file)
> +    fprintf (dump_file, "before get_untransformed_body %s\n",
> +	     c->dump_asm_name ());
> +
> +  if (!c->definition)
> +    return vNULL;
> +
> +  push_cfun (DECL_STRUCT_FUNCTION (c->decl));
> +
> +  if (in_lto_p)
> +    c->get_untransformed_body ();
> +
> +  pop_cfun ();
> +
> +  function *func = DECL_STRUCT_FUNCTION (c->decl);
> +  /* Get first BB (after the fake entry BB).  */
> +  basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (func)->next_bb;
> +
> +  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
> +       gsi_next (&gsi))
> +    {
> +      gimple *stmt = gsi_stmt (gsi);
> +
> +      if (gimple_code (stmt) != GIMPLE_CALL)
> +	continue;
> +
> +      tree callee_decl = gimple_call_fndecl (stmt);
> +      cgraph_node *callee_node = cgraph_node::get (callee_decl);
> +      if (!path_exists (callee_node, w_cnode))
> +	continue;
> +
> +      callees.safe_push (callee_node);
> +    }
> +
> +  return callees;
> +}
> +
> +static bool
> +reach_nodes_via_bb1_dfs (hash_set<cgraph_node *> *visited, cgraph_node *n1,
> +			 cgraph_node *n2)
> +{
> +  if (n1 == n2)
> +    return true;
> +
> +  visited->add (n1);
> +
> +  vec<struct cgraph_node *> callees = get_bb1_callees (n1, n2);
> +
> +  int i;
> +  cgraph_node *callee;
> +  FOR_EACH_VEC_ELT (callees, i, callee)
> +    {
> +      if (!visited->contains (callee))
> +	{
> +	  bool found = reach_nodes_via_bb1_dfs (visited, callee, n2);
> +	  if (found)
> +	    {
> +	      callees.release ();
> +	      return true;
> +	    }
> +	}
> +    }
> +
> +  return false;
> +}
> +
> +bool
> +reach_nodes_via_bb1 (cgraph_node *n2)
> +{
> +  vec<cgraph_node *> pr = vNULL;
> +  cgraph_node *main = get_path_from_main_to (n2, &pr);
> +  hash_set<cgraph_node *> visited;
> +  bool path_exists = reach_nodes_via_bb1_dfs (&visited, main, n2);
> +  visited.empty ();
> +  pr.release ();
> +  return path_exists;
> +}
> +
> +static bool
> +write_before_call (struct ipa_ref *write, struct ipa_ref *read)
> +{
> +  symtab_node *w_node = write->referring;
> +  cgraph_node *w_cnode = dyn_cast<cgraph_node *> (w_node);
> +  symtab_node *r_node = read->referring;
> +  cgraph_node *r_cnode = dyn_cast<cgraph_node *> (r_node);
> +
> +  gcc_assert (path_exists (w_cnode, r_cnode));
> +  gcc_assert (w_cnode != r_cnode);
> +
> +  basic_block w_bb = gimple_bb (write->stmt);
> +  basic_block r_bb = gimple_bb (read->stmt);
> +
> +  if (in_lto_p)
> +    {
> +      w_cnode->get_untransformed_body ();
> +    }
> +
> +  function *func = DECL_STRUCT_FUNCTION (w_cnode->decl);
> +  basic_block c_bb;
> +  vec<struct cgraph_node *> callees = vNULL;
> +  FOR_EACH_BB_FN (c_bb, func)
> +    {
> +      bool self_dominates = w_bb == c_bb;
> +      bool w_bb_dominates_c_bb = bb1_dominates_bb2 (w_bb, c_bb, w_cnode);
> +      if (w_bb_dominates_c_bb && !self_dominates)
> +	continue;
> +
> +      for (gimple_stmt_iterator gsi = gsi_start_bb (c_bb); !gsi_end_p (gsi);
> +	   gsi_next (&gsi))
> +	{
> +	  gimple *stmt = gsi_stmt (gsi);
> +
> +	  if (stmt == write->stmt)
> +	    {
> +	      break;
> +	    }
> +
> +	  if (gimple_code (stmt) != GIMPLE_CALL)
> +	    {
> +	      continue;
> +	    }
> +	  if (dump_file)
> +	    print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
> +	  tree rhs = gimple_op (stmt, 1);
> +
> +	  if (rhs && TREE_CODE (rhs) == POINTER_TYPE)
> +	    return false;
> +
> +	  tree callee_decl = gimple_call_fndecl (stmt);
> +	  if (!callee_decl)
> +	    return false;
> +
> +	  cgraph_node *callee_node = cgraph_node::get (callee_decl);
> +	  const char *identifier
> +	    = IDENTIFIER_POINTER (DECL_NAME (callee_node->decl));
> +	  const char *sigsetjmp = "sigsetjmp";
> +	  if (strstr (identifier, sigsetjmp) != NULL)
> +	    return false;
> +
> +	  if (fndecl_built_in_p (callee_node->decl))
> +	    continue;
> +
> +	  if (dump_file)
> +	    fprintf (dump_file, "found callee %s\n",
> +		     callee_node->dump_asm_name ());
> +	  callees.safe_push (callee_node);
> +	}
> +    }
> +  cgraph_node *callee;
> +  int i;
> +  FOR_EACH_VEC_ELT (callees, i, callee)
> +    {
> +      if (path_exists (callee, r_cnode))
> +	{
> +	  return false;
> +	}
> +    }
> +
> +  return true;
> +}
> +
> +static bool
> +call_before_read (struct ipa_ref *write, struct ipa_ref *read)
> +{
> +  symtab_node *w_node = write->referring;
> +  cgraph_node *w_cnode = dyn_cast<cgraph_node *> (w_node);
> +  symtab_node *r_node = read->referring;
> +  cgraph_node *r_cnode = dyn_cast<cgraph_node *> (r_node);
> +
> +  gcc_assert (path_exists (r_cnode, w_cnode));
> +  gcc_assert (w_cnode != r_cnode);
> +
> +  basic_block w_bb = gimple_bb (write->stmt);
> +  basic_block r_bb = gimple_bb (read->stmt);
> +
> +  if (in_lto_p)
> +    r_cnode->get_untransformed_body ();
> +
> +  function *func = DECL_STRUCT_FUNCTION (r_cnode->decl);
> +  basic_block c_bb;
> +  FOR_EACH_BB_FN (c_bb, func)
> +    {
> +      bool self_dominates = c_bb == r_bb;
> +      bool call_dominates_read = bb1_dominates_bb2 (c_bb, r_bb, r_cnode);
> +      if (!call_dominates_read && !self_dominates)
> +	continue;
> +
> +      for (gimple_stmt_iterator gsi = gsi_start_bb (c_bb); !gsi_end_p (gsi);
> +	   gsi_next (&gsi))
> +	{
> +	  gimple *stmt = gsi_stmt (gsi);
> +
> +	  // self dominance
> +	  if (stmt == read->stmt)
> +	    break;
> +
> +	  if (gimple_code (stmt) != GIMPLE_CALL)
> +	    continue;
> +
> +	  if (dump_file)
> +	    print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
> +	  tree rhs = gimple_op (stmt, 1);
> +	  if (TREE_CODE (rhs) == POINTER_TYPE)
> +	    return false;
> +	  tree callee_decl = gimple_call_fndecl (stmt);
> +
> +	  cgraph_node *callee_node = cgraph_node::get (callee_decl);
> +	  const char *identifier
> +	    = IDENTIFIER_POINTER (DECL_NAME (callee_node->decl));
> +	  const char *sigsetjmp = "sigsetjmp";
> +	  if (strstr (identifier, sigsetjmp) != NULL)
> +	    return false;
> +
> +	  if (path_exists (callee_node, w_cnode))
> +	    return true;
> +	}
> +    }
> +  return false;
> +}
> +
> +static bool
> +write_before_read (struct ipa_ref *write, struct ipa_ref *read)
> +{
> +  symtab_node *w_node = write->referring;
> +  cgraph_node *w_cnode = dyn_cast<cgraph_node *> (w_node);
> +  symtab_node *r_node = read->referring;
> +  cgraph_node *r_cnode = dyn_cast<cgraph_node *> (r_node);
> +
> +  if (w_cnode == r_cnode)
> +    /* Within the same function.  */
> +    return write_before_read_in_function (write, read);
> +
> +  /* Not within the same function.  */
> +  return write_before_read_across_function_borders (write, read);
> +}
> +
> +static bool
> +write_before_all_reads (struct ipa_ref *write, const ipa_ref_vec &reads)
> +{
> +  int i;
> +  struct ipa_ref *read;
> +
> +  FOR_EACH_VEC_ELT (reads, i, read)
> +    {
> +      load_function_body_of_ipa_ref (read);
> +      if (!write_before_read (write, read))
> +	return false;
> +    }
> +  return true;
> +}
> +
> +static void
> +propagate_constant_to_read (tree write_val, struct ipa_ref *ref,
> +			    hash_set<cgraph_node *> *func)
> +{
> +  gcc_assert (ref->use == IPA_REF_LOAD);
> +  load_function_body_of_ipa_ref (ref);
> +
> +  gimple *read_stmt = ref->stmt;
> +
> +  gcc_assert (gimple_code (read_stmt) == GIMPLE_ASSIGN);
> +  gcc_assert (gimple_num_ops (read_stmt) == 2);
> +
> +  tree old_lhs = gimple_op (read_stmt, 0);
> +  symtab_node *r_node = ref->referring;
> +  cgraph_node *r_cnode = dyn_cast<cgraph_node *> (r_node);
> +
> +  // if (!func->contains (r_cnode)) func->add (r_cnode);
> +  cgraph_node **possible_clone = func_to_clone->get (r_cnode);
> +  if (!possible_clone)
> +    {
> +      const char *suffix = "test";
> +      // callers has to be vNULL, otherwise, we will be
> +      // analyzing clones...
> +      // and we don't want that...
> +      // but that means that we will need to update the callers
> +      // later...  in update_callgraph
> +      cgraph_node *clone
> +	= r_cnode->create_version_clone_with_body (vNULL, NULL, NULL, NULL,
> +						   NULL, suffix, NULL);
> +      clone->get_untransformed_body ();
> +      func_to_clone->put (r_cnode, clone);
> +    }
> +
> +  possible_clone = func_to_clone->get (r_cnode);
> +  cgraph_node *clone = *possible_clone;
> +
> +  // Build new stmt and replace old.
> +  gimple_stmt_iterator gsi;
> +  basic_block bb;
> +  // Let's create a clone with body here...
> +  // The clone should not have callers as
> +  // to not interfere with the current
> +  // analysis.
> +  // The callers will be set at the end...
> +
> +  push_cfun (DECL_STRUCT_FUNCTION (clone->decl));
> +  function *clone_func = DECL_STRUCT_FUNCTION (clone->decl);
> +  bool found = false;
> +  FOR_EACH_BB_FN (bb, clone_func)
> +    {
> +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> +	{
> +	  gimple *stmt = gsi_stmt (gsi);
> +	  if (gimple_code (stmt) != GIMPLE_ASSIGN)
> +	    continue;
> +	  tree old_val_clone = gimple_op (stmt, 1);
> +	  tree lhs = gimple_op (stmt, 0);
> +
> +	  // TODO: what happens when it is not a variable decl?
> +	  // This might actually be the source of imprecision
> +	  if (TREE_CODE (old_val_clone) != VAR_DECL)
> +	    continue;
> +
> +	  tree old_val = gimple_op (read_stmt, 1);
> +	  if (IDENTIFIER_POINTER (DECL_NAME (old_val_clone))
> +	      != IDENTIFIER_POINTER (DECL_NAME (old_val)))
> +	    continue;
> +
> +	  found = true;
> +
> +	  // REPLACE!!!
> +	  gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
> +	  tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
> +	  gimple *new_read_stmt = gimple_build_assign (new_lhs, write_val);
> +	  gsi_insert_before (&gsi2, new_read_stmt, GSI_SAME_STMT);
> +
> +	  gimple *use_stmt;
> +	  imm_use_iterator use_iter;
> +	  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
> +	    {
> +	      use_operand_p use_p;
> +	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
> +		SET_USE (use_p, new_lhs);
> +	      update_stmt (use_stmt);
> +	    }
> +	}
> +
> +      if (found)
> +	break;
> +    }
> +  gcc_assert (found);
> +  pop_cfun ();
> +}
> +
> +static bool
> +ipa_initcall_get_writes_and_reads (varpool_node *vnode, ipa_ref_vec *writes,
> +				   ipa_ref_vec *reads)
> +{
> +  int i;
> +  struct ipa_ref *ref;
> +
> +  if (dump_file)
> +    fprintf (dump_file, "%s for variable '%s'.\n", __func__, vnode->name ());
> +
> +  /* Only IPA_REF_STORE and IPA_REF_LOAD left.  */
> +  for (i = 0; vnode->iterate_referring (i, ref); i++)
> +    {
> +      symtab_node *f_node = ref->referring;
> +      cgraph_node *f_cnode = dyn_cast<cgraph_node *> (f_node);
> +
> +      if (!f_cnode)
> +	{
> +	  if (dump_file)
> +	    fprintf (dump_file,
> +		     "skipping variable %s due to static initialization\n",
> +		     vnode->name ());
> +	  return false;
> +	}
> +      // it is possible that f_cnode is NULL if the dyn_cast fails.
> +      // If the dyn_cast fails, this is an example of static initialization.
> +      const char *identifier = IDENTIFIER_POINTER (DECL_NAME (f_cnode->decl));
> +      // This is the suffix we are adding to our clones.
> +      // Therefore, if we find the suffix in the identifier,
> +      // that means that we found a variable in a clone and
> +      // we would like to skip it.
> +      // TODO: make this a global static for easier changes...
> +      const char *suffix = "test.0";
> +      if (strstr (identifier, suffix) != NULL)
> +	continue;
> +
> +      if (ref->use == IPA_REF_STORE)
> +	writes->safe_push (ref);
> +
> +      if (ref->use == IPA_REF_LOAD)
> +	reads->safe_push (ref);
> +    }
> +  return true;
> +}
> +
> +static void
> +propagate_constant_to_reads (tree write_val, const ipa_ref_vec &reads_original,
> +			     hash_set<cgraph_node *> *funcs)
> +{
> +  /* Iterate over reads and replace them by constant.  */
> +  struct ipa_ref *ref;
> +  int i;
> +  FOR_EACH_VEC_ELT (reads_original, i, ref)
> +    {
> +      propagate_constant_to_read (write_val, ref, funcs);
> +    }
> +}
> +
> +/*
> + * Extracts the assigned constant, iff the given statement
> + * is a constant assignment.  Returns NULL_TREE otherwise.
> + */
> +static tree
> +extract_constant_from_initcall_write (struct ipa_ref *write)
> +{
> +  symtab_node *f_node = write->referring;
> +  cgraph_node *f_cnode = dyn_cast<cgraph_node *> (f_node);
> +
> +  tree decl = f_cnode->decl;
> +  if (TREE_CODE (decl) != FUNCTION_DECL)
> +    {
> +      if (dump_file)
> +	fprintf (dump_file, "Not function decl -> skipping.\n");
> +      return NULL_TREE;
> +    }
> +
> +  if (!f_cnode->has_gimple_body_p () && dump_file)
> +    fprintf (dump_file, "Does NOT have gimple body!\n");
> +  if (f_cnode->inlined_to && dump_file)
> +    fprintf (dump_file, "Inlined To\n");
> +
> +  if (dump_file)
> +    {
> +      fprintf (dump_file, "%s: for writer gimple:\n", __func__);
> +      dump_vnode_ipa_ref (write);
> +    }
> +
> +  /* Get the write function's body.  */
> +  load_function_body_of_ipa_ref (write);
> +
> +  gimple *stmt = write->stmt;
> +
> +  /* Verify that we have an assignment.  */
> +  if (gimple_code (stmt) != GIMPLE_ASSIGN)
> +    {
> +      if (dump_file)
> +	fprintf (dump_file, "Write stmt not assignment -> skipping.\n");
> +      return NULL_TREE;
> +    }
> +
> +  /* Check if write's LHS (vnode) is not volatile.  */
> +  tree lhs = gimple_assign_lhs (stmt);
> +  if (TREE_THIS_VOLATILE (TREE_TYPE (lhs)))
> +    {
> +      if (dump_file)
> +	fprintf (dump_file, "Variable volatile -> skipping.\n");
> +      return NULL_TREE;
> +    }
> +
> +  /* Check if RHS of write stmt is constant.  */
> +  if (!gimple_assign_is_single_const (stmt))
> +    {
> +      if (dump_file)
> +	{
> +	  fprintf (dump_file, "Found non-const operands.\n");
> +	}
> +      return NULL_TREE;
> +    }
> +
> +  /* Extract the constant.  */
> +  tree write_val = gimple_op (stmt, 1);
> +
> +  if (dump_file)
> +    {
> +      fprintf (dump_file, "Const op:\n");
> +      dump_node (write_val, TDF_DETAILS, dump_file);
> +    }
> +
> +  return write_val;
> +}
> +
> +static void
> +ipa_initcall_cp_execute_for_var (varpool_node *vnode,
> +				 hash_set<cgraph_node *> *update_functions)
> +{
> +  ipa_ref_vec writes = vNULL;
> +  ipa_ref_vec reads = vNULL;
> +  struct ipa_ref *write;
> +  tree write_val;
> +  gimple_stmt_iterator gsi;
> +  bool remove_permanently;
> +
> +  if (dump_file)
> +    fprintf (dump_file, "%s for variable '%s'.\n", __func__, vnode->name ());
> +
> +  if (dump_file)
> +    {
> +      dump_node (vnode->decl, TDF_DETAILS, dump_file);
> +    }
> +
> +  /* Variable must not be externally visible.  */
> +  if (vnode->externally_visible_p ())
> +    {
> +      if (dump_file)
> +	fprintf (dump_file, "\texternally visible "
> +			    "-> skipping\n");
> +      return;
> +    }
> +
> +  /* All refs must be explicit.  */
> +  if (!vnode->all_refs_explicit_p ())
> +    {
> +      if (dump_file)
> +	fprintf (dump_file, "Not explicit variable refs "
> +			    "-> skipping.\n");
> +      return;
> +    }
> +
> +  /* Check if any ref->use is IPA_REF_ALIAS.  */
> +  if (vnode->has_aliases_p ())
> +    {
> +      if (dump_file)
> +	fprintf (dump_file, "Found IPA_REF_ALIAS -> skipping.\n");
> +      return;
> +    }
> +
> +  /* Check if any ref->use is IPA_REF_ADDR.  */
> +  if (symtab_node_address_matters_p (vnode))
> +    {
> +      if (dump_file)
> +	fprintf (dump_file, "Found IPA_REF_ADDR -> skipping.\n");
> +      return;
> +    }
> +
> +  /* We don't touch arrays.  */
> +  if (TREE_CODE (TREE_TYPE (vnode->decl)) == ARRAY_TYPE)
> +    {
> +      if (dump_file)
> +	fprintf (dump_file, "Variable is array -> skipping.\n");
> +      return;
> +    }
> +
> +  /* We don't touch structs.  */
> +  if (TREE_CODE (TREE_TYPE (vnode->decl)) == RECORD_TYPE)
> +    {
> +      if (dump_file)
> +	fprintf (dump_file, "Variable is struct -> skipping.\n");
> +      return;
> +    }
> +
> +  /* We don't touch unions.  */
> +  if (TREE_CODE (TREE_TYPE (vnode->decl)) == UNION_TYPE)
> +    {
> +      if (dump_file)
> +	fprintf (dump_file, "Variable is union -> skipping.\n");
> +      return;
> +    }
> +
> +  /* Get all refs (must be REF_STORE or REF_LOAD).  */
> +  bool continue_work
> +    = ipa_initcall_get_writes_and_reads (vnode, &writes, &reads);
> +  if (!continue_work)
> +    {
> +      if (dump_file)
> +	fprintf (dump_file, "Static initialization -> skipping.\n");
> +      writes.release ();
> +      reads.release ();
> +      return;
> +    }
> +
> +  if (writes.length () > 1)
> +    {
> +      /* More than one writer.  */
> +      if (dump_file)
> +	fprintf (dump_file, "More than one writer -> skipping.\n");
> +      writes.release ();
> +      reads.release ();
> +      return;
> +    }
> +  else if (writes.length () < 1)
> +    {
> +      /* No writer.  */
> +      if (dump_file)
> +	fprintf (dump_file, "No writer -> skipping.\n");
> +      writes.release ();
> +      reads.release ();
> +      return;
> +    }
> +
> +  /*
> +   * Note:
> +   * Limiting ourselfs to only one write is not necessary in general,
> +   * but good enough to address the typical init () case.
> +   * Big advantage is, that it makes the following code much easier.
> +   *
> +   * TODO: I believe that if we create clones, we might get two writes
> +   * Investigate
> +   */
> +
> +  /* Get (only) write ref.  */
> +  write = writes.pop ();
> +
> +  /* Check if write's RHS is a constant and get it.  */
> +  write_val = extract_constant_from_initcall_write (write);
> +  if (write_val == NULL_TREE)
> +    {
> +      if (dump_file)
> +	fprintf (dump_file, "Write's RHS is not constant"
> +			    " -> skipping.\n");
> +      writes.release ();
> +      reads.release ();
> +      return;
> +    }
> +
> +  /* Assure all reads are after the write.  */
> +  if (!write_before_all_reads (write, reads))
> +    {
> +      if (dump_file)
> +	fprintf (dump_file, "Write not guaranteed "
> +			    "to be before read -> skipping.\n");
> +      writes.release ();
> +      reads.release ();
> +      return;
> +    }
> +
> +  /* Propagate constant to reads.  */
> +  if (!flag_initcall_cp_dryrun)
> +    propagate_constant_to_reads (write_val, reads, update_functions);
> +
> +  /* Finally remove the write.  */
> +  gsi = gsi_for_stmt (write->stmt);
> +  remove_permanently = false; // XXX: fails with true?
> +  // gsi_remove (&gsi, remove_permanently);
> +
> +  if (dump_file)
> +    fprintf (dump_file, "Eliminated variable '%s'.\n\n", vnode->name ());
> +
> +  writes.release ();
> +  reads.release ();
> +}
> +
> +bool
> +update_callgraph (cgraph_node *const &r_cnode, cgraph_node **clone_ptr, void *)
> +{
> +  vec<cgraph_edge *> callers = r_cnode->collect_callers ();
> +  cgraph_node *clone = *clone_ptr;
> +  cgraph_edge *e;
> +  int i;
> +  profile_count count = r_cnode->count;
> +
> +  FOR_EACH_VEC_ELT (callers, i, e)
> +    e->redirect_callee (clone);
> +
> +  /*
> +    for (e = r_cnode->callees; e; e=e->next_callee)
> +      e->clone (clone, e->call_stmt, e->lto_stmt_uid, count, count, true);
> +
> +    for (e = r_cnode->indirect_calls; e; e=e->next_callee)
> +      e->clone (clone, e->call_stmt, e->lto_stmt_uid, count, count, true);
> +  */
> +  for (e = clone->callers; e; e = e->next_caller)
> +    {
> +      e->caller->get_untransformed_body ();
> +      function *inner_function = DECL_STRUCT_FUNCTION (e->caller->decl);
> +      gimple_call_set_fndecl (e->call_stmt, clone->decl);
> +      maybe_clean_eh_stmt_fn (inner_function, e->call_stmt);
> +    }
> +
> +  r_cnode->skip_ipa_cp = true;
> +  // r_cnode->remove ();
> +  push_cfun (DECL_STRUCT_FUNCTION (r_cnode->decl));
> +
> +  if (dump_file)
> +    fprintf (dump_file, "dumping function %s\n", r_cnode->dump_asm_name ());
> +  function *func = DECL_STRUCT_FUNCTION (r_cnode->decl);
> +  basic_block bb;
> +  FOR_EACH_BB_FN (bb, func)
> +    {
> +      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
> +	   gsi_next (&gsi))
> +	{
> +	  gimple *stmt = gsi_stmt (gsi);
> +	  if (dump_file)
> +	    print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
> +	}
> +    }
> +  if (dom_info_available_p (CDI_DOMINATORS))
> +    free_dominance_info (CDI_DOMINATORS);
> +  pop_cfun ();
> +  return true;
> +}
> +
> +static unsigned int
> +ipa_initcall_cp_execute (void)
> +{
> +  varpool_node *vnode;
> +
> +  clone_num_suffixes1 = new hash_map<const char *, unsigned>;
> +  hash_set<cgraph_node *> update_functions;
> +  func_to_clone = new hash_map<cgraph_node *, cgraph_node *>;
> +  FOR_EACH_VARIABLE (vnode)
> +    {
> +      ipa_initcall_cp_execute_for_var (vnode, &update_functions);
> +    }
> +
> +  if (!flag_initcall_cp_dryrun)
> +    func_to_clone->traverse<void *, update_callgraph> (NULL);
> +
> +  delete clone_num_suffixes1;
> +  delete func_to_clone;
> +
> +  return 0;
> +}
> +
> +namespace {
> +
> +const pass_data pass_data_ipa_initcall_cp = {
> +  IPA_PASS,
> +  "initcall_cp",
> +  OPTGROUP_NONE,
> +  TV_NONE,
> +  (PROP_cfg | PROP_ssa),
> +  0,
> +  0,
> +  0,
> +  (TODO_update_ssa | TODO_cleanup_cfg | TODO_dump_symtab
> +   | TODO_rebuild_cgraph_edges | TODO_discard_function),
> +};
> +
> +class pass_ipa_initcall_cp : public ipa_opt_pass_d
> +{
> +public:
> +  pass_ipa_initcall_cp (gcc::context *ctxt)
> +    : ipa_opt_pass_d (pass_data_ipa_initcall_cp, ctxt, NULL, NULL, NULL, NULL,
> +		      NULL, NULL, 0, NULL, NULL)
> +  {}
> +
> +  /* opt_pass methods: */
> +  virtual bool gate (function *)
> +  {
> +    return flag_initcall_cp || flag_initcall_cp_dryrun;
> +  }
> +
> +  virtual unsigned int execute (function *)
> +  {
> +    return ipa_initcall_cp_execute ();
> +  }
> +
> +}; // class pass_ipa_initcall_cp
> +
> +} // namespace
> +
> +ipa_opt_pass_d *
> +make_pass_ipa_initcall_cp (gcc::context *ctxt)
> +{
> +  return new pass_ipa_initcall_cp (ctxt);
> +}
> diff --git a/gcc/passes.def b/gcc/passes.def
> index 2bf2cb78fc5..62609951bac 100644
> --- a/gcc/passes.def
> +++ b/gcc/passes.def
> @@ -149,6 +149,7 @@ along with GCC; see the file COPYING3.  If not see
>    NEXT_PASS (pass_ipa_profile);
>    NEXT_PASS (pass_ipa_icf);
>    NEXT_PASS (pass_ipa_devirt);
> +  NEXT_PASS (pass_ipa_initcall_cp);
>    NEXT_PASS (pass_ipa_cp);
>    NEXT_PASS (pass_ipa_sra);
>    NEXT_PASS (pass_ipa_cdtor_merge);
> diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
> index a1207a20a3c..65e09c657b7 100644
> --- a/gcc/tree-pass.h
> +++ b/gcc/tree-pass.h
> @@ -501,6 +501,7 @@ extern ipa_opt_pass_d *make_pass_ipa_fn_summary (gcc::context *ctxt);
>  extern ipa_opt_pass_d *make_pass_ipa_inline (gcc::context *ctxt);
>  extern simple_ipa_opt_pass *make_pass_ipa_free_lang_data (gcc::context *ctxt);
>  extern simple_ipa_opt_pass *make_pass_ipa_free_fn_summary (gcc::context *ctxt);
> +extern ipa_opt_pass_d *make_pass_ipa_initcall_cp (gcc::context *ctxt);
>  extern ipa_opt_pass_d *make_pass_ipa_cp (gcc::context *ctxt);
>  extern ipa_opt_pass_d *make_pass_ipa_sra (gcc::context *ctxt);
>  extern ipa_opt_pass_d *make_pass_ipa_icf (gcc::context *ctxt);
> 
> 
> On 2020-01-23 8:53 a.m., Jan Hubicka wrote:
>>> Hi.
>>>
>>> Thank you for the patch. I'm CCing the author of current IPA CP
>>> and IPA maintainer.
>> OK, happy to be in CC, but I wonder where I find the last version of the
>> patch?
>> https://gcc.gnu.org/bugzilla/attachment.cgi?id=47455
>> Honza
>>>
>>> Martin
-------------- next part --------------
#include <stdio.h>
#include <limits.h>

#define noinline __attribute__((noinline))

extern int FAIL_0_global_variable_read_before_write;
extern int SUCCESS_0_global_variable_write_before_read;
extern int FAIL_1_global_variable_call_read_before_write;
extern int SUCCESS_1_global_variable_call_write_before_read;
extern int FAIL_2_global_variable_call_read_before_call_write;
extern int SUCCESS_2_global_variable_call_write_before_call_read;
extern int FAIL_3_global_variable_read_before_call_write;
extern int SUCCESS_3_global_variable_write_before_call_read;

extern void FAIL_1_call_read();
extern void SUCCESS_1_call_write();
extern void FAIL_2_call_read();
extern void FAIL_2_call_write();
extern void SUCCESS_2_call_write();
extern void SUCCESS_2_call_read();
extern void FAIL_3_call_write();
extern void SUCCESS_3_call_read();

extern void myprint(const char* string, int i );

void noinline FAIL_0_read_and_write();
void noinline SUCCESS_0_write_and_read();
void noinline FAIL_1_call_read_and_write();
void noinline SUCCESS_1_call_write_and_read();
void noinline FAIL_2_call_read_and_call_write();
void noinline SUCCESS_2_call_write_and_call_read();
void noinline FAIL_3_read_and_call_write();
void noinline SUCCESS_3_write_and_call_read();

noinline
void FAIL_0_read_and_write()
{
	printf("%s\n", FAIL_0_global_variable_read_before_write == 0 ? "SUCCESS" : "FAILURE");
	FAIL_0_global_variable_read_before_write = 1;
}

noinline
void SUCCESS_0_write_and_read()
{
	SUCCESS_0_global_variable_write_before_read = 15000000;
	printf("%s\n", SUCCESS_0_global_variable_write_before_read == 15000000? "SUCCESS" : "FAILURE");
}

noinline
void FAIL_1_call_read_and_write()
{
	FAIL_1_call_read();
	FAIL_1_global_variable_call_read_before_write = 1;
}

noinline
void SUCCESS_1_call_write_and_read()
{
	SUCCESS_1_call_write();
	printf("%s\n", SUCCESS_1_global_variable_call_write_before_read == 1 ? "SUCCESS" : "FAILURE");
}

noinline
void FAIL_2_call_read_and_call_write()
{
	FAIL_2_call_read();
	FAIL_2_call_write();
}

noinline
void SUCCESS_2_call_write_and_call_read()
{
	SUCCESS_2_call_write();
	SUCCESS_2_call_read();
}

noinline
void FAIL_3_read_and_call_write()
{
	printf("%s\n", FAIL_3_global_variable_read_before_call_write == 0 ? "SUCCESS" : "FAILURE");
	FAIL_3_call_write();
}

noinline
void SUCCESS_3_write_and_call_read()
{
	SUCCESS_3_global_variable_write_before_call_read = 1;
	SUCCESS_3_call_read();
}

noinline
void trigger_constant_propagation(int i)
{
	myprint("Hello world\n", i);
}
-------------- next part --------------
#include <stdio.h>

#define noinline __attribute__((noinline))

extern int FAIL_0_global_variable_read_before_write;
extern int SUCCESS_0_global_variable_write_before_read;
extern int FAIL_1_global_variable_call_read_before_write;
extern int SUCCESS_1_global_variable_call_write_before_read;
extern int FAIL_2_global_variable_call_read_before_call_write;
extern int SUCCESS_2_global_variable_call_write_before_call_read;
extern int FAIL_3_global_variable_read_before_call_write;
extern int SUCCESS_3_global_variable_write_before_call_read;

noinline
void SUCCESS_1_call_write()
{
	SUCCESS_1_global_variable_call_write_before_read = 1;
}

noinline
void FAIL_2_call_write()
{
   FAIL_2_global_variable_call_read_before_call_write = 1;
}

noinline
void SUCCESS_2_call_write()
{
	SUCCESS_2_global_variable_call_write_before_call_read = 1;
}

noinline
void SUCCESS_3_call_read()
{
	printf("%s\n", SUCCESS_3_global_variable_write_before_call_read == 1 ? "SUCCESS" : "FAILURE");
}

noinline
void FAIL_3_call_write()
{
	FAIL_3_global_variable_read_before_call_write = 1;
}

noinline
void FAIL_2_call_read()
{
	printf("%s\n", FAIL_2_global_variable_call_read_before_call_write == 0 ? "SUCCESS" : "FAILURE");

}

noinline
void SUCCESS_2_call_read()
{
	printf("%s\n", SUCCESS_2_global_variable_call_write_before_call_read == 1 ? "SUCCESS" : "FAILURE");
}

noinline
void FAIL_1_call_read()
{
	printf("%s\n", FAIL_1_global_variable_call_read_before_write == 0 ? "SUCCESS" : "FAILURE");
}

noinline
void myprint(const char* string, int i)
{
	int data = i % SUCCESS_0_global_variable_write_before_read;
	printf("%s data = %d\n", string, data);
}
-------------- next part --------------
#include <stdio.h>
#include <string.h>

extern void FAIL_0_read_and_write();
extern void SUCCESS_0_write_and_read();
extern void FAIL_1_call_read_and_write();
extern void SUCCESS_1_call_write_and_read();
extern void FAIL_2_call_read_and_call_write();
extern void SUCCESS_2_call_write_and_call_read();
extern void FAIL_3_read_and_call_write();
extern void SUCCESS_3_write_and_call_read();
extern void trigger_constant_propagation(int i);

int FAIL_0_global_variable_read_before_write = 0;
int SUCCESS_0_global_variable_write_before_read = 0;
int FAIL_1_global_variable_call_read_before_write = 0;
int SUCCESS_1_global_variable_call_write_before_read = 0;
int FAIL_2_global_variable_call_read_before_call_write = 0;
int SUCCESS_2_global_variable_call_write_before_call_read = 0;
int FAIL_3_global_variable_read_before_call_write = 0;
int SUCCESS_3_global_variable_write_before_call_read = 0;

int main(int argc, char** argv)
{
	FAIL_0_read_and_write();
	SUCCESS_0_write_and_read();
	FAIL_1_call_read_and_write();
	SUCCESS_1_call_write_and_read();
	FAIL_2_call_read_and_call_write();
	SUCCESS_2_call_write_and_call_read();
	FAIL_3_read_and_call_write();
	SUCCESS_3_write_and_call_read();
	trigger_constant_propagation(atoi(argv[1]));
	return 0;
}
-------------- next part --------------
CROSS_COMPILE=$(HOME)/code/gcc-inst/bin/

CC=$(CROSS_COMPILE)gcc

CFLAGS+=-Ofast 
CFLAGS+=-ggdb

#Three options:
# a) nothing (test1.c.072i.initcall_cp)
# b) -fwhole-program (test1.c.072i.initcall_cp)
# c) -flto (test1.wpa.072i.initcall_cp)
CFLAGS+=-flto=32
#LDFLAGS+=-flto-partition=none
LDFLAGS+=-flto=32
LDFLAGS+=-finitcall-cp
#LDFLAGS+=-finitcall-cp-dryrun
#LDFLAGS+=-fwhole-program
CFLAGS+=-fdump-ipa-all
CFLAGS+=-fdump-tree-all
CFLAGS+=-fdump-passes

PGEN_DIR=pgen
PUSE_DIR=puse
PGEN=-fprofile-generate
PUSE=-fprofile-use

BIN=test
PGEN_BIN=$(PGEN_DIR)/test
PUSE_BIN=$(PUSE_DIR)/test

SRCS=\
     main.c \
     init.c \
     ext.c

OBJS=$(patsubst %.c,%.o,$(SRCS))
PGEN_OBJS=$(addprefix $(PGEN_DIR)/,$(OBJS))
PUSE_OBJS=$(addprefix $(PUSE_DIR)/,$(OBJS))
PGEN_GCDAS=$(patsubst %.o,%.gcda,$(PGEN_OBJS))
PUSE_GCDAS=$(patsubst %.o,%.gcda,$(PUSE_OBJS))

all: $(BIN) $(PUSE_BIN)

$(PGEN_DIR)/%.o: %.c
	mkdir -p $(PGEN_DIR)
	$(CC) -c $< -o $@ $(CFLAGS) $(LDFLAGS) $(PGEN)

$(PUSE_DIR)/%.o: %.c
	mkdir -p $(PUSE_DIR)
	$(CC) -c $< -o $@ $(CFLAGS) $(LDFLAGS) $(PUSE)

$(PGEN_BIN): $(PGEN_OBJS)
	$(CC) $^ -o $@ $(CFLAGS) $(LDFLAGS) $(PGEN)

$(PGEN_GCDAS): $(PGEN_BIN)
	./$(PGEN_BIN) 5
	echo Created $(PGEN_GCDAS)

$(PUSE_GCDAS): $(PGEN_GCDAS)
	mkdir -p $(PUSE_DIR)
	cp $^ $(PUSE_DIR)
	echo Created $(PUSE_GCDAS)

$(PUSE_BIN): $(PUSE_GCDAS) $(PUSE_OBJS)
	$(CC) $(PUSE_OBJS) -o $@ $(CFLAGS) $(LDFLAGS) $(PUSE)

%.o: %.c
	$(CC) -c $< -o $@ $(CFLAGS) $(LDFLAGS)

$(BIN): $(OBJS)
	$(CC) $^ -o $@ $(CFLAGS) $(LDFLAGS)

PASSDUMP=$(patsubst %.c,%.c.*,$(SRCS))
clean:
	rm -rf $(PGEN_DIR) $(PUSE_DIR)
	rm -rf *.o $(BIN)
	rm -rf $(PASSDUMP)
	rm -rf $(BIN).ltrans* $(BIN).wpa*
	rm -rf $(BIN).ltrans* $(BIN).wpa*
	rm -rf $(BIN).*.* $(BIN).*.*



More information about the Gcc-patches mailing list