[RFC] propagate malloc attribute in ipa-pure-const pass

Prathamesh Kulkarni prathamesh.kulkarni@linaro.org
Fri Oct 13 23:34:00 GMT 2017


On 7 October 2017 at 12:35, Prathamesh Kulkarni
<prathamesh.kulkarni@linaro.org> wrote:
> On 7 October 2017 at 11:23, Jan Hubicka <hubicka@ucw.cz> wrote:
>>> On 6 October 2017 at 06:04, Jan Hubicka <hubicka@ucw.cz> wrote:
>>> >> Hi Honza,
>>> >> Thanks for the detailed suggestions, I have updated the patch accordingly.
>>> >> I have following questions on call_summary:
>>> >> 1] I added field bool is_return_callee in ipa_call_summary to track
>>> >> whether the caller possibly returns value returned by callee, which
>>> >> gets rid of return_callees_map. I assume ipa_call_summary_t::remove()
>>> >> and ipa_call_summary_t::duplicate() will already take care of handling
>>> >> late insertion/removal of cgraph nodes ? I just initialized
>>> >> is_return_callee to false in ipa_call_summary::reset and that seems to
>>> >> work. I am not sure though if I have handled it correctly. Could you
>>> >> please check that ?
>>> >
>>> > I was actually thinking to introduce separate summary for ipa-pure-const pass,
>>> > but this seems fine to me too (for one bit definitly more effecient)
>>> > ipa_call_summary_t::duplicate copies all the fields, so indeed you should be
>>> > safe here.
>>> >
>>> > Also it is possible for functions to be inserted late.  Updating of call summaries
>>> > is currently handled by ipa_fn_summary_t::insert
>>> >>
>>> >> 2] ipa_inline() called ipa_free_fn_summary, which made
>>> >> ipa_call_summaries unavailable during ipa-pure-const pass. I removed
>>> >> call to ipa_free_fn_summary from ipa_inline, and moved it to
>>> >> ipa_pure_const::execute(). Is that OK ?
>>> >
>>> > Seems OK to me.
>>> >>
>>> >> Patch passes bootstrap+test and lto bootstrap+test on x86_64-unknown-linux-gnu.
>>> >> Verfiied SPEC2k6 compiles and runs without miscompares with LTO
>>> >> enabled on aarch64-linux-gnu.
>>> >> Cross-tested on arm*-*-* and aarch64*-*-*. I will additionally test
>>> >> the patch by building chromium or firefox.
>>> >> Would it be OK to commit if it passes above validations ?
>>> >>
>>> >> Thanks,
>>> >> Prathamesh
>>> >> >
>>> >> > Thanks,
>>> >> > Honza
>>> >
>>> >> 2017-10-05  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
>>> >>
>>> >>       * cgraph.h (set_malloc_flag): Declare.
>>> >>       * cgraph.c (set_malloc_flag_1): New function.
>>> >>       (set_malloc_flag): Likewise.
>>> >>       * ipa-fnsummary.h (ipa_call_summary): Add new field is_return_callee.
>>> >>       * ipa-fnsummary.c (ipa_call_summary::reset): Set is_return_callee to
>>> >>       false.
>>> >>       (read_ipa_call_summary): Add support for reading is_return_callee.
>>> >>       (write_ipa_call_summary): Stream is_return_callee.
>>> >>       * ipa-inline.c (ipa_inline): Remove call to ipa_free_fn_summary.
>>> >>       * ipa-pure-const.c: Add headers ssa.h, alloc-pool.h, symbol-summary.h,
>>> >>       ipa-prop.h, ipa-fnsummary.h.
>>> >>       (malloc_state_e): Define.
>>> >>       (malloc_state_names): Define.
>>> >>       (funct_state_d): Add field malloc_state.
>>> >>       (varying_state): Set malloc_state to STATE_MALLOC_BOTTOM.
>>> >>       (check_retval_uses): New function.
>>> >>       (malloc_candidate_p): Likewise.
>>> >>       (analyze_function): Add support for malloc attribute.
>>> >>       (pure_const_write_summary): Stream malloc_state.
>>> >>       (pure_const_read_summary): Add support for reading malloc_state.
>>> >>       (dump_malloc_lattice): New function.
>>> >>       (propagate_malloc): New function.
>>> >>       (ipa_pure_const::execute): Call propagate_malloc and
>>> >>       ipa_free_fn_summary.
>>> >>       (pass_local_pure_const::execute): Add support for malloc attribute.
>>> >>       * ssa-iterators.h (RETURN_FROM_IMM_USE_STMT): New macro.
>>> >>
>>> >> testsuite/
>>> >>       * gcc.dg/ipa/propmalloc-1.c: New test-case.
>>> >>       * gcc.dg/ipa/propmalloc-2.c: Likewise.
>>> >>       * gcc.dg/ipa/propmalloc-3.c: Likewise.
>>> >>
>>> >> diff --git a/gcc/cgraph.c b/gcc/cgraph.c
>>> >> index 3d0cefbd46b..0aad90d59ea 100644
>>> >> --- a/gcc/cgraph.c
>>> >> +++ b/gcc/cgraph.c
>>> >> @@ -2530,6 +2530,53 @@ cgraph_node::set_nothrow_flag (bool nothrow)
>>> >>    return changed;
>>> >>  }
>>> >>
>>> >> +/* Worker to set malloc flag.  */
>>> > New line here I guess (it is below)
>>> >> +static void
>>> >> +set_malloc_flag_1 (cgraph_node *node, bool malloc_p, bool *changed)
>>> >> +{
>>> >> +  if (malloc_p && !DECL_IS_MALLOC (node->decl))
>>> >> +    {
>>> >> +      DECL_IS_MALLOC (node->decl) = true;
>>> >> +      *changed = true;
>>> >> +    }
>>> >> +
>>> >> +  ipa_ref *ref;
>>> >> +  FOR_EACH_ALIAS (node, ref)
>>> >> +    {
>>> >> +      cgraph_node *alias = dyn_cast<cgraph_node *> (ref->referring);
>>> >> +      if (!malloc_p || alias->get_availability () > AVAIL_INTERPOSABLE)
>>> >> +     set_malloc_flag_1 (alias, malloc_p, changed);
>>> >> +    }
>>> >> +
>>> >> +  for (cgraph_edge *e = node->callers; e; e = e->next_caller)
>>> >> +    if (e->caller->thunk.thunk_p
>>> >> +     && (!malloc_p || e->caller->get_availability () > AVAIL_INTERPOSABLE))
>>> >> +      set_malloc_flag_1 (e->caller, malloc_p, changed);
>>> >> +}
>>> >> +
>>> >> +/* Set DECL_IS_MALLOC on NODE's decl and on NODE's aliases if any.  */
>>> >> +
>>> >> +bool
>>> >> +cgraph_node::set_malloc_flag (bool malloc_p)
>>> >> +{
>>> >> +  bool changed = false;
>>> >> +
>>> >> +  if (!malloc_p || get_availability () > AVAIL_INTERPOSABLE)
>>> >> +    set_malloc_flag_1 (this, malloc_p, &changed);
>>> >> +  else
>>> >> +    {
>>> >> +      ipa_ref *ref;
>>> >> +
>>> >> +      FOR_EACH_ALIAS (this, ref)
>>> >> +     {
>>> >> +       cgraph_node *alias = dyn_cast<cgraph_node *> (ref->referring);
>>> >> +       if (!malloc_p || alias->get_availability () > AVAIL_INTERPOSABLE)
>>> >> +         set_malloc_flag_1 (alias, malloc_p, &changed);
>>> >> +     }
>>> >> +    }
>>> >> +  return changed;
>>> >> +}
>>> >> +
>>> >> diff --git a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h
>>> >> index f50d6806e61..613a2b6f625 100644
>>> >> --- a/gcc/ipa-fnsummary.h
>>> >> +++ b/gcc/ipa-fnsummary.h
>>> >> @@ -197,7 +197,9 @@ struct ipa_call_summary
>>> >>    int call_stmt_time;
>>> >>    /* Depth of loop nest, 0 means no nesting.  */
>>> >>    unsigned int loop_depth;
>>> >> -
>>> >> +  /* Indicates whether the caller returns the value of it's callee.  */
>>> > Perhaps add a comment when it is initialized.
>>> > Don't you also check that the calle is not captured? In that case I would
>>> > is_return_callee_uncaptured or so, so someone does not try to use it with
>>> > different meaning.
>>> Sorry, I didn't understand "Don't you also check that callee is not captured ?"
>>> What does capturing mean in this context ?
>>
>> Don't you need to know that the returned pointer is new and does not alias
>> with something else?
> Yes, malloc_candidate_p enforces that locally by checking the returned
> pointer is return value of callee and optionally has immediate uses
> only within comparisons against 0. But whether the returned pointer is
> new depends on whether callee itself can be annotated with malloc,
> which is done in propagate_malloc.
> IIUC pointer returned by a malloc annotated function, does not alias
> anything else ?
>
> Thanks,
> Prathamesh
>>
>> Honza
>>>
>>> Thanks,
>>> Prathamesh
>>> >
>>> >> @@ -69,6 +74,15 @@ enum pure_const_state_e
>>> >>
>>> >>  const char *pure_const_names[3] = {"const", "pure", "neither"};
>>> >>
>>> >> +enum malloc_state_e
>>> >> +{
>>> >> +  STATE_MALLOC_TOP,
>>> >> +  STATE_MALLOC,
>>> >> +  STATE_MALLOC_BOTTOM
>>> >> +};
>>> >> +
>>> >> +const char *malloc_state_names[] = {"malloc_top", "malloc", "malloc_bottom"};
>>> >
>>> > Seems static is missing in all those declarations, please fix it.
Hi Honza,
I have done the suggested changes in this version.
LTO Bootstrapped+tested on x86_64-unknown-linux-gnu, ppc64le-linux-gnu.
Cross-tested on arm*-*-*, aarch64*-*-*
Verified no functional regressions with SPEC2006.
Would it be OK to commit this version of the patch ?

Thanks,
Prathamesh
>>> >
>>> > OK with these changes. Thanks!
>>> >
>>> > Honza
-------------- next part --------------
2017-10-13  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>

	* cgraph.h (set_malloc_flag): Declare.
	* cgraph.c (set_malloc_flag_1): New function.
	(set_malloc_flag): Likewise.
	* ipa-fnsummary.h (ipa_call_summary): Add new field is_return_callee.
	* ipa-fnsummary.c (ipa_call_summary::reset): Set is_return_callee to
	false.
	(read_ipa_call_summary): Add support for reading is_return_callee.
	(write_ipa_call_summary): Stream is_return_callee.
	* ipa-inline.c (ipa_inline): Remove call to ipa_free_fn_summary.
	* ipa-pure-const.c: Add headers ssa.h, alloc-pool.h, symbol-summary.h,
	ipa-prop.h, ipa-fnsummary.h.
	(pure_const_names): Change to static.
	(malloc_state_e): Define.
	(malloc_state_names): Define.
	(funct_state_d): Add field malloc_state.
	(varying_state): Set malloc_state to STATE_MALLOC_BOTTOM.
	(check_retval_uses): New function.
	(malloc_candidate_p): Likewise.
	(analyze_function): Add support for malloc attribute.
	(pure_const_write_summary): Stream malloc_state.
	(pure_const_read_summary): Add support for reading malloc_state.
	(dump_malloc_lattice): New function.
	(propagate_malloc): New function.
	(ipa_pure_const::execute): Call propagate_malloc and
	ipa_free_fn_summary.
	(pass_local_pure_const::execute): Add support for malloc attribute.
	* ssa-iterators.h (RETURN_FROM_IMM_USE_STMT): New macro.

testsuite/
	* gcc.dg/ipa/propmalloc-1.c: New test-case.
	* gcc.dg/ipa/propmalloc-2.c: Likewise.
	* gcc.dg/ipa/propmalloc-3.c: Likewise.

diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index 3d0cefbd46b..0aad90d59ea 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -2530,6 +2530,53 @@ cgraph_node::set_nothrow_flag (bool nothrow)
   return changed;
 }
 
+/* Worker to set malloc flag.  */
+static void
+set_malloc_flag_1 (cgraph_node *node, bool malloc_p, bool *changed)
+{
+  if (malloc_p && !DECL_IS_MALLOC (node->decl))
+    {
+      DECL_IS_MALLOC (node->decl) = true;
+      *changed = true;
+    }
+
+  ipa_ref *ref;
+  FOR_EACH_ALIAS (node, ref)
+    {
+      cgraph_node *alias = dyn_cast<cgraph_node *> (ref->referring);
+      if (!malloc_p || alias->get_availability () > AVAIL_INTERPOSABLE)
+	set_malloc_flag_1 (alias, malloc_p, changed);
+    }
+
+  for (cgraph_edge *e = node->callers; e; e = e->next_caller)
+    if (e->caller->thunk.thunk_p
+	&& (!malloc_p || e->caller->get_availability () > AVAIL_INTERPOSABLE))
+      set_malloc_flag_1 (e->caller, malloc_p, changed);
+}
+
+/* Set DECL_IS_MALLOC on NODE's decl and on NODE's aliases if any.  */
+
+bool
+cgraph_node::set_malloc_flag (bool malloc_p)
+{
+  bool changed = false;
+
+  if (!malloc_p || get_availability () > AVAIL_INTERPOSABLE)
+    set_malloc_flag_1 (this, malloc_p, &changed);
+  else
+    {
+      ipa_ref *ref;
+
+      FOR_EACH_ALIAS (this, ref)
+	{
+	  cgraph_node *alias = dyn_cast<cgraph_node *> (ref->referring);
+	  if (!malloc_p || alias->get_availability () > AVAIL_INTERPOSABLE)
+	    set_malloc_flag_1 (alias, malloc_p, &changed);
+	}
+    }
+  return changed;
+}
+
 /* Worker to set_const_flag.  */
 
 static void
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 1758e8b08c1..84824e9f814 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -1151,6 +1151,10 @@ public:
      if any to NOTHROW.  */
   bool set_nothrow_flag (bool nothrow);
 
+  /* SET DECL_IS_MALLOC on cgraph_node's decl and on aliases of the node
+     if any.  */
+  bool set_malloc_flag (bool malloc_p);
+
   /* If SET_CONST is true, mark function, aliases and thunks to be ECF_CONST.
     If SET_CONST if false, clear the flag.
 
diff --git a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c
index 076ccd40bd7..f71338e424e 100644
--- a/gcc/ipa-fnsummary.c
+++ b/gcc/ipa-fnsummary.c
@@ -542,6 +542,7 @@ void
 ipa_call_summary::reset ()
 {
   call_stmt_size = call_stmt_time = 0;
+  is_return_callee_uncaptured = false;
   if (predicate)
     edge_predicate_pool.remove (predicate);
   predicate = NULL;
@@ -3204,6 +3205,10 @@ read_ipa_call_summary (struct lto_input_block *ib, struct cgraph_edge *e)
   es->call_stmt_size = streamer_read_uhwi (ib);
   es->call_stmt_time = streamer_read_uhwi (ib);
   es->loop_depth = streamer_read_uhwi (ib);
+
+  bitpack_d bp = streamer_read_bitpack (ib);
+  es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
+
   p.stream_in (ib);
   edge_set_predicate (e, &p);
   length = streamer_read_uhwi (ib);
@@ -3360,6 +3365,11 @@ write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
   streamer_write_uhwi (ob, es->call_stmt_size);
   streamer_write_uhwi (ob, es->call_stmt_time);
   streamer_write_uhwi (ob, es->loop_depth);
+
+  bitpack_d bp = bitpack_create (ob->main_stream);
+  bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
+  streamer_write_bitpack (&bp);
+
   if (es->predicate)
     es->predicate->stream_out (ob);
   else
diff --git a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h
index f50d6806e61..a794bd09318 100644
--- a/gcc/ipa-fnsummary.h
+++ b/gcc/ipa-fnsummary.h
@@ -197,7 +197,9 @@ struct ipa_call_summary
   int call_stmt_time;
   /* Depth of loop nest, 0 means no nesting.  */
   unsigned int loop_depth;
-  
+  /* Indicates whether the caller returns the value of it's callee.  */
+  bool is_return_callee_uncaptured;
+
   /* Keep all field empty so summary dumping works during its computation.
      This is useful for debugging.  */
   ipa_call_summary ()
diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c
index dd46cb61362..b8e65e2fa7e 100644
--- a/gcc/ipa-inline.c
+++ b/gcc/ipa-inline.c
@@ -2526,9 +2526,6 @@ ipa_inline (void)
 
   if (dump_file)
     ipa_dump_fn_summaries (dump_file);
-  /* In WPA we use inline summaries for partitioning process.  */
-  if (!flag_wpa)
-    ipa_free_fn_summary ();
   return remove_functions ? TODO_remove_functions : 0;
 }
 
diff --git a/gcc/ipa-pure-const.c b/gcc/ipa-pure-const.c
index dac8f0d5f21..a6bca660036 100644
--- a/gcc/ipa-pure-const.c
+++ b/gcc/ipa-pure-const.c
@@ -56,6 +56,11 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-scalar-evolution.h"
 #include "intl.h"
 #include "opts.h"
+#include "ssa.h"
+#include "alloc-pool.h"
+#include "symbol-summary.h"
+#include "ipa-prop.h"
+#include "ipa-fnsummary.h"
 
 /* Lattice values for const and pure functions.  Everything starts out
    being const, then may drop to pure and then neither depending on
@@ -67,7 +72,16 @@ enum pure_const_state_e
   IPA_NEITHER
 };
 
-const char *pure_const_names[3] = {"const", "pure", "neither"};
+static const char *pure_const_names[3] = {"const", "pure", "neither"};
+
+enum malloc_state_e
+{
+  STATE_MALLOC_TOP,
+  STATE_MALLOC,
+  STATE_MALLOC_BOTTOM
+};
+
+static const char *malloc_state_names[] = {"malloc_top", "malloc", "malloc_bottom"};
 
 /* Holder for the const_state.  There is one of these per function
    decl.  */
@@ -92,11 +106,13 @@ struct funct_state_d
   /* If function can call free, munmap or otherwise make previously
      non-trapping memory accesses trapping.  */
   bool can_free;
+
+  enum malloc_state_e malloc_state;
 };
 
 /* State used when we know nothing about function.  */
 static struct funct_state_d varying_state
-   = { IPA_NEITHER, IPA_NEITHER, true, true, true, true };
+   = { IPA_NEITHER, IPA_NEITHER, true, true, true, true, STATE_MALLOC_BOTTOM };
 
 
 typedef struct funct_state_d * funct_state;
@@ -812,6 +828,149 @@ check_stmt (gimple_stmt_iterator *gsip, funct_state local, bool ipa)
     }
 }
 
+/* Check that RETVAL is used only in STMT and in comparisons against 0.
+   RETVAL is return value of the function and STMT is return stmt.  */
+
+static bool
+check_retval_uses (tree retval, gimple *stmt)
+{
+  imm_use_iterator use_iter;
+  gimple *use_stmt;
+
+  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, retval)
+    if (gcond *cond = dyn_cast<gcond *> (use_stmt))
+      {
+	tree op2 = gimple_cond_rhs (cond);
+	if (!integer_zerop (op2))
+	  RETURN_FROM_IMM_USE_STMT (use_iter, false);
+      }
+    else if (gassign *ga = dyn_cast<gassign *> (use_stmt))
+      {
+	enum tree_code code = gimple_assign_rhs_code (ga);
+	if (TREE_CODE_CLASS (code) != tcc_comparison)
+	  RETURN_FROM_IMM_USE_STMT (use_iter, false);
+	if (!integer_zerop (gimple_assign_rhs2 (ga)))
+	  RETURN_FROM_IMM_USE_STMT (use_iter, false);
+      }
+    else if (is_gimple_debug (use_stmt))
+      ;
+    else if (use_stmt != stmt)
+      RETURN_FROM_IMM_USE_STMT (use_iter, false);
+
+  return true;
+}
+
+/* malloc_candidate_p() checks if FUN can possibly be annotated with malloc
+   attribute. Currently this function does a very conservative analysis.
+   FUN is considered to be a candidate if
+   1) It returns a value of pointer type.
+   2) SSA_NAME_DEF_STMT (return_value) is either a function call or
+      a phi, and element of phi is either NULL or
+      SSA_NAME_DEF_STMT(element) is function call.
+   3) The return-value has immediate uses only within comparisons (gcond or gassign)
+      and return_stmt (and likewise a phi arg has immediate use only within comparison
+      or the phi stmt).  */
+
+static bool
+malloc_candidate_p (function *fun, bool ipa)
+{
+  basic_block exit_block = EXIT_BLOCK_PTR_FOR_FN (fun);
+  edge e;
+  edge_iterator ei;
+  cgraph_node *node = cgraph_node::get_create (fun->decl);
+
+#define DUMP_AND_RETURN(reason)  \
+{  \
+  if (dump_file && (dump_flags & TDF_DETAILS))  \
+    fprintf (dump_file, "%s", (reason));  \
+  return false;  \
+}
+
+  if (EDGE_COUNT (exit_block->preds) == 0)
+    return false;
+
+  FOR_EACH_EDGE (e, ei, exit_block->preds)
+    {
+      gimple_stmt_iterator gsi = gsi_last_bb (e->src);
+      greturn *ret_stmt = dyn_cast<greturn *> (gsi_stmt (gsi));
+
+      if (!ret_stmt)
+	return false;
+
+      tree retval = gimple_return_retval (ret_stmt);
+      if (!retval)
+	DUMP_AND_RETURN("No return value.")
+
+      if (TREE_CODE (retval) != SSA_NAME
+	  || TREE_CODE (TREE_TYPE (retval)) != POINTER_TYPE)
+	DUMP_AND_RETURN("Return value is not SSA_NAME or not a pointer type.")
+
+      if (!check_retval_uses (retval, ret_stmt))
+	DUMP_AND_RETURN("Return value has uses outside return stmt"
+			" and comparisons against 0.")
+
+      gimple *def = SSA_NAME_DEF_STMT (retval);
+      if (gcall *call_stmt = dyn_cast<gcall *> (def))
+	{
+	  tree callee_decl = gimple_call_fndecl (call_stmt);
+	  if (!callee_decl)
+	    return false;
+
+	  if (!ipa && !DECL_IS_MALLOC (callee_decl))
+	    DUMP_AND_RETURN("callee_decl does not have malloc attribute for"
+			    " non-ipa mode.")
+
+	  cgraph_edge *cs = node->get_edge (call_stmt);
+	  if (cs)
+	    {
+	      ipa_call_summary *es = ipa_call_summaries->get (cs);
+	      gcc_assert (es);
+	      es->is_return_callee_uncaptured = true;
+	    }
+	}
+
+      else if (gphi *phi = dyn_cast<gphi *> (def))
+	for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
+	  {
+	    tree arg = gimple_phi_arg_def (phi, i);
+	    if (TREE_CODE (arg) != SSA_NAME)
+	      DUMP_AND_RETURN("phi arg is not SSA_NAME.")
+	    if (!(arg == null_pointer_node || check_retval_uses (arg, phi)))
+	      DUMP_AND_RETURN("phi arg has uses outside phi"
+			      " and comparisons against 0.")
+
+	    gimple *arg_def = SSA_NAME_DEF_STMT (arg);
+	    gcall *call_stmt = dyn_cast<gcall *> (arg_def);
+	    if (!call_stmt)
+	      return false;
+	    tree callee_decl = gimple_call_fndecl (call_stmt);
+	    if (!callee_decl)
+	      return false;
+	    if (!ipa && !DECL_IS_MALLOC (callee_decl))
+	      DUMP_AND_RETURN("callee_decl does not have malloc attribute for"
+			      " non-ipa mode.")
+
+	    cgraph_edge *cs = node->get_edge (call_stmt);
+	    if (cs)
+	      {
+		ipa_call_summary *es = ipa_call_summaries->get (cs);
+		gcc_assert (es);
+		es->is_return_callee_uncaptured = true;
+	      }
+	  }
+
+      else
+	DUMP_AND_RETURN("def_stmt of return value is not a call or phi-stmt.")
+    }
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "\nFound %s to be candidate for malloc attribute\n",
+	     IDENTIFIER_POINTER (DECL_NAME (fun->decl)));
+  return true;
+
+#undef DUMP_AND_RETURN
+}
+
 
 /* This is the main routine for finding the reference patterns for
    global variables within a function FN.  */
@@ -921,6 +1080,14 @@ end:
   if (TREE_NOTHROW (decl))
     l->can_throw = false;
 
+  l->malloc_state = STATE_MALLOC_BOTTOM;
+  if (DECL_IS_MALLOC (decl))
+    l->malloc_state = STATE_MALLOC;
+  else if (ipa && malloc_candidate_p (DECL_STRUCT_FUNCTION (decl), true))
+    l->malloc_state = STATE_MALLOC_TOP;
+  else if (malloc_candidate_p (DECL_STRUCT_FUNCTION (decl), false))
+    l->malloc_state = STATE_MALLOC;
+
   pop_cfun ();
   if (dump_file)
     {
@@ -934,6 +1101,8 @@ end:
         fprintf (dump_file, "Function is locally pure.\n");
       if (l->can_free)
 	fprintf (dump_file, "Function can locally free.\n");
+      if (l->malloc_state == STATE_MALLOC)
+	fprintf (dump_file, "Function is locally malloc.\n");
     }
   return l;
 }
@@ -1067,6 +1236,7 @@ pure_const_write_summary (void)
 	  bp_pack_value (&bp, fs->looping, 1);
 	  bp_pack_value (&bp, fs->can_throw, 1);
 	  bp_pack_value (&bp, fs->can_free, 1);
+	  bp_pack_value (&bp, fs->malloc_state, 2);
 	  streamer_write_bitpack (&bp);
 	}
     }
@@ -1127,6 +1297,9 @@ pure_const_read_summary (void)
 	      fs->looping = bp_unpack_value (&bp, 1);
 	      fs->can_throw = bp_unpack_value (&bp, 1);
 	      fs->can_free = bp_unpack_value (&bp, 1);
+	      fs->malloc_state
+			= (enum malloc_state_e) bp_unpack_value (&bp, 2);
+
 	      if (dump_file)
 		{
 		  int flags = flags_from_decl_or_type (node->decl);
@@ -1149,6 +1322,8 @@ pure_const_read_summary (void)
 		    fprintf (dump_file,"  function is locally throwing\n");
 		  if (fs->can_free)
 		    fprintf (dump_file,"  function can locally free\n");
+		  fprintf (dump_file, "\n malloc state: %s\n",
+			   malloc_state_names[fs->malloc_state]);
 		}
 	    }
 
@@ -1659,6 +1834,127 @@ propagate_nothrow (void)
   free (order);
 }
 
+/* Debugging function to dump state of malloc lattice.  */
+
+DEBUG_FUNCTION
+static void
+dump_malloc_lattice (FILE *dump_file, const char *s)
+{
+  if (!dump_file)
+    return;
+
+  fprintf (dump_file, "\n\nMALLOC LATTICE %s:\n", s);
+  cgraph_node *node;
+  FOR_EACH_FUNCTION (node)
+    {
+      funct_state fs = get_function_state (node);
+      malloc_state_e state = fs->malloc_state;
+      fprintf (dump_file, "%s: %s\n", node->name (), malloc_state_names[state]);
+    }
+}
+
+/* Propagate malloc attribute across the callgraph.  */
+
+static void
+propagate_malloc (void)
+{
+  cgraph_node *node;
+  FOR_EACH_FUNCTION (node)
+    {
+      if (DECL_IS_MALLOC (node->decl))
+	if (!has_function_state (node))
+	  {
+	    funct_state l = XCNEW (struct funct_state_d);
+	    *l = varying_state;
+	    l->malloc_state = STATE_MALLOC;
+	    set_function_state (node, l);
+	  }
+    }
+
+  dump_malloc_lattice (dump_file, "Initial");
+  struct cgraph_node **order
+    = XNEWVEC (struct cgraph_node *, symtab->cgraph_count);
+  int order_pos = ipa_reverse_postorder (order);
+  bool changed = true;
+
+  while (changed)
+    {
+      changed = false;
+      /* Walk in postorder.  */
+      for (int i = order_pos - 1; i >= 0; --i)
+	{
+	  cgraph_node *node = order[i];
+	  if (node->alias
+	      || !node->definition
+	      || !has_function_state (node))
+	    continue;
+
+	  funct_state l = get_function_state (node);
+
+	  /* FIXME: add support for indirect-calls.  */
+	  if (node->indirect_calls)
+	    {
+	      l->malloc_state = STATE_MALLOC_BOTTOM;
+	      continue;
+	    }
+
+	  if (node->get_availability () <= AVAIL_INTERPOSABLE)
+	    {
+	      l->malloc_state = STATE_MALLOC_BOTTOM;
+	      continue;
+	    }
+
+	  if (l->malloc_state == STATE_MALLOC_BOTTOM)
+	    continue;
+
+	  vec<cgraph_node *> callees = vNULL;
+	  for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
+	    {
+	      ipa_call_summary *es = ipa_call_summaries->get (cs);
+	      if (es && es->is_return_callee_uncaptured)
+		callees.safe_push (cs->callee);
+	    }
+
+	  malloc_state_e new_state = l->malloc_state;
+	  for (unsigned j = 0; j < callees.length (); j++)
+	    {
+	      cgraph_node *callee = callees[j];
+	      if (!has_function_state (callee))
+		{
+		  new_state = STATE_MALLOC_BOTTOM;
+		  break;
+		}
+	      malloc_state_e callee_state = get_function_state (callee)->malloc_state;
+	      if (new_state < callee_state)
+		new_state = callee_state;
+	    }
+	  if (new_state != l->malloc_state)
+	    {
+	      changed = true;
+	      l->malloc_state = new_state;
+	    }
+	}
+    }
+
+  FOR_EACH_DEFINED_FUNCTION (node)
+    if (has_function_state (node))
+      {
+	funct_state l = get_function_state (node);
+	if (!node->alias
+	    && l->malloc_state == STATE_MALLOC
+	    && !node->global.inlined_to)
+	  {
+	    if (dump_file && (dump_flags & TDF_DETAILS))
+	      fprintf (dump_file, "Function %s found to be malloc\n",
+		       node->name ());
+	    node->set_malloc_flag (true);
+	  }
+      }
+
+  dump_malloc_lattice (dump_file, "after propagation");
+  ipa_free_postorder_info ();
+  free (order);
+}
 
 /* Produce the global information by preforming a transitive closure
    on the local information that was produced by generate_summary.  */
@@ -1677,6 +1973,7 @@ execute (function *)
   /* Nothrow makes more function to not lead to return and improve
      later analysis.  */
   propagate_nothrow ();
+  propagate_malloc ();
   remove_p = propagate_pure_const ();
 
   /* Cleanup. */
@@ -1684,6 +1981,10 @@ execute (function *)
     if (has_function_state (node))
       free (get_function_state (node));
   funct_state_vec.release ();
+
+  /* In WPA we use inline summaries for partitioning process.  */
+  if (!flag_wpa)
+    ipa_free_fn_summary ();
   return remove_p ? TODO_remove_functions : 0;
 }
 
@@ -1877,6 +2178,17 @@ pass_local_pure_const::execute (function *fun)
 	fprintf (dump_file, "Function found to be nothrow: %s\n",
 		 current_function_name ());
     }
+
+  if (l->malloc_state == STATE_MALLOC
+      && !DECL_IS_MALLOC (current_function_decl))
+    {
+      node->set_malloc_flag (true);
+      changed = true;
+      if (dump_file)
+	fprintf (dump_file, "Function found to be malloc: %s\n",
+		 node->name ());
+    }
+
   free (l);
   if (changed)
     return execute_fixup_cfg ();
diff --git a/gcc/ssa-iterators.h b/gcc/ssa-iterators.h
index c8aa77bd4f3..740cbf13cb2 100644
--- a/gcc/ssa-iterators.h
+++ b/gcc/ssa-iterators.h
@@ -93,6 +93,12 @@ struct imm_use_iterator
      break;							\
    }
 
+/* Similarly for return.  */
+#define RETURN_FROM_IMM_USE_STMT(ITER, VAL)			\
+  {								\
+    end_imm_use_stmt_traverse (&(ITER));			\
+    return (VAL);						\
+  }
 
 /* Use this iterator in combination with FOR_EACH_IMM_USE_STMT to
    get access to each occurrence of ssavar on the stmt returned by
diff --git a/gcc/testsuite/gcc.dg/ipa/propmalloc-1.c b/gcc/testsuite/gcc.dg/ipa/propmalloc-1.c
new file mode 100644
index 00000000000..9a95f817079
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/propmalloc-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-ipa-pure-const-details" } */
+
+__attribute__((noinline, no_icf, used))
+static void *f(__SIZE_TYPE__ n)
+{
+  void *p = __builtin_malloc (n);
+  if (p == 0)
+    __builtin_abort ();
+  return p;
+}
+
+__attribute__((noinline, no_icf, used))
+static void *bar(__SIZE_TYPE__ n)
+{
+  void *p = f (n);
+  return p;
+}
+
+/* { dg-final { scan-ipa-dump "Function f found to be malloc" "pure-const" } } */
+/* { dg-final { scan-ipa-dump "Function bar found to be malloc" "pure-const" } } */
diff --git a/gcc/testsuite/gcc.dg/ipa/propmalloc-2.c b/gcc/testsuite/gcc.dg/ipa/propmalloc-2.c
new file mode 100644
index 00000000000..95b2fd74a7a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/propmalloc-2.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-ipa-pure-const-details" } */
+
+__attribute__((noinline, used, no_icf))
+static void *foo (__SIZE_TYPE__ n)
+{
+  return __builtin_malloc (n * 10);
+}
+
+__attribute__((noinline, used, no_icf))
+static void *bar(__SIZE_TYPE__ n, int cond)
+{
+  void *p;
+  if (cond)
+    p = foo (n);
+  else
+    p = __builtin_malloc (n);
+
+  return p;
+}
+
+/* { dg-final { scan-ipa-dump "Function foo found to be malloc" "pure-const" } } */
+/* { dg-final { scan-ipa-dump "Function bar found to be malloc" "pure-const" } } */
diff --git a/gcc/testsuite/gcc.dg/ipa/propmalloc-3.c b/gcc/testsuite/gcc.dg/ipa/propmalloc-3.c
new file mode 100644
index 00000000000..13558ddd07d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/propmalloc-3.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-ipa-pure-const-details" } */
+
+static void *foo(__SIZE_TYPE__, int) __attribute__((noinline, no_icf, used));
+
+__attribute__((noinline, used, no_icf))
+static void *bar(__SIZE_TYPE__ n, int m)
+{
+  return foo (n, m);
+}
+
+static void *foo(__SIZE_TYPE__ n, int m)
+{
+  void *p;
+  if (m > 0)
+    p = bar (n, --m);
+  else
+    p = __builtin_malloc (n);
+
+  return p;
+}
+
+/* { dg-final { scan-ipa-dump "Function foo found to be malloc" "pure-const" } } */
+/* { dg-final { scan-ipa-dump "Function bar found to be malloc" "pure-const" } } */


More information about the Gcc-patches mailing list