[RFC] replace malloc with a decl on the stack

Marc Glisse marc.glisse@inria.fr
Sun Nov 24 22:01:00 GMT 2013


On Sun, 24 Nov 2013, Florian Weimer wrote:

> On 11/12/2013 04:22 PM, Marc Glisse wrote:
>> On Tue, 12 Nov 2013, Ondřej Bílka wrote:
>> 
>>> Anyway you need a better analysis to determine if user called realloc on
>>> converted pointer.
>> 
>> Note that I am checking if the argument of free is the same as the
>> return value of malloc by looking at the SSA_NAME, not the C variable.
>> If the user called realloc on this value and then calls free on the same
>> value on all paths (including those where realloc returned a different
>> value), that's a broken program to start with. But indeed my current
>> patch fails on:
>> 
>> p=malloc(12);
>> q=realloc(p,16);
>> if(p==q)free(p);
>> exit(0);
>> 
>> because I (wrongly apparently) considered exit as safe as free :-(
>
> Strictly speaking, this code is equivalent to:
>
> p=malloc(12);
> q=realloc(p,16);
> free(p);
> exit(0);
>
> So I don't see how your optimization breaks it if it is applied to it. :)
>
> p==q is undefined if the realloc call returned a different pointer. 
> Past-realloc pointer uses are fairly common, though, and often used to adjust 
> interior pointers.  Empirically, this seems to work, although I wouldn't be 
> surprised if we already have optimizations which make past-realloc pointer 
> use undefined.
>
> (I don't think C provides a safe way to update interior pointers in place, 
> FWIW.)

Hmm, true, though I wouldn't feel very comfortable breaking code that 
checks if the return value of realloc differs from its argument (I may 
have used this myself once or twice, and I am not aware of existing 
optimizations that would break it).

I can also change the example to this more valid code:

p=malloc(12);
if(whatever)
   free(p);
else
   {
     q=copy(p); // copy just returns its argument
     free(q);
     exit(0);
   }

where if I considered exit as safe, I would turn it into:

unsigned char tab[12]__attribute__((aligned(16)));
p=tab;
if(!whatever)
   {
     q=copy(p);
     free(q);
     exit(0);
   }

which is clearly wrong. __builtin_unreachable might still be ok though.

To counter this, it may help to maintain a property of whether the program 
may have freed a possibly aliasing pointer sometime between the relevant 
malloc and here.

Other painful cases:

p=malloc(12);
secretly_free(p);
while(true){...} // infinite loop
free(p);

which I would turn into:

p=tab;
secretly_free(p);
while(true){...}

which would crash before even starting the infinite loop.

Raising a signal (say by dereferencing a null pointer) and catching it 
elsewhere is another way to avoid reaching the call to free that I am 
using to prove the transformation is ok.

I am attaching a more recent version of the patch that shouldn't ICE as 
much.

-- 
Marc Glisse
-------------- next part --------------
Index: gcc/c-family/c-common.c
===================================================================
--- gcc/c-family/c-common.c	(revision 205333)
+++ gcc/c-family/c-common.c	(working copy)
@@ -371,20 +371,21 @@ static tree handle_warn_unused_result_at
 static tree handle_sentinel_attribute (tree *, tree, tree, int, bool *);
 static tree handle_type_generic_attribute (tree *, tree, tree, int, bool *);
 static tree handle_alloc_size_attribute (tree *, tree, tree, int, bool *);
 static tree handle_target_attribute (tree *, tree, tree, int, bool *);
 static tree handle_optimize_attribute (tree *, tree, tree, int, bool *);
 static tree ignore_attribute (tree *, tree, tree, int, bool *);
 static tree handle_no_split_stack_attribute (tree *, tree, tree, int, bool *);
 static tree handle_fnspec_attribute (tree *, tree, tree, int, bool *);
 static tree handle_warn_unused_attribute (tree *, tree, tree, int, bool *);
 static tree handle_returns_nonnull_attribute (tree *, tree, tree, int, bool *);
+static tree handle_attribute_default (tree *, tree, tree, int, bool *);
 static tree handle_omp_declare_simd_attribute (tree *, tree, tree, int,
 					       bool *);
 static tree handle_omp_declare_target_attribute (tree *, tree, tree, int,
 						 bool *);
 static tree handle_bnd_variable_size_attribute (tree *, tree, tree, int, bool *);
 static tree handle_bnd_legacy (tree *, tree, tree, int, bool *);
 
 static void check_function_nonnull (tree, int, tree *);
 static void check_nonnull_arg (void *, tree, unsigned HOST_WIDE_INT);
 static bool nonnull_check_p (tree, unsigned HOST_WIDE_INT);
@@ -760,20 +761,22 @@ const struct attribute_spec c_common_att
   { "no_split_stack",	      0, 0, true,  false, false,
 			      handle_no_split_stack_attribute, false },
   /* For internal use (marking of builtins and runtime functions) only.
      The name contains space to prevent its usage in source code.  */
   { "fn spec",	 	      1, 1, false, true, true,
 			      handle_fnspec_attribute, false },
   { "warn_unused",            0, 0, false, false, false,
 			      handle_warn_unused_attribute, false },
   { "returns_nonnull",        0, 0, false, true, true,
 			      handle_returns_nonnull_attribute, false },
+  { "returns",		      0, 0, false, true, true,
+			      handle_attribute_default, false },
   { "omp declare simd",       0, -1, true,  false, false,
 			      handle_omp_declare_simd_attribute, false },
   { "omp declare target",     0, 0, true, false, false,
 			      handle_omp_declare_target_attribute, false },
   { "bnd_variable_size",      0, 0, true,  false, false,
 			      handle_bnd_variable_size_attribute, false },
   { "bnd_legacy",             0, 0, true, false, false,
 			      handle_bnd_legacy, false },
   { NULL,                     0, 0, false, false, false, NULL, false }
 };
@@ -8127,20 +8130,28 @@ handle_returns_twice_attribute (tree *no
     DECL_IS_RETURNS_TWICE (*node) = 1;
   else
     {
       warning (OPT_Wattributes, "%qE attribute ignored", name);
       *no_add_attrs = true;
     }
 
   return NULL_TREE;
 }
 
+/* Accept any attribute; arguments as in struct attribute_spec.handler.  */
+
+static tree
+handle_attribute_default (tree *, tree, tree, int, bool *)
+{
+  return NULL_TREE;
+}
+
 /* Handle a "no_limit_stack" attribute; arguments as in
    struct attribute_spec.handler.  */
 
 static tree
 handle_no_limit_stack_attribute (tree *node, tree name,
 				 tree ARG_UNUSED (args),
 				 int ARG_UNUSED (flags),
 				 bool *no_add_attrs)
 {
   tree decl = *node;
Index: gcc/calls.c
===================================================================
--- gcc/calls.c	(revision 205333)
+++ gcc/calls.c	(working copy)
@@ -713,23 +713,26 @@ is_tm_builtin (const_tree fndecl)
     }
   return false;
 }
 
 /* Detect flags (function attributes) from the function decl or type node.  */
 
 int
 flags_from_decl_or_type (const_tree exp)
 {
   int flags = 0;
+  const_tree type = exp;
 
   if (DECL_P (exp))
     {
+      type = TREE_TYPE (exp);
+
       /* The function exp may have the `malloc' attribute.  */
       if (DECL_IS_MALLOC (exp))
 	flags |= ECF_MALLOC;
 
       /* The function exp may have the `returns_twice' attribute.  */
       if (DECL_IS_RETURNS_TWICE (exp))
 	flags |= ECF_RETURNS_TWICE;
 
       /* Process the pure and const attributes.  */
       if (TREE_READONLY (exp))
@@ -770,20 +773,28 @@ flags_from_decl_or_type (const_tree exp)
 	flags |= ECF_TM_PURE;
     }
 
   if (TREE_THIS_VOLATILE (exp))
     {
       flags |= ECF_NORETURN;
       if (flags & (ECF_CONST|ECF_PURE))
 	flags |= ECF_LOOPING_CONST_OR_PURE;
     }
 
+  /* Built-in functions are all either noreturn or returns.  */
+  if (DECL_P (exp) && DECL_BUILT_IN_CLASS (exp) == BUILT_IN_NORMAL
+      && !(flags & ECF_NORETURN))
+    flags |= ECF_RETURNS;
+  else if (TYPE_P (type)
+	   && lookup_attribute ("returns", TYPE_ATTRIBUTES (type)))
+    flags |= ECF_RETURNS;
+
   return flags;
 }
 
 /* Detect flags from a CALL_EXPR.  */
 
 int
 call_expr_flags (const_tree t)
 {
   int flags;
   tree decl = get_callee_fndecl (t);
Index: gcc/doc/extend.texi
===================================================================
--- gcc/doc/extend.texi	(revision 205333)
+++ gcc/doc/extend.texi	(working copy)
@@ -2157,21 +2157,21 @@ attribute specification inside double pa
 attributes are currently defined for functions on all targets:
 @code{aligned}, @code{alloc_size}, @code{noreturn},
 @code{returns_twice}, @code{noinline}, @code{noclone},
 @code{always_inline}, @code{flatten}, @code{pure}, @code{const},
 @code{nothrow}, @code{sentinel}, @code{format}, @code{format_arg},
 @code{no_instrument_function}, @code{no_split_stack},
 @code{section}, @code{constructor},
 @code{destructor}, @code{used}, @code{unused}, @code{deprecated},
 @code{weak}, @code{malloc}, @code{alias}, @code{ifunc},
 @code{warn_unused_result}, @code{nonnull},
-@code{returns_nonnull}, @code{gnu_inline},
+@code{returns_nonnull}, @code{returns}, @code{gnu_inline},
 @code{externally_visible}, @code{hot}, @code{cold}, @code{artificial},
 @code{no_sanitize_address}, @code{no_address_safety_analysis},
 @code{no_sanitize_undefined}, @code{bnd_legacy},
 @code{error} and @code{warning}.
 Several other attributes are defined for functions on particular
 target systems.  Other attributes, including @code{section} are
 supported for variables declarations (@pxref{Variable Attributes})
 and for types (@pxref{Type Attributes}).
 
 GCC plugins may provide their own attributes.
@@ -3380,20 +3380,27 @@ return value should be a non-null pointe
 
 @smallexample
 extern void *
 mymalloc (size_t len) __attribute__((returns_nonnull));
 @end smallexample
 
 @noindent
 lets the compiler optimize callers based on the knowledge
 that the return value will never be null.
 
+@item returns
+@cindex @code{returns} function attribute
+The @code{returns} attribute specifies that the execution will always
+leave the function, either via a return statement or an exception. In
+particular, the function must not contain an infinite loop, and it must
+not leave with @code{exit}, @code{longjmp}, or by raising a signal.
+
 @item noreturn
 @cindex @code{noreturn} function attribute
 A few standard library functions, such as @code{abort} and @code{exit},
 cannot return.  GCC knows this automatically.  Some programs define
 their own functions that never return.  You can declare them
 @code{noreturn} to tell the compiler this fact.  For example,
 
 @smallexample
 @group
 void fatal () __attribute__ ((noreturn));
Index: gcc/testsuite/g++.dg/tree-ssa/heapstack-1.C
===================================================================
--- gcc/testsuite/g++.dg/tree-ssa/heapstack-1.C	(revision 0)
+++ gcc/testsuite/g++.dg/tree-ssa/heapstack-1.C	(working copy)
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+struct A {
+  void*p;
+  A(void*q) : p(q) {}
+  ~A(){ __builtin_free(p); }
+};
+void f(void*)__attribute__((__returns__));
+void h(void*)__attribute__((__returns__,__nothrow__));
+void g(){
+  void*p=__builtin_malloc(12);
+  A a(p);
+  f(p);
+}
+
+void i(){
+  void*p=__builtin_malloc(12);
+  h(p);
+  __builtin_free(p);
+}
+
+/* { dg-final { scan-tree-dump-not "malloc" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */

Property changes on: gcc/testsuite/g++.dg/tree-ssa/heapstack-1.C
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision URL
\ No newline at end of property
Index: gcc/testsuite/g++.dg/tree-ssa/heapstack-2.C
===================================================================
--- gcc/testsuite/g++.dg/tree-ssa/heapstack-2.C	(revision 0)
+++ gcc/testsuite/g++.dg/tree-ssa/heapstack-2.C	(working copy)
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void f(void*)__attribute__((__returns__));
+void g(){
+  void*p=__builtin_malloc(12);
+  f(p);
+  __builtin_free(p);
+}
+
+/* { dg-final { scan-tree-dump-times "malloc" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */

Property changes on: gcc/testsuite/g++.dg/tree-ssa/heapstack-2.C
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision URL
\ No newline at end of property
Index: gcc/testsuite/gcc.dg/tree-ssa/heapstack-1.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/heapstack-1.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/heapstack-1.c	(working copy)
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void* f(void*,int)__attribute__((__pure__));
+void * volatile q;
+void g(int m,int n,int s){
+  int i;
+  if (s<0||s>3)__builtin_unreachable();
+  void*p=__builtin_calloc(s,4);
+  switch(n){
+    case 1:
+      for (i=0; i<m; ++i)
+	q=f(p,i);
+      break;
+    case 2:
+      if (p)
+	__builtin_free(p);
+      return;
+    case 3:
+      __builtin_free(p);
+      __builtin_exit(42);
+    case 4:
+      __builtin_unreachable();
+    default:;
+  }
+  __builtin_free(p);
+}
+
+/* { dg-final { scan-tree-dump-not "malloc" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */

Property changes on: gcc/testsuite/gcc.dg/tree-ssa/heapstack-1.c
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision URL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: gcc/testsuite/gcc.dg/tree-ssa/heapstack-2.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/heapstack-2.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/heapstack-2.c	(working copy)
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void f(void*);
+void g(){
+  void*p=__builtin_malloc(12);
+  f(p);
+  __builtin_free(p);
+}
+
+/* { dg-final { scan-tree-dump-times "malloc" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */

Property changes on: gcc/testsuite/gcc.dg/tree-ssa/heapstack-2.c
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision URL
\ No newline at end of property
Index: gcc/testsuite/gcc.dg/tree-ssa/pr19831-3.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/pr19831-3.c	(revision 205333)
+++ gcc/testsuite/gcc.dg/tree-ssa/pr19831-3.c	(working copy)
@@ -1,31 +1,31 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -fdump-tree-optimized" } */
 
-void test2(void)
+void test2(int n)
 {
-  int *p = __builtin_malloc (sizeof (int) * 4);
+  int *p = __builtin_malloc (sizeof (int) * n);
   if (p == (void *)0)
     __builtin_abort ();
   __builtin_free (p);
 }
 
-void test5(int b)
+void test5(int n)
 {
-  int *p = __builtin_malloc (sizeof (int) * 4);
+  int *p = __builtin_malloc (sizeof (int) * n);
   if (p)
     __builtin_free (p);
 }
 
-void test6(void)
+void test6(int n)
 {
-  int *p = __builtin_malloc (sizeof (int) * 4);
+  int *p = __builtin_malloc (sizeof (int) * n);
   if (p == (void *)0)
     __builtin_abort ();
   if (p)
     __builtin_free (p);
 }
 
 /* We should be able to remove all malloc/free pairs with CDDCE.
    Assume p was non-NULL for test2.
    For test5, it doesn't matter if p is NULL or non-NULL.  */
 
Index: gcc/tree-core.h
===================================================================
--- gcc/tree-core.h	(revision 205333)
+++ gcc/tree-core.h	(working copy)
@@ -91,20 +91,23 @@ struct pointer_set_t;
 
 /* The function does not lead to calls within current function unit.  */
 #define ECF_LEAF		  (1 << 10)
 
 /* Nonzero if this call does not affect transactions.  */
 #define ECF_TM_PURE		  (1 << 11)
 
 /* Nonzero if this call is into the transaction runtime library.  */
 #define ECF_TM_BUILTIN		  (1 << 12)
 
+/* Nonzero if this call always ends, and only with return or throw.  */
+#define ECF_RETURNS		  (1 << 13)
+
 /* Call argument flags.  */
 /* Nonzero if the argument is not dereferenced recursively, thus only
    directly reachable memory is read or written.  */
 #define EAF_DIRECT		(1 << 0)
 
 /* Nonzero if memory reached by the argument is not clobbered.  */
 #define EAF_NOCLOBBER		(1 << 1)
 
 /* Nonzero if the argument does not escape.  */
 #define EAF_NOESCAPE		(1 << 2)
Index: gcc/tree-ssa-ccp.c
===================================================================
--- gcc/tree-ssa-ccp.c	(revision 205333)
+++ gcc/tree-ssa-ccp.c	(working copy)
@@ -137,20 +137,21 @@ along with GCC; see the file COPYING3.
 #include "stringpool.h"
 #include "tree-ssanames.h"
 #include "tree-pass.h"
 #include "tree-ssa-propagate.h"
 #include "value-prof.h"
 #include "langhooks.h"
 #include "target.h"
 #include "diagnostic-core.h"
 #include "dbgcnt.h"
 #include "params.h"
+#include "bitmap.h"
 
 
 /* Possible lattice values.  */
 typedef enum
 {
   UNINITIALIZED,
   UNDEFINED,
   CONSTANT,
   VARYING
 } ccp_lattice_t;
@@ -171,20 +172,24 @@ typedef struct prop_value_d prop_value_t
 
 /* Array of propagated constant values.  After propagation,
    CONST_VAL[I].VALUE holds the constant value for SSA_NAME(I).  If
    the constant is held in an SSA name representing a memory store
    (i.e., a VDEF), CONST_VAL[I].MEM_REF will contain the actual
    memory reference used to store (i.e., the LHS of the assignment
    doing the store).  */
 static prop_value_t *const_val;
 static unsigned n_const_val;
 
+/* Avoid looking at the same malloc call twice if there are several
+   matching free. The bitmap stores the SSA_NAME of the variable.  */
+static bitmap malloc_studied;
+
 static void canonicalize_value (prop_value_t *);
 static bool ccp_fold_stmt (gimple_stmt_iterator *);
 
 /* Dump constant propagation value VAL to file OUTF prefixed by PREFIX.  */
 
 static void
 dump_lattice_value (FILE *outf, const char *prefix, prop_value_t val)
 {
   switch (val.lattice_val)
     {
@@ -815,20 +820,22 @@ ccp_initialize (void)
       for (i = gsi_start_phis (bb); !gsi_end_p (i); gsi_next (&i))
         {
           gimple phi = gsi_stmt (i);
 
 	  if (virtual_operand_p (gimple_phi_result (phi)))
             prop_set_simulate_again (phi, false);
 	  else
             prop_set_simulate_again (phi, true);
 	}
     }
+
+  malloc_studied = BITMAP_ALLOC (NULL);
 }
 
 /* Debug count support. Reset the values of ssa names
    VARYING when the total number ssa names analyzed is
    beyond the debug count specified.  */
 
 static void
 do_dbg_cnt (void)
 {
   unsigned i;
@@ -896,20 +903,21 @@ ccp_finalize (void)
 	  nonzero_bits = nonzero_bits | tree_to_double_int (val->value);
 	  nonzero_bits &= get_nonzero_bits (name);
 	  set_nonzero_bits (name, nonzero_bits);
 	}
     }
 
   /* Perform substitutions based on the known constant values.  */
   something_changed = substitute_and_fold (get_constant_value,
 					   ccp_fold_stmt, true);
 
+  BITMAP_FREE (malloc_studied);
   free (const_val);
   const_val = NULL;
   return something_changed;;
 }
 
 
 /* Compute the meet operator between *VAL1 and *VAL2.  Store the result
    in VAL1.
 
    		any  M UNDEFINED   = any
@@ -1920,20 +1928,156 @@ fold_builtin_alloca_with_align (gimple s
 	singleton_p = pt_solution_singleton_p (&pi->pt, &uid);
 	gcc_assert (singleton_p);
 	SET_DECL_PT_UID (var, uid);
       }
   }
 
   /* Fold alloca to the address of the array.  */
   return fold_convert (TREE_TYPE (lhs), build_fold_addr_expr (var));
 }
 
+/* Returns a constant upper bound if possible, or the variable if not.  */
+static tree
+get_upper_bound (tree var)
+{
+  tree cst = get_constant_value (var);
+  if (cst)
+    return cst;
+  double_int min, max;
+  if (TREE_CODE (var) != SSA_NAME
+      || get_range_info (var, &min, &max) != VR_RANGE)
+    return var;
+  return double_int_to_tree (TREE_TYPE (var), max);
+}
+
+/* We want to be sure that free (VAR) can't be called in another function, and
+   the easiest way to ensure that is to prove that it is called in this
+   function.  The hardest part is avoiding some call to a function that may not
+   return because of exit, an infinite loop, setjmp, etc, where it might call
+   free on VAR.  */
+// ??? What about signals and signal handlers?
+// ??? What if there is an infinite loop in this function? Checking that
+// we can't return without calling free is not enough.
+// TODO: maintain 2 things:
+// * we can't have exited yet; (done)
+// * we can't have freed VAR yet.
+static bool
+malloc_safe_on_stack (gimple stmt, vec<gimple, va_heap> *list_of_frees)
+{
+  tree var = gimple_call_lhs (stmt);
+  if (!bitmap_set_bit (malloc_studied, SSA_NAME_VERSION (var)))
+    return false;
+  basic_block bb = gimple_bb (stmt);
+  stack_vec<basic_block, 20> bb_to_visit;
+  bitmap bb_visited = BITMAP_ALLOC (NULL);
+  bitmap_set_bit (bb_visited, bb->index);
+  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+  enum tree_code code;
+
+next_stmt:
+  gsi_next_nondebug (&gsi);
+
+handle_stmt:
+  if (gsi_end_p (gsi))
+    {
+      edge e;
+      edge_iterator ei;
+      FOR_EACH_EDGE (e, ei, bb->succs)
+	bb_to_visit.safe_push (e->dest);
+      goto next_bb;
+    }
+  stmt = gsi_stmt (gsi);
+  if (stmt_can_throw_external (stmt))
+    /* We could give up only if VAR has escaped (same for return), but that
+       means there is a memory leak, so don't bother.  */
+    goto unsafe;
+
+  switch (gimple_code (stmt))
+    {
+	/* We leave the function without calling free.  */
+      case GIMPLE_RETURN:
+	goto unsafe;
+
+      case GIMPLE_COND:
+	code = gimple_cond_code (stmt);
+	/* If we replace malloc by an array on the stack, it can't be NULL.  */
+	if ((code == EQ_EXPR || code == NE_EXPR)
+	    && var == gimple_cond_lhs (stmt)
+	    && integer_zerop (gimple_cond_rhs (stmt)))
+	  {
+	    edge e;
+	    edge_iterator ei;
+	    FOR_EACH_EDGE (e, ei, bb->succs)
+	      if (e->flags
+		  & ((code == NE_EXPR) ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE))
+		bb_to_visit.safe_push (e->dest);
+	    goto next_bb;
+	  }
+	/* Fallthrough.  */
+
+	/* Last stmt of the bb, handled by looking at the outgoing edges.  */
+      case GIMPLE_GOTO:
+      case GIMPLE_SWITCH:
+	/* Statements that are irrelevant.  */
+      case GIMPLE_ASSIGN:
+      case GIMPLE_LABEL:
+      case GIMPLE_NOP:
+	goto next_stmt;
+
+      case GIMPLE_CALL:
+	{
+	  tree callee = gimple_call_fndecl (stmt);
+	  if (callee != NULL_TREE
+	      && DECL_BUILT_IN_CLASS (callee) == BUILT_IN_NORMAL)
+	    switch (DECL_FUNCTION_CODE (callee))
+	      {
+		case BUILT_IN_FREE:
+		  if (gimple_call_arg (stmt, 0) != var)
+		    goto next_stmt;
+		  list_of_frees->safe_push (stmt);
+		  /* Fallthrough.  */
+
+		/* It is questionable whether BUILT_IN_UNREACHABLE is safe.  */
+		case BUILT_IN_UNREACHABLE:
+		  /* Nothing could throw an exception earlier in the block,
+		     so we don't need to check the EH edges.  */
+		  goto next_bb;
+		default:;
+	      }
+
+	  int flags = gimple_call_flags (stmt);
+	  if (flags & (ECF_CONST|ECF_PURE|ECF_RETURNS|ECF_NOVOPS))
+	    goto next_stmt;
+	  goto unsafe;
+	}
+      default:
+	goto unsafe;
+    }
+
+next_bb:
+  if (bb_to_visit.is_empty ())
+    goto safe;
+  bb = bb_to_visit.last ();
+  bb_to_visit.pop ();
+  if (!bitmap_set_bit (bb_visited, bb->index))
+    goto next_bb;
+  gsi = gsi_start_nondebug_after_labels_bb (bb);
+  goto handle_stmt;
+
+unsafe:
+  BITMAP_FREE (bb_visited);
+  return false;
+safe:
+  BITMAP_FREE (bb_visited);
+  return true;
+}
+
 /* Fold the stmt at *GSI with CCP specific information that propagating
    and regular folding does not catch.  */
 
 static bool
 ccp_fold_stmt (gimple_stmt_iterator *gsi)
 {
   gimple stmt = gsi_stmt (*gsi);
 
   switch (gimple_code (stmt))
     {
@@ -2008,20 +2152,95 @@ ccp_fold_stmt (gimple_stmt_iterator *gsi
             if (new_rhs)
 	      {
 		bool res = update_call_from_tree (gsi, new_rhs);
 		tree var = TREE_OPERAND (TREE_OPERAND (new_rhs, 0),0);
 		gcc_assert (res);
 		insert_clobbers_for_var (*gsi, var);
 		return true;
 	      }
           }
 
+	/* Replace malloc+free with a stack allocation.  */
+	if (gimple_call_builtin_p (stmt, BUILT_IN_FREE))
+	  {
+	    bool is_calloc = false;
+	    tree ptr = gimple_call_arg (stmt, 0);
+	    if (TREE_CODE (ptr) != SSA_NAME)
+	      return false;
+	    gimple stmt1 = SSA_NAME_DEF_STMT (ptr);
+	    if ((is_calloc = gimple_call_builtin_p (stmt1, BUILT_IN_CALLOC))
+		|| gimple_call_builtin_p (stmt1, BUILT_IN_MALLOC))
+	      {
+		gcc_checking_assert (gimple_call_lhs (stmt1) == ptr);
+		tree size = gimple_call_arg (stmt1, 0);
+		size = get_upper_bound (size);
+		if (is_calloc)
+		  {
+		    tree siz2 = gimple_call_arg (stmt1, 1);
+		    siz2 = get_upper_bound (siz2);
+		    size = fold_binary (MULT_EXPR, size_type_node, size, siz2);
+		  }
+		if (!size || !tree_fits_uhwi_p (size)
+		    || TREE_INT_CST_LOW (size)
+		       > (unsigned HOST_WIDE_INT)
+			   PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
+		  return false;
+		stack_vec<gimple, 20> list_of_frees;
+		if (!malloc_safe_on_stack (stmt1, &list_of_frees))
+		  return false;
+
+		/* Declare an array.  */
+		tree elem_type =
+		  build_nonstandard_integer_type (BITS_PER_UNIT, 1);
+		tree array_type =
+		  build_array_type_nelts (elem_type, TREE_INT_CST_LOW (size));
+		tree var = create_tmp_var (array_type, NULL);
+		/* Too much, on x86_64 we want 128, what malloc provides,
+		   but MALLOC_ABI_ALIGNMENT seems like it is a lower bound
+		   whereas we want an upper bound. Maybe
+		   ATTRIBUTE_ALIGNED_VALUE?  */
+		DECL_ALIGN (var) = BIGGEST_ALIGNMENT;
+		tree p = fold_convert (TREE_TYPE (ptr),
+				       build_fold_addr_expr (var));
+
+		/* Replace free with a clobber.  */
+		int i;
+		gimple free_stmt;
+		FOR_EACH_VEC_ELT (list_of_frees, i, free_stmt)
+		  {
+		    tree clobber = build_constructor (array_type, NULL);
+		    TREE_THIS_VOLATILE (clobber) = 1;
+		    gimple clobber_stmt = gimple_build_assign (var, clobber);
+		    gimple_stmt_iterator gsi_free = gsi_for_stmt (free_stmt);
+		    gimple_stmt_iterator *gsi_p = &gsi_free;
+		    /* Make sure gsi doesn't point to a deleted statement
+		       at the end, since substitute_and_fold runs various
+		       updates on it.  */
+		    if (free_stmt == stmt)
+		      gsi_p = gsi;
+		    gsi_replace (gsi_p, clobber_stmt, false);
+		  }
+
+		/* Replace malloc with the address of the variable.  */
+		gimple_stmt_iterator gsi1 = gsi_for_stmt (stmt1);
+		update_call_from_tree (&gsi1, p);
+		if (is_calloc)
+		  {
+		    tree memset_decl = builtin_decl_explicit (BUILT_IN_MEMSET);
+		    gimple zero = gimple_build_call (memset_decl, 3, ptr,
+						     integer_zero_node, size);
+		    gsi_insert_after (&gsi1, zero, GSI_SAME_STMT);
+		  }
+		return true;
+	      }
+	  }
+
 	/* Propagate into the call arguments.  Compared to replace_uses_in
 	   this can use the argument slot types for type verification
 	   instead of the current argument type.  We also can safely
 	   drop qualifiers here as we are dealing with constants anyway.  */
 	argt = TYPE_ARG_TYPES (gimple_call_fntype (stmt));
 	for (i = 0; i < gimple_call_num_args (stmt) && argt;
 	     ++i, argt = TREE_CHAIN (argt))
 	  {
 	    tree arg = gimple_call_arg (stmt, i);
 	    if (TREE_CODE (arg) == SSA_NAME


More information about the Gcc-patches mailing list