This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
calloc = malloc + memset
- From: Marc Glisse <marc dot glisse at inria dot fr>
- To: gcc-patches at gcc dot gnu dot org
- Date: Fri, 28 Feb 2014 23:48:02 +0100 (CET)
- Subject: calloc = malloc + memset
- Authentication-results: sourceware.org; auth=none
Hello,
this is a stage 1 patch, and I'll ping it then, but if you have comments
now...
Passes bootstrap+testsuite on x86_64-linux-gnu.
2014-02-28 Marc Glisse <marc.glisse@inria.fr>
PR tree-optimization/57742
gcc/
* tree-ssa-forwprop.c (simplify_malloc_memset): New function.
(simplify_builtin_call): Call it.
gcc/testsuite/
* g++.dg/tree-ssa/calloc.C: New testcase.
* gcc.dg/tree-ssa/calloc.c: Likewise.
--
Marc Glisse
Index: gcc/testsuite/g++.dg/tree-ssa/calloc.C
===================================================================
--- gcc/testsuite/g++.dg/tree-ssa/calloc.C (revision 0)
+++ gcc/testsuite/g++.dg/tree-ssa/calloc.C (working copy)
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-std=gnu++11 -O3 -fdump-tree-optimized" } */
+
+#include <new>
+#include <vector>
+#include <cstdlib>
+
+void g(void*);
+inline void* operator new(std::size_t sz) _GLIBCXX_THROW (std::bad_alloc)
+{
+ void *p;
+
+ if (sz == 0)
+ sz = 1;
+
+ // Slightly modified from the libsupc++ version, that one has 2 calls
+ // to malloc which makes it too hard to optimize.
+ while ((p = std::malloc (sz)) == 0)
+ {
+ std::new_handler handler = std::get_new_handler ();
+ if (! handler)
+ _GLIBCXX_THROW_OR_ABORT(std::bad_alloc());
+ handler ();
+ }
+ return p;
+}
+
+void f(void*p,int n){
+ new(p)std::vector<int>(n);
+}
+
+/* { dg-final { scan-tree-dump-times "calloc" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "malloc" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "memset" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Property changes on: gcc/testsuite/g++.dg/tree-ssa/calloc.C
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision URL
\ No newline at end of property
Index: gcc/testsuite/gcc.dg/tree-ssa/calloc.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/calloc.c (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/calloc.c (working copy)
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+#include <stdlib.h>
+#include <string.h>
+
+extern int a;
+extern int* b;
+int n;
+void* f(long*q){
+ int*p=malloc(n);
+ ++*q;
+ if(p){
+ ++*q;
+ a=2;
+ memset(p,0,n);
+ *b=3;
+ }
+ return p;
+}
+void* g(void){
+ float*p=calloc(8,4);
+ return memset(p,0,32);
+}
+
+/* { dg-final { scan-tree-dump-times "calloc" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "malloc" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "memset" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Property changes on: gcc/testsuite/gcc.dg/tree-ssa/calloc.c
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision URL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: gcc/tree-ssa-forwprop.c
===================================================================
--- gcc/tree-ssa-forwprop.c (revision 208224)
+++ gcc/tree-ssa-forwprop.c (working copy)
@@ -1487,20 +1487,149 @@ constant_pointer_difference (tree p1, tr
}
for (i = 0; i < cnt[0]; i++)
for (j = 0; j < cnt[1]; j++)
if (exps[0][i] == exps[1][j])
return size_binop (MINUS_EXPR, offs[0][i], offs[1][j]);
return NULL_TREE;
}
+/* Optimize
+ ptr = malloc (n);
+ memset (ptr, 0, n);
+ into
+ ptr = calloc (n, 1);
+ gsi_p is known to point to a call to __builtin_memset. */
+static bool
+simplify_malloc_memset (gimple_stmt_iterator *gsi_p)
+{
+ /* First make sure we have:
+ ptr = malloc (n);
+ memset (ptr, 0, n); */
+ gimple stmt2 = gsi_stmt (*gsi_p);
+ if (!integer_zerop (gimple_call_arg (stmt2, 1)))
+ return false;
+ tree ptr1, ptr2 = gimple_call_arg (stmt2, 0);
+ tree size = gimple_call_arg (stmt2, 2);
+ if (TREE_CODE (ptr2) != SSA_NAME)
+ return false;
+ gimple stmt1 = SSA_NAME_DEF_STMT (ptr2);
+ tree callee1;
+ /* Handle the case where STMT1 is a unary PHI, which happens
+ for instance with:
+ while (!(p = malloc (n))) { ... }
+ memset (p, 0, n); */
+ if (!stmt1)
+ return false;
+ if (gimple_code (stmt1) == GIMPLE_PHI
+ && gimple_phi_num_args (stmt1) == 1)
+ {
+ ptr1 = gimple_phi_arg_def (stmt1, 0);
+ if (TREE_CODE (ptr1) != SSA_NAME)
+ return false;
+ stmt1 = SSA_NAME_DEF_STMT (ptr1);
+ }
+ else
+ ptr1 = ptr2;
+ if (!stmt1
+ || !is_gimple_call (stmt1)
+ || !(callee1 = gimple_call_fndecl (stmt1)))
+ return false;
+
+ bool is_calloc;
+ if (DECL_FUNCTION_CODE (callee1) == BUILT_IN_MALLOC)
+ {
+ is_calloc = false;
+ if (!operand_equal_p (gimple_call_arg (stmt1, 0), size, 0))
+ return false;
+ }
+ else if (DECL_FUNCTION_CODE (callee1) == BUILT_IN_CALLOC)
+ {
+ is_calloc = true;
+ tree arg1 = gimple_call_arg (stmt1, 0);
+ tree arg2 = gimple_call_arg (stmt1, 1);
+ tree size1 = fold_build2 (MULT_EXPR, TREE_TYPE (size), arg1, arg2);
+ if (!operand_equal_p (size1, size, 0))
+ return false;
+ /* We could look at SSA_NAME_DEF_STMT (size), but there can be many casts
+ mixed with the MULT_EXPR which makes it hard to match with size1. */
+ }
+ else
+ return false;
+
+ /* We only allow two simple CFG forms for now. Either stmt1 and stmt2 are
+ in the same BB (in this order), or BB1 (malloc) ends with:
+ if (ptr) goto BB2 (memset) */
+ basic_block bb1 = gimple_bb (stmt1);
+ basic_block bb2 = gimple_bb (stmt2);
+ if (bb1 != bb2)
+ {
+ if (!single_pred_p (bb2) || single_pred (bb2) != bb1)
+ return false;
+ gimple cond = last_stmt (bb1);
+ if (gimple_code (cond) != GIMPLE_COND
+ || !integer_zerop (gimple_cond_rhs (cond))
+ || gimple_cond_lhs (cond) != ptr1)
+ return false;
+ int branch;
+ tree_code comp = gimple_cond_code (cond);
+ if (comp == NE_EXPR)
+ branch = EDGE_TRUE_VALUE;
+ else if (comp == EQ_EXPR)
+ branch = EDGE_FALSE_VALUE;
+ else
+ return false;
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, bb1->succs)
+ if ((e->flags & branch) && e->dest != bb2)
+ return false;
+ }
+
+ /* Finally, make sure the memory is not used before stmt2. */
+ ao_ref ref;
+ ao_ref_init_from_ptr_and_size (&ref, ptr1, size);
+ tree vdef = gimple_vuse (stmt2);
+ if (vdef == NULL)
+ return false;
+ while (true)
+ {
+ gimple cur = SSA_NAME_DEF_STMT (vdef);
+ if (cur == stmt1) break;
+ if (stmt_may_clobber_ref_p_1 (cur, &ref))
+ return false;
+ vdef = gimple_vuse (cur);
+ }
+
+ /* Replace malloc with calloc and remove memset. */
+ if (!is_calloc)
+ {
+ gimple_stmt_iterator gsi = gsi_for_stmt (stmt1);
+ update_gimple_call (&gsi, builtin_decl_implicit (BUILT_IN_CALLOC), 2,
+ size, build_one_cst (size_type_node));
+ }
+ tree lhs = gimple_call_lhs (stmt2);
+ unlink_stmt_vdef (stmt2);
+ if (lhs)
+ {
+ gimple assign = gimple_build_assign (lhs, ptr2);
+ gsi_replace (gsi_p, assign, false);
+ }
+ else
+ {
+ gsi_remove (gsi_p, true);
+ release_defs (stmt2);
+ }
+ return true;
+}
+
/* *GSI_P is a GIMPLE_CALL to a builtin function.
Optimize
memcpy (p, "abcd", 4);
memset (p + 4, ' ', 3);
into
memcpy (p, "abcd ", 7);
call if the latter can be stored by pieces during expansion. */
static bool
simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2)
@@ -1508,38 +1637,44 @@ simplify_builtin_call (gimple_stmt_itera
gimple stmt1, stmt2 = gsi_stmt (*gsi_p);
tree vuse = gimple_vuse (stmt2);
if (vuse == NULL)
return false;
stmt1 = SSA_NAME_DEF_STMT (vuse);
switch (DECL_FUNCTION_CODE (callee2))
{
case BUILT_IN_MEMSET:
if (gimple_call_num_args (stmt2) != 3
- || gimple_call_lhs (stmt2)
|| CHAR_BIT != 8
|| BITS_PER_UNIT != 8)
break;
else
{
+ if (simplify_malloc_memset (gsi_p))
+ return true;
+
tree callee1;
tree ptr1, src1, str1, off1, len1, lhs1;
tree ptr2 = gimple_call_arg (stmt2, 0);
tree val2 = gimple_call_arg (stmt2, 1);
tree len2 = gimple_call_arg (stmt2, 2);
tree diff, vdef, new_str_cst;
gimple use_stmt;
unsigned int ptr1_align;
unsigned HOST_WIDE_INT src_len;
char *src_buf;
use_operand_p use_p;
+ /* Not implemented yet. */
+ if (gimple_call_lhs (stmt2))
+ break;
+
if (!tree_fits_shwi_p (val2)
|| !tree_fits_uhwi_p (len2))
break;
if (is_gimple_call (stmt1))
{
/* If first stmt is a call, it needs to be memcpy
or mempcpy, with string literal as second argument and
constant length. */
callee1 = gimple_call_fndecl (stmt1);
if (callee1 == NULL_TREE