[PR tree-optimization/83022] malloc/memset->calloc too aggressive


We currently optimize a malloc/memset pair into a calloc call (when the sizes match and the memset value is zero, of course). This turns out to be a pessimization for mysql 5.6, whose allocator looks like:

void *ptr = malloc (size);
if (ptr && other_condition)
  memset (ptr, 0, size);

other_condition is false sufficiently often for this to be a noticeable performance degradation.
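For reference, the current fold effectively turns that pattern into an unconditionally zeroing allocation. A sketch of the net effect (not the exact GIMPLE we produce):

void *ptr = calloc (size, 1);  /* zeroes even when other_condition is false */

so every call pays for clearing the buffer, whether or not other_condition holds.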

In mysql 5.7 the allocator code was rearranged to explicitly call calloc or malloc, perhaps to avoid this very optimization?
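I'd guess the 5.7 shape is roughly this (my paraphrase, not their actual code):

void *ptr = other_condition ? calloc (size, 1) : malloc (size);

Selecting the allocator up front keeps the zeroing conditional without exposing a malloc/memset pair for us to fold.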

This patch restricts the optimization to require either
a) the malloc and memset in the same bb (calling memset is UB anyway if malloc returned NULL), or

b) the memset in a single-predecessor block whose predecessor contains the malloc call, AND a controlling condition that is a comparison of the malloc result against NULL.

For case (b) I only check for an NE/EQ comparison, and don't bother determining whether the test has the correct sense. If the user has written it the wrong way round, we're in UB territory anyway.
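To make the accepted shapes concrete, here's a sketch of what still gets folded versus what no longer does (hand-written illustrations, not taken from the testsuite):

#include <stdlib.h>
#include <string.h>

/* (a) malloc and memset in the same bb: still folded to calloc.  */
void *f1 (size_t n)
{
  void *p = malloc (n);
  memset (p, 0, n);  /* UB anyway if malloc returned NULL.  */
  return p;
}

/* (b) memset guarded only by a NULL test of the malloc result:
   still folded.  */
void *f2 (size_t n)
{
  void *p = malloc (n);
  if (p)
    memset (p, 0, n);
  return p;
}

/* Guarded by anything beyond the NULL test: no longer folded.  */
void *f3 (size_t n, int cond)
{
  void *p = malloc (n);
  if (p && cond)
    memset (p, 0, n);
  return p;
}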

The existing tests for this optimization do not regress. The new test shows the optimization being suppressed, as expected.

ok?

nathan

--
Nathan Sidwell
2017-11-17  Nathan Sidwell  <nathan@acm.org>

	PR tree-optimization/83022
	* tree-ssa-strlen.c (handle_builtin_memset): Don't fold malloc &
	memset if the condition is complicated.

	PR tree-optimization/83022
	* gcc.dg/tree-ssa/pr83022.c: New.

Index: tree-ssa-strlen.c
===================================================================
--- tree-ssa-strlen.c	(revision 254841)
+++ tree-ssa-strlen.c	(working copy)
@@ -2394,7 +2394,13 @@ handle_builtin_malloc (enum built_in_fun
 
 /* Handle a call to memset.
    After a call to calloc, memset(,0,) is unnecessary.
-   memset(malloc(n),0,n) is calloc(n,1).  */
+   memset(malloc(n),0,n) is calloc(n,1), and so is:
+     p = malloc (n); if (p) memset(p,0,n);
+   We don't want to optimize:
+     p = malloc (n); if (p && other_cond) memset (p,0,n);
+   because other_cond may be false sufficiently often for the memset
+   to be a noticeable performance hit.
+*/
 
 static bool
 handle_builtin_memset (gimple_stmt_iterator *gsi)
@@ -2422,7 +2428,42 @@ handle_builtin_memset (gimple_stmt_itera
   else if (code1 == BUILT_IN_MALLOC
 	   && operand_equal_p (gimple_call_arg (stmt1, 0), size, 0))
     {
+      /* Optimize if the malloc is in the same block, or in the
+	 immediate predecessor block AND the condition is a simple
+	 test of non-nullness.  */
       gimple_stmt_iterator gsi1 = gsi_for_stmt (stmt1);
+      if (gsi1.bb != gsi->bb)
+	{
+	  if (!single_pred_p (gsi->bb))
+	    /* memset not in a single predecessor bb.  */
+	    return true;
+
+	  edge e = EDGE_PRED (gsi->bb, 0);
+	  if (e->src != gsi1.bb)
+	    /* Single predecessor bb does not contain malloc.  */
+	    return true;
+
+	  gcond *cond = as_a <gcond *>
+	    (gimple_seq_last (*bb_seq_addr (gsi1.bb)));
+	  if (!integer_zerop (gimple_cond_rhs (cond)))
+	    /* Not comparing against zero.  */
+	    return true;
+	  if (!operand_equal_p (gimple_cond_lhs (cond), ptr, 0))
+	    /* Not comparing malloc'd pointer.  */
+	    return true;
+
+	  tree_code code = gimple_cond_code (cond);
+	  if (code != NE_EXPR && code != EQ_EXPR)
+	    /* Comparison is not == or !=.  Something very strange,
+	       but implementation-defined, is happening.  */
+	    return true;
+
+	  /* We don't have to check the edge truthiness.  If it's the
+	     wrong way round, we're calling memset with a NULL
+	     pointer, which is undefined.  */
+	}
+
+      /* Turn the malloc into calloc.  */
       update_gimple_call (&gsi1, builtin_decl_implicit (BUILT_IN_CALLOC), 2,
 			  size, build_one_cst (size_type_node));
       si1->nonzero_chars = build_int_cst (size_type_node, 0);
@@ -2431,6 +2472,8 @@ handle_builtin_memset (gimple_stmt_itera
     }
   else
     return true;
+
+  /* Delete the memset.  */
   tree lhs = gimple_call_lhs (stmt2);
   unlink_stmt_vdef (stmt2);
   if (lhs)
Index: testsuite/gcc.dg/tree-ssa/pr83022.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/pr83022.c	(revision 0)
+++ testsuite/gcc.dg/tree-ssa/pr83022.c	(working copy)
@@ -0,0 +1,22 @@
+/* PR tree-optimization/83022 malloc followed by conditional memset
+   optimized to calloc, which pessimizes the code when the condition
+   is false sufficiently often.  */
+
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+typedef __SIZE_TYPE__ size_t;
+extern void *malloc (size_t) __attribute__((__nothrow__,__malloc__));
+extern void *memset (void *, int, size_t) __attribute__ ((__nothrow__,__nonnull__ (1)));
+
+
+void *m (size_t s, int c)
+{
+  void *r = malloc (s);
+  if (r && c)
+    memset (r, 0, s);
+  return r;
+}
+
+/* { dg-final { scan-tree-dump "memset" "optimized" } }
+   { dg-final { scan-tree-dump "malloc" "optimized" } }
+   { dg-final { scan-tree-dump-not "calloc" "optimized" } }  */
