This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PR tree-optimization/83022] malloc/memset->calloc too aggressive
- From: Nathan Sidwell <nathan at acm dot org>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: marc dot glisse at inria dot fr
- Date: Fri, 17 Nov 2017 11:07:56 -0500
- Subject: [PR tree-optimization/83022] malloc/memset->calloc too aggressive
- Authentication-results: sourceware.org; auth=none
We currently optimize a malloc/memset pair into a calloc call (when the
values match, of course). This turns out to be a pessimization for
mysql 5.6, where the allocator looks like:
void *ptr = malloc (size);
if (ptr && other_condition)
memset (ptr, 0, size);
other_condition is false sufficiently often for this to be a noticeable
performance degradation.
mysql 5.7 moved the above code around to explicitly call calloc or
malloc. Maybe to avoid this optimization?
This patch restricts the optimization to require
a) malloc and memset in the same bb (of course, this is UB if malloc
returns NULL), or,
b) memset in a single-pred block whose predecessor contains the malloc
call. AND the condition being a comparison of the malloc result against
NULL.
For #b I only check for NE/EQ comparison, and don't bother determining
whether the test is the correct sense. If the user's written it the
wrong way round, we're in UB territory anyway.
The existing tests for this optimization do not regress. The new test
shows the optimization being repressed, as expected.
ok?
nathan
--
Nathan Sidwell
2017-11-17 Nathan Sidwell <nathan@acm.org>
PR tree-optimization/83022
* tree-ssa-strlen.c (handle_builtin_memset): Don't fold malloc &
memset if the condition is complicated.
PR tree-optimization/83022
* gcc.dg/tree-ssa/pr83022.c: New.
Index: tree-ssa-strlen.c
===================================================================
--- tree-ssa-strlen.c (revision 254841)
+++ tree-ssa-strlen.c (working copy)
@@ -2394,7 +2394,13 @@ handle_builtin_malloc (enum built_in_fun
/* Handle a call to memset.
After a call to calloc, memset(,0,) is unnecessary.
- memset(malloc(n),0,n) is calloc(n,1). */
+ calloc is also:
+ p = malloc (n); if (p) memset(p,0,n);
+ We don't want to optimize:
+ p = malloc (n); if (p && other_cond) memset (p,0,n);
+ because other_cond may be false sufficiently often for the memset
+ to be a noticeable performance hit.
+*/
static bool
handle_builtin_memset (gimple_stmt_iterator *gsi)
@@ -2422,7 +2428,42 @@ handle_builtin_memset (gimple_stmt_itera
else if (code1 == BUILT_IN_MALLOC
&& operand_equal_p (gimple_call_arg (stmt1, 0), size, 0))
{
+ /* Optimize if malloc is in same block, or it is in immediate
+ predecessor block AND condition is simple test of
+ non-nullness. */
gimple_stmt_iterator gsi1 = gsi_for_stmt (stmt1);
+ if (gsi1.bb != gsi->bb)
+ {
+ if (!single_pred_p (gsi->bb))
+ /* memset not in a single predecessor bb. */
+ return true;
+
+ edge e = EDGE_PRED (gsi->bb, 0);
+ if (e->src != gsi1.bb)
+ /* Single predecessor bb does not contain malloc. */
+ return true;
+
+ gcond *cond = as_a <gcond *>
+ (gimple_seq_last (*bb_seq_addr (gsi1.bb)));
+ if (!integer_zerop (gimple_cond_rhs (cond)))
+ /* Not comparing against zero. */
+ return true;
+ if (!operand_equal_p (gimple_cond_lhs (cond), ptr, 0))
+ /* Not comparing malloc'd pointer. */
+ return true;
+
+ tree_code code = gimple_cond_code (cond);
+ if (code != NE_EXPR && code != EQ_EXPR)
+ /* Comparison is not == or !=. Something very strange,
+ but implementation-defined is happening. */
+ return true;
+
+ /* We don't have to check the edge truthiness. If it's the
+ wrong way round, we're calling memset with a NULL
+ pointer, which is undefined. */
+ }
+
+ /* Turn the malloc into calloc. */
update_gimple_call (&gsi1, builtin_decl_implicit (BUILT_IN_CALLOC), 2,
size, build_one_cst (size_type_node));
si1->nonzero_chars = build_int_cst (size_type_node, 0);
@@ -2431,6 +2472,8 @@ handle_builtin_memset (gimple_stmt_itera
}
else
return true;
+
+ /* Delete the memset. */
tree lhs = gimple_call_lhs (stmt2);
unlink_stmt_vdef (stmt2);
if (lhs)
Index: testsuite/gcc.dg/tree-ssa/pr83022.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/pr83022.c (revision 0)
+++ testsuite/gcc.dg/tree-ssa/pr83022.c (working copy)
@@ -0,0 +1,22 @@
+/* PR tree-optimization/83022 malloc followed by conditional memset
+ optimized to calloc, which pessimizes code when condition is
+ sufficiently false. */
+
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+typedef __SIZE_TYPE__ size_t;
+extern void *malloc (size_t) __attribute__((__nothrow__,__malloc__));
+extern void *memset (void *, int, size_t) __attribute__ ((__nothrow__,__nonnull__ (1)));
+
+
+void *m (size_t s, int c)
+{
+ void *r = malloc (s);
+ if (r && c)
+ memset (r, 0, s);
+ return r;
+}
+
+/* { dg-final { scan-tree-dump "memset" "optimized" } }
+ { dg-final { scan-tree-dump "malloc" "optimized" } }
+ { dg-final { scan-tree-dump-not "calloc" "optimized" } } */