This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Optimize UBSAN_NULL checks, add sanopt.c
- From: Marek Polacek <polacek at redhat dot com>
- To: Jakub Jelinek <jakub at redhat dot com>
- Cc: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Date: Tue, 4 Nov 2014 19:36:26 +0100
- Subject: Re: [PATCH] Optimize UBSAN_NULL checks, add sanopt.c
- Authentication-results: sourceware.org; auth=none
- References: <20141103142757 dot GP20462 at redhat dot com> <20141103153500 dot GS5026 at tucnak dot redhat dot com>
On Mon, Nov 03, 2014 at 04:35:00PM +0100, Jakub Jelinek wrote:
> Well, in theory you could just script removing them one by one and just
> make sanopt.o after each step to see if the header is or is not needed,
> perhaps with some manual tweeks.
I pruned those headers some more.
> Perhaps in preparation for future optimizations (other UBSAN_*
> calls, and ASAN_CHECK and tsan builtins), you should consider
> putting the hash_map into some structure and pass address of that
> structure instead, so that you have all the pass context at the same spot.
> You could put asan_num_accesses in there too, see below.
Done.
> I think it would be better to walk the vector backwards instead of forward.
> 1) if you have the same SSA_NAME there multiple times, ignoring the already
> unnecessary stmts, the only case where you'd have multiple stmts is
> if the earlier stmts dominate the following stmts and for some reason
> weren't optimized away; that for some reason currently should be
> just higher required alignment in the dominated stmt (e.g. say have
> UBSAN_NULL (foo_23, 0); UBSAN_NULL (foo_23, 2); UBSAN_NULL (foo_23, 4);
> where the first stmt dominates the second two and second stmt dominates
> the last one.
> 2) All the si->visited_p stmts should be always at the end of the vector
> IMHO, preceeded only by !si->visited_p stmts.
> Thus, when walking backwards, first remove the stmts with bb->visited_p,
> once you hit one that doesn't have it set, I believe there shouldn't be any
> further. And then in theory it should be fine to just compare the last
> stmt in the vector that was left (if any).
I agree that should work - so changed.
> Perhaps you could return the asan_num_accesses value computed during
> sanopt_optimize (just count IFN_ASAN_CHECK calls that you don't optimize
> away), and do this only in else if (i.e. when sanopt_optimize has not been
> run). That way, you'd save one extra IL walk.
Done.
Bootstrap-ubsan/regtest passed on x86_64-linux, ok for trunk?
2014-11-04 Marek Polacek <polacek@redhat.com>
* Makefile.in (OBJS): Add sanopt.o.
(GTFILES): Add sanopt.c.
* asan.h (asan_expand_check_ifn): Declare.
* asan.c (asan_expand_check_ifn): No longer static.
(class pass_sanopt, pass_sanopt::execute, make_pass_sanopt): Move...
* sanopt.c: ...here. New file.
testsuite/
* c-c++-common/ubsan/align-2.c: Remove dg-output.
* c-c++-common/ubsan/align-4.c: Likewise.
* g++.dg/ubsan/null-1.C: Likewise.
* g++.dg/ubsan/null-2.C: Likewise.
diff --git gcc/Makefile.in gcc/Makefile.in
index 9c67fe2..f383032 100644
--- gcc/Makefile.in
+++ gcc/Makefile.in
@@ -1376,6 +1376,7 @@ OBJS = \
asan.o \
tsan.o \
ubsan.o \
+ sanopt.o \
tree-call-cdce.o \
tree-cfg.o \
tree-cfgcleanup.o \
@@ -2305,6 +2306,7 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \
$(srcdir)/asan.c \
$(srcdir)/ubsan.c \
$(srcdir)/tsan.c \
+ $(srcdir)/sanopt.c \
$(srcdir)/ipa-devirt.c \
$(srcdir)/internal-fn.h \
@all_gtfiles@
diff --git gcc/asan.c gcc/asan.c
index 8f146d2..79dede7 100644
--- gcc/asan.c
+++ gcc/asan.c
@@ -2497,7 +2497,7 @@ asan_finish_file (void)
/* Expand the ASAN_{LOAD,STORE} builtins. */
-static bool
+bool
asan_expand_check_ifn (gimple_stmt_iterator *iter, bool use_calls)
{
gimple g = gsi_stmt (*iter);
@@ -2800,114 +2800,4 @@ make_pass_asan_O0 (gcc::context *ctxt)
return new pass_asan_O0 (ctxt);
}
-/* Perform optimization of sanitize functions. */
-
-namespace {
-
-const pass_data pass_data_sanopt =
-{
- GIMPLE_PASS, /* type */
- "sanopt", /* name */
- OPTGROUP_NONE, /* optinfo_flags */
- TV_NONE, /* tv_id */
- ( PROP_ssa | PROP_cfg | PROP_gimple_leh ), /* properties_required */
- 0, /* properties_provided */
- 0, /* properties_destroyed */
- 0, /* todo_flags_start */
- TODO_update_ssa, /* todo_flags_finish */
-};
-
-class pass_sanopt : public gimple_opt_pass
-{
-public:
- pass_sanopt (gcc::context *ctxt)
- : gimple_opt_pass (pass_data_sanopt, ctxt)
- {}
-
- /* opt_pass methods: */
- virtual bool gate (function *) { return flag_sanitize; }
- virtual unsigned int execute (function *);
-
-}; // class pass_sanopt
-
-unsigned int
-pass_sanopt::execute (function *fun)
-{
- basic_block bb;
-
- int asan_num_accesses = 0;
- if (flag_sanitize & SANITIZE_ADDRESS)
- {
- gimple_stmt_iterator gsi;
- FOR_EACH_BB_FN (bb, fun)
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
- {
- gimple stmt = gsi_stmt (gsi);
- if (is_gimple_call (stmt) && gimple_call_internal_p (stmt)
- && gimple_call_internal_fn (stmt) == IFN_ASAN_CHECK)
- ++asan_num_accesses;
- }
- }
-
- bool use_calls = ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD < INT_MAX
- && asan_num_accesses >= ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD;
-
- FOR_EACH_BB_FN (bb, fun)
- {
- gimple_stmt_iterator gsi;
- for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
- {
- gimple stmt = gsi_stmt (gsi);
- bool no_next = false;
-
- if (!is_gimple_call (stmt))
- {
- gsi_next (&gsi);
- continue;
- }
-
- if (gimple_call_internal_p (stmt))
- {
- enum internal_fn ifn = gimple_call_internal_fn (stmt);
- switch (ifn)
- {
- case IFN_UBSAN_NULL:
- no_next = ubsan_expand_null_ifn (&gsi);
- break;
- case IFN_UBSAN_BOUNDS:
- no_next = ubsan_expand_bounds_ifn (&gsi);
- break;
- case IFN_UBSAN_OBJECT_SIZE:
- no_next = ubsan_expand_objsize_ifn (&gsi);
- break;
- case IFN_ASAN_CHECK:
- no_next = asan_expand_check_ifn (&gsi, use_calls);
- break;
- default:
- break;
- }
- }
-
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "Optimized\n ");
- print_gimple_stmt (dump_file, stmt, 0, dump_flags);
- fprintf (dump_file, "\n");
- }
-
- if (!no_next)
- gsi_next (&gsi);
- }
- }
- return 0;
-}
-
-} // anon namespace
-
-gimple_opt_pass *
-make_pass_sanopt (gcc::context *ctxt)
-{
- return new pass_sanopt (ctxt);
-}
-
#include "gt-asan.h"
diff --git gcc/asan.h gcc/asan.h
index 8e3f0ba..f448391 100644
--- gcc/asan.h
+++ gcc/asan.h
@@ -28,6 +28,7 @@ extern rtx_insn *asan_emit_stack_protection (rtx, rtx, unsigned int,
extern bool asan_protect_global (tree);
extern void initialize_sanitizer_builtins (void);
extern tree asan_dynamic_init_call (bool);
+extern bool asan_expand_check_ifn (gimple_stmt_iterator *, bool);
extern gimple_stmt_iterator create_cond_insert_point
(gimple_stmt_iterator *, bool, bool, bool, basic_block *, basic_block *);
diff --git gcc/sanopt.c gcc/sanopt.c
index e69de29..7bfe59e 100644
--- gcc/sanopt.c
+++ gcc/sanopt.c
@@ -0,0 +1,315 @@
+/* Optimize and expand sanitizer functions.
+ Copyright (C) 2014 Free Software Foundation, Inc.
+ Contributed by Marek Polacek <polacek@redhat.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "hash-table.h"
+#include "predict.h"
+#include "vec.h"
+#include "hashtab.h"
+#include "hash-set.h"
+#include "tm.h"
+#include "hard-reg-set.h"
+#include "function.h"
+#include "dominance.h"
+#include "cfg.h"
+#include "basic-block.h"
+#include "tree-ssa-alias.h"
+#include "internal-fn.h"
+#include "gimple-expr.h"
+#include "is-a.h"
+#include "gimple.h"
+#include "gimplify.h"
+#include "gimple-iterator.h"
+#include "hash-map.h"
+#include "plugin-api.h"
+#include "tree-pass.h"
+#include "asan.h"
+#include "gimple-pretty-print.h"
+#include "tm_p.h"
+#include "langhooks.h"
+#include "ubsan.h"
+#include "params.h"
+
+
+/* This is used to carry information about basic blocks. It is
+ attached to the AUX field of the standard CFG block. */
+
+struct sanopt_info
+{
+ /* True if this BB has been visited. */
+ bool visited_p;
+};
+
+/* This is used to carry various hash maps and variables used
+ in sanopt_optimize_walker. */
+
+struct sanopt_ctx
+{
+ /* This map maps a pointer (the first argument of UBSAN_NULL) to
+ a vector of UBSAN_NULL call statements that check this pointer. */
+ hash_map<tree, auto_vec<gimple> > null_check_map;
+
+ /* Number of IFN_ASAN_CHECK statements. */
+ int asan_num_accesses;
+};
+
+
+/* Try to optimize away redundant UBSAN_NULL checks.
+
+ We walk blocks in the CFG via a depth first search of the dominator
+ tree; we push unique UBSAN_NULL statements into a vector in the
+ NULL_CHECK_MAP as we enter the blocks. When leaving a block, we
+ mark the block as visited; then when checking the statements in the
+ vector, we ignore statements that are coming from already visited
+ blocks, because these cannot dominate anything anymore.
+ CTX is a sanopt context. */
+
+static void
+sanopt_optimize_walker (basic_block bb, struct sanopt_ctx *ctx)
+{
+ basic_block son;
+ gimple_stmt_iterator gsi;
+
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
+ {
+ gimple stmt = gsi_stmt (gsi);
+ bool remove = false;
+
+ if (is_gimple_call (stmt)
+ && gimple_call_internal_p (stmt))
+ switch (gimple_call_internal_fn (stmt))
+ {
+ case IFN_UBSAN_NULL:
+ {
+ gcc_assert (gimple_call_num_args (stmt) == 3);
+ tree ptr = gimple_call_arg (stmt, 0);
+ tree cur_align = gimple_call_arg (stmt, 2);
+ gcc_assert (TREE_CODE (cur_align) == INTEGER_CST);
+
+ auto_vec<gimple> &v = ctx->null_check_map.get_or_insert (ptr);
+ if (v.is_empty ())
+ /* For this PTR we don't have any UBSAN_NULL stmts
+ recorded, so there's nothing to optimize yet. */
+ v.safe_push (stmt);
+ else
+ {
+ /* We already have recorded a UBSAN_NULL check
+ for this pointer. Perhaps we can drop this one.
+ But only if this check doesn't specify stricter
+ alignment. */
+ int i;
+ gimple g;
+
+ FOR_EACH_VEC_ELT_REVERSE (v, i, g)
+ {
+ /* Remove statements for BBs that have been
+ already processed. */
+ sanopt_info *si = (sanopt_info *) gimple_bb (g)->aux;
+ if (si->visited_p)
+ {
+ v.unordered_remove (i);
+ continue;
+ }
+ /* At this point we shouldn't have any statements
+ that aren't dominating the current BB. */
+ tree align = gimple_call_arg (g, 2);
+ remove = tree_int_cst_le (cur_align, align);
+ break;
+ }
+
+ if (remove)
+ {
+ /* Drop this check. */
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Optimizing out\n ");
+ print_gimple_stmt (dump_file, stmt, 0,
+ dump_flags);
+ fprintf (dump_file, "\n");
+ }
+ gsi_remove (&gsi, true);
+ }
+ else if (v.length () < 30)
+ v.safe_push (stmt);
+ }
+ }
+ case IFN_ASAN_CHECK:
+ ctx->asan_num_accesses++;
+ break;
+ default:
+ break;
+ }
+
+ /* If we were able to remove the current statement, gsi_remove
+ already pointed us to the next statement. */
+ if (!remove)
+ gsi_next (&gsi);
+ }
+
+ for (son = first_dom_son (CDI_DOMINATORS, bb);
+ son;
+ son = next_dom_son (CDI_DOMINATORS, son))
+ sanopt_optimize_walker (son, ctx);
+
+ /* We're leaving this BB, so mark it to that effect. */
+ sanopt_info *info = (sanopt_info *) bb->aux;
+ info->visited_p = true;
+}
+
+/* Try to remove redundant sanitizer checks in function FUN. */
+
+static int
+sanopt_optimize (function *fun)
+{
+ struct sanopt_ctx ctx;
+ ctx.asan_num_accesses = 0;
+
+ /* Set up block info for each basic block. */
+ alloc_aux_for_blocks (sizeof (sanopt_info));
+
+ /* We're going to do a dominator walk, so ensure that we have
+ dominance information. */
+ calculate_dominance_info (CDI_DOMINATORS);
+
+ /* Recursively walk the dominator tree optimizing away
+ redundant checks. */
+ sanopt_optimize_walker (ENTRY_BLOCK_PTR_FOR_FN (fun), &ctx);
+
+ free_aux_for_blocks ();
+
+ return ctx.asan_num_accesses;
+}
+
+/* Perform optimization of sanitize functions. */
+
+namespace {
+
+const pass_data pass_data_sanopt =
+{
+ GIMPLE_PASS, /* type */
+ "sanopt", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ ( PROP_ssa | PROP_cfg | PROP_gimple_leh ), /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_sanopt : public gimple_opt_pass
+{
+public:
+ pass_sanopt (gcc::context *ctxt)
+ : gimple_opt_pass (pass_data_sanopt, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *) { return flag_sanitize; }
+ virtual unsigned int execute (function *);
+
+}; // class pass_sanopt
+
+unsigned int
+pass_sanopt::execute (function *fun)
+{
+ basic_block bb;
+ int asan_num_accesses = 0;
+
+ /* Try to remove redundant checks. */
+ if (optimize
+ && (flag_sanitize & (SANITIZE_NULL | SANITIZE_ALIGNMENT)))
+ asan_num_accesses = sanopt_optimize (fun);
+ else if (flag_sanitize & SANITIZE_ADDRESS)
+ {
+ gimple_stmt_iterator gsi;
+ FOR_EACH_BB_FN (bb, fun)
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple stmt = gsi_stmt (gsi);
+ if (is_gimple_call (stmt) && gimple_call_internal_p (stmt)
+ && gimple_call_internal_fn (stmt) == IFN_ASAN_CHECK)
+ ++asan_num_accesses;
+ }
+ }
+
+ bool use_calls = ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD < INT_MAX
+ && asan_num_accesses >= ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD;
+
+ FOR_EACH_BB_FN (bb, fun)
+ {
+ gimple_stmt_iterator gsi;
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
+ {
+ gimple stmt = gsi_stmt (gsi);
+ bool no_next = false;
+
+ if (!is_gimple_call (stmt))
+ {
+ gsi_next (&gsi);
+ continue;
+ }
+
+ if (gimple_call_internal_p (stmt))
+ {
+ enum internal_fn ifn = gimple_call_internal_fn (stmt);
+ switch (ifn)
+ {
+ case IFN_UBSAN_NULL:
+ no_next = ubsan_expand_null_ifn (&gsi);
+ break;
+ case IFN_UBSAN_BOUNDS:
+ no_next = ubsan_expand_bounds_ifn (&gsi);
+ break;
+ case IFN_UBSAN_OBJECT_SIZE:
+ no_next = ubsan_expand_objsize_ifn (&gsi);
+ break;
+ case IFN_ASAN_CHECK:
+ no_next = asan_expand_check_ifn (&gsi, use_calls);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Expanded\n ");
+ print_gimple_stmt (dump_file, stmt, 0, dump_flags);
+ fprintf (dump_file, "\n");
+ }
+
+ if (!no_next)
+ gsi_next (&gsi);
+ }
+ }
+ return 0;
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_sanopt (gcc::context *ctxt)
+{
+ return new pass_sanopt (ctxt);
+}
diff --git gcc/testsuite/c-c++-common/ubsan/align-2.c gcc/testsuite/c-c++-common/ubsan/align-2.c
index 071de8c..02a26e2 100644
--- gcc/testsuite/c-c++-common/ubsan/align-2.c
+++ gcc/testsuite/c-c++-common/ubsan/align-2.c
@@ -51,6 +51,4 @@ main ()
/* { dg-output "\.c:(13|16):\[0-9]*: \[^\n\r]*store to misaligned address 0x\[0-9a-fA-F]* for type 'int', which requires 4 byte alignment.*" } */
/* { dg-output "\.c:23:\[0-9]*: \[^\n\r]*member access within misaligned address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] byte alignment.*" } */
/* { dg-output "\.c:(29|30):\[0-9]*: \[^\n\r]*member access within misaligned address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] byte alignment.*" } */
-/* { dg-output "\.c:30:\[0-9]*: \[^\n\r]*member access within misaligned address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] byte alignment.*" } */
-/* { dg-output "\.c:31:\[0-9]*: \[^\n\r]*member access within misaligned address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] byte alignment.*" } */
/* { dg-output "\.c:37:\[0-9]*: \[^\n\r]*load of misaligned address 0x\[0-9a-fA-F]* for type 'long long int', which requires \[48] byte alignment" } */
diff --git gcc/testsuite/c-c++-common/ubsan/align-4.c gcc/testsuite/c-c++-common/ubsan/align-4.c
index 3252595..f010589 100644
--- gcc/testsuite/c-c++-common/ubsan/align-4.c
+++ gcc/testsuite/c-c++-common/ubsan/align-4.c
@@ -9,6 +9,4 @@
/* { dg-output "\[^\n\r]*\.c:(13|16):\[0-9]*: \[^\n\r]*store to misaligned address 0x\[0-9a-fA-F]* for type 'int', which requires 4 byte alignment.*" } */
/* { dg-output "\[^\n\r]*\.c:23:\[0-9]*: \[^\n\r]*member access within misaligned address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] byte alignment.*" } */
/* { dg-output "\[^\n\r]*\.c:(29|30):\[0-9]*: \[^\n\r]*member access within misaligned address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] byte alignment.*" } */
-/* { dg-output "\[^\n\r]*\.c:30:\[0-9]*: \[^\n\r]*member access within misaligned address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] byte alignment.*" } */
-/* { dg-output "\[^\n\r]*\.c:31:\[0-9]*: \[^\n\r]*member access within misaligned address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] byte alignment.*" } */
/* { dg-output "\[^\n\r]*\.c:37:\[0-9]*: \[^\n\r]*load of misaligned address 0x\[0-9a-fA-F]* for type 'long long int', which requires \[48] byte alignment" } */
diff --git gcc/testsuite/g++.dg/ubsan/null-1.C gcc/testsuite/g++.dg/ubsan/null-1.C
index e1524b1..83b3033 100644
--- gcc/testsuite/g++.dg/ubsan/null-1.C
+++ gcc/testsuite/g++.dg/ubsan/null-1.C
@@ -25,6 +25,4 @@ main (void)
}
// { dg-output "reference binding to null pointer of type 'int'(\n|\r\n|\r)" }
-// { dg-output "\[^\n\r]*reference binding to null pointer of type 'int'(\n|\r\n|\r)" }
// { dg-output "\[^\n\r]*reference binding to null pointer of type 'const L'(\n|\r\n|\r)" }
-// { dg-output "\[^\n\r]*reference binding to null pointer of type 'int'(\n|\r\n|\r)" }
diff --git gcc/testsuite/g++.dg/ubsan/null-2.C gcc/testsuite/g++.dg/ubsan/null-2.C
index 88f387e..0230c7c 100644
--- gcc/testsuite/g++.dg/ubsan/null-2.C
+++ gcc/testsuite/g++.dg/ubsan/null-2.C
@@ -35,5 +35,3 @@ main (void)
// { dg-output "\.C:26:\[0-9]*:\[\^\n\r]*member call on null pointer of type 'struct U'.*" }
// { dg-output "\.C:29:\[0-9]*:\[\^\n\r]*member call on null pointer of type 'struct V'.*" }
-// { dg-output "\.C:31:\[0-9]*:\[\^\n\r]*member call on null pointer of type 'struct V'.*" }
-// { dg-output "\.C:33:\[0-9]*:\[\^\n\r]*member call on null pointer of type 'struct V'" }
Marek