[PATCH] Optimize UBSAN_NULL checks, add sanopt.c

Marek Polacek polacek@redhat.com
Tue Nov 4 18:36:00 GMT 2014


On Mon, Nov 03, 2014 at 04:35:00PM +0100, Jakub Jelinek wrote:
> Well, in theory you could just script removing them one by one and just
> make sanopt.o after each step to see if the header is or is not needed,
> perhaps with some manual tweeks.

I pruned those headers some more.
 
> Perhaps in preparation for future optimizations (other UBSAN_*
> calls, and ASAN_CHECK and tsan builtins), you should consider
> putting the hash_map into some structure and pass address of that
> structure instead, so that you have all the pass context at the same spot.
> You could put asan_num_accesses in there too, see below.
 
Done.

> I think it would be better to walk the vector backwards instead of forward.
> 1) if you have the same SSA_NAME there multiple times, ignoring the already
>    unnecessary stmts, the only case where you'd have multiple stmts is
>    if the earlier stmts dominate the following stmts and for some reason
>    weren't optimized away; that for some reason currently should be
>    just higher required alignment in the dominated stmt (e.g. say have
>    UBSAN_NULL (foo_23, 0); UBSAN_NULL (foo_23, 2); UBSAN_NULL (foo_23, 4);
>    where the first stmt dominates the second two and second stmt dominates
>    the last one.
> 2) All the si->visited_p stmts should be always at the end of the vector
>    IMHO, preceeded only by !si->visited_p stmts.
> Thus, when walking backwards, first remove the stmts with bb->visited_p,
> once you hit one that doesn't have it set, I believe there shouldn't be any
> further.  And then in theory it should be fine to just compare the last
> stmt in the vector that was left (if any).
 
I agree that should work - so changed.

> Perhaps you could return the asan_num_accesses value computed during
> sanopt_optimize (just count IFN_ASAN_CHECK calls that you don't optimize
> away), and do this only in else if (i.e. when sanopt_optimize has not been
> run).  That way, you'd save one extra IL walk.

Done.

Bootstrap-ubsan/regtest passed on x86_64-linux, ok for trunk?

2014-11-04  Marek Polacek  <polacek@redhat.com>

	* Makefile.in (OBJS): Add sanopt.o.
	(GTFILES): Add sanopt.c.
	* asan.h (asan_expand_check_ifn): Declare.
	* asan.c (asan_expand_check_ifn): No longer static.
	(class pass_sanopt, pass_sanopt::execute, make_pass_sanopt): Move...
	* sanopt.c: ...here.  New file.
testsuite/
	* c-c++-common/ubsan/align-2.c: Remove dg-output.
	* c-c++-common/ubsan/align-4.c: Likewise.
	* g++.dg/ubsan/null-1.C: Likewise.
	* g++.dg/ubsan/null-2.C: Likewise.

diff --git gcc/Makefile.in gcc/Makefile.in
index 9c67fe2..f383032 100644
--- gcc/Makefile.in
+++ gcc/Makefile.in
@@ -1376,6 +1376,7 @@ OBJS = \
 	asan.o \
 	tsan.o \
 	ubsan.o \
+	sanopt.o \
 	tree-call-cdce.o \
 	tree-cfg.o \
 	tree-cfgcleanup.o \
@@ -2305,6 +2306,7 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \
   $(srcdir)/asan.c \
   $(srcdir)/ubsan.c \
   $(srcdir)/tsan.c \
+  $(srcdir)/sanopt.c \
   $(srcdir)/ipa-devirt.c \
   $(srcdir)/internal-fn.h \
   @all_gtfiles@
diff --git gcc/asan.c gcc/asan.c
index 8f146d2..79dede7 100644
--- gcc/asan.c
+++ gcc/asan.c
@@ -2497,7 +2497,7 @@ asan_finish_file (void)
 
 /* Expand the ASAN_{LOAD,STORE} builtins.  */
 
-static bool
+bool
 asan_expand_check_ifn (gimple_stmt_iterator *iter, bool use_calls)
 {
   gimple g = gsi_stmt (*iter);
@@ -2800,114 +2800,4 @@ make_pass_asan_O0 (gcc::context *ctxt)
   return new pass_asan_O0 (ctxt);
 }
 
-/* Perform optimization of sanitize functions.  */
-
-namespace {
-
-const pass_data pass_data_sanopt =
-{
-  GIMPLE_PASS, /* type */
-  "sanopt", /* name */
-  OPTGROUP_NONE, /* optinfo_flags */
-  TV_NONE, /* tv_id */
-  ( PROP_ssa | PROP_cfg | PROP_gimple_leh ), /* properties_required */
-  0, /* properties_provided */
-  0, /* properties_destroyed */
-  0, /* todo_flags_start */
-  TODO_update_ssa, /* todo_flags_finish */
-};
-
-class pass_sanopt : public gimple_opt_pass
-{
-public:
-  pass_sanopt (gcc::context *ctxt)
-    : gimple_opt_pass (pass_data_sanopt, ctxt)
-  {}
-
-  /* opt_pass methods: */
-  virtual bool gate (function *) { return flag_sanitize; }
-  virtual unsigned int execute (function *);
-
-}; // class pass_sanopt
-
-unsigned int
-pass_sanopt::execute (function *fun)
-{
-  basic_block bb;
-
-  int asan_num_accesses = 0;
-  if (flag_sanitize & SANITIZE_ADDRESS)
-    {
-      gimple_stmt_iterator gsi;
-      FOR_EACH_BB_FN (bb, fun)
-	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-	  {
- 	    gimple stmt = gsi_stmt (gsi);
-	    if (is_gimple_call (stmt) && gimple_call_internal_p (stmt)
-		&& gimple_call_internal_fn (stmt) == IFN_ASAN_CHECK)
-	      ++asan_num_accesses;
-	  }
-    }
-
-  bool use_calls = ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD < INT_MAX
-    && asan_num_accesses >= ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD;
-
-  FOR_EACH_BB_FN (bb, fun)
-    {
-      gimple_stmt_iterator gsi;
-      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
-	{
-	  gimple stmt = gsi_stmt (gsi);
-	  bool no_next = false;
-
-	  if (!is_gimple_call (stmt))
-	    {
-	      gsi_next (&gsi);
-	      continue;
-	    }
-
-	  if (gimple_call_internal_p (stmt))
-	    {
-	      enum internal_fn ifn = gimple_call_internal_fn (stmt);
-	      switch (ifn)
-		{
-		case IFN_UBSAN_NULL:
-		  no_next = ubsan_expand_null_ifn (&gsi);
-		  break;
-		case IFN_UBSAN_BOUNDS:
-		  no_next = ubsan_expand_bounds_ifn (&gsi);
-		  break;
-		case IFN_UBSAN_OBJECT_SIZE:
-		  no_next = ubsan_expand_objsize_ifn (&gsi);
-		  break;
-		case IFN_ASAN_CHECK:
-		  no_next = asan_expand_check_ifn (&gsi, use_calls);
-		  break;
-		default:
-		  break;
-		}
-	    }
-
-	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    {
-	      fprintf (dump_file, "Optimized\n  ");
-	      print_gimple_stmt (dump_file, stmt, 0, dump_flags);
-	      fprintf (dump_file, "\n");
-	    }
-
-	  if (!no_next)
-	    gsi_next (&gsi);
-	}
-    }
-  return 0;
-}
-
-} // anon namespace
-
-gimple_opt_pass *
-make_pass_sanopt (gcc::context *ctxt)
-{
-  return new pass_sanopt (ctxt);
-}
-
 #include "gt-asan.h"
diff --git gcc/asan.h gcc/asan.h
index 8e3f0ba..f448391 100644
--- gcc/asan.h
+++ gcc/asan.h
@@ -28,6 +28,7 @@ extern rtx_insn *asan_emit_stack_protection (rtx, rtx, unsigned int,
 extern bool asan_protect_global (tree);
 extern void initialize_sanitizer_builtins (void);
 extern tree asan_dynamic_init_call (bool);
+extern bool asan_expand_check_ifn (gimple_stmt_iterator *, bool);
 
 extern gimple_stmt_iterator create_cond_insert_point
      (gimple_stmt_iterator *, bool, bool, bool, basic_block *, basic_block *);
diff --git gcc/sanopt.c gcc/sanopt.c
index e69de29..7bfe59e 100644
--- gcc/sanopt.c
+++ gcc/sanopt.c
@@ -0,0 +1,315 @@
+/* Optimize and expand sanitizer functions.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   Contributed by Marek Polacek <polacek@redhat.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "hash-table.h"
+#include "predict.h"
+#include "vec.h"
+#include "hashtab.h"
+#include "hash-set.h"
+#include "tm.h"
+#include "hard-reg-set.h"
+#include "function.h"
+#include "dominance.h"
+#include "cfg.h"
+#include "basic-block.h"
+#include "tree-ssa-alias.h"
+#include "internal-fn.h"
+#include "gimple-expr.h"
+#include "is-a.h"
+#include "gimple.h"
+#include "gimplify.h"
+#include "gimple-iterator.h"
+#include "hash-map.h"
+#include "plugin-api.h"
+#include "tree-pass.h"
+#include "asan.h"
+#include "gimple-pretty-print.h"
+#include "tm_p.h"
+#include "langhooks.h"
+#include "ubsan.h"
+#include "params.h"
+
+
+/* This is used to carry information about basic blocks.  It is
+   attached to the AUX field of the standard CFG block.  */
+
+struct sanopt_info
+{
+  /* True if this BB has been visited.  */
+  bool visited_p;
+};
+
+/* This is used to carry various hash maps and variables used
+   in sanopt_optimize_walker.  */
+
+struct sanopt_ctx
+{
+  /* This map maps a pointer (the first argument of UBSAN_NULL) to
+     a vector of UBSAN_NULL call statements that check this pointer.  */
+  hash_map<tree, auto_vec<gimple> > null_check_map;
+
+  /* Number of IFN_ASAN_CHECK statements.  */
+  int asan_num_accesses;
+};
+
+
+/* Try to optimize away redundant UBSAN_NULL checks.
+   
+   We walk blocks in the CFG via a depth first search of the dominator
+   tree; we push unique UBSAN_NULL statements into a vector in the
+   NULL_CHECK_MAP as we enter the blocks.  When leaving a block, we
+   mark the block as visited; then when checking the statements in the
+   vector, we ignore statements that are coming from already visited
+   blocks, because these cannot dominate anything anymore.
+   CTX is a sanopt context.  */
+
+static void
+sanopt_optimize_walker (basic_block bb, struct sanopt_ctx *ctx)
+{
+  basic_block son;
+  gimple_stmt_iterator gsi;
+
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
+    {
+      gimple stmt = gsi_stmt (gsi);
+      bool remove = false;
+
+      if (is_gimple_call (stmt)
+	  && gimple_call_internal_p (stmt))
+	switch (gimple_call_internal_fn (stmt))
+	  {
+	  case IFN_UBSAN_NULL:
+	    {
+	      gcc_assert (gimple_call_num_args (stmt) == 3);
+	      tree ptr = gimple_call_arg (stmt, 0);
+	      tree cur_align = gimple_call_arg (stmt, 2);
+	      gcc_assert (TREE_CODE (cur_align) == INTEGER_CST);
+
+	      auto_vec<gimple> &v = ctx->null_check_map.get_or_insert (ptr);
+	      if (v.is_empty ())
+		/* For this PTR we don't have any UBSAN_NULL stmts
+		   recorded, so there's nothing to optimize yet.  */
+		v.safe_push (stmt);
+	      else
+		{
+		  /* We already have recorded a UBSAN_NULL check
+		     for this pointer.  Perhaps we can drop this one.
+		     But only if this check doesn't specify stricter
+		     alignment.  */
+		  int i;
+		  gimple g;
+
+		  FOR_EACH_VEC_ELT_REVERSE (v, i, g)
+		    {
+		      /* Remove statements for BBs that have been
+			 already processed.  */
+		      sanopt_info *si = (sanopt_info *) gimple_bb (g)->aux;
+		      if (si->visited_p)
+			{
+			  v.unordered_remove (i);
+			  continue;
+			}
+		      /* At this point we shouldn't have any statements
+			 that aren't dominating the current BB.  */
+		      tree align = gimple_call_arg (g, 2);
+		      remove = tree_int_cst_le (cur_align, align);
+		      break;
+		    }
+
+		  if (remove)
+		    {
+		      /* Drop this check.  */
+		      if (dump_file && (dump_flags & TDF_DETAILS))
+			{
+			  fprintf (dump_file, "Optimizing out\n  ");
+			  print_gimple_stmt (dump_file, stmt, 0,
+					     dump_flags);
+			  fprintf (dump_file, "\n");
+			}
+		      gsi_remove (&gsi, true);
+		    }
+		  else if (v.length () < 30)
+		    v.safe_push (stmt);
+		  }
+	    }
+	  case IFN_ASAN_CHECK:
+	    ctx->asan_num_accesses++;
+	    break;
+	  default:
+	    break;
+	  }
+
+      /* If we were able to remove the current statement, gsi_remove
+	 already pointed us to the next statement.  */
+      if (!remove)
+	gsi_next (&gsi);
+    }
+
+  for (son = first_dom_son (CDI_DOMINATORS, bb);
+       son;
+       son = next_dom_son (CDI_DOMINATORS, son))
+    sanopt_optimize_walker (son, ctx);
+
+  /* We're leaving this BB, so mark it to that effect.  */
+  sanopt_info *info = (sanopt_info *) bb->aux;
+  info->visited_p = true;
+}
+
+/* Try to remove redundant sanitizer checks in function FUN.  */
+
+static int
+sanopt_optimize (function *fun)
+{
+  struct sanopt_ctx ctx;
+  ctx.asan_num_accesses = 0;
+
+  /* Set up block info for each basic block.  */
+  alloc_aux_for_blocks (sizeof (sanopt_info));
+
+  /* We're going to do a dominator walk, so ensure that we have
+     dominance information.  */
+  calculate_dominance_info (CDI_DOMINATORS);
+
+  /* Recursively walk the dominator tree optimizing away
+     redundant checks.  */
+  sanopt_optimize_walker (ENTRY_BLOCK_PTR_FOR_FN (fun), &ctx);
+
+  free_aux_for_blocks ();
+
+  return ctx.asan_num_accesses;
+}
+
+/* Perform optimization of sanitize functions.  */
+
+namespace {
+
+const pass_data pass_data_sanopt =
+{
+  GIMPLE_PASS, /* type */
+  "sanopt", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  ( PROP_ssa | PROP_cfg | PROP_gimple_leh ), /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_sanopt : public gimple_opt_pass
+{
+public:
+  pass_sanopt (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_sanopt, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *) { return flag_sanitize; }
+  virtual unsigned int execute (function *);
+
+}; // class pass_sanopt
+
+unsigned int
+pass_sanopt::execute (function *fun)
+{
+  basic_block bb;
+  int asan_num_accesses = 0;
+
+  /* Try to remove redundant checks.  */
+  if (optimize
+      && (flag_sanitize & (SANITIZE_NULL | SANITIZE_ALIGNMENT)))
+    asan_num_accesses = sanopt_optimize (fun);
+  else if (flag_sanitize & SANITIZE_ADDRESS)
+    {
+      gimple_stmt_iterator gsi;
+      FOR_EACH_BB_FN (bb, fun)
+	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	  {
+ 	    gimple stmt = gsi_stmt (gsi);
+	    if (is_gimple_call (stmt) && gimple_call_internal_p (stmt)
+		&& gimple_call_internal_fn (stmt) == IFN_ASAN_CHECK)
+	      ++asan_num_accesses;
+	  }
+    }
+
+  bool use_calls = ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD < INT_MAX
+    && asan_num_accesses >= ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD;
+
+  FOR_EACH_BB_FN (bb, fun)
+    {
+      gimple_stmt_iterator gsi;
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
+	{
+	  gimple stmt = gsi_stmt (gsi);
+	  bool no_next = false;
+
+	  if (!is_gimple_call (stmt))
+	    {
+	      gsi_next (&gsi);
+	      continue;
+	    }
+
+	  if (gimple_call_internal_p (stmt))
+	    {
+	      enum internal_fn ifn = gimple_call_internal_fn (stmt);
+	      switch (ifn)
+		{
+		case IFN_UBSAN_NULL:
+		  no_next = ubsan_expand_null_ifn (&gsi);
+		  break;
+		case IFN_UBSAN_BOUNDS:
+		  no_next = ubsan_expand_bounds_ifn (&gsi);
+		  break;
+		case IFN_UBSAN_OBJECT_SIZE:
+		  no_next = ubsan_expand_objsize_ifn (&gsi);
+		  break;
+		case IFN_ASAN_CHECK:
+		  no_next = asan_expand_check_ifn (&gsi, use_calls);
+		  break;
+		default:
+		  break;
+		}
+	    }
+
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "Expanded\n  ");
+	      print_gimple_stmt (dump_file, stmt, 0, dump_flags);
+	      fprintf (dump_file, "\n");
+	    }
+
+	  if (!no_next)
+	    gsi_next (&gsi);
+	}
+    }
+  return 0;
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_sanopt (gcc::context *ctxt)
+{
+  return new pass_sanopt (ctxt);
+}
diff --git gcc/testsuite/c-c++-common/ubsan/align-2.c gcc/testsuite/c-c++-common/ubsan/align-2.c
index 071de8c..02a26e2 100644
--- gcc/testsuite/c-c++-common/ubsan/align-2.c
+++ gcc/testsuite/c-c++-common/ubsan/align-2.c
@@ -51,6 +51,4 @@ main ()
 /* { dg-output "\.c:(13|16):\[0-9]*: \[^\n\r]*store to misaligned address 0x\[0-9a-fA-F]* for type 'int', which requires 4 byte alignment.*" } */
 /* { dg-output "\.c:23:\[0-9]*: \[^\n\r]*member access within misaligned address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] byte alignment.*" } */
 /* { dg-output "\.c:(29|30):\[0-9]*: \[^\n\r]*member access within misaligned address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] byte alignment.*" } */
-/* { dg-output "\.c:30:\[0-9]*: \[^\n\r]*member access within misaligned address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] byte alignment.*" } */
-/* { dg-output "\.c:31:\[0-9]*: \[^\n\r]*member access within misaligned address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] byte alignment.*" } */
 /* { dg-output "\.c:37:\[0-9]*: \[^\n\r]*load of misaligned address 0x\[0-9a-fA-F]* for type 'long long int', which requires \[48] byte alignment" } */
diff --git gcc/testsuite/c-c++-common/ubsan/align-4.c gcc/testsuite/c-c++-common/ubsan/align-4.c
index 3252595..f010589 100644
--- gcc/testsuite/c-c++-common/ubsan/align-4.c
+++ gcc/testsuite/c-c++-common/ubsan/align-4.c
@@ -9,6 +9,4 @@
 /* { dg-output "\[^\n\r]*\.c:(13|16):\[0-9]*: \[^\n\r]*store to misaligned address 0x\[0-9a-fA-F]* for type 'int', which requires 4 byte alignment.*" } */
 /* { dg-output "\[^\n\r]*\.c:23:\[0-9]*: \[^\n\r]*member access within misaligned address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] byte alignment.*" } */
 /* { dg-output "\[^\n\r]*\.c:(29|30):\[0-9]*: \[^\n\r]*member access within misaligned address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] byte alignment.*" } */
-/* { dg-output "\[^\n\r]*\.c:30:\[0-9]*: \[^\n\r]*member access within misaligned address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] byte alignment.*" } */
-/* { dg-output "\[^\n\r]*\.c:31:\[0-9]*: \[^\n\r]*member access within misaligned address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] byte alignment.*" } */
 /* { dg-output "\[^\n\r]*\.c:37:\[0-9]*: \[^\n\r]*load of misaligned address 0x\[0-9a-fA-F]* for type 'long long int', which requires \[48] byte alignment" } */
diff --git gcc/testsuite/g++.dg/ubsan/null-1.C gcc/testsuite/g++.dg/ubsan/null-1.C
index e1524b1..83b3033 100644
--- gcc/testsuite/g++.dg/ubsan/null-1.C
+++ gcc/testsuite/g++.dg/ubsan/null-1.C
@@ -25,6 +25,4 @@ main (void)
 }
 
 // { dg-output "reference binding to null pointer of type 'int'(\n|\r\n|\r)" }
-// { dg-output "\[^\n\r]*reference binding to null pointer of type 'int'(\n|\r\n|\r)" }
 // { dg-output "\[^\n\r]*reference binding to null pointer of type 'const L'(\n|\r\n|\r)" }
-// { dg-output "\[^\n\r]*reference binding to null pointer of type 'int'(\n|\r\n|\r)" }
diff --git gcc/testsuite/g++.dg/ubsan/null-2.C gcc/testsuite/g++.dg/ubsan/null-2.C
index 88f387e..0230c7c 100644
--- gcc/testsuite/g++.dg/ubsan/null-2.C
+++ gcc/testsuite/g++.dg/ubsan/null-2.C
@@ -35,5 +35,3 @@ main (void)
 
 // { dg-output "\.C:26:\[0-9]*:\[\^\n\r]*member call on null pointer of type 'struct U'.*" }
 // { dg-output "\.C:29:\[0-9]*:\[\^\n\r]*member call on null pointer of type 'struct V'.*" }
-// { dg-output "\.C:31:\[0-9]*:\[\^\n\r]*member call on null pointer of type 'struct V'.*" }
-// { dg-output "\.C:33:\[0-9]*:\[\^\n\r]*member call on null pointer of type 'struct V'" }

	Marek



More information about the Gcc-patches mailing list