This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 3/3] [asan] Instrument built-in memory access function calls


This patch instruments many memory access patterns through builtins.

Basically, for a call like:

     __builtin_memset (from, 0, n_bytes);

the patch would only instrument the accesses at the beginning and at
the end of the memory region [from, from + n_bytes].  This is the
strategy used by the llvm implementation of asan.

This instrumentation for all the memory access builtin functions that
expose a well specified memory region -- one that explicitly states
the number of bytes accessed in the region.

Tested by running cc1 -fasan on variations of simple programs like:

    void
    foo ()
    {
      char foo[1] = {0};

      __builtin_memset (foo, 0, 2);
    }

and by starring at the gimple output.

gcc/
	* asan.c (insert_if_then_before_iter)
	(instrument_mem_region_access)
	(maybe_instrument_builtin_call, maybe_instrument_call): New static
	functions.
	(instrument_assignment): Factorize from ...
	(transform_statements): ... here.  Use maybe_instrument_call to
	instrument (builtin) function calls as well.
---
 gcc/asan.c | 328 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 320 insertions(+), 8 deletions(-)

diff --git a/gcc/asan.c b/gcc/asan.c
index aed1a60..a8e3827 100644
--- a/gcc/asan.c
+++ b/gcc/asan.c
@@ -468,6 +468,40 @@ create_cond_insert_point_before_iter (gimple_stmt_iterator *iter,
   return gsi_last_bb (cond_bb);
 }
 
+/* Insert an if condition followed by a 'then block' right before the
+   statement pointed to by ITER.  The fallthrough block -- which is the
+   else block of the condition as well as the destination of the
+   outcoming edge of the 'then block' -- starts with the statement
+   pointed to by ITER.
+
+   COND is the condition of the if.  
+
+   If THEN_MORE_LIKELY_P is true,
+   the the probability of the edge to the 'then block' is higher than
+   the probability of the edge to the fallthrough block.
+
+   Upon completion of the function, *THEN_BB is set to the newly
+   inserted 'then block' and similarly, *FALLTHROUGH_BB is set to the
+   fallthrough block.
+
+   *ITER is adjusted to still point to the same statement it was
+   pointing to initially.  */
+
+static void
+insert_if_then_before_iter (gimple cond,
+			    gimple_stmt_iterator *iter,
+			    bool then_more_likely_p,
+			    basic_block *then_bb,
+			    basic_block *fallthrough_bb)
+{
+  gimple_stmt_iterator cond_insert_point =
+    create_cond_insert_point_before_iter (iter,
+					  then_more_likely_p,
+					  then_bb,
+					  fallthrough_bb);
+  gsi_insert_after (&cond_insert_point, cond, GSI_NEW_STMT);
+}
+
 /* Instrument the memory access instruction BASE.  Insert new
    statements before ITER.
 
@@ -628,7 +662,7 @@ build_check_stmt (tree base, gimple_stmt_iterator *iter,
 
 /* If T represents a memory access, add instrumentation code before ITER.
    LOCATION is source code location.
-   IS_STORE is either 1 (for a store) or 0 (for a load).  */
+   IS_STORE is either TRUE (for a store) or FALSE (for a load).  */
 
 static void
 instrument_derefs (gimple_stmt_iterator *iter, tree t,
@@ -670,6 +704,285 @@ instrument_derefs (gimple_stmt_iterator *iter, tree t,
   build_check_stmt (base, iter, location, is_store, size_in_bytes);
 }
 
+/* Instrument an access to a contiguous memory region that starts at
+   the address pointed to by BASE, over a length of LEN (expressed in
+   the sizeof (*BASE) bytes).  ITER points to the the instruction
+   before which the instrumentation instructions must be inserted.
+   LOCATION  is the source location that the instrumentation
+   instructions must have.  If IS_STORE is true, then the memory
+   access is a store; otherwise, it's a load.  */
+
+static void
+instrument_mem_region_access (tree base, tree len,
+			      gimple_stmt_iterator *iter,
+			      location_t location, bool is_store)
+{
+  if (integer_zerop (len))
+    return;
+
+  gimple_stmt_iterator gsi = *iter;
+  tree pointed_to_type = TREE_TYPE (TREE_TYPE (base));
+
+  if (!is_gimple_constant (len))
+    {
+      /* So, the length of the memory area to asan-protect is
+	 non-constant.  Let's guard the generated instrumentation code
+	 like:
+
+	 if (len != 0)
+	   {
+	     //asan instrumentation code goes here.
+           }
+	   // falltrough instructions, starting with *ITER.  */
+
+      basic_block fallthrough_bb, then_bb;
+      gimple g = gimple_build_cond (NE_EXPR,
+				    len,
+				    build_int_cst (TREE_TYPE (len), 0),
+				    NULL_TREE, NULL_TREE);
+      gimple_set_location (g, location);
+      insert_if_then_before_iter (g, iter, /*then_more_likely_p=*/true,
+				  &then_bb, &fallthrough_bb);
+      /* Note that fallthrough_bb starts with the statement that was
+	 pointed to by ITER.  */
+
+      /* The 'then block' of the 'if (len != 0) condition is where
+	 we'll generate the asan instrumentation code now.  */
+      gsi = gsi_start_bb (then_bb);
+
+      /* Instrument the beginning of the memory region to be accessed,
+	 and arrange for the rest of the intrumentation code to be
+	 inserted in the then block *after* the current gsi.  */
+      build_check_stmt (base, &gsi, location, is_store,
+			int_size_in_bytes (pointed_to_type));
+      gsi = gsi_last_bb (then_bb);
+    }
+  else
+    {
+      /* Instrument the beginning of the memory region to be
+	 accessed.  */
+      build_check_stmt (base, iter, location, is_store,
+			int_size_in_bytes (pointed_to_type));
+      gsi = *iter;
+    }
+
+  /* We want to instrument the access at the end of the memory region,
+     which is at (base + len - 1).  */
+
+  /* offset = len - 1;  */
+  len = unshare_expr (len);
+  gimple offset =
+    gimple_build_assign_with_ops (TREE_CODE (len),
+				  make_ssa_name (TREE_TYPE (len), NULL),
+				  len, NULL);
+  gimple_set_location (offset, location);
+  gsi_insert_before (&gsi, offset, GSI_NEW_STMT);
+
+  offset =
+    gimple_build_assign_with_ops (MINUS_EXPR,
+				  make_ssa_name (size_type_node, NULL),
+				  gimple_assign_lhs (offset),
+				  build_int_cst (size_type_node, 1));
+  gimple_set_location (offset, location);
+  gsi_insert_after (&gsi, offset, GSI_NEW_STMT);
+
+  /* _1 = base;  */
+  base = unshare_expr (base);
+  gimple region_end =
+    gimple_build_assign_with_ops (TREE_CODE (base),
+				  make_ssa_name (TREE_TYPE (base), NULL),
+				  base, NULL);
+  gimple_set_location (region_end, location);
+  gsi_insert_after (&gsi, region_end, GSI_NEW_STMT);
+
+  /* _2 = _1 + offset;  */
+  region_end =
+    gimple_build_assign_with_ops (POINTER_PLUS_EXPR,
+				  make_ssa_name (TREE_TYPE (base), NULL),
+				  gimple_assign_lhs (region_end), 
+				  gimple_assign_lhs (offset));
+  gimple_set_location (region_end, location);
+  gsi_insert_after (&gsi, region_end, GSI_NEW_STMT);
+
+  /* instrument access at _2;  */
+  gsi_next (&gsi);
+  tree end = gimple_assign_lhs (region_end);
+  build_check_stmt (end, &gsi, location, is_store,
+		    int_size_in_bytes (TREE_TYPE (end)));
+}
+
+/* If the statement pointed to by the iterator ITER is a call to a
+   builtin memory access function, instrumented it and return TRUE.
+   Otherwise, return false.  */
+
+static bool
+maybe_instrument_builtin_call (gimple_stmt_iterator *iter)
+{
+  gimple call = gsi_stmt (*iter);
+
+  if (!is_gimple_call (call))
+    return false;
+
+  tree callee = gimple_call_fndecl (call);
+
+  if (!is_builtin_fn (callee)
+      || DECL_BUILT_IN_CLASS (callee) != BUILT_IN_NORMAL)
+    return false;
+
+  tree source0 = NULL_TREE, source1 = NULL_TREE,
+    dest = NULL_TREE, len = NULL_TREE;
+
+  switch (DECL_FUNCTION_CODE (callee))
+    {
+      /* (s, s, n) style memops.  */
+    case BUILT_IN_BCMP:
+    case BUILT_IN_MEMCMP:
+      /* These cannot be safely instrumented as their length parameter
+         is just a mere limit.
+
+    case BUILT_IN_STRNCASECMP:
+    case BUILT_IN_STRNCMP:  */
+      len = gimple_call_arg (call, 2);
+      source0 = gimple_call_arg (call, 0);
+      source1 = gimple_call_arg (call, 1);
+      break;
+
+      /* (src, dest, n) style memops.  */
+    case BUILT_IN_BCOPY:
+      len = gimple_call_arg (call, 2);
+      source0 = gimple_call_arg (call, 0);
+      dest = gimple_call_arg (call, 2);
+      break;
+
+      /* (dest, src, n) style memops.  */
+    case BUILT_IN_MEMCPY:
+    case BUILT_IN_MEMCPY_CHK:
+    case BUILT_IN_MEMMOVE:
+    case BUILT_IN_MEMMOVE_CHK:
+    case BUILT_IN_MEMPCPY:
+    case BUILT_IN_MEMPCPY_CHK:
+
+      /* The builtin below cannot be safely instrumented as their
+         length parameter is just a mere limit.
+
+    case BUILT_IN_STPNCPY:
+    case BUILT_IN_STPNCPY_CHK:
+    case BUILT_IN_STRNCPY:
+    case BUILT_IN_STRNCPY_CHK: */
+      dest = gimple_call_arg (call, 0);
+      source0 = gimple_call_arg (call, 1);
+      len = gimple_call_arg (call, 2);
+      break;
+
+      /* (dest, src, n) where src is appended to the end of dest.
+	 These cannot be instrumented either.
+    case BUILT_IN_STRNCAT:
+    case BUILT_IN_STRNCAT_CHK:
+      break;  */
+
+      /* (dest, n) style memops.  */
+    case BUILT_IN_BZERO:
+      dest = gimple_call_arg (call, 0);
+      len = gimple_call_arg (call, 1);
+      break;
+
+      /* (dest, x, n) style memops*/
+    case BUILT_IN_MEMSET:
+    case BUILT_IN_MEMSET_CHK:
+      dest = gimple_call_arg (call, 0);
+      len = gimple_call_arg (call, 2);
+      break;
+
+      /* (src, n) style memops.  */
+    case BUILT_IN_STRNDUP:
+      source0 = gimple_call_arg (call, 0);
+      len = gimple_call_arg (call, 1);
+      break;
+
+      /* (src, x, n) style memops.  */      
+    case BUILT_IN_MEMCHR:
+      source0 = gimple_call_arg (call, 0);
+      len = gimple_call_arg (call, 2);
+    break;
+
+      /* memops that have no length information.
+    case BUILT_IN_INDEX:
+    case BUILT_IN_RINDEX:
+    case BUILT_IN_STRCHR:
+    case BUILT_IN_STRDUP:
+    case BUILT_IN_STRLEN:
+    case BUILT_IN_STRRCHR:
+      break;  */
+
+      /* (dest, src) and no length info.
+    case BUILT_IN_STPCPY:
+    case BUILT_IN_STPCPY_CHK:
+    case BUILT_IN_STRCASECMP:
+    case BUILT_IN_STRCAT:
+    case BUILT_IN_STRCAT_CHK:
+    case BUILT_IN_STRCPY:
+    case BUILT_IN_STRCPY_CHK:
+      break;  */
+
+      /* (s, s) and no length info.
+    case BUILT_IN_STRCMP:
+    case BUILT_IN_STRCSPN:
+    case BUILT_IN_STRPBRK:
+    case BUILT_IN_STRSPN:
+    case BUILT_IN_STRSTR:
+      break;  */
+
+    default:
+      break;
+    }
+
+  if (len != NULL_TREE)
+    {
+      if (source0 != NULL_TREE)
+	instrument_mem_region_access (source0, len, iter,
+				      gimple_location (call),
+				      /*is_store=*/false);
+      if (source1 != NULL_TREE)
+	instrument_mem_region_access (source1, len, iter,
+				      gimple_location (call),
+				      /*is_store=*/false);
+      else if (dest != NULL_TREE)
+	instrument_mem_region_access (dest, len, iter,
+				      gimple_location (call),
+				      /*is_store=*/true);
+      return true;
+    }
+
+  return false;
+}
+
+/*  Instrument the assignment statement ITER if it is subject to
+    instrumentation.  */
+
+static void
+instrument_assignment (gimple_stmt_iterator *iter)
+{
+  gimple s = gsi_stmt (*iter);
+
+  gcc_assert (gimple_assign_single_p (s));
+
+  instrument_derefs (iter, gimple_assign_lhs (s),
+		     gimple_location (s), true);
+  instrument_derefs (iter, gimple_assign_rhs1 (s),
+		     gimple_location (s), false);
+}
+
+/* Instrument the function call pointed to by the iterator ITER, if it
+   is subject to instrumentation.  At the moment, the only function
+   calls that are instrumented are some built-in functions that access
+   memory.  Look at maybe_instrument_builtin_call to learn more.  */
+
+static void
+maybe_instrument_call (gimple_stmt_iterator *iter)
+{
+  maybe_instrument_builtin_call (iter);
+}
+
 /* asan: this looks too complex. Can this be done simpler? */
 /* Transform
    1) Memory references.
@@ -688,13 +1001,12 @@ transform_statements (void)
       if (bb->index >= saved_last_basic_block) continue;
       for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
         {
-          gimple s = gsi_stmt (i);
-          if (!gimple_assign_single_p (s))
-	    continue;
-          instrument_derefs (&i, gimple_assign_lhs (s),
-                             gimple_location (s), true);
-          instrument_derefs (&i, gimple_assign_rhs1 (s),
-                             gimple_location (s), false);
+	  gimple s = gsi_stmt (i);
+
+	  if (gimple_assign_single_p (s))
+	    instrument_assignment (&i);
+	  else if (is_gimple_call (s))
+	    maybe_instrument_call (&i);
         }
     }
 }
-- 
		Dodji


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]