This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH 3/3] [asan] Instrument built-in memory access function calls
- From: Dodji Seketeli <dodji at redhat dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: Jakub Jelinek <jakub at redhat dot com>, Diego Novillo <dnovillo at google dot com>, Xinliang David Li <davidxl at google dot com>, Wei Mi <wmi at google dot com>
- Date: Tue, 23 Oct 2012 15:11:29 +0200
- Subject: [PATCH 3/3] [asan] Instrument built-in memory access function calls
- References: <87liex8hjt.fsf@redhat.com>
This patch instruments many memory access patterns through builtins.
Basically, for a call like:
__builtin_memset (from, 0, n_bytes);
the patch would only instrument the accesses at the beginning and at
the end of the memory region [from, from + n_bytes]. This is the
strategy used by the llvm implementation of asan.
This instrumentation for all the memory access builtin functions that
expose a well specified memory region -- one that explicitly states
the number of bytes accessed in the region.
Tested by running cc1 -fasan on variations of simple programs like:
void
foo ()
{
char foo[1] = {0};
__builtin_memset (foo, 0, 2);
}
and by starring at the gimple output.
gcc/
* asan.c (insert_if_then_before_iter)
(instrument_mem_region_access)
(maybe_instrument_builtin_call, maybe_instrument_call): New static
functions.
(instrument_assignment): Factorize from ...
(transform_statements): ... here. Use maybe_instrument_call to
instrument (builtin) function calls as well.
---
gcc/asan.c | 328 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 320 insertions(+), 8 deletions(-)
diff --git a/gcc/asan.c b/gcc/asan.c
index aed1a60..a8e3827 100644
--- a/gcc/asan.c
+++ b/gcc/asan.c
@@ -468,6 +468,40 @@ create_cond_insert_point_before_iter (gimple_stmt_iterator *iter,
return gsi_last_bb (cond_bb);
}
+/* Insert an if condition followed by a 'then block' right before the
+ statement pointed to by ITER. The fallthrough block -- which is the
+ else block of the condition as well as the destination of the
+ outcoming edge of the 'then block' -- starts with the statement
+ pointed to by ITER.
+
+ COND is the condition of the if.
+
+ If THEN_MORE_LIKELY_P is true,
+ the the probability of the edge to the 'then block' is higher than
+ the probability of the edge to the fallthrough block.
+
+ Upon completion of the function, *THEN_BB is set to the newly
+ inserted 'then block' and similarly, *FALLTHROUGH_BB is set to the
+ fallthrough block.
+
+ *ITER is adjusted to still point to the same statement it was
+ pointing to initially. */
+
+static void
+insert_if_then_before_iter (gimple cond,
+ gimple_stmt_iterator *iter,
+ bool then_more_likely_p,
+ basic_block *then_bb,
+ basic_block *fallthrough_bb)
+{
+ gimple_stmt_iterator cond_insert_point =
+ create_cond_insert_point_before_iter (iter,
+ then_more_likely_p,
+ then_bb,
+ fallthrough_bb);
+ gsi_insert_after (&cond_insert_point, cond, GSI_NEW_STMT);
+}
+
/* Instrument the memory access instruction BASE. Insert new
statements before ITER.
@@ -628,7 +662,7 @@ build_check_stmt (tree base, gimple_stmt_iterator *iter,
/* If T represents a memory access, add instrumentation code before ITER.
LOCATION is source code location.
- IS_STORE is either 1 (for a store) or 0 (for a load). */
+ IS_STORE is either TRUE (for a store) or FALSE (for a load). */
static void
instrument_derefs (gimple_stmt_iterator *iter, tree t,
@@ -670,6 +704,285 @@ instrument_derefs (gimple_stmt_iterator *iter, tree t,
build_check_stmt (base, iter, location, is_store, size_in_bytes);
}
+/* Instrument an access to a contiguous memory region that starts at
+ the address pointed to by BASE, over a length of LEN (expressed in
+ the sizeof (*BASE) bytes). ITER points to the the instruction
+ before which the instrumentation instructions must be inserted.
+ LOCATION is the source location that the instrumentation
+ instructions must have. If IS_STORE is true, then the memory
+ access is a store; otherwise, it's a load. */
+
+static void
+instrument_mem_region_access (tree base, tree len,
+ gimple_stmt_iterator *iter,
+ location_t location, bool is_store)
+{
+ if (integer_zerop (len))
+ return;
+
+ gimple_stmt_iterator gsi = *iter;
+ tree pointed_to_type = TREE_TYPE (TREE_TYPE (base));
+
+ if (!is_gimple_constant (len))
+ {
+ /* So, the length of the memory area to asan-protect is
+ non-constant. Let's guard the generated instrumentation code
+ like:
+
+ if (len != 0)
+ {
+ //asan instrumentation code goes here.
+ }
+ // falltrough instructions, starting with *ITER. */
+
+ basic_block fallthrough_bb, then_bb;
+ gimple g = gimple_build_cond (NE_EXPR,
+ len,
+ build_int_cst (TREE_TYPE (len), 0),
+ NULL_TREE, NULL_TREE);
+ gimple_set_location (g, location);
+ insert_if_then_before_iter (g, iter, /*then_more_likely_p=*/true,
+ &then_bb, &fallthrough_bb);
+ /* Note that fallthrough_bb starts with the statement that was
+ pointed to by ITER. */
+
+ /* The 'then block' of the 'if (len != 0) condition is where
+ we'll generate the asan instrumentation code now. */
+ gsi = gsi_start_bb (then_bb);
+
+ /* Instrument the beginning of the memory region to be accessed,
+ and arrange for the rest of the intrumentation code to be
+ inserted in the then block *after* the current gsi. */
+ build_check_stmt (base, &gsi, location, is_store,
+ int_size_in_bytes (pointed_to_type));
+ gsi = gsi_last_bb (then_bb);
+ }
+ else
+ {
+ /* Instrument the beginning of the memory region to be
+ accessed. */
+ build_check_stmt (base, iter, location, is_store,
+ int_size_in_bytes (pointed_to_type));
+ gsi = *iter;
+ }
+
+ /* We want to instrument the access at the end of the memory region,
+ which is at (base + len - 1). */
+
+ /* offset = len - 1; */
+ len = unshare_expr (len);
+ gimple offset =
+ gimple_build_assign_with_ops (TREE_CODE (len),
+ make_ssa_name (TREE_TYPE (len), NULL),
+ len, NULL);
+ gimple_set_location (offset, location);
+ gsi_insert_before (&gsi, offset, GSI_NEW_STMT);
+
+ offset =
+ gimple_build_assign_with_ops (MINUS_EXPR,
+ make_ssa_name (size_type_node, NULL),
+ gimple_assign_lhs (offset),
+ build_int_cst (size_type_node, 1));
+ gimple_set_location (offset, location);
+ gsi_insert_after (&gsi, offset, GSI_NEW_STMT);
+
+ /* _1 = base; */
+ base = unshare_expr (base);
+ gimple region_end =
+ gimple_build_assign_with_ops (TREE_CODE (base),
+ make_ssa_name (TREE_TYPE (base), NULL),
+ base, NULL);
+ gimple_set_location (region_end, location);
+ gsi_insert_after (&gsi, region_end, GSI_NEW_STMT);
+
+ /* _2 = _1 + offset; */
+ region_end =
+ gimple_build_assign_with_ops (POINTER_PLUS_EXPR,
+ make_ssa_name (TREE_TYPE (base), NULL),
+ gimple_assign_lhs (region_end),
+ gimple_assign_lhs (offset));
+ gimple_set_location (region_end, location);
+ gsi_insert_after (&gsi, region_end, GSI_NEW_STMT);
+
+ /* instrument access at _2; */
+ gsi_next (&gsi);
+ tree end = gimple_assign_lhs (region_end);
+ build_check_stmt (end, &gsi, location, is_store,
+ int_size_in_bytes (TREE_TYPE (end)));
+}
+
+/* If the statement pointed to by the iterator ITER is a call to a
+ builtin memory access function, instrumented it and return TRUE.
+ Otherwise, return false. */
+
+static bool
+maybe_instrument_builtin_call (gimple_stmt_iterator *iter)
+{
+ gimple call = gsi_stmt (*iter);
+
+ if (!is_gimple_call (call))
+ return false;
+
+ tree callee = gimple_call_fndecl (call);
+
+ if (!is_builtin_fn (callee)
+ || DECL_BUILT_IN_CLASS (callee) != BUILT_IN_NORMAL)
+ return false;
+
+ tree source0 = NULL_TREE, source1 = NULL_TREE,
+ dest = NULL_TREE, len = NULL_TREE;
+
+ switch (DECL_FUNCTION_CODE (callee))
+ {
+ /* (s, s, n) style memops. */
+ case BUILT_IN_BCMP:
+ case BUILT_IN_MEMCMP:
+ /* These cannot be safely instrumented as their length parameter
+ is just a mere limit.
+
+ case BUILT_IN_STRNCASECMP:
+ case BUILT_IN_STRNCMP: */
+ len = gimple_call_arg (call, 2);
+ source0 = gimple_call_arg (call, 0);
+ source1 = gimple_call_arg (call, 1);
+ break;
+
+ /* (src, dest, n) style memops. */
+ case BUILT_IN_BCOPY:
+ len = gimple_call_arg (call, 2);
+ source0 = gimple_call_arg (call, 0);
+ dest = gimple_call_arg (call, 2);
+ break;
+
+ /* (dest, src, n) style memops. */
+ case BUILT_IN_MEMCPY:
+ case BUILT_IN_MEMCPY_CHK:
+ case BUILT_IN_MEMMOVE:
+ case BUILT_IN_MEMMOVE_CHK:
+ case BUILT_IN_MEMPCPY:
+ case BUILT_IN_MEMPCPY_CHK:
+
+ /* The builtin below cannot be safely instrumented as their
+ length parameter is just a mere limit.
+
+ case BUILT_IN_STPNCPY:
+ case BUILT_IN_STPNCPY_CHK:
+ case BUILT_IN_STRNCPY:
+ case BUILT_IN_STRNCPY_CHK: */
+ dest = gimple_call_arg (call, 0);
+ source0 = gimple_call_arg (call, 1);
+ len = gimple_call_arg (call, 2);
+ break;
+
+ /* (dest, src, n) where src is appended to the end of dest.
+ These cannot be instrumented either.
+ case BUILT_IN_STRNCAT:
+ case BUILT_IN_STRNCAT_CHK:
+ break; */
+
+ /* (dest, n) style memops. */
+ case BUILT_IN_BZERO:
+ dest = gimple_call_arg (call, 0);
+ len = gimple_call_arg (call, 1);
+ break;
+
+ /* (dest, x, n) style memops*/
+ case BUILT_IN_MEMSET:
+ case BUILT_IN_MEMSET_CHK:
+ dest = gimple_call_arg (call, 0);
+ len = gimple_call_arg (call, 2);
+ break;
+
+ /* (src, n) style memops. */
+ case BUILT_IN_STRNDUP:
+ source0 = gimple_call_arg (call, 0);
+ len = gimple_call_arg (call, 1);
+ break;
+
+ /* (src, x, n) style memops. */
+ case BUILT_IN_MEMCHR:
+ source0 = gimple_call_arg (call, 0);
+ len = gimple_call_arg (call, 2);
+ break;
+
+ /* memops that have no length information.
+ case BUILT_IN_INDEX:
+ case BUILT_IN_RINDEX:
+ case BUILT_IN_STRCHR:
+ case BUILT_IN_STRDUP:
+ case BUILT_IN_STRLEN:
+ case BUILT_IN_STRRCHR:
+ break; */
+
+ /* (dest, src) and no length info.
+ case BUILT_IN_STPCPY:
+ case BUILT_IN_STPCPY_CHK:
+ case BUILT_IN_STRCASECMP:
+ case BUILT_IN_STRCAT:
+ case BUILT_IN_STRCAT_CHK:
+ case BUILT_IN_STRCPY:
+ case BUILT_IN_STRCPY_CHK:
+ break; */
+
+ /* (s, s) and no length info.
+ case BUILT_IN_STRCMP:
+ case BUILT_IN_STRCSPN:
+ case BUILT_IN_STRPBRK:
+ case BUILT_IN_STRSPN:
+ case BUILT_IN_STRSTR:
+ break; */
+
+ default:
+ break;
+ }
+
+ if (len != NULL_TREE)
+ {
+ if (source0 != NULL_TREE)
+ instrument_mem_region_access (source0, len, iter,
+ gimple_location (call),
+ /*is_store=*/false);
+ if (source1 != NULL_TREE)
+ instrument_mem_region_access (source1, len, iter,
+ gimple_location (call),
+ /*is_store=*/false);
+ else if (dest != NULL_TREE)
+ instrument_mem_region_access (dest, len, iter,
+ gimple_location (call),
+ /*is_store=*/true);
+ return true;
+ }
+
+ return false;
+}
+
+/* Instrument the assignment statement ITER if it is subject to
+ instrumentation. */
+
+static void
+instrument_assignment (gimple_stmt_iterator *iter)
+{
+ gimple s = gsi_stmt (*iter);
+
+ gcc_assert (gimple_assign_single_p (s));
+
+ instrument_derefs (iter, gimple_assign_lhs (s),
+ gimple_location (s), true);
+ instrument_derefs (iter, gimple_assign_rhs1 (s),
+ gimple_location (s), false);
+}
+
+/* Instrument the function call pointed to by the iterator ITER, if it
+ is subject to instrumentation. At the moment, the only function
+ calls that are instrumented are some built-in functions that access
+ memory. Look at maybe_instrument_builtin_call to learn more. */
+
+static void
+maybe_instrument_call (gimple_stmt_iterator *iter)
+{
+ maybe_instrument_builtin_call (iter);
+}
+
/* asan: this looks too complex. Can this be done simpler? */
/* Transform
1) Memory references.
@@ -688,13 +1001,12 @@ transform_statements (void)
if (bb->index >= saved_last_basic_block) continue;
for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
{
- gimple s = gsi_stmt (i);
- if (!gimple_assign_single_p (s))
- continue;
- instrument_derefs (&i, gimple_assign_lhs (s),
- gimple_location (s), true);
- instrument_derefs (&i, gimple_assign_rhs1 (s),
- gimple_location (s), false);
+ gimple s = gsi_stmt (i);
+
+ if (gimple_assign_single_p (s))
+ instrument_assignment (&i);
+ else if (is_gimple_call (s))
+ maybe_instrument_call (&i);
}
}
}
--
Dodji