This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 3/3] [asan] Instrument built-in memory access function calls


On Tue, Oct 23, 2012 at 6:11 AM, Dodji Seketeli <dodji@redhat.com> wrote:
> This patch instruments many memory access patterns through builtins.
>
> Basically, for a call like:
>
>      __builtin_memset (from, 0, n_bytes);
>
> the patch would only instrument the accesses at the beginning and at
> the end of the memory region [from, from + n_bytes].  This is the
> strategy used by the llvm implementation of asan.
>
> This instrumentation for all the memory access builtin functions that
> expose a well specified memory region -- one that explicitly states
> the number of bytes accessed in the region.
>
> Tested by running cc1 -fasan on variations of simple programs like:
>
>     void
>     foo ()
>     {
>       char foo[1] = {0};
>
>       __builtin_memset (foo, 0, 2);
>     }
>
> and by starring at the gimple output.
>
> gcc/
>         * asan.c (insert_if_then_before_iter)
>         (instrument_mem_region_access)
>         (maybe_instrument_builtin_call, maybe_instrument_call): New static
>         functions.
>         (instrument_assignment): Factorize from ...
>         (transform_statements): ... here.  Use maybe_instrument_call to
>         instrument (builtin) function calls as well.
> ---
>  gcc/asan.c | 328 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 320 insertions(+), 8 deletions(-)
>
> diff --git a/gcc/asan.c b/gcc/asan.c
> index aed1a60..a8e3827 100644
> --- a/gcc/asan.c
> +++ b/gcc/asan.c
> @@ -468,6 +468,40 @@ create_cond_insert_point_before_iter (gimple_stmt_iterator *iter,
>    return gsi_last_bb (cond_bb);
>  }
>
> +/* Insert an if condition followed by a 'then block' right before the
> +   statement pointed to by ITER.  The fallthrough block -- which is the
> +   else block of the condition as well as the destination of the
> +   outcoming edge of the 'then block' -- starts with the statement
> +   pointed to by ITER.
> +
> +   COND is the condition of the if.
> +
> +   If THEN_MORE_LIKELY_P is true,
> +   the the probability of the edge to the 'then block' is higher than
> +   the probability of the edge to the fallthrough block.
> +
> +   Upon completion of the function, *THEN_BB is set to the newly
> +   inserted 'then block' and similarly, *FALLTHROUGH_BB is set to the
> +   fallthrough block.
> +
> +   *ITER is adjusted to still point to the same statement it was
> +   pointing to initially.  */
> +
> +static void
> +insert_if_then_before_iter (gimple cond,
> +                           gimple_stmt_iterator *iter,
> +                           bool then_more_likely_p,
> +                           basic_block *then_bb,
> +                           basic_block *fallthrough_bb)
> +{
> +  gimple_stmt_iterator cond_insert_point =
> +    create_cond_insert_point_before_iter (iter,
> +                                         then_more_likely_p,
> +                                         then_bb,
> +                                         fallthrough_bb);
> +  gsi_insert_after (&cond_insert_point, cond, GSI_NEW_STMT);
> +}
> +
>  /* Instrument the memory access instruction BASE.  Insert new
>     statements before ITER.
>
> @@ -628,7 +662,7 @@ build_check_stmt (tree base, gimple_stmt_iterator *iter,
>
>  /* If T represents a memory access, add instrumentation code before ITER.
>     LOCATION is source code location.
> -   IS_STORE is either 1 (for a store) or 0 (for a load).  */
> +   IS_STORE is either TRUE (for a store) or FALSE (for a load).  */
>
>  static void
>  instrument_derefs (gimple_stmt_iterator *iter, tree t,
> @@ -670,6 +704,285 @@ instrument_derefs (gimple_stmt_iterator *iter, tree t,
>    build_check_stmt (base, iter, location, is_store, size_in_bytes);
>  }
>
> +/* Instrument an access to a contiguous memory region that starts at
> +   the address pointed to by BASE, over a length of LEN (expressed in
> +   the sizeof (*BASE) bytes).  ITER points to the the instruction
> +   before which the instrumentation instructions must be inserted.
> +   LOCATION  is the source location that the instrumentation
> +   instructions must have.  If IS_STORE is true, then the memory
> +   access is a store; otherwise, it's a load.  */
> +
> +static void
> +instrument_mem_region_access (tree base, tree len,
> +                             gimple_stmt_iterator *iter,
> +                             location_t location, bool is_store)
> +{
> +  if (integer_zerop (len))
> +    return;
> +
> +  gimple_stmt_iterator gsi = *iter;
> +  tree pointed_to_type = TREE_TYPE (TREE_TYPE (base));
> +
> +  if (!is_gimple_constant (len))
> +    {
> +      /* So, the length of the memory area to asan-protect is
> +        non-constant.  Let's guard the generated instrumentation code
> +        like:
> +
> +        if (len != 0)
> +          {
> +            //asan instrumentation code goes here.
> +           }
> +          // falltrough instructions, starting with *ITER.  */
> +
> +      basic_block fallthrough_bb, then_bb;
> +      gimple g = gimple_build_cond (NE_EXPR,
> +                                   len,
> +                                   build_int_cst (TREE_TYPE (len), 0),
> +                                   NULL_TREE, NULL_TREE);
> +      gimple_set_location (g, location);
> +      insert_if_then_before_iter (g, iter, /*then_more_likely_p=*/true,
> +                                 &then_bb, &fallthrough_bb);
> +      /* Note that fallthrough_bb starts with the statement that was
> +        pointed to by ITER.  */
> +
> +      /* The 'then block' of the 'if (len != 0) condition is where
> +        we'll generate the asan instrumentation code now.  */
> +      gsi = gsi_start_bb (then_bb);
> +
> +      /* Instrument the beginning of the memory region to be accessed,
> +        and arrange for the rest of the intrumentation code to be
> +        inserted in the then block *after* the current gsi.  */
> +      build_check_stmt (base, &gsi, location, is_store,
> +                       int_size_in_bytes (pointed_to_type));
> +      gsi = gsi_last_bb (then_bb);
> +    }
> +  else
> +    {
> +      /* Instrument the beginning of the memory region to be
> +        accessed.  */
> +      build_check_stmt (base, iter, location, is_store,
> +                       int_size_in_bytes (pointed_to_type));
> +      gsi = *iter;
> +    }
> +
> +  /* We want to instrument the access at the end of the memory region,
> +     which is at (base + len - 1).  */
> +
> +  /* offset = len - 1;  */
> +  len = unshare_expr (len);
> +  gimple offset =
> +    gimple_build_assign_with_ops (TREE_CODE (len),
> +                                 make_ssa_name (TREE_TYPE (len), NULL),
> +                                 len, NULL);
> +  gimple_set_location (offset, location);
> +  gsi_insert_before (&gsi, offset, GSI_NEW_STMT);
> +
> +  offset =
> +    gimple_build_assign_with_ops (MINUS_EXPR,
> +                                 make_ssa_name (size_type_node, NULL),
> +                                 gimple_assign_lhs (offset),
> +                                 build_int_cst (size_type_node, 1));
> +  gimple_set_location (offset, location);
> +  gsi_insert_after (&gsi, offset, GSI_NEW_STMT);
> +
> +  /* _1 = base;  */
> +  base = unshare_expr (base);
> +  gimple region_end =
> +    gimple_build_assign_with_ops (TREE_CODE (base),
> +                                 make_ssa_name (TREE_TYPE (base), NULL),
> +                                 base, NULL);
> +  gimple_set_location (region_end, location);
> +  gsi_insert_after (&gsi, region_end, GSI_NEW_STMT);
> +
> +  /* _2 = _1 + offset;  */
> +  region_end =
> +    gimple_build_assign_with_ops (POINTER_PLUS_EXPR,
> +                                 make_ssa_name (TREE_TYPE (base), NULL),
> +                                 gimple_assign_lhs (region_end),
> +                                 gimple_assign_lhs (offset));
> +  gimple_set_location (region_end, location);
> +  gsi_insert_after (&gsi, region_end, GSI_NEW_STMT);
> +
> +  /* instrument access at _2;  */
> +  gsi_next (&gsi);
> +  tree end = gimple_assign_lhs (region_end);
> +  build_check_stmt (end, &gsi, location, is_store,
> +                   int_size_in_bytes (TREE_TYPE (end)));
> +}
> +
> +/* If the statement pointed to by the iterator ITER is a call to a
> +   builtin memory access function, instrumented it and return TRUE.
> +   Otherwise, return false.  */
> +
> +static bool
> +maybe_instrument_builtin_call (gimple_stmt_iterator *iter)
> +{
> +  gimple call = gsi_stmt (*iter);
> +
> +  if (!is_gimple_call (call))
> +    return false;
> +
> +  tree callee = gimple_call_fndecl (call);
> +
> +  if (!is_builtin_fn (callee)
> +      || DECL_BUILT_IN_CLASS (callee) != BUILT_IN_NORMAL)
> +    return false;
> +
> +  tree source0 = NULL_TREE, source1 = NULL_TREE,
> +    dest = NULL_TREE, len = NULL_TREE;
> +
> +  switch (DECL_FUNCTION_CODE (callee))
> +    {
> +      /* (s, s, n) style memops.  */
> +    case BUILT_IN_BCMP:
> +    case BUILT_IN_MEMCMP:
> +      /* These cannot be safely instrumented as their length parameter
> +         is just a mere limit.
> +
> +    case BUILT_IN_STRNCASECMP:
> +    case BUILT_IN_STRNCMP:  */
> +      len = gimple_call_arg (call, 2);
> +      source0 = gimple_call_arg (call, 0);
> +      source1 = gimple_call_arg (call, 1);
> +      break;
> +
> +      /* (src, dest, n) style memops.  */
> +    case BUILT_IN_BCOPY:
> +      len = gimple_call_arg (call, 2);
> +      source0 = gimple_call_arg (call, 0);
> +      dest = gimple_call_arg (call, 2);
> +      break;
> +
> +      /* (dest, src, n) style memops.  */
> +    case BUILT_IN_MEMCPY:
> +    case BUILT_IN_MEMCPY_CHK:
> +    case BUILT_IN_MEMMOVE:
> +    case BUILT_IN_MEMMOVE_CHK:
> +    case BUILT_IN_MEMPCPY:
> +    case BUILT_IN_MEMPCPY_CHK:
> +
> +      /* The builtin below cannot be safely instrumented as their
> +         length parameter is just a mere limit.
> +

Why can't the following be instrumented? The length is min (n, strlen (str)).


> +    case BUILT_IN_STPNCPY:
> +    case BUILT_IN_STPNCPY_CHK:
> +    case BUILT_IN_STRNCPY:
> +    case BUILT_IN_STRNCPY_CHK: */
> +      dest = gimple_call_arg (call, 0);
> +      source0 = gimple_call_arg (call, 1);
> +      len = gimple_call_arg (call, 2);
> +      break;
> +
> +      /* (dest, src, n) where src is appended to the end of dest.
> +        These cannot be instrumented either.
> +    case BUILT_IN_STRNCAT:
> +    case BUILT_IN_STRNCAT_CHK:
> +      break;  */
> +
> +      /* (dest, n) style memops.  */
> +    case BUILT_IN_BZERO:
> +      dest = gimple_call_arg (call, 0);
> +      len = gimple_call_arg (call, 1);
> +      break;
> +
> +      /* (dest, x, n) style memops*/
> +    case BUILT_IN_MEMSET:
> +    case BUILT_IN_MEMSET_CHK:
> +      dest = gimple_call_arg (call, 0);
> +      len = gimple_call_arg (call, 2);
> +      break;
> +
> +      /* (src, n) style memops.  */
> +    case BUILT_IN_STRNDUP:
> +      source0 = gimple_call_arg (call, 0);
> +      len = gimple_call_arg (call, 1);
> +      break;
> +
> +      /* (src, x, n) style memops.  */
> +    case BUILT_IN_MEMCHR:
> +      source0 = gimple_call_arg (call, 0);
> +      len = gimple_call_arg (call, 2);
> +    break;
> +
> +      /* memops that have no length information.
> +    case BUILT_IN_INDEX:
> +    case BUILT_IN_RINDEX:
> +    case BUILT_IN_STRCHR:
> +    case BUILT_IN_STRDUP:
> +    case BUILT_IN_STRLEN:
> +    case BUILT_IN_STRRCHR:
> +      break;  */


For 'strlen', can the memory check be done at the end of the string
using the returned length?

For strchr, the instrumentation can be done at the returned pointer, right?

thanks,

David


> +
> +      /* (dest, src) and no length info.
> +    case BUILT_IN_STPCPY:
> +    case BUILT_IN_STPCPY_CHK:
> +    case BUILT_IN_STRCASECMP:
> +    case BUILT_IN_STRCAT:
> +    case BUILT_IN_STRCAT_CHK:
> +    case BUILT_IN_STRCPY:
> +    case BUILT_IN_STRCPY_CHK:
> +      break;  */
> +
> +      /* (s, s) and no length info.
> +    case BUILT_IN_STRCMP:
> +    case BUILT_IN_STRCSPN:
> +    case BUILT_IN_STRPBRK:
> +    case BUILT_IN_STRSPN:
> +    case BUILT_IN_STRSTR:
> +      break;  */
> +
> +    default:
> +      break;
> +    }
> +
> +  if (len != NULL_TREE)
> +    {
> +      if (source0 != NULL_TREE)
> +       instrument_mem_region_access (source0, len, iter,
> +                                     gimple_location (call),
> +                                     /*is_store=*/false);
> +      if (source1 != NULL_TREE)
> +       instrument_mem_region_access (source1, len, iter,
> +                                     gimple_location (call),
> +                                     /*is_store=*/false);
> +      else if (dest != NULL_TREE)
> +       instrument_mem_region_access (dest, len, iter,
> +                                     gimple_location (call),
> +                                     /*is_store=*/true);
> +      return true;
> +    }
> +
> +  return false;
> +}
> +
> +/*  Instrument the assignment statement ITER if it is subject to
> +    instrumentation.  */
> +
> +static void
> +instrument_assignment (gimple_stmt_iterator *iter)
> +{
> +  gimple s = gsi_stmt (*iter);
> +
> +  gcc_assert (gimple_assign_single_p (s));
> +
> +  instrument_derefs (iter, gimple_assign_lhs (s),
> +                    gimple_location (s), true);
> +  instrument_derefs (iter, gimple_assign_rhs1 (s),
> +                    gimple_location (s), false);
> +}
> +
> +/* Instrument the function call pointed to by the iterator ITER, if it
> +   is subject to instrumentation.  At the moment, the only function
> +   calls that are instrumented are some built-in functions that access
> +   memory.  Look at maybe_instrument_builtin_call to learn more.  */
> +
> +static void
> +maybe_instrument_call (gimple_stmt_iterator *iter)
> +{
> +  maybe_instrument_builtin_call (iter);
> +}
> +
>  /* asan: this looks too complex. Can this be done simpler? */
>  /* Transform
>     1) Memory references.
> @@ -688,13 +1001,12 @@ transform_statements (void)
>        if (bb->index >= saved_last_basic_block) continue;
>        for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
>          {
> -          gimple s = gsi_stmt (i);
> -          if (!gimple_assign_single_p (s))
> -           continue;
> -          instrument_derefs (&i, gimple_assign_lhs (s),
> -                             gimple_location (s), true);
> -          instrument_derefs (&i, gimple_assign_rhs1 (s),
> -                             gimple_location (s), false);
> +         gimple s = gsi_stmt (i);
> +
> +         if (gimple_assign_single_p (s))
> +           instrument_assignment (&i);
> +         else if (is_gimple_call (s))
> +           maybe_instrument_call (&i);
>          }
>      }
>  }
> --
>                 Dodji


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]