
Re: libsanitizer merge from upstream r208536


On Thu, May 22, 2014 at 6:07 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Thu, May 22, 2014 at 04:06:21PM +0400, Konstantin Serebryany wrote:
>> Not really recently... (Feb 2013)
>> http://llvm.org/viewvc/llvm-project?rev=175507&view=rev
>
> Ah, wasn't aware of that.
>
> Here is (so far not bootstrapped/regtested) patch for the GCC side.
>
> Notes:
> 1) the cases where, e.g. for various stringops, we perform first and
>    last address checks and use separate __asan_report_*1 for reporting
>    should probably be converted to use this __asan_report_*_n too

Note that in clang we completely removed the code that instruments
stringops (we had memset/memcpy/memmove).
Instead we replace memset with __asan_memset (ditto for
memcpy/memmove) so that it does not get inlined later.
This simplifies the code and allows us to properly analyze all
memset/etc calls, not just check the first and the last bytes.
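
Roughly, the replacement amounts to routing the call through an asan
entry point that can validate the whole range.  A minimal sketch, not
the actual compiler-rt code; check_range here is a hypothetical
stand-in for the runtime's shadow-memory check:

#include <stddef.h>
#include <string.h>

/* Hypothetical stand-in for asan's range check; the real runtime
   validates shadow memory for every byte of [p, p + n).  */
static void
check_range (const void *p, size_t n)
{
  (void) p;
  (void) n;   /* real implementation would consult shadow memory */
}

void *
__asan_memset (void *p, int c, size_t n)
{
  check_range (p, n);   /* whole range, not just first and last byte */
  return memset (p, c, n);
}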


> 2) it still doesn't deal with unaligned power-of-two accesses properly,
>    but neither does llvm (at least not 3.4).  I am not talking about
>    undefined behavior cases where the compiler isn't told the access
>    is misaligned, but e.g. about accessing struct S { int x; }
>    __attribute__((packed)) and similar (or aligned(1)).  Supposedly
>    we could force __asan_report_*_n for that case too, because the
>    normal wider check assumes the access is aligned

Yep, we don't do it.
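
For concreteness, the kind of access note 2 is about looks roughly
like this (an illustration only, not part of the patch):

/* The int member is only 1-byte aligned, so a 4-byte shadow check
   that assumes natural alignment can miss part of the access.  */
struct S { int x; } __attribute__ ((packed));

int
read_packed (struct S *p)
{
  return p->x;   /* possibly unaligned 4-byte load */
}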

> 3) there is still a failure for -m32:
> FAIL: g++.dg/asan/asan_test.C  -O2  AddressSanitizer_UAF_long_double Ident(p)[12] = 0 output pattern test
> Output should match: WRITE of size 1[06]
> FAIL: g++.dg/asan/asan_test.C  -O2  AddressSanitizer_UAF_long_double Ident(p)[0] = Ident(p)[12] output pattern test
> Output should match: READ of size 1[06]
>    That sounds like something to fix upstream: it should also allow
>    size 12, which is the size of long double on ia32 (16 bytes on
>    x86_64), thus 1[026].  Kostya, can you please change it?  I'll then
>    apply it to the testsuite patch too.
Like this?

--- lib/asan/tests/asan_test.cc (revision 209430)
+++ lib/asan/tests/asan_test.cc (working copy)
@@ -183,8 +183,8 @@
 TEST(AddressSanitizer, UAF_long_double) {
   if (sizeof(long double) == sizeof(double)) return;
   long double *p = Ident(new long double[10]);
-  EXPECT_DEATH(Ident(p)[12] = 0, "WRITE of size 1[06]");
-  EXPECT_DEATH(Ident(p)[0] = Ident(p)[12], "READ of size 1[06]");
+  EXPECT_DEATH(Ident(p)[12] = 0, "WRITE of size 1[026]");
+  EXPECT_DEATH(Ident(p)[0] = Ident(p)[12], "READ of size 1[026]");
   delete [] Ident(p);
 }
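
For context on the sizes involved (assuming the usual x86 ABIs): the
x87 format only occupies 10 bytes, but sizeof (long double) rounds the
storage up to 12 on ia32 and 16 on x86_64, hence the 1[026] pattern.
A trivial way to see the per-target values:

#include <float.h>
#include <stdio.h>

int
main (void)
{
  /* With the x87 80-bit format LDBL_MANT_DIG is 64; sizeof rounds the
     storage up to 12 bytes on ia32 and 16 bytes on x86_64.  */
  printf ("sizeof (long double) = %zu, LDBL_MANT_DIG = %d\n",
          sizeof (long double), LDBL_MANT_DIG);
  return 0;
}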


> As mentioned earlier, ubsan has a similar
>    problem where it doesn't recognize float bitsize 96 (but unlike this
>    case, where clang+llvm pass in 10 bytes, which is what is actually
>    accessed by the hw if using the i387 stack, but not if using other
>    means of copying it, in the ubsan case clang also passes in bitsize
>    96, which ubsan doesn't handle).

Yeah, this long double business is rather confusing to me...

--kcc

>
> I'll bootstrap/regtest this later today.
>
> 2014-05-22  Jakub Jelinek  <jakub@redhat.com>
>
>         * sanitizer.def (BUILT_IN_ASAN_REPORT_LOAD_N,
>         BUILT_IN_ASAN_REPORT_STORE_N): New.
>         * asan.c (struct asan_mem_ref): Change access_size type to
>         HOST_WIDE_INT.
>         (asan_mem_ref_init, asan_mem_ref_new, get_mem_refs_of_builtin_call,
>         update_mem_ref_hash_table): Likewise.
>         (asan_mem_ref_hasher::hash): Hash in a HWI.
>         (report_error_func): Change size_in_bytes argument to HWI.
>         Use *_N builtins if size_in_bytes is larger than 16 or not power of
>         two.
>         (build_shadow_mem_access): New function.
>         (build_check_stmt): Use it.  Change size_in_bytes argument to HWI.
>         Handle size_in_bytes not power of two or larger than 16.
>         (instrument_derefs): Don't give up if size_in_bytes is not
>         power of two or is larger than 16.
>
> --- gcc/sanitizer.def.jj        2014-05-22 10:18:01.000000000 +0200
> +++ gcc/sanitizer.def   2014-05-22 14:13:27.859513839 +0200
> @@ -41,6 +41,9 @@ DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPO
>                       BT_FN_VOID_PTR, ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
>  DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPORT_LOAD16, "__asan_report_load16",
>                       BT_FN_VOID_PTR, ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
> +DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPORT_LOAD_N, "__asan_report_load_n",
> +                     BT_FN_VOID_PTR_PTRMODE,
> +                     ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
>  DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPORT_STORE1, "__asan_report_store1",
>                       BT_FN_VOID_PTR, ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
>  DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPORT_STORE2, "__asan_report_store2",
> @@ -51,6 +54,9 @@ DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPO
>                       BT_FN_VOID_PTR, ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
>  DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPORT_STORE16, "__asan_report_store16",
>                       BT_FN_VOID_PTR, ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
> +DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REPORT_STORE_N, "__asan_report_store_n",
> +                     BT_FN_VOID_PTR_PTRMODE,
> +                     ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
>  DEF_SANITIZER_BUILTIN(BUILT_IN_ASAN_REGISTER_GLOBALS,
>                       "__asan_register_globals",
>                       BT_FN_VOID_PTR_PTRMODE, ATTR_NOTHROW_LEAF_LIST)
> --- gcc/asan.c.jj       2014-05-11 22:21:23.000000000 +0200
> +++ gcc/asan.c  2014-05-22 15:28:30.125998730 +0200
> @@ -251,8 +251,8 @@ struct asan_mem_ref
>    /* The expression of the beginning of the memory region.  */
>    tree start;
>
> -  /* The size of the access (can be 1, 2, 4, 8, 16 for now).  */
> -  char access_size;
> +  /* The size of the access.  */
> +  HOST_WIDE_INT access_size;
>  };
>
>  static alloc_pool asan_mem_ref_alloc_pool;
> @@ -274,7 +274,7 @@ asan_mem_ref_get_alloc_pool ()
>  /* Initializes an instance of asan_mem_ref.  */
>
>  static void
> -asan_mem_ref_init (asan_mem_ref *ref, tree start, char access_size)
> +asan_mem_ref_init (asan_mem_ref *ref, tree start, HOST_WIDE_INT access_size)
>  {
>    ref->start = start;
>    ref->access_size = access_size;
> @@ -287,7 +287,7 @@ asan_mem_ref_init (asan_mem_ref *ref, tr
>     access to the referenced memory.  */
>
>  static asan_mem_ref*
> -asan_mem_ref_new (tree start, char access_size)
> +asan_mem_ref_new (tree start, HOST_WIDE_INT access_size)
>  {
>    asan_mem_ref *ref =
>      (asan_mem_ref *) pool_alloc (asan_mem_ref_get_alloc_pool ());
> @@ -334,7 +334,7 @@ inline hashval_t
>  asan_mem_ref_hasher::hash (const asan_mem_ref *mem_ref)
>  {
>    hashval_t h = iterative_hash_expr (mem_ref->start, 0);
> -  h = iterative_hash_hashval_t (h, mem_ref->access_size);
> +  h = iterative_hash_host_wide_int (mem_ref->access_size, h);
>    return h;
>  }
>
> @@ -392,7 +392,7 @@ free_mem_ref_resources ()
>  /* Return true iff the memory reference REF has been instrumented.  */
>
>  static bool
> -has_mem_ref_been_instrumented (tree ref, char access_size)
> +has_mem_ref_been_instrumented (tree ref, HOST_WIDE_INT access_size)
>  {
>    asan_mem_ref r;
>    asan_mem_ref_init (&r, ref, access_size);
> @@ -480,7 +480,7 @@ get_mem_refs_of_builtin_call (const gimp
>    tree source0 = NULL_TREE, source1 = NULL_TREE,
>      dest = NULL_TREE, len = NULL_TREE;
>    bool is_store = true, got_reference_p = false;
> -  char access_size = 1;
> +  HOST_WIDE_INT access_size = 1;
>
>    switch (DECL_FUNCTION_CODE (callee))
>      {
> @@ -842,7 +842,7 @@ has_stmt_been_instrumented_p (gimple stm
>  /*  Insert a memory reference into the hash table.  */
>
>  static void
> -update_mem_ref_hash_table (tree ref, char access_size)
> +update_mem_ref_hash_table (tree ref, HOST_WIDE_INT access_size)
>  {
>    hash_table <asan_mem_ref_hasher> ht = get_mem_ref_hash_table ();
>
> @@ -1315,20 +1315,22 @@ asan_protect_global (tree decl)
>    return true;
>  }
>
> -/* Construct a function tree for __asan_report_{load,store}{1,2,4,8,16}.
> -   IS_STORE is either 1 (for a store) or 0 (for a load).
> -   SIZE_IN_BYTES is one of 1, 2, 4, 8, 16.  */
> +/* Construct a function tree for __asan_report_{load,store}{1,2,4,8,16,_n}.
> +   IS_STORE is either 1 (for a store) or 0 (for a load).  */
>
>  static tree
> -report_error_func (bool is_store, int size_in_bytes)
> +report_error_func (bool is_store, HOST_WIDE_INT size_in_bytes)
>  {
> -  static enum built_in_function report[2][5]
> +  static enum built_in_function report[2][6]
>      = { { BUILT_IN_ASAN_REPORT_LOAD1, BUILT_IN_ASAN_REPORT_LOAD2,
>           BUILT_IN_ASAN_REPORT_LOAD4, BUILT_IN_ASAN_REPORT_LOAD8,
> -         BUILT_IN_ASAN_REPORT_LOAD16 },
> +         BUILT_IN_ASAN_REPORT_LOAD16, BUILT_IN_ASAN_REPORT_LOAD_N },
>         { BUILT_IN_ASAN_REPORT_STORE1, BUILT_IN_ASAN_REPORT_STORE2,
>           BUILT_IN_ASAN_REPORT_STORE4, BUILT_IN_ASAN_REPORT_STORE8,
> -         BUILT_IN_ASAN_REPORT_STORE16 } };
> +         BUILT_IN_ASAN_REPORT_STORE16, BUILT_IN_ASAN_REPORT_STORE_N } };
> +  if ((size_in_bytes & (size_in_bytes - 1)) != 0
> +      || size_in_bytes > 16)
> +    return builtin_decl_implicit (report[is_store][5]);
>    return builtin_decl_implicit (report[is_store][exact_log2 (size_in_bytes)]);
>  }
>
> @@ -1450,6 +1452,47 @@ insert_if_then_before_iter (gimple cond,
>    gsi_insert_after (&cond_insert_point, cond, GSI_NEW_STMT);
>  }
>
> +/* Build
> +   (base_addr >> ASAN_SHADOW_SHIFT) + targetm.asan_shadow_offset ().  */
> +
> +static tree
> +build_shadow_mem_access (gimple_stmt_iterator *gsi, location_t location,
> +                        tree base_addr, tree shadow_ptr_type)
> +{
> +  tree t, uintptr_type = TREE_TYPE (base_addr);
> +  tree shadow_type = TREE_TYPE (shadow_ptr_type);
> +  gimple g;
> +
> +  t = build_int_cst (uintptr_type, ASAN_SHADOW_SHIFT);
> +  g = gimple_build_assign_with_ops (RSHIFT_EXPR,
> +                                   make_ssa_name (uintptr_type, NULL),
> +                                   base_addr, t);
> +  gimple_set_location (g, location);
> +  gsi_insert_after (gsi, g, GSI_NEW_STMT);
> +
> +  t = build_int_cst (uintptr_type, targetm.asan_shadow_offset ());
> +  g = gimple_build_assign_with_ops (PLUS_EXPR,
> +                                   make_ssa_name (uintptr_type, NULL),
> +                                   gimple_assign_lhs (g), t);
> +  gimple_set_location (g, location);
> +  gsi_insert_after (gsi, g, GSI_NEW_STMT);
> +
> +  g = gimple_build_assign_with_ops (NOP_EXPR,
> +                                   make_ssa_name (shadow_ptr_type, NULL),
> +                                   gimple_assign_lhs (g), NULL_TREE);
> +  gimple_set_location (g, location);
> +  gsi_insert_after (gsi, g, GSI_NEW_STMT);
> +
> +  t = build2 (MEM_REF, shadow_type, gimple_assign_lhs (g),
> +             build_int_cst (shadow_ptr_type, 0));
> +  g = gimple_build_assign_with_ops (MEM_REF,
> +                                   make_ssa_name (shadow_type, NULL),
> +                                   t, NULL_TREE);
> +  gimple_set_location (g, location);
> +  gsi_insert_after (gsi, g, GSI_NEW_STMT);
> +  return gimple_assign_lhs (g);
> +}
> +
>  /* Instrument the memory access instruction BASE.  Insert new
>     statements before or after ITER.
>
> @@ -1457,8 +1500,7 @@ insert_if_then_before_iter (gimple cond,
>     SSA_NAME, or a non-SSA expression.  LOCATION is the source code
>     location.  IS_STORE is TRUE for a store, FALSE for a load.
>     BEFORE_P is TRUE for inserting the instrumentation code before
> -   ITER, FALSE for inserting it after ITER.  SIZE_IN_BYTES is one of
> -   1, 2, 4, 8, 16.
> +   ITER, FALSE for inserting it after ITER.
>
>     If BEFORE_P is TRUE, *ITER is arranged to still point to the
>     statement it was pointing to prior to calling this function,
> @@ -1466,7 +1508,7 @@ insert_if_then_before_iter (gimple cond,
>
>  static void
>  build_check_stmt (location_t location, tree base, gimple_stmt_iterator *iter,
> -                 bool before_p, bool is_store, int size_in_bytes)
> +                 bool before_p, bool is_store, HOST_WIDE_INT size_in_bytes)
>  {
>    gimple_stmt_iterator gsi;
>    basic_block then_bb, else_bb;
> @@ -1477,6 +1519,12 @@ build_check_stmt (location_t location, t
>    tree uintptr_type
>      = build_nonstandard_integer_type (TYPE_PRECISION (TREE_TYPE (base)), 1);
>    tree base_ssa = base;
> +  HOST_WIDE_INT real_size_in_bytes = size_in_bytes;
> +  tree sz_arg = NULL_TREE;
> +
> +  if ((size_in_bytes & (size_in_bytes - 1)) != 0
> +      || size_in_bytes > 16)
> +    real_size_in_bytes = 1;
>
>    /* Get an iterator on the point where we can add the condition
>       statement for the instrumentation.  */
> @@ -1509,51 +1557,24 @@ build_check_stmt (location_t location, t
>
>    /* Build
>       (base_addr >> ASAN_SHADOW_SHIFT) + targetm.asan_shadow_offset ().  */
> +  shadow = build_shadow_mem_access (&gsi, location, base_addr,
> +                                   shadow_ptr_type);
>
> -  t = build_int_cst (uintptr_type, ASAN_SHADOW_SHIFT);
> -  g = gimple_build_assign_with_ops (RSHIFT_EXPR,
> -                                   make_ssa_name (uintptr_type, NULL),
> -                                   base_addr, t);
> -  gimple_set_location (g, location);
> -  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> -
> -  t = build_int_cst (uintptr_type, targetm.asan_shadow_offset ());
> -  g = gimple_build_assign_with_ops (PLUS_EXPR,
> -                                   make_ssa_name (uintptr_type, NULL),
> -                                   gimple_assign_lhs (g), t);
> -  gimple_set_location (g, location);
> -  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> -
> -  g = gimple_build_assign_with_ops (NOP_EXPR,
> -                                   make_ssa_name (shadow_ptr_type, NULL),
> -                                   gimple_assign_lhs (g), NULL_TREE);
> -  gimple_set_location (g, location);
> -  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> -
> -  t = build2 (MEM_REF, shadow_type, gimple_assign_lhs (g),
> -             build_int_cst (shadow_ptr_type, 0));
> -  g = gimple_build_assign_with_ops (MEM_REF,
> -                                   make_ssa_name (shadow_type, NULL),
> -                                   t, NULL_TREE);
> -  gimple_set_location (g, location);
> -  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> -  shadow = gimple_assign_lhs (g);
> -
> -  if (size_in_bytes < 8)
> +  if (real_size_in_bytes < 8)
>      {
>        /* Slow path for 1, 2 and 4 byte accesses.
>          Test (shadow != 0)
> -             & ((base_addr & 7) + (size_in_bytes - 1)) >= shadow).  */
> +             & ((base_addr & 7) + (real_size_in_bytes - 1)) >= shadow).  */
>        gimple_seq seq = NULL;
>        gimple shadow_test = build_assign (NE_EXPR, shadow, 0);
>        gimple_seq_add_stmt (&seq, shadow_test);
>        gimple_seq_add_stmt (&seq, build_assign (BIT_AND_EXPR, base_addr, 7));
>        gimple_seq_add_stmt (&seq, build_type_cast (shadow_type,
>                                                    gimple_seq_last (seq)));
> -      if (size_in_bytes > 1)
> +      if (real_size_in_bytes > 1)
>          gimple_seq_add_stmt (&seq,
>                               build_assign (PLUS_EXPR, gimple_seq_last (seq),
> -                                           size_in_bytes - 1));
> +                                          real_size_in_bytes - 1));
>        gimple_seq_add_stmt (&seq, build_assign (GE_EXPR, gimple_seq_last (seq),
>                                                 shadow));
>        gimple_seq_add_stmt (&seq, build_assign (BIT_AND_EXPR, shadow_test,
> @@ -1561,6 +1582,39 @@ build_check_stmt (location_t location, t
>        t = gimple_assign_lhs (gimple_seq_last (seq));
>        gimple_seq_set_location (seq, location);
>        gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
> +      /* For weird access sizes, check first and last byte.  */
> +      if (real_size_in_bytes != size_in_bytes)
> +       {
> +         g = gimple_build_assign_with_ops (PLUS_EXPR,
> +                                           make_ssa_name (uintptr_type, NULL),
> +                                           base_addr,
> +                                           build_int_cst (uintptr_type,
> +                                                          size_in_bytes - 1));
> +         gimple_set_location (g, location);
> +         gsi_insert_after (&gsi, g, GSI_NEW_STMT);
> +         tree base_end_addr = gimple_assign_lhs (g);
> +
> +         shadow = build_shadow_mem_access (&gsi, location, base_end_addr,
> +                                           shadow_ptr_type);
> +         seq = NULL;
> +         shadow_test = build_assign (NE_EXPR, shadow, 0);
> +         gimple_seq_add_stmt (&seq, shadow_test);
> +         gimple_seq_add_stmt (&seq, build_assign (BIT_AND_EXPR,
> +                                                  base_end_addr, 7));
> +         gimple_seq_add_stmt (&seq, build_type_cast (shadow_type,
> +                                                     gimple_seq_last (seq)));
> +         gimple_seq_add_stmt (&seq, build_assign (GE_EXPR,
> +                                                  gimple_seq_last (seq),
> +                                                  shadow));
> +         gimple_seq_add_stmt (&seq, build_assign (BIT_AND_EXPR, shadow_test,
> +                                                  gimple_seq_last (seq)));
> +         gimple_seq_add_stmt (&seq, build_assign (BIT_IOR_EXPR, t,
> +                                                  gimple_seq_last (seq)));
> +         t = gimple_assign_lhs (gimple_seq_last (seq));
> +         gimple_seq_set_location (seq, location);
> +         gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
> +         sz_arg = build_int_cst (pointer_sized_int_node, size_in_bytes);
> +       }
>      }
>    else
>      t = shadow;
> @@ -1573,7 +1627,7 @@ build_check_stmt (location_t location, t
>    /* Generate call to the run-time library (e.g. __asan_report_load8).  */
>    gsi = gsi_start_bb (then_bb);
>    g = gimple_build_call (report_error_func (is_store, size_in_bytes),
> -                        1, base_addr);
> +                        sz_arg ? 2 : 1, base_addr, sz_arg);
>    gimple_set_location (g, location);
>    gsi_insert_after (&gsi, g, GSI_NEW_STMT);
>
> @@ -1611,8 +1665,7 @@ instrument_derefs (gimple_stmt_iterator
>      }
>
>    size_in_bytes = int_size_in_bytes (type);
> -  if ((size_in_bytes & (size_in_bytes - 1)) != 0
> -      || (unsigned HOST_WIDE_INT) size_in_bytes - 1 >= 16)
> +  if (size_in_bytes <= 0)
>      return;
>
>    HOST_WIDE_INT bitsize, bitpos;
> @@ -1621,7 +1674,8 @@ instrument_derefs (gimple_stmt_iterator
>    int volatilep = 0, unsignedp = 0;
>    tree inner = get_inner_reference (t, &bitsize, &bitpos, &offset,
>                                     &mode, &unsignedp, &volatilep, false);
> -  if (bitpos % (size_in_bytes * BITS_PER_UNIT)
> +  if (((size_in_bytes & (size_in_bytes - 1)) == 0
> +       && (bitpos % (size_in_bytes * BITS_PER_UNIT)))
>        || bitsize != size_in_bytes * BITS_PER_UNIT)
>      {
>        if (TREE_CODE (t) == COMPONENT_REF
> @@ -1634,6 +1688,8 @@ instrument_derefs (gimple_stmt_iterator
>         }
>        return;
>      }
> +  if (bitpos % BITS_PER_UNIT)
> +    return;
>
>    if (TREE_CODE (inner) == VAR_DECL
>        && offset == NULL_TREE
>
>
>         Jakub

