[tsan] ThreadSanitizer instrumentation part

Xinliang David Li davidxl@google.com
Thu Nov 1 06:02:00 GMT 2012


On Wed, Oct 31, 2012 at 4:10 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> Just a couple of random comments:
>
> On Wed, Oct 31, 2012 at 11:34:10AM -0700, Wei Mi wrote:
>> gcc/ChangeLog:
>> 2012-10-31  Wei Mi  <wmi@gmail.com>
>
> If Dmitry wrote parts of the patch, it would be nice to mention
> him in the ChangeLog too.
>
>>         * Makefile.in (tsan.o): New
>>         * passes.c (init_optimization_passes): Add tsan passes
>>         * tree-pass.h (register_pass_info): Ditto
>>         * cfghooks.h (GCC_CFGHOOKS_H): Avoid including duplicate headers
>>         * doc/invoke.texi: Document tsan related options
>>         * toplev.c (compile_file): Add tsan pass in driver
>>         * gcc.c (LINK_COMMAND_SPEC): Add -lasan in link command if there
>>         -ftsan is on.
>>         * tsan.c: New file about tsan
>>         * tsan.h: Ditto
>
> All ChangeLog entries should end with a dot.
>
>> --- gcc/cfghooks.h    (revision 193016)
>> +++ gcc/cfghooks.h    (working copy)
>> @@ -19,6 +19,9 @@ You should have received a copy of the G
>>  along with GCC; see the file COPYING3.  If not see
>>  <http://www.gnu.org/licenses/>.  */
>>
>> +#ifndef GCC_CFGHOOKS_H
>> +#define GCC_CFGHOOKS_H
>> +
>>  /* Only basic-block.h includes this.  */
>>
>>  struct cfg_hooks
>> @@ -219,3 +222,4 @@ extern void gimple_register_cfg_hooks (v
>>  extern struct cfg_hooks get_cfg_hooks (void);
>>  extern void set_cfg_hooks (struct cfg_hooks);
>>
>> +#endif  /* GCC_CFGHOOKS_H */
>
> Why this?  Simply don't include that header in tsan.c, it is already
> included by basic-block.h.
>
>> --- gcc/tsan.c        (revision 0)
>> +++ gcc/tsan.c        (revision 0)
>> @@ -0,0 +1,534 @@
>> +/* GCC instrumentation plugin for ThreadSanitizer.
>> + * Copyright (c) 2012, Google Inc. All rights reserved.
>> + * Author: Dmitry Vyukov (dvyukov)
>> + *
>> + * IT is free software; you can redistribute it and/or modify it under
>> + * the terms of the GNU General Public License as published by the Free
>> + * Software Foundation; either version 3, or (at your option) any later
>> + * version. See http://www.gnu.org/licenses/
>> + */
>
> Can't google just assign the code to FSF, and use a standard boilerplate
> as everything else in gcc/ ?
>
>> +/* Builds the following decl
>> +   void __tsan_vptr_update (void *vptr, void *val);  */
>> +
>> +static tree
>> +get_vptr_update_decl (void)
>> +{
>> +  tree typ;
>> +  static tree decl;
>> +
>> +  if (decl != NULL)
>> +    return decl;
>> +  typ = build_function_type_list (void_type_node,
>> +                                  ptr_type_node, ptr_type_node, NULL_TREE);
>> +  decl = build_func_decl (typ, "__tsan_vptr_update");
>> +  return decl;
>> +}
> ...
>
> Instead of this (but same applies to asan), I think we should just consider
> putting it into builtins.def (or have sanitizer.def like there is sync.def
> or omp-builtins.def).  The problem might be non-C/C++ family frontends
> though.
>


This sounds good -- may be better to defer this and combine the effort
with asan.


David




>> +static gimple_seq
>> +instr_memory_access (tree expr, int is_write)
>> +{
>> +  tree addr_expr, expr_type, call_expr, fdecl;
>> +  gimple_seq gs;
>> +  unsigned size;
>> +
>> +  gcc_assert (is_gimple_addressable (expr));
>> +  addr_expr = build_addr (unshare_expr (expr), current_function_decl);
>> +  expr_type = TREE_TYPE (expr);
>> +  while (TREE_CODE (expr_type) == ARRAY_TYPE)
>> +    expr_type = TREE_TYPE (expr_type);
>> +  size = (TREE_INT_CST_LOW (TYPE_SIZE (expr_type))) / BITS_PER_UNIT;
>
> int_size_in_bytes.
>
>> +  fdecl = get_memory_access_decl (is_write, size);
>> +  call_expr = build_call_expr (fdecl, 1, addr_expr);
>> +  gs = NULL;
>> +  force_gimple_operand (call_expr, &gs, true, 0);
>> +  return gs;
>
> I think it is weird to return gimple_seqs, it would be better to just
> emit the code at some stmt iterator, and preferrably without building
> everything as trees, then gimplifying it.
>> +}
>> +
>> +/* Builds the following gimple sequence:
>> +   __tsan_vptr_update (&EXPR, RHS);  */
>> +
>> +static gimple_seq
>> +instr_vptr_update (tree expr, tree rhs)
>> +{
>> +  tree expr_ptr, call_expr, fdecl;
>> +  gimple_seq gs;
>> +
>> +  expr_ptr = build_addr (unshare_expr (expr), current_function_decl);
>> +  fdecl = get_vptr_update_decl ();
>> +  call_expr = build_call_expr (fdecl, 2, expr_ptr, rhs);
>> +  gs = NULL;
>> +  force_gimple_operand (call_expr, &gs, true, 0);
>> +  return gs;
>> +}
>> +
>> +/* Returns gimple seq that needs to be inserted at function entry.  */
>> +
>> +static gimple_seq
>> +instr_func_entry (void)
>> +{
>> +  tree retaddr_decl, pc_addr, fdecl, call_expr;
>> +  gimple_seq gs;
>> +
>> +  retaddr_decl = builtin_decl_implicit (BUILT_IN_RETURN_ADDRESS);
>> +  pc_addr = build_call_expr (retaddr_decl, 1, integer_zero_node);
>> +  fdecl = get_func_entry_decl ();
>> +  call_expr = build_call_expr (fdecl, 1, pc_addr);
>> +  gs = NULL;
>> +  force_gimple_operand (call_expr, &gs, true, 0);
>> +  return gs;
>> +}
>> +
>> +/* Returns gimple seq that needs to be inserted before function exit.  */
>> +
>> +static gimple_seq
>> +instr_func_exit (void)
>> +{
>> +  tree fdecl, call_expr;
>> +  gimple_seq gs;
>> +
>> +  fdecl = get_func_exit_decl ();
>> +  call_expr = build_call_expr (fdecl, 0);
>> +  gs = NULL;
>> +  force_gimple_operand (call_expr, &gs, true, 0);
>> +  return gs;
>> +}
>> +
>> +/* Sets location LOC for all gimples in the SEQ.  */
>> +
>> +static void
>> +set_location (gimple_seq seq, location_t loc)
>> +{
>> +  gimple_seq_node n;
>> +
>> +  for (n = gimple_seq_first (seq); n != NULL; n = n->gsbase.next)
>
> This really should use a stmt iterator.
>
>> +  FOR_EACH_BB (bb)
>> +    {
>> +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>> +        {
>> +          instrument_gimple (gsi);
>> +        }
>> +    }
>
> Extraneous two pairs of {}s.
>
>> +  struct gimplify_ctx gctx;
>> +
>> +  func_calls = 0;
>> +  func_mops = 0;
>
> For gimplification context, see above, would be better to just
> emit gimple directly.
> For func_calls and func_mops, I believe why you need two variables instead
> of just one, and why the function can't just return a bool whether
> entry/exit needs to be instrumented or not.
>> +  push_gimplify_context (&gctx);
>> +  instrument_memory_accesses ();
>> +  if (func_calls || func_mops)
>> +    {
>> +      instrument_func_entry ();
>> +      instrument_func_exit ();
>> +    }
>> +  pop_gimplify_context (NULL);
>> +  return 0;
>> +}
>> +
>> +/* The pass's gate.  */
>> +
>> +static bool
>> +tsan_gate (void)
>> +{
>> +  return flag_tsan != 0;
>> +}
>> +
>> +/* Inserts __tsan_init () into the list of CTORs.  */
>> +
>> +void tsan_finish_file (void)
>> +{
>> +  tree ctor_statements;
>> +
>> +  ctor_statements = NULL_TREE;
>> +  append_to_statement_list (build_call_expr (get_init_decl (), 0),
>> +                            &ctor_statements);
>> +  cgraph_build_static_cdtor ('I', ctor_statements,
>> +                             MAX_RESERVED_INIT_PRIORITY - 1);
>> +}
>> +
>> +/* The pass descriptor.  */
>> +
>> +struct gimple_opt_pass pass_tsan = {{
>
> Please watch formatting of other gimple_opt_pass structures.
> {{ isn't used anywhere.
>
>> --- gcc/common.opt    (revision 193016)
>> +++ gcc/common.opt    (working copy)
>> @@ -1518,6 +1518,14 @@ fmove-loop-invariants
>>  Common Report Var(flag_move_loop_invariants) Init(1) Optimization
>>  Move loop invariant computations out of loops
>>
>> +ftsan
>> +Common RejectNegative Report Var(flag_tsan)
>> +Add ThreadSanitizer instrumentation
>
> Is that the option that LLVM uses (I'm talking about -faddress-sanitizer
> in LLVM vs. -fasan right now in GCC, isn't that similar?).
>
>         Jakub



More information about the Gcc-patches mailing list