[tsan] ThreadSanitizer instrumentation part
Xinliang David Li
davidxl@google.com
Thu Nov 1 06:02:00 GMT 2012
On Wed, Oct 31, 2012 at 4:10 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> Just a couple of random comments:
>
> On Wed, Oct 31, 2012 at 11:34:10AM -0700, Wei Mi wrote:
>> gcc/ChangeLog:
>> 2012-10-31 Wei Mi <wmi@gmail.com>
>
> If Dmitry wrote parts of the patch, it would be nice to mention
> him in the ChangeLog too.
>
>> * Makefile.in (tsan.o): New
>> * passes.c (init_optimization_passes): Add tsan passes
>> * tree-pass.h (register_pass_info): Ditto
>> * cfghooks.h (GCC_CFGHOOKS_H): Avoid including duplicate headers
>> * doc/invoke.texi: Document tsan related options
>> * toplev.c (compile_file): Add tsan pass in driver
>> * gcc.c (LINK_COMMAND_SPEC): Add -lasan in link command if there
>> -ftsan is on.
>> * tsan.c: New file about tsan
>> * tsan.h: Ditto
>
> All ChangeLog entries should end with a dot.
>
>> --- gcc/cfghooks.h (revision 193016)
>> +++ gcc/cfghooks.h (working copy)
>> @@ -19,6 +19,9 @@ You should have received a copy of the G
>> along with GCC; see the file COPYING3. If not see
>> <http://www.gnu.org/licenses/>. */
>>
>> +#ifndef GCC_CFGHOOKS_H
>> +#define GCC_CFGHOOKS_H
>> +
>> /* Only basic-block.h includes this. */
>>
>> struct cfg_hooks
>> @@ -219,3 +222,4 @@ extern void gimple_register_cfg_hooks (v
>> extern struct cfg_hooks get_cfg_hooks (void);
>> extern void set_cfg_hooks (struct cfg_hooks);
>>
>> +#endif /* GCC_CFGHOOKS_H */
>
> Why this? Simply don't include that header in tsan.c, it is already
> included by basic-block.h.
>
>> --- gcc/tsan.c (revision 0)
>> +++ gcc/tsan.c (revision 0)
>> @@ -0,0 +1,534 @@
>> +/* GCC instrumentation plugin for ThreadSanitizer.
>> + * Copyright (c) 2012, Google Inc. All rights reserved.
>> + * Author: Dmitry Vyukov (dvyukov)
>> + *
>> + * IT is free software; you can redistribute it and/or modify it under
>> + * the terms of the GNU General Public License as published by the Free
>> + * Software Foundation; either version 3, or (at your option) any later
>> + * version. See http://www.gnu.org/licenses/
>> + */
>
> Can't google just assign the code to FSF, and use a standard boilerplate
> as everything else in gcc/ ?
>
>> +/* Builds the following decl
>> + void __tsan_vptr_update (void *vptr, void *val); */
>> +
>> +static tree
>> +get_vptr_update_decl (void)
>> +{
>> + tree typ;
>> + static tree decl;
>> +
>> + if (decl != NULL)
>> + return decl;
>> + typ = build_function_type_list (void_type_node,
>> + ptr_type_node, ptr_type_node, NULL_TREE);
>> + decl = build_func_decl (typ, "__tsan_vptr_update");
>> + return decl;
>> +}
> ...
>
> Instead of this (but same applies to asan), I think we should just consider
> putting it into builtins.def (or have sanitizer.def like there is sync.def
> or omp-builtins.def). The problem might be non-C/C++ family frontends
> though.
>
This sounds good -- may be better to defer this and combine the effort
with asan.
David
>> +static gimple_seq
>> +instr_memory_access (tree expr, int is_write)
>> +{
>> + tree addr_expr, expr_type, call_expr, fdecl;
>> + gimple_seq gs;
>> + unsigned size;
>> +
>> + gcc_assert (is_gimple_addressable (expr));
>> + addr_expr = build_addr (unshare_expr (expr), current_function_decl);
>> + expr_type = TREE_TYPE (expr);
>> + while (TREE_CODE (expr_type) == ARRAY_TYPE)
>> + expr_type = TREE_TYPE (expr_type);
>> + size = (TREE_INT_CST_LOW (TYPE_SIZE (expr_type))) / BITS_PER_UNIT;
>
> int_size_in_bytes.
>
>> + fdecl = get_memory_access_decl (is_write, size);
>> + call_expr = build_call_expr (fdecl, 1, addr_expr);
>> + gs = NULL;
>> + force_gimple_operand (call_expr, &gs, true, 0);
>> + return gs;
>
> I think it is weird to return gimple_seqs, it would be better to just
> emit the code at some stmt iterator, and preferrably without building
> everything as trees, then gimplifying it.
>> +}
>> +
>> +/* Builds the following gimple sequence:
>> + __tsan_vptr_update (&EXPR, RHS); */
>> +
>> +static gimple_seq
>> +instr_vptr_update (tree expr, tree rhs)
>> +{
>> + tree expr_ptr, call_expr, fdecl;
>> + gimple_seq gs;
>> +
>> + expr_ptr = build_addr (unshare_expr (expr), current_function_decl);
>> + fdecl = get_vptr_update_decl ();
>> + call_expr = build_call_expr (fdecl, 2, expr_ptr, rhs);
>> + gs = NULL;
>> + force_gimple_operand (call_expr, &gs, true, 0);
>> + return gs;
>> +}
>> +
>> +/* Returns gimple seq that needs to be inserted at function entry. */
>> +
>> +static gimple_seq
>> +instr_func_entry (void)
>> +{
>> + tree retaddr_decl, pc_addr, fdecl, call_expr;
>> + gimple_seq gs;
>> +
>> + retaddr_decl = builtin_decl_implicit (BUILT_IN_RETURN_ADDRESS);
>> + pc_addr = build_call_expr (retaddr_decl, 1, integer_zero_node);
>> + fdecl = get_func_entry_decl ();
>> + call_expr = build_call_expr (fdecl, 1, pc_addr);
>> + gs = NULL;
>> + force_gimple_operand (call_expr, &gs, true, 0);
>> + return gs;
>> +}
>> +
>> +/* Returns gimple seq that needs to be inserted before function exit. */
>> +
>> +static gimple_seq
>> +instr_func_exit (void)
>> +{
>> + tree fdecl, call_expr;
>> + gimple_seq gs;
>> +
>> + fdecl = get_func_exit_decl ();
>> + call_expr = build_call_expr (fdecl, 0);
>> + gs = NULL;
>> + force_gimple_operand (call_expr, &gs, true, 0);
>> + return gs;
>> +}
>> +
>> +/* Sets location LOC for all gimples in the SEQ. */
>> +
>> +static void
>> +set_location (gimple_seq seq, location_t loc)
>> +{
>> + gimple_seq_node n;
>> +
>> + for (n = gimple_seq_first (seq); n != NULL; n = n->gsbase.next)
>
> This really should use a stmt iterator.
>
>> + FOR_EACH_BB (bb)
>> + {
>> + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>> + {
>> + instrument_gimple (gsi);
>> + }
>> + }
>
> Extraneous two pairs of {}s.
>
>> + struct gimplify_ctx gctx;
>> +
>> + func_calls = 0;
>> + func_mops = 0;
>
> For gimplification context, see above, would be better to just
> emit gimple directly.
> For func_calls and func_mops, I believe why you need two variables instead
> of just one, and why the function can't just return a bool whether
> entry/exit needs to be instrumented or not.
>> + push_gimplify_context (&gctx);
>> + instrument_memory_accesses ();
>> + if (func_calls || func_mops)
>> + {
>> + instrument_func_entry ();
>> + instrument_func_exit ();
>> + }
>> + pop_gimplify_context (NULL);
>> + return 0;
>> +}
>> +
>> +/* The pass's gate. */
>> +
>> +static bool
>> +tsan_gate (void)
>> +{
>> + return flag_tsan != 0;
>> +}
>> +
>> +/* Inserts __tsan_init () into the list of CTORs. */
>> +
>> +void tsan_finish_file (void)
>> +{
>> + tree ctor_statements;
>> +
>> + ctor_statements = NULL_TREE;
>> + append_to_statement_list (build_call_expr (get_init_decl (), 0),
>> + &ctor_statements);
>> + cgraph_build_static_cdtor ('I', ctor_statements,
>> + MAX_RESERVED_INIT_PRIORITY - 1);
>> +}
>> +
>> +/* The pass descriptor. */
>> +
>> +struct gimple_opt_pass pass_tsan = {{
>
> Please watch formatting of other gimple_opt_pass structures.
> {{ isn't used anywhere.
>
>> --- gcc/common.opt (revision 193016)
>> +++ gcc/common.opt (working copy)
>> @@ -1518,6 +1518,14 @@ fmove-loop-invariants
>> Common Report Var(flag_move_loop_invariants) Init(1) Optimization
>> Move loop invariant computations out of loops
>>
>> +ftsan
>> +Common RejectNegative Report Var(flag_tsan)
>> +Add ThreadSanitizer instrumentation
>
> Is that the option that LLVM uses (I'm talking about -faddress-sanitizer
> in LLVM vs. -fasan right now in GCC, isn't that similar?).
>
> Jakub
More information about the Gcc-patches
mailing list