This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [tsan] ThreadSanitizer instrumentation part
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Wei Mi <wmi at google dot com>
- Cc: gcc-patches at gcc dot gnu dot org, Dmitry Vyukov <dvyukov at google dot com>, David Li <davidxl at google dot com>, Diego Novillo <dnovillo at google dot com>, Dodji Seketeli <dodji at redhat dot com>
- Date: Thu, 1 Nov 2012 00:10:53 +0100
- Subject: Re: [tsan] ThreadSanitizer instrumentation part
- References: <CA+4CFy76XDVYtErxJRcM+qDDM=aOtwJptqj3Cwy2AmLBFahuRw@mail.gmail.com>
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
Just a couple of random comments:
On Wed, Oct 31, 2012 at 11:34:10AM -0700, Wei Mi wrote:
> gcc/ChangeLog:
> 2012-10-31 Wei Mi <wmi@gmail.com>
If Dmitry wrote parts of the patch, it would be nice to mention
him in the ChangeLog too.
> * Makefile.in (tsan.o): New
> * passes.c (init_optimization_passes): Add tsan passes
> * tree-pass.h (register_pass_info): Ditto
> * cfghooks.h (GCC_CFGHOOKS_H): Avoid including duplicate headers
> * doc/invoke.texi: Document tsan related options
> * toplev.c (compile_file): Add tsan pass in driver
> * gcc.c (LINK_COMMAND_SPEC): Add -lasan in link command if there
> -ftsan is on.
> * tsan.c: New file about tsan
> * tsan.h: Ditto
All ChangeLog entries should end with a dot.
> --- gcc/cfghooks.h (revision 193016)
> +++ gcc/cfghooks.h (working copy)
> @@ -19,6 +19,9 @@ You should have received a copy of the G
> along with GCC; see the file COPYING3. If not see
> <http://www.gnu.org/licenses/>. */
>
> +#ifndef GCC_CFGHOOKS_H
> +#define GCC_CFGHOOKS_H
> +
> /* Only basic-block.h includes this. */
>
> struct cfg_hooks
> @@ -219,3 +222,4 @@ extern void gimple_register_cfg_hooks (v
> extern struct cfg_hooks get_cfg_hooks (void);
> extern void set_cfg_hooks (struct cfg_hooks);
>
> +#endif /* GCC_CFGHOOKS_H */
Why this? Simply don't include that header in tsan.c, it is already
included by basic-block.h.
> --- gcc/tsan.c (revision 0)
> +++ gcc/tsan.c (revision 0)
> @@ -0,0 +1,534 @@
> +/* GCC instrumentation plugin for ThreadSanitizer.
> + * Copyright (c) 2012, Google Inc. All rights reserved.
> + * Author: Dmitry Vyukov (dvyukov)
> + *
> + * IT is free software; you can redistribute it and/or modify it under
> + * the terms of the GNU General Public License as published by the Free
> + * Software Foundation; either version 3, or (at your option) any later
> + * version. See http://www.gnu.org/licenses/
> + */
Can't google just assign the code to FSF, and use a standard boilerplate
as everything else in gcc/ ?
> +/* Builds the following decl
> + void __tsan_vptr_update (void *vptr, void *val); */
> +
> +static tree
> +get_vptr_update_decl (void)
> +{
> + tree typ;
> + static tree decl;
> +
> + if (decl != NULL)
> + return decl;
> + typ = build_function_type_list (void_type_node,
> + ptr_type_node, ptr_type_node, NULL_TREE);
> + decl = build_func_decl (typ, "__tsan_vptr_update");
> + return decl;
> +}
...
Instead of this (but same applies to asan), I think we should just consider
putting it into builtins.def (or have sanitizer.def like there is sync.def
or omp-builtins.def). The problem might be non-C/C++ family frontends
though.
> +static gimple_seq
> +instr_memory_access (tree expr, int is_write)
> +{
> + tree addr_expr, expr_type, call_expr, fdecl;
> + gimple_seq gs;
> + unsigned size;
> +
> + gcc_assert (is_gimple_addressable (expr));
> + addr_expr = build_addr (unshare_expr (expr), current_function_decl);
> + expr_type = TREE_TYPE (expr);
> + while (TREE_CODE (expr_type) == ARRAY_TYPE)
> + expr_type = TREE_TYPE (expr_type);
> + size = (TREE_INT_CST_LOW (TYPE_SIZE (expr_type))) / BITS_PER_UNIT;
int_size_in_bytes.
> + fdecl = get_memory_access_decl (is_write, size);
> + call_expr = build_call_expr (fdecl, 1, addr_expr);
> + gs = NULL;
> + force_gimple_operand (call_expr, &gs, true, 0);
> + return gs;
I think it is weird to return gimple_seqs, it would be better to just
emit the code at some stmt iterator, and preferrably without building
everything as trees, then gimplifying it.
> +}
> +
> +/* Builds the following gimple sequence:
> + __tsan_vptr_update (&EXPR, RHS); */
> +
> +static gimple_seq
> +instr_vptr_update (tree expr, tree rhs)
> +{
> + tree expr_ptr, call_expr, fdecl;
> + gimple_seq gs;
> +
> + expr_ptr = build_addr (unshare_expr (expr), current_function_decl);
> + fdecl = get_vptr_update_decl ();
> + call_expr = build_call_expr (fdecl, 2, expr_ptr, rhs);
> + gs = NULL;
> + force_gimple_operand (call_expr, &gs, true, 0);
> + return gs;
> +}
> +
> +/* Returns gimple seq that needs to be inserted at function entry. */
> +
> +static gimple_seq
> +instr_func_entry (void)
> +{
> + tree retaddr_decl, pc_addr, fdecl, call_expr;
> + gimple_seq gs;
> +
> + retaddr_decl = builtin_decl_implicit (BUILT_IN_RETURN_ADDRESS);
> + pc_addr = build_call_expr (retaddr_decl, 1, integer_zero_node);
> + fdecl = get_func_entry_decl ();
> + call_expr = build_call_expr (fdecl, 1, pc_addr);
> + gs = NULL;
> + force_gimple_operand (call_expr, &gs, true, 0);
> + return gs;
> +}
> +
> +/* Returns gimple seq that needs to be inserted before function exit. */
> +
> +static gimple_seq
> +instr_func_exit (void)
> +{
> + tree fdecl, call_expr;
> + gimple_seq gs;
> +
> + fdecl = get_func_exit_decl ();
> + call_expr = build_call_expr (fdecl, 0);
> + gs = NULL;
> + force_gimple_operand (call_expr, &gs, true, 0);
> + return gs;
> +}
> +
> +/* Sets location LOC for all gimples in the SEQ. */
> +
> +static void
> +set_location (gimple_seq seq, location_t loc)
> +{
> + gimple_seq_node n;
> +
> + for (n = gimple_seq_first (seq); n != NULL; n = n->gsbase.next)
This really should use a stmt iterator.
> + FOR_EACH_BB (bb)
> + {
> + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> + {
> + instrument_gimple (gsi);
> + }
> + }
Extraneous two pairs of {}s.
> + struct gimplify_ctx gctx;
> +
> + func_calls = 0;
> + func_mops = 0;
For gimplification context, see above, would be better to just
emit gimple directly.
For func_calls and func_mops, I believe why you need two variables instead
of just one, and why the function can't just return a bool whether
entry/exit needs to be instrumented or not.
> + push_gimplify_context (&gctx);
> + instrument_memory_accesses ();
> + if (func_calls || func_mops)
> + {
> + instrument_func_entry ();
> + instrument_func_exit ();
> + }
> + pop_gimplify_context (NULL);
> + return 0;
> +}
> +
> +/* The pass's gate. */
> +
> +static bool
> +tsan_gate (void)
> +{
> + return flag_tsan != 0;
> +}
> +
> +/* Inserts __tsan_init () into the list of CTORs. */
> +
> +void tsan_finish_file (void)
> +{
> + tree ctor_statements;
> +
> + ctor_statements = NULL_TREE;
> + append_to_statement_list (build_call_expr (get_init_decl (), 0),
> + &ctor_statements);
> + cgraph_build_static_cdtor ('I', ctor_statements,
> + MAX_RESERVED_INIT_PRIORITY - 1);
> +}
> +
> +/* The pass descriptor. */
> +
> +struct gimple_opt_pass pass_tsan = {{
Please watch formatting of other gimple_opt_pass structures.
{{ isn't used anywhere.
> --- gcc/common.opt (revision 193016)
> +++ gcc/common.opt (working copy)
> @@ -1518,6 +1518,14 @@ fmove-loop-invariants
> Common Report Var(flag_move_loop_invariants) Init(1) Optimization
> Move loop invariant computations out of loops
>
> +ftsan
> +Common RejectNegative Report Var(flag_tsan)
> +Add ThreadSanitizer instrumentation
Is that the option that LLVM uses (I'm talking about -faddress-sanitizer
in LLVM vs. -fasan right now in GCC, isn't that similar?).
Jakub