Created attachment 34249 [details] test case starting with r217669 tsan instrumentation gets wrong code: g++ -g -fsanitize=thread test.cpp ./a.out => soaks all memory up. reason is this function calls __tsan_func_entry in a loop: _ZNSt12_Destroy_auxILb0EE9__destroyIN9__gnu_cxx17__normal_iteratorIPSsSt6vectorISsSaISsEEEEEEvT_S9_: .LFB982: .loc 5 100 0 .cfi_startproc pushq %rbp .cfi_def_cfa_offset 16 .cfi_offset 6, -16 movq %rsp, %rbp .cfi_def_cfa_register 6 subq $32, %rsp movq %rdi, -16(%rbp) movq %rsi, -32(%rbp) .L153: movq 8(%rbp), %rax movq %rax, %rdi call __tsan_func_entry .loc 5 102 0 discriminator 2 leaq -32(%rbp), %rdx leaq -16(%rbp), %rax movq %rdx, %rsi movq %rax, %rdi call _ZN9__gnu_cxxneIPSsSt6vectorISsSaISsEEEEbRKNS_17__normal_iteratorIT_T0_EESA_ testb %al, %al je .L152 .loc 5 103 0 discriminator 1 leaq -16(%rbp), %rax movq %rax, %rdi call _ZNK9__gnu_cxx17__normal_iteratorIPSsSt6vectorISsSaISsEEEdeEv movq %rax, %rdi call _ZSt11__addressofISsEPT_RS0_ movq %rax, %rdi call _ZSt8_DestroyISsEvPT_ .loc 5 102 0 discriminator 1 leaq -16(%rbp), %rax movq %rax, %rdi call _ZN9__gnu_cxx17__normal_iteratorIPSsSt6vectorISsSaISsEEEppEv jmp .L153 .L152: .loc 5 104 0 call __tsan_func_exit leave .cfi_def_cfa 7, 8 ret .cfi_endproc it is generated from bits/stl_container.h, line 95-105: template<bool> struct _Destroy_aux { template<typename _ForwardIterator> static void __destroy(_ForwardIterator __first, _ForwardIterator __last) { for (; __first != __last; ++__first) std::_Destroy(std::__addressof(*__first)); } };
with current trunk we get this in test.cpp.176t.cplxlower0: static void std::_Destroy_aux<<anonymous> >::__destroy(_ForwardIterator, _ForwardIterator) [with _ForwardIterator = std::basic_string<char>*; bool <anonymous> = false] (struct basic_string * __first, struct basic_string * __last) { struct basic_string * D.17011; struct basic_string * _7; <bb 2>: # __first_1 = PHI <__first_3(D)(0), __first_9(3)> if (__first_1 == __last_5(D)) goto <bb 4>; else goto <bb 3>; <bb 3>: _7 = std::__addressof<std::basic_string<char> > (__first_1); std::_Destroy<std::basic_string<char> > (_7); __first_9 = __first_1 + 8; goto <bb 2>; <bb 4>: return; which is tranformed to this in test.cpp.178t.tsan0: static void std::_Destroy_aux<<anonymous> >::__destroy(_ForwardIterator, _ForwardIterator) [with _ForwardIterator = std::basic_string<char>*; bool <anonymous> = false] (struct basic_string * __first, struct basic_string * __last) { struct basic_string * D.17011; struct basic_string * _7; void * _10; <bb 2>: # __first_1 = PHI <__first_3(D)(0), __first_9(3)> _10 = __builtin_return_address (0); __builtin___tsan_func_entry (_10); if (__first_1 == __last_5(D)) goto <bb 4>; else goto <bb 3>; <bb 3>: _7 = std::__addressof<std::basic_string<char> > (__first_1); std::_Destroy<std::basic_string<char> > (_7); __first_9 = __first_1 + 8; goto <bb 2>; <bb 4>: __builtin___tsan_func_exit (); return; } but with r217669 reverted, we get in test.cpp.176t.cplxlower0: static void std::_Destroy_aux<<anonymous> >::__destroy(_ForwardIterator, _ForwardIterator) [with _ForwardIterator = std::basic_string<char>*; bool <anonymous> = false] (struct basic_string * __first, struct basic_string * __last) { struct basic_string * D.17027; struct basic_string * _7; <bb 2>: goto <bb 4>; <bb 3>: _7 = std::__addressof<std::basic_string<char> > (__first_1); std::_Destroy<std::basic_string<char> > (_7); __first_9 = __first_1 + 8; <bb 4>: # __first_1 = PHI <__first_3(D)(2), __first_9(3)> if (__first_1 != __last_5(D)) goto <bb 3>; else goto <bb 5>; <bb 5>: return; } which is transformed to this in test.cpp.178t.tsan0: static void std::_Destroy_aux<<anonymous> >::__destroy(_ForwardIterator, _ForwardIterator) [with _ForwardIterator = std::basic_string<char>*; bool <anonymous> = false] (struct basic_string * __first, struct basic_string * __last) { struct basic_string * D.17027; struct basic_string * _7; void * _10; <bb 2>: _10 = __builtin_return_address (0); __builtin___tsan_func_entry (_10); goto <bb 4>; <bb 3>: _7 = std::__addressof<std::basic_string<char> > (__first_1); std::_Destroy<std::basic_string<char> > (_7); __first_9 = __first_1 + 8; <bb 4>: # __first_1 = PHI <__first_3(D)(2), __first_9(3)> if (__first_1 != __last_5(D)) goto <bb 3>; else goto <bb 5>; <bb 5>: __builtin___tsan_func_exit (); return; }
So it looks like tsan wants to instrument the function entry edge but instead instruments the first basic block without considering backedges. Latent TSAN bug.
static void instrument_func_entry (void) { basic_block succ_bb; gimple_stmt_iterator gsi; tree ret_addr, builtin_decl; gimple g; succ_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)); gsi = gsi_after_labels (succ_bb); indeed. It should do succ_bb = split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))); gsi = gsi_after_labels (succ_bb); instead for example. Or use gsi_insert_on_edge_immediate () and insert on the edge (that avoids splitting the edge if not necessary).
and now I see that his example is mis-compiled too: cat test1.cpp int test1(int x) { abc: x=x+1; __builtin_printf("Test %d\n", x); if (x<9) goto abc; return 0; } is transformed to this in test1.cpp.178t.tsan0: int test1(int) (int x) { int D.2636; int _7; void * _8; # x_1 = PHI <x_3(D)(0), x_5(3)> abc: _8 = __builtin_return_address (0); __builtin___tsan_func_entry (_8); x_5 = x_1 + 1; __builtin_printf ("Test %d\n", x_5); if (x_5 <= 8) goto <bb 3>; else goto <bb 4>; <bb 3>: goto <bb 2> (abc); <bb 4>: _7 = 0; <L3>: __builtin___tsan_func_exit (); return _7; } with or without r217669, and also if test1.cpp is renamed to test1.c !
Aehm, and if the function throws, the __tsan_func_exit is not called either: cat test2.cpp struct my_class { my_class(){} }; int test1(int x) throw(my_class) { throw my_class(); return x; } int main() { for (int i=0; i<10000000; i++) { try { test1(i); } catch (my_class) { } } return 0; } g++ -g -fsanitize=thread test2.cpp ./a.out => here we have another memory leak.
Seems there are more such spots that insert stmts at gsi_after_labels of single_succ of entry block - e.g. ipa-split.c, omp-low.c, tree-inline.c, tree-into-ssa.c, tree-profile.c, tree-ssa-reassoc.c at least. I'll take care of tsan.c.
Note, I don't see any kind of memory leak on any of the testcases. Sure, calling __tsan_func_entry many times is of course wrong. As for #c5, clang doesn't call __tsan_func_exit in that case either. Dmitry? If we were to call it even for exceptions, I'm afraid expanding this in tsan pass is too late, we'd need to add the __tsan_func_exit call say during gimplification as a cleanup of the whole body and then EH code would take care of adding the needed landing pads etc. But libtsan e.g. wraps longjmp and pops frames in there, not sure if it doesn't do something similar for exceptions already.
Exceptions are currently unsupported by tsan. Yes, we can do either what we do in longjmp if it's possible to figure out the landing frame in runtime, or add __tsan_func_exit to cleanup statements for each function in compiler (obviously simpler for runtime, but more complex for compiler). I don't know what is simpler and what is exceptions ABI. Is it possible to do what we do for longjmp for exceptions? There is an issue for this in tsan tracker: https://code.google.com/p/thread-sanitizer/issues/detail?id=78
Doing it in gimplify_function_tree is pretty straightforward, after all, we already have there code to handle if (flag_instrument_function_entry_exit && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (fndecl) && !flag_instrument_functions_exclude_p (fndecl)) which does very similar thing - on entry add a call to one function with address of current function and __builtin_return_address (0), on exit (including exit through exceptions) another call with the same arguments. So, the question is just if you want to do it that way...
Created attachment 34270 [details] gcc5-pr64265.patch Untested patch to fix just the func entry issue.
>Doing it in gimplify_function_tree is pretty straightforward That's good! >So, the question is just if you want to do it that way... Kostya, can you say anything about llvm? On the tsan issue you said: "We'll need a kind of RAII for tsan entry/exit hooks. When we are adding tsan instrumentation, we need to create a fake class object with a ctor and dtor." Which suggests that you wanted to do it in a similar way in llvm. If we decide to it this way in both compilers, then no support in runtime is required, and gcc can well implement it ahead of llvm.
Created attachment 34271 [details] gcc5-pr64265-2.patch Incremental patch to handle the exceptions, completely untested (don't have spare cycles for that right now), can just throw it into bootstrap/regtest. The only complication has been in the optimization that we actually don't want any __tsan_func_{entry,exit} calls if there are no memory accesses in the function.
> ... we actually don't want any __tsan_func_{entry,exit} calls if there are no memory accesses in the function... ... and no calls to other functions, because these functions can contain memory accesses and tsan needs func_entry/exit to maintain stack traces.
(In reply to Jakub Jelinek from comment #7) > Note, I don't see any kind of memory leak on any of the testcases. > Sure, calling __tsan_func_entry many times is of course wrong. > As for #c5, clang doesn't call __tsan_func_exit in that case either. Dmitry? > If we were to call it even for exceptions, I'm afraid expanding this in tsan > pass is too late, we'd need to add the __tsan_func_exit call say during > gimplification as a cleanup of the whole body and then EH code would take > care of adding the needed landing pads etc. > But libtsan e.g. wraps longjmp and pops frames in there, not sure if it > doesn't do something similar for exceptions already. Hi Jakub, __tsan_func_entry pushes a few bytes on a call stack heap, and __tsan_func_exit pops these again. Therefore it is absolotely necessary to call these functions in pairs. If I run the a.out from the test cases, and I have the system monitor in the background, I can see my 8GB of memory quickly used up, and then my linux starts to page a lot so that it is hardly possible to press CTRL-C. There may of course also be a SIGSEGV in __tsan_func_entry when the heap finally overflows. Bernd.
I've been running the tests for quite a while and RSS didn't increase in top at all. As for "and no calls to other functions", sure, I haven't changed anything on that logic.
> Kostya, can you say anything about llvm? On the tsan issue you said: > "We'll need a kind of RAII for tsan entry/exit hooks. When we are adding > tsan instrumentation, we need to create a fake class object with a ctor and > dtor." I am still pretty confident that this is the only viable solution (the fix should be done in Clang, not LLVM). I did not try to actually implement it yet. > > Which suggests that you wanted to do it in a similar way in llvm. > > If we decide to it this way in both compilers, then no support in runtime is > required, and gcc can well implement it ahead of llvm. Absolutely.
Great. Jakub, then you can go for gcc support whenever you have time. It's not super priority as we managed to live without exceptions support so far.
(In reply to Jakub Jelinek from comment #15) > I've been running the tests for quite a while and RSS didn't increase in top > at all. > > As for "and no calls to other functions", sure, I haven't changed anything > on that logic. Interesting, I dont quite understand how that can be. I have seen all the time either the application crashed in __tsan_func_entry or the computer crashed because it ran out of memory too quickly. With O/S ubuntu 12.04 and ubuntu 14.04. X86_64 of course. The gcc was just the current trunk, with no special configure options. Just g++ --g -fsanitize=thread test.cpp Is there a way to limit the call stack depth in tsan that I am not aware of?
oh, I see now, in tsan/tsan_rtl.cc // Shadow stack maintenance can be replaced with // stack unwinding during trace switch (which presumably must be faster). DCHECK_GE(thr->shadow_stack_pos, thr->shadow_stack); #ifndef TSAN_GO DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end); #else if (thr->shadow_stack_pos == thr->shadow_stack_end) GrowShadowStack(thr); #endif thr->shadow_stack_pos[0] = pc; thr->shadow_stack_pos++; I usually build all languages, inclusive go, just for curiosity. And maybe that defines TSAN_GO ?
No, TSAN_GO is not defined for C/C++ tsan. It's only for race detector for Go language.
FYI, the #c12 patch needs more work, in particular the inliner probably has to drop the TSAN_FUNC_EXIT () internal calls, otherwise after inlining there can be multiple of them which is undesirable, as tsan supposedly doesn't care about inline functions. And on the other side, when e.g. OpenMP outlines some SESE region into a new function, we probably need to add TSAN_FUNC_EXIT () there.
Author: jakub Date: Mon Dec 15 09:37:47 2014 New Revision: 218734 URL: https://gcc.gnu.org/viewcvs?rev=218734&root=gcc&view=rev Log: PR sanitizer/64265 * tsan.c (instrument_func_entry): Insert __tsan_func_entry call on edge from entry block to single succ instead of after labels of single succ of entry block. Modified: trunk/gcc/ChangeLog trunk/gcc/tsan.c
Author: jakub Date: Mon Dec 15 09:46:21 2014 New Revision: 218735 URL: https://gcc.gnu.org/viewcvs?rev=218735&root=gcc&view=rev Log: PR sanitizer/64265 * tsan.c (instrument_func_entry): Insert __tsan_func_entry call on edge from entry block to single succ instead of after labels of single succ of entry block. Modified: branches/gcc-4_9-branch/gcc/ChangeLog branches/gcc-4_9-branch/gcc/tsan.c
Author: jakub Date: Mon Dec 15 09:50:11 2014 New Revision: 218736 URL: https://gcc.gnu.org/viewcvs?rev=218736&root=gcc&view=rev Log: PR sanitizer/64265 * tsan.c (instrument_func_entry): Insert __tsan_func_entry call on edge from entry block to single succ instead of after labels of single succ of entry block. Modified: branches/gcc-4_8-branch/gcc/ChangeLog branches/gcc-4_8-branch/gcc/tsan.c
The regression is fixed. For the EH support, patch has been posted, but that is not a fix for a regression, but enhancement.
Author: jakub Date: Mon Jan 5 21:47:51 2015 New Revision: 219202 URL: https://gcc.gnu.org/viewcvs?rev=219202&root=gcc&view=rev Log: PR sanitizer/64265 * gimplify.c (gimplify_function_tree): Add TSAN_FUNC_EXIT internal call as cleanup of the whole body. * internal-fn.def (TSAN_FUNC_EXIT): New internal call. * tsan.c (replace_func_exit): New function. (instrument_func_exit): Moved earlier. (instrument_memory_accesses): Adjust TSAN_FUNC_EXIT internal calls. Call instrument_func_exit if no TSAN_FUNC_EXIT internal calls have been found. (tsan_pass): Don't call instrument_func_exit. * internal-fn.c (expand_TSAN_FUNC_EXIT): New function. * tree-inline.c (copy_bb): Drop TSAN_FUNC_EXIT internal calls during inlining. Modified: trunk/gcc/ChangeLog trunk/gcc/gimplify.c trunk/gcc/internal-fn.c trunk/gcc/internal-fn.def trunk/gcc/tree-inline.c trunk/gcc/tsan.c
Author: jakub Date: Thu Mar 19 07:55:22 2015 New Revision: 221509 URL: https://gcc.gnu.org/viewcvs?rev=221509&root=gcc&view=rev Log: PR sanitizer/64265 * g++.dg/tsan/pr64265.C: New test. Added: trunk/gcc/testsuite/g++.dg/tsan/pr64265.C Modified: trunk/gcc/testsuite/ChangeLog