Bug 64265 - [5 Regression] r217669 broke tsan
Summary: [5 Regression] r217669 broke tsan
Status: RESOLVED FIXED
Alias: None
Product: gcc
Classification: Unclassified
Component: sanitizer (show other bugs)
Version: 5.0
: P3 normal
Target Milestone: 5.0
Assignee: Not yet assigned to anyone
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2014-12-11 11:47 UTC by Bernd Edlinger
Modified: 2015-03-19 07:55 UTC (History)
5 users (show)

See Also:
Host:
Target:
Build:
Known to work:
Known to fail:
Last reconfirmed: 2014-12-12 00:00:00


Attachments
test case (149 bytes, text/x-c++src)
2014-12-11 11:47 UTC, Bernd Edlinger
Details
gcc5-pr64265.patch (685 bytes, patch)
2014-12-12 17:42 UTC, Jakub Jelinek
Details | Diff
gcc5-pr64265-2.patch (1.97 KB, patch)
2014-12-12 18:40 UTC, Jakub Jelinek
Details | Diff

Note You need to log in before you can comment on or make changes to this bug.
Description Bernd Edlinger 2014-12-11 11:47:46 UTC
Created attachment 34249 [details]
test case

starting with r217669 tsan instrumentation gets wrong code:

g++ -g -fsanitize=thread test.cpp

./a.out

=> soaks all memory up.

reason is this function calls __tsan_func_entry in a loop:

_ZNSt12_Destroy_auxILb0EE9__destroyIN9__gnu_cxx17__normal_iteratorIPSsSt6vectorISsSaISsEEEEEEvT_S9_:
.LFB982:
        .loc 5 100 0
        .cfi_startproc
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register 6
        subq    $32, %rsp
        movq    %rdi, -16(%rbp)
        movq    %rsi, -32(%rbp)
.L153:
        movq    8(%rbp), %rax
        movq    %rax, %rdi
        call    __tsan_func_entry
        .loc 5 102 0 discriminator 2
        leaq    -32(%rbp), %rdx
        leaq    -16(%rbp), %rax
        movq    %rdx, %rsi
        movq    %rax, %rdi
        call    _ZN9__gnu_cxxneIPSsSt6vectorISsSaISsEEEEbRKNS_17__normal_iteratorIT_T0_EESA_
        testb   %al, %al
        je      .L152
        .loc 5 103 0 discriminator 1
        leaq    -16(%rbp), %rax
        movq    %rax, %rdi
        call    _ZNK9__gnu_cxx17__normal_iteratorIPSsSt6vectorISsSaISsEEEdeEv
        movq    %rax, %rdi
        call    _ZSt11__addressofISsEPT_RS0_
        movq    %rax, %rdi
        call    _ZSt8_DestroyISsEvPT_
        .loc 5 102 0 discriminator 1
        leaq    -16(%rbp), %rax
        movq    %rax, %rdi
        call    _ZN9__gnu_cxx17__normal_iteratorIPSsSt6vectorISsSaISsEEEppEv
        jmp     .L153
.L152:
        .loc 5 104 0
        call    __tsan_func_exit
        leave
        .cfi_def_cfa 7, 8
        ret
        .cfi_endproc


it is generated from bits/stl_construct.h, lines 95-105:

template<bool>
  struct _Destroy_aux
  {
    template<typename _ForwardIterator>
      static void
      __destroy(_ForwardIterator __first, _ForwardIterator __last)
      {
        for (; __first != __last; ++__first)
          std::_Destroy(std::__addressof(*__first));
      }
  };
Comment 1 Bernd Edlinger 2014-12-12 13:31:41 UTC
with current trunk we get this in test.cpp.176t.cplxlower0:

static void std::_Destroy_aux<<anonymous> >::__destroy(_ForwardIterator, _ForwardIterator) [with _ForwardIterator = std::basic_string<char>*; bool <anonymous> = false] (struct basic_string * __first, struct basic_string * __last)
{
  struct basic_string * D.17011;
  struct basic_string * _7;

  <bb 2>:
  # __first_1 = PHI <__first_3(D)(0), __first_9(3)>
  if (__first_1 == __last_5(D))
    goto <bb 4>;
  else
    goto <bb 3>;

  <bb 3>:
  _7 = std::__addressof<std::basic_string<char> > (__first_1);
  std::_Destroy<std::basic_string<char> > (_7);
  __first_9 = __first_1 + 8;
  goto <bb 2>;

  <bb 4>:
  return;

which is transformed to this in test.cpp.178t.tsan0:

static void std::_Destroy_aux<<anonymous> >::__destroy(_ForwardIterator, _ForwardIterator) [with _ForwardIterator = std::basic_string<char>*; bool <anonymous> = false] (struct basic_string * __first, struct basic_string * __last)
{
  struct basic_string * D.17011;
  struct basic_string * _7;
  void * _10;

  <bb 2>:
  # __first_1 = PHI <__first_3(D)(0), __first_9(3)>
  _10 = __builtin_return_address (0);
  __builtin___tsan_func_entry (_10);
  if (__first_1 == __last_5(D))
    goto <bb 4>;
  else
    goto <bb 3>;

  <bb 3>:
  _7 = std::__addressof<std::basic_string<char> > (__first_1);
  std::_Destroy<std::basic_string<char> > (_7);
  __first_9 = __first_1 + 8;
  goto <bb 2>;

  <bb 4>:
  __builtin___tsan_func_exit ();
  return;

}


but with r217669 reverted, we get in test.cpp.176t.cplxlower0:

static void std::_Destroy_aux<<anonymous> >::__destroy(_ForwardIterator, _ForwardIterator) [with _ForwardIterator = std::basic_string<char>*; bool <anonymous> = false] (struct basic_string * __first, struct basic_string * __last)
{
  struct basic_string * D.17027;
  struct basic_string * _7;

  <bb 2>:
  goto <bb 4>;

  <bb 3>:
  _7 = std::__addressof<std::basic_string<char> > (__first_1);
  std::_Destroy<std::basic_string<char> > (_7);
  __first_9 = __first_1 + 8;

  <bb 4>:
  # __first_1 = PHI <__first_3(D)(2), __first_9(3)>
  if (__first_1 != __last_5(D))
    goto <bb 3>;
  else
    goto <bb 5>;

  <bb 5>:
  return;

}

which is transformed to this in test.cpp.178t.tsan0:

static void std::_Destroy_aux<<anonymous> >::__destroy(_ForwardIterator, _ForwardIterator) [with _ForwardIterator = std::basic_string<char>*; bool <anonymous> = false] (struct basic_string * __first, struct basic_string * __last)
{
  struct basic_string * D.17027;
  struct basic_string * _7;
  void * _10;

  <bb 2>:
  _10 = __builtin_return_address (0);
  __builtin___tsan_func_entry (_10);
  goto <bb 4>;

  <bb 3>:
  _7 = std::__addressof<std::basic_string<char> > (__first_1);
  std::_Destroy<std::basic_string<char> > (_7);
  __first_9 = __first_1 + 8;

  <bb 4>:
  # __first_1 = PHI <__first_3(D)(2), __first_9(3)>
  if (__first_1 != __last_5(D))
    goto <bb 3>;
  else
    goto <bb 5>;

  <bb 5>:
  __builtin___tsan_func_exit ();
  return;

}
Comment 2 Richard Biener 2014-12-12 13:33:27 UTC
So it looks like tsan wants to instrument the function entry edge but instead
instruments the first basic block without considering backedges.

Latent TSAN bug.
Comment 3 Richard Biener 2014-12-12 13:36:26 UTC
static void
instrument_func_entry (void)
{
  basic_block succ_bb;
  gimple_stmt_iterator gsi;
  tree ret_addr, builtin_decl;
  gimple g;

  succ_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  gsi = gsi_after_labels (succ_bb);

indeed.

It should do

  succ_bb = split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
  gsi = gsi_after_labels (succ_bb);

instead for example.  Or use gsi_insert_on_edge_immediate () and insert
on the edge (that avoids splitting the edge if not necessary).
Comment 4 Bernd Edlinger 2014-12-12 13:39:06 UTC
and now I see that this example is mis-compiled too:

cat test1.cpp
int test1(int x)
{
abc:
  x=x+1;
  __builtin_printf("Test %d\n", x);
  if (x<9)
    goto abc;
  return 0;
}


is transformed to this in test1.cpp.178t.tsan0:

int test1(int) (int x)
{
  int D.2636;
  int _7;
  void * _8;

  # x_1 = PHI <x_3(D)(0), x_5(3)>
abc:
  _8 = __builtin_return_address (0);
  __builtin___tsan_func_entry (_8);
  x_5 = x_1 + 1;
  __builtin_printf ("Test %d\n", x_5);
  if (x_5 <= 8)
    goto <bb 3>;
  else
    goto <bb 4>;

  <bb 3>:
  goto <bb 2> (abc);

  <bb 4>:
  _7 = 0;

<L3>:
  __builtin___tsan_func_exit ();
  return _7;

}

with or without r217669, and also if test1.cpp is renamed to test1.c !
Comment 5 Bernd Edlinger 2014-12-12 15:58:20 UTC
Aehm, and if the function throws, the __tsan_func_exit is not
called either:

cat test2.cpp
struct my_class
{
  my_class(){}
};

int test1(int x) throw(my_class)
{
  throw my_class();
  return x;
}


int
main()
{
  for (int i=0; i<10000000; i++)
  {
    try
    {
      test1(i);
    }
    catch (my_class)
    {
    }
  }
  return 0;
}


g++ -g -fsanitize=thread test2.cpp

./a.out

=> here we have another memory leak.
Comment 6 Jakub Jelinek 2014-12-12 16:51:21 UTC
Seems there are more such spots that insert stmts at gsi_after_labels of single_succ of entry block - e.g. ipa-split.c, omp-low.c, tree-inline.c, tree-into-ssa.c, tree-profile.c, tree-ssa-reassoc.c at least.

I'll take care of tsan.c.
Comment 7 Jakub Jelinek 2014-12-12 17:22:42 UTC
Note, I don't see any kind of memory leak on any of the testcases.
Sure, calling __tsan_func_entry many times is of course wrong.
As for #c5, clang doesn't call __tsan_func_exit in that case either.  Dmitry?
If we were to call it even for exceptions, I'm afraid expanding this in tsan pass is too late, we'd need to add the __tsan_func_exit call say during gimplification as a cleanup of the whole body and then EH code would take care of adding the needed landing pads etc.
But libtsan e.g. wraps longjmp and pops frames in there, not sure if it doesn't do something similar for exceptions already.
Comment 8 Dmitry Vyukov 2014-12-12 17:28:25 UTC
Exceptions are currently unsupported by tsan.
Yes, we can do either what we do in longjmp if it's possible to figure out the landing frame in runtime, or add __tsan_func_exit to cleanup statements for each function in compiler (obviously simpler for runtime, but more complex for compiler).
I don't know which is simpler, or what the exceptions ABI looks like. Is it possible to do for exceptions what we do for longjmp?
There is an issue for this in tsan tracker:
https://code.google.com/p/thread-sanitizer/issues/detail?id=78
Comment 9 Jakub Jelinek 2014-12-12 17:39:46 UTC
Doing it in gimplify_function_tree is pretty straightforward, after all, we already have there code to handle
  if (flag_instrument_function_entry_exit
      && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (fndecl)
      && !flag_instrument_functions_exclude_p (fndecl))
which does very similar thing - on entry add a call to one function with address of current function and __builtin_return_address (0), on exit (including exit through exceptions) another call with the same arguments.

So, the question is just if you want to do it that way...
Comment 10 Jakub Jelinek 2014-12-12 17:42:32 UTC
Created attachment 34270 [details]
gcc5-pr64265.patch

Untested patch to fix just the func entry issue.
Comment 11 Dmitry Vyukov 2014-12-12 17:52:53 UTC
>Doing it in gimplify_function_tree is pretty straightforward

That's good!

>So, the question is just if you want to do it that way...

Kostya, can you say anything about llvm? On the tsan issue you said:
"We'll need a kind of RAII for tsan entry/exit hooks. When we are adding tsan instrumentation, we need to create a fake class object with a ctor and dtor."

Which suggests that you wanted to do it in a similar way in llvm.

If we decide to do it this way in both compilers, then no support in runtime is required, and gcc can well implement it ahead of llvm.
Comment 12 Jakub Jelinek 2014-12-12 18:40:54 UTC
Created attachment 34271 [details]
gcc5-pr64265-2.patch

Incremental patch to handle the exceptions, completely untested (don't have spare cycles for that right now), can just throw it into bootstrap/regtest.
The only complication has been in the optimization that we actually don't want any __tsan_func_{entry,exit} calls if there are no memory accesses in the function.
Comment 13 Dmitry Vyukov 2014-12-12 18:48:13 UTC
> ... we actually don't want any __tsan_func_{entry,exit} calls if there are no memory accesses in the function...

... and no calls to other functions, because these functions can contain memory accesses and tsan needs func_entry/exit to maintain stack traces.
Comment 14 Bernd Edlinger 2014-12-12 18:58:22 UTC
(In reply to Jakub Jelinek from comment #7)
> Note, I don't see any kind of memory leak on any of the testcases.
> Sure, calling __tsan_func_entry many times is of course wrong.
> As for #c5, clang doesn't call __tsan_func_exit in that case either.  Dmitry?
> If we were to call it even for exceptions, I'm afraid expanding this in tsan
> pass is too late, we'd need to add the __tsan_func_exit call say during
> gimplification as a cleanup of the whole body and then EH code would take
> care of adding the needed landing pads etc.
> But libtsan e.g. wraps longjmp and pops frames in there, not sure if it
> doesn't do something similar for exceptions already.

Hi Jakub,

__tsan_func_entry pushes a few bytes on a call stack heap,
and __tsan_func_exit pops these again. Therefore it is absolutely
necessary to call these functions in pairs.

If I run the a.out from the test cases, and I have the system monitor
in the background, I can see my 8GB of memory quickly used up,
and then my linux starts to page a lot so that it is hardly possible
to press CTRL-C.

There may of course also be a SIGSEGV in __tsan_func_entry when the
heap finally overflows.

Bernd.
Comment 15 Jakub Jelinek 2014-12-12 19:13:42 UTC
I've been running the tests for quite a while and RSS didn't increase in top at all.

As for "and no calls to other functions", sure, I haven't changed anything on that logic.
Comment 16 Kostya Serebryany 2014-12-12 19:22:04 UTC
> Kostya, can you say anything about llvm? On the tsan issue you said:
> "We'll need a kind of RAII for tsan entry/exit hooks. When we are adding
> tsan instrumentation, we need to create a fake class object with a ctor and
> dtor."

I am still pretty confident that this is the only viable solution
(the fix should be done in Clang, not LLVM).
I did not try to actually implement it yet. 

> 
> Which suggests that you wanted to do it in a similar way in llvm.
> 
> If we decide to it this way in both compilers, then no support in runtime is
> required, and gcc can well implement it ahead of llvm.

Absolutely.
Comment 17 Dmitry Vyukov 2014-12-12 19:31:05 UTC
Great.

Jakub, then you can go for gcc support whenever you have time. It's not super priority as we managed to live without exceptions support so far.
Comment 18 Bernd Edlinger 2014-12-12 19:35:56 UTC
(In reply to Jakub Jelinek from comment #15)
> I've been running the tests for quite a while and RSS didn't increase in top
> at all.
> 
> As for "and no calls to other functions", sure, I haven't changed anything
> on that logic.

Interesting, I don't quite understand how that can be.

I have seen all the time either the application crashed in __tsan_func_entry
or the computer crashed because it ran out of memory too quickly.

With O/S ubuntu 12.04 and ubuntu 14.04.  X86_64 of course.

The gcc was just the current trunk, with no special configure options.
Just g++ -g -fsanitize=thread test.cpp

Is there a way to limit the call stack depth in tsan that I am not aware of?
Comment 19 Bernd Edlinger 2014-12-12 20:18:34 UTC
oh, I see now, in tsan/tsan_rtl.cc

  // Shadow stack maintenance can be replaced with
  // stack unwinding during trace switch (which presumably must be faster).
  DCHECK_GE(thr->shadow_stack_pos, thr->shadow_stack);
#ifndef TSAN_GO
  DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
#else
  if (thr->shadow_stack_pos == thr->shadow_stack_end)
    GrowShadowStack(thr);
#endif
  thr->shadow_stack_pos[0] = pc;
  thr->shadow_stack_pos++;

I usually build all languages, including Go, just out of curiosity.
And maybe that defines TSAN_GO ?
Comment 20 Dmitry Vyukov 2014-12-12 20:26:05 UTC
No, TSAN_GO is not defined for C/C++ tsan. It's only for race detector for Go language.
Comment 21 Jakub Jelinek 2014-12-12 21:23:02 UTC
FYI, the #c12 patch needs more work, in particular the inliner probably has to drop the TSAN_FUNC_EXIT () internal calls, otherwise after inlining there can be multiple of them which is undesirable, as tsan supposedly doesn't care about inline functions.  And on the other side, when e.g. OpenMP outlines some SESE region into a new function, we probably need to add TSAN_FUNC_EXIT () there.
Comment 22 Jakub Jelinek 2014-12-15 09:38:19 UTC
Author: jakub
Date: Mon Dec 15 09:37:47 2014
New Revision: 218734

URL: https://gcc.gnu.org/viewcvs?rev=218734&root=gcc&view=rev
Log:
	PR sanitizer/64265
	* tsan.c (instrument_func_entry): Insert __tsan_func_entry
	call on edge from entry block to single succ instead
	of after labels of single succ of entry block.

Modified:
    trunk/gcc/ChangeLog
    trunk/gcc/tsan.c
Comment 23 Jakub Jelinek 2014-12-15 09:46:53 UTC
Author: jakub
Date: Mon Dec 15 09:46:21 2014
New Revision: 218735

URL: https://gcc.gnu.org/viewcvs?rev=218735&root=gcc&view=rev
Log:
	PR sanitizer/64265
	* tsan.c (instrument_func_entry): Insert __tsan_func_entry
	call on edge from entry block to single succ instead
	of after labels of single succ of entry block.

Modified:
    branches/gcc-4_9-branch/gcc/ChangeLog
    branches/gcc-4_9-branch/gcc/tsan.c
Comment 24 Jakub Jelinek 2014-12-15 09:50:42 UTC
Author: jakub
Date: Mon Dec 15 09:50:11 2014
New Revision: 218736

URL: https://gcc.gnu.org/viewcvs?rev=218736&root=gcc&view=rev
Log:
	PR sanitizer/64265
	* tsan.c (instrument_func_entry): Insert __tsan_func_entry
	call on edge from entry block to single succ instead
	of after labels of single succ of entry block.

Modified:
    branches/gcc-4_8-branch/gcc/ChangeLog
    branches/gcc-4_8-branch/gcc/tsan.c
Comment 25 Jakub Jelinek 2014-12-16 12:04:33 UTC
The regression is fixed.  For the EH support, patch has been posted, but that is not a fix for a regression, but enhancement.
Comment 26 Jakub Jelinek 2015-01-05 21:48:23 UTC
Author: jakub
Date: Mon Jan  5 21:47:51 2015
New Revision: 219202

URL: https://gcc.gnu.org/viewcvs?rev=219202&root=gcc&view=rev
Log:
	PR sanitizer/64265
	* gimplify.c (gimplify_function_tree): Add TSAN_FUNC_EXIT internal
	call as cleanup of the whole body.
	* internal-fn.def (TSAN_FUNC_EXIT): New internal call.
	* tsan.c (replace_func_exit): New function.
	(instrument_func_exit): Moved earlier.
	(instrument_memory_accesses): Adjust TSAN_FUNC_EXIT internal calls.
	Call instrument_func_exit if no TSAN_FUNC_EXIT internal calls have
	been found.
	(tsan_pass): Don't call instrument_func_exit.
	* internal-fn.c (expand_TSAN_FUNC_EXIT): New function.
	* tree-inline.c (copy_bb): Drop TSAN_FUNC_EXIT internal calls during
	inlining.

Modified:
    trunk/gcc/ChangeLog
    trunk/gcc/gimplify.c
    trunk/gcc/internal-fn.c
    trunk/gcc/internal-fn.def
    trunk/gcc/tree-inline.c
    trunk/gcc/tsan.c
Comment 27 Jakub Jelinek 2015-03-19 07:55:54 UTC
Author: jakub
Date: Thu Mar 19 07:55:22 2015
New Revision: 221509

URL: https://gcc.gnu.org/viewcvs?rev=221509&root=gcc&view=rev
Log:
	PR sanitizer/64265
	* g++.dg/tsan/pr64265.C: New test.

Added:
    trunk/gcc/testsuite/g++.dg/tsan/pr64265.C
Modified:
    trunk/gcc/testsuite/ChangeLog