The thread sanitizer says that there is a data race during regex construction. Here a simple test: #include <regex> #include <string> #include <thread> static const std::string test_string = "aiusndougbafiudboihvboauvhbadnofhdbvouahvebfdocsahkbnavoiekrhfdsbvjqon"; static constexpr unsigned n_threads = 30; void test() { std::regex re = std::regex("[abg][jtd]"); std::regex_match(test_string, re); } int main() { std::vector<std::thread> threads; threads.reserve(n_threads); for(unsigned i = 0; i < n_threads; ++i) threads.emplace_back(test); for(unsigned i = 0; i < n_threads; ++i) threads[i].join(); } Compiled with GCC 6.2.1, using g++ -Wall -Wextra -O3 test.cpp -o test -g -pthread -fsanitize=thread Running the test gives these "results": ================== WARNING: ThreadSanitizer: data race (pid=14501) Read of size 1 at 0x7fc2e581b1f4 by thread T2: #0 std::ctype<char>::narrow(char, char) const /usr/include/c++/6.2.1/bits/locale_facets.h:932 (test+0x0000004064f4) #1 std::__detail::_Scanner<char>::_M_scan_normal() /usr/include/c++/6.2.1/bits/regex_scanner.tcc:101 (test+0x0000004064f4) #2 std::__detail::_Scanner<char>::_M_advance() /usr/include/c++/6.2.1/bits/regex_scanner.tcc:80 (test+0x0000004083af) #3 std::__detail::_Scanner<char>::_Scanner(char const*, char const*, std::regex_constants::syntax_option_type, std::locale) /usr/include/c++/6.2.1/bits/regex_scanner.tcc:66 (test+0x0000004083af) #4 std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_Compiler(char const*, char const*, std::locale const&, std::regex_constants::syntax_option_type) /usr/include/c++/6.2.1/bits/regex_compiler.tcc:78 (test+0x000000422b8a) #5 std::enable_if<std::__detail::__is_contiguous_normal_iter<char const*>::value, std::shared_ptr<std::__detail::_NFA<std::__cxx11::regex_traits<char> > const> >::type std::__detail::__compile_nfa<char const*, std::__cxx11::regex_traits<char> >(char const*, char const*, std::__cxx11::regex_traits<char>::locale_type const&, 
std::regex_constants::syntax_option_type) /usr/include/c++/6.2.1/bits/regex_compiler.h:194 (test+0x00000042379f) #6 std::__cxx11::basic_regex<char, std::__cxx11::regex_traits<char> >::basic_regex<char const*>(char const*, char const*, std::locale, std::regex_constants::syntax_option_type) /usr/include/c++/6.2.1/bits/regex.h:767 (test+0x0000004034df) #7 std::__cxx11::basic_regex<char, std::__cxx11::regex_traits<char> >::basic_regex<char const*>(char const*, char const*, std::regex_constants::syntax_option_type) /usr/include/c++/6.2.1/bits/regex.h:512 (test+0x0000004034df) #8 std::__cxx11::basic_regex<char, std::__cxx11::regex_traits<char> >::basic_regex(char const*, std::regex_constants::syntax_option_type) /usr/include/c++/6.2.1/bits/regex.h:445 (test+0x0000004034df) #9 test() /tmp/test3.cpp:10 (test+0x0000004034df) #10 void std::_Bind_simple<void (*())()>::_M_invoke<>(std::_Index_tuple<>) /usr/include/c++/6.2.1/functional:1400 (test+0x000000403fc9) #11 std::_Bind_simple<void (*())()>::operator()() /usr/include/c++/6.2.1/functional:1389 (test+0x000000403fc9) #12 std::thread::_State_impl<std::_Bind_simple<void (*())()> >::_M_run() /usr/include/c++/6.2.1/thread:196 (test+0x000000403fc9) #13 execute_native_thread_routine /build/gcc-multilib/src/gcc/libstdc++-v3/src/c++11/thread.cc:83 (libstdc++.so.6+0x0000000bb31e) Previous write of size 1 at 0x7fc2e581b1f4 by thread T1: #0 std::ctype<char>::narrow(char, char) const /usr/include/c++/6.2.1/bits/locale_facets.h:936 (test+0x00000040665d) #1 std::__detail::_Scanner<char>::_M_scan_normal() /usr/include/c++/6.2.1/bits/regex_scanner.tcc:101 (test+0x00000040665d) #2 std::__detail::_Scanner<char>::_M_advance() /usr/include/c++/6.2.1/bits/regex_scanner.tcc:80 (test+0x0000004083af) #3 std::__detail::_Scanner<char>::_Scanner(char const*, char const*, std::regex_constants::syntax_option_type, std::locale) /usr/include/c++/6.2.1/bits/regex_scanner.tcc:66 (test+0x0000004083af) #4 
std::__detail::_Compiler<std::__cxx11::regex_traits<char> >::_Compiler(char const*, char const*, std::locale const&, std::regex_constants::syntax_option_type) /usr/include/c++/6.2.1/bits/regex_compiler.tcc:78 (test+0x000000422b8a) #5 std::enable_if<std::__detail::__is_contiguous_normal_iter<char const*>::value, std::shared_ptr<std::__detail::_NFA<std::__cxx11::regex_traits<char> > const> >::type std::__detail::__compile_nfa<char const*, std::__cxx11::regex_traits<char> >(char const*, char const*, std::__cxx11::regex_traits<char>::locale_type const&, std::regex_constants::syntax_option_type) /usr/include/c++/6.2.1/bits/regex_compiler.h:194 (test+0x00000042379f) #6 std::__cxx11::basic_regex<char, std::__cxx11::regex_traits<char> >::basic_regex<char const*>(char const*, char const*, std::locale, std::regex_constants::syntax_option_type) /usr/include/c++/6.2.1/bits/regex.h:767 (test+0x0000004034df) #7 std::__cxx11::basic_regex<char, std::__cxx11::regex_traits<char> >::basic_regex<char const*>(char const*, char const*, std::regex_constants::syntax_option_type) /usr/include/c++/6.2.1/bits/regex.h:512 (test+0x0000004034df) #8 std::__cxx11::basic_regex<char, std::__cxx11::regex_traits<char> >::basic_regex(char const*, std::regex_constants::syntax_option_type) /usr/include/c++/6.2.1/bits/regex.h:445 (test+0x0000004034df) #9 test() /tmp/test3.cpp:10 (test+0x0000004034df) #10 void std::_Bind_simple<void (*())()>::_M_invoke<>(std::_Index_tuple<>) /usr/include/c++/6.2.1/functional:1400 (test+0x000000403fc9) #11 std::_Bind_simple<void (*())()>::operator()() /usr/include/c++/6.2.1/functional:1389 (test+0x000000403fc9) #12 std::thread::_State_impl<std::_Bind_simple<void (*())()> >::_M_run() /usr/include/c++/6.2.1/thread:196 (test+0x000000403fc9) #13 execute_native_thread_routine /build/gcc-multilib/src/gcc/libstdc++-v3/src/c++11/thread.cc:83 (libstdc++.so.6+0x0000000bb31e) Location is global '(anonymous namespace)::ctype_c' of size 576 at 0x7fc2e581b060 
(libstdc++.so.6+0x0000003861f4) Thread T2 (tid=14504, running) created by main thread at: #0 pthread_create /build/gcc-multilib/src/gcc/libsanitizer/tsan/tsan_interceptors.cc:876 (libtsan.so.0+0x000000028470) #1 __gthread_create /build/gcc-multilib/src/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/x86_64-pc-linux-gnu/bits/gthr-default.h:662 (libstdc++.so.6+0x0000000bb634) #2 std::thread::_M_start_thread(std::unique_ptr<std::thread::_State, std::default_delete<std::thread::_State> >, void (*)()) /build/gcc-multilib/src/gcc/libstdc++-v3/src/c++11/thread.cc:163 (libstdc++.so.6+0x0000000bb634) #3 __libc_start_main <null> (libc.so.6+0x000000020290) Thread T1 (tid=14503, running) created by main thread at: #0 pthread_create /build/gcc-multilib/src/gcc/libsanitizer/tsan/tsan_interceptors.cc:876 (libtsan.so.0+0x000000028470) #1 __gthread_create /build/gcc-multilib/src/gcc-build/x86_64-pc-linux-gnu/libstdc++-v3/include/x86_64-pc-linux-gnu/bits/gthr-default.h:662 (libstdc++.so.6+0x0000000bb634) #2 std::thread::_M_start_thread(std::unique_ptr<std::thread::_State, std::default_delete<std::thread::_State> >, void (*)()) /build/gcc-multilib/src/gcc/libstdc++-v3/src/c++11/thread.cc:163 (libstdc++.so.6+0x0000000bb634) #3 __libc_start_main <null> (libc.so.6+0x000000020290) SUMMARY: ThreadSanitizer: data race /usr/include/c++/6.2.1/bits/locale_facets.h:932 in std::ctype<char>::narrow(char, char) const ================== ThreadSanitizer: reported 1 warnings
Ugh, we have mutable members in std::ctype<char>, and narrow() writes to them without any synchronization. A reduced test case: #include <locale> #include <thread> int main() { auto& ctype = std::use_facet<std::ctype<char>>(std::locale::classic()); auto f = [&]{ for (int i = 0; i < 64; ++i) ctype.narrow(i, 'a'); }; std::thread t1{f}; std::thread t2{f}; t1.join(); t2.join(); }
This still exists in 6.3.1.
Yes, that's why the bug report is still open.
Ran into this in 7.2.0 and am considering workarounds. Are there plans to fix this sometime soon, or is this low-priority?
For anyone interested, here is the workaround we came up with: // A data race happens in the libstdc++ (as of GCC 7.2) implementation of the // ctype<char>::narrow() function (bug #77704). The issue is easily triggered // by the testscript runner that indirectly (via regex) uses the ctype<char> facet // of the global locale (and can potentially be triggered by other locale- // aware code). We work around this by pre-initializing the global locale // facet internal cache. // #ifdef _GLIBCXX_ { const ctype<char>& ct (use_facet<ctype<char>> (locale ())); for (size_t i (0); i != 256; ++i) ct.narrow (static_cast<char> (i), '\0'); } #endif
Ran into this issue with 8.3.0.
Yup, I too was just bitten by this, with libstdc++ 9.4.0-1ubuntu1~20.04.1. Luckily, the Thread Sanitizer finally led me to the diagnosis. Many thanks to Boris for posting his workaround, which I can confirm seems to fix it for me too.