[Patch, libstdc++/64584, libstdc++/64585] Clear basic_regex after imbue and make assign exception tolerant

Tim Shen timshen@google.com
Sun Jan 18 00:06:00 GMT 2015


On Fri, Jan 16, 2015 at 4:12 AM, Jonathan Wakely <jwakely@redhat.com> wrote:
> In that case, why bother using a unique_ptr initially, if it will just
> have to allocate a shared_ptr control block later anyway? It's better
> to use make_shared to reduce the number of allocations, isn't it?

Yes you are right, I didn't notice that shared_ptr<_NFA> can be
implicitly converted to shared_ptr<const _NFA>.

Fixed.

On Fri, Jan 16, 2015 at 4:14 AM, Jonathan Wakely <jwakely@redhat.com> wrote:
>> @@ -675,12 +681,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>>         assign(const basic_string<_Ch_type, _Ch_typeraits, _Alloc>& __s,
>>                flag_type __flags = ECMAScript)
>>         {
>> +         auto __traits = _M_traits;
>> +         auto __f = _M_flags;
>>           _M_flags = __flags;
>> -         _M_original_str.assign(__s.begin(), __s.end());
>> -         auto __p = _M_original_str.c_str();
>> -         _M_automaton = __detail::__compile_nfa(__p,
>> -                                                __p +
>> _M_original_str.size(),
>> -                                                _M_traits, _M_flags);
>> +         _M_traits = __traits;
>
>
> What is this assignnment for?

Sorry, I didn't notice that _M_traits doesn't change in this function. Fixed.

On Fri, Jan 16, 2015 at 5:39 AM, Jonathan Wakely <jwakely@redhat.com> wrote:
> If std::move on a pointer definitely doesn't pessimize, then I suppose
> it's no worse and the move is OK.

I'm not 100% sure of that, but isn't moving a pointer the same as copying?


Also adjusted SFINAE (add _CharT* specialization).


-- 
Regards,
Tim Shen
-------------- next part --------------
commit 866d34c2bbcb4fc72fe7088e550a59a1139fe28c
Author: timshen <timshen@google.com>
Date:   Wed Jan 14 01:35:22 2015 -0800

    	PR libstdc++/64584
    	PR libstdc++/64585
    	* include/bits/regex.h (basic_regex<>::basic_regex,
    	basic_regex<>::assign, basic_regex<>::imbue,
    	basic_regex<>::swap, basic_regex<>::mark_count): Drop NFA after
    	imbuing basic_regex; Make assign() transactional against exception.
    	* testsuite/28_regex/basic_regex/assign/char/string.cc: New testcase.
    	* testsuite/28_regex/basic_regex/imbue/string.cc: New testcase.

diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h
index 3cbec3c..b358c79 100644
--- a/libstdc++-v3/include/bits/regex.h
+++ b/libstdc++-v3/include/bits/regex.h
@@ -476,7 +476,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        */
       basic_regex(const basic_regex& __rhs)
       : _M_flags(__rhs._M_flags), _M_original_str(__rhs._M_original_str)
-      { this->imbue(__rhs.getloc()); }
+      {
+	_M_traits.imbue(__rhs.getloc());
+	this->assign(_M_original_str, _M_flags);
+      }
 
       /**
        * @brief Move-constructs a basic regular expression.
@@ -490,7 +493,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       : _M_flags(__rhs._M_flags),
       _M_original_str(std::move(__rhs._M_original_str))
       {
-	this->imbue(__rhs.getloc());
+	_M_traits.imbue(__rhs.getloc());
+	this->assign(_M_original_str, _M_flags);
 	__rhs._M_automaton.reset();
       }
 
@@ -604,7 +608,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       {
 	_M_flags = __rhs._M_flags;
 	_M_original_str = __rhs._M_original_str;
-	this->imbue(__rhs.getloc());
+	_M_traits.imbue(__rhs.getloc());
+	this->assign(_M_original_str, _M_flags);
 	return *this;
       }
 
@@ -622,7 +627,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	_M_flags = __rhs._M_flags;
 	_M_original_str = std::move(__rhs._M_original_str);
 	__rhs._M_automaton.reset();
-	this->imbue(__rhs.getloc());
+	_M_traits.imbue(__rhs.getloc());
+	this->assign(_M_original_str, _M_flags);
+	return *this;
       }
 
       /**
@@ -675,12 +682,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	assign(const basic_string<_Ch_type, _Ch_typeraits, _Alloc>& __s,
 	       flag_type __flags = ECMAScript)
 	{
+	  _M_automaton = __detail::__compile_nfa(
+	    __s.data(), __s.data() + __s.size(), _M_traits, __flags);
+	  _M_original_str = __s;
 	  _M_flags = __flags;
-	  _M_original_str.assign(__s.begin(), __s.end());
-	  auto __p = _M_original_str.c_str();
-	  _M_automaton = __detail::__compile_nfa(__p,
-						 __p + _M_original_str.size(),
-						 _M_traits, _M_flags);
 	  return *this;
 	}
 
@@ -725,7 +730,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        */
       unsigned int
       mark_count() const
-      { return _M_automaton->_M_sub_count() - 1; }
+      {
+	if (_M_automaton)
+	  return _M_automaton->_M_sub_count() - 1;
+	return 0;
+      }
 
       /**
        * @brief Gets the flags used to construct the regular expression
@@ -744,9 +753,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       locale_type
       imbue(locale_type __loc)
       {
-	auto __ret = _M_traits.imbue(__loc);
-	this->assign(_M_original_str, _M_flags);
-	return __ret;
+	_M_automaton = nullptr;
+	return _M_traits.imbue(__loc);
       }
 
       /**
@@ -767,8 +775,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       swap(basic_regex& __rhs)
       {
 	std::swap(_M_flags, __rhs._M_flags);
-	std::swap(_M_original_str, __rhs._M_original_str);
-	this->imbue(__rhs.imbue(this->getloc()));
+	std::swap(_M_traits, __rhs._M_traits);
+	auto __tmp = std::move(_M_original_str);
+	this->assign(__rhs._M_original_str, _M_flags);
+	__rhs.assign(__tmp, __rhs._M_flags);
       }
 
 #ifdef _GLIBCXX_DEBUG
@@ -777,7 +787,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       { _M_automaton->_M_dot(__ostr); }
 #endif
 
-    protected:
+    private:
       typedef std::shared_ptr<__detail::_NFA<_Rx_traits>> _AutomatonPtr;
 
       template<typename _Bp, typename _Ap, typename _Cp, typename _Rp,
diff --git a/libstdc++-v3/testsuite/28_regex/basic_regex/assign/char/string.cc b/libstdc++-v3/testsuite/28_regex/basic_regex/assign/char/string.cc
index 0a32ab5..7d4db8f1 100644
--- a/libstdc++-v3/testsuite/28_regex/basic_regex/assign/char/string.cc
+++ b/libstdc++-v3/testsuite/28_regex/basic_regex/assign/char/string.cc
@@ -1,4 +1,3 @@
-// { dg-do compile }
 // { dg-options "-std=gnu++0x" }
 
 // 2007-03-12  Stephen M. Webb  <stephen.webb@bregmasoft.com>
@@ -29,6 +28,7 @@
 // Tests C++ string assignment of the basic_regex class.  
 void test01()
 {
+  bool test __attribute__((unused)) = true;
   typedef std::basic_regex<char> test_type;
 
   std::string s("a*b");
@@ -36,9 +36,27 @@ void test01()
   re.assign(s);
 }
 
+// libstdc++/64584
+void test02()
+{
+  bool test __attribute__((unused)) = true;
+  std::regex re("", std::regex_constants::extended);
+  auto flags = re.flags();
+  try
+    {
+      re.assign("(", std::regex_constants::icase);
+      VERIFY(false);
+    }
+  catch (const std::regex_error& e)
+    {
+      VERIFY(flags == re.flags());
+    }
+}
+
 int
 main()
 { 
   test01();
+  test02();
   return 0;
 }
diff --git a/libstdc++-v3/testsuite/28_regex/basic_regex/imbue/string.cc b/libstdc++-v3/testsuite/28_regex/basic_regex/imbue/string.cc
new file mode 100644
index 0000000..d4d4f47
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/basic_regex/imbue/string.cc
@@ -0,0 +1,44 @@
+// { dg-options "-std=gnu++11" }
+
+// Copyright (C) 2015 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// [28.8.5] class template basic_regex locale
+
+#include <string>
+#include <regex>
+#include <testsuite_hooks.h>
+
+// libstdc++/64585
+void test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  static const char s[] = "a";
+  std::regex re("a");
+  VERIFY(std::regex_search(s, re));
+
+  auto loc = re.imbue(re.getloc());
+  VERIFY(!std::regex_search(s, re));
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}
-------------- next part --------------
commit eec5548a66ed0782eb3a6a16b4e4723d13e51ac8
Author: timshen <timshen@google.com>
Date:   Wed Jan 14 00:01:40 2015 -0800

    	PR libstdc++/64584
    	PR libstdc++/64585
    	* include/bits/regex.h (basic_regex<>::basic_regex,
    	basic_regex<>::assign, basic_regex<>::imbue,
    	basic_regex<>::swap, basic_regex<>::mark_count): Drop NFA after
    	imbuing basic_regex; Make assign() transactional against exception.
    	* include/bits/regex_compiler.h (__compile_nfa<>): Add back
    	__compile_nfa SFINAE.
    	* include/std/regex: Adjust include order to avoid __compile_nfa
    	forward declaration.
    	* testsuite/28_regex/basic_regex/assign/char/string.cc: New testcase.
    	* testsuite/28_regex/basic_regex/imbue/string.cc: New testcase.

diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h
index 52c2384..6de883a 100644
--- a/libstdc++-v3/include/bits/regex.h
+++ b/libstdc++-v3/include/bits/regex.h
@@ -62,13 +62,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename, typename, typename, bool>
     class _Executor;
 
-  template<typename _TraitsT>
-    inline std::shared_ptr<_NFA<_TraitsT>>
-    __compile_nfa(const typename _TraitsT::char_type* __first,
-		  const typename _TraitsT::char_type* __last,
-		  const typename _TraitsT::locale_type& __loc,
-		  regex_constants::syntax_option_type __flags);
-
 _GLIBCXX_END_NAMESPACE_VERSION
 }
 
@@ -433,7 +426,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
        * character sequence.
        */
       basic_regex()
-      : _M_flags(ECMAScript), _M_loc(), _M_original_str(), _M_automaton(nullptr)
+      : _M_flags(ECMAScript), _M_loc(), _M_automaton(nullptr)
       { }
 
       /**
@@ -497,7 +490,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
 	basic_regex(const std::basic_string<_Ch_type, _Ch_traits,
 					    _Ch_alloc>& __s,
 		    flag_type __f = ECMAScript)
-	: basic_regex(__s.begin(), __s.end(), __f)
+	: basic_regex(__s.data(), __s.data() + __s.size(), __f)
 	{ }
 
       /**
@@ -516,14 +509,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       template<typename _FwdIter>
 	basic_regex(_FwdIter __first, _FwdIter __last,
 		    flag_type __f = ECMAScript)
-	: _M_flags(__f),
-	  _M_loc(),
-	  _M_original_str(__first, __last),
-	  _M_automaton(__detail::__compile_nfa<_Rx_traits>(
-	    _M_original_str.c_str(),
-	    _M_original_str.c_str() + _M_original_str.size(),
-	    _M_loc,
-	    _M_flags))
+	: basic_regex(std::move(__first), std::move(__last), locale_type(), __f)
 	{ }
 
       /**
@@ -657,15 +643,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
 	assign(const basic_string<_Ch_type, _Ch_traits, _Alloc>& __s,
 	       flag_type __flags = ECMAScript)
 	{
-	  _M_flags = __flags;
-	  _M_original_str.assign(__s.begin(), __s.end());
-	  auto __p = _M_original_str.c_str();
-	  _M_automaton = __detail::__compile_nfa<_Rx_traits>(
-	    __p,
-	    __p + _M_original_str.size(),
-	    _M_loc,
-	    _M_flags);
-	  return *this;
+	  return this->assign(basic_regex(__s.data(), __s.data() + __s.size(),
+					  _M_loc, _M_flags));
 	}
 
       /**
@@ -709,7 +688,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
        */
       unsigned int
       mark_count() const
-      { return _M_automaton->_M_sub_count() - 1; }
+      {
+	if (_M_automaton)
+	  return _M_automaton->_M_sub_count() - 1;
+	return 0;
+      }
 
       /**
        * @brief Gets the flags used to construct the regular expression
@@ -729,8 +712,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       imbue(locale_type __loc)
       {
 	std::swap(__loc, _M_loc);
-	if (_M_automaton != nullptr)
-	  this->assign(_M_original_str, _M_flags);
+	_M_automaton = nullptr;
 	return __loc;
       }
 
@@ -753,7 +735,6 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       {
 	std::swap(_M_flags, __rhs._M_flags);
 	std::swap(_M_loc, __rhs._M_loc);
-	std::swap(_M_original_str, __rhs._M_original_str);
 	std::swap(_M_automaton, __rhs._M_automaton);
       }
 
@@ -764,7 +745,15 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
 #endif
 
     private:
-      typedef std::shared_ptr<__detail::_NFA<_Rx_traits>> _AutomatonPtr;
+      typedef std::shared_ptr<const __detail::_NFA<_Rx_traits>> _AutomatonPtr;
+
+      template<typename _FwdIter>
+	basic_regex(_FwdIter __first, _FwdIter __last, locale_type __loc,
+		    flag_type __f)
+	: _M_flags(__f), _M_loc(std::move(__loc)),
+	_M_automaton(__detail::__compile_nfa<_FwdIter, _Rx_traits>(
+	  std::move(__first), std::move(__last), _M_loc, _M_flags))
+	{ }
 
       template<typename _Bp, typename _Ap, typename _Cp, typename _Rp,
 	__detail::_RegexExecutorPolicy, bool>
@@ -778,7 +767,6 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
 
       flag_type              _M_flags;
       locale_type            _M_loc;
-      basic_string<_Ch_type> _M_original_str;
       _AutomatonPtr          _M_automaton;
     };
 
diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
index 81f8c8e..4472116 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -59,7 +59,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _Compiler(_IterT __b, _IterT __e,
 		const typename _TraitsT::locale_type& __traits, _FlagT __flags);
 
-      std::shared_ptr<_RegexT>
+      shared_ptr<const _RegexT>
       _M_get_nfa()
       { return std::move(_M_nfa); }
 
@@ -145,15 +145,62 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       const _CtypeT&      _M_ctype;
     };
 
-  template<typename _TraitsT>
-    inline std::shared_ptr<_NFA<_TraitsT>>
-    __compile_nfa(const typename _TraitsT::char_type* __first,
-		  const typename _TraitsT::char_type* __last,
+  template<typename _Tp>
+    struct __has_contiguous_iter : std::false_type { };
+
+  template<typename _Ch, typename _Tr, typename _Alloc>
+    struct __has_contiguous_iter<std::basic_string<_Ch, _Tr, _Alloc>>
+    : std::true_type
+    { };
+
+  template<typename _Tp, typename _Alloc>
+    struct __has_contiguous_iter<std::vector<_Tp, _Alloc>>
+    : std::true_type
+    { };
+
+  template<typename _Tp>
+    struct __is_contiguous_normal_iter : std::false_type { };
+
+  template<typename _CharT>
+    struct __is_contiguous_normal_iter<_CharT*> : std::true_type { };
+
+  template<typename _Tp, typename _Cont>
+    struct
+    __is_contiguous_normal_iter<__gnu_cxx::__normal_iterator<_Tp, _Cont>>
+    : __has_contiguous_iter<_Cont>::type
+    { };
+
+  template<typename _Iter, typename _TraitsT>
+    using __enable_if_contiguous_normal_iter
+      = typename enable_if< __is_contiguous_normal_iter<_Iter>::value,
+                           std::shared_ptr<const _NFA<_TraitsT>> >::type;
+
+  template<typename _Iter, typename _TraitsT>
+    using __disable_if_contiguous_normal_iter
+      = typename enable_if< !__is_contiguous_normal_iter<_Iter>::value,
+                           std::shared_ptr<const _NFA<_TraitsT>> >::type;
+
+  template<typename _FwdIter, typename _TraitsT>
+    inline __enable_if_contiguous_normal_iter<_FwdIter, _TraitsT>
+    __compile_nfa(_FwdIter __first, _FwdIter __last,
 		  const typename _TraitsT::locale_type& __loc,
 		  regex_constants::syntax_option_type __flags)
     {
+      size_t __len = __last - __first;
+      const auto* __cfirst = __len ? std::__addressof(*__first) : nullptr;
       using _Cmplr = _Compiler<_TraitsT>;
-      return _Cmplr(__first, __last, __loc, __flags)._M_get_nfa();
+      return _Cmplr(__cfirst, __cfirst + __len, __loc, __flags)._M_get_nfa();
+    }
+
+  template<typename _FwdIter, typename _TraitsT>
+    inline __disable_if_contiguous_normal_iter<_FwdIter, _TraitsT>
+    __compile_nfa(_FwdIter __first, _FwdIter __last,
+		  const typename _TraitsT::locale_type& __loc,
+		  regex_constants::syntax_option_type __flags)
+    {
+      basic_string<typename _TraitsT::char_type> __str(__first, __last);
+      return __compile_nfa(__str.data(), __str.data() + __str.size(), __loc,
+          __flags);
     }
 
   // [28.13.14]
diff --git a/libstdc++-v3/include/std/regex b/libstdc++-v3/include/std/regex
index 4d0e93c..3dff372 100644
--- a/libstdc++-v3/include/std/regex
+++ b/libstdc++-v3/include/std/regex
@@ -56,9 +56,9 @@
 #include <bits/regex_constants.h>
 #include <bits/regex_error.h>
 #include <bits/regex_automaton.h>
-#include <bits/regex.h>
 #include <bits/regex_scanner.h>
 #include <bits/regex_compiler.h>
+#include <bits/regex.h>
 #include <bits/regex_executor.h>
 
 #endif // C++11
diff --git a/libstdc++-v3/testsuite/28_regex/basic_regex/assign/char/string.cc b/libstdc++-v3/testsuite/28_regex/basic_regex/assign/char/string.cc
index e0110a1..ee115b5 100644
--- a/libstdc++-v3/testsuite/28_regex/basic_regex/assign/char/string.cc
+++ b/libstdc++-v3/testsuite/28_regex/basic_regex/assign/char/string.cc
@@ -1,4 +1,3 @@
-// { dg-do compile }
 // { dg-options "-std=gnu++11" }
 
 // 2007-03-12  Stephen M. Webb  <stephen.webb@bregmasoft.com>
@@ -29,6 +28,7 @@
 // Tests C++ string assignment of the basic_regex class.  
 void test01()
 {
+  bool test __attribute__((unused)) = true;
   typedef std::basic_regex<char> test_type;
 
   std::string s("a*b");
@@ -36,9 +36,27 @@ void test01()
   re.assign(s);
 }
 
+// libstdc++/64584
+void test02()
+{
+  bool test __attribute__((unused)) = true;
+  std::regex re("", std::regex_constants::extended);
+  auto flags = re.flags();
+  try
+    {
+      re.assign("(", std::regex_constants::icase);
+      VERIFY(false);
+    }
+  catch (const std::regex_error& e)
+    {
+      VERIFY(flags == re.flags());
+    }
+}
+
 int
 main()
 { 
   test01();
+  test02();
   return 0;
 }
diff --git a/libstdc++-v3/testsuite/28_regex/basic_regex/imbue/string.cc b/libstdc++-v3/testsuite/28_regex/basic_regex/imbue/string.cc
new file mode 100644
index 0000000..d4d4f47
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/basic_regex/imbue/string.cc
@@ -0,0 +1,44 @@
+// { dg-options "-std=gnu++11" }
+
+// Copyright (C) 2015 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// [28.8.5] class template basic_regex locale
+
+#include <string>
+#include <regex>
+#include <testsuite_hooks.h>
+
+// libstdc++/64585
+void test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  static const char s[] = "a";
+  std::regex re("a");
+  VERIFY(std::regex_search(s, re));
+
+  auto loc = re.imbue(re.getloc());
+  VERIFY(!std::regex_search(s, re));
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}


More information about the Libstdc++ mailing list