[gcc r12-3960] libstdc++: Simplify std::basic_regex construction and assignment

Jonathan Wakely redi@gcc.gnu.org
Wed Sep 29 12:49:19 GMT 2021


https://gcc.gnu.org/g:b59be1adbaea022f19dc7c30d9bf5089e80795d9

commit r12-3960-gb59be1adbaea022f19dc7c30d9bf5089e80795d9
Author: Jonathan Wakely <jwakely@redhat.com>
Date:   Wed Sep 29 13:48:02 2021 +0100

    libstdc++: Simplify std::basic_regex construction and assignment
    
    Introduce a new _M_compile function which does the common work needed by
    all constructors and assignment. Call that directly to avoid multiple
    levels of constructor delegation or calls to basic_regex::assign
    overloads.
    
    For assignment, there is no need to construct a std::basic_string if we
    already have a contiguous sequence of the correct character type, and no
    need to construct a temporary basic_regex when assigning from an
    existing basic_regex.
    
    Also define the copy and move assignment operators as defaulted, which
    does the right thing without constructing a temporary and swapping it.
    Copying or moving the shared_ptr member cannot fail, so they can be
    noexcept. The assign(const basic_regex&) and assign(basic_regex&&)
    member can then be defined in terms of copy or move assignment.
    
    The new _M_compile function takes pointer arguments, so the caller has
    to convert arbitrary iterator ranges into a contiguous sequence of
    characters. With that simplification, the __compile_nfa helpers are not
    needed and can be removed.
    
    This also fixes a bug where construction from a contiguous sequence with
    the wrong character type would fail to compile, rather than converting
    the elements to the regex character type.
    
    Signed-off-by: Jonathan Wakely <jwakely@redhat.com>
    
    libstdc++-v3/ChangeLog:
    
            * include/bits/regex.h (__detail::__is_contiguous_iter): Move
            here from <bits/regex_compiler.h>.
            (basic_regex::_M_compile): New function to compile an NFA from
            a regular expression string.
            (basic_regex::basic_regex): Use _M_compile instead of delegating
            to other constructors.
            (basic_regex::operator=(const basic_regex&)): Define as
            defaulted.
            (basic_regex::operator=(initializer_list<C>)): Use _M_compile.
            (basic_regex::assign(const basic_regex&)): Use copy assignment.
            (basic_regex::assign(basic_regex&&)): Use move assignment.
            (basic_regex::assign(const C*, flag_type)): Use _M_compile
            instead of constructing a temporary string.
            (basic_regex::assign(const C*, size_t, flag_type)): Likewise.
            (basic_regex::assign(const basic_string<C,T,A>&, flag_type)):
            Use _M_compile instead of constructing a temporary basic_regex.
            (basic_regex::assign(InputIter, InputIter, flag_type)): Avoid
            constructing a temporary string for contiguous iterators of the
            right value type.
            * include/bits/regex_compiler.h (__is_contiguous_iter): Move to
            <bits/regex.h>.
            (__enable_if_contiguous_iter, __disable_if_contiguous_iter)
            (__compile_nfa): Remove.
            * testsuite/28_regex/basic_regex/assign/exception_safety.cc: New
            test.
            * testsuite/28_regex/basic_regex/ctors/char/other.cc: New test.

Diff:
---
 libstdc++-v3/include/bits/regex.h                  | 101 ++++++++++++---------
 libstdc++-v3/include/bits/regex_compiler.h         |  41 ---------
 .../basic_regex/assign/exception_safety.cc         |  20 ++++
 .../28_regex/basic_regex/ctors/char/other.cc       |  37 ++++++++
 4 files changed, 117 insertions(+), 82 deletions(-)

diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h
index d4a7729de2c..baf8ff1a9cf 100644
--- a/libstdc++-v3/include/bits/regex.h
+++ b/libstdc++-v3/include/bits/regex.h
@@ -57,6 +57,16 @@ namespace __detail
 
   template<typename, typename, typename, bool>
     class _Executor;
+
+  template<typename _Tp>
+    struct __is_contiguous_iter : false_type { };
+
+  template<typename _Tp>
+    struct __is_contiguous_iter<_Tp*> : true_type { };
+
+  template<typename _Tp, typename _Cont>
+    struct __is_contiguous_iter<__gnu_cxx::__normal_iterator<_Tp*, _Cont>>
+    : true_type { };
 }
 
 _GLIBCXX_BEGIN_NAMESPACE_CXX11
@@ -438,8 +448,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
        */
       explicit
       basic_regex(const _Ch_type* __p, flag_type __f = ECMAScript)
-      : basic_regex(__p, __p + char_traits<_Ch_type>::length(__p), __f)
-      { }
+      { _M_compile(__p, __p + _Rx_traits::length(__p), __f); }
 
       /**
        * @brief Constructs a basic regular expression from the sequence
@@ -455,8 +464,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
        */
       basic_regex(const _Ch_type* __p, std::size_t __len,
 		  flag_type __f = ECMAScript)
-      : basic_regex(__p, __p + __len, __f)
-      { }
+      { _M_compile(__p, __p + __len, __f); }
 
       /**
        * @brief Copy-constructs a basic regular expression.
@@ -486,8 +494,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
 	basic_regex(const std::basic_string<_Ch_type, _Ch_traits,
 					    _Ch_alloc>& __s,
 		    flag_type __f = ECMAScript)
-	: basic_regex(__s.data(), __s.data() + __s.size(), __f)
-	{ }
+	{ _M_compile(__s.data(), __s.data() + __s.size(), __f); }
 
       /**
        * @brief Constructs a basic regular expression from the range
@@ -505,8 +512,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       template<typename _FwdIter>
 	basic_regex(_FwdIter __first, _FwdIter __last,
 		    flag_type __f = ECMAScript)
-	: basic_regex(std::move(__first), std::move(__last), locale_type(), __f)
-	{ }
+	{ this->assign(__first, __last, __f); }
 
       /**
        * @brief Constructs a basic regular expression from an initializer list.
@@ -517,8 +523,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
        * @throws regex_error if @p __l is not a valid regular expression.
        */
       basic_regex(initializer_list<_Ch_type> __l, flag_type __f = ECMAScript)
-      : basic_regex(__l.begin(), __l.end(), __f)
-      { }
+      { _M_compile(__l.begin(), __l.end(), __f); }
 
       /**
        * @brief Destroys a basic regular expression.
@@ -530,15 +535,13 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
        * @brief Assigns one regular expression to another.
        */
       basic_regex&
-      operator=(const basic_regex& __rhs)
-      { return this->assign(__rhs); }
+      operator=(const basic_regex&) = default;
 
       /**
        * @brief Move-assigns one regular expression to another.
        */
       basic_regex&
-      operator=(basic_regex&& __rhs) noexcept
-      { return this->assign(std::move(__rhs)); }
+      operator=(basic_regex&&) = default;
 
       /**
        * @brief Replaces a regular expression with a new one constructed from
@@ -561,7 +564,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
        */
       basic_regex&
       operator=(initializer_list<_Ch_type> __l)
-      { return this->assign(__l.begin(), __l.end()); }
+      { return this->assign(__l); }
 
       /**
        * @brief Replaces a regular expression with a new one constructed from
@@ -576,30 +579,22 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
 
       // [7.8.3] assign
       /**
-       * @brief the real assignment operator.
+       * @brief Assigns one regular expression to another.
        *
        * @param __rhs Another regular expression object.
        */
       basic_regex&
-      assign(const basic_regex& __rhs)
-      {
-	basic_regex __tmp(__rhs);
-	this->swap(__tmp);
-	return *this;
-      }
+      assign(const basic_regex& __rhs) noexcept
+      { return *this = __rhs; }
 
       /**
-       * @brief The move-assignment operator.
+       * @brief Move-assigns one regular expression to another.
        *
        * @param __rhs Another regular expression object.
        */
       basic_regex&
       assign(basic_regex&& __rhs) noexcept
-      {
-	basic_regex __tmp(std::move(__rhs));
-	this->swap(__tmp);
-	return *this;
-      }
+      { return *this = std::move(__rhs); }
 
       /**
        * @brief Assigns a new regular expression to a regex object from a
@@ -616,7 +611,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
        */
       basic_regex&
       assign(const _Ch_type* __p, flag_type __flags = ECMAScript)
-      { return this->assign(string_type(__p), __flags); }
+      {
+	_M_compile(__p, __p + _Rx_traits::length(__p), __flags);
+	return *this;
+      }
 
       /**
        * @brief Assigns a new regular expression to a regex object from a
@@ -635,7 +633,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       // 3296. Inconsistent default argument for basic_regex<>::assign
       basic_regex&
       assign(const _Ch_type* __p, size_t __len, flag_type __flags = ECMAScript)
-      { return this->assign(string_type(__p, __len), __flags); }
+      {
+	_M_compile(__p, __p + __len, __flags);
+	return *this;
+      }
 
       /**
        * @brief Assigns a new regular expression to a regex object from a
@@ -653,8 +654,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
 	assign(const basic_string<_Ch_type, _Ch_traits, _Alloc>& __s,
 	       flag_type __flags = ECMAScript)
 	{
-	  return this->assign(basic_regex(__s.data(), __s.data() + __s.size(),
-					  _M_loc, __flags));
+	  _M_compile(__s.data(), __s.data() + __s.size(), __flags);
+	  return *this;
 	}
 
       /**
@@ -674,7 +675,21 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
 	basic_regex&
 	assign(_InputIterator __first, _InputIterator __last,
 	       flag_type __flags = ECMAScript)
-	{ return this->assign(string_type(__first, __last), __flags); }
+	{
+#if __cplusplus >= 201703L
+	  using _ValT = typename iterator_traits<_InputIterator>::value_type;
+	  if constexpr (__detail::__is_contiguous_iter<_InputIterator>::value
+			&& is_same_v<_ValT, value_type>)
+	    {
+	      const auto __len = __last - __first;
+	      const _Ch_type* __p = std::__to_address(__first);
+	      _M_compile(__p, __p + __len, __flags);
+	    }
+	  else
+#endif
+	  this->assign(string_type(__first, __last), __flags);
+	  return *this;
+	}
 
       /**
        * @brief Assigns a new regular expression to a regex object.
@@ -689,7 +704,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
        */
       basic_regex&
       assign(initializer_list<_Ch_type> __l, flag_type __flags = ECMAScript)
-      { return this->assign(__l.begin(), __l.end(), __flags); }
+      {
+	_M_compile(__l.begin(), __l.end(), __flags);
+	return *this;
+      }
 
       // [7.8.4] const operations
       /**
@@ -757,13 +775,14 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
     private:
       typedef std::shared_ptr<const __detail::_NFA<_Rx_traits>> _AutomatonPtr;
 
-      template<typename _FwdIter>
-	basic_regex(_FwdIter __first, _FwdIter __last, locale_type __loc,
-		    flag_type __f)
-	: _M_flags(__f), _M_loc(std::move(__loc)),
-	_M_automaton(__detail::__compile_nfa<_Rx_traits>(
-	  std::move(__first), std::move(__last), _M_loc, _M_flags))
-	{ }
+      void
+      _M_compile(const _Ch_type* __first, const _Ch_type* __last,
+		 flag_type __f)
+      {
+	__detail::_Compiler<_Rx_traits> __c(__first, __last, _M_loc, __f);
+	_M_automaton = __c._M_get_nfa();
+	_M_flags = __f;
+      }
 
       template<typename _Bp, typename _Ap, typename _Cp, typename _Rp,
 	__detail::_RegexExecutorPolicy, bool>
diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
index 646766ebdf9..898607d81c6 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -152,47 +152,6 @@ namespace __detail
       const _CtypeT&      _M_ctype;
     };
 
-  template<typename _Tp>
-    struct __is_contiguous_iter : is_pointer<_Tp>::type { };
-
-  template<typename _Tp, typename _Cont>
-    struct
-    __is_contiguous_iter<__gnu_cxx::__normal_iterator<_Tp*, _Cont>>
-    : true_type { };
-
-  template<typename _Iter, typename _TraitsT>
-    using __enable_if_contiguous_iter
-      = __enable_if_t< __is_contiguous_iter<_Iter>::value,
-                       std::shared_ptr<const _NFA<_TraitsT>> >;
-
-  template<typename _Iter, typename _TraitsT>
-    using __disable_if_contiguous_iter
-      = __enable_if_t< !__is_contiguous_iter<_Iter>::value,
-                       std::shared_ptr<const _NFA<_TraitsT>> >;
-
-  template<typename _TraitsT, typename _FwdIter>
-    inline __enable_if_contiguous_iter<_FwdIter, _TraitsT>
-    __compile_nfa(_FwdIter __first, _FwdIter __last,
-		  const typename _TraitsT::locale_type& __loc,
-		  regex_constants::syntax_option_type __flags)
-    {
-      size_t __len = __last - __first;
-      const auto* __cfirst = __len ? std::__addressof(*__first) : nullptr;
-      using _Cmplr = _Compiler<_TraitsT>;
-      return _Cmplr(__cfirst, __cfirst + __len, __loc, __flags)._M_get_nfa();
-    }
-
-  template<typename _TraitsT, typename _FwdIter>
-    inline __disable_if_contiguous_iter<_FwdIter, _TraitsT>
-    __compile_nfa(_FwdIter __first, _FwdIter __last,
-		  const typename _TraitsT::locale_type& __loc,
-		  regex_constants::syntax_option_type __flags)
-    {
-      const basic_string<typename _TraitsT::char_type> __str(__first, __last);
-      return __compile_nfa<_TraitsT>(__str.data(), __str.data() + __str.size(),
-				     __loc, __flags);
-    }
-
   // [28.13.14]
   template<typename _TraitsT, bool __icase, bool __collate>
     class _RegexTranslatorBase
diff --git a/libstdc++-v3/testsuite/28_regex/basic_regex/assign/exception_safety.cc b/libstdc++-v3/testsuite/28_regex/basic_regex/assign/exception_safety.cc
new file mode 100644
index 00000000000..462eebcf2cd
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/basic_regex/assign/exception_safety.cc
@@ -0,0 +1,20 @@
+// { dg-do run { target c++11 } }
+#include <regex>
+#include <testsuite_hooks.h>
+
+int main()
+{
+  const auto f = std::regex::ECMAScript|std::regex::icase;
+  std::regex re("abc", f);
+  try
+  {
+    re.assign("[", std::regex::extended);
+    VERIFY( false );
+  }
+  catch (const std::regex_error&)
+  {
+    // [re.regex.assign] "If an exception is thrown, *this is unchanged."
+    VERIFY( re.flags() == f );
+    VERIFY( std::regex_match("abc", re) );
+  }
+}
diff --git a/libstdc++-v3/testsuite/28_regex/basic_regex/ctors/char/other.cc b/libstdc++-v3/testsuite/28_regex/basic_regex/ctors/char/other.cc
new file mode 100644
index 00000000000..f9b68a72f0a
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/basic_regex/ctors/char/other.cc
@@ -0,0 +1,37 @@
+// { dg-do run { target c++11 } }
+#include <regex>
+#include <testsuite_iterators.h>
+#include <testsuite_hooks.h>
+
+void
+test01()
+{
+  signed char s[] = { 'a', '.' };
+  std::regex re(s, s+2); // This used to fail up to GCC 11.2
+  // VERIFY( regex_match("an", re) );
+
+  std::wstring str = L"xx";
+  str[0] = '1';
+  str[1] = '2';
+  re.assign(str.begin(), str.end());
+  VERIFY( regex_match("12", re) );
+}
+
+void
+test02()
+{
+  int i[] = { 'a', '.', '[', 'x', 'y', 'z', ']' };
+  __gnu_test::forward_container<int> fwd(i);
+  std::regex re(fwd.begin(), fwd.end());
+  VERIFY( regex_match("any", re) );
+
+  __gnu_test::input_container<int> input(i);
+  re.assign(input.begin(), input.end(), std::regex::icase);
+  VERIFY( regex_match("ANY", re) );
+}
+
+int main()
+{
+  test01();
+  test02();
+}


More information about the Libstdc++-cvs mailing list