[Patch, libstdc++/64441] Fix out of range match_results submatch access

Tim Shen timshen@google.com
Mon Mar 9 10:56:00 GMT 2015


I guess this patch doesn't break abi compatibility, so if everything
is Ok, I'm gonna patch it to 4.9 too.

I'm not sure if this is a "regression fix" though; if it's
inappropriate for trunk, then I can simply wait.

Bootstrapped in trunk and tested.

Thanks! :)


-- 
Regards,
Tim Shen
-------------- next part --------------
commit cf3977f3077a10de8ccf6dae188f945e1008b2c6
Author: timshen <timshen@google.com>
Date:   Mon Mar 9 03:41:31 2015 -0700

    	PR libstdc++/64441
    	* include/bits/regex.h (match_results<>::size,
    	match_results<>::position, match_results<>::str,
    	match_results<>::operator[], match_results<>::prefix,
    	match_results<>::suffix, match_results<>::end,
    	match_results<>::_M_resize, match_results<>::_M_unmatched_sub,
    	match_results<>::_M_prefix, match_results<>::_M_suffix): Remove
    	global __unmatched_sub. Add unmatched submatch as part of
    	match_results.
    	* include/bits/regex.tcc (__regex_algo_impl<>, regex_replace<>,
    	regex_iterator<>::operator++): Adjust to use match_results::_M_prefix.
    	* testsuite/28_regex/match_results/out_of_range_submatches.cc:
    	New testcases.

diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h
index 2b09da6..a23c2c9 100644
--- a/libstdc++-v3/include/bits/regex.h
+++ b/libstdc++-v3/include/bits/regex.h
@@ -1483,17 +1483,6 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
 
   // [7.10] Class template match_results
 
-  /*
-   * Special sub_match object representing an unmatched sub-expression.
-   */
-  template<typename _Bi_iter>
-    inline const sub_match<_Bi_iter>&
-    __unmatched_sub()
-    {
-      static const sub_match<_Bi_iter> __unmatched = sub_match<_Bi_iter>();
-      return __unmatched;
-    }
-
   /**
    * @brief The results of a match or search operation.
    *
@@ -1523,15 +1512,20 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
     {
     private:
       /*
-       * The vector base is empty if this does not represent a successful match.
-       * Otherwise it contains n+3 elements where n is the number of marked
+       * The vector base is empty if this does not represent a match (!ready());
+       * Otherwise if it's a match failure, it contains 3 elements:
+       * [0] unmatched
+       * [1] prefix
+       * [2] suffix
+       * Otherwise it contains n+4 elements where n is the number of marked
        * sub-expressions:
        * [0] entire match
        * [1] 1st marked subexpression
        * ...
        * [n] nth marked subexpression
-       * [n+1] prefix
-       * [n+2] suffix
+       * [n+1] unmatched
+       * [n+2] prefix
+       * [n+3] suffix
        */
       typedef std::vector<sub_match<_Bi_iter>, _Alloc>     _Base_type;
       typedef std::iterator_traits<_Bi_iter>   	   	   __iter_traits;
@@ -1623,10 +1617,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
        */
       size_type
       size() const
-      {
-      	size_type __size = _Base_type::size();
-      	return (__size && _Base_type::operator[](0).matched) ? __size - 2 : 0;
-      }
+      { return _Base_type::empty() ? 0 : _Base_type::size() - 3; }
 
       size_type
       max_size() const
@@ -1670,15 +1661,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
        * is zero (the default), in which case this function returns the offset
        * from the beginning of the target sequence to the beginning of the
        * match.
-       *
-       * Returns -1 if @p __sub is out of range.
        */
       difference_type
       position(size_type __sub = 0) const
-      {
-	return __sub < size() ? std::distance(_M_begin,
-					      (*this)[__sub].first) : -1;
-      }
+      { return std::distance(_M_begin, (*this)[__sub].first); }
 
       /**
        * @brief Gets the match or submatch converted to a string type.
@@ -1691,7 +1677,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
        */
       string_type
       str(size_type __sub = 0) const
-      { return (*this)[__sub].str(); }
+      { return string_type((*this)[__sub]); }
 
       /**
        * @brief Gets a %sub_match reference for the match or submatch.
@@ -1707,10 +1693,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       const_reference
       operator[](size_type __sub) const
       {
-      	_GLIBCXX_DEBUG_ASSERT( ready() );
-      	return __sub < size()
-	       ?  _Base_type::operator[](__sub)
-	       : __unmatched_sub<_Bi_iter>();
+	_GLIBCXX_DEBUG_ASSERT( ready() );
+	return __sub < size()
+	       ? _Base_type::operator[](__sub)
+	       : _M_unmatched_sub();
       }
 
       /**
@@ -1724,10 +1710,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       const_reference
       prefix() const
       {
-      	_GLIBCXX_DEBUG_ASSERT( ready() );
-      	return !empty()
-      	       ? _Base_type::operator[](_Base_type::size() - 2)
-	       : __unmatched_sub<_Bi_iter>();
+	_GLIBCXX_DEBUG_ASSERT( ready() );
+	return !empty() ? _M_prefix() : _M_unmatched_sub();
       }
 
       /**
@@ -1742,9 +1726,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       suffix() const
       {
 	_GLIBCXX_DEBUG_ASSERT( ready() );
-	return !empty()
-	       ? _Base_type::operator[](_Base_type::size() - 1)
-	       : __unmatched_sub<_Bi_iter>();
+	return !empty() ? _M_suffix() : _M_unmatched_sub();
       }
 
       /**
@@ -1766,7 +1748,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
        */
       const_iterator
       end() const
-      { return _Base_type::end() - 2; }
+      { return _Base_type::end() - 3; }
 
       /**
        * @brief Gets an iterator to one-past-the-end of the collection.
@@ -1883,6 +1865,34 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
 				    const basic_regex<_Cp, _Rp>&,
 				    regex_constants::match_flag_type);
 
+      void
+      _M_resize(unsigned int __size)
+      { _Base_type::resize(__size + 3); }
+
+      const_reference
+      _M_unmatched_sub() const
+      { return _Base_type::operator[](_Base_type::size() - 3); }
+
+      sub_match<_Bi_iter>&
+      _M_unmatched_sub()
+      { return _Base_type::operator[](_Base_type::size() - 3); }
+
+      const_reference
+      _M_prefix() const
+      { return _Base_type::operator[](_Base_type::size() - 2); }
+
+      sub_match<_Bi_iter>&
+      _M_prefix()
+      { return _Base_type::operator[](_Base_type::size() - 2); }
+
+      const_reference
+      _M_suffix() const
+      { return _Base_type::operator[](_Base_type::size() - 1); }
+
+      sub_match<_Bi_iter>&
+      _M_suffix()
+      { return _Base_type::operator[](_Base_type::size() - 1); }
+
       _Bi_iter _M_begin;
     };
 
diff --git a/libstdc++-v3/include/bits/regex.tcc b/libstdc++-v3/include/bits/regex.tcc
index aad56e0..823ad51 100644
--- a/libstdc++-v3/include/bits/regex.tcc
+++ b/libstdc++-v3/include/bits/regex.tcc
@@ -63,7 +63,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
       typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
       __m._M_begin = __s;
-      __res.resize(__re._M_automaton->_M_sub_count() + 2);
+      __m._M_resize(__re._M_automaton->_M_sub_count());
       for (auto& __it : __res)
 	__it.matched = false;
 
@@ -99,8 +99,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  for (auto& __it : __res)
 	    if (!__it.matched)
 	      __it.first = __it.second = __e;
-	  auto& __pre = __res[__res.size()-2];
-	  auto& __suf = __res[__res.size()-1];
+	  auto& __pre = __m._M_prefix();
+	  auto& __suf = __m._M_suffix();
 	  if (__match_mode)
 	    {
 	      __pre.matched = false;
@@ -120,6 +120,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	      __suf.matched = (__suf.first != __suf.second);
 	    }
 	}
+      else
+	{
+	  __m._M_resize(0);
+	  for (auto& __it : __res)
+	    {
+	      __it.matched = false;
+	      __it.first = __it.second = __e;
+	    }
+	}
       return __ret;
     }
 
@@ -374,7 +383,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
       auto __output = [&](size_t __idx)
 	{
-	  auto& __sub = _Base_type::operator[](__idx);
+	  auto& __sub = (*this)[__idx];
 	  if (__sub.matched)
 	    __out = std::copy(__sub.first, __sub.second, __out);
 	};
@@ -425,9 +434,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	      else if (__eat('&'))
 		__output(0);
 	      else if (__eat('`'))
-		__output(_Base_type::size()-2);
+		{
+		  auto& __sub = _M_prefix();
+		  if (__sub.matched)
+		    __out = std::copy(__sub.first, __sub.second, __out);
+		}
 	      else if (__eat('\''))
-		__output(_Base_type::size()-1);
+		{
+		  auto& __sub = _M_suffix();
+		  if (__sub.matched)
+		    __out = std::copy(__sub.first, __sub.second, __out);
+		}
 	      else if (__fctyp.is(__ctype_type::digit, *__next))
 		{
 		  long __num = __traits.value(*__next, 10);
@@ -532,7 +549,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 				   | regex_constants::match_continuous))
 		    {
 		      _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
-		      auto& __prefix = _M_match.at(_M_match.size());
+		      auto& __prefix = _M_match._M_prefix();
 		      __prefix.first = __prefix_first;
 		      __prefix.matched = __prefix.first != __prefix.second;
 		      // [28.12.1.4.5]
@@ -547,7 +564,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
 	    {
 	      _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
-	      auto& __prefix = _M_match.at(_M_match.size());
+	      auto& __prefix = _M_match._M_prefix();
 	      __prefix.first = __prefix_first;
 	      __prefix.matched = __prefix.first != __prefix.second;
 	      // [28.12.1.4.5]
diff --git a/libstdc++-v3/testsuite/28_regex/match_results/out_of_range_submatches.cc b/libstdc++-v3/testsuite/28_regex/match_results/out_of_range_submatches.cc
new file mode 100644
index 0000000..2309b13
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/match_results/out_of_range_submatches.cc
@@ -0,0 +1,81 @@
+// { dg-options "-std=gnu++11" }
+
+// Copyright (C) 2015 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+#include <regex>
+#include <testsuite_hooks.h>
+#include <testsuite_regex.h>
+
+using namespace std;
+using namespace __gnu_test;
+
+// libstdc++/64441
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+
+  const char s[] = "abc";
+  const std::regex re("(\\d+)|(\\w+)");
+
+  std::cmatch m;
+  VERIFY(regex_search_debug(s, m, re));
+
+  std::tuple<bool, string, int, int> expected[] = {
+      make_tuple(true, "abc", 0, 3),
+      make_tuple(false, "", 3, 3),
+      make_tuple(true, "abc", 0, 3),
+      make_tuple(false, "", 3, 3),
+  };
+  for (size_t i = 0, n = m.size(); i <= n; ++i) {
+      auto&& sub = m[i];
+      VERIFY(sub.matched == std::get<0>(expected[i]));
+      VERIFY(sub.str() == std::get<1>(expected[i]));
+      VERIFY((sub.first - s) == std::get<2>(expected[i]));
+      VERIFY((sub.second - s) == std::get<3>(expected[i]));
+  }
+}
+
+// libstdc++/64781
+void
+test02()
+{
+  bool test __attribute__((unused)) = true;
+
+  std::match_results<const char*> m;
+  const char s[] = "a";
+  VERIFY(regex_search_debug(s, m, std::regex("a")));
+
+  VERIFY(m.size() == 1);
+
+  VERIFY(m[0].first == s+0);
+  VERIFY(m[0].second == s+1);
+  VERIFY(m[0].matched == true);
+
+  VERIFY(m[42].first == s+1);
+  VERIFY(m[42].second == s+1);
+  VERIFY(m[42].matched == false);
+}
+
+int
+main()
+{
+  test01();
+  test02();
+  return 0;
+}


More information about the Libstdc++ mailing list