std::regex_replace behaviour (LWG DR 2213)

Tim Shen timshen91@gmail.com
Thu Feb 13 22:31:00 GMT 2014


On Thu, Feb 13, 2014 at 1:13 PM, Jonathan Wakely <jwakely.gcc@gmail.com> wrote:
> The LWG have decided that
> http://cplusplus.github.io/LWG/lwg-active.html#2213 is a defect.
>
> In our std::regex_replace we do not appear to update out in all places
> that we should.

1) Yes, the current implementation is buggy for not updating __out
after calling std::copy;
2) I'd rather say the standard is misleading but well intended (return
the new out iterator) rather than ill intended (return the original
out iterator). It'll be a little troubler if match_results<>::format()
do not return the new out iterator, which regex_replace<>() the caller
needs. Boost and libc++ as well return the new iterator.

So my suggestion is just following the LWG proposal, as well as Boost
and libc++.

Here's the patch tested with -m32 and -m64 respectively.

Thanks!


-- 
Regards,
Tim Shen
-------------- next part --------------
commit 3f8621b5f7ced00e21e7038f1e9737eea1bb4251
Author: tim <timshen91@gmail.com>
Date:   Thu Feb 13 17:23:48 2014 -0500

    2014-02-13  Tim Shen  <timshen91@gmail.com>
    
    	* include/bits/regex.tcc (match_results<>::format,
    	regex_replace<>): Update __out after calling std::copy.
    	* testsuite/28_regex/algorithms/regex_replace/char/basic_replace.cc:
    	Add testcase.
    	* testsuite/28_regex/match_results/format.cc: Likewise.

diff --git a/libstdc++-v3/include/bits/regex.tcc b/libstdc++-v3/include/bits/regex.tcc
index 73f55df..5fa1f01 100644
--- a/libstdc++-v3/include/bits/regex.tcc
+++ b/libstdc++-v3/include/bits/regex.tcc
@@ -425,7 +425,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	{
 	  auto& __sub = _Base_type::operator[](__idx);
 	  if (__sub.matched)
-	    std::copy(__sub.first, __sub.second, __out);
+	    __out = std::copy(__sub.first, __sub.second, __out);
 	};
 
       if (__flags & regex_constants::format_sed)
@@ -455,7 +455,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	      if (__next == __fmt_last)
 		break;
 
-	      std::copy(__fmt_first, __next, __out);
+	      __out = std::copy(__fmt_first, __next, __out);
 
 	      auto __eat = [&](char __ch) -> bool
 		{
@@ -493,7 +493,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		*__out++ = '$';
 	      __fmt_first = __next;
 	    }
-	  std::copy(__fmt_first, __fmt_last, __out);
+	  __out = std::copy(__fmt_first, __fmt_last, __out);
 	}
       return __out;
     }
@@ -512,7 +512,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       if (__i == __end)
 	{
 	  if (!(__flags & regex_constants::format_no_copy))
-	    std::copy(__first, __last, __out);
+	    __out = std::copy(__first, __last, __out);
 	}
       else
 	{
@@ -521,14 +521,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  for (; __i != __end; ++__i)
 	    {
 	      if (!(__flags & regex_constants::format_no_copy))
-		std::copy(__i->prefix().first, __i->prefix().second, __out);
+		__out = std::copy(__i->prefix().first, __i->prefix().second,
+				  __out);
 	      __out = __i->format(__out, __fmt, __fmt + __len, __flags);
 	      __last = __i->suffix();
 	      if (__flags & regex_constants::format_first_only)
 		break;
 	    }
 	  if (!(__flags & regex_constants::format_no_copy))
-	    std::copy(__last.first, __last.second, __out);
+	    __out = std::copy(__last.first, __last.second, __out);
 	}
       return __out;
     }
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_replace/char/basic_replace.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_replace/char/basic_replace.cc
index 28f78a0..38ef970 100644
--- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_replace/char/basic_replace.cc
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_replace/char/basic_replace.cc
@@ -41,6 +41,14 @@ test01()
   VERIFY(regex_replace(string("This is a string"), regex("\\b\\w*\\b"), "|$0|",
 		       regex_constants::format_first_only)
 	 == "|This| is a string");
+
+  char buff[4096] = {0};
+  regex re("asdf");
+  string s = "asdf";
+  string res = "|asdf|asdf|";
+  regex_replace(buff, s.data(), s.data() + s.size(), re, "|&|\\0|",
+		regex_constants::format_sed);
+  VERIFY(res == buff);
 }
 
 int
diff --git a/libstdc++-v3/testsuite/28_regex/match_results/format.cc b/libstdc++-v3/testsuite/28_regex/match_results/format.cc
index 11e3bdb..097a0d7 100644
--- a/libstdc++-v3/testsuite/28_regex/match_results/format.cc
+++ b/libstdc++-v3/testsuite/28_regex/match_results/format.cc
@@ -43,6 +43,14 @@ test01()
   VERIFY(m.format("&|\\3|\\4|\\2|\\1|\\",
 		  regex_constants::format_sed)
 	 == "this is a string|a|string|is|this|\\");
+
+  regex re("asdf");
+  regex_match("asdf", m, re);
+  string fmt = "|&|\\0|";
+  char buff[4096] = {0};
+  m.format(buff, fmt.data(), fmt.data() + fmt.size(),
+	   regex_constants::format_sed);
+  VERIFY(string(buff) == "|asdf|asdf|");
 }
 
 int


More information about the Gcc-patches mailing list