[Patch, regex, libstdc++/69794] Unify special character parsing

Tim Shen timshen@google.com
Sat Feb 13 19:13:00 GMT 2016


I got this wrong in r227289 - I ignored the "\n" special case in grep.
It turns out that handling special cases in code is error-prone, so I
switched to using data (_M_grep_spec_char and _M_egrep_spec_char).

Bootstrapped and tested on x86_64-pc-linux-gnu.

Thanks!

-- 
Regards,
Tim Shen
-------------- next part --------------
commit 03e651ef56e516f1bc7b0d041d93ef657af54791
Author: Tim Shen <timshen@google.com>
Date:   Sat Feb 13 10:55:38 2016 -0800

    	PR libstdc++/69794
    	* include/bits/regex_scanner.h: Add different special character
    	sets for grep and egrep regex.
    	* include/bits/regex_scanner.tcc: Use _M_spec_char more uniformly.
    	* testsuite/28_regex/regression.cc: Add new testcase.

diff --git a/libstdc++-v3/include/bits/regex_scanner.h b/libstdc++-v3/include/bits/regex_scanner.h
index bff7366..16071da 100644
--- a/libstdc++-v3/include/bits/regex_scanner.h
+++ b/libstdc++-v3/include/bits/regex_scanner.h
@@ -95,11 +95,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		  : _M_awk_escape_tbl),
     _M_spec_char(_M_is_ecma()
 		 ? _M_ecma_spec_char
-		 : _M_is_basic()
+		 : _M_flags & regex_constants::basic
 		 ? _M_basic_spec_char
-		 : _M_extended_spec_char),
+		 : _M_flags & regex_constants::extended
+		 ? _M_extended_spec_char
+		 : _M_flags & regex_constants::grep
+		 ? _M_grep_spec_char
+		 : _M_flags & regex_constants::egrep
+		 ? _M_egrep_spec_char
+		 : _M_flags & regex_constants::awk
+		 ? _M_extended_spec_char
+		 : nullptr),
     _M_at_bracket_start(false)
-    { }
+    { __glibcxx_assert(_M_spec_char); }
 
   protected:
     const char*
@@ -177,6 +185,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
     const char* _M_basic_spec_char = ".[\\*^$";
     const char* _M_extended_spec_char = ".[\\()*+?{|^$";
+    const char* _M_grep_spec_char = ".[\\*^$\n";
+    const char* _M_egrep_spec_char = ".[\\()*+?{|^$\n";
 
     _StateT                       _M_state;
     _FlagT                        _M_flags;
diff --git a/libstdc++-v3/include/bits/regex_scanner.tcc b/libstdc++-v3/include/bits/regex_scanner.tcc
index 920cb14..fedba09 100644
--- a/libstdc++-v3/include/bits/regex_scanner.tcc
+++ b/libstdc++-v3/include/bits/regex_scanner.tcc
@@ -97,9 +97,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _M_scan_normal()
     {
       auto __c = *_M_current++;
-      const char* __pos;
 
-      if (std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0')) == nullptr)
+      if (std::strchr(_M_spec_char, _M_ctype.narrow(__c, ' ')) == nullptr)
 	{
 	  _M_token = _S_token_ord_char;
 	  _M_value.assign(1, __c);
@@ -177,12 +176,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  _M_state = _S_state_in_brace;
 	  _M_token = _S_token_interval_begin;
 	}
-      else if (((__pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0')))
-		  != nullptr
-		&& *__pos != '\0'
-		&& __c != ']'
-		&& __c != '}')
-	       || (_M_is_grep() && __c == '\n'))
+      else if (__c != ']' && __c != '}')
 	{
 	  auto __it = _M_token_tbl;
 	  auto __narrowc = _M_ctype.narrow(__c, '\0');
diff --git a/libstdc++-v3/testsuite/28_regex/regression.cc b/libstdc++-v3/testsuite/28_regex/regression.cc
index f95bef9..c9a3402 100644
--- a/libstdc++-v3/testsuite/28_regex/regression.cc
+++ b/libstdc++-v3/testsuite/28_regex/regression.cc
@@ -33,10 +33,26 @@ test01()
   regex re("((.)", regex_constants::basic);
 }
 
+void
+test02()
+{
+  bool test __attribute__((unused)) = true;
+
+  std::string re_str
+    {
+      "/abcd" "\n"
+      "/aecf" "\n"
+      "/ghci"
+    };
+  auto rx = std::regex(re_str, std::regex_constants::grep | std::regex_constants::icase);
+  VERIFY(std::regex_search("/abcd", rx));
+}
+
 int
 main()
 {
   test01();
+  test02();
   return 0;
 }
 


More information about the Gcc-patches mailing list