This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[libstdc++/61227] Support regex like "[\w]"


Here's the patch. I ran a quick test over 28_regex/*.

A full bootstrap may still be needed, but I can't run one right now.


-- 
Regards,
Tim Shen
commit 2fd6be816c1d1797b3aad228b9fb2cfb7374483c
Author: tim <timshen91@gmail.com>
Date:   Mon May 19 10:40:16 2014 -0400

    2014-05-19  Tim Shen  <timshen91@gmail.com>
    
    	PR libstdc++/61227
    	* include/bits/regex_compiler.h
    	(_BracketMatcher<>::_M_add_character_class): Add negative character
    	class support.
    	* include/bits/regex_compiler.tcc (_BracketMatcher<>::_M_apply):
    	Likewise.
    	* testsuite/28_regex/algorithms/regex_match/ecma/char/61227.cc:
    	New testcase.
    	* testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc:
    	Add more testcases.

diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
index d7e2162..52f7235 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -369,15 +369,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
       }
 
+      // __neg should be true for \D, \S and \W only.
       void
-      _M_add_character_class(const _StringT& __s)
+      _M_add_character_class(const _StringT& __s, bool __neg)
       {
 	auto __mask = _M_traits.lookup_classname(__s.data(),
 						 __s.data() + __s.size(),
 						 __icase);
 	if (__mask == 0)
 	  __throw_regex_error(regex_constants::error_ctype);
-	_M_class_set |= __mask;
+	if (!__neg)
+	  _M_class_set |= __mask;
+	else
+	  _M_neg_class_set.push_back(__mask);
 #ifdef _GLIBCXX_DEBUG
 	_M_is_ready = false;
 #endif
@@ -387,7 +391,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_make_range(_CharT __l, _CharT __r)
       {
 	_M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
-				      _M_translator._M_transform(__r)));
+					 _M_translator._M_transform(__r)));
 #ifdef _GLIBCXX_DEBUG
 	_M_is_ready = false;
 #endif
@@ -435,6 +439,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       std::vector<_CharT>                       _M_char_set;
       std::vector<_StringT>                     _M_equiv_set;
       std::vector<pair<_StrTransT, _StrTransT>> _M_range_set;
+      std::vector<_CharClassT>                  _M_neg_class_set;
       _CharClassT                               _M_class_set;
       _TransT                                   _M_translator;
       const _TraitsT&                           _M_traits;
diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc
index 3cf9e457..a3a4945 100644
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
@@ -397,7 +397,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _GLIBCXX_DEBUG_ASSERT(_M_value.size() == 1);
       _BracketMatcher<_TraitsT, __icase, __collate> __matcher
 	(_M_ctype.is(_CtypeT::upper, _M_value[0]), _M_traits);
-      __matcher._M_add_character_class(_M_value);
+      __matcher._M_add_character_class(_M_value, false);
       __matcher._M_ready();
       _M_stack.push(_StateSeqT(_M_nfa,
 	_M_nfa._M_insert_matcher(std::move(__matcher))));
@@ -428,7 +428,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       else if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
 	__matcher._M_add_equivalence_class(_M_value);
       else if (_M_match_token(_ScannerT::_S_token_char_class_name))
-	__matcher._M_add_character_class(_M_value);
+	__matcher._M_add_character_class(_M_value, false);
       else if (_M_try_char()) // [a
 	{
 	  auto __ch = _M_value[0];
@@ -451,6 +451,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	    }
 	  __matcher._M_add_char(__ch);
 	}
+      else if (_M_match_token(_ScannerT::_S_token_quoted_class))
+	__matcher._M_add_character_class(_M_value,
+					 _M_ctype.is(_CtypeT::upper,
+						     _M_value[0]));
       else
 	__throw_regex_error(regex_constants::error_brack);
     }
@@ -508,6 +512,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     _M_apply(_CharT __ch, false_type) const
     {
       bool __ret = false;
+      // TODO Refactor this piece of junk.
       if (std::find(_M_char_set.begin(), _M_char_set.end(),
 		    _M_translator._M_translate(__ch))
 	  != _M_char_set.end())
@@ -527,6 +532,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 			     _M_traits.transform_primary(&__ch, &__ch+1))
 		   != _M_equiv_set.end())
 	    __ret = true;
+	  else
+	    {
+	      for (auto& __it : _M_neg_class_set)
+		if (!_M_traits.isctype(__ch, __it))
+		  {
+		    __ret = true;
+		    break;
+		  }
+	    }
 	}
       if (_M_is_non_matching)
 	return !__ret;
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/61227.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/61227.cc
new file mode 100644
index 0000000..2d854f6
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/61227.cc
@@ -0,0 +1,47 @@
+// { dg-options "-std=gnu++11" }
+
+//
+// Copyright (C) 2014 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.2 regex_match
+
+#include <regex>
+#include <testsuite_hooks.h>
+#include <testsuite_regex.h>
+
+using namespace __gnu_test;
+using namespace std;
+
+// libstdc++/61227
+void
+test01()
+{
+  std::regex r1{R"([^\w ])"};
+  std::regex r2{R"(\b\w+\b)"};
+  std::regex r3{R"(\b\w+\b)"};
+  std::regex r4{"//.*$"};
+  std::regex r5{R"((?:[^;"]|"[^"]*")+)"};
+  std::regex r6{R"~~(([^\s"]+)|"([^"]*)")~~"};
+}
+
+int
+main()
+{
+  test01();
+  return 0;
+}
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc
index e7280ac..8641732 100644
--- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc
@@ -44,6 +44,16 @@ test01()
   VERIFY(regex_match_debug("_az", regex("\\w*")));
   VERIFY(regex_match_debug("!@#$%", regex("\\W*")));
   VERIFY(!regex_match_debug("_01234", regex("\\W*")));
+
+  VERIFY(regex_match_debug("01", regex("[\\d]*")));
+  VERIFY(regex_match_debug("asdfjkl", regex("[\\D]*")));
+  VERIFY(!regex_match_debug("asdfjkl0", regex("[\\D]*")));
+  VERIFY(regex_match_debug("\r\t\v\f ", regex("[\\s]*")));
+  VERIFY(regex_match_debug("asdfjkl", regex("[\\S]*")));
+  VERIFY(!regex_match_debug("asdfjkl\r", regex("[\\S]*")));
+  VERIFY(regex_match_debug("_az", regex("[\\w]*")));
+  VERIFY(regex_match_debug("!@#$%", regex("[\\W]*")));
+  VERIFY(!regex_match_debug("_01234", regex("[\\W]*")));
 }
 
 int

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]