[Patch, regex, libstdc++/69794] Unify special character parsing
Tim Shen
timshen@google.com
Sat Feb 13 19:13:00 GMT 2016
I got this wrong in r227289: I ignored the "\n" special case in grep.
It turns out that handling special cases in code is error-prone, so I
switched to using data (_M_grep_spec_char and _M_egrep_spec_char) instead.
Bootstrapped and tested on x86_64-pc-linux-gnu.
Thanks!
--
Regards,
Tim Shen
-------------- next part --------------
commit 03e651ef56e516f1bc7b0d041d93ef657af54791
Author: Tim Shen <timshen@google.com>
Date: Sat Feb 13 10:55:38 2016 -0800
PR libstdc++/69794
* include/bits/regex_scanner.h: Add different special character
sets for grep and egrep regex.
* include/bits/regex_scanner.tcc: Use _M_spec_char uniformly.
* testsuite/28_regex/regression.cc: Add new testcase.
diff --git a/libstdc++-v3/include/bits/regex_scanner.h b/libstdc++-v3/include/bits/regex_scanner.h
index bff7366..16071da 100644
--- a/libstdc++-v3/include/bits/regex_scanner.h
+++ b/libstdc++-v3/include/bits/regex_scanner.h
@@ -95,11 +95,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
: _M_awk_escape_tbl),
_M_spec_char(_M_is_ecma()
? _M_ecma_spec_char
- : _M_is_basic()
+ : _M_flags & regex_constants::basic
? _M_basic_spec_char
- : _M_extended_spec_char),
+ : _M_flags & regex_constants::extended
+ ? _M_extended_spec_char
+ : _M_flags & regex_constants::grep
+ ? _M_grep_spec_char
+ : _M_flags & regex_constants::egrep
+ ? _M_egrep_spec_char
+ : _M_flags & regex_constants::awk
+ ? _M_extended_spec_char
+ : nullptr),
_M_at_bracket_start(false)
- { }
+ { __glibcxx_assert(_M_spec_char); }
protected:
const char*
@@ -177,6 +185,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
const char* _M_basic_spec_char = ".[\\*^$";
const char* _M_extended_spec_char = ".[\\()*+?{|^$";
+ const char* _M_grep_spec_char = ".[\\*^$\n";
+ const char* _M_egrep_spec_char = ".[\\()*+?{|^$\n";
_StateT _M_state;
_FlagT _M_flags;
diff --git a/libstdc++-v3/include/bits/regex_scanner.tcc b/libstdc++-v3/include/bits/regex_scanner.tcc
index 920cb14..fedba09 100644
--- a/libstdc++-v3/include/bits/regex_scanner.tcc
+++ b/libstdc++-v3/include/bits/regex_scanner.tcc
@@ -97,9 +97,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_scan_normal()
{
auto __c = *_M_current++;
- const char* __pos;
- if (std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0')) == nullptr)
+ if (std::strchr(_M_spec_char, _M_ctype.narrow(__c, ' ')) == nullptr)
{
_M_token = _S_token_ord_char;
_M_value.assign(1, __c);
@@ -177,12 +176,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_state = _S_state_in_brace;
_M_token = _S_token_interval_begin;
}
- else if (((__pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0')))
- != nullptr
- && *__pos != '\0'
- && __c != ']'
- && __c != '}')
- || (_M_is_grep() && __c == '\n'))
+ else if (__c != ']' && __c != '}')
{
auto __it = _M_token_tbl;
auto __narrowc = _M_ctype.narrow(__c, '\0');
diff --git a/libstdc++-v3/testsuite/28_regex/regression.cc b/libstdc++-v3/testsuite/28_regex/regression.cc
index f95bef9..c9a3402 100644
--- a/libstdc++-v3/testsuite/28_regex/regression.cc
+++ b/libstdc++-v3/testsuite/28_regex/regression.cc
@@ -33,10 +33,26 @@ test01()
regex re("((.)", regex_constants::basic);
}
+void
+test02()
+{
+ bool test __attribute__((unused)) = true;
+
+ std::string re_str
+ {
+ "/abcd" "\n"
+ "/aecf" "\n"
+ "/ghci"
+ };
+ auto rx = std::regex(re_str, std::regex_constants::grep | std::regex_constants::icase);
+ VERIFY(std::regex_search("/abcd", rx));
+}
+
int
main()
{
test01();
+ test02();
return 0;
}
More information about the Gcc-patches
mailing list