This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [Patch] Refactor regex executors
- From: Tim Shen <timshen91 at gmail dot com>
- To: Paolo Carlini <paolo dot carlini at oracle dot com>
- Cc: "libstdc++" <libstdc++ at gcc dot gnu dot org>, gcc-patches <gcc-patches at gcc dot gnu dot org>
- Date: Fri, 25 Oct 2013 10:05:25 -0400
- Subject: Re: [Patch] Refactor regex executors
- Authentication-results: sourceware.org; auth=none
- References: <CAPrifDk=fh0tjQvFB=J_V3HktivWJ6icY0g6PNgYE_aHCAxugw at mail dot gmail dot com> <526A34C7 dot 7000708 at oracle dot com>
On Fri, Oct 25, 2013 at 5:07 AM, Paolo Carlini <paolo.carlini@oracle.com> wrote:
> Ok. Are there measurable performance changes at this point? I'm asking
> because we should take the chance and add performance testcases when we
> improve the performance: later, unless somebody files a specific Bug report,
> we become lazy about those, I know that ;)
I do use another test case, but it's from Stackoverflow. I tend not to
include that; Howeverm I make a bfs version of the previous split.cc
testcase and it indeed reflect the optimization(in my machine, running
time from 294u to 194u).
--
Regards,
Tim Shen
commit 7508419faa69ce9f5621125e1c77b954d39ec90c
Author: tim <timshen91@gmail.com>
Date: Mon Oct 21 00:03:06 2013 -0400
2013-10-24 Tim Shen <timshen91@gmail.com>
* include/bits/regex.h: Remove unnecessary friends.
* include/bits/regex.tcc (__regex_algo_impl<>): Move __get_executor
to here.
* include/bits/regex_executor.h: Remove _DFSExecutor and _BFSExecutor;
they are merged into _Executor. Eliminate quantifier tracking part, so
it's faster.
* include/bits/regex_executor.tcc: Implement _Executor.
* testsuite/28_regex/algorithms/regex_match/ecma/char/ungreedy.cc: New.
* testsuite/28_regex/algorithms/regex_search/ecma/greedy.cc: Adjust
duplicate testcases.
* testsuite/performance/28_regex/split_bfs.cc: New.
* testsuite/util/testsuite_regex.h: Adjust behavior of two-executors
agreement judger: do not compare match_results when executor return
false.
diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h
index 32d38b4..b1bda46 100644
--- a/libstdc++-v3/include/bits/regex.h
+++ b/libstdc++-v3/include/bits/regex.h
@@ -36,6 +36,9 @@ namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
+ enum class _RegexExecutorPolicy : int
+ { _S_auto, _S_alternate };
+
template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT,
_RegexExecutorPolicy __policy,
@@ -730,17 +733,6 @@ _GLIBCXX_END_NAMESPACE_VERSION
typedef std::shared_ptr<__detail::_NFA<_Ch_type, _Rx_traits>>
_AutomatonPtr;
- template<typename _BiIter, typename _Alloc,
- typename _CharT, typename _TraitsT,
- __detail::_RegexExecutorPolicy __policy>
- friend std::unique_ptr<
- __detail::_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
- __detail::__get_executor(_BiIter,
- _BiIter,
- std::vector<sub_match<_BiIter>, _Alloc>&,
- const basic_regex<_CharT, _TraitsT>&,
- regex_constants::match_flag_type);
-
template<typename _Bp, typename _Ap, typename _Cp, typename _Rp,
__detail::_RegexExecutorPolicy, bool>
friend bool
@@ -748,15 +740,9 @@ _GLIBCXX_END_NAMESPACE_VERSION
const basic_regex<_Cp, _Rp>&,
regex_constants::match_flag_type);
- template<typename, typename, typename, typename>
+ template<typename, typename, typename, bool>
friend class __detail::_Executor;
- template<typename, typename, typename, typename>
- friend class __detail::_DFSExecutor;
-
- template<typename, typename, typename, typename>
- friend class __detail::_BFSExecutor;
-
flag_type _M_flags;
_Rx_traits _M_traits;
_AutomatonPtr _M_automaton;
@@ -1851,15 +1837,9 @@ _GLIBCXX_END_NAMESPACE_VERSION
//@}
private:
- template<typename, typename, typename, typename>
+ template<typename, typename, typename, bool>
friend class __detail::_Executor;
- template<typename, typename, typename, typename>
- friend class __detail::_DFSExecutor;
-
- template<typename, typename, typename, typename>
- friend class __detail::_BFSExecutor;
-
template<typename, typename, typename>
friend class regex_iterator;
diff --git a/libstdc++-v3/include/bits/regex.tcc b/libstdc++-v3/include/bits/regex.tcc
index 7028480..2ac095d 100644
--- a/libstdc++-v3/include/bits/regex.tcc
+++ b/libstdc++-v3/include/bits/regex.tcc
@@ -28,6 +28,13 @@
* Do not attempt to use it directly. @headername{regex}
*/
+// See below __regex_algo_impl to get what this is talking about. The default
+// value 1 indicated a conservative optimization without giving up worst case
+// performance.
+#ifndef _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
+#define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 1
+#endif
+
namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
@@ -61,14 +68,41 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
for (auto& __it : __res)
__it.matched = false;
- auto __executor = __get_executor<_BiIter, _Alloc, _CharT, _TraitsT,
- __policy>(__s, __e, __res, __re, __flags);
-
+ // This function decide which executor to use under given circumstances.
+ // The _S_auto policy now is the following: if a NFA has no
+ // back-references and has more than _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
+ // quantifiers (*, +, ?), the BFS executor will be used, other wise
+ // DFS executor. This is because DFS executor has a exponential upper
+ // bound, but better best-case performace. Meanwhile, BFS executor can
+ // effectively prevent from exponential-long time matching (which must
+ // contains many quantifiers), but it's slower in average.
+ //
+ // For simple regex, BFS executor could be 2 or more times slower than
+ // DFS executor.
+ //
+ // Of course, BFS executor cannot handle back-references.
bool __ret;
- if (__match_mode)
- __ret = __executor->_M_match();
+ if (!__re._M_automaton->_M_has_backref
+ && (__policy == _RegexExecutorPolicy::_S_alternate
+ || __re._M_automaton->_M_quant_count
+ > _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT))
+ {
+ _Executor<_BiIter, _Alloc, _TraitsT, false>
+ __executor(__s, __e, __m, __re, __flags);
+ if (__match_mode)
+ __ret = __executor._M_match();
+ else
+ __ret = __executor._M_search();
+ }
else
- __ret = __executor->_M_search();
+ {
+ _Executor<_BiIter, _Alloc, _TraitsT, true>
+ __executor(__s, __e, __m, __re, __flags);
+ if (__match_mode)
+ __ret = __executor._M_match();
+ else
+ __ret = __executor._M_search();
+ }
if (__ret)
{
for (auto __it : __res)
diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h
index 6d9a83e..09555ba 100644
--- a/libstdc++-v3/include/bits/regex_executor.h
+++ b/libstdc++-v3/include/bits/regex_executor.h
@@ -30,10 +30,6 @@
// FIXME convert comments to doxygen format.
-// TODO Put _DFSExecutor and _BFSExecutor into one class. They are becoming
-// much more similar. Also, make grouping seperated. The
-// regex_constants::nosubs enables much more simpler execution.
-
namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
@@ -56,15 +52,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* @{
*/
- template<typename _BiIter, typename _Alloc,
- typename _CharT, typename _TraitsT>
+ template<typename _BiIter, typename _Alloc, typename _TraitsT,
+ bool __dfs_mode>
class _Executor
{
public:
- typedef basic_regex<_CharT, _TraitsT> _RegexT;
- typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
- typedef regex_constants::match_flag_type _FlagT;
- typedef typename _TraitsT::char_class_type _ClassT;
+ typedef typename iterator_traits<_BiIter>::value_type _CharT;
+ typedef basic_regex<_CharT, _TraitsT> _RegexT;
+ typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
+ typedef regex_constants::match_flag_type _FlagT;
+ typedef typename _TraitsT::char_class_type _ClassT;
+ typedef _NFA<_CharT, _TraitsT> _NFAT;
public:
_Executor(_BiIter __begin,
@@ -75,39 +73,48 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
: _M_begin(__begin),
_M_end(__end),
_M_re(__re),
+ _M_nfa(*__re._M_automaton),
_M_results(__results),
+ _M_match_queue(__dfs_mode ? nullptr
+ : new queue<pair<_StateIdT, _ResultsVec>>()),
+ _M_visited(__dfs_mode ? nullptr : new vector<bool>(_M_nfa.size())),
_M_flags((__flags & regex_constants::match_prev_avail)
? (__flags
& ~regex_constants::match_not_bol
& ~regex_constants::match_not_bow)
- : __flags)
- { }
-
- virtual
- ~_Executor()
+ : __flags),
+ _M_cur_results(),
+ _M_start_state(_M_nfa._M_start())
{ }
// Set matched when string exactly match the pattern.
bool
_M_match()
{
- _M_match_mode = true;
- _M_init(_M_begin);
- return _M_main();
+ _M_current = _M_begin;
+ return _M_main<true>();
}
// Set matched when some prefix of the string matches the pattern.
bool
_M_search_from_first()
{
- _M_match_mode = false;
- _M_init(_M_begin);
- return _M_main();
+ _M_current = _M_begin;
+ return _M_main<false>();
}
bool
_M_search();
+ private:
+ template<bool __match_mode>
+ void
+ _M_dfs(_StateIdT __start);
+
+ template<bool __match_mode>
+ bool
+ _M_main();
+
bool
_M_is_word(_CharT __ch) const
{
@@ -134,307 +141,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool
_M_word_boundry(_State<_CharT, _TraitsT> __state) const;
- virtual std::unique_ptr<_Executor>
- _M_clone() const = 0;
-
- // Return whether now match the given sub-NFA.
bool
- _M_lookahead(_State<_CharT, _TraitsT> __state) const
- {
- auto __sub = this->_M_clone();
- __sub->_M_set_start(__state._M_alt);
- return __sub->_M_search_from_first();
- }
-
- void
- _M_set_results(_ResultsVec& __cur_results);
-
- public:
- virtual void
- _M_init(_BiIter __cur) = 0;
-
- virtual void
- _M_set_start(_StateIdT __start) = 0;
-
- virtual bool
- _M_main() = 0;
-
- _BiIter _M_current;
- const _BiIter _M_begin;
- const _BiIter _M_end;
- const _RegexT& _M_re;
- _ResultsVec& _M_results;
- _FlagT _M_flags;
- bool _M_match_mode;
- };
-
- // A _DFSExecutor perform a DFS on given NFA and input string. At the very
- // beginning the executor stands in the start state, then it try every
- // possible state transition in current state recursively. Some state
- // transitions consume input string, say, a single-char-matcher or a
- // back-reference matcher; some not, like assertion or other anchor nodes.
- // When the input is exhausted and the current state is an accepting state,
- // the whole executor return true.
- //
- // TODO: This approach is exponentially slow for certain input.
- // Try to compile the NFA to a DFA.
- //
- // Time complexity: o(match_length), O(2^(_M_nfa->size()))
- // Space complexity: \theta(match_results.size() + match_length)
- template<typename _BiIter, typename _Alloc,
- typename _CharT, typename _TraitsT>
- class _DFSExecutor
- : public _Executor<_BiIter, _Alloc, _CharT, _TraitsT>
- {
- public:
- typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
- typedef _NFA<_CharT, _TraitsT> _NFAT;
- typedef typename _BaseT::_RegexT _RegexT;
- typedef typename _BaseT::_ResultsVec _ResultsVec;
- typedef typename _BaseT::_FlagT _FlagT;
+ _M_lookahead(_State<_CharT, _TraitsT> __state);
public:
- _DFSExecutor(_BiIter __begin,
- _BiIter __end,
- _ResultsVec& __results,
- const _RegexT& __re,
- _FlagT __flags)
- : _BaseT(__begin, __end, __results, __re, __flags),
- _M_nfa(__re._M_automaton), _M_start_state(_M_nfa->_M_start())
- { }
-
- private:
- void
- _M_init(_BiIter __cur)
- {
- _M_cur_results.resize(_M_nfa->_M_sub_count() + 2);
- this->_M_current = __cur;
- }
-
- void
- _M_set_start(_StateIdT __start)
- { _M_start_state = __start; }
-
- bool
- _M_main()
- { return _M_dfs(this->_M_start_state); }
-
- bool
- _M_dfs(_StateIdT __start);
-
- std::unique_ptr<_BaseT>
- _M_clone() const
- {
- return std::unique_ptr<_BaseT>(new _DFSExecutor(this->_M_current,
- this->_M_end,
- this->_M_results,
- this->_M_re,
- this->_M_flags));
- }
-
+ _ResultsVec _M_cur_results;
+ _BiIter _M_current;
+ const _BiIter _M_begin;
+ const _BiIter _M_end;
+ const _RegexT& _M_re;
+ const _NFAT& _M_nfa;
+ _ResultsVec& _M_results;
+ // Used in BFS, saving states that need to be considered for the next
+ // character.
+ std::unique_ptr<queue<pair<_StateIdT, _ResultsVec>>> _M_match_queue;
+ // Used in BFS, indicating that which state is already visited.
+ std::unique_ptr<vector<bool>> _M_visited;
+ _FlagT _M_flags;
// To record current solution.
- std::shared_ptr<_NFAT> _M_nfa;
- _ResultsVec _M_cur_results;
- _StateIdT _M_start_state;
- };
-
- // Like the DFS approach, it try every possible state transition; Unlike DFS,
- // it uses a queue instead of a stack to store matching states. It's a BFS
- // approach.
- //
- // Russ Cox's article(http://swtch.com/~rsc/regexp/regexp1.html) explained
- // this algorithm clearly.
- //
- // Every entry of _M_covered saves the solution(grouping status) for every
- // matching head. When states transit, solutions will be compared and
- // deduplicated(based on which greedy mode we have).
- //
- // Time complexity: o(match_length * (quantifier_number
- // + match_results.size())
- // O(match_length * _M_nfa->size()
- // * (quantifier_number + match_results.size())
- // Space complexity: o(quantifier_number + match_results.size())
- // O(_M_nfa->size()
- // * (quantifier_number + match_results.size())
- template<typename _BiIter, typename _Alloc,
- typename _CharT, typename _TraitsT>
- class _BFSExecutor
- : public _Executor<_BiIter, _Alloc, _CharT, _TraitsT>
- {
- public:
- typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
- typedef _NFA<_CharT, _TraitsT> _NFAT;
- typedef typename _BaseT::_RegexT _RegexT;
- typedef typename _BaseT::_ResultsVec _ResultsVec;
- typedef typename _BaseT::_FlagT _FlagT;
- // Here's a solution for greedy/ungreedy mode in BFS approach. We need to
- // carefully work out how to compare to conflict matching states.
- //
- // A matching state is a pair(where, when); `where` is a NFA node; `when`
- // is a _BiIter, indicating which char is the next to be matched. Two
- // matching states conflict if they have equivalent `where` and `when`.
- //
- // Now we need to drop one and keep another, because at most one of them
- // could be the final optimal solution. This behavior is affected by
- // greedy policy.
- //
- // The definition of `greedy`:
- // For the sequence of quantifiers in NFA sorted by their start positions,
- // now maintain a vector in every matching state, with length equal to
- // quantifier seq, recording repeating times of every quantifier. Now to
- // compare two matching states, we just lexically compare these two
- // vectors. To win the compare(to survive), one matching state needs to
- // make its greedy quantifier count larger, and ungreedy quantifiers
- // count smaller.
- //
- // In the implementation, we recorded negtive counts for greedy
- // quantifiers and positive counts of ungreedy ones. Now the implicit
- // operator<() for lexicographical_compare will emit the answer.
- //
- // When two vectors equal, it means the `where`, `when` and quantifier
- // counts are identical, and indicates the same solution; so
- // _ResultsEntry::operator<() just return false.
- struct _ResultsEntry
- : private _ResultsVec
- {
- public:
- _ResultsEntry(size_t __res_sz, size_t __sz)
- : _ResultsVec(__res_sz), _M_quant_keys(__sz)
- { }
-
- void
- resize(size_t __n)
- { _ResultsVec::resize(__n); }
-
- size_t
- size()
- { return _ResultsVec::size(); }
-
- sub_match<_BiIter>&
- operator[](size_t __idx)
- { return _ResultsVec::operator[](__idx); }
-
- bool
- operator<(const _ResultsEntry& __rhs) const
- {
- _GLIBCXX_DEBUG_ASSERT(_M_quant_keys.size()
- == __rhs._M_quant_keys.size());
- return lexicographical_compare(_M_quant_keys.begin(),
- _M_quant_keys.end(),
- __rhs._M_quant_keys.begin(),
- __rhs._M_quant_keys.end());
- }
-
- void
- _M_inc(size_t __idx, bool __neg)
- { _M_quant_keys[__idx] += __neg ? 1 : -1; }
-
- _ResultsVec&
- _M_get()
- { return *this; }
-
- public:
- std::vector<int> _M_quant_keys;
- };
- typedef std::unique_ptr<_ResultsEntry> _ResultsPtr;
-
- class _TodoList
- {
- public:
- explicit
- _TodoList(size_t __sz)
- : _M_states(), _M_exists(__sz, false)
- { }
-
- void _M_push(_StateIdT __u)
- {
- _GLIBCXX_DEBUG_ASSERT(__u < _M_exists.size());
- if (!_M_exists[__u])
- {
- _M_exists[__u] = true;
- _M_states.push_back(__u);
- }
- }
-
- _StateIdT _M_pop()
- {
- auto __ret = _M_states.back();
- _M_states.pop_back();
- _M_exists[__ret] = false;
- return __ret;
- }
-
- bool _M_empty() const
- { return _M_states.empty(); }
-
- void _M_clear()
- {
- _M_states.clear();
- _M_exists.assign(_M_exists.size(), false);
- }
-
- private:
- std::vector<_StateIdT> _M_states;
- std::vector<bool> _M_exists;
- };
-
- public:
- _BFSExecutor(_BiIter __begin,
- _BiIter __end,
- _ResultsVec& __results,
- const _RegexT& __re,
- _FlagT __flags)
- : _BaseT(__begin, __end, __results, __re, __flags),
- _M_nfa(__re._M_automaton), _M_match_stack(_M_nfa->size()),
- _M_stack(_M_nfa->size()), _M_start_state(_M_nfa->_M_start())
- { }
-
- private:
- void
- _M_init(_BiIter __cur)
- {
- this->_M_current = __cur;
- _M_covered.clear();
- _ResultsVec& __res(this->_M_results);
- _M_covered[this->_M_start_state] =
- _ResultsPtr(new _ResultsEntry(__res.size(),
- _M_nfa->_M_quant_count));
- _M_stack._M_push(this->_M_start_state);
- }
-
- void
- _M_set_start(_StateIdT __start)
- { _M_start_state = __start; }
-
- bool
- _M_main();
-
- void
- _M_e_closure();
-
- void
- _M_move();
-
- bool
- _M_includes_some();
-
- std::unique_ptr<_BaseT>
- _M_clone() const
- {
- return std::unique_ptr<_BaseT>(new _BFSExecutor(this->_M_current,
- this->_M_end,
- this->_M_results,
- this->_M_re,
- this->_M_flags));
- }
-
- std::shared_ptr<_NFAT> _M_nfa;
- std::map<_StateIdT, _ResultsPtr> _M_covered;
- _TodoList _M_match_stack;
- _TodoList _M_stack;
- _StateIdT _M_start_state;
- // To record global optimal solution.
- _ResultsPtr _M_cur_results;
+ _StateIdT _M_start_state;
+ // Do we have a solution so far?
+ bool _M_has_sol;
};
//@} regex-detail
diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc
index 7c084ad..d3b9a04 100644
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -28,22 +28,15 @@
* Do not attempt to use it directly. @headername{regex}
*/
-// See below __get_executor to get what this is talking about. The default
-// value 1 indicated a conservative optimization without giving up worst case
-// performance.
-#ifndef _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
-#define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 1
-#endif
-
namespace std _GLIBCXX_VISIBILITY(default)
{
namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
- template<typename _BiIter, typename _Alloc,
- typename _CharT, typename _TraitsT>
- bool _Executor<_BiIter, _Alloc, _CharT, _TraitsT>::
+ template<typename _BiIter, typename _Alloc, typename _TraitsT,
+ bool __dfs_mode>
+ bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_search()
{
if (_M_flags & regex_constants::match_continuous)
@@ -51,9 +44,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __cur = _M_begin;
do
{
- _M_match_mode = false;
- _M_init(__cur);
- if (_M_main())
+ _M_current = __cur;
+ if (_M_main<false>())
return true;
}
// Continue when __cur == _M_end
@@ -61,24 +53,141 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return false;
}
- template<typename _BiIter, typename _Alloc,
- typename _CharT, typename _TraitsT>
- bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
+ template<typename _BiIter, typename _Alloc, typename _TraitsT,
+ bool __dfs_mode>
+ template<bool __match_mode>
+ bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+ _M_main()
+ {
+ if (__dfs_mode)
+ {
+ _M_has_sol = false;
+ _M_cur_results = _M_results;
+ _M_dfs<__match_mode>(_M_start_state);
+ return _M_has_sol;
+ }
+ else
+ {
+ // Like the DFS approach, it try every possible state transition;
+ // Unlike DFS, it uses a queue instead of a stack to store matching
+ // states. It's a BFS approach.
+ //
+ // Russ Cox's article(http://swtch.com/~rsc/regexp/regexp1.html)
+ // explained this algorithm clearly.
+ //
+ // Time complexity: o(match_length * match_results.size())
+ // O(match_length * _M_nfa.size()
+ // * match_results.size())
+ // Space complexity: o(_M_nfa.size() + match_results.size())
+ // O(_M_nfa.size() * match_results.size())
+ _M_match_queue->push(make_pair(_M_start_state, _M_results));
+ bool __ret = false;
+ while (1)
+ {
+ _M_has_sol = false;
+ if (_M_match_queue->empty())
+ break;
+ _M_visited->assign(_M_visited->size(), false);
+ auto _M_old_queue = std::move(*_M_match_queue);
+ while (!_M_old_queue.empty())
+ {
+ auto __task = _M_old_queue.front();
+ _M_old_queue.pop();
+ _M_cur_results = __task.second;
+ _M_dfs<__match_mode>(__task.first);
+ }
+ if (!__match_mode)
+ __ret |= _M_has_sol;
+ if (_M_current == _M_end)
+ break;
+ ++_M_current;
+ }
+ if (__match_mode)
+ __ret = _M_has_sol;
+ return __ret;
+ }
+ }
+
+ // Return whether now match the given sub-NFA.
+ template<typename _BiIter, typename _Alloc, typename _TraitsT,
+ bool __dfs_mode>
+ bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+ _M_lookahead(_State<_Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
+ _CharT, _TraitsT> __state)
+ {
+ _ResultsVec __what(_M_cur_results.size());
+ auto __sub = std::unique_ptr<_Executor>(new _Executor(_M_current,
+ _M_end,
+ __what,
+ _M_re,
+ _M_flags));
+ __sub->_M_start_state = __state._M_alt;
+ if (__sub->_M_search_from_first())
+ {
+ for (size_t __i = 0; __i < __what.size(); __i++)
+ if (__what[__i].matched)
+ _M_cur_results[__i] = __what[__i];
+ return true;
+ }
+ return false;
+ }
+
+ // A _DFSExecutor perform a DFS on given NFA and input string. At the very
+ // beginning the executor stands in the start state, then it try every
+ // possible state transition in current state recursively. Some state
+ // transitions consume input string, say, a single-char-matcher or a
+ // back-reference matcher; some not, like assertion or other anchor nodes.
+ // When the input is exhausted and the current state is an accepting state,
+ // the whole executor return true.
+ //
+ // TODO: This approach is exponentially slow for certain input.
+ // Try to compile the NFA to a DFA.
+ //
+ // Time complexity: o(match_length), O(2^(_M_nfa.size()))
+ // Space complexity: \theta(match_results.size() + match_length)
+ //
+ template<typename _BiIter, typename _Alloc, typename _TraitsT,
+ bool __dfs_mode>
+ template<bool __match_mode>
+ void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_dfs(_StateIdT __i)
{
- auto& __current = this->_M_current;
- const auto& __state = (*_M_nfa)[__i];
- bool __ret = false;
+ if (!__dfs_mode)
+ {
+ if ((*_M_visited)[__i])
+ return;
+ (*_M_visited)[__i] = true;
+ }
+
+ const auto& __state = _M_nfa[__i];
switch (__state._M_opcode)
{
case _S_opcode_alternative:
// Greedy or not, this is a question ;)
if (!__state._M_neg)
- __ret = _M_dfs(__state._M_alt)
- || _M_dfs(__state._M_next);
+ {
+ _M_dfs<__match_mode>(__state._M_alt);
+ if (!__dfs_mode || !_M_has_sol)
+ _M_dfs<__match_mode>(__state._M_next);
+ }
else
- __ret = _M_dfs(__state._M_next)
- || _M_dfs(__state._M_alt);
+ {
+ if (__dfs_mode)
+ {
+ _M_dfs<__match_mode>(__state._M_next);
+ if (!_M_has_sol)
+ _M_dfs<__match_mode>(__state._M_alt);
+ }
+ else
+ {
+ if (!_M_has_sol)
+ {
+ _M_dfs<__match_mode>(__state._M_next);
+ if (!_M_has_sol)
+ _M_dfs<__match_mode>(__state._M_alt);
+ }
+ }
+ }
break;
case _S_opcode_subexpr_begin:
// Here's the critical part: if there's nothing changed since last
@@ -88,273 +197,128 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Every change on _M_cur_results will be roll back after the
// recursion step finished.
if (!_M_cur_results[__state._M_subexpr].matched
- || _M_cur_results[__state._M_subexpr].first != __current)
+ || _M_cur_results[__state._M_subexpr].first != _M_current)
{
- auto __back = _M_cur_results[__state._M_subexpr].first;
- _M_cur_results[__state._M_subexpr].first = __current;
- __ret = _M_dfs(__state._M_next);
- _M_cur_results[__state._M_subexpr].first = __back;
+ auto& __res = _M_cur_results[__state._M_subexpr];
+ auto __back = __res.first;
+ __res.first = _M_current;
+ _M_dfs<__match_mode>(__state._M_next);
+ __res.first = __back;
}
break;
case _S_opcode_subexpr_end:
- if (_M_cur_results[__state._M_subexpr].second != __current
+ if (_M_cur_results[__state._M_subexpr].second != _M_current
|| _M_cur_results[__state._M_subexpr].matched != true)
{
- auto __back = _M_cur_results[__state._M_subexpr];
- _M_cur_results[__state._M_subexpr].second = __current;
- _M_cur_results[__state._M_subexpr].matched = true;
- __ret = _M_dfs(__state._M_next);
- _M_cur_results[__state._M_subexpr] = __back;
+ auto& __res = _M_cur_results[__state._M_subexpr];
+ auto __back = __res;
+ __res.second = _M_current;
+ __res.matched = true;
+ _M_dfs<__match_mode>(__state._M_next);
+ __res = __back;
}
else
- __ret = _M_dfs(__state._M_next);
+ _M_dfs<__match_mode>(__state._M_next);
break;
case _S_opcode_line_begin_assertion:
- if (this->_M_at_begin())
- __ret = _M_dfs(__state._M_next);
+ if (_M_at_begin())
+ _M_dfs<__match_mode>(__state._M_next);
break;
case _S_opcode_line_end_assertion:
- if (this->_M_at_end())
- __ret = _M_dfs(__state._M_next);
+ if (_M_at_end())
+ _M_dfs<__match_mode>(__state._M_next);
break;
case _S_opcode_word_boundry:
- if (this->_M_word_boundry(__state) == !__state._M_neg)
- __ret = _M_dfs(__state._M_next);
+ if (_M_word_boundry(__state) == !__state._M_neg)
+ _M_dfs<__match_mode>(__state._M_next);
break;
// Here __state._M_alt offers a single start node for a sub-NFA.
// We recursivly invoke our algorithm to match the sub-NFA.
case _S_opcode_subexpr_lookahead:
- if (this->_M_lookahead(__state) == !__state._M_neg)
- __ret = _M_dfs(__state._M_next);
+ if (_M_lookahead(__state) == !__state._M_neg)
+ _M_dfs<__match_mode>(__state._M_next);
break;
case _S_opcode_match:
- if (__current != this->_M_end && __state._M_matches(*__current))
+ if (__dfs_mode)
{
- ++__current;
- __ret = _M_dfs(__state._M_next);
- --__current;
+ if (_M_current != _M_end && __state._M_matches(*_M_current))
+ {
+ ++_M_current;
+ _M_dfs<__match_mode>(__state._M_next);
+ --_M_current;
+ }
}
+ else
+ if (__state._M_matches(*_M_current))
+ _M_match_queue->push(make_pair(__state._M_next, _M_cur_results));
break;
// First fetch the matched result from _M_cur_results as __submatch;
// then compare it with
- // (__current, __current + (__submatch.second - __submatch.first))
+ // (_M_current, _M_current + (__submatch.second - __submatch.first))
// If matched, keep going; else just return to try another state.
case _S_opcode_backref:
{
+ _GLIBCXX_DEBUG_ASSERT(__dfs_mode);
auto& __submatch = _M_cur_results[__state._M_backref_index];
if (!__submatch.matched)
break;
- auto __last = __current;
+ auto __last = _M_current;
for (auto __tmp = __submatch.first;
- __last != this->_M_end && __tmp != __submatch.second;
+ __last != _M_end && __tmp != __submatch.second;
++__tmp)
++__last;
- if (this->_M_re._M_traits.transform(__submatch.first,
+ if (_M_re._M_traits.transform(__submatch.first,
__submatch.second)
- == this->_M_re._M_traits.transform(__current, __last))
+ == _M_re._M_traits.transform(_M_current, __last))
{
- if (__last != __current)
+ if (__last != _M_current)
{
- auto __backup = __current;
- __current = __last;
- __ret = _M_dfs(__state._M_next);
- __current = __backup;
+ auto __backup = _M_current;
+ _M_current = __last;
+ _M_dfs<__match_mode>(__state._M_next);
+ _M_current = __backup;
}
else
- __ret = _M_dfs(__state._M_next);
+ _M_dfs<__match_mode>(__state._M_next);
}
}
break;
case _S_opcode_accept:
- if (this->_M_match_mode)
- __ret = __current == this->_M_end;
- else
- __ret = true;
- if (__current == this->_M_begin
- && (this->_M_flags & regex_constants::match_not_null))
- __ret = false;
- if (__ret)
- this->_M_set_results(_M_cur_results);
- break;
- default:
- _GLIBCXX_DEBUG_ASSERT(false);
- }
- return __ret;
- }
-
- template<typename _BiIter, typename _Alloc,
- typename _CharT, typename _TraitsT>
- bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
- _M_main()
- {
- _M_e_closure();
- bool __ret = false;
- if (!this->_M_match_mode
- && !(this->_M_flags & regex_constants::match_not_null))
- __ret = _M_includes_some() || __ret;
- while (this->_M_current != this->_M_end)
- {
- _M_move();
- ++this->_M_current;
- if (_M_stack._M_empty())
- break;
- _M_e_closure();
- if (!this->_M_match_mode)
- // To keep regex_search greedy, no "return true" here.
- __ret = _M_includes_some() || __ret;
- }
- if (this->_M_match_mode)
- __ret = _M_includes_some();
- if (__ret)
- this->_M_set_results(_M_cur_results->_M_get());
- _M_match_stack._M_clear();
- _GLIBCXX_DEBUG_ASSERT(_M_stack._M_empty());
- return __ret;
- }
-
- template<typename _BiIter, typename _Alloc,
- typename _CharT, typename _TraitsT>
- void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
- _M_e_closure()
- {
- auto& __current = this->_M_current;
-
- while (!_M_stack._M_empty())
- {
- auto __u = _M_stack._M_pop();
- _GLIBCXX_DEBUG_ASSERT(_M_covered.count(__u));
- const auto& __state = (*_M_nfa)[__u];
-
- // Can be implemented using method, but there will be too many
- // arguments. I would use macro function before C++11, but lambda is
- // a better choice, since hopefully compiler can inline it.
- auto __add_visited_state = [=](_StateIdT __v)
- {
- if (_M_covered.count(__v) == 0)
- {
- _M_covered[__v] =
- _ResultsPtr(new _ResultsEntry(*_M_covered[__u]));
- _M_stack._M_push(__v);
- return;
- }
- auto& __cu = _M_covered[__u];
- auto& __cv = _M_covered[__v];
- if (*__cu < *__cv)
- {
- __cv = _ResultsPtr(new _ResultsEntry(*__cu));
- // if a state is updated, it's outgoing neighbors should be
- // reconsidered too. Push them to the queue.
- _M_stack._M_push(__v);
- }
- };
-
- // Identical to DFS's switch part.
- switch (__state._M_opcode)
+ if (__dfs_mode)
{
- // Needs to maintain quantifier count vector here. A quantifier
- // must be concerned with a alt node.
- case _S_opcode_alternative:
- {
- __add_visited_state(__state._M_next);
- auto& __cu = *_M_covered[__u];
- auto __back = __cu._M_quant_keys[__state._M_quant_index];
- __cu._M_inc(__state._M_quant_index, __state._M_neg);
- __add_visited_state(__state._M_alt);
- __cu._M_quant_keys[__state._M_quant_index] = __back;
- }
- break;
- case _S_opcode_subexpr_begin:
- {
- auto& __sub = (*_M_covered[__u])[__state._M_subexpr];
- if (!__sub.matched || __sub.first != __current)
- {
- auto __back = __sub.first;
- __sub.first = __current;
- __add_visited_state(__state._M_next);
- __sub.first = __back;
- }
- }
- break;
- case _S_opcode_subexpr_end:
- {
- auto& __cu = *_M_covered[__u];
- auto __back = __cu[__state._M_subexpr];
- __cu[__state._M_subexpr].second = __current;
- __cu[__state._M_subexpr].matched = true;
- __add_visited_state(__state._M_next);
- __cu[__state._M_subexpr] = __back;
- }
- break;
- case _S_opcode_line_begin_assertion:
- if (this->_M_at_begin())
- __add_visited_state(__state._M_next);
- break;
- case _S_opcode_line_end_assertion:
- if (this->_M_at_end())
- __add_visited_state(__state._M_next);
- break;
- case _S_opcode_word_boundry:
- if (this->_M_word_boundry(__state) == !__state._M_neg)
- __add_visited_state(__state._M_next);
- break;
- case _S_opcode_subexpr_lookahead:
- if (this->_M_lookahead(__state) == !__state._M_neg)
- __add_visited_state(__state._M_next);
- break;
- case _S_opcode_match:
- _M_match_stack._M_push(__u);
- break;
- case _S_opcode_accept:
- break;
- default:
- _GLIBCXX_DEBUG_ASSERT(false);
+ _GLIBCXX_DEBUG_ASSERT(!_M_has_sol);
+ if (__match_mode)
+ _M_has_sol = _M_current == _M_end;
+ else
+ _M_has_sol = true;
+ if (_M_current == _M_begin
+ && (_M_flags & regex_constants::match_not_null))
+ _M_has_sol = false;
+ if (_M_has_sol)
+ _M_results = _M_cur_results;
}
- }
- }
-
- template<typename _BiIter, typename _Alloc,
- typename _CharT, typename _TraitsT>
- void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
- _M_move()
- {
- decltype(_M_covered) __next;
- while (!_M_match_stack._M_empty())
- {
- auto __u = _M_match_stack._M_pop();
- const auto& __state = (*_M_nfa)[__u];
- auto& __cu = _M_covered[__u];
- if (__state._M_matches(*this->_M_current)
- && (__next.count(__state._M_next) == 0
- || *__cu < *__next[__state._M_next]))
+ else
{
- __next[__state._M_next] = std::move(__cu);
- _M_stack._M_push(__state._M_next);
+ if (_M_current == _M_begin
+ && (_M_flags & regex_constants::match_not_null))
+ break;
+ if (!__match_mode || _M_current == _M_end)
+ if (!_M_has_sol)
+ {
+ _M_has_sol = true;
+ _M_results = _M_cur_results;
+ }
}
+ break;
+ default:
+ _GLIBCXX_DEBUG_ASSERT(false);
}
- _M_covered = move(__next);
- }
-
- template<typename _BiIter, typename _Alloc,
- typename _CharT, typename _TraitsT>
- bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
- _M_includes_some()
- {
- bool __succ = false;
- for (auto __u : _M_nfa->_M_final_states())
- if (_M_covered.count(__u))
- {
- __succ = true;
- auto& __cu = _M_covered[__u];
- if (_M_cur_results == nullptr || *__cu < *_M_cur_results)
- _M_cur_results = _ResultsPtr(new _ResultsEntry(*__cu));
- }
- return __succ;
}
// Return whether now is at some word boundry.
- template<typename _BiIter, typename _Alloc,
- typename _CharT, typename _TraitsT>
- bool _Executor<_BiIter, _Alloc, _CharT, _TraitsT>::
+ template<typename _BiIter, typename _Alloc, typename _TraitsT,
+ bool __dfs_mode>
+ bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_word_boundry(_State<_CharT, _TraitsT> __state) const
{
// By definition.
@@ -376,54 +340,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return __ans;
}
- template<typename _BiIter, typename _Alloc,
- typename _CharT, typename _TraitsT>
- void _Executor<_BiIter, _Alloc, _CharT, _TraitsT>::
- _M_set_results(_ResultsVec& __cur_results)
- {
- for (size_t __i = 0; __i < __cur_results.size(); ++__i)
- if (__cur_results[__i].matched)
- _M_results[__i] = __cur_results[__i];
- }
-
- enum class _RegexExecutorPolicy : int
- { _S_auto, _S_alternate };
-
- // This function decide which executor to use under given circumstances.
- // The _S_auto policy now is the following: if a NFA has no back-references
- // and has more than _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT quantifiers
- // (*, +, ?), the _BFSExecutor will be used, other wise _DFSExecutor. This is
- // because _DFSExecutor has a exponential upper bound, but better best-case
- // performace. Meanwhile, _BFSExecutor can effectively prevent from
- // exponential-long time matching (which must contains many quantifiers), but
- // it's slower in average.
- //
- // For simple regex, _BFSExecutor could be 2 or more times slower than
- // _DFSExecutor.
- //
- // Of course, _BFSExecutor cannot handle back-references.
- template<typename _BiIter, typename _Alloc,
- typename _CharT, typename _TraitsT,
- _RegexExecutorPolicy __policy>
- std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
- __get_executor(_BiIter __b,
- _BiIter __e,
- std::vector<sub_match<_BiIter>, _Alloc>& __m,
- const basic_regex<_CharT, _TraitsT>& __re,
- regex_constants::match_flag_type __flags)
- {
- typedef std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
- _ExecutorPtr;
- typedef _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _DFSExecutorT;
- typedef _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _BFSExecutorT;
- if (!__re._M_automaton->_M_has_backref
- && (__policy == _RegexExecutorPolicy::_S_alternate
- || __re._M_automaton->_M_quant_count
- > _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT))
- return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, __re, __flags));
- return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, __re, __flags));
- }
-
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace __detail
} // namespace
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/ungreedy.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/ungreedy.cc
new file mode 100644
index 0000000..ed26ebb
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/ungreedy.cc
@@ -0,0 +1,50 @@
+// { dg-options "-std=gnu++11" }
+
+//
+// 2013-10-24 Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// 28.11.2 regex_match
+// Tests ECMAScript ungreedy match.
+
+#include <regex>
+#include <testsuite_hooks.h>
+#include <testsuite_regex.h>
+
+using namespace __gnu_test;
+using namespace std;
+
+void
+test01()
+{
+ bool test __attribute__((unused)) = true;
+
+ regex re("(a*?)*?");
+ cmatch m;
+ VERIFY(regex_match("a", m, re));
+ VERIFY(m.size() == 2);
+ VERIFY(string(m[0].first, m[0].second) == "a");
+}
+
+int
+main()
+{
+ test01();
+ return 0;
+}
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc
deleted file mode 100644
index 50141f0..0000000
--- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc
+++ /dev/null
@@ -1,72 +0,0 @@
-// { dg-options "-std=gnu++11" }
-
-//
-// 2013-07-29 Tim Shen <timshen91@gmail.com>
-//
-// Copyright (C) 2013 Free Software Foundation, Inc.
-//
-// This file is part of the GNU ISO C++ Library. This library is free
-// software; you can redistribute it and/or modify it under the
-// terms of the GNU General Public License as published by the
-// Free Software Foundation; either version 3, or (at your option)
-// any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License along
-// with this library; see the file COPYING3. If not see
-// <http://www.gnu.org/licenses/>.
-
-// 28.11.2 regex_match
-// Tests Extended automatic matcher dispatching against a std::string target.
-
-#include <regex>
-#include <testsuite_hooks.h>
-
-using namespace std;
-
-template<typename _Bi_iter, typename _Alloc,
- typename _Ch_type, typename _Rx_traits>
- void
- fake_match(_Bi_iter __s,
- _Bi_iter __e,
- match_results<_Bi_iter, _Alloc>& __m,
- const basic_regex<_Ch_type, _Rx_traits>& __re,
- regex_constants::match_flag_type __flags
- = regex_constants::match_default)
- {
- using namespace __detail;
- auto& __res = (vector<sub_match<_Bi_iter>, _Alloc>&)(__m);
- VERIFY( (dynamic_cast
- <_DFSExecutor<_Bi_iter, _Alloc, _Ch_type, _Rx_traits>*>
- (&*__get_executor<_Bi_iter, _Alloc, _Ch_type, _Rx_traits,
- _RegexExecutorPolicy::_S_auto>(__s, __e, __res, __re, __flags))
- != nullptr) );
- }
-
-void
-test01()
-{
- bool test __attribute__((unused)) = true;
-
- regex re("()(one(.*))abc\\1"); // backref cause DFS
- const string target("onetwoabc");
- smatch m;
- fake_match(target.begin(), target.end(), m, re);
-
- regex_match(target, m, re);
- VERIFY( m[2].matched );
- VERIFY( m[3].matched );
- VERIFY( std::string(m[2].first, m[2].second) == "onetwo" );
- VERIFY( std::string(m[3].first, m[3].second) == "two" );
-}
-
-int
-main()
-{
- test01();
- return 0;
-}
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/greedy.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/greedy.cc
index 107ced0..5821bba 100644
--- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/greedy.cc
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/greedy.cc
@@ -54,9 +54,9 @@ test01()
VERIFY(regex_search_debug("aaaa", m, regex("(a+)(a+)")));
TEST(1, "aaa");
TEST(2, "a");
- VERIFY(regex_search_debug("aaaa", m, regex("(a+?)(a+)")));
- TEST(1, "a");
- TEST(2, "aaa");
+ VERIFY(regex_search_debug("aaaa", m, regex("(a+)(a+?)")));
+ TEST(1, "aaa");
+ TEST(2, "a");
VERIFY(regex_search_debug("aaaa", m, regex("(a+?)(a+)")));
TEST(1, "a");
TEST(2, "aaa");
diff --git a/libstdc++-v3/testsuite/performance/28_regex/split_bfs.cc b/libstdc++-v3/testsuite/performance/28_regex/split_bfs.cc
new file mode 100644
index 0000000..ecaa96c
--- /dev/null
+++ b/libstdc++-v3/testsuite/performance/28_regex/split_bfs.cc
@@ -0,0 +1,116 @@
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// <http://www.gnu.org/licenses/>.
+
+// 2013-10-25 Tim Shen <timshen91@gmail.com>
+
+#define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 0
+#include <testsuite_performance.h>
+#include <regex>
+
+using namespace __gnu_test;
+using namespace std;
+
+void split(string s)
+{
+ regex re("\\s+");
+ for (auto it = sregex_token_iterator(s.begin(), s.end(), re, -1);
+ it != sregex_token_iterator();
+ ++it)
+ {
+ }
+}
+
+int main()
+{
+ string source = "\
+// Copyright (C) 2013 Free Software Foundation, Inc.\n\
+//\n\
+// This file is part of the GNU ISO C++ Library. This library is free\n\
+// software; you can redistribute it and/or modify it under the\n\
+// terms of the GNU General Public License as published by the\n\
+// Free Software Foundation; either version 3, or (at your option)\n\
+// any later version.\n\
+\n\
+// This library is distributed in the hope that it will be useful,\n\
+// but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
+// GNU General Public License for more details.\n\
+\n\
+// You should have received a copy of the GNU General Public License along\n\
+// with this library; see the file COPYING3. If not see\n\
+// <http://www.gnu.org/licenses/>.\n\
+\n\
+// 2013-10-08 Tim Shen <timshen91@gmail.com>\n\
+\n\
+#include <testsuite_performance.h>\n\
+#include <regex>\n\
+\n\
+using namespace __gnu_test;\n\
+using namespace std;\n\
+\n\
+void split(string s)\n\
+{\n\
+ regex re(\"\\s+\");\n\
+ for (auto it = sregex_token_iterator(s.begin(), s.end(), re, -1);\n\
+ it != sregex_token_iterator();\n\
+ ++it)\n\
+ {\n\
+ }\n\
+}\n\
+\n\
+int main()\n\
+{\n\
+ string source = \"\";\n\
+ time_counter time;\n\
+ resource_counter resource;\n\
+\n\
+ source = source + source;\n\
+ source = source + source;\n\
+ source = source + source;\n\
+ source = source + source;\n\
+ source = source + source;\n\
+ source = source + source;\n\
+ source = source + source;\n\
+ source = source + source;\n\
+\n\
+ start_counters(time, resource);\n\
+ split(source);\n\
+ stop_counters(time, resource);\n\
+ report_performance(__FILE__, \"\", time, resource);\n\
+\n\
+ return 0;\n\
+}\n";
+
+ time_counter time;
+ resource_counter resource;
+
+ source = source + source;
+ source = source + source;
+ source = source + source;
+ source = source + source;
+ source = source + source;
+ source = source + source;
+ source = source + source;
+ source = source + source;
+
+ start_counters(time, resource);
+ split(source);
+ stop_counters(time, resource);
+ report_performance(__FILE__, "", time, resource);
+
+ return 0;
+}
diff --git a/libstdc++-v3/testsuite/util/testsuite_regex.h b/libstdc++-v3/testsuite/util/testsuite_regex.h
index bc0f3d5..596781a 100644
--- a/libstdc++-v3/testsuite/util/testsuite_regex.h
+++ b/libstdc++-v3/testsuite/util/testsuite_regex.h
@@ -150,7 +150,8 @@ namespace __gnu_test
auto __res2 = __regex_algo_impl<_Bi_iter, _Alloc, _Ch_type, _Rx_traits,
_RegexExecutorPolicy::_S_alternate, true>
(__s, __e, __mm, __re, __flags);
- if (__res1 == __res2 && __m == __mm)
+ // __m is unspecified if return value is false.
+ if (__res1 == __res2 && (!__res1 || __m == __mm))
return __res1;
throw(std::exception());
}