On Sat, Jul 25, 2015 at 8:31 AM, Jonathan Wakely <jwakely@redhat.com> wrote:
On 25/07/15 00:11 -0700, Tim Shen wrote:
It's not a very necessary refactoring, but simply can't resist. :)
I'm not sure of the ::memcpy calls. It looks not very idiomatic, but
std::copy on char* looks even more weird? :/
The ::memcpy(this, &rhs, sizeof(rhs))...) makes me quite
uncomfortable, because _State is not trivially-copyable, although its
_State_base base class _is_ trivially-copyable, and is at the same
address and has the same size ... so I think it's safe.
But couldn't you replace that memcpy with an assignment?
_State_base::operator=(__rhs);
Done.
The implicitly defined assignment operator should do the same thing as
a memcpy.
_State should have a deleted copy assignment operator though (or a
user-provided one that correctly handles the _S_opcode_match case, but
since it's not needed it should just be deleted).
Actually it's needed in _StateSeq::_M_clone, but I defined a normal
member function _State::_M_clone to avoid unexpected copying.
diff --git a/libstdc++-v3/include/bits/regex_automaton.h b/libstdc++-v3/include/bits/regex_automaton.h
index fc0eb41..c9f7bb3 100644
--- a/libstdc++-v3/include/bits/regex_automaton.h
+++ b/libstdc++-v3/include/bits/regex_automaton.h
@@ -70,51 +70,115 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_opcode_accept,
};
- struct _State_base
- {
- _Opcode _M_opcode; // type of outgoing transition
- _StateIdT _M_next; // outgoing transition
- union // Since they are mutually exclusive.
+ template<size_t _Matcher_size>
+ struct _State_base
{
- size_t _M_subexpr; // for _S_opcode_subexpr_*
- size_t _M_backref_index; // for _S_opcode_backref
- struct
+ protected:
+ _Opcode _M_opcode; // type of outgoing transition
+
+ public:
+ _StateIdT _M_next; // outgoing transition
+ union // Since they are mutually exclusive.
{
- // for _S_opcode_alternative, _S_opcode_repeat and
- // _S_opcode_subexpr_lookahead
- _StateIdT _M_alt;
- // for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or
- // quantifiers (ungreedy if set true)
- bool _M_neg;
+ size_t _M_subexpr; // for _S_opcode_subexpr_*
+ size_t _M_backref_index; // for _S_opcode_backref
+ struct
+ {
+ // for _S_opcode_alternative, _S_opcode_repeat and
+ // _S_opcode_subexpr_lookahead
+ _StateIdT _M_alt;
+ // for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or
+ // quantifiers (ungreedy if set true)
+ bool _M_neg;
+ };
+ char _M_matcher_storage[_Matcher_size]; // for _S_opcode_match
};
- };
- explicit _State_base(_Opcode __opcode)
- : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
- { }
+ protected:
+ _State_base() : _M_opcode(_S_opcode_unknown) { }
- protected:
- ~_State_base() = default;
+ explicit _State_base(_Opcode __opcode)
+ : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
+ { }
+
+ public:
+ bool
+ _M_has_alt()
+ {
+ return _M_opcode == _S_opcode_alternative
+ || _M_opcode == _S_opcode_repeat
+ || _M_opcode == _S_opcode_subexpr_lookahead;
+ }
- public:
#ifdef _GLIBCXX_DEBUG
- std::ostream&
- _M_print(std::ostream& ostr) const;
+ std::ostream&
+ _M_print(std::ostream& ostr) const;
- // Prints graphviz dot commands for state.
- std::ostream&
- _M_dot(std::ostream& __ostr, _StateIdT __id) const;
+ // Prints graphviz dot commands for state.
+ std::ostream&
+ _M_dot(std::ostream& __ostr, _StateIdT __id) const;
#endif
- };
+ };
- template<typename _TraitsT>
- struct _State : _State_base
+ template<typename _Char_type>
+ struct _State : _State_base<sizeof(_Matcher<_Char_type>)>
{
- typedef _Matcher<typename _TraitsT::char_type> _MatcherT;
+ typedef _Matcher<_Char_type> _MatcherT;
+ typedef _State_base<sizeof(_MatcherT)> _Base_type;
- _MatcherT _M_matches; // for _S_opcode_match
+ explicit
+ _State(_Opcode __opcode) : _Base_type(__opcode)
+ {
+ if (_M_opcode() == _S_opcode_match)
+ new (this->_M_matcher_storage) _MatcherT();
+ }
+
+ _State(_State&& __rhs)
+ {
+ _Base_type::operator=(__rhs);
+ if (__rhs._M_opcode() == _S_opcode_match)
+ new (this->_M_matcher_storage)
+ _MatcherT(std::move(__rhs._M_get_matcher()));
+ }
+
+ ~_State()
+ {
+ if (_M_opcode() == _S_opcode_match)
+ _M_get_matcher().~_MatcherT();
+ }
- explicit _State(_Opcode __opcode) : _State_base(__opcode) { }
+ // Since correct ctor and dtor rely on _M_opcode, it's better not to
+ // change it over time.
+ _Opcode
+ _M_opcode() const
+ { return _Base_type::_M_opcode; }
+
+ bool
+ _M_matches(_Char_type __char) const
+ { return _M_get_matcher()(__char); }
+
+ _MatcherT&
+ _M_get_matcher()
+ { return *reinterpret_cast<_MatcherT*>(this->_M_matcher_storage); }
+
+ const _MatcherT&
+ _M_get_matcher() const
+ { return *reinterpret_cast<const _MatcherT*>(this->_M_matcher_storage); }
+
+ _State
+ _M_clone() const
+ { return *this; }
+
+ private:
+ _State() = default;
+
+ _State(const _State& __rhs)
+ {
+ _Base_type::operator=(__rhs);
+ if (__rhs._M_opcode() == _S_opcode_match)
+ new (this->_M_matcher_storage)
+ _MatcherT(__rhs._M_get_matcher());
+ }
};
struct _NFA_base
@@ -155,10 +219,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename _TraitsT>
struct _NFA
- : _NFA_base, std::vector<_State<_TraitsT>>
+ : _NFA_base, std::vector<_State<typename _TraitsT::char_type>>
{
- typedef _State<_TraitsT> _StateT;
- typedef _Matcher<typename _TraitsT::char_type> _MatcherT;
+ typedef typename _TraitsT::char_type _Char_type;
+ typedef _State<_Char_type> _StateT;
+ typedef _Matcher<_Char_type> _MatcherT;
_NFA(const typename _TraitsT::locale_type& __loc, _FlagT __flags)
: _NFA_base(__flags)
@@ -202,7 +267,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_insert_matcher(_MatcherT __m)
{
_StateT __tmp(_S_opcode_match);
- __tmp._M_matches = std::move(__m);
+ __tmp._M_get_matcher() = std::move(__m);
return _M_insert_state(std::move(__tmp));
}
diff --git a/libstdc++-v3/include/bits/regex_automaton.tcc b/libstdc++-v3/include/bits/regex_automaton.tcc
index 72fe978..0f5187c 100644
--- a/libstdc++-v3/include/bits/regex_automaton.tcc
+++ b/libstdc++-v3/include/bits/regex_automaton.tcc
@@ -35,101 +35,103 @@ namespace __detail
_GLIBCXX_BEGIN_NAMESPACE_VERSION
#ifdef _GLIBCXX_DEBUG
- inline std::ostream&
- _State_base::_M_print(std::ostream& ostr) const
- {
- switch (_M_opcode)
+ template<size_t _Matcher_size>
+ std::ostream&
+ _State_base<_Matcher_size>::_M_print(std::ostream& ostr) const
{
- case _S_opcode_alternative:
- case _S_opcode_repeat:
- ostr << "alt next=" << _M_next << " alt=" << _M_alt;
- break;
- case _S_opcode_subexpr_begin:
- ostr << "subexpr begin next=" << _M_next << " index=" << _M_subexpr;
- break;
- case _S_opcode_subexpr_end:
- ostr << "subexpr end next=" << _M_next << " index=" << _M_subexpr;
- break;
- case _S_opcode_backref:
- ostr << "backref next=" << _M_next << " index=" << _M_backref_index;
- break;
- case _S_opcode_match:
- ostr << "match next=" << _M_next;
- break;
- case _S_opcode_accept:
- ostr << "accept next=" << _M_next;
- break;
- default:
- ostr << "unknown next=" << _M_next;
- break;
+ switch (_M_opcode)
+ {
+ case _S_opcode_alternative:
+ case _S_opcode_repeat:
+ ostr << "alt next=" << _M_next << " alt=" << _M_alt;
+ break;
+ case _S_opcode_subexpr_begin:
+ ostr << "subexpr begin next=" << _M_next << " index=" << _M_subexpr;
+ break;
+ case _S_opcode_subexpr_end:
+ ostr << "subexpr end next=" << _M_next << " index=" << _M_subexpr;
+ break;
+ case _S_opcode_backref:
+ ostr << "backref next=" << _M_next << " index=" << _M_backref_index;
+ break;
+ case _S_opcode_match:
+ ostr << "match next=" << _M_next;
+ break;
+ case _S_opcode_accept:
+ ostr << "accept next=" << _M_next;
+ break;
+ default:
+ ostr << "unknown next=" << _M_next;
+ break;
+ }
+ return ostr;
}
- return ostr;
- }
// Prints graphviz dot commands for state.
- inline std::ostream&
- _State_base::_M_dot(std::ostream& __ostr, _StateIdT __id) const
- {
- switch (_M_opcode)
+ template<size_t _Matcher_size>
+ std::ostream&
+ _State_base<_Matcher_size>::_M_dot(std::ostream& __ostr, _StateIdT __id) const
{
- case _S_opcode_alternative:
- case _S_opcode_repeat:
- __ostr << __id << " [label=\"" << __id << "\\nALT\"];\n"
- << __id << " -> " << _M_next
- << " [label=\"next\", tailport=\"s\"];\n"
- << __id << " -> " << _M_alt
- << " [label=\"alt\", tailport=\"n\"];\n";
- break;
- case _S_opcode_backref:
- __ostr << __id << " [label=\"" << __id << "\\nBACKREF "
- << _M_subexpr << "\"];\n"
- << __id << " -> " << _M_next << " [label=\"<match>\"];\n";
- break;
- case _S_opcode_line_begin_assertion:
- __ostr << __id << " [label=\"" << __id << "\\nLINE_BEGIN \"];\n"
- << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
- break;
- case _S_opcode_line_end_assertion:
- __ostr << __id << " [label=\"" << __id << "\\nLINE_END \"];\n"
- << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
- break;
- case _S_opcode_word_boundary:
- __ostr << __id << " [label=\"" << __id << "\\nWORD_BOUNDRY "
- << _M_neg << "\"];\n"
- << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
- break;
- case _S_opcode_subexpr_lookahead:
- __ostr << __id << " [label=\"" << __id << "\\nLOOK_AHEAD\"];\n"
- << __id << " -> " << _M_next
- << " [label=\"epsilon\", tailport=\"s\"];\n"
- << __id << " -> " << _M_alt
- << " [label=\"<assert>\", tailport=\"n\"];\n";
- break;
- case _S_opcode_subexpr_begin:
- __ostr << __id << " [label=\"" << __id << "\\nSBEGIN "
- << _M_subexpr << "\"];\n"
- << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
- break;
- case _S_opcode_subexpr_end:
- __ostr << __id << " [label=\"" << __id << "\\nSEND "
- << _M_subexpr << "\"];\n"
- << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
- break;
- case _S_opcode_dummy:
- break;
- case _S_opcode_match:
- __ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n"
- << __id << " -> " << _M_next << " [label=\"<match>\"];\n";
- break;
- case _S_opcode_accept:
- __ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ;
- break;
- default:
- _GLIBCXX_DEBUG_ASSERT(false);
- break;
+ switch (_M_opcode)
+ {
+ case _S_opcode_alternative:
+ case _S_opcode_repeat:
+ __ostr << __id << " [label=\"" << __id << "\\nALT\"];\n"
+ << __id << " -> " << _M_next
+ << " [label=\"next\", tailport=\"s\"];\n"
+ << __id << " -> " << _M_alt
+ << " [label=\"alt\", tailport=\"n\"];\n";
+ break;
+ case _S_opcode_backref:
+ __ostr << __id << " [label=\"" << __id << "\\nBACKREF "
+ << _M_subexpr << "\"];\n"
+ << __id << " -> " << _M_next << " [label=\"<match>\"];\n";
+ break;
+ case _S_opcode_line_begin_assertion:
+ __ostr << __id << " [label=\"" << __id << "\\nLINE_BEGIN \"];\n"
+ << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
+ break;
+ case _S_opcode_line_end_assertion:
+ __ostr << __id << " [label=\"" << __id << "\\nLINE_END \"];\n"
+ << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
+ break;
+ case _S_opcode_word_boundary:
+ __ostr << __id << " [label=\"" << __id << "\\nWORD_BOUNDRY "
+ << _M_neg << "\"];\n"
+ << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
+ break;
+ case _S_opcode_subexpr_lookahead:
+ __ostr << __id << " [label=\"" << __id << "\\nLOOK_AHEAD\"];\n"
+ << __id << " -> " << _M_next
+ << " [label=\"epsilon\", tailport=\"s\"];\n"
+ << __id << " -> " << _M_alt
+ << " [label=\"<assert>\", tailport=\"n\"];\n";
+ break;
+ case _S_opcode_subexpr_begin:
+ __ostr << __id << " [label=\"" << __id << "\\nSBEGIN "
+ << _M_subexpr << "\"];\n"
+ << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
+ break;
+ case _S_opcode_subexpr_end:
+ __ostr << __id << " [label=\"" << __id << "\\nSEND "
+ << _M_subexpr << "\"];\n"
+ << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
+ break;
+ case _S_opcode_dummy:
+ break;
+ case _S_opcode_match:
+ __ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n"
+ << __id << " -> " << _M_next << " [label=\"<match>\"];\n";
+ break;
+ case _S_opcode_accept:
+ __ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ;
+ break;
+ default:
+ _GLIBCXX_DEBUG_ASSERT(false);
+ break;
+ }
+ return __ostr;
}
- return __ostr;
- }
template<typename _TraitsT>
std::ostream&
@@ -174,13 +176,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
for (auto& __it : *this)
{
- while (__it._M_next >= 0 && (*this)[__it._M_next]._M_opcode
+ while (__it._M_next >= 0 && (*this)[__it._M_next]._M_opcode()
== _S_opcode_dummy)
__it._M_next = (*this)[__it._M_next]._M_next;
- if (__it._M_opcode == _S_opcode_alternative
- || __it._M_opcode == _S_opcode_repeat
- || __it._M_opcode == _S_opcode_subexpr_lookahead)
- while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode
+ if (__it._M_has_alt())
+ while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode()
== _S_opcode_dummy)
__it._M_alt = (*this)[__it._M_alt]._M_next;
}
@@ -198,13 +198,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
auto __u = __stack.top();
__stack.pop();
- auto __dup = _M_nfa[__u];
+ auto __dup = _M_nfa[__u]._M_clone();
// _M_insert_state() never return -1
- auto __id = _M_nfa._M_insert_state(__dup);
+ auto __id = _M_nfa._M_insert_state(std::move(__dup));
__m[__u] = __id;
- if (__dup._M_opcode == _S_opcode_alternative
- || __dup._M_opcode == _S_opcode_repeat
- || __dup._M_opcode == _S_opcode_subexpr_lookahead)
+ if (__dup._M_has_alt())
if (__dup._M_alt != _S_invalid_state_id
&& __m.count(__dup._M_alt) == 0)
__stack.push(__dup._M_alt);
@@ -223,9 +221,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_next) > 0);
__ref._M_next = __m[__ref._M_next];
}
- if (__ref._M_opcode == _S_opcode_alternative
- || __ref._M_opcode == _S_opcode_repeat
- || __ref._M_opcode == _S_opcode_subexpr_lookahead)
+ if (__ref._M_has_alt())
if (__ref._M_alt != _S_invalid_state_id)
{
_GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_alt) > 0);
diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h
index 404f30b..c97e056 100644
--- a/libstdc++-v3/include/bits/regex_executor.h
+++ b/libstdc++-v3/include/bits/regex_executor.h
@@ -148,7 +148,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_word_boundary() const;
bool
- _M_lookahead(_State<_TraitsT> __state);
+ _M_lookahead(const _State<_CharT>& __state);
// Holds additional information used in BFS-mode.
template<typename _SearchMode, typename _ResultsVec>
diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc
index 9b5c1c6..1bc7739 100644
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
@@ -145,7 +145,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename _BiIter, typename _Alloc, typename _TraitsT,
bool __dfs_mode>
bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
- _M_lookahead(_State<_TraitsT> __state)
+ _M_lookahead(const _State<_CharT>& __state)
{
_ResultsVec __what(_M_cur_results.size());
_Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags);
@@ -203,7 +203,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const auto& __state = _M_nfa[__i];
// Every change on _M_cur_results and _M_current will be rolled back after
// finishing the recursion step.
- switch (__state._M_opcode)
+ switch (__state._M_opcode())
{
// _M_alt branch is "match once more", while _M_next is "get me out
// of this quantifier". Executing _M_next first or _M_alt first don't