diff options
author | Tim Shen <timshen91@gmail.com> | 2013-07-30 12:02:55 +0000 |
---|---|---|
committer | Tim Shen <timshen@gcc.gnu.org> | 2013-07-30 12:02:55 +0000 |
commit | a6dc77bc3d174d5f2cd7e81a5fbc9e96713eb19b (patch) | |
tree | 3806c0e659bf08c57518f2bf0aca08fb7785d419 | |
parent | 605e86fa3f9faa7f244ead20033aa1263d635c21 (diff) | |
download | gcc-a6dc77bc3d174d5f2cd7e81a5fbc9e96713eb19b.zip gcc-a6dc77bc3d174d5f2cd7e81a5fbc9e96713eb19b.tar.gz gcc-a6dc77bc3d174d5f2cd7e81a5fbc9e96713eb19b.tar.bz2 |
Thompson matcher refactored.
2013-07-30 Tim Shen <timshen91@gmail.com>
Thompson matcher refactored. Fix grouping problem.
* include/bits/regex.h: Use a dispatcher _M_get_matcher().
* include/bits/regex_compiler.h: Tweak for auto switching.
* include/bits/regex_grep_matcher.h: Class structure.
* include/bits/regex_grep_matcher.tcc: _BFSMatcher(Thompson
matcher) refactoring.
* include/bits/regex_nfa.h: Change _Results's interfaces.
* include/std/regex: Includes <map> and <queue>.
* testsuite/28_regex/algorithms/regex_match/extended/53622.cc:
For both matchers.
* testsuite/28_regex/algorithms/regex_match/extended/57173.cc:
For both matchers.
* testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc:
New.
From-SVN: r201334
-rw-r--r-- | libstdc++-v3/ChangeLog | 17 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex.h | 9 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_compiler.h | 3 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_grep_matcher.h | 151 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_grep_matcher.tcc | 258 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_nfa.h | 50 | ||||
-rw-r--r-- | libstdc++-v3/include/std/regex | 2 | ||||
-rw-r--r-- | libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc | 35 | ||||
-rw-r--r-- | libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc | 23 | ||||
-rw-r--r-- | libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc | 71 |
10 files changed, 419 insertions, 200 deletions
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index edb17ea..db9a035 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,20 @@ +2013-07-30 Tim Shen <timshen91@gmail.com> + + Thompson matcher refactored. Fix grouping problem. + * include/bits/regex.h: Use a dispatcher _M_get_matcher(). + * include/bits/regex_compiler.h: Tweak for auto switching. + * include/bits/regex_grep_matcher.h: Class structure. + * include/bits/regex_grep_matcher.tcc: _BFSMatcher(Thompson + matcher) refactoring. + * include/bits/regex_nfa.h: Change _Results's interfaces. + * include/std/regex: Includes <map> and <queue>. + * testsuite/28_regex/algorithms/regex_match/extended/53622.cc: + For both matchers. + * testsuite/28_regex/algorithms/regex_match/extended/57173.cc: + For both matchers. + * testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc: + New. + 2013-07-29 Nathan Froyd <froydnj@gcc.gnu.org> * include/std/atomic (compare_exchange_weak, compare_exchange_strong): diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h index 5692848..c1f6533 100644 --- a/libstdc++-v3/include/bits/regex.h +++ b/libstdc++-v3/include/bits/regex.h @@ -2175,7 +2175,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION bool regex_match(_Bi_iter __s, _Bi_iter __e, - match_results<_Bi_iter, _Alloc>& __m, + match_results<_Bi_iter, _Alloc>& __m, const basic_regex<_Ch_type, _Rx_traits>& __re, regex_constants::match_flag_type __flags = regex_constants::match_default) @@ -2184,8 +2184,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __detail::_Automaton::_SizeT __sz = __a->_M_sub_count(); __detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e); __detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m); - __detail::_Grep_matcher __matcher(__cs, __r, __a, __flags); - return __matcher._M_dfs_match(); + return __a->_M_get_matcher(__cs, __r, __a, __flags)->_M_match(); } /** @@ -2336,8 +2335,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION for (auto __cur = __first; __cur != __last; ++__cur) // Any KMP-like algo? { __detail::_SpecializedCursor<_Bi_iter> __curs(__cur, __last); - __detail::_Grep_matcher __matcher(__curs, __r, __a, __flags); - if (__matcher._M_dfs_search_from_first()) + auto __matcher = __a->_M_get_matcher(__curs, __r, __a, __flags); + if (__matcher->_M_search_from_first()) { __r._M_set_range(__m.size(), __detail::_SpecializedCursor<_Bi_iter> diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h index dae0948..badea8c 100644 --- a/libstdc++-v3/include/bits/regex_compiler.h +++ b/libstdc++-v3/include/bits/regex_compiler.h @@ -936,7 +936,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (_M_match_token(_ScannerT::_S_token_backref)) { // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value); - return true; + _M_state_store._M_set_back_ref(true); + //return true; } if (_M_match_token(_ScannerT::_S_token_subexpr_begin)) { diff --git a/libstdc++-v3/include/bits/regex_grep_matcher.h b/libstdc++-v3/include/bits/regex_grep_matcher.h index 8686cc9..d9270e5 100644 --- a/libstdc++-v3/include/bits/regex_grep_matcher.h +++ b/libstdc++-v3/include/bits/regex_grep_matcher.h @@ -60,6 +60,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION const _SpecializedCursor<_FwdIterT>& __cursor, match_results<_FwdIterT, _Alloc>& __m); + ~_SpecializedResults() + { + if (_M_managed) + delete &_M_results; + } + + private: + _SpecializedResults(const _SpecializedResults& __rhs) + : _M_results(*new match_results<_FwdIterT, _Alloc>(__rhs._M_results)), + _M_managed(true) + { } + + public: void _M_set_pos(int __i, int __j, const _PatternCursor& __pc); @@ -76,8 +89,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_set_matched(int __i, bool __is_matched) { _M_results.at(__i).matched = __is_matched; } + std::unique_ptr<_Results> + _M_clone() const + { return unique_ptr<_Results>(new _SpecializedResults(*this)); } + + void + _M_assign(const _Results& __rhs) + { + auto __r = static_cast<const _SpecializedResults*>(&__rhs); + _M_results = __r->_M_results; + } + private: match_results<_FwdIterT, _Alloc>& _M_results; + bool _M_managed; }; template<typename _FwdIterT, typename _Alloc> @@ -85,7 +110,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _SpecializedResults(const _Automaton::_SizeT __size, const _SpecializedCursor<_FwdIterT>& __cursor, match_results<_FwdIterT, _Alloc>& __m) - : _M_results(__m) + : _M_results(__m), _M_managed(false) { _M_results.clear(); _M_results.reserve(__size + 2); @@ -105,14 +130,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION typedef const _SpecializedCursor<_FwdIterT>& _CursorT; _CursorT __c = static_cast<_CursorT>(__pc); if (__j == 0) - _M_results.at(__i).first = __c._M_pos(); + _M_results.at(__i).first = __c._M_pos(); else _M_results.at(__i).second = __c._M_pos(); } - /// A stack of states used in evaluating the NFA. - typedef std::stack<_StateIdT, std::vector<_StateIdT> > _StateStack; - /// Executes a regular expression NFA/DFA over a range using a /// variant of the parallel execution algorithm featured in the grep /// utility, modified to use Laurikari tags. @@ -124,47 +146,126 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION const _AutomatonPtr& __automaton, regex_constants::match_flag_type __flags) : _M_nfa(static_pointer_cast<_Nfa>(__automaton)), - _M_pattern(__p), _M_results(__r) + _M_str_cur(__p), _M_results(__r) + { } + + virtual + ~_Grep_matcher() { } // Set matched when string exactly match the pattern. - void - _M_match(); + virtual bool + _M_match() = 0; // Set matched when some prefix of the string matches the pattern. - void - _M_search_from_first(); + virtual bool + _M_search_from_first() = 0; + + protected: + const std::shared_ptr<_Nfa> _M_nfa; + _PatternCursor& _M_str_cur; + _Results& _M_results; + }; + + // Time complexity: exponential + // Space complexity: O(_M_str_cur.size()) + // _M_dfs() take a state, along with current string cursor(_M_str_cur), + // trying to match current state with current character. + // Only _S_opcode_match will consume a character. + class _DFSMatcher + : public _Grep_matcher + { + public: + _DFSMatcher(_PatternCursor& __p, + _Results& __r, + const _AutomatonPtr& __automaton, + regex_constants::match_flag_type __flags) + : _Grep_matcher(__p, __r, __automaton, __flags) + { } - // TODO: in the future this function will be _M_match, in another class. bool - _M_dfs_match() + _M_match() { return _M_dfs<true>(_M_nfa->_M_start()); } - // TODO: in the future this function will be _M_search_from_first, - // in another class. bool - _M_dfs_search_from_first() + _M_search_from_first() { return _M_dfs<false>(_M_nfa->_M_start()); } private: - _StateSet - _M_e_closure(_StateIdT __i); + template<bool __match_mode> + bool + _M_dfs(_StateIdT __i); + }; + + // It's essentially a variant of Single-Source-Shortest-Path problem, where, + // the matching results is the final distance and should be minimized. + // Instead of using Dijkstra Algorithm, I pick up the queue-optimizaed + // (BFS-like) Bellman-Ford algorithm, + // SPFA(http://en.wikipedia.org/wiki/Shortest_Path_Faster_Algorithm). + // + // Every entry of _M_current saves the solution(grouping status) for every + // matching head. When states transfer, solutions will be compared and + // deduplicated(based on which greedy mode we have). + // + // Time complexity: O(_M_str_cur.size() * _M_nfa.size()) + // Space complexity: O(_M_nfa.size() * _M_nfa.mark_count()) + class _BFSMatcher + : public _Grep_matcher + { + public: + _BFSMatcher(_PatternCursor& __p, + _Results& __r, + const _AutomatonPtr& __automaton, + regex_constants::match_flag_type __flags) + : _Grep_matcher(__p, __r, __automaton, __flags) + { + if (_M_nfa->_M_start() != _S_invalid_state_id) + _M_current[_M_nfa->_M_start()] = _M_results._M_clone(); + _M_e_closure(); + } - _StateSet - _M_e_closure(const _StateSet& __s); + bool + _M_match() + { return _M_main_loop<true>(); } - _StateSet - _M_e_closure(_StateStack& __stack, const _StateSet& __s); + bool + _M_search_from_first() + { return _M_main_loop<false>(); } + private: template<bool __match_mode> bool - _M_dfs(_StateIdT __i); + _M_main_loop(); - const std::shared_ptr<_Nfa> _M_nfa; - _PatternCursor& _M_pattern; - _Results& _M_results; + void + _M_e_closure(); + + void + _M_move(); + + bool + _M_match_less_than(_StateIdT __u, _StateIdT __v) const; + + bool + _M_includes_some() const; + + std::map<_StateIdT, std::unique_ptr<_Results>> _M_current; }; + std::unique_ptr<_Grep_matcher> _Nfa:: + _M_get_matcher(_PatternCursor& __p, + _Results& __r, + const _AutomatonPtr& __a, + regex_constants::match_flag_type __flags) + { + if (_M_has_back_ref) + return unique_ptr<_Grep_matcher>( + new _DFSMatcher(__p, __r, __a, __flags)); + else + return unique_ptr<_Grep_matcher>( + new _BFSMatcher(__p, __r, __a, __flags)); + } + //@} regex-detail _GLIBCXX_END_NAMESPACE_VERSION } // namespace __detail diff --git a/libstdc++-v3/include/bits/regex_grep_matcher.tcc b/libstdc++-v3/include/bits/regex_grep_matcher.tcc index dccdfda..2cfc790 100644 --- a/libstdc++-v3/include/bits/regex_grep_matcher.tcc +++ b/libstdc++-v3/include/bits/regex_grep_matcher.tcc @@ -32,83 +32,13 @@ namespace std _GLIBCXX_VISIBILITY(default) { -namespace -{ - // A stack of states used in evaluating the NFA. - typedef std::stack<std::__detail::_StateIdT, - std::vector<std::__detail::_StateIdT> - > _StateStack; - - // Obtains the next state set given the current state set __s and the current - // input character. - inline std::__detail::_StateSet - __move(const std::__detail::_PatternCursor& __p, - const std::__detail::_Nfa& __nfa, - const std::__detail::_StateSet& __s) - { - std::__detail::_StateSet __m; - for (std::__detail::_StateSet::const_iterator __i = __s.begin(); - __i != __s.end(); ++__i) - { - if (*__i == std::__detail::_S_invalid_state_id) - continue; - - const std::__detail::_State& __state = __nfa[*__i]; - if (__state._M_opcode == std::__detail::_S_opcode_match - && __state._M_matches(__p)) - __m.insert(__state._M_next); - } - return __m; - } - - // returns true if (__s intersect __t) is not empty - inline bool - __includes_some(const std::__detail::_StateSet& __s, - const std::__detail::_StateSet& __t) - { - if (__s.size() > 0 && __t.size() > 0) - { - std::__detail::_StateSet::const_iterator __first = __s.begin(); - std::__detail::_StateSet::const_iterator __second = __t.begin(); - while (__first != __s.end() && __second != __t.end()) - { - if (*__first < *__second) - ++__first; - else if (*__second < *__first) - ++__second; - else - return true; - } - } - return false; - } - - // If an identified state __u is not already in the current state set __e, - // insert it and push it on the current state stack __s. - inline void - __add_visited_state(const std::__detail::_StateIdT __u, - _StateStack& __s, - std::__detail::_StateSet& __e) - { - if (__e.count(__u) == 0) - { - __e.insert(__u); - __s.push(__u); - } - } - -} // anonymous namespace - namespace __detail { _GLIBCXX_BEGIN_NAMESPACE_VERSION - // _M_dfs() take a state, along with current string cursor(_M_pattern), - // trying to match current state with current character. - // Only _S_opcode_match will consume a character. // TODO: This is too slow. Try to compile the NFA to a DFA. template<bool __match_mode> - bool _Grep_matcher:: + bool _DFSMatcher:: _M_dfs(_StateIdT __i) { if (__i == _S_invalid_state_id) @@ -126,116 +56,162 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION || _M_dfs<__match_mode>(__state._M_next); break; case _S_opcode_subexpr_begin: - __state._M_tagger(_M_pattern, _M_results); + __state._M_tagger(_M_str_cur, _M_results); __ret = _M_dfs<__match_mode>(__state._M_next); break; case _S_opcode_subexpr_end: - __state._M_tagger(_M_pattern, _M_results); + __state._M_tagger(_M_str_cur, _M_results); __ret = _M_dfs<__match_mode>(__state._M_next); _M_results._M_set_matched(__state._M_subexpr, __ret); break; case _S_opcode_match: - if (!_M_pattern._M_at_end() && __state._M_matches(_M_pattern)) + if (!_M_str_cur._M_at_end() && __state._M_matches(_M_str_cur)) { - _M_pattern._M_next(); + _M_str_cur._M_next(); __ret = _M_dfs<__match_mode>(__state._M_next); - _M_pattern._M_prev(); + _M_str_cur._M_prev(); } break; case _S_opcode_accept: if (__match_mode) - __ret = _M_pattern._M_at_end(); + __ret = _M_str_cur._M_at_end(); else __ret = true; break; default: - _GLIBCXX_DEBUG_ASSERT( false ); + _GLIBCXX_DEBUG_ASSERT(false); } return __ret; } - inline void _Grep_matcher:: - _M_match() - { - __detail::_StateSet __t = this->_M_e_closure(_M_nfa->_M_start()); - for (; !_M_pattern._M_at_end(); _M_pattern._M_next()) - __t = this->_M_e_closure(__move(_M_pattern, *_M_nfa, __t)); - - _M_results._M_set_matched(0, - __includes_some(_M_nfa->_M_final_states(), __t)); - } + template<bool __match_mode> + bool _BFSMatcher:: + _M_main_loop() + { + while (!_M_str_cur._M_at_end()) + { + if (!__match_mode) + if (_M_includes_some()) + return true; + _M_move(); + _M_str_cur._M_next(); + _M_e_closure(); + } + return _M_includes_some(); + } - inline void _Grep_matcher:: - _M_search_from_first() + // The SPFA approach. + void _BFSMatcher:: + _M_e_closure() { - __detail::_StateSet __t = this->_M_e_closure(_M_nfa->_M_start()); - for (; !_M_pattern._M_at_end(); _M_pattern._M_next()) + std::queue<_StateIdT> __q; + std::vector<bool> __in_q(_M_nfa->size(), false); + for (auto& __it : _M_current) { - if (__includes_some(_M_nfa->_M_final_states(), __t)) // KISS - { - _M_results._M_set_matched(0, true); + __in_q[__it.first] = true; + __q.push(__it.first); + } + while (!__q.empty()) + { + auto __u = __q.front(); + __q.pop(); + __in_q[__u] = false; + const auto& __state = (*_M_nfa)[__u]; + + // Can be implemented using method, but there're too much arguments. + auto __add_visited_state = [&](_StateIdT __v) + { + if (__v == _S_invalid_state_id) return; + if (_M_match_less_than(__u, __v)) + { + _M_current[__v] = _M_current[__u]->_M_clone(); + // if a state is updated, it's outgoing neighbors should be + // reconsidered too. Push them to the queue. + if (!__in_q[__v]) + { + __in_q[__v] = true; + __q.push(__v); + } + } + }; + + switch (__state._M_opcode) + { + case _S_opcode_alternative: + __add_visited_state(__state._M_next); + __add_visited_state(__state._M_alt); + break; + case _S_opcode_subexpr_begin: + __state._M_tagger(_M_str_cur, *_M_current[__u]); + __add_visited_state(__state._M_next); + break; + case _S_opcode_subexpr_end: + __state._M_tagger(_M_str_cur, *_M_current[__u]); + _M_current[__u]->_M_set_matched(__state._M_subexpr, true); + __add_visited_state(__state._M_next); + break; + case _S_opcode_match: + break; + case _S_opcode_accept: + __add_visited_state(__state._M_next); + break; + default: + _GLIBCXX_DEBUG_ASSERT(false); } - __t = this->_M_e_closure(__move(_M_pattern, *_M_nfa, __t)); } - _M_results._M_set_matched(0, false); } - // Creates the e-closure set for the initial state __i. - inline _StateSet _Grep_matcher:: - _M_e_closure(_StateIdT __i) + void _BFSMatcher:: + _M_move() { - _StateSet __s; - __s.insert(__i); - _StateStack __stack; - __stack.push(__i); - return this->_M_e_closure(__stack, __s); + decltype(_M_current) __next; + for (auto& __it : _M_current) + { + const auto& __state = (*_M_nfa)[__it.first]; + if (__state._M_opcode == _S_opcode_match + && __state._M_matches(_M_str_cur)) + if (_M_match_less_than(__it.first, __state._M_next) + && __state._M_next != _S_invalid_state_id) + __next[__state._M_next] = __it.second->_M_clone(); + } + _M_current = move(__next); } - // Creates the e-closure set for an arbitrary state set __s. - inline _StateSet _Grep_matcher:: - _M_e_closure(const _StateSet& __s) + bool _BFSMatcher:: + _M_match_less_than(_StateIdT __u, _StateIdT __v) const { - _StateStack __stack; - for (_StateSet::const_iterator __i = __s.begin(); __i != __s.end(); ++__i) - __stack.push(*__i); - return this->_M_e_closure(__stack, __s); + if (_M_current.count(__u) == 0) + return false; + if (_M_current.count(__v) > 0) + return true; + // TODO: Greedy and Non-greedy support + return true; } - inline _StateSet _Grep_matcher:: - _M_e_closure(_StateStack& __stack, const _StateSet& __s) + bool _BFSMatcher:: + _M_includes_some() const { - _StateSet __e = __s; - while (!__stack.empty()) + auto& __s = _M_nfa->_M_final_states(); + auto& __t = _M_current; + if (__s.size() > 0 && __t.size() > 0) { - _StateIdT __t = __stack.top(); __stack.pop(); - if (__t == _S_invalid_state_id) - continue; - // for each __u with edge from __t to __u labeled e do ... - const _State& __state = _M_nfa->operator[](__t); - switch (__state._M_opcode) + auto __first = __s.begin(); + auto __second = __t.begin(); + while (__first != __s.end() && __second != __t.end()) { - case _S_opcode_alternative: - __add_visited_state(__state._M_next, __stack, __e); - __add_visited_state(__state._M_alt, __stack, __e); - break; - case _S_opcode_subexpr_begin: - __add_visited_state(__state._M_next, __stack, __e); - __state._M_tagger(_M_pattern, _M_results); - break; - case _S_opcode_subexpr_end: - __add_visited_state(__state._M_next, __stack, __e); - __state._M_tagger(_M_pattern, _M_results); - _M_results._M_set_matched(__state._M_subexpr, true); - break; - case _S_opcode_accept: - __add_visited_state(__state._M_next, __stack, __e); - break; - default: - break; + if (*__first < __second->first) + ++__first; + else if (__second->first < *__first) + ++__second; + else + { + _M_results._M_assign(*__second->second); + return true; + } } } - return __e; + return false; } _GLIBCXX_END_NAMESPACE_VERSION diff --git a/libstdc++-v3/include/bits/regex_nfa.h b/libstdc++-v3/include/bits/regex_nfa.h index fc30237..1bae1b7 100644 --- a/libstdc++-v3/include/bits/regex_nfa.h +++ b/libstdc++-v3/include/bits/regex_nfa.h @@ -39,6 +39,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @{ */ + /// Provides a generic facade for a templated match_results. + struct _Results + { + virtual + ~_Results() + { } + virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0; + virtual void _M_set_matched(int __i, bool __is_matched) = 0; + virtual std::unique_ptr<_Results> _M_clone() const = 0; + virtual void _M_assign(const _Results& __rhs) = 0; + }; + + class _Grep_matcher; + class _Automaton; + + /// Generic shared pointer to an automaton. + typedef std::shared_ptr<_Automaton> _AutomatonPtr; + /// Base class for, um, automata. Could be an NFA or a DFA. Your choice. class _Automaton { @@ -52,15 +70,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION virtual _SizeT _M_sub_count() const = 0; + virtual std::unique_ptr<_Grep_matcher> + _M_get_matcher(_PatternCursor& __p, + _Results& __r, + const _AutomatonPtr& __automaton, + regex_constants::match_flag_type __flags) = 0; + #ifdef _GLIBCXX_DEBUG virtual std::ostream& _M_dot(std::ostream& __ostr) const = 0; #endif }; - /// Generic shared pointer to an automaton. - typedef std::shared_ptr<_Automaton> _AutomatonPtr; - /// Operation codes that define the type of transitions within the base NFA /// that represents the regular expression. enum _Opcode @@ -73,13 +94,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _S_opcode_accept = 255 }; - /// Provides a generic facade for a templated match_results. - struct _Results - { - virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0; - virtual void _M_set_matched(int __i, bool __is_matched) = 0; - }; - /// Tags current state (for subexpr begin/end). typedef std::function<void (const _PatternCursor&, _Results&)> _Tagger; @@ -113,7 +127,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { __r._M_set_pos(_M_index, 1, __pc); } int _M_index; - _FwdIterT _M_pos; }; /// Indicates if current state matches cursor current. @@ -275,7 +288,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION typedef regex_constants::syntax_option_type _FlagT; _Nfa(_FlagT __f) - : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0) + : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0), + // TODO: BFS by default. Your choice. Need to be set by the compiler. + _M_has_back_ref(false) { } ~_Nfa() @@ -334,6 +349,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return this->size()-1; } + void + _M_set_back_ref(bool __b) + { _M_has_back_ref = __b; } + + std::unique_ptr<_Grep_matcher> + _M_get_matcher(_PatternCursor& __p, + _Results& __r, + const _AutomatonPtr& __automaton, + regex_constants::match_flag_type __flags); + #ifdef _GLIBCXX_DEBUG std::ostream& _M_dot(std::ostream& __ostr) const; @@ -344,6 +369,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _StateIdT _M_start_state; _StateSet _M_accepting_states; _SizeT _M_subexpr_count; + bool _M_has_back_ref; }; /// Describes a sequence of one or more %_State, its current start diff --git a/libstdc++-v3/include/std/regex b/libstdc++-v3/include/std/regex index 907f5bb..b0918ed 100644 --- a/libstdc++-v3/include/std/regex +++ b/libstdc++-v3/include/std/regex @@ -44,6 +44,8 @@ #include <iterator> #include <locale> #include <memory> +#include <map> +#include <queue> #include <set> #include <sstream> #include <stack> diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc index 383ed05..10e2ff4 100644 --- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/53622.cc @@ -32,16 +32,31 @@ test01() { bool test __attribute__((unused)) = true; - std::regex re("zxcv/(one.*)abc", std::regex::extended); - std::string target("zxcv/onetwoabc"); - std::smatch m; - - VERIFY( std::regex_search(target, m, re) ); - VERIFY( m.size() == 2 ); - VERIFY( m[0].matched == true ); - VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" ); - VERIFY( m[1].matched == true ); - VERIFY( std::string(m[1].first, m[1].second) == "onetwo" ); + { + std::regex re("zxcv/(one.*)abc", std::regex::extended); + std::string target("zxcv/onetwoabc"); + std::smatch m; + + VERIFY( std::regex_search(target, m, re) ); + VERIFY( m.size() == 2 ); + VERIFY( m[0].matched == true ); + VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" ); + VERIFY( m[1].matched == true ); + VERIFY( std::string(m[1].first, m[1].second) == "onetwo" ); + } + + { + std::regex re("zxcv/(one.*)abc()\\2", std::regex::extended); + std::string target("zxcv/onetwoabc"); + std::smatch m; + + VERIFY( std::regex_search(target, m, re) ); + VERIFY( m.size() == 3 ); + VERIFY( m[0].matched == true ); + VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" ); + VERIFY( m[1].matched == true ); + VERIFY( std::string(m[1].first, m[1].second) == "onetwo" ); + } } int diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc index 3031c43..cb3a54f 100644 --- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/57173.cc @@ -33,13 +33,24 @@ test01() { bool test __attribute__((unused)) = true; - std::regex re("/asdf(/.*)", std::regex::extended); - std::string target("/asdf/qwerty"); - std::smatch m; + { + std::regex re("/asdf(/.*)", std::regex::extended); + std::string target("/asdf/qwerty"); + std::smatch m; - VERIFY( std::regex_match(target, m, re) ); - VERIFY( m.size() == 2 ); - VERIFY( std::string(m[1].first, m[1].second) == "/qwerty"); + VERIFY( std::regex_match(target, m, re) ); + VERIFY( m.size() == 2 ); + VERIFY( std::string(m[1].first, m[1].second) == "/qwerty"); + } + { + std::regex re("/asdf(/.*)()\\2", std::regex::extended); + std::string target("/asdf/qwerty"); + std::smatch m; + + VERIFY( std::regex_match(target, m, re) ); + VERIFY( m.size() == 3 ); + VERIFY( std::string(m[1].first, m[1].second) == "/qwerty"); + } } int diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc new file mode 100644 index 0000000..86fab85 --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc @@ -0,0 +1,71 @@ +// { dg-options "-std=gnu++11" } + +// +// 2013-07-29 Tim Shen <timshen91@gmail.com> +// +// Copyright (C) 2013 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. + +// 28.11.2 regex_match +// Tests Extended automatic matcher dispatching against a std::string target. + +#include <regex> +#include <testsuite_hooks.h> + +using namespace std; + +template<typename _Bi_iter, typename _Alloc, + typename _Ch_type, typename _Rx_traits> + void + fake_match(_Bi_iter __s, + _Bi_iter __e, + match_results<_Bi_iter, _Alloc>& __m, + const basic_regex<_Ch_type, _Rx_traits>& __re, + regex_constants::match_flag_type __flags + = regex_constants::match_default) + { + __detail::_AutomatonPtr __a = __re._M_get_automaton(); + __detail::_Automaton::_SizeT __sz = __a->_M_sub_count(); + __detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e); + __detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m); + VERIFY( dynamic_cast<__detail::_DFSMatcher *>( + &*__a->_M_get_matcher(__cs, __r, __a, __flags)) != nullptr ); + } + +void +test01() +{ + bool test __attribute__((unused)) = true; + + regex re("()(one(.*))abc\\1"); // backref cause DFS + const string target("onetwoabc"); + smatch m; + fake_match(target.begin(), target.end(), m, re); + + regex_match(target, m, re); + VERIFY( m[2].matched ); + VERIFY( m[3].matched ); + VERIFY( std::string(m[2].first, m[2].second) == "onetwo" ); + VERIFY( std::string(m[3].first, m[3].second) == "two" ); +} + +int +main() +{ + test01(); + return 0; +} |