diff options
-rw-r--r-- | libstdc++-v3/ChangeLog | 18 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex.h | 30 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex.tcc | 46 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_executor.h | 362 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_executor.tcc | 490 | ||||
-rw-r--r-- | libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/ungreedy.cc | 50 | ||||
-rw-r--r-- | libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc | 72 | ||||
-rw-r--r-- | libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/greedy.cc | 6 | ||||
-rw-r--r-- | libstdc++-v3/testsuite/performance/28_regex/split.cc | 72 | ||||
-rw-r--r-- | libstdc++-v3/testsuite/performance/28_regex/split.h | 91 | ||||
-rw-r--r-- | libstdc++-v3/testsuite/performance/28_regex/split_bfs.cc | 46 | ||||
-rw-r--r-- | libstdc++-v3/testsuite/util/testsuite_regex.h | 3 |
12 files changed, 503 insertions, 783 deletions
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 3149f38..7672b08 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,21 @@ +2013-10-26 Tim Shen <timshen91@gmail.com> + + * include/bits/regex.h: Remove unnecessary friends. + * include/bits/regex.tcc (__regex_algo_impl<>): Move __get_executor + to here. + * include/bits/regex_executor.h: Remove _DFSExecutor and _BFSExecutor; + they are merged into _Executor. Eliminate quantifier tracking part, so + it's faster. + * include/bits/regex_executor.tcc: Implement _Executor. + * testsuite/28_regex/algorithms/regex_match/ecma/char/ungreedy.cc: New. + * testsuite/28_regex/algorithms/regex_search/ecma/greedy.cc: Adjust + duplicate testcases. + * testsuite/performance/28_regex/split.h: New. + * testsuite/performance/28_regex/split_bfs.cc: New. + * testsuite/util/testsuite_regex.h: Adjust behavior of two-executors + agreement judger: do not compare match_results when executor return + false. + 2013-10-25 François Dumont <fdumont@gcc.gnu.org> * include/debug/formatter.h (__check_singular): Add const on diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h index 32d38b4..b1bda46 100644 --- a/libstdc++-v3/include/bits/regex.h +++ b/libstdc++-v3/include/bits/regex.h @@ -36,6 +36,9 @@ namespace __detail { _GLIBCXX_BEGIN_NAMESPACE_VERSION + enum class _RegexExecutorPolicy : int + { _S_auto, _S_alternate }; + template<typename _BiIter, typename _Alloc, typename _CharT, typename _TraitsT, _RegexExecutorPolicy __policy, @@ -730,17 +733,6 @@ _GLIBCXX_END_NAMESPACE_VERSION typedef std::shared_ptr<__detail::_NFA<_Ch_type, _Rx_traits>> _AutomatonPtr; - template<typename _BiIter, typename _Alloc, - typename _CharT, typename _TraitsT, - __detail::_RegexExecutorPolicy __policy> - friend std::unique_ptr< - __detail::_Executor<_BiIter, _Alloc, _CharT, _TraitsT>> - __detail::__get_executor(_BiIter, - _BiIter, - std::vector<sub_match<_BiIter>, _Alloc>&, - const basic_regex<_CharT, _TraitsT>&, - regex_constants::match_flag_type); - template<typename _Bp, typename _Ap, typename _Cp, typename _Rp, __detail::_RegexExecutorPolicy, bool> friend bool @@ -748,15 +740,9 @@ _GLIBCXX_END_NAMESPACE_VERSION const basic_regex<_Cp, _Rp>&, regex_constants::match_flag_type); - template<typename, typename, typename, typename> + template<typename, typename, typename, bool> friend class __detail::_Executor; - template<typename, typename, typename, typename> - friend class __detail::_DFSExecutor; - - template<typename, typename, typename, typename> - friend class __detail::_BFSExecutor; - flag_type _M_flags; _Rx_traits _M_traits; _AutomatonPtr _M_automaton; @@ -1851,15 +1837,9 @@ _GLIBCXX_END_NAMESPACE_VERSION //@} private: - template<typename, typename, typename, typename> + template<typename, typename, typename, bool> friend class __detail::_Executor; - template<typename, typename, typename, typename> - friend class __detail::_DFSExecutor; - - template<typename, typename, typename, typename> - friend class __detail::_BFSExecutor; - template<typename, typename, typename> friend class regex_iterator; diff --git a/libstdc++-v3/include/bits/regex.tcc b/libstdc++-v3/include/bits/regex.tcc index 7028480..2ac095d 100644 --- a/libstdc++-v3/include/bits/regex.tcc +++ b/libstdc++-v3/include/bits/regex.tcc @@ -28,6 +28,13 @@ * Do not attempt to use it directly. @headername{regex} */ +// See below __regex_algo_impl to get what this is talking about. The default +// value 1 indicated a conservative optimization without giving up worst case +// performance. +#ifndef _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT +#define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 1 +#endif + namespace std _GLIBCXX_VISIBILITY(default) { _GLIBCXX_BEGIN_NAMESPACE_VERSION @@ -61,14 +68,41 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION for (auto& __it : __res) __it.matched = false; - auto __executor = __get_executor<_BiIter, _Alloc, _CharT, _TraitsT, - __policy>(__s, __e, __res, __re, __flags); - + // This function decide which executor to use under given circumstances. + // The _S_auto policy now is the following: if a NFA has no + // back-references and has more than _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT + // quantifiers (*, +, ?), the BFS executor will be used, other wise + // DFS executor. This is because DFS executor has a exponential upper + // bound, but better best-case performace. Meanwhile, BFS executor can + // effectively prevent from exponential-long time matching (which must + // contains many quantifiers), but it's slower in average. + // + // For simple regex, BFS executor could be 2 or more times slower than + // DFS executor. + // + // Of course, BFS executor cannot handle back-references. bool __ret; - if (__match_mode) - __ret = __executor->_M_match(); + if (!__re._M_automaton->_M_has_backref + && (__policy == _RegexExecutorPolicy::_S_alternate + || __re._M_automaton->_M_quant_count + > _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT)) + { + _Executor<_BiIter, _Alloc, _TraitsT, false> + __executor(__s, __e, __m, __re, __flags); + if (__match_mode) + __ret = __executor._M_match(); + else + __ret = __executor._M_search(); + } else - __ret = __executor->_M_search(); + { + _Executor<_BiIter, _Alloc, _TraitsT, true> + __executor(__s, __e, __m, __re, __flags); + if (__match_mode) + __ret = __executor._M_match(); + else + __ret = __executor._M_search(); + } if (__ret) { for (auto __it : __res) diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h index 6d9a83e..12db47b 100644 --- a/libstdc++-v3/include/bits/regex_executor.h +++ b/libstdc++-v3/include/bits/regex_executor.h @@ -30,10 +30,6 @@ // FIXME convert comments to doxygen format. -// TODO Put _DFSExecutor and _BFSExecutor into one class. They are becoming -// much more similar. Also, make grouping seperated. The -// regex_constants::nosubs enables much more simpler execution. - namespace std _GLIBCXX_VISIBILITY(default) { _GLIBCXX_BEGIN_NAMESPACE_VERSION @@ -56,15 +52,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @{ */ - template<typename _BiIter, typename _Alloc, - typename _CharT, typename _TraitsT> + template<typename _BiIter, typename _Alloc, typename _TraitsT, + bool __dfs_mode> class _Executor { public: - typedef basic_regex<_CharT, _TraitsT> _RegexT; - typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec; - typedef regex_constants::match_flag_type _FlagT; - typedef typename _TraitsT::char_class_type _ClassT; + typedef typename iterator_traits<_BiIter>::value_type _CharT; + typedef basic_regex<_CharT, _TraitsT> _RegexT; + typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec; + typedef regex_constants::match_flag_type _FlagT; + typedef typename _TraitsT::char_class_type _ClassT; + typedef _NFA<_CharT, _TraitsT> _NFAT; public: _Executor(_BiIter __begin, @@ -75,39 +73,47 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION : _M_begin(__begin), _M_end(__end), _M_re(__re), + _M_nfa(*__re._M_automaton), _M_results(__results), + _M_match_queue(__dfs_mode ? nullptr + : new queue<pair<_StateIdT, _ResultsVec>>()), + _M_visited(__dfs_mode ? nullptr : new vector<bool>(_M_nfa.size())), _M_flags((__flags & regex_constants::match_prev_avail) ? (__flags & ~regex_constants::match_not_bol & ~regex_constants::match_not_bow) - : __flags) - { } - - virtual - ~_Executor() + : __flags), + _M_start_state(_M_nfa._M_start()) { } // Set matched when string exactly match the pattern. bool _M_match() { - _M_match_mode = true; - _M_init(_M_begin); - return _M_main(); + _M_current = _M_begin; + return _M_main<true>(); } // Set matched when some prefix of the string matches the pattern. bool _M_search_from_first() { - _M_match_mode = false; - _M_init(_M_begin); - return _M_main(); + _M_current = _M_begin; + return _M_main<false>(); } bool _M_search(); + private: + template<bool __match_mode> + void + _M_dfs(_StateIdT __start); + + template<bool __match_mode> + bool + _M_main(); + bool _M_is_word(_CharT __ch) const { @@ -134,307 +140,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION bool _M_word_boundry(_State<_CharT, _TraitsT> __state) const; - virtual std::unique_ptr<_Executor> - _M_clone() const = 0; - - // Return whether now match the given sub-NFA. bool - _M_lookahead(_State<_CharT, _TraitsT> __state) const - { - auto __sub = this->_M_clone(); - __sub->_M_set_start(__state._M_alt); - return __sub->_M_search_from_first(); - } - - void - _M_set_results(_ResultsVec& __cur_results); - - public: - virtual void - _M_init(_BiIter __cur) = 0; - - virtual void - _M_set_start(_StateIdT __start) = 0; - - virtual bool - _M_main() = 0; - - _BiIter _M_current; - const _BiIter _M_begin; - const _BiIter _M_end; - const _RegexT& _M_re; - _ResultsVec& _M_results; - _FlagT _M_flags; - bool _M_match_mode; - }; - - // A _DFSExecutor perform a DFS on given NFA and input string. At the very - // beginning the executor stands in the start state, then it try every - // possible state transition in current state recursively. Some state - // transitions consume input string, say, a single-char-matcher or a - // back-reference matcher; some not, like assertion or other anchor nodes. - // When the input is exhausted and the current state is an accepting state, - // the whole executor return true. - // - // TODO: This approach is exponentially slow for certain input. - // Try to compile the NFA to a DFA. - // - // Time complexity: o(match_length), O(2^(_M_nfa->size())) - // Space complexity: \theta(match_results.size() + match_length) - template<typename _BiIter, typename _Alloc, - typename _CharT, typename _TraitsT> - class _DFSExecutor - : public _Executor<_BiIter, _Alloc, _CharT, _TraitsT> - { - public: - typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT; - typedef _NFA<_CharT, _TraitsT> _NFAT; - typedef typename _BaseT::_RegexT _RegexT; - typedef typename _BaseT::_ResultsVec _ResultsVec; - typedef typename _BaseT::_FlagT _FlagT; + _M_lookahead(_State<_CharT, _TraitsT> __state); public: - _DFSExecutor(_BiIter __begin, - _BiIter __end, - _ResultsVec& __results, - const _RegexT& __re, - _FlagT __flags) - : _BaseT(__begin, __end, __results, __re, __flags), - _M_nfa(__re._M_automaton), _M_start_state(_M_nfa->_M_start()) - { } - - private: - void - _M_init(_BiIter __cur) - { - _M_cur_results.resize(_M_nfa->_M_sub_count() + 2); - this->_M_current = __cur; - } - - void - _M_set_start(_StateIdT __start) - { _M_start_state = __start; } - - bool - _M_main() - { return _M_dfs(this->_M_start_state); } - - bool - _M_dfs(_StateIdT __start); - - std::unique_ptr<_BaseT> - _M_clone() const - { - return std::unique_ptr<_BaseT>(new _DFSExecutor(this->_M_current, - this->_M_end, - this->_M_results, - this->_M_re, - this->_M_flags)); - } - + _ResultsVec _M_cur_results; + _BiIter _M_current; + const _BiIter _M_begin; + const _BiIter _M_end; + const _RegexT& _M_re; + const _NFAT& _M_nfa; + _ResultsVec& _M_results; + // Used in BFS, saving states that need to be considered for the next + // character. + std::unique_ptr<queue<pair<_StateIdT, _ResultsVec>>> _M_match_queue; + // Used in BFS, indicating that which state is already visited. + std::unique_ptr<vector<bool>> _M_visited; + _FlagT _M_flags; // To record current solution. - std::shared_ptr<_NFAT> _M_nfa; - _ResultsVec _M_cur_results; - _StateIdT _M_start_state; - }; - - // Like the DFS approach, it try every possible state transition; Unlike DFS, - // it uses a queue instead of a stack to store matching states. It's a BFS - // approach. - // - // Russ Cox's article(http://swtch.com/~rsc/regexp/regexp1.html) explained - // this algorithm clearly. - // - // Every entry of _M_covered saves the solution(grouping status) for every - // matching head. When states transit, solutions will be compared and - // deduplicated(based on which greedy mode we have). - // - // Time complexity: o(match_length * (quantifier_number - // + match_results.size()) - // O(match_length * _M_nfa->size() - // * (quantifier_number + match_results.size()) - // Space complexity: o(quantifier_number + match_results.size()) - // O(_M_nfa->size() - // * (quantifier_number + match_results.size()) - template<typename _BiIter, typename _Alloc, - typename _CharT, typename _TraitsT> - class _BFSExecutor - : public _Executor<_BiIter, _Alloc, _CharT, _TraitsT> - { - public: - typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT; - typedef _NFA<_CharT, _TraitsT> _NFAT; - typedef typename _BaseT::_RegexT _RegexT; - typedef typename _BaseT::_ResultsVec _ResultsVec; - typedef typename _BaseT::_FlagT _FlagT; - // Here's a solution for greedy/ungreedy mode in BFS approach. We need to - // carefully work out how to compare to conflict matching states. - // - // A matching state is a pair(where, when); `where` is a NFA node; `when` - // is a _BiIter, indicating which char is the next to be matched. Two - // matching states conflict if they have equivalent `where` and `when`. - // - // Now we need to drop one and keep another, because at most one of them - // could be the final optimal solution. This behavior is affected by - // greedy policy. - // - // The definition of `greedy`: - // For the sequence of quantifiers in NFA sorted by their start positions, - // now maintain a vector in every matching state, with length equal to - // quantifier seq, recording repeating times of every quantifier. Now to - // compare two matching states, we just lexically compare these two - // vectors. To win the compare(to survive), one matching state needs to - // make its greedy quantifier count larger, and ungreedy quantifiers - // count smaller. - // - // In the implementation, we recorded negtive counts for greedy - // quantifiers and positive counts of ungreedy ones. Now the implicit - // operator<() for lexicographical_compare will emit the answer. - // - // When two vectors equal, it means the `where`, `when` and quantifier - // counts are identical, and indicates the same solution; so - // _ResultsEntry::operator<() just return false. - struct _ResultsEntry - : private _ResultsVec - { - public: - _ResultsEntry(size_t __res_sz, size_t __sz) - : _ResultsVec(__res_sz), _M_quant_keys(__sz) - { } - - void - resize(size_t __n) - { _ResultsVec::resize(__n); } - - size_t - size() - { return _ResultsVec::size(); } - - sub_match<_BiIter>& - operator[](size_t __idx) - { return _ResultsVec::operator[](__idx); } - - bool - operator<(const _ResultsEntry& __rhs) const - { - _GLIBCXX_DEBUG_ASSERT(_M_quant_keys.size() - == __rhs._M_quant_keys.size()); - return lexicographical_compare(_M_quant_keys.begin(), - _M_quant_keys.end(), - __rhs._M_quant_keys.begin(), - __rhs._M_quant_keys.end()); - } - - void - _M_inc(size_t __idx, bool __neg) - { _M_quant_keys[__idx] += __neg ? 1 : -1; } - - _ResultsVec& - _M_get() - { return *this; } - - public: - std::vector<int> _M_quant_keys; - }; - typedef std::unique_ptr<_ResultsEntry> _ResultsPtr; - - class _TodoList - { - public: - explicit - _TodoList(size_t __sz) - : _M_states(), _M_exists(__sz, false) - { } - - void _M_push(_StateIdT __u) - { - _GLIBCXX_DEBUG_ASSERT(__u < _M_exists.size()); - if (!_M_exists[__u]) - { - _M_exists[__u] = true; - _M_states.push_back(__u); - } - } - - _StateIdT _M_pop() - { - auto __ret = _M_states.back(); - _M_states.pop_back(); - _M_exists[__ret] = false; - return __ret; - } - - bool _M_empty() const - { return _M_states.empty(); } - - void _M_clear() - { - _M_states.clear(); - _M_exists.assign(_M_exists.size(), false); - } - - private: - std::vector<_StateIdT> _M_states; - std::vector<bool> _M_exists; - }; - - public: - _BFSExecutor(_BiIter __begin, - _BiIter __end, - _ResultsVec& __results, - const _RegexT& __re, - _FlagT __flags) - : _BaseT(__begin, __end, __results, __re, __flags), - _M_nfa(__re._M_automaton), _M_match_stack(_M_nfa->size()), - _M_stack(_M_nfa->size()), _M_start_state(_M_nfa->_M_start()) - { } - - private: - void - _M_init(_BiIter __cur) - { - this->_M_current = __cur; - _M_covered.clear(); - _ResultsVec& __res(this->_M_results); - _M_covered[this->_M_start_state] = - _ResultsPtr(new _ResultsEntry(__res.size(), - _M_nfa->_M_quant_count)); - _M_stack._M_push(this->_M_start_state); - } - - void - _M_set_start(_StateIdT __start) - { _M_start_state = __start; } - - bool - _M_main(); - - void - _M_e_closure(); - - void - _M_move(); - - bool - _M_includes_some(); - - std::unique_ptr<_BaseT> - _M_clone() const - { - return std::unique_ptr<_BaseT>(new _BFSExecutor(this->_M_current, - this->_M_end, - this->_M_results, - this->_M_re, - this->_M_flags)); - } - - std::shared_ptr<_NFAT> _M_nfa; - std::map<_StateIdT, _ResultsPtr> _M_covered; - _TodoList _M_match_stack; - _TodoList _M_stack; - _StateIdT _M_start_state; - // To record global optimal solution. - _ResultsPtr _M_cur_results; + _StateIdT _M_start_state; + // Do we have a solution so far? + bool _M_has_sol; }; //@} regex-detail diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc index 7c084ad..d3b9a04 100644 --- a/libstdc++-v3/include/bits/regex_executor.tcc +++ b/libstdc++-v3/include/bits/regex_executor.tcc @@ -28,22 +28,15 @@ * Do not attempt to use it directly. @headername{regex} */ -// See below __get_executor to get what this is talking about. The default -// value 1 indicated a conservative optimization without giving up worst case -// performance. -#ifndef _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT -#define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 1 -#endif - namespace std _GLIBCXX_VISIBILITY(default) { namespace __detail { _GLIBCXX_BEGIN_NAMESPACE_VERSION - template<typename _BiIter, typename _Alloc, - typename _CharT, typename _TraitsT> - bool _Executor<_BiIter, _Alloc, _CharT, _TraitsT>:: + template<typename _BiIter, typename _Alloc, typename _TraitsT, + bool __dfs_mode> + bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_search() { if (_M_flags & regex_constants::match_continuous) @@ -51,9 +44,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION auto __cur = _M_begin; do { - _M_match_mode = false; - _M_init(__cur); - if (_M_main()) + _M_current = __cur; + if (_M_main<false>()) return true; } // Continue when __cur == _M_end @@ -61,24 +53,141 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return false; } - template<typename _BiIter, typename _Alloc, - typename _CharT, typename _TraitsT> - bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: + template<typename _BiIter, typename _Alloc, typename _TraitsT, + bool __dfs_mode> + template<bool __match_mode> + bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: + _M_main() + { + if (__dfs_mode) + { + _M_has_sol = false; + _M_cur_results = _M_results; + _M_dfs<__match_mode>(_M_start_state); + return _M_has_sol; + } + else + { + // Like the DFS approach, it try every possible state transition; + // Unlike DFS, it uses a queue instead of a stack to store matching + // states. It's a BFS approach. + // + // Russ Cox's article(http://swtch.com/~rsc/regexp/regexp1.html) + // explained this algorithm clearly. + // + // Time complexity: o(match_length * match_results.size()) + // O(match_length * _M_nfa.size() + // * match_results.size()) + // Space complexity: o(_M_nfa.size() + match_results.size()) + // O(_M_nfa.size() * match_results.size()) + _M_match_queue->push(make_pair(_M_start_state, _M_results)); + bool __ret = false; + while (1) + { + _M_has_sol = false; + if (_M_match_queue->empty()) + break; + _M_visited->assign(_M_visited->size(), false); + auto _M_old_queue = std::move(*_M_match_queue); + while (!_M_old_queue.empty()) + { + auto __task = _M_old_queue.front(); + _M_old_queue.pop(); + _M_cur_results = __task.second; + _M_dfs<__match_mode>(__task.first); + } + if (!__match_mode) + __ret |= _M_has_sol; + if (_M_current == _M_end) + break; + ++_M_current; + } + if (__match_mode) + __ret = _M_has_sol; + return __ret; + } + } + + // Return whether now match the given sub-NFA. + template<typename _BiIter, typename _Alloc, typename _TraitsT, + bool __dfs_mode> + bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: + _M_lookahead(_State<_Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: + _CharT, _TraitsT> __state) + { + _ResultsVec __what(_M_cur_results.size()); + auto __sub = std::unique_ptr<_Executor>(new _Executor(_M_current, + _M_end, + __what, + _M_re, + _M_flags)); + __sub->_M_start_state = __state._M_alt; + if (__sub->_M_search_from_first()) + { + for (size_t __i = 0; __i < __what.size(); __i++) + if (__what[__i].matched) + _M_cur_results[__i] = __what[__i]; + return true; + } + return false; + } + + // A _DFSExecutor perform a DFS on given NFA and input string. At the very + // beginning the executor stands in the start state, then it try every + // possible state transition in current state recursively. Some state + // transitions consume input string, say, a single-char-matcher or a + // back-reference matcher; some not, like assertion or other anchor nodes. + // When the input is exhausted and the current state is an accepting state, + // the whole executor return true. + // + // TODO: This approach is exponentially slow for certain input. + // Try to compile the NFA to a DFA. + // + // Time complexity: o(match_length), O(2^(_M_nfa.size())) + // Space complexity: \theta(match_results.size() + match_length) + // + template<typename _BiIter, typename _Alloc, typename _TraitsT, + bool __dfs_mode> + template<bool __match_mode> + void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_dfs(_StateIdT __i) { - auto& __current = this->_M_current; - const auto& __state = (*_M_nfa)[__i]; - bool __ret = false; + if (!__dfs_mode) + { + if ((*_M_visited)[__i]) + return; + (*_M_visited)[__i] = true; + } + + const auto& __state = _M_nfa[__i]; switch (__state._M_opcode) { case _S_opcode_alternative: // Greedy or not, this is a question ;) if (!__state._M_neg) - __ret = _M_dfs(__state._M_alt) - || _M_dfs(__state._M_next); + { + _M_dfs<__match_mode>(__state._M_alt); + if (!__dfs_mode || !_M_has_sol) + _M_dfs<__match_mode>(__state._M_next); + } else - __ret = _M_dfs(__state._M_next) - || _M_dfs(__state._M_alt); + { + if (__dfs_mode) + { + _M_dfs<__match_mode>(__state._M_next); + if (!_M_has_sol) + _M_dfs<__match_mode>(__state._M_alt); + } + else + { + if (!_M_has_sol) + { + _M_dfs<__match_mode>(__state._M_next); + if (!_M_has_sol) + _M_dfs<__match_mode>(__state._M_alt); + } + } + } break; case _S_opcode_subexpr_begin: // Here's the critical part: if there's nothing changed since last @@ -88,273 +197,128 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // Every change on _M_cur_results will be roll back after the // recursion step finished. if (!_M_cur_results[__state._M_subexpr].matched - || _M_cur_results[__state._M_subexpr].first != __current) + || _M_cur_results[__state._M_subexpr].first != _M_current) { - auto __back = _M_cur_results[__state._M_subexpr].first; - _M_cur_results[__state._M_subexpr].first = __current; - __ret = _M_dfs(__state._M_next); - _M_cur_results[__state._M_subexpr].first = __back; + auto& __res = _M_cur_results[__state._M_subexpr]; + auto __back = __res.first; + __res.first = _M_current; + _M_dfs<__match_mode>(__state._M_next); + __res.first = __back; } break; case _S_opcode_subexpr_end: - if (_M_cur_results[__state._M_subexpr].second != __current + if (_M_cur_results[__state._M_subexpr].second != _M_current || _M_cur_results[__state._M_subexpr].matched != true) { - auto __back = _M_cur_results[__state._M_subexpr]; - _M_cur_results[__state._M_subexpr].second = __current; - _M_cur_results[__state._M_subexpr].matched = true; - __ret = _M_dfs(__state._M_next); - _M_cur_results[__state._M_subexpr] = __back; + auto& __res = _M_cur_results[__state._M_subexpr]; + auto __back = __res; + __res.second = _M_current; + __res.matched = true; + _M_dfs<__match_mode>(__state._M_next); + __res = __back; } else - __ret = _M_dfs(__state._M_next); + _M_dfs<__match_mode>(__state._M_next); break; case _S_opcode_line_begin_assertion: - if (this->_M_at_begin()) - __ret = _M_dfs(__state._M_next); + if (_M_at_begin()) + _M_dfs<__match_mode>(__state._M_next); break; case _S_opcode_line_end_assertion: - if (this->_M_at_end()) - __ret = _M_dfs(__state._M_next); + if (_M_at_end()) + _M_dfs<__match_mode>(__state._M_next); break; case _S_opcode_word_boundry: - if (this->_M_word_boundry(__state) == !__state._M_neg) - __ret = _M_dfs(__state._M_next); + if (_M_word_boundry(__state) == !__state._M_neg) + _M_dfs<__match_mode>(__state._M_next); break; // Here __state._M_alt offers a single start node for a sub-NFA. // We recursivly invoke our algorithm to match the sub-NFA. case _S_opcode_subexpr_lookahead: - if (this->_M_lookahead(__state) == !__state._M_neg) - __ret = _M_dfs(__state._M_next); + if (_M_lookahead(__state) == !__state._M_neg) + _M_dfs<__match_mode>(__state._M_next); break; case _S_opcode_match: - if (__current != this->_M_end && __state._M_matches(*__current)) + if (__dfs_mode) { - ++__current; - __ret = _M_dfs(__state._M_next); - --__current; + if (_M_current != _M_end && __state._M_matches(*_M_current)) + { + ++_M_current; + _M_dfs<__match_mode>(__state._M_next); + --_M_current; + } } + else + if (__state._M_matches(*_M_current)) + _M_match_queue->push(make_pair(__state._M_next, _M_cur_results)); break; // First fetch the matched result from _M_cur_results as __submatch; // then compare it with - // (__current, __current + (__submatch.second - __submatch.first)) + // (_M_current, _M_current + (__submatch.second - __submatch.first)) // If matched, keep going; else just return to try another state. case _S_opcode_backref: { + _GLIBCXX_DEBUG_ASSERT(__dfs_mode); auto& __submatch = _M_cur_results[__state._M_backref_index]; if (!__submatch.matched) break; - auto __last = __current; + auto __last = _M_current; for (auto __tmp = __submatch.first; - __last != this->_M_end && __tmp != __submatch.second; + __last != _M_end && __tmp != __submatch.second; ++__tmp) ++__last; - if (this->_M_re._M_traits.transform(__submatch.first, + if (_M_re._M_traits.transform(__submatch.first, __submatch.second) - == this->_M_re._M_traits.transform(__current, __last)) + == _M_re._M_traits.transform(_M_current, __last)) { - if (__last != __current) + if (__last != _M_current) { - auto __backup = __current; - __current = __last; - __ret = _M_dfs(__state._M_next); - __current = __backup; + auto __backup = _M_current; + _M_current = __last; + _M_dfs<__match_mode>(__state._M_next); + _M_current = __backup; } else - __ret = _M_dfs(__state._M_next); + _M_dfs<__match_mode>(__state._M_next); } } break; case _S_opcode_accept: - if (this->_M_match_mode) - __ret = __current == this->_M_end; - else - __ret = true; - if (__current == this->_M_begin - && (this->_M_flags & regex_constants::match_not_null)) - __ret = false; - if (__ret) - this->_M_set_results(_M_cur_results); - break; - default: - _GLIBCXX_DEBUG_ASSERT(false); - } - return __ret; - } - - template<typename _BiIter, typename _Alloc, - typename _CharT, typename _TraitsT> - bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: - _M_main() - { - _M_e_closure(); - bool __ret = false; - if (!this->_M_match_mode - && !(this->_M_flags & regex_constants::match_not_null)) - __ret = _M_includes_some() || __ret; - while (this->_M_current != this->_M_end) - { - _M_move(); - ++this->_M_current; - if (_M_stack._M_empty()) - break; - _M_e_closure(); - if (!this->_M_match_mode) - // To keep regex_search greedy, no "return true" here. - __ret = _M_includes_some() || __ret; - } - if (this->_M_match_mode) - __ret = _M_includes_some(); - if (__ret) - this->_M_set_results(_M_cur_results->_M_get()); - _M_match_stack._M_clear(); - _GLIBCXX_DEBUG_ASSERT(_M_stack._M_empty()); - return __ret; - } - - template<typename _BiIter, typename _Alloc, - typename _CharT, typename _TraitsT> - void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: - _M_e_closure() - { - auto& __current = this->_M_current; - - while (!_M_stack._M_empty()) - { - auto __u = _M_stack._M_pop(); - _GLIBCXX_DEBUG_ASSERT(_M_covered.count(__u)); - const auto& __state = (*_M_nfa)[__u]; - - // Can be implemented using method, but there will be too many - // arguments. I would use macro function before C++11, but lambda is - // a better choice, since hopefully compiler can inline it. - auto __add_visited_state = [=](_StateIdT __v) - { - if (_M_covered.count(__v) == 0) - { - _M_covered[__v] = - _ResultsPtr(new _ResultsEntry(*_M_covered[__u])); - _M_stack._M_push(__v); - return; - } - auto& __cu = _M_covered[__u]; - auto& __cv = _M_covered[__v]; - if (*__cu < *__cv) - { - __cv = _ResultsPtr(new _ResultsEntry(*__cu)); - // if a state is updated, it's outgoing neighbors should be - // reconsidered too. Push them to the queue. - _M_stack._M_push(__v); - } - }; - - // Identical to DFS's switch part. - switch (__state._M_opcode) + if (__dfs_mode) { - // Needs to maintain quantifier count vector here. A quantifier - // must be concerned with a alt node. - case _S_opcode_alternative: - { - __add_visited_state(__state._M_next); - auto& __cu = *_M_covered[__u]; - auto __back = __cu._M_quant_keys[__state._M_quant_index]; - __cu._M_inc(__state._M_quant_index, __state._M_neg); - __add_visited_state(__state._M_alt); - __cu._M_quant_keys[__state._M_quant_index] = __back; - } - break; - case _S_opcode_subexpr_begin: - { - auto& __sub = (*_M_covered[__u])[__state._M_subexpr]; - if (!__sub.matched || __sub.first != __current) - { - auto __back = __sub.first; - __sub.first = __current; - __add_visited_state(__state._M_next); - __sub.first = __back; - } - } - break; - case _S_opcode_subexpr_end: - { - auto& __cu = *_M_covered[__u]; - auto __back = __cu[__state._M_subexpr]; - __cu[__state._M_subexpr].second = __current; - __cu[__state._M_subexpr].matched = true; - __add_visited_state(__state._M_next); - __cu[__state._M_subexpr] = __back; - } - break; - case _S_opcode_line_begin_assertion: - if (this->_M_at_begin()) - __add_visited_state(__state._M_next); - break; - case _S_opcode_line_end_assertion: - if (this->_M_at_end()) - __add_visited_state(__state._M_next); - break; - case _S_opcode_word_boundry: - if (this->_M_word_boundry(__state) == !__state._M_neg) - __add_visited_state(__state._M_next); - break; - case _S_opcode_subexpr_lookahead: - if (this->_M_lookahead(__state) == !__state._M_neg) - __add_visited_state(__state._M_next); - break; - case _S_opcode_match: - _M_match_stack._M_push(__u); - break; - case _S_opcode_accept: - break; - default: - _GLIBCXX_DEBUG_ASSERT(false); + _GLIBCXX_DEBUG_ASSERT(!_M_has_sol); + if (__match_mode) + _M_has_sol = _M_current == _M_end; + else + _M_has_sol = true; + if (_M_current == _M_begin + && (_M_flags & regex_constants::match_not_null)) + _M_has_sol = false; + if (_M_has_sol) + _M_results = _M_cur_results; } - } - } - - template<typename _BiIter, typename _Alloc, - typename _CharT, typename _TraitsT> - void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: - _M_move() - { - decltype(_M_covered) __next; - while (!_M_match_stack._M_empty()) - { - auto __u = _M_match_stack._M_pop(); - const auto& __state = (*_M_nfa)[__u]; - auto& __cu = _M_covered[__u]; - if (__state._M_matches(*this->_M_current) - && (__next.count(__state._M_next) == 0 - || *__cu < *__next[__state._M_next])) + else { - __next[__state._M_next] = std::move(__cu); - _M_stack._M_push(__state._M_next); + if (_M_current == _M_begin + && (_M_flags & regex_constants::match_not_null)) + break; + if (!__match_mode || _M_current == _M_end) + if (!_M_has_sol) + { + _M_has_sol = true; + _M_results = _M_cur_results; + } } + break; + default: + _GLIBCXX_DEBUG_ASSERT(false); } - _M_covered = move(__next); - } - - template<typename _BiIter, typename _Alloc, - typename _CharT, typename _TraitsT> - bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: - _M_includes_some() - { - bool __succ = false; - for (auto __u : _M_nfa->_M_final_states()) - if (_M_covered.count(__u)) - { - __succ = true; - auto& __cu = _M_covered[__u]; - if (_M_cur_results == nullptr || *__cu < *_M_cur_results) - _M_cur_results = _ResultsPtr(new _ResultsEntry(*__cu)); - } - return __succ; } // Return whether now is at some word boundry. - template<typename _BiIter, typename _Alloc, - typename _CharT, typename _TraitsT> - bool _Executor<_BiIter, _Alloc, _CharT, _TraitsT>:: + template<typename _BiIter, typename _Alloc, typename _TraitsT, + bool __dfs_mode> + bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: _M_word_boundry(_State<_CharT, _TraitsT> __state) const { // By definition. @@ -376,54 +340,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return __ans; } - template<typename _BiIter, typename _Alloc, - typename _CharT, typename _TraitsT> - void _Executor<_BiIter, _Alloc, _CharT, _TraitsT>:: - _M_set_results(_ResultsVec& __cur_results) - { - for (size_t __i = 0; __i < __cur_results.size(); ++__i) - if (__cur_results[__i].matched) - _M_results[__i] = __cur_results[__i]; - } - - enum class _RegexExecutorPolicy : int - { _S_auto, _S_alternate }; - - // This function decide which executor to use under given circumstances. - // The _S_auto policy now is the following: if a NFA has no back-references - // and has more than _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT quantifiers - // (*, +, ?), the _BFSExecutor will be used, other wise _DFSExecutor. This is - // because _DFSExecutor has a exponential upper bound, but better best-case - // performace. Meanwhile, _BFSExecutor can effectively prevent from - // exponential-long time matching (which must contains many quantifiers), but - // it's slower in average. - // - // For simple regex, _BFSExecutor could be 2 or more times slower than - // _DFSExecutor. - // - // Of course, _BFSExecutor cannot handle back-references. - template<typename _BiIter, typename _Alloc, - typename _CharT, typename _TraitsT, - _RegexExecutorPolicy __policy> - std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>> - __get_executor(_BiIter __b, - _BiIter __e, - std::vector<sub_match<_BiIter>, _Alloc>& __m, - const basic_regex<_CharT, _TraitsT>& __re, - regex_constants::match_flag_type __flags) - { - typedef std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>> - _ExecutorPtr; - typedef _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _DFSExecutorT; - typedef _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _BFSExecutorT; - if (!__re._M_automaton->_M_has_backref - && (__policy == _RegexExecutorPolicy::_S_alternate - || __re._M_automaton->_M_quant_count - > _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT)) - return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, __re, __flags)); - return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, __re, __flags)); - } - _GLIBCXX_END_NAMESPACE_VERSION } // namespace __detail } // namespace diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/ungreedy.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/ungreedy.cc new file mode 100644 index 0000000..ed26ebb --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/ungreedy.cc @@ -0,0 +1,50 @@ +// { dg-options "-std=gnu++11" } + +// +// 2013-10-24 Tim Shen <timshen91@gmail.com> +// +// Copyright (C) 2013 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. + +// 28.11.2 regex_match +// Tests ECMAScript ungreedy match. + +#include <regex> +#include <testsuite_hooks.h> +#include <testsuite_regex.h> + +using namespace __gnu_test; +using namespace std; + +void +test01() +{ + bool test __attribute__((unused)) = true; + + regex re("(a*?)*?"); + cmatch m; + VERIFY(regex_match("a", m, re)); + VERIFY(m.size() == 2); + VERIFY(string(m[0].first, m[0].second) == "a"); +} + +int +main() +{ + test01(); + return 0; +} diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc deleted file mode 100644 index 50141f0..0000000 --- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/extended/string_dispatch_01.cc +++ /dev/null @@ -1,72 +0,0 @@ -// { dg-options "-std=gnu++11" } - -// -// 2013-07-29 Tim Shen <timshen91@gmail.com> -// -// Copyright (C) 2013 Free Software Foundation, Inc. -// -// This file is part of the GNU ISO C++ Library. This library is free -// software; you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the -// Free Software Foundation; either version 3, or (at your option) -// any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License along -// with this library; see the file COPYING3. If not see -// <http://www.gnu.org/licenses/>. - -// 28.11.2 regex_match -// Tests Extended automatic matcher dispatching against a std::string target. - -#include <regex> -#include <testsuite_hooks.h> - -using namespace std; - -template<typename _Bi_iter, typename _Alloc, - typename _Ch_type, typename _Rx_traits> - void - fake_match(_Bi_iter __s, - _Bi_iter __e, - match_results<_Bi_iter, _Alloc>& __m, - const basic_regex<_Ch_type, _Rx_traits>& __re, - regex_constants::match_flag_type __flags - = regex_constants::match_default) - { - using namespace __detail; - auto& __res = (vector<sub_match<_Bi_iter>, _Alloc>&)(__m); - VERIFY( (dynamic_cast - <_DFSExecutor<_Bi_iter, _Alloc, _Ch_type, _Rx_traits>*> - (&*__get_executor<_Bi_iter, _Alloc, _Ch_type, _Rx_traits, - _RegexExecutorPolicy::_S_auto>(__s, __e, __res, __re, __flags)) - != nullptr) ); - } - -void -test01() -{ - bool test __attribute__((unused)) = true; - - regex re("()(one(.*))abc\\1"); // backref cause DFS - const string target("onetwoabc"); - smatch m; - fake_match(target.begin(), target.end(), m, re); - - regex_match(target, m, re); - VERIFY( m[2].matched ); - VERIFY( m[3].matched ); - VERIFY( std::string(m[2].first, m[2].second) == "onetwo" ); - VERIFY( std::string(m[3].first, m[3].second) == "two" ); -} - -int -main() -{ - test01(); - return 0; -} diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/greedy.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/greedy.cc index 107ced0..5821bba 100644 --- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/greedy.cc +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/greedy.cc @@ -54,9 +54,9 @@ test01() VERIFY(regex_search_debug("aaaa", m, regex("(a+)(a+)"))); TEST(1, "aaa"); TEST(2, "a"); - VERIFY(regex_search_debug("aaaa", m, regex("(a+?)(a+)"))); - TEST(1, "a"); - TEST(2, "aaa"); + VERIFY(regex_search_debug("aaaa", m, regex("(a+)(a+?)"))); + TEST(1, "aaa"); + TEST(2, "a"); VERIFY(regex_search_debug("aaaa", m, regex("(a+?)(a+)"))); TEST(1, "a"); TEST(2, "aaa"); diff --git a/libstdc++-v3/testsuite/performance/28_regex/split.cc b/libstdc++-v3/testsuite/performance/28_regex/split.cc index 5b972e4..e94e031 100644 --- a/libstdc++-v3/testsuite/performance/28_regex/split.cc +++ b/libstdc++-v3/testsuite/performance/28_regex/split.cc @@ -18,82 +18,12 @@ // 2013-10-08 Tim Shen <timshen91@gmail.com> #include <testsuite_performance.h> -#include <regex> +#include "split.h" using namespace __gnu_test; -using namespace std; - -void split(string s) -{ - regex re("\\s+"); - for (auto it = sregex_token_iterator(s.begin(), s.end(), re, -1); - it != sregex_token_iterator(); - ++it) - { - } -} int main() { - string source = "\ -// Copyright (C) 2013 Free Software Foundation, Inc.\n\ -//\n\ -// This file is part of the GNU ISO C++ Library. This library is free\n\ -// software; you can redistribute it and/or modify it under the\n\ -// terms of the GNU General Public License as published by the\n\ -// Free Software Foundation; either version 3, or (at your option)\n\ -// any later version.\n\ -\n\ -// This library is distributed in the hope that it will be useful,\n\ -// but WITHOUT ANY WARRANTY; without even the implied warranty of\n\ -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\ -// GNU General Public License for more details.\n\ -\n\ -// You should have received a copy of the GNU General Public License along\n\ -// with this library; see the file COPYING3. If not see\n\ -// <http://www.gnu.org/licenses/>.\n\ -\n\ -// 2013-10-08 Tim Shen <timshen91@gmail.com>\n\ -\n\ -#include <testsuite_performance.h>\n\ -#include <regex>\n\ -\n\ -using namespace __gnu_test;\n\ -using namespace std;\n\ -\n\ -void split(string s)\n\ -{\n\ - regex re(\"\\s+\");\n\ - for (auto it = sregex_token_iterator(s.begin(), s.end(), re, -1);\n\ - it != sregex_token_iterator();\n\ - ++it)\n\ - {\n\ - }\n\ -}\n\ -\n\ -int main()\n\ -{\n\ - string source = \"\";\n\ - time_counter time;\n\ - resource_counter resource;\n\ -\n\ - source = source + source;\n\ - source = source + source;\n\ - source = source + source;\n\ - source = source + source;\n\ - source = source + source;\n\ - source = source + source;\n\ - source = source + source;\n\ - source = source + source;\n\ -\n\ - start_counters(time, resource);\n\ - split(source);\n\ - stop_counters(time, resource);\n\ - report_performance(__FILE__, \"\", time, resource);\n\ -\n\ - return 0;\n\ -}\n"; - time_counter time; resource_counter resource; diff --git a/libstdc++-v3/testsuite/performance/28_regex/split.h b/libstdc++-v3/testsuite/performance/28_regex/split.h new file mode 100644 index 0000000..d016d92 --- /dev/null +++ b/libstdc++-v3/testsuite/performance/28_regex/split.h @@ -0,0 +1,91 @@ +// Copyright (C) 2013 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. + +// 2013-10-26 Tim Shen <timshen91@gmail.com> + +#include <regex> + +using namespace std; + +void split(string s) +{ + regex re("\\s+"); + for (auto it = sregex_token_iterator(s.begin(), s.end(), re, -1); + it != sregex_token_iterator(); + ++it) + { + } +} + +string source = "\ +// Copyright (C) 2013 Free Software Foundation, Inc.\n\ +//\n\ +// This file is part of the GNU ISO C++ Library. This library is free\n\ +// software; you can redistribute it and/or modify it under the\n\ +// terms of the GNU General Public License as published by the\n\ +// Free Software Foundation; either version 3, or (at your option)\n\ +// any later version.\n\ +\n\ +// This library is distributed in the hope that it will be useful,\n\ +// but WITHOUT ANY WARRANTY; without even the implied warranty of\n\ +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\ +// GNU General Public License for more details.\n\ +\n\ +// You should have received a copy of the GNU General Public License along\n\ +// with this library; see the file COPYING3. If not see\n\ +// <http://www.gnu.org/licenses/>.\n\ +\n\ +// 2013-10-08 Tim Shen <timshen91@gmail.com>\n\ +\n\ +#include <testsuite_performance.h>\n\ +#include <regex>\n\ +\n\ +using namespace __gnu_test;\n\ +using namespace std;\n\ +\n\ +void split(string s)\n\ +{\n\ + regex re(\"\\s+\");\n\ + for (auto it = sregex_token_iterator(s.begin(), s.end(), re, -1);\n\ + it != sregex_token_iterator();\n\ + ++it)\n\ + {\n\ + }\n\ +}\n\ +\n\ +int main()\n\ +{\n\ + string source = \"\";\n\ + time_counter time;\n\ + resource_counter resource;\n\ +\n\ + source = source + source;\n\ + source = source + source;\n\ + source = source + source;\n\ + source = source + source;\n\ + source = source + source;\n\ + source = source + source;\n\ + source = source + source;\n\ + source = source + source;\n\ +\n\ + start_counters(time, resource);\n\ + split(source);\n\ + stop_counters(time, resource);\n\ + report_performance(__FILE__, \"\", time, resource);\n\ +\n\ + return 0;\n\ +}\n"; diff --git a/libstdc++-v3/testsuite/performance/28_regex/split_bfs.cc b/libstdc++-v3/testsuite/performance/28_regex/split_bfs.cc new file mode 100644 index 0000000..de2ef76 --- /dev/null +++ b/libstdc++-v3/testsuite/performance/28_regex/split_bfs.cc @@ -0,0 +1,46 @@ +// Copyright (C) 2013 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. + +// 2013-10-26 Tim Shen <timshen91@gmail.com> + +#include <testsuite_performance.h> +#define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 0 +#include "split.h" + +using namespace __gnu_test; + +int main() +{ + time_counter time; + resource_counter resource; + + source = source + source; + source = source + source; + source = source + source; + source = source + source; + source = source + source; + source = source + source; + source = source + source; + source = source + source; + + start_counters(time, resource); + split(source); + stop_counters(time, resource); + report_performance(__FILE__, "", time, resource); + + return 0; +} diff --git a/libstdc++-v3/testsuite/util/testsuite_regex.h b/libstdc++-v3/testsuite/util/testsuite_regex.h index bc0f3d5..596781a 100644 --- a/libstdc++-v3/testsuite/util/testsuite_regex.h +++ b/libstdc++-v3/testsuite/util/testsuite_regex.h @@ -150,7 +150,8 @@ namespace __gnu_test auto __res2 = __regex_algo_impl<_Bi_iter, _Alloc, _Ch_type, _Rx_traits, _RegexExecutorPolicy::_S_alternate, true> (__s, __e, __mm, __re, __flags); - if (__res1 == __res2 && __m == __mm) + // __m is unspecified if return value is false. + if (__res1 == __res2 && (!__res1 || __m == __mm)) return __res1; throw(std::exception()); } |