diff options
author | Tim Shen <timshen91@gmail.com> | 2014-01-16 23:35:21 +0000 |
---|---|---|
committer | Tim Shen <timshen@gcc.gnu.org> | 2014-01-16 23:35:21 +0000 |
commit | ddf41e9db6f43e3fdb0f2717fb47851a2a64d5bc (patch) | |
tree | d4af0e8eec13c68aaeba7d1ccacd8f8508d5064f /libstdc++-v3 | |
parent | 9e6f9ad62c66546ac8b475521e840c9661b6fa78 (diff) | |
download | gcc-ddf41e9db6f43e3fdb0f2717fb47851a2a64d5bc.zip gcc-ddf41e9db6f43e3fdb0f2717fb47851a2a64d5bc.tar.gz gcc-ddf41e9db6f43e3fdb0f2717fb47851a2a64d5bc.tar.bz2 |
regex_automaton.tcc (_StateSeq<>::_M_clone()): Do not use std::map.
2014-01-17 Tim Shen <timshen91@gmail.com>
* include/bits/regex_automaton.tcc (_StateSeq<>::_M_clone()): Do not
use std::map.
* include/bits/regex_automaton.h: Do not use std::set.
* include/bits/regex_compiler.h (_BracketMatcher<>::_M_add_char(),
_BracketMatcher<>::_M_add_collating_element(),
_BracketMatcher<>::_M_add_equivalence_class(),
_BracketMatcher<>::_M_make_range()): Likewise.
* include/bits/regex_compiler.tcc (_BracketMatcher<>::_M_apply()):
Likewise.
* include/bits/regex_executor.h: Do not use std::queue.
* include/bits/regex_executor.tcc (_Executor<>::_M_main(),
_Executor<>::_M_dfs()): Likewise.
* include/std/regex: Remove <map>, <set> and <queue>.
2014-01-17 Tim Shen <timshen91@gmail.com>
* include/bits/regex.h (__compile_nfa<>(), basic_regex<>::basic_regex(),
basic_regex<>::assign()): Change __compile_nfa to accept
const _CharT* only.
* include/bits/regex_compiler.h: Change _Compiler's template
argument from <_FwdIter, _TraitsT> to <_TraitsT>.
* include/bits/regex_compiler.tcc: Likewise.
2014-01-17 Tim Shen <timshen91@gmail.com>
* include/bits/regex_compiler.h: Change _ScannerT to be templated on
the character type.
* include/bits/regex_scanner.h (_Scanner<>::_Scanner()): Separate
_ScannerBase from _Scanner; Change _Scanner's template argument from
_FwdIter to _CharT. Avoid use of std::map and std::set by using arrays
instead.
* include/bits/regex_scanner.tcc (_Scanner<>::_Scanner(),
_Scanner<>::_M_scan_normal(), _Scanner<>::_M_eat_escape_ecma(),
_Scanner<>::_M_eat_escape_posix(), _Scanner<>::_M_eat_escape_awk()):
Likewise.
* include/std/regex: Add <cstring> for using strchr.
2014-01-17 Tim Shen <timshen91@gmail.com>
* bits/regex_automaton.tcc: Indentation fix.
* bits/regex_compiler.h (__compile_nfa<>(), _Compiler<>,
_RegexTranslator<>, _AnyMatcher<>, _CharMatcher<>,
_BracketMatcher<>): Add bool option template parameters and
specializations to make matching more efficient and space saving.
* bits/regex_compiler.tcc: Likewise.
From-SVN: r206690
Diffstat (limited to 'libstdc++-v3')
-rw-r--r-- | libstdc++-v3/ChangeLog | 48 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex.h | 59 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_automaton.h | 7 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_automaton.tcc | 27 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_compiler.h | 308 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_compiler.tcc | 189 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_executor.h | 26 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_executor.tcc | 9 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_scanner.h | 240 | ||||
-rw-r--r-- | libstdc++-v3/include/bits/regex_scanner.tcc | 163 | ||||
-rw-r--r-- | libstdc++-v3/include/std/regex | 4 |
11 files changed, 593 insertions, 487 deletions
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 18b44da..73b48b2 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,51 @@ +2014-01-17 Tim Shen <timshen91@gmail.com> + + * include/bits/regex_automaton.tcc (_StateSeq<>::_M_clone()): Do not + use std::map. + * include/bits/regex_automaton.h: Do not use std::set. + * include/bits/regex_compiler.h (_BracketMatcher<>::_M_add_char(), + _BracketMatcher<>::_M_add_collating_element(), + _BracketMatcher<>::_M_add_equivalence_class(), + _BracketMatcher<>::_M_make_range()): Likewise. + * include/bits/regex_compiler.tcc (_BracketMatcher<>::_M_apply()): + Likewise. + * include/bits/regex_executor.h: Do not use std::queue. + * include/bits/regex_executor.tcc (_Executor<>::_M_main(), + _Executor<>::_M_dfs()): Likewise. + * include/std/regex: Remove <map>, <set> and <queue>. + +2014-01-17 Tim Shen <timshen91@gmail.com> + + * include/bits/regex.h (__compile_nfa<>(), basic_regex<>::basic_regex(), + basic_regex<>::assign()): Change __compile_nfa to accept + const _CharT* only. + * include/bits/regex_compiler.h: Change _Compiler's template + argument from <_FwdIter, _TraitsT> to <_TraitsT>. + * include/bits/regex_compiler.tcc: Likewise. + +2014-01-17 Tim Shen <timshen91@gmail.com> + + * include/bits/regex_compiler.h: Change _ScannerT into char-type + templated. + * include/bits/regex_scanner.h (_Scanner<>::_Scanner()): Separate + _ScannerBase from _Scanner; Change _Scanner's template argument from + _FwdIter to _CharT. Avoid use of std::map and std::set by using arrays + instead. + * include/bits/regex_scanner.tcc (_Scanner<>::_Scanner(), + _Scanner<>::_M_scan_normal(), _Scanner<>::_M_eat_escape_ecma(), + _Scanner<>::_M_eat_escape_posix(), _Scanner<>::_M_eat_escape_awk()): + Likewise. + * include/std/regex: Add <cstring> for using strchr. + +2014-01-17 Tim Shen <timshen91@gmail.com> + + * bits/regex_automaton.tcc: Indentation fix. 
+ * bits/regex_compiler.h (__compile_nfa<>(), _Compiler<>, + _RegexTranslator<> _AnyMatcher<>, _CharMatcher<>, + _BracketMatcher<>): Add bool option template parameters and + specializations to make matching more efficient and space saving. + * bits/regex_compiler.tcc: Likewise. + 2014-01-15 François Dumont <fdumont@gcc.gnu.org> PR libstdc++/59712 diff --git a/libstdc++-v3/include/bits/regex.h b/libstdc++-v3/include/bits/regex.h index ae8e1f5..816f5cf 100644 --- a/libstdc++-v3/include/bits/regex.h +++ b/libstdc++-v3/include/bits/regex.h @@ -60,51 +60,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION template<typename, typename, typename, bool> class _Executor; - template<typename _Tp> - struct __has_contiguous_iter : std::false_type { }; - - template<typename _Ch, typename _Tr, typename _Alloc> - struct __has_contiguous_iter<std::basic_string<_Ch, _Tr, _Alloc>> - : std::true_type // string<Ch> storage is contiguous - { }; - - template<typename _Tp, typename _Alloc> - struct __has_contiguous_iter<std::vector<_Tp, _Alloc>> - : std::true_type // vector<Tp> storage is contiguous - { }; - - template<typename _Alloc> - struct __has_contiguous_iter<std::vector<bool, _Alloc>> - : std::false_type // vector<bool> storage is not contiguous - { }; - - template<typename _Tp> - struct __is_contiguous_normal_iter : std::false_type { }; - - template<typename _Tp, typename _Cont> - struct - __is_contiguous_normal_iter<__gnu_cxx::__normal_iterator<_Tp, _Cont>> - : __has_contiguous_iter<_Cont>::type - { }; - - template<typename _Iter, typename _TraitsT> - using __enable_if_contiguous_normal_iter - = typename enable_if< __is_contiguous_normal_iter<_Iter>::value, - std::shared_ptr<_NFA<_TraitsT>> >::type; - - template<typename _Iter, typename _TraitsT> - using __disable_if_contiguous_normal_iter - = typename enable_if< !__is_contiguous_normal_iter<_Iter>::value, - std::shared_ptr<_NFA<_TraitsT>> >::type; - - template<typename _FwdIter, typename _TraitsT> - 
__disable_if_contiguous_normal_iter<_FwdIter, _TraitsT> - __compile_nfa(_FwdIter __first, _FwdIter __last, const _TraitsT& __traits, - regex_constants::syntax_option_type __flags); - - template<typename _Iter, typename _TraitsT> - __enable_if_contiguous_normal_iter<_Iter, _TraitsT> - __compile_nfa(_Iter __first, _Iter __last, const _TraitsT& __traits, + template<typename _TraitsT> + inline std::shared_ptr<_NFA<_TraitsT>> + __compile_nfa(const typename _TraitsT::char_type* __first, + const typename _TraitsT::char_type* __last, + const _TraitsT& __traits, regex_constants::syntax_option_type __flags); _GLIBCXX_END_NAMESPACE_VERSION @@ -561,7 +521,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION flag_type __f = ECMAScript) : _M_flags(__f), _M_original_str(__first, __last), - _M_automaton(__detail::__compile_nfa(__first, __last, _M_traits, + _M_automaton(__detail::__compile_nfa(_M_original_str.c_str(), + _M_original_str.c_str() + + _M_original_str.size(), + _M_traits, _M_flags)) { } @@ -698,7 +661,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { _M_flags = __flags; _M_original_str.assign(__s.begin(), __s.end()); - _M_automaton = __detail::__compile_nfa(__s.begin(), __s.end(), + auto __p = _M_original_str.c_str(); + _M_automaton = __detail::__compile_nfa(__p, + __p + _M_original_str.size(), _M_traits, _M_flags); return *this; } diff --git a/libstdc++-v3/include/bits/regex_automaton.h b/libstdc++-v3/include/bits/regex_automaton.h index ea73675..a442cfe 100644 --- a/libstdc++-v3/include/bits/regex_automaton.h +++ b/libstdc++-v3/include/bits/regex_automaton.h @@ -41,7 +41,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION */ typedef long _StateIdT; - typedef std::set<_StateIdT> _StateSet; static const _StateIdT _S_invalid_state_id = -1; template<typename _CharT> @@ -138,16 +137,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_start() const { return _M_start_state; } - const _StateSet& - _M_final_states() const - { return _M_accepting_states; } - _SizeT _M_sub_count() const { return _M_subexpr_count; } 
std::vector<size_t> _M_paren_stack; - _StateSet _M_accepting_states; _FlagT _M_flags; _StateIdT _M_start_state; _SizeT _M_subexpr_count; @@ -172,7 +166,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_insert_accept() { auto __ret = _M_insert_state(_StateT(_S_opcode_accept)); - this->_M_accepting_states.insert(__ret); return __ret; } diff --git a/libstdc++-v3/include/bits/regex_automaton.tcc b/libstdc++-v3/include/bits/regex_automaton.tcc index 7edc67f..759b053 100644 --- a/libstdc++-v3/include/bits/regex_automaton.tcc +++ b/libstdc++-v3/include/bits/regex_automaton.tcc @@ -134,9 +134,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _NFA<_TraitsT>::_M_dot(std::ostream& __ostr) const { __ostr << "digraph _Nfa {\n" - " rankdir=LR;\n"; + " rankdir=LR;\n"; for (size_t __i = 0; __i < this->size(); ++__i) - (*this)[__i]._M_dot(__ostr, __i); + (*this)[__i]._M_dot(__ostr, __i); __ostr << "}\n"; return __ostr; } @@ -186,7 +186,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _StateSeq<_TraitsT> _StateSeq<_TraitsT>::_M_clone() { - std::map<_StateIdT, _StateIdT> __m; + std::vector<_StateIdT> __m(_M_nfa.size(), -1); std::stack<_StateIdT> __stack; __stack.push(_M_start); while (!__stack.empty()) @@ -194,30 +194,35 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION auto __u = __stack.top(); __stack.pop(); auto __dup = _M_nfa[__u]; + // _M_insert_state() never return -1 auto __id = _M_nfa._M_insert_state(__dup); __m[__u] = __id; if (__u == _M_end) continue; - if (__m.count(__dup._M_next) == 0) + if (__dup._M_next != _S_invalid_state_id && __m[__dup._M_next] == -1) __stack.push(__dup._M_next); if (__dup._M_opcode == _S_opcode_alternative || __dup._M_opcode == _S_opcode_subexpr_lookahead) - if (__m.count(__dup._M_alt) == 0) + if (__dup._M_alt != _S_invalid_state_id && __m[__dup._M_alt] == -1) __stack.push(__dup._M_alt); } - for (auto __it : __m) + long __size = static_cast<long>(__m.size()); + for (long __k = 0; __k < __size; __k++) { - auto& __ref = _M_nfa[__it.second]; - if (__ref._M_next != -1) + long __v; + if ((__v = 
__m[__k]) == -1) + continue; + auto& __ref = _M_nfa[__v]; + if (__ref._M_next != _S_invalid_state_id) { - _GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_next)); + _GLIBCXX_DEBUG_ASSERT(__m[__ref._M_next] != -1); __ref._M_next = __m[__ref._M_next]; } if (__ref._M_opcode == _S_opcode_alternative || __ref._M_opcode == _S_opcode_subexpr_lookahead) - if (__ref._M_alt != -1) + if (__ref._M_alt != _S_invalid_state_id) { - _GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_alt)); + _GLIBCXX_DEBUG_ASSERT(__m[__ref._M_alt] != -1); __ref._M_alt = __m[__ref._M_alt]; } } diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h index 4ac67df..216f8fb 100644 --- a/libstdc++-v3/include/bits/regex_compiler.h +++ b/libstdc++-v3/include/bits/regex_compiler.h @@ -39,19 +39,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @{ */ - template<typename _TraitsT> + template<typename, bool, bool> struct _BracketMatcher; /// Builds an NFA from an input iterator interval. - template<typename _FwdIter, typename _TraitsT> + template<typename _TraitsT> class _Compiler { public: - typedef typename _TraitsT::string_type _StringT; + typedef typename _TraitsT::char_type _CharT; + typedef const _CharT* _IterT; typedef _NFA<_TraitsT> _RegexT; typedef regex_constants::syntax_option_type _FlagT; - _Compiler(_FwdIter __b, _FwdIter __e, + _Compiler(_IterT __b, _IterT __e, const _TraitsT& __traits, _FlagT __flags); std::shared_ptr<_RegexT> @@ -59,12 +60,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { return make_shared<_RegexT>(std::move(_M_nfa)); } private: - typedef _Scanner<_FwdIter> _ScannerT; - typedef typename _ScannerT::_TokenT _TokenT; - typedef _StateSeq<_TraitsT> _StateSeqT; - typedef std::stack<_StateSeqT, std::vector<_StateSeqT>> _StackT; - typedef _BracketMatcher<_TraitsT> _BMatcherT; - typedef std::ctype<typename _TraitsT::char_type> _CtypeT; + typedef _Scanner<_CharT> _ScannerT; + typedef typename _TraitsT::string_type _StringT; + typedef typename _ScannerT::_TokenT _TokenT; + 
typedef _StateSeq<_TraitsT> _StateSeqT; + typedef std::stack<_StateSeqT> _StackT; + typedef std::ctype<_CharT> _CtypeT; // accepts a specific token or returns false. bool @@ -91,20 +92,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION bool _M_bracket_expression(); - void - _M_expression_term(_BMatcherT& __matcher); + template<bool __icase, bool __collate> + void + _M_insert_any_matcher_ecma(); - bool - _M_range_expression(_BMatcherT& __matcher); + template<bool __icase, bool __collate> + void + _M_insert_any_matcher_posix(); - bool - _M_collating_symbol(_BMatcherT& __matcher); + template<bool __icase, bool __collate> + void + _M_insert_char_matcher(); - bool - _M_equivalence_class(_BMatcherT& __matcher); + template<bool __icase, bool __collate> + void + _M_insert_character_class_matcher(); - bool - _M_character_class(_BMatcherT& __matcher); + template<bool __icase, bool __collate> + void + _M_insert_bracket_matcher(bool __neg); + + template<bool __icase, bool __collate> + void + _M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>& + __matcher); int _M_cur_int_value(int __radix); @@ -129,33 +140,119 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _StackT _M_stack; }; - template<typename _FwdIter, typename _TraitsT> - inline __disable_if_contiguous_normal_iter<_FwdIter, _TraitsT> - __compile_nfa(_FwdIter __first, _FwdIter __last, const _TraitsT& __traits, + template<typename _TraitsT> + inline std::shared_ptr<_NFA<_TraitsT>> + __compile_nfa(const typename _TraitsT::char_type* __first, + const typename _TraitsT::char_type* __last, + const _TraitsT& __traits, regex_constants::syntax_option_type __flags) { - using _Cmplr = _Compiler<_FwdIter, _TraitsT>; + using _Cmplr = _Compiler<_TraitsT>; return _Cmplr(__first, __last, __traits, __flags)._M_get_nfa(); } - template<typename _Iter, typename _TraitsT> - inline __enable_if_contiguous_normal_iter<_Iter, _TraitsT> - __compile_nfa(_Iter __first, _Iter __last, const _TraitsT& __traits, - regex_constants::syntax_option_type __flags) + 
// [28.13.14] + template<typename _TraitsT, bool __icase, bool __collate> + class _RegexTranslator { - size_t __len = __last - __first; - const auto* __cfirst = __len ? std::__addressof(*__first) : nullptr; - return __compile_nfa(__cfirst, __cfirst + __len, __traits, __flags); - } + public: + typedef typename _TraitsT::char_type _CharT; + typedef typename _TraitsT::string_type _StringT; + typedef typename std::conditional<__collate, + _StringT, + _CharT>::type _StrTransT; + + explicit + _RegexTranslator(const _TraitsT& __traits) + : _M_traits(__traits) + { } + + _CharT + _M_translate(_CharT __ch) const + { + if (__icase) + return _M_traits.translate_nocase(__ch); + else if (__collate) + return _M_traits.translate(__ch); + else + return __ch; + } + + _StrTransT + _M_transform(_CharT __ch) const + { + return _M_transform_impl(__ch, typename integral_constant<bool, + __collate>::type()); + } + + private: + _StrTransT + _M_transform_impl(_CharT __ch, false_type) const + { return __ch; } + + _StrTransT + _M_transform_impl(_CharT __ch, true_type) const + { + _StrTransT __str = _StrTransT(1, _M_translate(__ch)); + return _M_traits.transform(__str.begin(), __str.end()); + } - template<typename _TraitsT, bool __is_ecma> - struct _AnyMatcher + const _TraitsT& _M_traits; + }; + + template<typename _TraitsT> + class _RegexTranslator<_TraitsT, false, false> { - typedef typename _TraitsT::char_type _CharT; + public: + typedef typename _TraitsT::char_type _CharT; + typedef _CharT _StrTransT; + + explicit + _RegexTranslator(const _TraitsT& __traits) + { } + + _CharT + _M_translate(_CharT __ch) const + { return __ch; } + + _StrTransT + _M_transform(_CharT __ch) const + { return __ch; } + }; + + template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate> + struct _AnyMatcher; + + template<typename _TraitsT, bool __icase, bool __collate> + struct _AnyMatcher<_TraitsT, false, __icase, __collate> + { + typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT; + 
typedef typename _TransT::_CharT _CharT; explicit _AnyMatcher(const _TraitsT& __traits) - : _M_traits(__traits) + : _M_translator(__traits) + { } + + bool + operator()(_CharT __ch) const + { + static auto __nul = _M_translator._M_translate('\0'); + return _M_translator._M_translate(__ch) != __nul; + } + + _TransT _M_translator; + }; + + template<typename _TraitsT, bool __icase, bool __collate> + struct _AnyMatcher<_TraitsT, true, __icase, __collate> + { + typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT; + typedef typename _TransT::_CharT _CharT; + + explicit + _AnyMatcher(const _TraitsT& __traits) + : _M_translator(__traits) { } bool @@ -165,92 +262,63 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION bool _M_apply(_CharT __ch, true_type) const { - auto __c = _M_traits.translate(__ch); - if (__is_ecma) - { - static auto __n = _M_traits.translate('\n'); - static auto __r = _M_traits.translate('\r'); - return __c != __n && __c != __r; - } - else - { - static auto __nul = _M_traits.translate('\0'); - return __c != __nul; - } + auto __c = _M_translator._M_translate(__ch); + auto __n = _M_translator._M_translate('\n'); + auto __r = _M_translator._M_translate('\r'); + return __c != __n && __c != __r; } bool _M_apply(_CharT __ch, false_type) const { - auto __c = _M_traits.translate(__ch); - if (__is_ecma) - { - static auto __n = _M_traits.translate('\n'); - static auto __r = _M_traits.translate('\r'); - static auto __u2028 = _M_traits.translate(u'\u2028'); - static auto __u2029 = _M_traits.translate(u'\u2029'); - return __c != __n && __c != __r && __c != __u2028 - && __c != __u2029; - } - else - { - static auto __nul = _M_traits.translate('\0'); - return __c != __nul; - } + auto __c = _M_translator._M_translate(__ch); + auto __n = _M_translator._M_translate('\n'); + auto __r = _M_translator._M_translate('\r'); + auto __u2028 = _M_translator._M_translate(u'\u2028'); + auto __u2029 = _M_translator._M_translate(u'\u2029'); + return __c != __n && __c != __r && __c != __u2028 
&& __c != __u2029; } - const _TraitsT& _M_traits; + _TransT _M_translator; }; - template<typename _TraitsT, bool __icase> + template<typename _TraitsT, bool __icase, bool __collate> struct _CharMatcher { - typedef typename _TraitsT::char_type _CharT; + typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT; + typedef typename _TransT::_CharT _CharT; _CharMatcher(_CharT __ch, const _TraitsT& __traits) - : _M_traits(__traits), _M_ch(_M_translate(__ch)) + : _M_translator(__traits), _M_ch(_M_translator._M_translate(__ch)) { } bool operator()(_CharT __ch) const - { return _M_ch == _M_translate(__ch); } + { return _M_ch == _M_translator._M_translate(__ch); } - _CharT - _M_translate(_CharT __ch) const - { - if (__icase) - return _M_traits.translate_nocase(__ch); - else - return _M_traits.translate(__ch); - } - - const _TraitsT& _M_traits; - _CharT _M_ch; + _TransT _M_translator; + _CharT _M_ch; }; /// Matches a character range (bracket expression) - // TODO: Convert used _M_flags fields to template parameters, including - // collate and icase. Avoid using std::set, could use flat_set - // (sorted vector and binary search) instead. 
- template<typename _TraitsT> + template<typename _TraitsT, bool __icase, bool __collate> struct _BracketMatcher { public: - typedef typename _TraitsT::char_type _CharT; - typedef typename _TraitsT::char_class_type _CharClassT; - typedef typename _TraitsT::string_type _StringT; - typedef regex_constants::syntax_option_type _FlagT; + typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT; + typedef typename _TransT::_CharT _CharT; + typedef typename _TransT::_StrTransT _StrTransT; + typedef typename _TraitsT::string_type _StringT; + typedef typename _TraitsT::char_class_type _CharClassT; public: _BracketMatcher(bool __is_non_matching, - const _TraitsT& __traits, - _FlagT __flags) - : + const _TraitsT& __traits) + : _M_class_set(0), _M_translator(__traits), _M_traits(__traits), + _M_is_non_matching(__is_non_matching) #ifdef _GLIBCXX_DEBUG - _M_is_ready(false), + , _M_is_ready(false) #endif - _M_traits(__traits), _M_class_set(0), _M_flags(__flags), - _M_is_non_matching(__is_non_matching) { } bool @@ -263,7 +331,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION void _M_add_char(_CharT __c) { - _M_char_set.insert(_M_translate(__c)); + _M_char_set.push_back(_M_translator._M_translate(__c)); #ifdef _GLIBCXX_DEBUG _M_is_ready = false; #endif @@ -276,7 +344,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __s.data() + __s.size()); if (__st.empty()) __throw_regex_error(regex_constants::error_collate); - _M_char_set.insert(_M_translate(__st[0])); + _M_char_set.push_back(_M_translator._M_translate(__st[0])); #ifdef _GLIBCXX_DEBUG _M_is_ready = false; #endif @@ -291,7 +359,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __throw_regex_error(regex_constants::error_collate); __st = _M_traits.transform_primary(__st.data(), __st.data() + __st.size()); - _M_equiv_set.insert(__st); + _M_equiv_set.push_back(__st); #ifdef _GLIBCXX_DEBUG _M_is_ready = false; #endif @@ -302,7 +370,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { auto __mask = _M_traits.lookup_classname(__s.data(), __s.data() + __s.size(), - 
_M_is_icase()); + __icase); if (__mask == 0) __throw_regex_error(regex_constants::error_ctype); _M_class_set |= __mask; @@ -314,12 +382,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION void _M_make_range(_CharT __l, _CharT __r) { - if (_M_flags & regex_constants::collate) - _M_range_set.insert( - make_pair(_M_get_str(_M_translate(__l)), - _M_get_str(_M_translate(__r)))); - else - _M_range_set.insert(make_pair(_M_get_str(__l), _M_get_str(__r))); + _M_range_set.push_back(make_pair(_M_translator._M_transform(__l), + _M_translator._M_transform(__r))); #ifdef _GLIBCXX_DEBUG _M_is_ready = false; #endif @@ -350,26 +414,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_apply(_CharT __ch, true_type) const { return _M_cache[static_cast<_UnsignedCharT>(__ch)]; } - _CharT - _M_translate(_CharT __c) const - { - if (_M_is_icase()) - return _M_traits.translate_nocase(__c); - else - return _M_traits.translate(__c); - } - - bool - _M_is_icase() const - { return _M_flags & regex_constants::icase; } - - _StringT - _M_get_str(_CharT __c) const - { - _StringT __s(1, __c); - return _M_traits.transform(__s.begin(), __s.end()); - } - void _M_make_cache(true_type) { @@ -383,16 +427,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { } private: - _CacheT _M_cache; - std::set<_CharT> _M_char_set; - std::set<_StringT> _M_equiv_set; - std::set<pair<_StringT, _StringT>> _M_range_set; - const _TraitsT& _M_traits; - _CharClassT _M_class_set; - _FlagT _M_flags; - bool _M_is_non_matching; + _CacheT _M_cache; + std::vector<_CharT> _M_char_set; + std::vector<_StringT> _M_equiv_set; + std::vector<pair<_StrTransT, _StrTransT>> _M_range_set; + _CharClassT _M_class_set; + _TransT _M_translator; + const _TraitsT& _M_traits; + bool _M_is_non_matching; #ifdef _GLIBCXX_DEBUG - bool _M_is_ready; + bool _M_is_ready; #endif }; diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc index 4da653f..621e43f 100644 --- a/libstdc++-v3/include/bits/regex_compiler.tcc +++ 
b/libstdc++-v3/include/bits/regex_compiler.tcc @@ -59,9 +59,9 @@ namespace __detail { _GLIBCXX_BEGIN_NAMESPACE_VERSION - template<typename _FwdIter, typename _TraitsT> - _Compiler<_FwdIter, _TraitsT>:: - _Compiler(_FwdIter __b, _FwdIter __e, + template<typename _TraitsT> + _Compiler<_TraitsT>:: + _Compiler(_IterT __b, _IterT __e, const _TraitsT& __traits, _FlagT __flags) : _M_flags((__flags & (regex_constants::ECMAScript @@ -89,9 +89,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_nfa._M_eliminate_dummy(); } - template<typename _FwdIter, typename _TraitsT> + template<typename _TraitsT> void - _Compiler<_FwdIter, _TraitsT>:: + _Compiler<_TraitsT>:: _M_disjunction() { this->_M_alternative(); @@ -110,9 +110,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } } - template<typename _FwdIter, typename _TraitsT> + template<typename _TraitsT> void - _Compiler<_FwdIter, _TraitsT>:: + _Compiler<_TraitsT>:: _M_alternative() { if (this->_M_term()) @@ -126,9 +126,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy())); } - template<typename _FwdIter, typename _TraitsT> + template<typename _TraitsT> bool - _Compiler<_FwdIter, _TraitsT>:: + _Compiler<_TraitsT>:: _M_term() { if (this->_M_assertion()) @@ -141,9 +141,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return false; } - template<typename _FwdIter, typename _TraitsT> + template<typename _TraitsT> bool - _Compiler<_FwdIter, _TraitsT>:: + _Compiler<_TraitsT>:: _M_assertion() { if (_M_match_token(_ScannerT::_S_token_line_begin)) @@ -172,9 +172,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return true; } - template<typename _FwdIter, typename _TraitsT> + template<typename _TraitsT> void - _Compiler<_FwdIter, _TraitsT>:: + _Compiler<_TraitsT>:: _M_quantifier() { bool __neg = (_M_flags & regex_constants::ECMAScript); @@ -278,52 +278,39 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } } - template<typename _FwdIter, typename _TraitsT> +#define __INSERT_REGEX_MATCHER(__func, args...)\ + do\ + if (!(_M_flags & regex_constants::icase))\ 
+ if (!(_M_flags & regex_constants::collate))\ + __func<false, false>(args);\ + else\ + __func<false, true>(args);\ + else\ + if (!(_M_flags & regex_constants::collate))\ + __func<true, false>(args);\ + else\ + __func<true, true>(args);\ + while (false) + + template<typename _TraitsT> bool - _Compiler<_FwdIter, _TraitsT>:: + _Compiler<_TraitsT>:: _M_atom() { if (_M_match_token(_ScannerT::_S_token_anychar)) { - if (_M_flags & regex_constants::ECMAScript) - _M_stack.push(_StateSeqT(_M_nfa, - _M_nfa._M_insert_matcher - (_AnyMatcher<_TraitsT, - true>(_M_traits)))); + if (!(_M_flags & regex_constants::ECMAScript)) + __INSERT_REGEX_MATCHER(_M_insert_any_matcher_posix); else - _M_stack.push(_StateSeqT(_M_nfa, - _M_nfa._M_insert_matcher - (_AnyMatcher<_TraitsT, - false>(_M_traits)))); + __INSERT_REGEX_MATCHER(_M_insert_any_matcher_ecma); } else if (_M_try_char()) - { - if (_M_flags & regex_constants::icase) - _M_stack.push(_StateSeqT(_M_nfa, - _M_nfa._M_insert_matcher - (_CharMatcher<_TraitsT, - true>(_M_value[0], - _M_traits)))); - else - _M_stack.push(_StateSeqT(_M_nfa, - _M_nfa._M_insert_matcher - (_CharMatcher<_TraitsT, - false>(_M_value[0], - _M_traits)))); - } + __INSERT_REGEX_MATCHER(_M_insert_char_matcher); else if (_M_match_token(_ScannerT::_S_token_backref)) _M_stack.push(_StateSeqT(_M_nfa, _M_nfa. 
_M_insert_backref(_M_cur_int_value(10)))); else if (_M_match_token(_ScannerT::_S_token_quoted_class)) - { - _GLIBCXX_DEBUG_ASSERT(_M_value.size() == 1); - _BMatcherT __matcher(_M_ctype.is(_CtypeT::upper, _M_value[0]), - _M_traits, _M_flags); - __matcher._M_add_character_class(_M_value); - __matcher._M_ready(); - _M_stack.push(_StateSeqT(_M_nfa, - _M_nfa._M_insert_matcher(std::move(__matcher)))); - } + __INSERT_REGEX_MATCHER(_M_insert_character_class_matcher); else if (_M_match_token(_ScannerT::_S_token_subexpr_no_group_begin)) { _StateSeqT __r(_M_nfa, _M_nfa._M_insert_dummy()); @@ -348,28 +335,90 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return true; } - template<typename _FwdIter, typename _TraitsT> + template<typename _TraitsT> bool - _Compiler<_FwdIter, _TraitsT>:: + _Compiler<_TraitsT>:: _M_bracket_expression() { bool __neg = _M_match_token(_ScannerT::_S_token_bracket_neg_begin); if (!(__neg || _M_match_token(_ScannerT::_S_token_bracket_begin))) return false; - _BMatcherT __matcher(__neg, _M_traits, _M_flags); + __INSERT_REGEX_MATCHER(_M_insert_bracket_matcher, __neg); + return true; + } +#undef __INSERT_REGEX_MATCHER + + template<typename _TraitsT> + template<bool __icase, bool __collate> + void + _Compiler<_TraitsT>:: + _M_insert_any_matcher_ecma() + { + _M_stack.push(_StateSeqT(_M_nfa, + _M_nfa._M_insert_matcher + (_AnyMatcher<_TraitsT, true, __icase, __collate> + (_M_traits)))); + } + + template<typename _TraitsT> + template<bool __icase, bool __collate> + void + _Compiler<_TraitsT>:: + _M_insert_any_matcher_posix() + { + _M_stack.push(_StateSeqT(_M_nfa, + _M_nfa._M_insert_matcher + (_AnyMatcher<_TraitsT, false, __icase, __collate> + (_M_traits)))); + } + + template<typename _TraitsT> + template<bool __icase, bool __collate> + void + _Compiler<_TraitsT>:: + _M_insert_char_matcher() + { + _M_stack.push(_StateSeqT(_M_nfa, + _M_nfa._M_insert_matcher + (_CharMatcher<_TraitsT, __icase, __collate> + (_M_value[0], _M_traits)))); + } + + template<typename _TraitsT> + 
template<bool __icase, bool __collate> + void + _Compiler<_TraitsT>:: + _M_insert_character_class_matcher() + { + _GLIBCXX_DEBUG_ASSERT(_M_value.size() == 1); + _BracketMatcher<_TraitsT, __icase, __collate> __matcher + (_M_ctype.is(_CtypeT::upper, _M_value[0]), _M_traits); + __matcher._M_add_character_class(_M_value); + __matcher._M_ready(); + _M_stack.push(_StateSeqT(_M_nfa, + _M_nfa._M_insert_matcher(std::move(__matcher)))); + } + + template<typename _TraitsT> + template<bool __icase, bool __collate> + void + _Compiler<_TraitsT>:: + _M_insert_bracket_matcher(bool __neg) + { + _BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, _M_traits); while (!_M_match_token(_ScannerT::_S_token_bracket_end)) _M_expression_term(__matcher); __matcher._M_ready(); _M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_matcher(std::move(__matcher)))); - return true; } - template<typename _FwdIter, typename _TraitsT> + template<typename _TraitsT> + template<bool __icase, bool __collate> void - _Compiler<_FwdIter, _TraitsT>:: - _M_expression_term(_BMatcherT& __matcher) + _Compiler<_TraitsT>:: + _M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>& __matcher) { if (_M_match_token(_ScannerT::_S_token_collsymbol)) __matcher._M_add_collating_element(_M_value); @@ -403,9 +452,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __throw_regex_error(regex_constants::error_brack); } - template<typename _FwdIter, typename _TraitsT> + template<typename _TraitsT> bool - _Compiler<_FwdIter, _TraitsT>:: + _Compiler<_TraitsT>:: _M_try_char() { bool __is_char = false; @@ -424,9 +473,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return __is_char; } - template<typename _FwdIter, typename _TraitsT> + template<typename _TraitsT> bool - _Compiler<_FwdIter, _TraitsT>:: + _Compiler<_TraitsT>:: _M_match_token(_TokenT token) { if (token == _M_scanner._M_get_token()) @@ -438,9 +487,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return false; } - template<typename _FwdIter, typename _TraitsT> + template<typename _TraitsT> 
int - _Compiler<_FwdIter, _TraitsT>:: + _Compiler<_TraitsT>:: _M_cur_int_value(int __radix) { long __v = 0; @@ -450,25 +499,31 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return __v; } - template<typename _TraitsT> + template<typename _TraitsT, bool __icase, bool __collate> bool - _BracketMatcher<_TraitsT>::_M_apply(_CharT __ch, false_type) const + _BracketMatcher<_TraitsT, __icase, __collate>:: + _M_apply(_CharT __ch, false_type) const { bool __ret = false; - if (_M_traits.isctype(__ch, _M_class_set) - || _M_char_set.count(_M_translate(__ch)) - || _M_equiv_set.count(_M_traits.transform_primary(&__ch, &__ch+1))) + if (std::find(_M_char_set.begin(), _M_char_set.end(), + _M_translator._M_translate(__ch)) + != _M_char_set.end()) __ret = true; else { - _StringT __s = _M_get_str(_M_flags & regex_constants::collate - ? _M_translate(__ch) : __ch); + auto __s = _M_translator._M_transform(__ch); for (auto& __it : _M_range_set) if (__it.first <= __s && __s <= __it.second) { __ret = true; break; } + if (_M_traits.isctype(__ch, _M_class_set)) + __ret = true; + else if (std::find(_M_equiv_set.begin(), _M_equiv_set.end(), + _M_traits.transform_primary(&__ch, &__ch+1)) + != _M_equiv_set.end()) + __ret = true; } if (_M_is_non_matching) return !__ret; diff --git a/libstdc++-v3/include/bits/regex_executor.h b/libstdc++-v3/include/bits/regex_executor.h index bed9014..0885716 100644 --- a/libstdc++-v3/include/bits/regex_executor.h +++ b/libstdc++-v3/include/bits/regex_executor.h @@ -65,7 +65,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_nfa(*__re._M_automaton), _M_results(__results), _M_match_queue(__dfs_mode ? nullptr - : new queue<pair<_StateIdT, _ResultsVec>>()), + : new vector<pair<_StateIdT, _ResultsVec>>()), _M_visited(__dfs_mode ? nullptr : new vector<bool>(_M_nfa.size())), _M_flags((__flags & regex_constants::match_prev_avail) ? 
(__flags @@ -133,23 +133,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_lookahead(_State<_TraitsT> __state); public: - _ResultsVec _M_cur_results; - _BiIter _M_current; - const _BiIter _M_begin; - const _BiIter _M_end; - const _RegexT& _M_re; - const _NFAT& _M_nfa; - _ResultsVec& _M_results; + _ResultsVec _M_cur_results; + _BiIter _M_current; + const _BiIter _M_begin; + const _BiIter _M_end; + const _RegexT& _M_re; + const _NFAT& _M_nfa; + _ResultsVec& _M_results; // Used in BFS, saving states that need to be considered for the next // character. - std::unique_ptr<queue<pair<_StateIdT, _ResultsVec>>> _M_match_queue; + std::unique_ptr<vector<pair<_StateIdT, _ResultsVec>>> _M_match_queue; // Used in BFS, indicating that which state is already visited. - std::unique_ptr<vector<bool>> _M_visited; - _FlagT _M_flags; + std::unique_ptr<vector<bool>> _M_visited; + _FlagT _M_flags; // To record current solution. - _StateIdT _M_start_state; + _StateIdT _M_start_state; // Do we have a solution so far? - bool _M_has_sol; + bool _M_has_sol; }; //@} regex-detail diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc index 85b70b8..e1cfcb0 100644 --- a/libstdc++-v3/include/bits/regex_executor.tcc +++ b/libstdc++-v3/include/bits/regex_executor.tcc @@ -111,7 +111,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } else { - _M_match_queue->push(make_pair(_M_start_state, _M_results)); + _M_match_queue->push_back(make_pair(_M_start_state, _M_results)); bool __ret = false; while (1) { @@ -120,10 +120,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION break; _M_visited->assign(_M_visited->size(), false); auto _M_old_queue = std::move(*_M_match_queue); - while (!_M_old_queue.empty()) + for (auto __task : _M_old_queue) { - auto __task = _M_old_queue.front(); - _M_old_queue.pop(); _M_cur_results = __task.second; _M_dfs<__match_mode>(__task.first); } @@ -279,7 +277,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } else if (__state._M_matches(*_M_current)) - 
_M_match_queue->push(make_pair(__state._M_next, _M_cur_results)); + _M_match_queue->push_back(make_pair(__state._M_next, + _M_cur_results)); break; // First fetch the matched result from _M_cur_results as __submatch; // then compare it with diff --git a/libstdc++-v3/include/bits/regex_scanner.h b/libstdc++-v3/include/bits/regex_scanner.h index d113c5d..6dc2b4e 100644 --- a/libstdc++-v3/include/bits/regex_scanner.h +++ b/libstdc++-v3/include/bits/regex_scanner.h @@ -39,6 +39,154 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @{ */ + struct _ScannerBase + { + public: + /// Token types returned from the scanner. + enum _TokenT + { + _S_token_anychar, + _S_token_ord_char, + _S_token_oct_num, + _S_token_hex_num, + _S_token_backref, + _S_token_subexpr_begin, + _S_token_subexpr_no_group_begin, + _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n' + _S_token_subexpr_end, + _S_token_bracket_begin, + _S_token_bracket_neg_begin, + _S_token_bracket_end, + _S_token_interval_begin, + _S_token_interval_end, + _S_token_quoted_class, + _S_token_char_class_name, + _S_token_collsymbol, + _S_token_equiv_class_name, + _S_token_opt, + _S_token_or, + _S_token_closure0, + _S_token_closure1, + _S_token_ungreedy, + _S_token_line_begin, + _S_token_line_end, + _S_token_word_bound, // neg if _M_value[0] == 'n' + _S_token_comma, + _S_token_dup_count, + _S_token_eof, + _S_token_unknown + }; + + protected: + typedef regex_constants::syntax_option_type _FlagT; + + enum _StateT + { + _S_state_normal, + _S_state_in_brace, + _S_state_in_bracket, + }; + + protected: + _ScannerBase(_FlagT __flags) + : _M_state(_S_state_normal), + _M_flags(__flags), + _M_escape_tbl(_M_is_ecma() + ? _M_ecma_escape_tbl + : _M_awk_escape_tbl), + _M_spec_char(_M_is_ecma() + ? _M_ecma_spec_char + : _M_is_basic() + ? 
_M_basic_spec_char + : _M_extended_spec_char), + _M_at_bracket_start(false) + { } + + protected: + const char* + _M_find_escape(char __c) + { + auto __it = _M_escape_tbl; + for (; __it->first != '\0'; ++__it) + if (__it->first == __c) + return &__it->second; + return nullptr; + } + + bool + _M_is_ecma() const + { return _M_flags & regex_constants::ECMAScript; } + + bool + _M_is_basic() const + { return _M_flags & (regex_constants::basic | regex_constants::grep); } + + bool + _M_is_extended() const + { + return _M_flags & (regex_constants::extended + | regex_constants::egrep + | regex_constants::awk); + } + + bool + _M_is_grep() const + { return _M_flags & (regex_constants::grep | regex_constants::egrep); } + + bool + _M_is_awk() const + { return _M_flags & regex_constants::awk; } + + protected: + const std::pair<char, _TokenT> _M_token_tbl[9] = + { + {'^', _S_token_line_begin}, + {'$', _S_token_line_end}, + {'.', _S_token_anychar}, + {'*', _S_token_closure0}, + {'+', _S_token_closure1}, + {'?', _S_token_opt}, + {'|', _S_token_or}, + {'\n', _S_token_or}, // grep and egrep + {'\0', _S_token_or}, + }; + const std::pair<char, char> _M_ecma_escape_tbl[8] = + { + {'0', '\0'}, + {'b', '\b'}, + {'f', '\f'}, + {'n', '\n'}, + {'r', '\r'}, + {'t', '\t'}, + {'v', '\v'}, + {'\0', '\0'}, + }; + const std::pair<char, char> _M_awk_escape_tbl[11] = + { + {'"', '"'}, + {'/', '/'}, + {'\\', '\\'}, + {'a', '\a'}, + {'b', '\b'}, + {'f', '\f'}, + {'n', '\n'}, + {'r', '\r'}, + {'t', '\t'}, + {'v', '\v'}, + {'\0', '\0'}, + }; + const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|"; + const char* _M_basic_spec_char = ".[\\*^$"; + const char* _M_extended_spec_char = ".[\\()*+?{|^$"; + + _StateT _M_state; + _FlagT _M_flags; + _TokenT _M_token; + const std::pair<char, char>* _M_escape_tbl; + const char* _M_spec_char; + bool _M_at_bracket_start; + }; + /** * @brief struct _Scanner. Scans an input range for regex tokens. 
* @@ -49,51 +197,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * constructor: different regular expression grammars will interpret * the same input pattern in syntactically different ways. */ - template<typename _FwdIter> + template<typename _CharT> class _Scanner + : public _ScannerBase { public: - typedef typename std::iterator_traits<_FwdIter>::value_type _CharT; + typedef const _CharT* _IterT; typedef std::basic_string<_CharT> _StringT; typedef regex_constants::syntax_option_type _FlagT; typedef const std::ctype<_CharT> _CtypeT; - /// Token types returned from the scanner. - enum _TokenT - { - _S_token_anychar, - _S_token_ord_char, - _S_token_oct_num, - _S_token_hex_num, - _S_token_backref, - _S_token_subexpr_begin, - _S_token_subexpr_no_group_begin, - _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n' - _S_token_subexpr_end, - _S_token_bracket_begin, - _S_token_bracket_neg_begin, - _S_token_bracket_end, - _S_token_interval_begin, - _S_token_interval_end, - _S_token_quoted_class, - _S_token_char_class_name, - _S_token_collsymbol, - _S_token_equiv_class_name, - _S_token_opt, - _S_token_or, - _S_token_closure0, - _S_token_closure1, - _S_token_ungreedy, - _S_token_line_begin, - _S_token_line_end, - _S_token_word_bound, // neg if _M_value[0] == 'n' - _S_token_comma, - _S_token_dup_count, - _S_token_eof, - _S_token_unknown - }; - - _Scanner(_FwdIter __begin, _FwdIter __end, + _Scanner(_IterT __begin, _IterT __end, _FlagT __flags, std::locale __loc); void @@ -113,13 +227,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif private: - enum _StateT - { - _S_state_normal, - _S_state_in_brace, - _S_state_in_bracket, - }; - void _M_scan_normal(); @@ -141,49 +248,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION void _M_eat_class(char); - constexpr bool - _M_is_ecma() - { return _M_flags & regex_constants::ECMAScript; } - - constexpr bool - _M_is_basic() - { return _M_flags & (regex_constants::basic | regex_constants::grep); } - - constexpr bool - _M_is_extended() - { - return _M_flags 
& (regex_constants::extended - | regex_constants::egrep - | regex_constants::awk); - } - - constexpr bool - _M_is_grep() - { return _M_flags & (regex_constants::grep | regex_constants::egrep); } - - constexpr bool - _M_is_awk() - { return _M_flags & regex_constants::awk; } - - _StateT _M_state; - _FwdIter _M_current; - _FwdIter _M_end; - _FlagT _M_flags; + _IterT _M_current; + _IterT _M_end; _CtypeT& _M_ctype; - _TokenT _M_token; _StringT _M_value; - bool _M_at_bracket_start; - public: - // FIXME: make them static when this file is stable. - const std::map<char, _TokenT> _M_token_map; - const std::map<char, char> _M_ecma_escape_map; - const std::map<char, char> _M_awk_escape_map; - const std::set<char> _M_ecma_spec_char; - const std::set<char> _M_basic_spec_char; - const std::set<char> _M_extended_spec_char; - - const std::map<char, char>& _M_escape_map; - const std::set<char>& _M_spec_char; void (_Scanner::* _M_eat_escape)(); }; diff --git a/libstdc++-v3/include/bits/regex_scanner.tcc b/libstdc++-v3/include/bits/regex_scanner.tcc index 34d78ec..d954d07 100644 --- a/libstdc++-v3/include/bits/regex_scanner.tcc +++ b/libstdc++-v3/include/bits/regex_scanner.tcc @@ -52,106 +52,22 @@ namespace __detail { _GLIBCXX_BEGIN_NAMESPACE_VERSION - template<typename _FwdIter> - _Scanner<_FwdIter>:: - _Scanner(_FwdIter __begin, _FwdIter __end, + template<typename _CharT> + _Scanner<_CharT>:: + _Scanner(typename _Scanner::_IterT __begin, + typename _Scanner::_IterT __end, _FlagT __flags, std::locale __loc) - : _M_state(_S_state_normal), _M_current(__begin), _M_end(__end), - _M_flags(__flags), + : _ScannerBase(__flags), + _M_current(__begin), _M_end(__end), _M_ctype(std::use_facet<_CtypeT>(__loc)), - _M_at_bracket_start(false), - _M_token_map - { - {'^', _S_token_line_begin}, - {'$', _S_token_line_end}, - {'.', _S_token_anychar}, - {'*', _S_token_closure0}, - {'+', _S_token_closure1}, - {'?', _S_token_opt}, - {'|', _S_token_or}, - // grep and egrep - {'\n', _S_token_or}, - }, - 
_M_ecma_escape_map - { - {'0', '\0'}, - {'b', '\b'}, - {'f', '\f'}, - {'n', '\n'}, - {'r', '\r'}, - {'t', '\t'}, - {'v', '\v'}, - }, - _M_awk_escape_map - { - {'"', '"'}, - {'/', '/'}, - {'\\', '\\'}, - {'a', '\a'}, - {'b', '\b'}, - {'f', '\f'}, - {'n', '\n'}, - {'r', '\r'}, - {'t', '\t'}, - {'v', '\v'}, - }, - _M_ecma_spec_char - { - '^', - '$', - '\\', - '.', - '*', - '+', - '?', - '(', - ')', - '[', - ']', - '{', - '}', - '|', - }, - _M_basic_spec_char - { - '.', - '[', - '\\', - '*', - '^', - '$', - }, - _M_extended_spec_char - { - '.', - '[', - '\\', - '(', - ')', - '*', - '+', - '?', - '{', - '|', - '^', - '$', - }, - _M_escape_map(_M_is_ecma() - ? _M_ecma_escape_map - : _M_awk_escape_map), - _M_spec_char(_M_is_ecma() - ? _M_ecma_spec_char - : _M_is_basic() - ? _M_basic_spec_char - : _M_extended_spec_char), _M_eat_escape(_M_is_ecma() ? &_Scanner::_M_eat_escape_ecma : &_Scanner::_M_eat_escape_posix) { _M_advance(); } - template<typename _FwdIter> + template<typename _CharT> void - _Scanner<_FwdIter>:: + _Scanner<_CharT>:: _M_advance() { if (_M_current == _M_end) @@ -173,12 +89,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // Differences between styles: // 1) "\(", "\)", "\{" in basic. It's not escaping. // 2) "(?:", "(?=", "(?!" in ECMAScript. 
- template<typename _FwdIter> + template<typename _CharT> void - _Scanner<_FwdIter>:: + _Scanner<_CharT>:: _M_scan_normal() { auto __c = *_M_current++; + const char* __pos; if (__c == '\\') { @@ -244,11 +161,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_state = _S_state_in_brace; _M_token = _S_token_interval_begin; } - else if ((_M_spec_char.count(_M_ctype.narrow(__c, '\0')) + else if (((__pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0'))) + != nullptr + && *__pos != '\0' && __c != ']' && __c != '}') || (_M_is_grep() && __c == '\n')) - _M_token = _M_token_map.at(__c); + { + auto __it = _M_token_tbl; + auto __narrowc = _M_ctype.narrow(__c, '\0'); + for (; __it->first != '\0'; ++__it) + if (__it->first == __narrowc) + { + _M_token = __it->second; + return; + } + _GLIBCXX_DEBUG_ASSERT(false); + } else { _M_token = _S_token_ord_char; @@ -259,9 +188,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // Differences between styles: // 1) different semantics of "[]" and "[^]". // 2) Escaping in bracket expr. - template<typename _FwdIter> + template<typename _CharT> void - _Scanner<_FwdIter>:: + _Scanner<_CharT>:: _M_scan_in_bracket() { if (_M_current == _M_end) @@ -316,9 +245,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // Differences between styles: // 1) "\}" in basic style. 
- template<typename _FwdIter> + template<typename _CharT> void - _Scanner<_FwdIter>:: + _Scanner<_CharT>:: _M_scan_in_brace() { if (_M_current == _M_end) @@ -357,21 +286,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION __throw_regex_error(regex_constants::error_badbrace); } - template<typename _FwdIter> + template<typename _CharT> void - _Scanner<_FwdIter>:: + _Scanner<_CharT>:: _M_eat_escape_ecma() { if (_M_current == _M_end) __throw_regex_error(regex_constants::error_escape); auto __c = *_M_current++; + auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0')); - if (_M_escape_map.count(_M_ctype.narrow(__c, '\0')) - && (__c != 'b' || _M_state == _S_state_in_bracket)) + if (__pos != nullptr && (__c != 'b' || _M_state == _S_state_in_bracket)) { _M_token = _S_token_ord_char; - _M_value.assign(1, _M_escape_map.at(__c)); + _M_value.assign(1, *__pos); } else if (__c == 'b') { @@ -431,17 +360,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // Differences between styles: // 1) Extended doesn't support backref, but basic does. 
- template<typename _FwdIter> + template<typename _CharT> void - _Scanner<_FwdIter>:: + _Scanner<_CharT>:: _M_eat_escape_posix() { if (_M_current == _M_end) __throw_regex_error(regex_constants::error_escape); auto __c = *_M_current; + auto __pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0')); - if (_M_spec_char.count(_M_ctype.narrow(__c, '\0'))) + if (__pos != nullptr && *__pos != '\0') { _M_token = _S_token_ord_char; _M_value.assign(1, __c); @@ -469,17 +399,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ++_M_current; } - template<typename _FwdIter> + template<typename _CharT> void - _Scanner<_FwdIter>:: + _Scanner<_CharT>:: _M_eat_escape_awk() { auto __c = *_M_current++; + auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0')); - if (_M_escape_map.count(_M_ctype.narrow(__c, '\0'))) + if (__pos != nullptr) { _M_token = _S_token_ord_char; - _M_value.assign(1, _M_escape_map.at(__c)); + _M_value.assign(1, *__pos); } // \ddd for oct representation else if (_M_ctype.is(_CtypeT::digit, __c) @@ -505,9 +436,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // Eats a character class or throwns an exception. // __ch cound be ':', '.' or '=', _M_current is the char after ']' when // returning. 
- template<typename _FwdIter> + template<typename _CharT> void - _Scanner<_FwdIter>:: + _Scanner<_CharT>:: _M_eat_class(char __ch) { for (_M_value.clear(); _M_current != _M_end && *_M_current != __ch;) @@ -525,9 +456,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } #ifdef _GLIBCXX_DEBUG - template<typename _FwdIter> + template<typename _CharT> std::ostream& - _Scanner<_FwdIter>:: + _Scanner<_CharT>:: _M_print(std::ostream& ostr) { switch (_M_token) diff --git a/libstdc++-v3/include/std/regex b/libstdc++-v3/include/std/regex index 9395f50..9161f48 100644 --- a/libstdc++-v3/include/std/regex +++ b/libstdc++-v3/include/std/regex @@ -44,15 +44,13 @@ #include <iterator> #include <locale> #include <memory> -#include <map> -#include <queue> -#include <set> #include <sstream> #include <stack> #include <stdexcept> #include <string> #include <utility> #include <vector> +#include <cstring> #include <bits/regex_constants.h> #include <bits/regex_error.h> |